diff --git a/common.go b/common.go index 5f2dab3..9d6b2aa 100644 --- a/common.go +++ b/common.go @@ -306,7 +306,7 @@ func detectFormat(path string) (string, error) { } switch filepath.Base(path) { - case "JMdict", "JMdict.xml", "JMdict_e", "JMdict_e.xml": + case "JMdict", "JMdict.xml", "JMdict_e", "JMdict_e.xml", "JMdict_e_examp": return "edict", nil case "JMnedict", "JMnedict.xml": return "enamdict", nil @@ -336,7 +336,8 @@ func detectFormat(path string) (string, error) { func ExportDb(inputPath, outputPath, format, language, title string, stride int, pretty bool) error { handlers := map[string]func(string, string, string, string, int, bool) error{ - "edict": jmdictExportDb, + "edict": jmdExportDb, + "forms": formsExportDb, "enamdict": jmnedictExportDb, "epwing": epwingExportDb, "kanjidic": kanjidicExportDb, diff --git a/edict.go b/edict.go deleted file mode 100644 index b6326f0..0000000 --- a/edict.go +++ /dev/null @@ -1,252 +0,0 @@ -package yomichan - -import ( - "os" - "strings" - - "foosoft.net/projects/jmdict" -) - -const edrdgAttribution = "This publication has included material from the JMdict (EDICT, etc.) dictionary files in accordance with the licence provisions of the Electronic Dictionaries Research Group. 
See http://www.edrdg.org/" - -func jmdictBuildRules(term *dbTerm) { - for _, tag := range term.DefinitionTags { - switch tag { - case "adj-i", "v1", "vk", "vz": - term.addRules(tag) - default: - if strings.HasPrefix(tag, "v5") { - term.addRules("v5") - } else if strings.HasPrefix(tag, "vs-") { - term.addRules("vs") - } - } - } -} - -func jmdictBuildScore(term *dbTerm) { - for _, tag := range term.DefinitionTags { - switch tag { - case "arch": - term.Score -= 100 - } - } - for _, tag := range term.TermTags { - switch tag { - case "news", "ichi", "spec", "gai1": - term.Score += 100 - case "P": - term.Score += 500 - case "iK", "ik", "ok", "oK", "io", "oik": - term.Score -= 100 - } - } -} - -func jmdictAddPriorities(term *dbTerm, priorities ...string) { - for _, priority := range priorities { - switch priority { - case "news1", "ichi1", "spec1", "gai1": - term.addTermTags("P") - fallthrough - case "news2", "ichi2", "spec2", "gai2": - term.addTermTags(priority[:len(priority)-1]) - } - } -} - -func jmdictBuildTagMeta(entities map[string]string) dbTagList { - tags := dbTagList{ - dbTag{Name: "news", Notes: "appears frequently in Mainichi Shimbun", Category: "frequent", Order: -2}, - dbTag{Name: "ichi", Notes: "listed as common in Ichimango Goi Bunruishuu", Category: "frequent", Order: -2}, - dbTag{Name: "spec", Notes: "common words not included in frequency lists", Category: "frequent", Order: -2}, - dbTag{Name: "gai", Notes: "common loanword", Category: "frequent", Order: -2}, - dbTag{Name: "P", Notes: "popular term", Category: "popular", Order: -10, Score: 10}, - } - - for name, value := range entities { - tag := dbTag{Name: name, Notes: value} - - switch name { - case "exp", "id": - tag.Category = "expression" - tag.Order = -5 - case "arch": - tag.Category = "archaism" - tag.Order = -4 - case "iK", "ik", "ok", "oK", "io", "oik": - tag.Score = -5 - case "adj-f", "adj-i", "adj-ix", "adj-ku", "adj-na", "adj-nari", "adj-no", "adj-pn", "adj-shiku", "adj-t", "adv", "adv-to", 
"aux-adj", - "aux", "aux-v", "conj", "cop-da", "ctr", "int", "n-adv", "n", "n-pref", "n-pr", "n-suf", "n-t", "num", "pn", "pref", "prt", "suf", - "unc", "v1", "v1-s", "v2a-s", "v2b-k", "v2d-s", "v2g-k", "v2g-s", "v2h-k", "v2h-s", "v2k-k", "v2k-s", "v2m-s", "v2n-s", "v2r-k", - "v2r-s", "v2s-s", "v2t-k", "v2t-s", "v2w-s", "v2y-k", "v2y-s", "v2z-s", "v4b", "v4h", "v4k", "v4m", "v4r", "v4s", "v4t", "v5aru", - "v5b", "v5g", "v5k", "v5k-s", "v5m", "v5n", "v5r-i", "v5r", "v5s", "v5t", "v5u", "v5u-s", "vi", "vk", "vn", "vr", "vs-c", "vs-i", - "vs", "vs-s", "vt", "vz": - tag.Category = "partOfSpeech" - tag.Order = -3 - } - - tags = append(tags, tag) - } - - return tags -} - -func jmdictExtractTerms(edictEntry jmdict.JmdictEntry, language string) []dbTerm { - var terms []dbTerm - - convert := func(reading jmdict.JmdictReading, kanji *jmdict.JmdictKanji) { - if kanji != nil && reading.Restrictions != nil && !hasString(kanji.Expression, reading.Restrictions) { - return - } - - var termBase dbTerm - termBase.addTermTags(reading.Information...) - - if kanji == nil { - termBase.Expression = reading.Reading - jmdictAddPriorities(&termBase, reading.Priorities...) - } else { - termBase.Expression = kanji.Expression - termBase.Reading = reading.Reading - termBase.addTermTags(kanji.Information...) 
- - for _, priority := range kanji.Priorities { - if hasString(priority, reading.Priorities) { - jmdictAddPriorities(&termBase, priority) - } - } - } - - var partsOfSpeech []string - for index, sense := range edictEntry.Sense { - - if len(sense.PartsOfSpeech) != 0 { - partsOfSpeech = sense.PartsOfSpeech - } - - if sense.RestrictedReadings != nil && !hasString(reading.Reading, sense.RestrictedReadings) { - continue - } - - if kanji != nil && sense.RestrictedKanji != nil && !hasString(kanji.Expression, sense.RestrictedKanji) { - continue - } - - term := dbTerm{ - Reading: termBase.Reading, - Expression: termBase.Expression, - Score: len(edictEntry.Sense) - index, - Sequence: edictEntry.Sequence, - } - - for _, glossary := range sense.Glossary { - if glossary.Language == nil && language == "" || glossary.Language != nil && language == *glossary.Language { - term.Glossary = append(term.Glossary, glossary.Content) - } - } - - if len(term.Glossary) == 0 { - continue - } - - term.addDefinitionTags(termBase.DefinitionTags...) - term.addTermTags(termBase.TermTags...) - term.addDefinitionTags(partsOfSpeech...) - term.addDefinitionTags(sense.Fields...) - term.addDefinitionTags(sense.Misc...) - term.addDefinitionTags(sense.Dialects...) 
- - jmdictBuildRules(&term) - jmdictBuildScore(&term) - - terms = append(terms, term) - } - } - - if len(edictEntry.Kanji) > 0 { - for _, kanji := range edictEntry.Kanji { - for _, reading := range edictEntry.Readings { - if reading.NoKanji == nil { - convert(reading, &kanji) - } - } - } - for _, reading := range edictEntry.Readings { - if reading.NoKanji != nil { - convert(reading, nil) - } - } - } else { - for _, reading := range edictEntry.Readings { - convert(reading, nil) - } - } - - return terms -} - -func jmdictExportDb(inputPath, outputPath, language, title string, stride int, pretty bool) error { - reader, err := os.Open(inputPath) - if err != nil { - return err - } - defer reader.Close() - - dict, entities, err := jmdict.LoadJmdictNoTransform(reader) - if err != nil { - return err - } - - var langTag string - switch language { - case "dutch": - langTag = "dut" - case "french": - langTag = "fre" - case "german": - langTag = "ger" - case "hungarian": - langTag = "hun" - case "italian": - langTag = "ita" - case "russian": - langTag = "rus" - case "slovenian": - langTag = "slv" - case "spanish": - langTag = "spa" - case "swedish": - langTag = "swe" - } - - var terms dbTermList - for _, entry := range dict.Entries { - terms = append(terms, jmdictExtractTerms(entry, langTag)...) 
- } - - if title == "" { - title = "JMdict" - } - - recordData := map[string]dbRecordList{ - "term": terms.crush(), - "tag": jmdictBuildTagMeta(entities).crush(), - } - - index := dbIndex{ - Title: title, - Revision: "jmdict4", - Sequenced: true, - Attribution: edrdgAttribution, - } - index.setDefaults() - - return writeDb( - outputPath, - index, - recordData, - stride, - pretty, - ) -} diff --git a/go.mod b/go.mod index 0bca3dd..4f31a22 100644 --- a/go.mod +++ b/go.mod @@ -7,6 +7,7 @@ require ( foosoft.net/projects/zero-epwing-go v0.0.0-20220704035039-bc008453615d github.com/andlabs/ui v0.0.0-20200610043537-70a69d6ae31e github.com/mattn/go-sqlite3 v1.14.14 + golang.org/x/exp v0.0.0-20221207211629-99ab8fa1c11f ) require golang.org/x/text v0.3.7 // indirect diff --git a/go.sum b/go.sum index ca51ada..4dd5f91 100644 --- a/go.sum +++ b/go.sum @@ -6,5 +6,7 @@ github.com/andlabs/ui v0.0.0-20200610043537-70a69d6ae31e h1:wSQCJiig/QkoUnpvelSP github.com/andlabs/ui v0.0.0-20200610043537-70a69d6ae31e/go.mod h1:5G2EjwzgZUPnnReoKvPWVneT8APYbyKkihDVAHUi0II= github.com/mattn/go-sqlite3 v1.14.14 h1:qZgc/Rwetq+MtyE18WhzjokPD93dNqLGNT3QJuLvBGw= github.com/mattn/go-sqlite3 v1.14.14/go.mod h1:NyWgC/yNuGj7Q9rpYnZvas74GogHl5/Z4A/KQRfk6bU= +golang.org/x/exp v0.0.0-20221207211629-99ab8fa1c11f h1:90Jq/vvGVDsqj8QqCynjFw9MCerDguSMODLYII416Y8= +golang.org/x/exp v0.0.0-20221207211629-99ab8fa1c11f/go.mod h1:CxIveKay+FTh1D0yPZemJVgC/95VzuuOLq5Qi4xnoYc= golang.org/x/text v0.3.7 h1:olpwvP2KacW1ZWvsR7uQhoyTYvKAupfQrRGBFM352Gk= golang.org/x/text v0.3.7/go.mod h1:u+2+/6zg+i71rQMx5EYifcz6MCKuco9NR6JIITiCfzQ= diff --git a/jmdict.go b/jmdict.go new file mode 100644 index 0000000..74809e7 --- /dev/null +++ b/jmdict.go @@ -0,0 +1,221 @@ +package yomichan + +import ( + "os" + "regexp" + "strconv" + "strings" + + "foosoft.net/projects/jmdict" + "golang.org/x/exp/slices" +) + +func grammarRules(partsOfSpeech []string) []string { + rules := []string{} + for _, partOfSpeech := range partsOfSpeech { + switch 
partOfSpeech { + case "adj-i", "vk", "vz": + rules = append(rules, partOfSpeech) + default: + if strings.HasPrefix(partOfSpeech, "v5") { + rules = append(rules, "v5") + } else if strings.HasPrefix(partOfSpeech, "v1") { + rules = append(rules, "v1") + } else if strings.HasPrefix(partOfSpeech, "vs-") { + rules = append(rules, "vs") + } + } + } + return rules +} + +func calculateTermScore(senseNumber int, headword headword) int { + const senseWeight int = 1 + const entryPositionWeight int = 100 + const priorityWeight int = 10000 + + score := 0 + score -= (senseNumber - 1) * senseWeight + score -= headword.Index * entryPositionWeight + score += headword.Score() * priorityWeight + + return score +} + +func doDisplaySenseNumberTag(headword headword, entry jmdict.JmdictEntry, meta jmdictMetadata) bool { + // Display sense numbers if the entry has more than one sense + // or if the headword is found in multiple entries. + hash := headword.Hash() + if meta.seqToSenseCount[entry.Sequence] > 1 { + return true + } else if len(meta.headwordHashToSeqs[hash]) > 1 { + return true + } else { + return false + } +} + +func jmdictPublicationDate(dictionary jmdict.Jmdict) string { + dateEntry := dictionary.Entries[len(dictionary.Entries)-1] + r := regexp.MustCompile(`\d{4}-\d{2}-\d{2}`) + jmdictDate := r.FindString(dateEntry.Sense[0].Glossary[0].Content) + return jmdictDate +} + +func createFormsTerm(headword headword, entry jmdict.JmdictEntry, meta jmdictMetadata) dbTerm { + term := baseFormsTerm(entry) + term.Expression = headword.Expression + term.Reading = headword.Reading + + term.addTermTags(headword.TermTags...) 
+ + term.addDefinitionTags("forms") + senseNumber := meta.seqToSenseCount[entry.Sequence] + 1 + term.Score = calculateTermScore(senseNumber, headword) + return term +} + +func createSearchTerm(headword headword, entry jmdict.JmdictEntry, meta jmdictMetadata) dbTerm { + term := dbTerm{ + Expression: headword.Expression, + Sequence: -entry.Sequence, + } + for _, sense := range entry.Sense { + rules := grammarRules(sense.PartsOfSpeech) + term.addRules(rules...) + } + term.addTermTags(headword.TermTags...) + term.Score = calculateTermScore(0, headword) + + redirectHeadword := meta.seqToMainHeadword[entry.Sequence] + expHash := redirectHeadword.ExpHash() + doDisplayReading := (len(meta.expHashToReadings[expHash]) > 1) + + content := contentSpan( + contentAttr{fontSize: "130%"}, + "โŸถ", + redirectHeadword.ToInternalLink(doDisplayReading), + ) + + term.Glossary = []any{contentStructure(content)} + return term +} + +func createSenseTerm(sense jmdict.JmdictSense, senseNumber int, headword headword, entry jmdict.JmdictEntry, meta jmdictMetadata) dbTerm { + term := dbTerm{ + Expression: headword.Expression, + Reading: headword.Reading, + Sequence: entry.Sequence, + } + + term.Glossary = createGlossary(sense, meta) + + term.addTermTags(headword.TermTags...) + + if doDisplaySenseNumberTag(headword, entry, meta) { + senseNumberTag := strconv.Itoa(senseNumber) + term.addDefinitionTags(senseNumberTag) + } + term.addDefinitionTags(sense.PartsOfSpeech...) + term.addDefinitionTags(sense.Fields...) + term.addDefinitionTags(sense.Misc...) + term.addDefinitionTags(sense.Dialects...) + + rules := grammarRules(sense.PartsOfSpeech) + term.addRules(rules...) 
+ + term.Score = calculateTermScore(senseNumber, headword) + + return term +} + +func extractTerms(headword headword, entry jmdict.JmdictEntry, meta jmdictMetadata) ([]dbTerm, bool) { + if meta.seqToSenseCount[entry.Sequence] == 0 { + return nil, false + } + if headword.IsSearchOnly { + searchTerm := createSearchTerm(headword, entry, meta) + return []dbTerm{searchTerm}, true + } + terms := []dbTerm{} + senseNumber := 1 + for _, sense := range entry.Sense { + if !glossaryContainsLanguage(sense.Glossary, meta.language) { + continue + } + if sense.RestrictedReadings != nil && !slices.Contains(sense.RestrictedReadings, headword.Reading) { + senseNumber += 1 + continue + } + if sense.RestrictedKanji != nil && !slices.Contains(sense.RestrictedKanji, headword.Expression) { + senseNumber += 1 + continue + } + senseTerm := createSenseTerm(sense, senseNumber, headword, entry, meta) + senseNumber += 1 + terms = append(terms, senseTerm) + } + + if meta.hasMultipleForms[entry.Sequence] { + formsTerm := createFormsTerm(headword, entry, meta) + terms = append(terms, formsTerm) + } + return terms, true +} + +func jmdExportDb(inputPath string, outputPath string, languageName string, title string, stride int, pretty bool) error { + reader, err := os.Open(inputPath) + if err != nil { + return err + } + defer reader.Close() + + dictionary, entities, err := jmdict.LoadJmdictNoTransform(reader) + if err != nil { + return err + } + + meta := newJmdictMetadata(dictionary, languageName) + + terms := dbTermList{} + for _, entry := range dictionary.Entries { + headwords := extractHeadwords(entry) + for _, headword := range headwords { + if newTerms, ok := extractTerms(headword, entry, meta); ok { + terms = append(terms, newTerms...) + } + } + } + + tags := dbTagList{} + tags = append(tags, entityTags(entities)...) + tags = append(tags, senseNumberTags(meta.maxSenseCount)...) + tags = append(tags, newsFrequencyTags()...) + tags = append(tags, customDbTags()...) 
+ + recordData := map[string]dbRecordList{ + "term": terms.crush(), + "tag": tags.crush(), + } + + if title == "" { + title = "JMdict" + } + jmdictDate := jmdictPublicationDate(dictionary) + + index := dbIndex{ + Title: title, + Revision: "JMdict." + jmdictDate, + Sequenced: true, + Attribution: edrdgAttribution, + } + index.setDefaults() + + return writeDb( + outputPath, + index, + recordData, + stride, + pretty, + ) +} diff --git a/jmdictConstants.go b/jmdictConstants.go new file mode 100644 index 0000000..1d49194 --- /dev/null +++ b/jmdictConstants.go @@ -0,0 +1,215 @@ +package yomichan + +type LangCode struct { + language string + code string +} + +const edrdgAttribution = "This publication has included material from the JMdict (EDICT, etc.) dictionary files in accordance with the licence provisions of the Electronic Dictionaries Research Group. See http://www.edrdg.org/" + +const prioritySymbol = "โ˜…" +const rareKanjiSymbol = "๐Ÿ…" +const irregularSymbol = "โš " +const outdatedSymbol = "โ›ฌ" +const defaultSymbol = "ใŠ’" + +const priorityTagName = "โญ" +const rareKanjiTagName = "R" +const irregularTagName = "โš ๏ธ" +const outdatedTagName = "โ›ฌ" +const atejiTagName = "ateji" +const gikunTagName = "gikun" + +const langMarker = "'๐ŸŒ '" +const noteMarker = "'๐Ÿ“ '" +const infoMarker = "'โ„น๏ธ '" +const refMarker = "'โžก๏ธ '" +const antonymMarker = "'๐Ÿ”„ '" + +var ISOtoFlag = map[string]string{ + "": "'๐Ÿ‡ฌ๐Ÿ‡ง '", + "eng": "'๐Ÿ‡ฌ๐Ÿ‡ง '", + "dut": "'๐Ÿ‡ณ๐Ÿ‡ฑ '", + "fre": "'๐Ÿ‡ซ๐Ÿ‡ท '", + "ger": "'๐Ÿ‡ฉ๐Ÿ‡ช '", + "hun": "'๐Ÿ‡ญ๐Ÿ‡บ '", + "ita": "'๐Ÿ‡ฎ๐Ÿ‡น '", + "jpn": "'๐Ÿ‡ฏ๐Ÿ‡ต '", + "rus": "'๐Ÿ‡ท๐Ÿ‡บ '", + "slv": "'๐Ÿ‡ธ๐Ÿ‡ฎ '", + "spa": "'๐Ÿ‡ช๐Ÿ‡ธ '", + "swe": "'๐Ÿ‡ธ๐Ÿ‡ช '", +} + +var langNameToCode = map[string]string{ + "": "eng", + "english": "eng", + "dutch": "dut", + "french": "fre", + "german": "ger", + "hungarian": "hun", + "italian": "ita", + "russian": "rus", + "slovenian": "slv", + "spanish": "spa", + "swedish": "swe", +} + +var 
glossTypeCodeToName = map[LangCode]string{ + LangCode{"eng", "lit"}: "literally", + LangCode{"eng", "fig"}: "figuratively", + LangCode{"eng", "expl"}: "", // don't need to tell the user that an explanation is an explanation + LangCode{"eng", "tm"}: "trademark", +} + +var refNoteHint = map[LangCode]string{ + LangCode{"eng", "xref"}: "see", + LangCode{"eng", "ant"}: "antonym", +} + +var sourceLangTypeCodeToType = map[LangCode]string{ + LangCode{"eng", "part"}: "partial", + LangCode{"eng", ""}: "", // implied "full" +} + +var langCodeToName = map[LangCode]string{ + LangCode{"eng", "afr"}: "Afrikaans", + LangCode{"eng", "ain"}: "Ainu", + LangCode{"eng", "alg"}: "Algonquian", + LangCode{"eng", "amh"}: "Amharic", + LangCode{"eng", "ara"}: "Arabic", + LangCode{"eng", "arn"}: "Mapudungun", + LangCode{"eng", "bnt"}: "Bantu", + LangCode{"eng", "bre"}: "Breton", + LangCode{"eng", "bul"}: "Bulgarian", + LangCode{"eng", "bur"}: "Burmese", + LangCode{"eng", "chi"}: "Chinese", + LangCode{"eng", "chn"}: "Chinook Jargon", + LangCode{"eng", "cze"}: "Czech", + LangCode{"eng", "dan"}: "Danish", + LangCode{"eng", "dut"}: "Dutch", + LangCode{"eng", "eng"}: "English", + LangCode{"eng", "epo"}: "Esperanto", + LangCode{"eng", "est"}: "Estonian", + LangCode{"eng", "fil"}: "Filipino", + LangCode{"eng", "fin"}: "Finnish", + LangCode{"eng", "fre"}: "French", + LangCode{"eng", "geo"}: "Georgian", + LangCode{"eng", "ger"}: "German", + LangCode{"eng", "glg"}: "Galician", + LangCode{"eng", "grc"}: "Ancient Greek", + LangCode{"eng", "gre"}: "Modern Greek", + LangCode{"eng", "haw"}: "Hawaiian", + LangCode{"eng", "heb"}: "Hebrew", + LangCode{"eng", "hin"}: "Hindi", + LangCode{"eng", "hun"}: "Hungarian", + LangCode{"eng", "ice"}: "Icelandic", + LangCode{"eng", "ind"}: "Indonesian", + LangCode{"eng", "ita"}: "Italian", + LangCode{"eng", "khm"}: "Khmer", + LangCode{"eng", "kor"}: "Korean", + LangCode{"eng", "kur"}: "Kurdish", + LangCode{"eng", "lat"}: "Latin", + LangCode{"eng", "mal"}: "Malayalam", + 
LangCode{"eng", "mao"}: "Maori", + LangCode{"eng", "may"}: "Malay", + LangCode{"eng", "mnc"}: "Manchu", + LangCode{"eng", "mol"}: "Moldavian", // ISO 639 deprecated (https://iso639-3.sil.org/code/mol) + LangCode{"eng", "mon"}: "Mongolian", + LangCode{"eng", "nor"}: "Norwegian", + LangCode{"eng", "per"}: "Persian", + LangCode{"eng", "pol"}: "Polish", + LangCode{"eng", "por"}: "Portuguese", + LangCode{"eng", "rum"}: "Romanian", + LangCode{"eng", "rus"}: "Russian", + LangCode{"eng", "san"}: "Sanskrit", + LangCode{"eng", "scr"}: "Croatian", // Code doesn't seem to exist in ISO 639. Should be "hrv" instead? (https://iso639-3.sil.org/code/hrv) + LangCode{"eng", "slo"}: "Slovak", + LangCode{"eng", "slv"}: "Slovenian", + LangCode{"eng", "som"}: "Somali", + LangCode{"eng", "spa"}: "Spanish", + LangCode{"eng", "swa"}: "Swahili", + LangCode{"eng", "swe"}: "Swedish", + LangCode{"eng", "tah"}: "Tahitian", + LangCode{"eng", "tam"}: "Tamil", + LangCode{"eng", "tgl"}: "Tagalog", + LangCode{"eng", "tha"}: "Thai", + LangCode{"eng", "tib"}: "Tibetan", + LangCode{"eng", "tur"}: "Turkish", + LangCode{"eng", "ukr"}: "Ukrainian", + LangCode{"eng", "urd"}: "Urdu", + LangCode{"eng", "vie"}: "Vietnamese", + LangCode{"eng", "yid"}: "Yiddish", +} + +// https://www.iana.org/assignments/language-subtag-registry/language-subtag-registry +var ISOtoHTML = map[string]string{ + "afr": "af", // Afrikaans + "ain": "ain", // Ainu + "alg": "alg", // Algonquian + "amh": "am", // Amharic + "ara": "ar", // Arabic + "arn": "arn", // Mapudungun + "bnt": "bnt", // Bantu + "bre": "br", // Breton + "bul": "bg", // Bulgarian + "bur": "my", // Burmese + "chi": "zh", // Chinese + "chn": "chn", // Chinook Jargon + "cze": "cs", // Czech + "dan": "da", // Danish + "dut": "nl", // Dutch + "eng": "en", // English + "epo": "eo", // Esperanto + "est": "et", // Estonian + "fil": "fil", // Filipino + "fin": "fi", // Finnish + "fre": "fr", // French + "geo": "ka", // Georgian + "ger": "de", // German + "glg": "gl", // 
// kata2hira returns word with katakana replaced by the corresponding
// hiragana.
//
// Runes in U+30A1..U+30F6 (small a through small ke) and the iteration marks
// U+30FD..U+30FE are shifted down by 0x60. Every other rune, including the
// prolonged sound mark U+30FC and U+30F7..U+30FA, is returned unchanged.
func kata2hira(word string) string {
	// The bounds are spelled as code points so the function does not depend
	// on how the source file's kana literals are encoded.
	const (
		firstKana     = '\u30A1' // katakana small a
		lastKana      = '\u30F6' // katakana small ke
		firstIterMark = '\u30FD' // katakana iteration mark
		lastIterMark  = '\u30FE' // katakana voiced iteration mark
		kanaOffset    = 0x60     // distance from a katakana rune to its hiragana
	)
	return strings.Map(func(r rune) rune {
		switch {
		case r >= firstKana && r <= lastKana,
			r >= firstIterMark && r <= lastIterMark:
			return r - kanaOffset
		}
		return r
	}, word)
}
irregularSymbol) + } + if h.IsOutdated { + infoSymbols = append(infoSymbols, outdatedSymbol) + } + return strings.Join(infoSymbols[:], " | ") +} + +func (h *headword) GlossText() string { + gloss := h.Expression + if h.IsAteji { + gloss = "ใ€ˆ" + gloss + "ใ€‰" + } + symbolText := h.InfoSymbols() + if symbolText != "" { + gloss += "๏ผˆ" + symbolText + "๏ผ‰" + } + return gloss +} + +func (h *headword) TableColHeaderText() string { + text := h.KanjiForm() + if h.IsAteji { + text = "ใ€ˆ" + text + "ใ€‰" + } + return text +} + +func (h *headword) TableRowHeaderText() string { + text := h.Reading + if h.IsGikun { + text = "ใ€ˆ" + text + "ใ€‰" + } + return text +} + +func (h *headword) TableCellText() string { + text := h.InfoSymbols() + if text == "" { + return defaultSymbol + } else { + return text + } +} + +func (h *headword) KanjiForm() string { + if h.IsKanaOnly() { + return "โˆ…" + } else { + return h.Expression + } +} + +func jmdNeedsFormTable(headwords []headword) bool { + // Does the entry contain more than 1 distinct reading? + // E.g. ใƒใ‚ซใŒใ„ and ใฐใ‹ใŒใ„ are not distinct. 
+ uniqueReading := "" + for _, h := range headwords { + if h.IsGikun { + return true + } else if h.IsSearchOnly { + continue + } else if h.IsKanaOnly() { + continue + } else if uniqueReading == "" { + uniqueReading = kata2hira(h.Reading) + } else if uniqueReading != kata2hira(h.Reading) { + return true + } + } + return false +} + +type formTableData struct { + kanjiForms []string + readings []string + colHeaderText map[string]string + rowHeaderText map[string]string + cellText map[string]map[string]string +} + +func tableData(headwords []headword) formTableData { + d := formTableData{ + kanjiForms: []string{}, + readings: []string{}, + colHeaderText: make(map[string]string), + rowHeaderText: make(map[string]string), + cellText: make(map[string]map[string]string), + } + for _, h := range headwords { + if h.IsSearchOnly { + continue + } + kanjiForm := h.KanjiForm() + if !slices.Contains(d.kanjiForms, kanjiForm) { + d.kanjiForms = append(d.kanjiForms, kanjiForm) + d.colHeaderText[kanjiForm] = h.TableColHeaderText() + } + reading := h.Reading + if !slices.Contains(d.readings, reading) { + d.readings = append(d.readings, reading) + d.rowHeaderText[reading] = h.TableRowHeaderText() + d.cellText[reading] = make(map[string]string) + } + d.cellText[reading][kanjiForm] = h.TableCellText() + } + return d +} + +func formsTableGlossary(headwords []headword) []any { + d := tableData(headwords) + + attr := contentAttr{} + centeredAttr := contentAttr{textAlign: "center"} + leftAttr := contentAttr{textAlign: "left"} + + cornerCell := contentTableHeadCell(attr, "") // empty cell in upper left corner + headRowCells := []any{cornerCell} + for _, kanjiForm := range d.kanjiForms { + content := d.colHeaderText[kanjiForm] + cell := contentTableHeadCell(centeredAttr, content) + headRowCells = append(headRowCells, cell) + } + headRow := contentTableRow(attr, headRowCells...) 
+ tableRows := []any{headRow} + for _, reading := range d.readings { + rowHeadCellText := d.rowHeaderText[reading] + rowHeadCell := contentTableHeadCell(leftAttr, rowHeadCellText) + rowCells := []any{rowHeadCell} + for _, kanjiForm := range d.kanjiForms { + text := d.cellText[reading][kanjiForm] + rowCell := contentTableCell(centeredAttr, text) + rowCells = append(rowCells, rowCell) + } + tableRow := contentTableRow(attr, rowCells...) + tableRows = append(tableRows, tableRow) + } + tableAttr := contentAttr{data: map[string]string{"content": "formsTable"}} + contentTable := contentTable(tableAttr, tableRows...) + content := contentStructure(contentTable) + return []any{content} +} + +func formsGlossary(headwords []headword) []any { + glossary := []any{} + for _, h := range headwords { + if h.IsSearchOnly { + continue + } + text := h.GlossText() + glossary = append(glossary, text) + } + return glossary +} + +func baseFormsTerm(entry jmdict.JmdictEntry) dbTerm { + term := dbTerm{Sequence: entry.Sequence} + headwords := extractHeadwords(entry) + if jmdNeedsFormTable(headwords) { + term.Glossary = formsTableGlossary(headwords) + } else { + term.Glossary = formsGlossary(headwords) + } + for _, sense := range entry.Sense { + rules := grammarRules(sense.PartsOfSpeech) + term.addRules(rules...) 
+ } + return term +} + +func formsExportDb(inputPath, outputPath, languageName, title string, stride int, pretty bool) error { + reader, err := os.Open(inputPath) + if err != nil { + return err + } + defer reader.Close() + + dictionary, _, err := jmdict.LoadJmdictNoTransform(reader) + if err != nil { + return err + } + + terms := dbTermList{} + for _, entry := range dictionary.Entries { + baseTerm := baseFormsTerm(entry) + headwords := extractHeadwords(entry) + for _, h := range headwords { + term := baseTerm + if h.IsSearchOnly { + term.Sequence = -term.Sequence + } + term.Expression = h.Expression + term.Reading = h.Reading + terms = append(terms, term) + } + } + + if title == "" { + title = "JMdict Forms" + } + + recordData := map[string]dbRecordList{ + "term": terms.crush(), + "tag": dbRecordList{}, + } + + jmdictDate := jmdictPublicationDate(dictionary) + + index := dbIndex{ + Title: title, + Revision: "JMdict." + jmdictDate, + Sequenced: true, + Attribution: edrdgAttribution, + } + index.setDefaults() + + return writeDb( + outputPath, + index, + recordData, + stride, + pretty, + ) +} diff --git a/jmdictGlossary.go b/jmdictGlossary.go new file mode 100644 index 0000000..0260cbf --- /dev/null +++ b/jmdictGlossary.go @@ -0,0 +1,300 @@ +package yomichan + +import ( + "fmt" + "strconv" + + "foosoft.net/projects/jmdict" +) + +func glossaryContainsLanguage(glossary []jmdict.JmdictGlossary, language string) bool { + hasGlosses := false + for _, gloss := range glossary { + if glossContainsLanguage(gloss, language) { + hasGlosses = true + break + } + } + return hasGlosses +} + +func glossContainsLanguage(gloss jmdict.JmdictGlossary, language string) bool { + if gloss.Language == nil && language != "eng" { + return false + } else if gloss.Language != nil && language != *gloss.Language { + return false + } else { + return true + } +} + +func makeGlossListItem(gloss jmdict.JmdictGlossary, language string) any { + contents := []any{gloss.Content} + listItem := 
contentListItem(contentAttr{}, contents...) + return listItem +} + +func makeInfoGlossListItem(gloss jmdict.JmdictGlossary, language string) any { + // Prepend gloss with "type" (literal, figurative, trademark, etc.) + glossTypeCode := *gloss.Type + contents := []any{} + if name, ok := glossTypeCodeToName[LangCode{language, glossTypeCode}]; ok { + if name != "" { + italicStyle := contentAttr{fontStyle: "italic"} + contents = append(contents, contentSpan(italicStyle, "("+name+")"), " ") + } + } else { + fmt.Println("Unknown glossary type code " + *gloss.Type + " for build language " + language) + contents = append(contents, "["+glossTypeCode+"] ") + } + contents = append(contents, gloss.Content) + listItem := contentListItem(contentAttr{}, contents...) + return listItem +} + +func makeSourceLangListItem(sourceLanguage jmdict.JmdictSource, language string) any { + contents := []any{} + + var srcLangCode string + if sourceLanguage.Language == nil { + srcLangCode = "eng" + } else { + srcLangCode = *sourceLanguage.Language + } + + // Format: [Language] ([Partial?], [Wasei?]): [Original word?] 
+ // [Language] + if langName, ok := langCodeToName[LangCode{language, srcLangCode}]; ok { + contents = append(contents, langName) + } else { + contents = append(contents, srcLangCode) + fmt.Println("Unable to convert ISO 639 code " + srcLangCode + " to its full name in language " + language) + } + + // ([Partial?], [Wasei?]) + var sourceLangTypeCode string + if sourceLanguage.Type == nil { + sourceLangTypeCode = "" + } else { + sourceLangTypeCode = *sourceLanguage.Type + } + var sourceLangType string + if val, ok := sourceLangTypeCodeToType[LangCode{language, sourceLangTypeCode}]; ok { + sourceLangType = val + } else { + sourceLangType = sourceLangTypeCode + fmt.Println("Unknown source language type code " + sourceLangTypeCode + " for build language " + language) + } + if sourceLangType != "" && sourceLanguage.Wasei == "y" { + contents = append(contents, " ("+sourceLangType+", wasei)") + } else if sourceLangType != "" { + contents = append(contents, " ("+sourceLangType+")") + } else if sourceLanguage.Wasei == "y" { + contents = append(contents, " (wasei)") + } + + // : [Original word?] + if sourceLanguage.Content != "" { + contents = append(contents, ": ") + attr := contentAttr{lang: ISOtoHTML[srcLangCode]} + contents = append(contents, contentSpan(attr, sourceLanguage.Content)) + } + + listItem := contentListItem(contentAttr{}, contents...) + return listItem +} + +func makeReferenceListItem(reference string, refType string, meta jmdictMetadata) any { + contents := []any{} + attr := contentAttr{} + + hint := refNoteHint[LangCode{meta.language, refType}] + contents = append(contents, hint+": ") + + refHeadword, senseNumber, ok := parseReference(reference) + if !ok { + contents = append(contents, "ใ€"+reference+"ใ€‘") + return contentListItem(attr, contents...) + } + + sequence, ok := meta.referenceToSeq[reference] + if !ok { + contents = append(contents, "ใ€"+reference+"ใ€‘") + return contentListItem(attr, contents...) 
+ } + + targetSense := senseID{ + sequence: sequence, + number: senseNumber, + } + + expHash := refHeadword.ExpHash() + doDisplayReading := (len(meta.expHashToReadings[expHash]) > 1) + doDisplaySenseNumber := (meta.seqToSenseCount[targetSense.sequence] > 1) + refGlossAttr := contentAttr{ + fontSize: "65%", + verticalAlign: "middle", + data: map[string]string{"content": "refGlosses"}, + } + + contents = append(contents, refHeadword.ToInternalLink(doDisplayReading)) + if doDisplaySenseNumber { + contents = append(contents, contentSpan(refGlossAttr, " "+strconv.Itoa(targetSense.number)+". "+meta.condensedGlosses[targetSense])) + } else { + contents = append(contents, contentSpan(refGlossAttr, " "+meta.condensedGlosses[targetSense])) + } + + listItem := contentListItem(attr, contents...) + return listItem +} + +func makeExampleListItem(sentence jmdict.JmdictExampleSentence) any { + if sentence.Lang == "jpn" { + return contentListItem(contentAttr{}, sentence.Text) + } else { + attr := contentAttr{ + lang: ISOtoHTML[sentence.Lang], + listStyleType: ISOtoFlag[sentence.Lang], + } + return contentListItem(attr, sentence.Text) + } +} + +func listAttr(lang string, listStyleType string, dataContent string) contentAttr { + return contentAttr{ + lang: lang, + listStyleType: listStyleType, + data: map[string]string{"content": dataContent}, + } +} + +func needsStructuredContent(sense jmdict.JmdictSense, language string) bool { + for _, gloss := range sense.Glossary { + if glossContainsLanguage(gloss, language) && gloss.Type != nil { + return true + } + } + if len(sense.SourceLanguages) > 0 { + return true + } else if len(sense.Information) > 0 { + return true + } else if len(sense.Antonyms) > 0 { + return true + } else if len(sense.References) > 0 { + return true + } else if len(sense.Examples) > 0 { + return true + } else { + return false + } +} + +func createGlossaryContent(sense jmdict.JmdictSense, meta jmdictMetadata) any { + glossaryContents := []any{} + + // Add normal 
glosses + glossListItems := []any{} + for _, gloss := range sense.Glossary { + if glossContainsLanguage(gloss, meta.language) && gloss.Type == nil { + listItem := makeGlossListItem(gloss, meta.language) + glossListItems = append(glossListItems, listItem) + } + } + if len(glossListItems) > 0 { + attr := listAttr(ISOtoHTML[meta.language], "circle", "glossary") + list := contentUnorderedList(attr, glossListItems...) + glossaryContents = append(glossaryContents, list) + } + + // Add information glosses + infoGlossListItems := []any{} + for _, gloss := range sense.Glossary { + if glossContainsLanguage(gloss, meta.language) && gloss.Type != nil { + listItem := makeInfoGlossListItem(gloss, meta.language) + infoGlossListItems = append(infoGlossListItems, listItem) + } + } + if len(infoGlossListItems) > 0 { + attr := listAttr(ISOtoHTML[meta.language], infoMarker, "infoGlossary") + list := contentUnorderedList(attr, infoGlossListItems...) + glossaryContents = append(glossaryContents, list) + } + + // Add language-of-origin / loanword information + sourceLangListItems := []any{} + for _, sourceLanguage := range sense.SourceLanguages { + listItem := makeSourceLangListItem(sourceLanguage, meta.language) + sourceLangListItems = append(sourceLangListItems, listItem) + } + if len(sourceLangListItems) > 0 { + attr := listAttr(ISOtoHTML[meta.language], langMarker, "sourceLanguages") + list := contentUnorderedList(attr, sourceLangListItems...) + glossaryContents = append(glossaryContents, list) + } + + // Add sense notes + noteListItems := []any{} + for _, information := range sense.Information { + listItem := contentListItem(contentAttr{}, information) + noteListItems = append(noteListItems, listItem) + } + if len(noteListItems) > 0 { + attr := listAttr(ISOtoHTML["jpn"], noteMarker, "notes") // notes often contain japanese text + list := contentUnorderedList(attr, noteListItems...) 
+ glossaryContents = append(glossaryContents, list) + } + + // Add antonyms + antonymListItems := []any{} + for _, antonym := range sense.Antonyms { + listItem := makeReferenceListItem(antonym, "ant", meta) + antonymListItems = append(antonymListItems, listItem) + } + if len(antonymListItems) > 0 { + attr := listAttr(ISOtoHTML[meta.language], antonymMarker, "antonyms") + list := contentUnorderedList(attr, antonymListItems...) + glossaryContents = append(glossaryContents, list) + } + + // Add cross-references + referenceListItems := []any{} + for _, reference := range sense.References { + listItem := makeReferenceListItem(reference, "xref", meta) + referenceListItems = append(referenceListItems, listItem) + } + if len(referenceListItems) > 0 { + attr := listAttr(ISOtoHTML[meta.language], refMarker, "references") + list := contentUnorderedList(attr, referenceListItems...) + glossaryContents = append(glossaryContents, list) + } + + // Add example sentences + exampleListItems := []any{} + for _, example := range sense.Examples { + for _, sentence := range example.Sentences { + listItem := makeExampleListItem(sentence) + exampleListItems = append(exampleListItems, listItem) + } + } + if len(exampleListItems) > 0 { + attr := listAttr(ISOtoHTML["jpn"], ISOtoFlag["jpn"], "examples") + list := contentUnorderedList(attr, exampleListItems...) + glossaryContents = append(glossaryContents, list) + } + + return contentStructure(glossaryContents...) 
+} + +func createGlossary(sense jmdict.JmdictSense, meta jmdictMetadata) []any { + glossary := []any{} + if needsStructuredContent(sense, meta.language) { + glossary = append(glossary, createGlossaryContent(sense, meta)) + } else { + for _, gloss := range sense.Glossary { + if glossContainsLanguage(gloss, meta.language) { + glossary = append(glossary, gloss.Content) + } + } + } + return glossary +} diff --git a/jmdictHeadword.go b/jmdictHeadword.go new file mode 100644 index 0000000..a1a75cb --- /dev/null +++ b/jmdictHeadword.go @@ -0,0 +1,267 @@ +package yomichan + +import ( + "fmt" + "hash/fnv" + "regexp" + "strconv" + + "foosoft.net/projects/jmdict" + "golang.org/x/exp/slices" +) + +type headword struct { + Expression string + Reading string + TermTags []string + Index int + IsPriority bool + IsIrregular bool + IsOutdated bool + IsRareKanji bool + IsSearchOnly bool + IsAteji bool + IsGikun bool +} + +type hash uint64 + +func (h *headword) Hash() hash { + return hashText(h.Expression + "โž" + h.Reading) +} + +func (h *headword) ExpHash() hash { + return hashText(h.Expression + "โž" + h.Expression) +} + +func (h *headword) ReadingHash() hash { + return hashText(h.Reading + "โž" + h.Reading) +} + +func hashText(s string) hash { + h := fnv.New64a() + h.Write([]byte(s)) + return hash(h.Sum64()) +} + +func (h *headword) IsKanaOnly() bool { + if h.Expression != h.Reading { + return false + } + for _, char := range h.Expression { + if char >= 'ใ' && char <= 'ใƒฟ' { + // hiragana and katakana range + continue + } else if char >= '๏ฝฅ' && char <= '๏พŸ' { + // halfwidth katakana range + continue + } else if char == 'ใ€œ' { + continue + } else { + return false + } + } + return true +} + +func (h *headword) Score() int { + score := 0 + if h.IsPriority { + score += 1 + } + if h.IsIrregular { + score -= 5 + } + if h.IsOutdated { + score -= 5 + } + if h.IsRareKanji { + score -= 5 + } + if h.IsSearchOnly { + score -= 5 + } + return score +} + +func (h *headword) 
ToInternalLink(includeReading bool) any { + if !includeReading || h.Expression == h.Reading { + return contentInternalLink( + contentAttr{lang: ISOtoHTML["jpn"]}, + h.Expression, + ) + } else { + return contentSpan( + contentAttr{lang: ISOtoHTML["jpn"]}, + contentInternalLink(contentAttr{}, h.Expression), + "๏ผˆ", + contentInternalLink(contentAttr{}, h.Reading), + "๏ผ‰", + ) + } +} + +func (h *headword) SetFlags(infoTags, freqTags []string) { + priorityTags := []string{"ichi1", "news1", "gai1", "spec1", "spec2"} + for _, priorityTag := range priorityTags { + if slices.Contains(freqTags, priorityTag) { + h.IsPriority = true + break + } + } + for _, infoTag := range infoTags { + switch infoTag { + case "iK", "ik", "io": + h.IsIrregular = true + case "oK", "ok": + h.IsOutdated = true + case "sK", "sk": + h.IsSearchOnly = true + case "rK": + h.IsRareKanji = true + case "ateji": + h.IsAteji = true + case "gikun": + h.IsGikun = true + } + } + if h.IsOutdated && h.IsRareKanji { + h.IsRareKanji = false + } +} + +func (h *headword) SetTermTags(freqTags []string) { + h.TermTags = []string{} + if h.IsPriority { + h.TermTags = append(h.TermTags, priorityTagName) + } + for _, tag := range freqTags { + isNewsFreqTag, _ := regexp.MatchString(`nf\d\d`, tag) + if isNewsFreqTag { + // nf tags are divided into ranks of 500 + // (nf01 to nf48), but it will be easier + // for the user to read 1k, 2k, etc. 
+ var i int + if _, err := fmt.Sscanf(tag, "nf%2d", &i); err == nil { + i = (i + (i % 2)) / 2 + newsTag := "news" + strconv.Itoa(i) + "k" + h.TermTags = append(h.TermTags, newsTag) + } + } else if tag == "news1" || tag == "news2" { + continue + } else { + tagWithoutTheNumber := tag[:len(tag)-1] // "ichi", "gai", or "spec" + h.TermTags = append(h.TermTags, tagWithoutTheNumber) + } + } + if h.IsIrregular { + h.TermTags = append(h.TermTags, irregularTagName) + } + if h.IsOutdated { + h.TermTags = append(h.TermTags, outdatedTagName) + } + if h.IsRareKanji { + h.TermTags = append(h.TermTags, rareKanjiTagName) + } + if h.IsAteji { + h.TermTags = append(h.TermTags, atejiTagName) + } + if h.IsGikun { + h.TermTags = append(h.TermTags, gikunTagName) + } +} + +func newHeadword(kanji *jmdict.JmdictKanji, reading *jmdict.JmdictReading) headword { + h := headword{} + infoTags := []string{} + freqTags := []string{} + if kanji == nil { + h.Expression = reading.Reading + h.Reading = reading.Reading + infoTags = reading.Information + freqTags = reading.Priorities + } else if reading == nil { + // should only apply to search-only kanji terms + h.Expression = kanji.Expression + h.Reading = "" + infoTags = kanji.Information + freqTags = kanji.Priorities + } else { + h.Expression = kanji.Expression + h.Reading = reading.Reading + infoTags = union(kanji.Information, reading.Information) + freqTags = intersection(kanji.Priorities, reading.Priorities) + } + h.SetFlags(infoTags, freqTags) + h.SetTermTags(freqTags) + return h +} + +func areAllKanjiIrregular(allKanji []jmdict.JmdictKanji) bool { + // If every kanji form is rare or irregular, then we'll make + // kana-only headwords for each kana form. 
+ if len(allKanji) == 0 { + return false + } + for _, kanji := range allKanji { + h := newHeadword(&kanji, nil) + kanjiIsIrregular := h.IsRareKanji || h.IsIrregular || h.IsOutdated || h.IsSearchOnly + if !kanjiIsIrregular { + return false + } + } + return true +} + +func extractHeadwords(entry jmdict.JmdictEntry) []headword { + headwords := []headword{} + allKanjiAreIrregular := areAllKanjiIrregular(entry.Kanji) + + if allKanjiAreIrregular { + // Adding the reading-only terms before kanji+reading + // terms here for the sake of the Index property, + // which affects the yomichan term ranking. + for _, reading := range entry.Readings { + h := newHeadword(nil, &reading) + h.Index = len(headwords) + headwords = append(headwords, h) + } + } + + for _, kanji := range entry.Kanji { + if slices.Contains(kanji.Information, "sK") { + // Search-only kanji forms do not have associated readings. + h := newHeadword(&kanji, nil) + h.Index = len(headwords) + headwords = append(headwords, h) + continue + } + for _, reading := range entry.Readings { + if reading.NoKanji != nil { + continue + } else if slices.Contains(reading.Information, "sk") { + // Search-only kana forms do not have associated kanji forms. 
+ continue + } else if reading.Restrictions != nil && !slices.Contains(reading.Restrictions, kanji.Expression) { + continue + } else { + h := newHeadword(&kanji, &reading) + h.Index = len(headwords) + headwords = append(headwords, h) + } + } + } + + if !allKanjiAreIrregular { + noKanjiInEntry := (len(entry.Kanji) == 0) + for _, reading := range entry.Readings { + if reading.NoKanji != nil || noKanjiInEntry || slices.Contains(reading.Information, "sk") { + h := newHeadword(nil, &reading) + h.Index = len(headwords) + headwords = append(headwords, h) + } + } + } + + return headwords +} diff --git a/jmdictMetadata.go b/jmdictMetadata.go new file mode 100644 index 0000000..ec92827 --- /dev/null +++ b/jmdictMetadata.go @@ -0,0 +1,158 @@ +package yomichan + +import ( + "strings" + + "foosoft.net/projects/jmdict" + "golang.org/x/exp/slices" +) + +type sequence = int + +type jmdictMetadata struct { + language string + condensedGlosses map[senseID]string + seqToSenseCount map[sequence]int + seqToMainHeadword map[sequence]headword + expHashToReadings map[hash][]string + headwordHashToSeqs map[hash][]sequence + references []string + referenceToSeq map[string]sequence + hashToSearchValues map[hash][]searchValue + seqToSearchHashes map[sequence][]searchHash + hasMultipleForms map[sequence]bool + maxSenseCount int +} + +type senseID struct { + sequence sequence + number int +} + +func (meta *jmdictMetadata) AddHeadword(headword headword, entry jmdict.JmdictEntry) { + + // Determine how many senses are in this entry for this language + if _, ok := meta.seqToSenseCount[entry.Sequence]; !ok { + senseCount := 0 + for _, entrySense := range entry.Sense { + for _, gloss := range entrySense.Glossary { + if glossContainsLanguage(gloss, meta.language) { + senseCount += 1 + break + } + } + } + meta.seqToSenseCount[entry.Sequence] = senseCount + } + + if meta.seqToSenseCount[entry.Sequence] == 0 { + return + } + + // main headwords (first ones that are found in entries). 
+ if _, ok := meta.seqToMainHeadword[entry.Sequence]; !ok { + meta.seqToMainHeadword[entry.Sequence] = headword + } + + // hash the term pair so we can determine if it's used + // in more than one JMdict entry later. + headwordHash := headword.Hash() + if !slices.Contains(meta.headwordHashToSeqs[headwordHash], entry.Sequence) { + meta.headwordHashToSeqs[headwordHash] = append(meta.headwordHashToSeqs[headwordHash], entry.Sequence) + } + + // hash the expression so that we can determine if we + // need to disambiguate it by displaying its reading + // in reference notes later. + expHash := headword.ExpHash() + if !slices.Contains(meta.expHashToReadings[expHash], headword.Reading) { + meta.expHashToReadings[expHash] = append(meta.expHashToReadings[expHash], headword.Reading) + } + + // e.g. for JMdict (English) we expect to end up with + // seqToHashedHeadwords[1260670] == ใ€ๅ…ƒใƒปใ‚‚ใจใ€‘ใ€ใ€ๅ…ƒใƒปๅ…ƒใ€‘ใ€ใ€ใ‚‚ใจใƒปใ‚‚ใจใ€‘ใ€ใ€ๆœฌใƒปใ‚‚ใจใ€‘ใ€ใ€ๆœฌใƒปๆœฌใ€‘ใ€ใ€็ด ใƒปใ‚‚ใจใ€‘ใ€ใ€็ด ใƒป็ด ใ€‘ใ€ใ€ๅŸบใƒปใ‚‚ใจใ€‘ใ€ใ€ๅŸบใƒปๅŸบใ€‘ + // used for correlating references to sequence numbers later. 
+ searchHashes := []searchHash{ + searchHash{headwordHash, headword.IsPriority}, + searchHash{expHash, headword.IsPriority}, + searchHash{headword.ReadingHash(), headword.IsPriority}, + } + for _, x := range searchHashes { + if !slices.Contains(meta.seqToSearchHashes[entry.Sequence], x) { + meta.seqToSearchHashes[entry.Sequence] = append(meta.seqToSearchHashes[entry.Sequence], x) + } + } + + currentSenseNumber := 1 + for _, entrySense := range entry.Sense { + if !glossaryContainsLanguage(entrySense.Glossary, meta.language) { + continue + } + if entrySense.RestrictedReadings != nil && !slices.Contains(entrySense.RestrictedReadings, headword.Reading) { + currentSenseNumber += 1 + continue + } + if entrySense.RestrictedKanji != nil && !slices.Contains(entrySense.RestrictedKanji, headword.Expression) { + currentSenseNumber += 1 + continue + } + + allReferences := append(entrySense.References, entrySense.Antonyms...) + for _, reference := range allReferences { + meta.references = append(meta.references, reference) + } + + currentSense := senseID{entry.Sequence, currentSenseNumber} + if meta.condensedGlosses[currentSense] == "" { + glosses := []string{} + for _, gloss := range entrySense.Glossary { + if glossContainsLanguage(gloss, meta.language) && gloss.Type == nil { + glosses = append(glosses, gloss.Content) + } + } + meta.condensedGlosses[currentSense] = strings.Join(glosses, "; ") + } + currentSenseNumber += 1 + } +} + +func newJmdictMetadata(dictionary jmdict.Jmdict, languageName string) jmdictMetadata { + meta := jmdictMetadata{ + language: langNameToCode[languageName], + seqToSenseCount: make(map[sequence]int), + condensedGlosses: make(map[senseID]string), + seqToMainHeadword: make(map[sequence]headword), + expHashToReadings: make(map[hash][]string), + seqToSearchHashes: make(map[sequence][]searchHash), + headwordHashToSeqs: make(map[hash][]sequence), + references: []string{}, + hashToSearchValues: nil, + referenceToSeq: nil, + hasMultipleForms: 
make(map[sequence]bool), + maxSenseCount: 0, + } + + for _, entry := range dictionary.Entries { + headwords := extractHeadwords(entry) + formCount := 0 + for _, headword := range headwords { + meta.AddHeadword(headword, entry) + if !headword.IsSearchOnly { + formCount += 1 + } + } + meta.hasMultipleForms[entry.Sequence] = (formCount > 1) + } + + // this correlation process will be unnecessary once JMdict + // includes sequence numbers in its cross-reference data + meta.MakeReferenceToSeqMap() + + for _, senseCount := range meta.seqToSenseCount { + if meta.maxSenseCount < senseCount { + meta.maxSenseCount = senseCount + } + } + + return meta +} diff --git a/jmdictReferences.go b/jmdictReferences.go new file mode 100644 index 0000000..71a7501 --- /dev/null +++ b/jmdictReferences.go @@ -0,0 +1,166 @@ +package yomichan + +import ( + "fmt" + "strconv" + "strings" +) + +/* + * In the future, JMdict will be updated to include sequence numbers + * with each cross reference. At that time, most of the functions and + * types defined in this file will become unnecessary. see: + * https://www.edrdg.org/jmdict_edict_list/2022/msg00008.html + */ + +type searchValue struct { + sequence sequence + index int + isPriority bool +} + +type searchHash struct { + hash hash + isPriority bool +} + +func parseReference(reference string) (headword, int, bool) { + // Reference strings in JMDict currently consist of 3 parts at + // most, separated by ใƒป characters. The latter two parts are + // optional. When the sense number is not specified, it is + // implied to be the first sense. 
+ var h headword + var senseNumber int + ok := true + refParts := strings.Split(reference, "ใƒป") + if len(refParts) == 1 { + // (Kanji) or (Reading) + h = headword{Expression: refParts[0], Reading: refParts[0]} + senseNumber = 1 + } else if len(refParts) == 2 { + // [Kanji + (Reading or Sense)] or (Reading + Sense) + val, err := strconv.Atoi(refParts[1]) + if err == nil { + h = headword{Expression: refParts[0], Reading: refParts[0]} + senseNumber = val + } else { + h = headword{Expression: refParts[0], Reading: refParts[1]} + senseNumber = 1 + } + } else if len(refParts) == 3 { + // Expression + Reading + Sense + h = headword{Expression: refParts[0], Reading: refParts[1]} + val, err := strconv.Atoi(strings.TrimSpace(refParts[2])) + if err == nil { + senseNumber = val + } else { + errortext := "Unexpected format (3rd part not integer) for x-ref \"" + reference + "\"" + fmt.Println(errortext) + ok = false + } + } else { + errortext := "Unexpected format for x-ref \"" + reference + "\"" + fmt.Println(errortext) + ok = false + } + return h, senseNumber, ok +} + +func (meta *jmdictMetadata) MakeReferenceToSeqMap() { + + meta.referenceToSeq = make(map[string]sequence) + meta.MakeHashToSearchValuesMap() + + for _, reference := range meta.references { + if meta.referenceToSeq[reference] != 0 { + continue + } + seq := meta.FindBestSequence(reference) + if seq != 0 { + meta.referenceToSeq[reference] = seq + } else { + fmt.Println("Unable to convert reference to sequence number: `" + reference + "`") + } + } +} + +func (meta *jmdictMetadata) MakeHashToSearchValuesMap() { + meta.hashToSearchValues = make(map[hash][]searchValue) + for seq, searchHashes := range meta.seqToSearchHashes { + for score, searchHash := range searchHashes { + searchValue := searchValue{ + sequence: seq, + index: score, + isPriority: searchHash.isPriority, + } + meta.hashToSearchValues[searchHash.hash] = + append(meta.hashToSearchValues[searchHash.hash], searchValue) + } + } +} + +/* + * Generally, 
correspondence is determined by the order in which term + * pairs are extracted from each JMdict entry. Take for example the + * JMdict entry for ใ”ๆœฌ, which contains a reference to ๆœฌ (without a + * reading specified). To correlate this reference with a sequence + * number, our program searches each entry for the hash ofใ€ๆœฌใƒปๆœฌใ€‘. + * There are two entries in which it is found in JMdict (English): + * + * sequence 1260670: ใ€ๅ…ƒใƒปใ‚‚ใจใ€‘ใ€ใ€ๅ…ƒใƒปๅ…ƒใ€‘ใ€ใ€ใ‚‚ใจใƒปใ‚‚ใจใ€‘ใ€ใ€ๆœฌใƒปใ‚‚ใจใ€‘ใ€ใ€ๆœฌใƒปๆœฌใ€‘ใ€ใ€็ด ใƒปใ‚‚ใจใ€‘ใ€ใ€็ด ใƒป็ด ใ€‘ใ€ใ€ๅŸบใƒปใ‚‚ใจใ€‘ใ€ใ€ๅŸบใƒปๅŸบใ€‘ + * sequence 1522150: ใ€ๆœฌใƒปใปใ‚“ใ€‘ใ€ใ€ๆœฌใƒปๆœฌใ€‘ใ€ใ€ใปใ‚“ใƒปใปใ‚“ใ€‘ + * + * Because ใ€ๆœฌใƒปๆœฌใ€‘ is closer to the beginning of the array in the + * latter (i.e., has the lowest index), sequence number 1522150 is + * returned. + * + * In situations in which multiple sequences are found with the same + * score, the entry with a priority tag ("news1", "ichi1", "spec1", + * "spec2", "gai1") is given preference. This mostly affects + * katakana-only loanwords like ใƒฉใ‚ฐ. + * + * To improve accuracy, this method also checks to see if the + * reference's specified sense number really exists in the + * corresponding entry. For example, sequence 1582850 ใ€ๅฆ‚ไฝ•ใงใƒปใ„ใ‹ใ‚“ใงใ€‘ + * has a reference to sense #2 of ใ„ใ‹ใ‚“ (no kanji specified), which + * could belong to 13 different sequences. However, sequences 1582850 + * and 2829697 are the only 2 of those 13 which contain more than one + * sense. Incidentally, sequence 1582850 is the correct match. + * + * All else being equal, the entry with the smallest sequence number + * is chosen. References in the JMdict file are currently ambiguous, + * and getting this perfect won't be possible until sequence numbers + * are explictly identified in these references. 
See: + * https://github.com/JMdictProject/JMdictIssues/issues/61 + */ +func (meta *jmdictMetadata) FindBestSequence(reference string) sequence { + bestSeq := 0 + lowestIndex := 100000 + bestIsPriority := false + headword, senseNumber, ok := parseReference(reference) + if !ok { + return bestSeq + } + hash := headword.Hash() + for _, seqScore := range meta.hashToSearchValues[hash] { + if meta.seqToSenseCount[seqScore.sequence] < senseNumber { + // entry must contain the specified sense + continue + } else if lowestIndex < seqScore.index { + // lower indices are better + continue + } else if (lowestIndex == seqScore.index) && (bestIsPriority && !seqScore.isPriority) { + // if scores match, check priority + continue + } else if (lowestIndex == seqScore.index) && (bestIsPriority == seqScore.isPriority) && (bestSeq < seqScore.sequence) { + // if scores and priority match, check sequence number. + // lower sequence numbers are better + continue + } else { + lowestIndex = seqScore.index + bestSeq = seqScore.sequence + bestIsPriority = seqScore.isPriority + } + } + return bestSeq +} diff --git a/jmdictTags.go b/jmdictTags.go new file mode 100644 index 0000000..b444c47 --- /dev/null +++ b/jmdictTags.go @@ -0,0 +1,348 @@ +package yomichan + +import ( + "fmt" + "strconv" + + "golang.org/x/exp/slices" +) + +func senseNumberTags(maxSenseCount int) []dbTag { + tags := []dbTag{} + for i := 1; i <= maxSenseCount; i++ { + tag := dbTag{ + Name: strconv.Itoa(i), + Order: -10, // these tags will appear on the left side + Notes: "JMdict Sense #" + strconv.Itoa(i), + } + tags = append(tags, tag) + } + return tags +} + +func newsFrequencyTags() []dbTag { + // 24,000 ranks divided into 24 tags, news1k ... 
news24k + tags := []dbTag{} + for i := 1; i <= 24; i++ { + tagName := "news" + strconv.Itoa(i) + "k" + var startRank string + if i == 1 { + startRank = "1" + } else { + // technically should be ",001", but that looks odd + startRank = strconv.Itoa(i-1) + ",000" + } + endRank := strconv.Itoa(i) + ",000" + tag := dbTag{ + Name: tagName, + Order: -2, + Score: 0, + Category: "frequent", + Notes: "ranked between the top " + startRank + " and " + endRank + " words in a frequency analysis of the Mainichi Shimbun (1990s)", + } + tags = append(tags, tag) + } + return tags +} + +func entityTags(entities map[string]string) []dbTag { + tags := knownEntityTags() + for name, notes := range entities { + idx := slices.IndexFunc(tags, func(t dbTag) bool { return t.Name == name }) + if idx != -1 { + tags[idx].Notes = notes + } else { + fmt.Println("Unknown tag type \"" + name + "\": " + notes) + unknownTag := dbTag{Name: name, Notes: notes} + tags = append(tags, unknownTag) + } + } + return tags +} + +func customDbTags() []dbTag { + return []dbTag{ + dbTag{Name: priorityTagName, Order: -10, Score: 10, Category: "popular", Notes: "high priority term"}, + dbTag{Name: rareKanjiTagName, Order: 0, Score: -5, Category: "archaism", Notes: "rarely-used kanji form of this expression"}, + dbTag{Name: irregularTagName, Order: 0, Score: -5, Category: "archaism", Notes: "irregular form of this expression"}, + dbTag{Name: outdatedTagName, Order: 0, Score: -5, Category: "archaism", Notes: "outdated form of this expression"}, + dbTag{Name: "ichi", Order: -2, Score: 0, Category: "frequent", Notes: "included in Ichimango Goi Bunruishuu (๏ผ‘ไธ‡่ชž่ชžๅฝ™ๅˆ†้กž้›†)"}, + dbTag{Name: "spec", Order: -2, Score: 0, Category: "frequent", Notes: "specified as common by JMdict editors"}, + dbTag{Name: "gai", Order: -2, Score: 0, Category: "frequent", Notes: "common loanword (gairaigoใƒปๅค–ๆฅ่ชž)"}, + dbTag{Name: "forms", Order: 0, Score: 0, Category: "", Notes: "other surface forms and readings"}, + } +} + 
+func knownEntityTags() []dbTag { + return []dbTag{ + // see: https://www.edrdg.org/jmdictdb/cgi-bin/edhelp.py?svc=jmdict&sid=#kwabbr + // additional descriptions at the beginning of the JMdict file + + // reading info + dbTag{Name: "gikun", Order: 0, Score: 0, Category: ""}, // gikun (meaning as reading) or jukujikun (special kanji reading) + dbTag{Name: "ik", Order: 0, Score: -5, Category: ""}, // word containing irregular kana usage + dbTag{Name: "ok", Order: 0, Score: -5, Category: ""}, // out-dated or obsolete kana usage + dbTag{Name: "sk", Order: 0, Score: -5, Category: ""}, // search-only kana form + + // kanji info + /* kanji info also has a "ik" entity that would go here if not already for the re_inf tag */ + dbTag{Name: "ateji", Order: 0, Score: 0, Category: ""}, // ateji (phonetic) reading + dbTag{Name: "iK", Order: 0, Score: -5, Category: ""}, // word containing irregular kanji usage + dbTag{Name: "io", Order: 0, Score: -5, Category: ""}, // irregular okurigana usage + dbTag{Name: "oK", Order: 0, Score: -5, Category: ""}, // word containing out-dated kanji or kanji usage + dbTag{Name: "rK", Order: 0, Score: -5, Category: ""}, // rarely-used kanji form + dbTag{Name: "sK", Order: 0, Score: -5, Category: ""}, // search-only kanji form + + // miscellaneous sense info + dbTag{Name: "abbr", Order: 0, Score: 0, Category: ""}, // abbreviation + dbTag{Name: "arch", Order: -4, Score: 0, Category: "archaism"}, // archaism + dbTag{Name: "char", Order: 0, Score: 0, Category: ""}, // character + dbTag{Name: "chn", Order: 0, Score: 0, Category: ""}, // children's language + dbTag{Name: "col", Order: 0, Score: 0, Category: ""}, // colloquialism + dbTag{Name: "company", Order: 0, Score: 0, Category: ""}, // company name + dbTag{Name: "creat", Order: 0, Score: 0, Category: ""}, // creature + dbTag{Name: "dated", Order: -4, Score: 0, Category: "archaism"}, // dated term + dbTag{Name: "dei", Order: 0, Score: 0, Category: ""}, // deity + dbTag{Name: "derog", Order: 0, 
Score: 0, Category: ""}, // derogatory + dbTag{Name: "doc", Order: 0, Score: 0, Category: ""}, // document + dbTag{Name: "euph", Order: 0, Score: 0, Category: ""}, // euphemistic + dbTag{Name: "ev", Order: 0, Score: 0, Category: ""}, // event + dbTag{Name: "fam", Order: 0, Score: 0, Category: ""}, // familiar language + dbTag{Name: "fem", Order: 0, Score: 0, Category: ""}, // female term or language + dbTag{Name: "fict", Order: 0, Score: 0, Category: ""}, // fiction + dbTag{Name: "form", Order: 0, Score: 0, Category: ""}, // formal or literary term + dbTag{Name: "given", Order: 0, Score: 0, Category: ""}, // given name or forename, gender not specified + dbTag{Name: "group", Order: 0, Score: 0, Category: ""}, // group + dbTag{Name: "hist", Order: 0, Score: 0, Category: ""}, // historical term + dbTag{Name: "hon", Order: 0, Score: 0, Category: ""}, // honorific or respectful (sonkeigo) language + dbTag{Name: "hum", Order: 0, Score: 0, Category: ""}, // humble (kenjougo) language + dbTag{Name: "id", Order: -5, Score: 0, Category: "expression"}, // idiomatic expression + dbTag{Name: "joc", Order: 0, Score: 0, Category: ""}, // jocular, humorous term + dbTag{Name: "leg", Order: 0, Score: 0, Category: ""}, // legend + dbTag{Name: "m-sl", Order: 0, Score: 0, Category: ""}, // manga slang + dbTag{Name: "male", Order: 0, Score: 0, Category: ""}, // male term or language + dbTag{Name: "myth", Order: 0, Score: 0, Category: ""}, // mythology + dbTag{Name: "net-sl", Order: 0, Score: 0, Category: ""}, // Internet slang + dbTag{Name: "obj", Order: 0, Score: 0, Category: ""}, // object + dbTag{Name: "obs", Order: -4, Score: 0, Category: "archaism"}, // obsolete term + dbTag{Name: "on-mim", Order: 0, Score: 0, Category: ""}, // onomatopoeic or mimetic word + dbTag{Name: "organization", Order: 0, Score: 0, Category: ""}, // organization name + dbTag{Name: "oth", Order: 0, Score: 0, Category: ""}, // other + dbTag{Name: "person", Order: 0, Score: 0, Category: ""}, // full name of a 
particular person + dbTag{Name: "place", Order: 0, Score: 0, Category: ""}, // place name + dbTag{Name: "poet", Order: 0, Score: 0, Category: ""}, // poetical term + dbTag{Name: "pol", Order: 0, Score: 0, Category: ""}, // polite (teineigo) language + dbTag{Name: "product", Order: 0, Score: 0, Category: ""}, // product name + dbTag{Name: "proverb", Order: 0, Score: 0, Category: "expression"}, // proverb + dbTag{Name: "quote", Order: 0, Score: 0, Category: "expression"}, // quotation + dbTag{Name: "rare", Order: -4, Score: 0, Category: "archaism"}, // rare + dbTag{Name: "relig", Order: 0, Score: 0, Category: ""}, // religion + dbTag{Name: "sens", Order: 0, Score: 0, Category: ""}, // sensitive + dbTag{Name: "serv", Order: 0, Score: 0, Category: ""}, // service + dbTag{Name: "ship", Order: 0, Score: 0, Category: ""}, // ship name + dbTag{Name: "sl", Order: 0, Score: 0, Category: ""}, // slang + dbTag{Name: "station", Order: 0, Score: 0, Category: ""}, // railway station + dbTag{Name: "surname", Order: 0, Score: 0, Category: ""}, // family or surname + dbTag{Name: "uk", Order: 0, Score: 0, Category: ""}, // word usually written using kana alone + dbTag{Name: "unclass", Order: 0, Score: 0, Category: ""}, // unclassified name + dbTag{Name: "vulg", Order: 0, Score: 0, Category: ""}, // vulgar expression or word + dbTag{Name: "work", Order: 0, Score: 0, Category: ""}, // work of art, literature, music, etc. 
name + dbTag{Name: "X", Order: 0, Score: 0, Category: ""}, // rude or X-rated term (not displayed in educational software) + dbTag{Name: "yoji", Order: 0, Score: 0, Category: ""}, // yojijukugo + + // part-of-speech info + dbTag{Name: "adj-f", Order: -3, Score: 0, Category: "partOfSpeech"}, // noun or verb acting prenominally + dbTag{Name: "adj-i", Order: -3, Score: 0, Category: "partOfSpeech"}, // adjective (keiyoushi) + dbTag{Name: "adj-ix", Order: -3, Score: 0, Category: "partOfSpeech"}, // adjective (keiyoushi) - yoi/ii class + dbTag{Name: "adj-kari", Order: -3, Score: 0, Category: "partOfSpeech"}, // 'kari' adjective (archaic) + dbTag{Name: "adj-ku", Order: -3, Score: 0, Category: "partOfSpeech"}, // 'ku' adjective (archaic) + dbTag{Name: "adj-na", Order: -3, Score: 0, Category: "partOfSpeech"}, // adjectival nouns or quasi-adjectives (keiyodoshi) + dbTag{Name: "adj-nari", Order: -3, Score: 0, Category: "partOfSpeech"}, // archaic/formal form of na-adjective + dbTag{Name: "adj-no", Order: -3, Score: 0, Category: "partOfSpeech"}, // nouns which may take the genitive case particle 'no' + dbTag{Name: "adj-pn", Order: -3, Score: 0, Category: "partOfSpeech"}, // pre-noun adjectival (rentaishi) + dbTag{Name: "adj-shiku", Order: -3, Score: 0, Category: "partOfSpeech"}, // 'shiku' adjective (archaic) + dbTag{Name: "adj-t", Order: -3, Score: 0, Category: "partOfSpeech"}, // 'taru' adjective + dbTag{Name: "adv", Order: -3, Score: 0, Category: "partOfSpeech"}, // adverb (fukushi) + dbTag{Name: "adv-to", Order: -3, Score: 0, Category: "partOfSpeech"}, // adverb taking the 'to' particle + dbTag{Name: "aux", Order: -3, Score: 0, Category: "partOfSpeech"}, // auxiliary + dbTag{Name: "aux-adj", Order: -3, Score: 0, Category: "partOfSpeech"}, // auxiliary adjective + dbTag{Name: "aux-v", Order: -3, Score: 0, Category: "partOfSpeech"}, // auxiliary verb + dbTag{Name: "conj", Order: -3, Score: 0, Category: "partOfSpeech"}, // conjunction + dbTag{Name: "cop", Order: -3, Score: 0, 
Category: "partOfSpeech"}, // copula + dbTag{Name: "ctr", Order: -3, Score: 0, Category: "partOfSpeech"}, // counter + dbTag{Name: "exp", Order: -5, Score: 0, Category: "expression"}, // expressions (phrases, clauses, etc.) + dbTag{Name: "int", Order: -3, Score: 0, Category: "partOfSpeech"}, // interjection (kandoushi) + dbTag{Name: "n", Order: -3, Score: 0, Category: "partOfSpeech"}, // noun (common) (futsuumeishi) + dbTag{Name: "n-adv", Order: -3, Score: 0, Category: "partOfSpeech"}, // adverbial noun (fukushitekimeishi) + dbTag{Name: "n-pr", Order: -3, Score: 0, Category: "partOfSpeech"}, // proper noun + dbTag{Name: "n-pref", Order: -3, Score: 0, Category: "partOfSpeech"}, // noun, used as a prefix + dbTag{Name: "n-suf", Order: -3, Score: 0, Category: "partOfSpeech"}, // noun, used as a suffix + dbTag{Name: "n-t", Order: -3, Score: 0, Category: "partOfSpeech"}, // noun (temporal) (jisoumeishi) + dbTag{Name: "num", Order: -3, Score: 0, Category: "partOfSpeech"}, // numeric + dbTag{Name: "pn", Order: -3, Score: 0, Category: "partOfSpeech"}, // pronoun + dbTag{Name: "pref", Order: -3, Score: 0, Category: "partOfSpeech"}, // prefix + dbTag{Name: "prt", Order: -3, Score: 0, Category: "partOfSpeech"}, // particle + dbTag{Name: "suf", Order: -3, Score: 0, Category: "partOfSpeech"}, // suffix + dbTag{Name: "unc", Order: -3, Score: 0, Category: "partOfSpeech"}, // unclassified + dbTag{Name: "v-unspec", Order: -3, Score: 0, Category: "partOfSpeech"}, // verb unspecified + dbTag{Name: "v1", Order: -3, Score: 0, Category: "partOfSpeech"}, // Ichidan verb + dbTag{Name: "v1-s", Order: -3, Score: 0, Category: "partOfSpeech"}, // Ichidan verb - kureru special class + dbTag{Name: "v2a-s", Order: -3, Score: 0, Category: "partOfSpeech"}, // Nidan verb with 'u' ending (archaic) + dbTag{Name: "v2b-k", Order: -3, Score: 0, Category: "partOfSpeech"}, // Nidan verb (upper class) with 'bu' ending (archaic) + dbTag{Name: "v2b-s", Order: -3, Score: 0, Category: "partOfSpeech"}, // Nidan 
verb (lower class) with 'bu' ending (archaic) + dbTag{Name: "v2d-k", Order: -3, Score: 0, Category: "partOfSpeech"}, // Nidan verb (upper class) with 'dzu' ending (archaic) + dbTag{Name: "v2d-s", Order: -3, Score: 0, Category: "partOfSpeech"}, // Nidan verb (lower class) with 'dzu' ending (archaic) + dbTag{Name: "v2g-k", Order: -3, Score: 0, Category: "partOfSpeech"}, // Nidan verb (upper class) with 'gu' ending (archaic) + dbTag{Name: "v2g-s", Order: -3, Score: 0, Category: "partOfSpeech"}, // Nidan verb (lower class) with 'gu' ending (archaic) + dbTag{Name: "v2h-k", Order: -3, Score: 0, Category: "partOfSpeech"}, // Nidan verb (upper class) with 'hu/fu' ending (archaic) + dbTag{Name: "v2h-s", Order: -3, Score: 0, Category: "partOfSpeech"}, // Nidan verb (lower class) with 'hu/fu' ending (archaic) + dbTag{Name: "v2k-k", Order: -3, Score: 0, Category: "partOfSpeech"}, // Nidan verb (upper class) with 'ku' ending (archaic) + dbTag{Name: "v2k-s", Order: -3, Score: 0, Category: "partOfSpeech"}, // Nidan verb (lower class) with 'ku' ending (archaic) + dbTag{Name: "v2m-k", Order: -3, Score: 0, Category: "partOfSpeech"}, // Nidan verb (upper class) with 'mu' ending (archaic) + dbTag{Name: "v2m-s", Order: -3, Score: 0, Category: "partOfSpeech"}, // Nidan verb (lower class) with 'mu' ending (archaic) + dbTag{Name: "v2n-s", Order: -3, Score: 0, Category: "partOfSpeech"}, // Nidan verb (lower class) with 'nu' ending (archaic) + dbTag{Name: "v2r-k", Order: -3, Score: 0, Category: "partOfSpeech"}, // Nidan verb (upper class) with 'ru' ending (archaic) + dbTag{Name: "v2r-s", Order: -3, Score: 0, Category: "partOfSpeech"}, // Nidan verb (lower class) with 'ru' ending (archaic) + dbTag{Name: "v2s-s", Order: -3, Score: 0, Category: "partOfSpeech"}, // Nidan verb (lower class) with 'su' ending (archaic) + dbTag{Name: "v2t-k", Order: -3, Score: 0, Category: "partOfSpeech"}, // Nidan verb (upper class) with 'tsu' ending (archaic) + dbTag{Name: "v2t-s", Order: -3, Score: 0, Category: 
"partOfSpeech"}, // Nidan verb (lower class) with 'tsu' ending (archaic) + dbTag{Name: "v2w-s", Order: -3, Score: 0, Category: "partOfSpeech"}, // Nidan verb (lower class) with 'u' ending and 'we' conjugation (archaic) + dbTag{Name: "v2y-k", Order: -3, Score: 0, Category: "partOfSpeech"}, // Nidan verb (upper class) with 'yu' ending (archaic) + dbTag{Name: "v2y-s", Order: -3, Score: 0, Category: "partOfSpeech"}, // Nidan verb (lower class) with 'yu' ending (archaic) + dbTag{Name: "v2z-s", Order: -3, Score: 0, Category: "partOfSpeech"}, // Nidan verb (lower class) with 'zu' ending (archaic) + dbTag{Name: "v4b", Order: -3, Score: 0, Category: "partOfSpeech"}, // Yodan verb with 'bu' ending (archaic) + dbTag{Name: "v4g", Order: -3, Score: 0, Category: "partOfSpeech"}, // Yodan verb with 'gu' ending (archaic) + dbTag{Name: "v4h", Order: -3, Score: 0, Category: "partOfSpeech"}, // Yodan verb with 'hu/fu' ending (archaic) + dbTag{Name: "v4k", Order: -3, Score: 0, Category: "partOfSpeech"}, // Yodan verb with 'ku' ending (archaic) + dbTag{Name: "v4m", Order: -3, Score: 0, Category: "partOfSpeech"}, // Yodan verb with 'mu' ending (archaic) + dbTag{Name: "v4n", Order: -3, Score: 0, Category: "partOfSpeech"}, // Yodan verb with 'nu' ending (archaic) + dbTag{Name: "v4r", Order: -3, Score: 0, Category: "partOfSpeech"}, // Yodan verb with 'ru' ending (archaic) + dbTag{Name: "v4s", Order: -3, Score: 0, Category: "partOfSpeech"}, // Yodan verb with 'su' ending (archaic) + dbTag{Name: "v4t", Order: -3, Score: 0, Category: "partOfSpeech"}, // Yodan verb with 'tsu' ending (archaic) + dbTag{Name: "v5aru", Order: -3, Score: 0, Category: "partOfSpeech"}, // Godan verb - -aru special class + dbTag{Name: "v5b", Order: -3, Score: 0, Category: "partOfSpeech"}, // Godan verb with 'bu' ending + dbTag{Name: "v5g", Order: -3, Score: 0, Category: "partOfSpeech"}, // Godan verb with 'gu' ending + dbTag{Name: "v5k", Order: -3, Score: 0, Category: "partOfSpeech"}, // Godan verb with 'ku' ending + 
dbTag{Name: "v5k-s", Order: -3, Score: 0, Category: "partOfSpeech"}, // Godan verb - Iku/Yuku special class + dbTag{Name: "v5m", Order: -3, Score: 0, Category: "partOfSpeech"}, // Godan verb with 'mu' ending + dbTag{Name: "v5n", Order: -3, Score: 0, Category: "partOfSpeech"}, // Godan verb with 'nu' ending + dbTag{Name: "v5r", Order: -3, Score: 0, Category: "partOfSpeech"}, // Godan verb with 'ru' ending + dbTag{Name: "v5r-i", Order: -3, Score: 0, Category: "partOfSpeech"}, // Godan verb with 'ru' ending (irregular verb) + dbTag{Name: "v5s", Order: -3, Score: 0, Category: "partOfSpeech"}, // Godan verb with 'su' ending + dbTag{Name: "v5t", Order: -3, Score: 0, Category: "partOfSpeech"}, // Godan verb with 'tsu' ending + dbTag{Name: "v5u", Order: -3, Score: 0, Category: "partOfSpeech"}, // Godan verb with 'u' ending + dbTag{Name: "v5u-s", Order: -3, Score: 0, Category: "partOfSpeech"}, // Godan verb with 'u' ending (special class) + dbTag{Name: "v5uru", Order: -3, Score: 0, Category: "partOfSpeech"}, // Godan verb - Uru old class verb (old form of Eru) + dbTag{Name: "vi", Order: -3, Score: 0, Category: "partOfSpeech"}, // intransitive verb + dbTag{Name: "vk", Order: -3, Score: 0, Category: "partOfSpeech"}, // Kuru verb - special class + dbTag{Name: "vn", Order: -3, Score: 0, Category: "partOfSpeech"}, // irregular nu verb + dbTag{Name: "vr", Order: -3, Score: 0, Category: "partOfSpeech"}, // irregular ru verb, plain form ends with -ri + dbTag{Name: "vs", Order: -3, Score: 0, Category: "partOfSpeech"}, // noun or participle which takes the aux. 
verb suru + dbTag{Name: "vs-c", Order: -3, Score: 0, Category: "partOfSpeech"}, // su verb - precursor to the modern suru + dbTag{Name: "vs-i", Order: -3, Score: 0, Category: "partOfSpeech"}, // suru verb - included + dbTag{Name: "vs-s", Order: -3, Score: 0, Category: "partOfSpeech"}, // suru verb - special class + dbTag{Name: "vt", Order: -3, Score: 0, Category: "partOfSpeech"}, // transitive verb + dbTag{Name: "vz", Order: -3, Score: 0, Category: "partOfSpeech"}, // Ichidan verb - zuru verb (alternative form of -jiru verbs) + + // usage domain + dbTag{Name: "agric", Order: 0, Score: 0, Category: ""}, // agriculture + dbTag{Name: "anat", Order: 0, Score: 0, Category: ""}, // anatomy + dbTag{Name: "archeol", Order: 0, Score: 0, Category: ""}, // archeology + dbTag{Name: "archit", Order: 0, Score: 0, Category: ""}, // architecture + dbTag{Name: "art", Order: 0, Score: 0, Category: ""}, // art, aesthetics + dbTag{Name: "astron", Order: 0, Score: 0, Category: ""}, // astronomy + dbTag{Name: "audvid", Order: 0, Score: 0, Category: ""}, // audiovisual + dbTag{Name: "aviat", Order: 0, Score: 0, Category: ""}, // aviation + dbTag{Name: "baseb", Order: 0, Score: 0, Category: ""}, // baseball + dbTag{Name: "biochem", Order: 0, Score: 0, Category: ""}, // biochemistry + dbTag{Name: "biol", Order: 0, Score: 0, Category: ""}, // biology + dbTag{Name: "bot", Order: 0, Score: 0, Category: ""}, // botany + dbTag{Name: "Buddh", Order: 0, Score: 0, Category: ""}, // Buddhism + dbTag{Name: "bus", Order: 0, Score: 0, Category: ""}, // business + dbTag{Name: "cards", Order: 0, Score: 0, Category: ""}, // card games + dbTag{Name: "chem", Order: 0, Score: 0, Category: ""}, // chemistry + dbTag{Name: "Christn", Order: 0, Score: 0, Category: ""}, // Christianity + dbTag{Name: "cloth", Order: 0, Score: 0, Category: ""}, // clothing + dbTag{Name: "comp", Order: 0, Score: 0, Category: ""}, // computing + dbTag{Name: "cryst", Order: 0, Score: 0, Category: ""}, // crystallography + dbTag{Name: 
"dent", Order: 0, Score: 0, Category: ""}, // dentistry + dbTag{Name: "ecol", Order: 0, Score: 0, Category: ""}, // ecology + dbTag{Name: "econ", Order: 0, Score: 0, Category: ""}, // economics + dbTag{Name: "elec", Order: 0, Score: 0, Category: ""}, // electricity, elec. eng. + dbTag{Name: "electr", Order: 0, Score: 0, Category: ""}, // electronics + dbTag{Name: "embryo", Order: 0, Score: 0, Category: ""}, // embryology + dbTag{Name: "engr", Order: 0, Score: 0, Category: ""}, // engineering + dbTag{Name: "ent", Order: 0, Score: 0, Category: ""}, // entomology + dbTag{Name: "film", Order: 0, Score: 0, Category: ""}, // film + dbTag{Name: "finc", Order: 0, Score: 0, Category: ""}, // finance + dbTag{Name: "fish", Order: 0, Score: 0, Category: ""}, // fishing + dbTag{Name: "food", Order: 0, Score: 0, Category: ""}, // food, cooking + dbTag{Name: "gardn", Order: 0, Score: 0, Category: ""}, // gardening, horticulture + dbTag{Name: "genet", Order: 0, Score: 0, Category: ""}, // genetics + dbTag{Name: "geogr", Order: 0, Score: 0, Category: ""}, // geography + dbTag{Name: "geol", Order: 0, Score: 0, Category: ""}, // geology + dbTag{Name: "geom", Order: 0, Score: 0, Category: ""}, // geometry + dbTag{Name: "go", Order: 0, Score: 0, Category: ""}, // go (game) + dbTag{Name: "golf", Order: 0, Score: 0, Category: ""}, // golf + dbTag{Name: "gramm", Order: 0, Score: 0, Category: ""}, // grammar + dbTag{Name: "grmyth", Order: 0, Score: 0, Category: ""}, // Greek mythology + dbTag{Name: "hanaf", Order: 0, Score: 0, Category: ""}, // hanafuda + dbTag{Name: "horse", Order: 0, Score: 0, Category: ""}, // horse racing + dbTag{Name: "kabuki", Order: 0, Score: 0, Category: ""}, // kabuki + dbTag{Name: "law", Order: 0, Score: 0, Category: ""}, // law + dbTag{Name: "ling", Order: 0, Score: 0, Category: ""}, // linguistics + dbTag{Name: "logic", Order: 0, Score: 0, Category: ""}, // logic + dbTag{Name: "MA", Order: 0, Score: 0, Category: ""}, // martial arts + dbTag{Name: "mahj", Order: 
0, Score: 0, Category: ""}, // mahjong + dbTag{Name: "manga", Order: 0, Score: 0, Category: ""}, // manga + dbTag{Name: "math", Order: 0, Score: 0, Category: ""}, // mathematics + dbTag{Name: "mech", Order: 0, Score: 0, Category: ""}, // mechanical engineering + dbTag{Name: "med", Order: 0, Score: 0, Category: ""}, // medicine + dbTag{Name: "met", Order: 0, Score: 0, Category: ""}, // meteorology + dbTag{Name: "mil", Order: 0, Score: 0, Category: ""}, // military + dbTag{Name: "mining", Order: 0, Score: 0, Category: ""}, // mining + dbTag{Name: "music", Order: 0, Score: 0, Category: ""}, // music + dbTag{Name: "noh", Order: 0, Score: 0, Category: ""}, // noh + dbTag{Name: "ornith", Order: 0, Score: 0, Category: ""}, // ornithology + dbTag{Name: "paleo", Order: 0, Score: 0, Category: ""}, // paleontology + dbTag{Name: "pathol", Order: 0, Score: 0, Category: ""}, // pathology + dbTag{Name: "pharm", Order: 0, Score: 0, Category: ""}, // pharmacy + dbTag{Name: "phil", Order: 0, Score: 0, Category: ""}, // philosophy + dbTag{Name: "photo", Order: 0, Score: 0, Category: ""}, // photography + dbTag{Name: "physics", Order: 0, Score: 0, Category: ""}, // physics + dbTag{Name: "physiol", Order: 0, Score: 0, Category: ""}, // physiology + dbTag{Name: "politics", Order: 0, Score: 0, Category: ""}, // politics + dbTag{Name: "print", Order: 0, Score: 0, Category: ""}, // printing + dbTag{Name: "psy", Order: 0, Score: 0, Category: ""}, // psychiatry + dbTag{Name: "psyanal", Order: 0, Score: 0, Category: ""}, // psychoanalysis + dbTag{Name: "psych", Order: 0, Score: 0, Category: ""}, // psychology + dbTag{Name: "rail", Order: 0, Score: 0, Category: ""}, // railway + dbTag{Name: "rommyth", Order: 0, Score: 0, Category: ""}, // Roman mythology + dbTag{Name: "Shinto", Order: 0, Score: 0, Category: ""}, // Shinto + dbTag{Name: "shogi", Order: 0, Score: 0, Category: ""}, // shogi + dbTag{Name: "ski", Order: 0, Score: 0, Category: ""}, // skiing + dbTag{Name: "sports", Order: 0, Score: 
0, Category: ""}, // sports + dbTag{Name: "stat", Order: 0, Score: 0, Category: ""}, // statistics + dbTag{Name: "stockm", Order: 0, Score: 0, Category: ""}, // stock market + dbTag{Name: "sumo", Order: 0, Score: 0, Category: ""}, // sumo + dbTag{Name: "telec", Order: 0, Score: 0, Category: ""}, // telecommunications + dbTag{Name: "tradem", Order: 0, Score: 0, Category: ""}, // trademark + dbTag{Name: "tv", Order: 0, Score: 0, Category: ""}, // television + dbTag{Name: "vidg", Order: 0, Score: 0, Category: ""}, // video games + dbTag{Name: "zool", Order: 0, Score: 0, Category: ""}, // zoology + + // dialect + dbTag{Name: "bra", Order: 0, Score: 0, Category: ""}, // Brazilian + dbTag{Name: "hob", Order: 0, Score: 0, Category: ""}, // Hokkaido-ben + dbTag{Name: "ksb", Order: 0, Score: 0, Category: ""}, // Kansai-ben + dbTag{Name: "ktb", Order: 0, Score: 0, Category: ""}, // Kantou-ben + dbTag{Name: "kyb", Order: 0, Score: 0, Category: ""}, // Kyoto-ben + dbTag{Name: "kyu", Order: 0, Score: 0, Category: ""}, // Kyuushuu-ben + dbTag{Name: "nab", Order: 0, Score: 0, Category: ""}, // Nagano-ben + dbTag{Name: "osb", Order: 0, Score: 0, Category: ""}, // Osaka-ben + dbTag{Name: "rkb", Order: 0, Score: 0, Category: ""}, // Ryuukyuu-ben + dbTag{Name: "thb", Order: 0, Score: 0, Category: ""}, // Touhoku-ben + dbTag{Name: "tsb", Order: 0, Score: 0, Category: ""}, // Tosa-ben + dbTag{Name: "tsug", Order: 0, Score: 0, Category: ""}, // Tsugaru-ben + } +} diff --git a/structuredContent.go b/structuredContent.go new file mode 100644 index 0000000..ded8229 --- /dev/null +++ b/structuredContent.go @@ -0,0 +1,192 @@ +package yomichan + +type contentAttr struct { + lang string + fontStyle string // normal, italic + fontWeight string // normal, bold + fontSize string // small, medium, large, smaller, 80%, 125%, etc. 
// contentAttr collects the optional attributes that the content* builders
// copy onto a structured-content node. Zero-valued fields are omitted from
// the generated node. contentRuby and contentInternalLink only honour lang
// and data; the styling fields are applied by contentStyledContainer.
type contentAttr struct {
	lang               string            // emitted as the node's "lang" attribute
	fontStyle          string            // normal, italic
	fontWeight         string            // normal, bold
	fontSize           string            // small, medium, large, smaller, 80%, 125%, etc.
	textDecorationLine []string          // underline, overline, line-through
	verticalAlign      string            // baseline, sub, super, text-top, text-bottom, middle, top, bottom
	textAlign          string            // start, end, left, right, center, justify, justify-all, match-parent
	marginTop          int               // written as a bare number; 0 means "unset"
	marginLeft         int               // written as a bare number; 0 means "unset"
	marginRight        int               // written as a bare number; 0 means "unset"
	marginBottom       int               // written as a bare number; 0 means "unset"
	listStyleType      string            // emitted as the "listStyleType" style
	data               map[string]string // emitted as the node's "data" attribute
}

// contentReduce collapses a content list into its most compact form.
//
// Adjacent strings are concatenated:
//
//	["one", "two", node, "four"] -> ["onetwo", node, "four"]
//
// A list made up solely of strings becomes a single string:
//
//	["one", "two"] -> "onetwo"
//
// A list with exactly one element is returned as that element, and an empty
// list is returned as an empty (non-nil) []any.
func contentReduce(contents []any) any {
	if len(contents) == 1 {
		return contents[0]
	}

	// Kept non-nil so that an empty result stays an empty list; encoding/json
	// would render a nil slice as null instead of [].
	reduced := []any{}
	var pending string
	onlyStrings := len(contents) > 0

	for _, content := range contents {
		if text, ok := content.(string); ok {
			pending += text
			continue
		}
		onlyStrings = false
		if pending != "" {
			reduced = append(reduced, pending)
			pending = ""
		}
		reduced = append(reduced, content)
	}

	// Honour the "only strings -> one string" contract even when every
	// string is empty; previously that case fell through and produced an
	// empty list instead of "".
	if onlyStrings {
		return pending
	}

	if pending != "" {
		reduced = append(reduced, pending)
	}
	if len(reduced) == 1 {
		return reduced[0]
	}
	return reduced
}

// contentSetCommonAttr copies the attributes shared by every node kind
// (lang and data) from attr onto node, skipping the ones that are unset.
func contentSetCommonAttr(node map[string]any, attr contentAttr) {
	if attr.lang != "" {
		node["lang"] = attr.lang
	}
	if len(attr.data) != 0 {
		node["data"] = attr.data
	}
}

// contentStructure wraps the reduced contents in a top-level
// "structured-content" object.
func contentStructure(contents ...any) map[string]any {
	return map[string]any{
		"type":    "structured-content",
		"content": contentReduce(contents),
	}
}

// contentRuby builds a "ruby" node: the reduced contents followed by the
// ruby text in an "rt" element, with "rp" parentheses around it.
func contentRuby(attr contentAttr, ruby string, contents ...any) map[string]any {
	rubyContent := map[string]any{
		"tag": "ruby",
		"content": []any{
			contentReduce(contents),
			map[string]string{"tag": "rp", "content": "("},
			map[string]string{"tag": "rt", "content": ruby},
			map[string]string{"tag": "rp", "content": ")"},
		},
	}
	contentSetCommonAttr(rubyContent, attr)
	return rubyContent
}

// contentInternalLink builds an "a" node whose href is a wildcard-free query
// for the given term. When no contents are supplied the query itself is used
// as the link text.
//
// NOTE(review): query is concatenated into the href without escaping, so a
// query containing '&', '#', '+' or '%' would yield a malformed link. Escaping
// it (e.g. with net/url.QueryEscape) requires an import this file does not
// have yet; confirm how the consumer parses the href before changing it.
func contentInternalLink(attr contentAttr, query string, contents ...any) map[string]any {
	linkContent := map[string]any{
		"tag":  "a",
		"href": "?query=" + query + "&wildcards=off",
	}
	if len(contents) == 0 {
		linkContent["content"] = query
	} else {
		linkContent["content"] = contentReduce(contents)
	}
	contentSetCommonAttr(linkContent, attr)
	return linkContent
}

// contentSpan builds a styled "span" node.
func contentSpan(attr contentAttr, contents ...any) map[string]any {
	return contentStyledContainer(attr, "span", contents...)
}

// contentDiv builds a styled "div" node.
func contentDiv(attr contentAttr, contents ...any) map[string]any {
	return contentStyledContainer(attr, "div", contents...)
}

// contentListItem builds a styled "li" node.
func contentListItem(attr contentAttr, contents ...any) map[string]any {
	return contentStyledContainer(attr, "li", contents...)
}

// contentOrderedList builds a styled "ol" node.
func contentOrderedList(attr contentAttr, contents ...any) map[string]any {
	return contentStyledContainer(attr, "ol", contents...)
}

// contentUnorderedList builds a styled "ul" node.
func contentUnorderedList(attr contentAttr, contents ...any) map[string]any {
	return contentStyledContainer(attr, "ul", contents...)
}

// contentTable builds a styled "table" node.
func contentTable(attr contentAttr, contents ...any) map[string]any {
	return contentStyledContainer(attr, "table", contents...)
}

// contentTableHead builds a styled "thead" node.
func contentTableHead(attr contentAttr, contents ...any) map[string]any {
	return contentStyledContainer(attr, "thead", contents...)
}

// contentTableBody builds a styled "tbody" node.
func contentTableBody(attr contentAttr, contents ...any) map[string]any {
	return contentStyledContainer(attr, "tbody", contents...)
}

// contentTableRow builds a styled "tr" node.
func contentTableRow(attr contentAttr, contents ...any) map[string]any {
	return contentStyledContainer(attr, "tr", contents...)
}

// contentTableHeadCell builds a styled "th" node.
func contentTableHeadCell(attr contentAttr, contents ...any) map[string]any {
	return contentStyledContainer(attr, "th", contents...)
}

// contentTableCell builds a styled "td" node.
func contentTableCell(attr contentAttr, contents ...any) map[string]any {
	return contentStyledContainer(attr, "td", contents...)
}

// contentStyledContainer builds a node with the given tag around the reduced
// contents. lang and data are attached when set, and a "style" object is
// attached only when at least one style field of attr is set.
func contentStyledContainer(attr contentAttr, tag string, contents ...any) map[string]any {
	container := map[string]any{
		"tag":     tag,
		"content": contentReduce(contents),
	}
	contentSetCommonAttr(container, attr)
	if style := contentStyle(attr); len(style) != 0 {
		container["style"] = style
	}
	return container
}

// contentStyle converts the styling fields of attr into a style map,
// leaving out every field that still has its zero value. The returned map is
// never nil.
func contentStyle(attr contentAttr) map[string]any {
	style := make(map[string]any)

	setText := func(key, value string) {
		if value != "" {
			style[key] = value
		}
	}
	setNumber := func(key string, value int) {
		if value != 0 {
			style[key] = value
		}
	}

	setText("fontStyle", attr.fontStyle)
	setText("fontWeight", attr.fontWeight)
	setText("fontSize", attr.fontSize)
	if len(attr.textDecorationLine) != 0 {
		style["textDecorationLine"] = attr.textDecorationLine
	}
	setText("verticalAlign", attr.verticalAlign)
	setText("textAlign", attr.textAlign)
	setNumber("marginTop", attr.marginTop)
	setNumber("marginLeft", attr.marginLeft)
	setNumber("marginRight", attr.marginRight)
	setNumber("marginBottom", attr.marginBottom)
	setText("listStyleType", attr.listStyleType)

	return style
}