From abc28bb19dc7bc6f7efba09c0720b21fbbc19b11 Mon Sep 17 00:00:00 2001 From: stephenmk Date: Sun, 22 Jan 2023 14:37:18 -0600 Subject: [PATCH] Add new JMdict version --- common.go | 5 +- edict.go | 252 ------------------------------- go.mod | 1 + go.sum | 2 + jmdict.go | 221 +++++++++++++++++++++++++++ jmdictConstants.go | 215 ++++++++++++++++++++++++++ jmdictForms.go | 254 +++++++++++++++++++++++++++++++ jmdictGlossary.go | 300 +++++++++++++++++++++++++++++++++++++ jmdictHeadword.go | 267 +++++++++++++++++++++++++++++++++ jmdictMetadata.go | 158 ++++++++++++++++++++ jmdictReferences.go | 166 +++++++++++++++++++++ jmdictTags.go | 348 +++++++++++++++++++++++++++++++++++++++++++ structuredContent.go | 192 ++++++++++++++++++++++++ 13 files changed, 2127 insertions(+), 254 deletions(-) delete mode 100644 edict.go create mode 100644 jmdict.go create mode 100644 jmdictConstants.go create mode 100644 jmdictForms.go create mode 100644 jmdictGlossary.go create mode 100644 jmdictHeadword.go create mode 100644 jmdictMetadata.go create mode 100644 jmdictReferences.go create mode 100644 jmdictTags.go create mode 100644 structuredContent.go diff --git a/common.go b/common.go index 5f2dab3..9d6b2aa 100644 --- a/common.go +++ b/common.go @@ -306,7 +306,7 @@ func detectFormat(path string) (string, error) { } switch filepath.Base(path) { - case "JMdict", "JMdict.xml", "JMdict_e", "JMdict_e.xml": + case "JMdict", "JMdict.xml", "JMdict_e", "JMdict_e.xml", "JMdict_e_examp": return "edict", nil case "JMnedict", "JMnedict.xml": return "enamdict", nil @@ -336,7 +336,8 @@ func detectFormat(path string) (string, error) { func ExportDb(inputPath, outputPath, format, language, title string, stride int, pretty bool) error { handlers := map[string]func(string, string, string, string, int, bool) error{ - "edict": jmdictExportDb, + "edict": jmdExportDb, + "forms": formsExportDb, "enamdict": jmnedictExportDb, "epwing": epwingExportDb, "kanjidic": kanjidicExportDb, diff --git a/edict.go b/edict.go 
deleted file mode 100644 index b6326f0..0000000 --- a/edict.go +++ /dev/null @@ -1,252 +0,0 @@ -package yomichan - -import ( - "os" - "strings" - - "foosoft.net/projects/jmdict" -) - -const edrdgAttribution = "This publication has included material from the JMdict (EDICT, etc.) dictionary files in accordance with the licence provisions of the Electronic Dictionaries Research Group. See http://www.edrdg.org/" - -func jmdictBuildRules(term *dbTerm) { - for _, tag := range term.DefinitionTags { - switch tag { - case "adj-i", "v1", "vk", "vz": - term.addRules(tag) - default: - if strings.HasPrefix(tag, "v5") { - term.addRules("v5") - } else if strings.HasPrefix(tag, "vs-") { - term.addRules("vs") - } - } - } -} - -func jmdictBuildScore(term *dbTerm) { - for _, tag := range term.DefinitionTags { - switch tag { - case "arch": - term.Score -= 100 - } - } - for _, tag := range term.TermTags { - switch tag { - case "news", "ichi", "spec", "gai1": - term.Score += 100 - case "P": - term.Score += 500 - case "iK", "ik", "ok", "oK", "io", "oik": - term.Score -= 100 - } - } -} - -func jmdictAddPriorities(term *dbTerm, priorities ...string) { - for _, priority := range priorities { - switch priority { - case "news1", "ichi1", "spec1", "gai1": - term.addTermTags("P") - fallthrough - case "news2", "ichi2", "spec2", "gai2": - term.addTermTags(priority[:len(priority)-1]) - } - } -} - -func jmdictBuildTagMeta(entities map[string]string) dbTagList { - tags := dbTagList{ - dbTag{Name: "news", Notes: "appears frequently in Mainichi Shimbun", Category: "frequent", Order: -2}, - dbTag{Name: "ichi", Notes: "listed as common in Ichimango Goi Bunruishuu", Category: "frequent", Order: -2}, - dbTag{Name: "spec", Notes: "common words not included in frequency lists", Category: "frequent", Order: -2}, - dbTag{Name: "gai", Notes: "common loanword", Category: "frequent", Order: -2}, - dbTag{Name: "P", Notes: "popular term", Category: "popular", Order: -10, Score: 10}, - } - - for name, value := 
range entities { - tag := dbTag{Name: name, Notes: value} - - switch name { - case "exp", "id": - tag.Category = "expression" - tag.Order = -5 - case "arch": - tag.Category = "archaism" - tag.Order = -4 - case "iK", "ik", "ok", "oK", "io", "oik": - tag.Score = -5 - case "adj-f", "adj-i", "adj-ix", "adj-ku", "adj-na", "adj-nari", "adj-no", "adj-pn", "adj-shiku", "adj-t", "adv", "adv-to", "aux-adj", - "aux", "aux-v", "conj", "cop-da", "ctr", "int", "n-adv", "n", "n-pref", "n-pr", "n-suf", "n-t", "num", "pn", "pref", "prt", "suf", - "unc", "v1", "v1-s", "v2a-s", "v2b-k", "v2d-s", "v2g-k", "v2g-s", "v2h-k", "v2h-s", "v2k-k", "v2k-s", "v2m-s", "v2n-s", "v2r-k", - "v2r-s", "v2s-s", "v2t-k", "v2t-s", "v2w-s", "v2y-k", "v2y-s", "v2z-s", "v4b", "v4h", "v4k", "v4m", "v4r", "v4s", "v4t", "v5aru", - "v5b", "v5g", "v5k", "v5k-s", "v5m", "v5n", "v5r-i", "v5r", "v5s", "v5t", "v5u", "v5u-s", "vi", "vk", "vn", "vr", "vs-c", "vs-i", - "vs", "vs-s", "vt", "vz": - tag.Category = "partOfSpeech" - tag.Order = -3 - } - - tags = append(tags, tag) - } - - return tags -} - -func jmdictExtractTerms(edictEntry jmdict.JmdictEntry, language string) []dbTerm { - var terms []dbTerm - - convert := func(reading jmdict.JmdictReading, kanji *jmdict.JmdictKanji) { - if kanji != nil && reading.Restrictions != nil && !hasString(kanji.Expression, reading.Restrictions) { - return - } - - var termBase dbTerm - termBase.addTermTags(reading.Information...) - - if kanji == nil { - termBase.Expression = reading.Reading - jmdictAddPriorities(&termBase, reading.Priorities...) - } else { - termBase.Expression = kanji.Expression - termBase.Reading = reading.Reading - termBase.addTermTags(kanji.Information...) 
- - for _, priority := range kanji.Priorities { - if hasString(priority, reading.Priorities) { - jmdictAddPriorities(&termBase, priority) - } - } - } - - var partsOfSpeech []string - for index, sense := range edictEntry.Sense { - - if len(sense.PartsOfSpeech) != 0 { - partsOfSpeech = sense.PartsOfSpeech - } - - if sense.RestrictedReadings != nil && !hasString(reading.Reading, sense.RestrictedReadings) { - continue - } - - if kanji != nil && sense.RestrictedKanji != nil && !hasString(kanji.Expression, sense.RestrictedKanji) { - continue - } - - term := dbTerm{ - Reading: termBase.Reading, - Expression: termBase.Expression, - Score: len(edictEntry.Sense) - index, - Sequence: edictEntry.Sequence, - } - - for _, glossary := range sense.Glossary { - if glossary.Language == nil && language == "" || glossary.Language != nil && language == *glossary.Language { - term.Glossary = append(term.Glossary, glossary.Content) - } - } - - if len(term.Glossary) == 0 { - continue - } - - term.addDefinitionTags(termBase.DefinitionTags...) - term.addTermTags(termBase.TermTags...) - term.addDefinitionTags(partsOfSpeech...) - term.addDefinitionTags(sense.Fields...) - term.addDefinitionTags(sense.Misc...) - term.addDefinitionTags(sense.Dialects...) 
- - jmdictBuildRules(&term) - jmdictBuildScore(&term) - - terms = append(terms, term) - } - } - - if len(edictEntry.Kanji) > 0 { - for _, kanji := range edictEntry.Kanji { - for _, reading := range edictEntry.Readings { - if reading.NoKanji == nil { - convert(reading, &kanji) - } - } - } - for _, reading := range edictEntry.Readings { - if reading.NoKanji != nil { - convert(reading, nil) - } - } - } else { - for _, reading := range edictEntry.Readings { - convert(reading, nil) - } - } - - return terms -} - -func jmdictExportDb(inputPath, outputPath, language, title string, stride int, pretty bool) error { - reader, err := os.Open(inputPath) - if err != nil { - return err - } - defer reader.Close() - - dict, entities, err := jmdict.LoadJmdictNoTransform(reader) - if err != nil { - return err - } - - var langTag string - switch language { - case "dutch": - langTag = "dut" - case "french": - langTag = "fre" - case "german": - langTag = "ger" - case "hungarian": - langTag = "hun" - case "italian": - langTag = "ita" - case "russian": - langTag = "rus" - case "slovenian": - langTag = "slv" - case "spanish": - langTag = "spa" - case "swedish": - langTag = "swe" - } - - var terms dbTermList - for _, entry := range dict.Entries { - terms = append(terms, jmdictExtractTerms(entry, langTag)...) 
- } - - if title == "" { - title = "JMdict" - } - - recordData := map[string]dbRecordList{ - "term": terms.crush(), - "tag": jmdictBuildTagMeta(entities).crush(), - } - - index := dbIndex{ - Title: title, - Revision: "jmdict4", - Sequenced: true, - Attribution: edrdgAttribution, - } - index.setDefaults() - - return writeDb( - outputPath, - index, - recordData, - stride, - pretty, - ) -} diff --git a/go.mod b/go.mod index 0bca3dd..4f31a22 100644 --- a/go.mod +++ b/go.mod @@ -7,6 +7,7 @@ require ( foosoft.net/projects/zero-epwing-go v0.0.0-20220704035039-bc008453615d github.com/andlabs/ui v0.0.0-20200610043537-70a69d6ae31e github.com/mattn/go-sqlite3 v1.14.14 + golang.org/x/exp v0.0.0-20221207211629-99ab8fa1c11f ) require golang.org/x/text v0.3.7 // indirect diff --git a/go.sum b/go.sum index ca51ada..4dd5f91 100644 --- a/go.sum +++ b/go.sum @@ -6,5 +6,7 @@ github.com/andlabs/ui v0.0.0-20200610043537-70a69d6ae31e h1:wSQCJiig/QkoUnpvelSP github.com/andlabs/ui v0.0.0-20200610043537-70a69d6ae31e/go.mod h1:5G2EjwzgZUPnnReoKvPWVneT8APYbyKkihDVAHUi0II= github.com/mattn/go-sqlite3 v1.14.14 h1:qZgc/Rwetq+MtyE18WhzjokPD93dNqLGNT3QJuLvBGw= github.com/mattn/go-sqlite3 v1.14.14/go.mod h1:NyWgC/yNuGj7Q9rpYnZvas74GogHl5/Z4A/KQRfk6bU= +golang.org/x/exp v0.0.0-20221207211629-99ab8fa1c11f h1:90Jq/vvGVDsqj8QqCynjFw9MCerDguSMODLYII416Y8= +golang.org/x/exp v0.0.0-20221207211629-99ab8fa1c11f/go.mod h1:CxIveKay+FTh1D0yPZemJVgC/95VzuuOLq5Qi4xnoYc= golang.org/x/text v0.3.7 h1:olpwvP2KacW1ZWvsR7uQhoyTYvKAupfQrRGBFM352Gk= golang.org/x/text v0.3.7/go.mod h1:u+2+/6zg+i71rQMx5EYifcz6MCKuco9NR6JIITiCfzQ= diff --git a/jmdict.go b/jmdict.go new file mode 100644 index 0000000..74809e7 --- /dev/null +++ b/jmdict.go @@ -0,0 +1,221 @@ +package yomichan + +import ( + "os" + "regexp" + "strconv" + "strings" + + "foosoft.net/projects/jmdict" + "golang.org/x/exp/slices" +) + +func grammarRules(partsOfSpeech []string) []string { + rules := []string{} + for _, partOfSpeech := range partsOfSpeech { + switch 
partOfSpeech { + case "adj-i", "vk", "vz": + rules = append(rules, partOfSpeech) + default: + if strings.HasPrefix(partOfSpeech, "v5") { + rules = append(rules, "v5") + } else if strings.HasPrefix(partOfSpeech, "v1") { + rules = append(rules, "v1") + } else if strings.HasPrefix(partOfSpeech, "vs-") { + rules = append(rules, "vs") + } + } + } + return rules +} + +func calculateTermScore(senseNumber int, headword headword) int { + const senseWeight int = 1 + const entryPositionWeight int = 100 + const priorityWeight int = 10000 + + score := 0 + score -= (senseNumber - 1) * senseWeight + score -= headword.Index * entryPositionWeight + score += headword.Score() * priorityWeight + + return score +} + +func doDisplaySenseNumberTag(headword headword, entry jmdict.JmdictEntry, meta jmdictMetadata) bool { + // Display sense numbers if the entry has more than one sense + // or if the headword is found in multiple entries. + hash := headword.Hash() + if meta.seqToSenseCount[entry.Sequence] > 1 { + return true + } else if len(meta.headwordHashToSeqs[hash]) > 1 { + return true + } else { + return false + } +} + +func jmdictPublicationDate(dictionary jmdict.Jmdict) string { + dateEntry := dictionary.Entries[len(dictionary.Entries)-1] + r := regexp.MustCompile(`\d{4}-\d{2}-\d{2}`) + jmdictDate := r.FindString(dateEntry.Sense[0].Glossary[0].Content) + return jmdictDate +} + +func createFormsTerm(headword headword, entry jmdict.JmdictEntry, meta jmdictMetadata) dbTerm { + term := baseFormsTerm(entry) + term.Expression = headword.Expression + term.Reading = headword.Reading + + term.addTermTags(headword.TermTags...) 
+ + term.addDefinitionTags("forms") + senseNumber := meta.seqToSenseCount[entry.Sequence] + 1 + term.Score = calculateTermScore(senseNumber, headword) + return term +} + +func createSearchTerm(headword headword, entry jmdict.JmdictEntry, meta jmdictMetadata) dbTerm { + term := dbTerm{ + Expression: headword.Expression, + Sequence: -entry.Sequence, + } + for _, sense := range entry.Sense { + rules := grammarRules(sense.PartsOfSpeech) + term.addRules(rules...) + } + term.addTermTags(headword.TermTags...) + term.Score = calculateTermScore(0, headword) + + redirectHeadword := meta.seqToMainHeadword[entry.Sequence] + expHash := redirectHeadword.ExpHash() + doDisplayReading := (len(meta.expHashToReadings[expHash]) > 1) + + content := contentSpan( + contentAttr{fontSize: "130%"}, + "โŸถ", + redirectHeadword.ToInternalLink(doDisplayReading), + ) + + term.Glossary = []any{contentStructure(content)} + return term +} + +func createSenseTerm(sense jmdict.JmdictSense, senseNumber int, headword headword, entry jmdict.JmdictEntry, meta jmdictMetadata) dbTerm { + term := dbTerm{ + Expression: headword.Expression, + Reading: headword.Reading, + Sequence: entry.Sequence, + } + + term.Glossary = createGlossary(sense, meta) + + term.addTermTags(headword.TermTags...) + + if doDisplaySenseNumberTag(headword, entry, meta) { + senseNumberTag := strconv.Itoa(senseNumber) + term.addDefinitionTags(senseNumberTag) + } + term.addDefinitionTags(sense.PartsOfSpeech...) + term.addDefinitionTags(sense.Fields...) + term.addDefinitionTags(sense.Misc...) + term.addDefinitionTags(sense.Dialects...) + + rules := grammarRules(sense.PartsOfSpeech) + term.addRules(rules...) 
+ + term.Score = calculateTermScore(senseNumber, headword) + + return term +} + +func extractTerms(headword headword, entry jmdict.JmdictEntry, meta jmdictMetadata) ([]dbTerm, bool) { + if meta.seqToSenseCount[entry.Sequence] == 0 { + return nil, false + } + if headword.IsSearchOnly { + searchTerm := createSearchTerm(headword, entry, meta) + return []dbTerm{searchTerm}, true + } + terms := []dbTerm{} + senseNumber := 1 + for _, sense := range entry.Sense { + if !glossaryContainsLanguage(sense.Glossary, meta.language) { + continue + } + if sense.RestrictedReadings != nil && !slices.Contains(sense.RestrictedReadings, headword.Reading) { + senseNumber += 1 + continue + } + if sense.RestrictedKanji != nil && !slices.Contains(sense.RestrictedKanji, headword.Expression) { + senseNumber += 1 + continue + } + senseTerm := createSenseTerm(sense, senseNumber, headword, entry, meta) + senseNumber += 1 + terms = append(terms, senseTerm) + } + + if meta.hasMultipleForms[entry.Sequence] { + formsTerm := createFormsTerm(headword, entry, meta) + terms = append(terms, formsTerm) + } + return terms, true +} + +func jmdExportDb(inputPath string, outputPath string, languageName string, title string, stride int, pretty bool) error { + reader, err := os.Open(inputPath) + if err != nil { + return err + } + defer reader.Close() + + dictionary, entities, err := jmdict.LoadJmdictNoTransform(reader) + if err != nil { + return err + } + + meta := newJmdictMetadata(dictionary, languageName) + + terms := dbTermList{} + for _, entry := range dictionary.Entries { + headwords := extractHeadwords(entry) + for _, headword := range headwords { + if newTerms, ok := extractTerms(headword, entry, meta); ok { + terms = append(terms, newTerms...) + } + } + } + + tags := dbTagList{} + tags = append(tags, entityTags(entities)...) + tags = append(tags, senseNumberTags(meta.maxSenseCount)...) + tags = append(tags, newsFrequencyTags()...) + tags = append(tags, customDbTags()...) 
+ + recordData := map[string]dbRecordList{ + "term": terms.crush(), + "tag": tags.crush(), + } + + if title == "" { + title = "JMdict" + } + jmdictDate := jmdictPublicationDate(dictionary) + + index := dbIndex{ + Title: title, + Revision: "JMdict." + jmdictDate, + Sequenced: true, + Attribution: edrdgAttribution, + } + index.setDefaults() + + return writeDb( + outputPath, + index, + recordData, + stride, + pretty, + ) +} diff --git a/jmdictConstants.go b/jmdictConstants.go new file mode 100644 index 0000000..1d49194 --- /dev/null +++ b/jmdictConstants.go @@ -0,0 +1,215 @@ +package yomichan + +type LangCode struct { + language string + code string +} + +const edrdgAttribution = "This publication has included material from the JMdict (EDICT, etc.) dictionary files in accordance with the licence provisions of the Electronic Dictionaries Research Group. See http://www.edrdg.org/" + +const prioritySymbol = "โ˜…" +const rareKanjiSymbol = "๐Ÿ…" +const irregularSymbol = "โš " +const outdatedSymbol = "โ›ฌ" +const defaultSymbol = "ใŠ’" + +const priorityTagName = "โญ" +const rareKanjiTagName = "R" +const irregularTagName = "โš ๏ธ" +const outdatedTagName = "โ›ฌ" +const atejiTagName = "ateji" +const gikunTagName = "gikun" + +const langMarker = "'๐ŸŒ '" +const noteMarker = "'๐Ÿ“ '" +const infoMarker = "'โ„น๏ธ '" +const refMarker = "'โžก๏ธ '" +const antonymMarker = "'๐Ÿ”„ '" + +var ISOtoFlag = map[string]string{ + "": "'๐Ÿ‡ฌ๐Ÿ‡ง '", + "eng": "'๐Ÿ‡ฌ๐Ÿ‡ง '", + "dut": "'๐Ÿ‡ณ๐Ÿ‡ฑ '", + "fre": "'๐Ÿ‡ซ๐Ÿ‡ท '", + "ger": "'๐Ÿ‡ฉ๐Ÿ‡ช '", + "hun": "'๐Ÿ‡ญ๐Ÿ‡บ '", + "ita": "'๐Ÿ‡ฎ๐Ÿ‡น '", + "jpn": "'๐Ÿ‡ฏ๐Ÿ‡ต '", + "rus": "'๐Ÿ‡ท๐Ÿ‡บ '", + "slv": "'๐Ÿ‡ธ๐Ÿ‡ฎ '", + "spa": "'๐Ÿ‡ช๐Ÿ‡ธ '", + "swe": "'๐Ÿ‡ธ๐Ÿ‡ช '", +} + +var langNameToCode = map[string]string{ + "": "eng", + "english": "eng", + "dutch": "dut", + "french": "fre", + "german": "ger", + "hungarian": "hun", + "italian": "ita", + "russian": "rus", + "slovenian": "slv", + "spanish": "spa", + "swedish": "swe", +} + +var 
glossTypeCodeToName = map[LangCode]string{ + LangCode{"eng", "lit"}: "literally", + LangCode{"eng", "fig"}: "figuratively", + LangCode{"eng", "expl"}: "", // don't need to tell the user that an explanation is an explanation + LangCode{"eng", "tm"}: "trademark", +} + +var refNoteHint = map[LangCode]string{ + LangCode{"eng", "xref"}: "see", + LangCode{"eng", "ant"}: "antonym", +} + +var sourceLangTypeCodeToType = map[LangCode]string{ + LangCode{"eng", "part"}: "partial", + LangCode{"eng", ""}: "", // implied "full" +} + +var langCodeToName = map[LangCode]string{ + LangCode{"eng", "afr"}: "Afrikaans", + LangCode{"eng", "ain"}: "Ainu", + LangCode{"eng", "alg"}: "Algonquian", + LangCode{"eng", "amh"}: "Amharic", + LangCode{"eng", "ara"}: "Arabic", + LangCode{"eng", "arn"}: "Mapudungun", + LangCode{"eng", "bnt"}: "Bantu", + LangCode{"eng", "bre"}: "Breton", + LangCode{"eng", "bul"}: "Bulgarian", + LangCode{"eng", "bur"}: "Burmese", + LangCode{"eng", "chi"}: "Chinese", + LangCode{"eng", "chn"}: "Chinook Jargon", + LangCode{"eng", "cze"}: "Czech", + LangCode{"eng", "dan"}: "Danish", + LangCode{"eng", "dut"}: "Dutch", + LangCode{"eng", "eng"}: "English", + LangCode{"eng", "epo"}: "Esperanto", + LangCode{"eng", "est"}: "Estonian", + LangCode{"eng", "fil"}: "Filipino", + LangCode{"eng", "fin"}: "Finnish", + LangCode{"eng", "fre"}: "French", + LangCode{"eng", "geo"}: "Georgian", + LangCode{"eng", "ger"}: "German", + LangCode{"eng", "glg"}: "Galician", + LangCode{"eng", "grc"}: "Ancient Greek", + LangCode{"eng", "gre"}: "Modern Greek", + LangCode{"eng", "haw"}: "Hawaiian", + LangCode{"eng", "heb"}: "Hebrew", + LangCode{"eng", "hin"}: "Hindi", + LangCode{"eng", "hun"}: "Hungarian", + LangCode{"eng", "ice"}: "Icelandic", + LangCode{"eng", "ind"}: "Indonesian", + LangCode{"eng", "ita"}: "Italian", + LangCode{"eng", "khm"}: "Khmer", + LangCode{"eng", "kor"}: "Korean", + LangCode{"eng", "kur"}: "Kurdish", + LangCode{"eng", "lat"}: "Latin", + LangCode{"eng", "mal"}: "Malayalam", + 
LangCode{"eng", "mao"}: "Maori", + LangCode{"eng", "may"}: "Malay", + LangCode{"eng", "mnc"}: "Manchu", + LangCode{"eng", "mol"}: "Moldavian", // ISO 639 deprecated (https://iso639-3.sil.org/code/mol) + LangCode{"eng", "mon"}: "Mongolian", + LangCode{"eng", "nor"}: "Norwegian", + LangCode{"eng", "per"}: "Persian", + LangCode{"eng", "pol"}: "Polish", + LangCode{"eng", "por"}: "Portuguese", + LangCode{"eng", "rum"}: "Romanian", + LangCode{"eng", "rus"}: "Russian", + LangCode{"eng", "san"}: "Sanskrit", + LangCode{"eng", "scr"}: "Croatian", // Code doesn't seem to exist in ISO 639. Should be "hrv" instead? (https://iso639-3.sil.org/code/hrv) + LangCode{"eng", "slo"}: "Slovak", + LangCode{"eng", "slv"}: "Slovenian", + LangCode{"eng", "som"}: "Somali", + LangCode{"eng", "spa"}: "Spanish", + LangCode{"eng", "swa"}: "Swahili", + LangCode{"eng", "swe"}: "Swedish", + LangCode{"eng", "tah"}: "Tahitian", + LangCode{"eng", "tam"}: "Tamil", + LangCode{"eng", "tgl"}: "Tagalog", + LangCode{"eng", "tha"}: "Thai", + LangCode{"eng", "tib"}: "Tibetan", + LangCode{"eng", "tur"}: "Turkish", + LangCode{"eng", "ukr"}: "Ukrainian", + LangCode{"eng", "urd"}: "Urdu", + LangCode{"eng", "vie"}: "Vietnamese", + LangCode{"eng", "yid"}: "Yiddish", +} + +// https://www.iana.org/assignments/language-subtag-registry/language-subtag-registry +var ISOtoHTML = map[string]string{ + "afr": "af", // Afrikaans + "ain": "ain", // Ainu + "alg": "alg", // Algonquian + "amh": "am", // Amharic + "ara": "ar", // Arabic + "arn": "arn", // Mapudungun + "bnt": "bnt", // Bantu + "bre": "br", // Breton + "bul": "bg", // Bulgarian + "bur": "my", // Burmese + "chi": "zh", // Chinese + "chn": "chn", // Chinook Jargon + "cze": "cs", // Czech + "dan": "da", // Danish + "dut": "nl", // Dutch + "eng": "en", // English + "epo": "eo", // Esperanto + "est": "et", // Estonian + "fil": "fil", // Filipino + "fin": "fi", // Finnish + "fre": "fr", // French + "geo": "ka", // Georgian + "ger": "de", // German + "glg": "gl", // 
Galician + "grc": "grc", // Ancient Greek + "gre": "el", // Modern Greek + "haw": "haw", // Hawaiian + "heb": "he", // Hebrew + "hin": "hi", // Hindi + "hun": "hu", // Hungarian + "ice": "is", // Icelandic + "ind": "id", // Indonesian + "ita": "it", // Italian + "jpn": "ja", // Japanese + "khm": "km", // Khmer + "kor": "ko", // Korean + "kur": "ku", // Kurdish + "lat": "la", // Latin + "mal": "ml", // Malayalam + "mao": "mi", // Maori + "may": "ms", // Malay + "mnc": "mnc", // Manchu + "mol": "ro", // Moldavian + "mon": "mn", // Mongolian + "nor": "no", // Norwegian + "per": "fa", // Persian + "pol": "pl", // Polish + "por": "pt", // Portuguese + "rum": "ro", // Romanian + "rus": "ru", // Russian + "san": "sa", // Sanskrit + "scr": "hr", // Croatian + "slo": "sk", // Slovak + "slv": "sl", // Slovenian + "som": "so", // Somali + "spa": "es", // Spanish + "swa": "sw", // Swahili + "swe": "sv", // Swedish + "tah": "ty", // Tahitian + "tam": "ta", // Tamil + "tgl": "tl", // Tagalog + "tha": "th", // Thai + "tib": "bo", // Tibetan + "tur": "tr", // Turkish + "ukr": "uk", // Ukrainian + "urd": "ur", // Urdu + "vie": "vi", // Vietnamese + "yid": "yi", // Yiddish +} diff --git a/jmdictForms.go b/jmdictForms.go new file mode 100644 index 0000000..76eba34 --- /dev/null +++ b/jmdictForms.go @@ -0,0 +1,254 @@ +package yomichan + +import ( + "os" + "strings" + + "foosoft.net/projects/jmdict" + "golang.org/x/exp/slices" +) + +func kata2hira(word string) string { + charMap := func(character rune) rune { + if (character >= 'ใ‚ก' && character <= 'ใƒถ') || (character >= 'ใƒฝ' && character <= 'ใƒพ') { + return character - 0x60 + } else { + return character + } + } + return strings.Map(charMap, word) +} + +func (h *headword) InfoSymbols() string { + infoSymbols := []string{} + if h.IsPriority { + infoSymbols = append(infoSymbols, prioritySymbol) + } + if h.IsRareKanji { + infoSymbols = append(infoSymbols, rareKanjiSymbol) + } + if h.IsIrregular { + infoSymbols = append(infoSymbols, 
irregularSymbol) + } + if h.IsOutdated { + infoSymbols = append(infoSymbols, outdatedSymbol) + } + return strings.Join(infoSymbols[:], " | ") +} + +func (h *headword) GlossText() string { + gloss := h.Expression + if h.IsAteji { + gloss = "ใ€ˆ" + gloss + "ใ€‰" + } + symbolText := h.InfoSymbols() + if symbolText != "" { + gloss += "๏ผˆ" + symbolText + "๏ผ‰" + } + return gloss +} + +func (h *headword) TableColHeaderText() string { + text := h.KanjiForm() + if h.IsAteji { + text = "ใ€ˆ" + text + "ใ€‰" + } + return text +} + +func (h *headword) TableRowHeaderText() string { + text := h.Reading + if h.IsGikun { + text = "ใ€ˆ" + text + "ใ€‰" + } + return text +} + +func (h *headword) TableCellText() string { + text := h.InfoSymbols() + if text == "" { + return defaultSymbol + } else { + return text + } +} + +func (h *headword) KanjiForm() string { + if h.IsKanaOnly() { + return "โˆ…" + } else { + return h.Expression + } +} + +func jmdNeedsFormTable(headwords []headword) bool { + // Does the entry contain more than 1 distinct reading? + // E.g. ใƒใ‚ซใŒใ„ and ใฐใ‹ใŒใ„ are not distinct. 
+ uniqueReading := "" + for _, h := range headwords { + if h.IsGikun { + return true + } else if h.IsSearchOnly { + continue + } else if h.IsKanaOnly() { + continue + } else if uniqueReading == "" { + uniqueReading = kata2hira(h.Reading) + } else if uniqueReading != kata2hira(h.Reading) { + return true + } + } + return false +} + +type formTableData struct { + kanjiForms []string + readings []string + colHeaderText map[string]string + rowHeaderText map[string]string + cellText map[string]map[string]string +} + +func tableData(headwords []headword) formTableData { + d := formTableData{ + kanjiForms: []string{}, + readings: []string{}, + colHeaderText: make(map[string]string), + rowHeaderText: make(map[string]string), + cellText: make(map[string]map[string]string), + } + for _, h := range headwords { + if h.IsSearchOnly { + continue + } + kanjiForm := h.KanjiForm() + if !slices.Contains(d.kanjiForms, kanjiForm) { + d.kanjiForms = append(d.kanjiForms, kanjiForm) + d.colHeaderText[kanjiForm] = h.TableColHeaderText() + } + reading := h.Reading + if !slices.Contains(d.readings, reading) { + d.readings = append(d.readings, reading) + d.rowHeaderText[reading] = h.TableRowHeaderText() + d.cellText[reading] = make(map[string]string) + } + d.cellText[reading][kanjiForm] = h.TableCellText() + } + return d +} + +func formsTableGlossary(headwords []headword) []any { + d := tableData(headwords) + + attr := contentAttr{} + centeredAttr := contentAttr{textAlign: "center"} + leftAttr := contentAttr{textAlign: "left"} + + cornerCell := contentTableHeadCell(attr, "") // empty cell in upper left corner + headRowCells := []any{cornerCell} + for _, kanjiForm := range d.kanjiForms { + content := d.colHeaderText[kanjiForm] + cell := contentTableHeadCell(centeredAttr, content) + headRowCells = append(headRowCells, cell) + } + headRow := contentTableRow(attr, headRowCells...) 
+ tableRows := []any{headRow} + for _, reading := range d.readings { + rowHeadCellText := d.rowHeaderText[reading] + rowHeadCell := contentTableHeadCell(leftAttr, rowHeadCellText) + rowCells := []any{rowHeadCell} + for _, kanjiForm := range d.kanjiForms { + text := d.cellText[reading][kanjiForm] + rowCell := contentTableCell(centeredAttr, text) + rowCells = append(rowCells, rowCell) + } + tableRow := contentTableRow(attr, rowCells...) + tableRows = append(tableRows, tableRow) + } + tableAttr := contentAttr{data: map[string]string{"content": "formsTable"}} + contentTable := contentTable(tableAttr, tableRows...) + content := contentStructure(contentTable) + return []any{content} +} + +func formsGlossary(headwords []headword) []any { + glossary := []any{} + for _, h := range headwords { + if h.IsSearchOnly { + continue + } + text := h.GlossText() + glossary = append(glossary, text) + } + return glossary +} + +func baseFormsTerm(entry jmdict.JmdictEntry) dbTerm { + term := dbTerm{Sequence: entry.Sequence} + headwords := extractHeadwords(entry) + if jmdNeedsFormTable(headwords) { + term.Glossary = formsTableGlossary(headwords) + } else { + term.Glossary = formsGlossary(headwords) + } + for _, sense := range entry.Sense { + rules := grammarRules(sense.PartsOfSpeech) + term.addRules(rules...) 
+ } + return term +} + +func formsExportDb(inputPath, outputPath, languageName, title string, stride int, pretty bool) error { + reader, err := os.Open(inputPath) + if err != nil { + return err + } + defer reader.Close() + + dictionary, _, err := jmdict.LoadJmdictNoTransform(reader) + if err != nil { + return err + } + + terms := dbTermList{} + for _, entry := range dictionary.Entries { + baseTerm := baseFormsTerm(entry) + headwords := extractHeadwords(entry) + for _, h := range headwords { + term := baseTerm + if h.IsSearchOnly { + term.Sequence = -term.Sequence + } + term.Expression = h.Expression + term.Reading = h.Reading + terms = append(terms, term) + } + } + + if title == "" { + title = "JMdict Forms" + } + + recordData := map[string]dbRecordList{ + "term": terms.crush(), + "tag": dbRecordList{}, + } + + jmdictDate := jmdictPublicationDate(dictionary) + + index := dbIndex{ + Title: title, + Revision: "JMdict." + jmdictDate, + Sequenced: true, + Attribution: edrdgAttribution, + } + index.setDefaults() + + return writeDb( + outputPath, + index, + recordData, + stride, + pretty, + ) +} diff --git a/jmdictGlossary.go b/jmdictGlossary.go new file mode 100644 index 0000000..0260cbf --- /dev/null +++ b/jmdictGlossary.go @@ -0,0 +1,300 @@ +package yomichan + +import ( + "fmt" + "strconv" + + "foosoft.net/projects/jmdict" +) + +func glossaryContainsLanguage(glossary []jmdict.JmdictGlossary, language string) bool { + hasGlosses := false + for _, gloss := range glossary { + if glossContainsLanguage(gloss, language) { + hasGlosses = true + break + } + } + return hasGlosses +} + +func glossContainsLanguage(gloss jmdict.JmdictGlossary, language string) bool { + if gloss.Language == nil && language != "eng" { + return false + } else if gloss.Language != nil && language != *gloss.Language { + return false + } else { + return true + } +} + +func makeGlossListItem(gloss jmdict.JmdictGlossary, language string) any { + contents := []any{gloss.Content} + listItem := 
contentListItem(contentAttr{}, contents...) + return listItem +} + +func makeInfoGlossListItem(gloss jmdict.JmdictGlossary, language string) any { + // Prepend gloss with "type" (literal, figurative, trademark, etc.) + glossTypeCode := *gloss.Type + contents := []any{} + if name, ok := glossTypeCodeToName[LangCode{language, glossTypeCode}]; ok { + if name != "" { + italicStyle := contentAttr{fontStyle: "italic"} + contents = append(contents, contentSpan(italicStyle, "("+name+")"), " ") + } + } else { + fmt.Println("Unknown glossary type code " + *gloss.Type + " for build language " + language) + contents = append(contents, "["+glossTypeCode+"] ") + } + contents = append(contents, gloss.Content) + listItem := contentListItem(contentAttr{}, contents...) + return listItem +} + +func makeSourceLangListItem(sourceLanguage jmdict.JmdictSource, language string) any { + contents := []any{} + + var srcLangCode string + if sourceLanguage.Language == nil { + srcLangCode = "eng" + } else { + srcLangCode = *sourceLanguage.Language + } + + // Format: [Language] ([Partial?], [Wasei?]): [Original word?] 
+ // [Language] + if langName, ok := langCodeToName[LangCode{language, srcLangCode}]; ok { + contents = append(contents, langName) + } else { + contents = append(contents, srcLangCode) + fmt.Println("Unable to convert ISO 639 code " + srcLangCode + " to its full name in language " + language) + } + + // ([Partial?], [Wasei?]) + var sourceLangTypeCode string + if sourceLanguage.Type == nil { + sourceLangTypeCode = "" + } else { + sourceLangTypeCode = *sourceLanguage.Type + } + var sourceLangType string + if val, ok := sourceLangTypeCodeToType[LangCode{language, sourceLangTypeCode}]; ok { + sourceLangType = val + } else { + sourceLangType = sourceLangTypeCode + fmt.Println("Unknown source language type code " + sourceLangTypeCode + " for build language " + language) + } + if sourceLangType != "" && sourceLanguage.Wasei == "y" { + contents = append(contents, " ("+sourceLangType+", wasei)") + } else if sourceLangType != "" { + contents = append(contents, " ("+sourceLangType+")") + } else if sourceLanguage.Wasei == "y" { + contents = append(contents, " (wasei)") + } + + // : [Original word?] + if sourceLanguage.Content != "" { + contents = append(contents, ": ") + attr := contentAttr{lang: ISOtoHTML[srcLangCode]} + contents = append(contents, contentSpan(attr, sourceLanguage.Content)) + } + + listItem := contentListItem(contentAttr{}, contents...) + return listItem +} + +func makeReferenceListItem(reference string, refType string, meta jmdictMetadata) any { + contents := []any{} + attr := contentAttr{} + + hint := refNoteHint[LangCode{meta.language, refType}] + contents = append(contents, hint+": ") + + refHeadword, senseNumber, ok := parseReference(reference) + if !ok { + contents = append(contents, "ใ€"+reference+"ใ€‘") + return contentListItem(attr, contents...) + } + + sequence, ok := meta.referenceToSeq[reference] + if !ok { + contents = append(contents, "ใ€"+reference+"ใ€‘") + return contentListItem(attr, contents...) 
+ } + + targetSense := senseID{ + sequence: sequence, + number: senseNumber, + } + + expHash := refHeadword.ExpHash() + doDisplayReading := (len(meta.expHashToReadings[expHash]) > 1) + doDisplaySenseNumber := (meta.seqToSenseCount[targetSense.sequence] > 1) + refGlossAttr := contentAttr{ + fontSize: "65%", + verticalAlign: "middle", + data: map[string]string{"content": "refGlosses"}, + } + + contents = append(contents, refHeadword.ToInternalLink(doDisplayReading)) + if doDisplaySenseNumber { + contents = append(contents, contentSpan(refGlossAttr, " "+strconv.Itoa(targetSense.number)+". "+meta.condensedGlosses[targetSense])) + } else { + contents = append(contents, contentSpan(refGlossAttr, " "+meta.condensedGlosses[targetSense])) + } + + listItem := contentListItem(attr, contents...) + return listItem +} + +func makeExampleListItem(sentence jmdict.JmdictExampleSentence) any { + if sentence.Lang == "jpn" { + return contentListItem(contentAttr{}, sentence.Text) + } else { + attr := contentAttr{ + lang: ISOtoHTML[sentence.Lang], + listStyleType: ISOtoFlag[sentence.Lang], + } + return contentListItem(attr, sentence.Text) + } +} + +func listAttr(lang string, listStyleType string, dataContent string) contentAttr { + return contentAttr{ + lang: lang, + listStyleType: listStyleType, + data: map[string]string{"content": dataContent}, + } +} + +func needsStructuredContent(sense jmdict.JmdictSense, language string) bool { + for _, gloss := range sense.Glossary { + if glossContainsLanguage(gloss, language) && gloss.Type != nil { + return true + } + } + if len(sense.SourceLanguages) > 0 { + return true + } else if len(sense.Information) > 0 { + return true + } else if len(sense.Antonyms) > 0 { + return true + } else if len(sense.References) > 0 { + return true + } else if len(sense.Examples) > 0 { + return true + } else { + return false + } +} + +func createGlossaryContent(sense jmdict.JmdictSense, meta jmdictMetadata) any { + glossaryContents := []any{} + + // Add normal 
glosses + glossListItems := []any{} + for _, gloss := range sense.Glossary { + if glossContainsLanguage(gloss, meta.language) && gloss.Type == nil { + listItem := makeGlossListItem(gloss, meta.language) + glossListItems = append(glossListItems, listItem) + } + } + if len(glossListItems) > 0 { + attr := listAttr(ISOtoHTML[meta.language], "circle", "glossary") + list := contentUnorderedList(attr, glossListItems...) + glossaryContents = append(glossaryContents, list) + } + + // Add information glosses + infoGlossListItems := []any{} + for _, gloss := range sense.Glossary { + if glossContainsLanguage(gloss, meta.language) && gloss.Type != nil { + listItem := makeInfoGlossListItem(gloss, meta.language) + infoGlossListItems = append(infoGlossListItems, listItem) + } + } + if len(infoGlossListItems) > 0 { + attr := listAttr(ISOtoHTML[meta.language], infoMarker, "infoGlossary") + list := contentUnorderedList(attr, infoGlossListItems...) + glossaryContents = append(glossaryContents, list) + } + + // Add language-of-origin / loanword information + sourceLangListItems := []any{} + for _, sourceLanguage := range sense.SourceLanguages { + listItem := makeSourceLangListItem(sourceLanguage, meta.language) + sourceLangListItems = append(sourceLangListItems, listItem) + } + if len(sourceLangListItems) > 0 { + attr := listAttr(ISOtoHTML[meta.language], langMarker, "sourceLanguages") + list := contentUnorderedList(attr, sourceLangListItems...) + glossaryContents = append(glossaryContents, list) + } + + // Add sense notes + noteListItems := []any{} + for _, information := range sense.Information { + listItem := contentListItem(contentAttr{}, information) + noteListItems = append(noteListItems, listItem) + } + if len(noteListItems) > 0 { + attr := listAttr(ISOtoHTML["jpn"], noteMarker, "notes") // notes often contain japanese text + list := contentUnorderedList(attr, noteListItems...) 
+ glossaryContents = append(glossaryContents, list) + } + + // Add antonyms + antonymListItems := []any{} + for _, antonym := range sense.Antonyms { + listItem := makeReferenceListItem(antonym, "ant", meta) + antonymListItems = append(antonymListItems, listItem) + } + if len(antonymListItems) > 0 { + attr := listAttr(ISOtoHTML[meta.language], antonymMarker, "antonyms") + list := contentUnorderedList(attr, antonymListItems...) + glossaryContents = append(glossaryContents, list) + } + + // Add cross-references + referenceListItems := []any{} + for _, reference := range sense.References { + listItem := makeReferenceListItem(reference, "xref", meta) + referenceListItems = append(referenceListItems, listItem) + } + if len(referenceListItems) > 0 { + attr := listAttr(ISOtoHTML[meta.language], refMarker, "references") + list := contentUnorderedList(attr, referenceListItems...) + glossaryContents = append(glossaryContents, list) + } + + // Add example sentences + exampleListItems := []any{} + for _, example := range sense.Examples { + for _, sentence := range example.Sentences { + listItem := makeExampleListItem(sentence) + exampleListItems = append(exampleListItems, listItem) + } + } + if len(exampleListItems) > 0 { + attr := listAttr(ISOtoHTML["jpn"], ISOtoFlag["jpn"], "examples") + list := contentUnorderedList(attr, exampleListItems...) + glossaryContents = append(glossaryContents, list) + } + + return contentStructure(glossaryContents...) 
+} + +func createGlossary(sense jmdict.JmdictSense, meta jmdictMetadata) []any { + glossary := []any{} + if needsStructuredContent(sense, meta.language) { + glossary = append(glossary, createGlossaryContent(sense, meta)) + } else { + for _, gloss := range sense.Glossary { + if glossContainsLanguage(gloss, meta.language) { + glossary = append(glossary, gloss.Content) + } + } + } + return glossary +} diff --git a/jmdictHeadword.go b/jmdictHeadword.go new file mode 100644 index 0000000..a1a75cb --- /dev/null +++ b/jmdictHeadword.go @@ -0,0 +1,267 @@ +package yomichan + +import ( + "fmt" + "hash/fnv" + "regexp" + "strconv" + + "foosoft.net/projects/jmdict" + "golang.org/x/exp/slices" +) + +type headword struct { + Expression string + Reading string + TermTags []string + Index int + IsPriority bool + IsIrregular bool + IsOutdated bool + IsRareKanji bool + IsSearchOnly bool + IsAteji bool + IsGikun bool +} + +type hash uint64 + +func (h *headword) Hash() hash { + return hashText(h.Expression + "โž" + h.Reading) +} + +func (h *headword) ExpHash() hash { + return hashText(h.Expression + "โž" + h.Expression) +} + +func (h *headword) ReadingHash() hash { + return hashText(h.Reading + "โž" + h.Reading) +} + +func hashText(s string) hash { + h := fnv.New64a() + h.Write([]byte(s)) + return hash(h.Sum64()) +} + +func (h *headword) IsKanaOnly() bool { + if h.Expression != h.Reading { + return false + } + for _, char := range h.Expression { + if char >= 'ใ' && char <= 'ใƒฟ' { + // hiragana and katakana range + continue + } else if char >= '๏ฝฅ' && char <= '๏พŸ' { + // halfwidth katakana range + continue + } else if char == 'ใ€œ' { + continue + } else { + return false + } + } + return true +} + +func (h *headword) Score() int { + score := 0 + if h.IsPriority { + score += 1 + } + if h.IsIrregular { + score -= 5 + } + if h.IsOutdated { + score -= 5 + } + if h.IsRareKanji { + score -= 5 + } + if h.IsSearchOnly { + score -= 5 + } + return score +} + +func (h *headword) 
ToInternalLink(includeReading bool) any { + if !includeReading || h.Expression == h.Reading { + return contentInternalLink( + contentAttr{lang: ISOtoHTML["jpn"]}, + h.Expression, + ) + } else { + return contentSpan( + contentAttr{lang: ISOtoHTML["jpn"]}, + contentInternalLink(contentAttr{}, h.Expression), + "๏ผˆ", + contentInternalLink(contentAttr{}, h.Reading), + "๏ผ‰", + ) + } +} + +func (h *headword) SetFlags(infoTags, freqTags []string) { + priorityTags := []string{"ichi1", "news1", "gai1", "spec1", "spec2"} + for _, priorityTag := range priorityTags { + if slices.Contains(freqTags, priorityTag) { + h.IsPriority = true + break + } + } + for _, infoTag := range infoTags { + switch infoTag { + case "iK", "ik", "io": + h.IsIrregular = true + case "oK", "ok": + h.IsOutdated = true + case "sK", "sk": + h.IsSearchOnly = true + case "rK": + h.IsRareKanji = true + case "ateji": + h.IsAteji = true + case "gikun": + h.IsGikun = true + } + } + if h.IsOutdated && h.IsRareKanji { + h.IsRareKanji = false + } +} + +func (h *headword) SetTermTags(freqTags []string) { + h.TermTags = []string{} + if h.IsPriority { + h.TermTags = append(h.TermTags, priorityTagName) + } + for _, tag := range freqTags { + isNewsFreqTag, _ := regexp.MatchString(`nf\d\d`, tag) + if isNewsFreqTag { + // nf tags are divided into ranks of 500 + // (nf01 to nf48), but it will be easier + // for the user to read 1k, 2k, etc. 
+ var i int + if _, err := fmt.Sscanf(tag, "nf%2d", &i); err == nil { + i = (i + (i % 2)) / 2 + newsTag := "news" + strconv.Itoa(i) + "k" + h.TermTags = append(h.TermTags, newsTag) + } + } else if tag == "news1" || tag == "news2" { + continue + } else { + tagWithoutTheNumber := tag[:len(tag)-1] // "ichi", "gai", or "spec" + h.TermTags = append(h.TermTags, tagWithoutTheNumber) + } + } + if h.IsIrregular { + h.TermTags = append(h.TermTags, irregularTagName) + } + if h.IsOutdated { + h.TermTags = append(h.TermTags, outdatedTagName) + } + if h.IsRareKanji { + h.TermTags = append(h.TermTags, rareKanjiTagName) + } + if h.IsAteji { + h.TermTags = append(h.TermTags, atejiTagName) + } + if h.IsGikun { + h.TermTags = append(h.TermTags, gikunTagName) + } +} + +func newHeadword(kanji *jmdict.JmdictKanji, reading *jmdict.JmdictReading) headword { + h := headword{} + infoTags := []string{} + freqTags := []string{} + if kanji == nil { + h.Expression = reading.Reading + h.Reading = reading.Reading + infoTags = reading.Information + freqTags = reading.Priorities + } else if reading == nil { + // should only apply to search-only kanji terms + h.Expression = kanji.Expression + h.Reading = "" + infoTags = kanji.Information + freqTags = kanji.Priorities + } else { + h.Expression = kanji.Expression + h.Reading = reading.Reading + infoTags = union(kanji.Information, reading.Information) + freqTags = intersection(kanji.Priorities, reading.Priorities) + } + h.SetFlags(infoTags, freqTags) + h.SetTermTags(freqTags) + return h +} + +func areAllKanjiIrregular(allKanji []jmdict.JmdictKanji) bool { + // If every kanji form is rare or irregular, then we'll make + // kana-only headwords for each kana form. 
+ if len(allKanji) == 0 { + return false + } + for _, kanji := range allKanji { + h := newHeadword(&kanji, nil) + kanjiIsIrregular := h.IsRareKanji || h.IsIrregular || h.IsOutdated || h.IsSearchOnly + if !kanjiIsIrregular { + return false + } + } + return true +} + +func extractHeadwords(entry jmdict.JmdictEntry) []headword { + headwords := []headword{} + allKanjiAreIrregular := areAllKanjiIrregular(entry.Kanji) + + if allKanjiAreIrregular { + // Adding the reading-only terms before kanji+reading + // terms here for the sake of the Index property, + // which affects the yomichan term ranking. + for _, reading := range entry.Readings { + h := newHeadword(nil, &reading) + h.Index = len(headwords) + headwords = append(headwords, h) + } + } + + for _, kanji := range entry.Kanji { + if slices.Contains(kanji.Information, "sK") { + // Search-only kanji forms do not have associated readings. + h := newHeadword(&kanji, nil) + h.Index = len(headwords) + headwords = append(headwords, h) + continue + } + for _, reading := range entry.Readings { + if reading.NoKanji != nil { + continue + } else if slices.Contains(reading.Information, "sk") { + // Search-only kana forms do not have associated kanji forms. 
+ continue + } else if reading.Restrictions != nil && !slices.Contains(reading.Restrictions, kanji.Expression) { + continue + } else { + h := newHeadword(&kanji, &reading) + h.Index = len(headwords) + headwords = append(headwords, h) + } + } + } + + if !allKanjiAreIrregular { + noKanjiInEntry := (len(entry.Kanji) == 0) + for _, reading := range entry.Readings { + if reading.NoKanji != nil || noKanjiInEntry || slices.Contains(reading.Information, "sk") { + h := newHeadword(nil, &reading) + h.Index = len(headwords) + headwords = append(headwords, h) + } + } + } + + return headwords +} diff --git a/jmdictMetadata.go b/jmdictMetadata.go new file mode 100644 index 0000000..ec92827 --- /dev/null +++ b/jmdictMetadata.go @@ -0,0 +1,158 @@ +package yomichan + +import ( + "strings" + + "foosoft.net/projects/jmdict" + "golang.org/x/exp/slices" +) + +type sequence = int + +type jmdictMetadata struct { + language string + condensedGlosses map[senseID]string + seqToSenseCount map[sequence]int + seqToMainHeadword map[sequence]headword + expHashToReadings map[hash][]string + headwordHashToSeqs map[hash][]sequence + references []string + referenceToSeq map[string]sequence + hashToSearchValues map[hash][]searchValue + seqToSearchHashes map[sequence][]searchHash + hasMultipleForms map[sequence]bool + maxSenseCount int +} + +type senseID struct { + sequence sequence + number int +} + +func (meta *jmdictMetadata) AddHeadword(headword headword, entry jmdict.JmdictEntry) { + + // Determine how many senses are in this entry for this language + if _, ok := meta.seqToSenseCount[entry.Sequence]; !ok { + senseCount := 0 + for _, entrySense := range entry.Sense { + for _, gloss := range entrySense.Glossary { + if glossContainsLanguage(gloss, meta.language) { + senseCount += 1 + break + } + } + } + meta.seqToSenseCount[entry.Sequence] = senseCount + } + + if meta.seqToSenseCount[entry.Sequence] == 0 { + return + } + + // main headwords (first ones that are found in entries). 
+ if _, ok := meta.seqToMainHeadword[entry.Sequence]; !ok { + meta.seqToMainHeadword[entry.Sequence] = headword + } + + // hash the term pair so we can determine if it's used + // in more than one JMdict entry later. + headwordHash := headword.Hash() + if !slices.Contains(meta.headwordHashToSeqs[headwordHash], entry.Sequence) { + meta.headwordHashToSeqs[headwordHash] = append(meta.headwordHashToSeqs[headwordHash], entry.Sequence) + } + + // hash the expression so that we can determine if we + // need to disambiguate it by displaying its reading + // in reference notes later. + expHash := headword.ExpHash() + if !slices.Contains(meta.expHashToReadings[expHash], headword.Reading) { + meta.expHashToReadings[expHash] = append(meta.expHashToReadings[expHash], headword.Reading) + } + + // e.g. for JMdict (English) we expect to end up with + // seqToHashedHeadwords[1260670] == ใ€ๅ…ƒใƒปใ‚‚ใจใ€‘ใ€ใ€ๅ…ƒใƒปๅ…ƒใ€‘ใ€ใ€ใ‚‚ใจใƒปใ‚‚ใจใ€‘ใ€ใ€ๆœฌใƒปใ‚‚ใจใ€‘ใ€ใ€ๆœฌใƒปๆœฌใ€‘ใ€ใ€็ด ใƒปใ‚‚ใจใ€‘ใ€ใ€็ด ใƒป็ด ใ€‘ใ€ใ€ๅŸบใƒปใ‚‚ใจใ€‘ใ€ใ€ๅŸบใƒปๅŸบใ€‘ + // used for correlating references to sequence numbers later. 
+ searchHashes := []searchHash{ + searchHash{headwordHash, headword.IsPriority}, + searchHash{expHash, headword.IsPriority}, + searchHash{headword.ReadingHash(), headword.IsPriority}, + } + for _, x := range searchHashes { + if !slices.Contains(meta.seqToSearchHashes[entry.Sequence], x) { + meta.seqToSearchHashes[entry.Sequence] = append(meta.seqToSearchHashes[entry.Sequence], x) + } + } + + currentSenseNumber := 1 + for _, entrySense := range entry.Sense { + if !glossaryContainsLanguage(entrySense.Glossary, meta.language) { + continue + } + if entrySense.RestrictedReadings != nil && !slices.Contains(entrySense.RestrictedReadings, headword.Reading) { + currentSenseNumber += 1 + continue + } + if entrySense.RestrictedKanji != nil && !slices.Contains(entrySense.RestrictedKanji, headword.Expression) { + currentSenseNumber += 1 + continue + } + + allReferences := append(entrySense.References, entrySense.Antonyms...) + for _, reference := range allReferences { + meta.references = append(meta.references, reference) + } + + currentSense := senseID{entry.Sequence, currentSenseNumber} + if meta.condensedGlosses[currentSense] == "" { + glosses := []string{} + for _, gloss := range entrySense.Glossary { + if glossContainsLanguage(gloss, meta.language) && gloss.Type == nil { + glosses = append(glosses, gloss.Content) + } + } + meta.condensedGlosses[currentSense] = strings.Join(glosses, "; ") + } + currentSenseNumber += 1 + } +} + +func newJmdictMetadata(dictionary jmdict.Jmdict, languageName string) jmdictMetadata { + meta := jmdictMetadata{ + language: langNameToCode[languageName], + seqToSenseCount: make(map[sequence]int), + condensedGlosses: make(map[senseID]string), + seqToMainHeadword: make(map[sequence]headword), + expHashToReadings: make(map[hash][]string), + seqToSearchHashes: make(map[sequence][]searchHash), + headwordHashToSeqs: make(map[hash][]sequence), + references: []string{}, + hashToSearchValues: nil, + referenceToSeq: nil, + hasMultipleForms: 
make(map[sequence]bool), + maxSenseCount: 0, + } + + for _, entry := range dictionary.Entries { + headwords := extractHeadwords(entry) + formCount := 0 + for _, headword := range headwords { + meta.AddHeadword(headword, entry) + if !headword.IsSearchOnly { + formCount += 1 + } + } + meta.hasMultipleForms[entry.Sequence] = (formCount > 1) + } + + // this correlation process will be unnecessary once JMdict + // includes sequence numbers in its cross-reference data + meta.MakeReferenceToSeqMap() + + for _, senseCount := range meta.seqToSenseCount { + if meta.maxSenseCount < senseCount { + meta.maxSenseCount = senseCount + } + } + + return meta +} diff --git a/jmdictReferences.go b/jmdictReferences.go new file mode 100644 index 0000000..71a7501 --- /dev/null +++ b/jmdictReferences.go @@ -0,0 +1,166 @@ +package yomichan + +import ( + "fmt" + "strconv" + "strings" +) + +/* + * In the future, JMdict will be updated to include sequence numbers + * with each cross reference. At that time, most of the functions and + * types defined in this file will become unnecessary. see: + * https://www.edrdg.org/jmdict_edict_list/2022/msg00008.html + */ + +type searchValue struct { + sequence sequence + index int + isPriority bool +} + +type searchHash struct { + hash hash + isPriority bool +} + +func parseReference(reference string) (headword, int, bool) { + // Reference strings in JMDict currently consist of 3 parts at + // most, separated by ใƒป characters. The latter two parts are + // optional. When the sense number is not specified, it is + // implied to be the first sense. 
+ var h headword + var senseNumber int + ok := true + refParts := strings.Split(reference, "ใƒป") + if len(refParts) == 1 { + // (Kanji) or (Reading) + h = headword{Expression: refParts[0], Reading: refParts[0]} + senseNumber = 1 + } else if len(refParts) == 2 { + // [Kanji + (Reading or Sense)] or (Reading + Sense) + val, err := strconv.Atoi(refParts[1]) + if err == nil { + h = headword{Expression: refParts[0], Reading: refParts[0]} + senseNumber = val + } else { + h = headword{Expression: refParts[0], Reading: refParts[1]} + senseNumber = 1 + } + } else if len(refParts) == 3 { + // Expression + Reading + Sense + h = headword{Expression: refParts[0], Reading: refParts[1]} + val, err := strconv.Atoi(strings.TrimSpace(refParts[2])) + if err == nil { + senseNumber = val + } else { + errortext := "Unexpected format (3rd part not integer) for x-ref \"" + reference + "\"" + fmt.Println(errortext) + ok = false + } + } else { + errortext := "Unexpected format for x-ref \"" + reference + "\"" + fmt.Println(errortext) + ok = false + } + return h, senseNumber, ok +} + +func (meta *jmdictMetadata) MakeReferenceToSeqMap() { + + meta.referenceToSeq = make(map[string]sequence) + meta.MakeHashToSearchValuesMap() + + for _, reference := range meta.references { + if meta.referenceToSeq[reference] != 0 { + continue + } + seq := meta.FindBestSequence(reference) + if seq != 0 { + meta.referenceToSeq[reference] = seq + } else { + fmt.Println("Unable to convert reference to sequence number: `" + reference + "`") + } + } +} + +func (meta *jmdictMetadata) MakeHashToSearchValuesMap() { + meta.hashToSearchValues = make(map[hash][]searchValue) + for seq, searchHashes := range meta.seqToSearchHashes { + for score, searchHash := range searchHashes { + searchValue := searchValue{ + sequence: seq, + index: score, + isPriority: searchHash.isPriority, + } + meta.hashToSearchValues[searchHash.hash] = + append(meta.hashToSearchValues[searchHash.hash], searchValue) + } + } +} + +/* + * Generally, 
correspondence is determined by the order in which term + * pairs are extracted from each JMdict entry. Take for example the + * JMdict entry for ご本, which contains a reference to 本 (without a + * reading specified). To correlate this reference with a sequence + * number, our program searches each entry for the hash of【本・本】. + * There are two entries in which it is found in JMdict (English): + * + * sequence 1260670: 【元・もと】、【元・元】、【もと・もと】、【本・もと】、【本・本】、【素・もと】、【素・素】、【基・もと】、【基・基】 + * sequence 1522150: 【本・ほん】、【本・本】、【ほん・ほん】 + * + * Because 【本・本】 is closer to the beginning of the array in the + * latter (i.e., has the lowest index), sequence number 1522150 is + * returned. + * + * In situations in which multiple sequences are found with the same + * score, the entry with a priority tag ("news1", "ichi1", "spec1", + * "spec2", "gai1") is given preference. This mostly affects + * katakana-only loanwords like ラグ. + * + * To improve accuracy, this method also checks to see if the + * reference's specified sense number really exists in the + * corresponding entry. For example, sequence 1582850 【如何で・いかんで】 + * has a reference to sense #2 of いかん (no kanji specified), which + * could belong to 13 different sequences. However, sequences 1582850 + * and 2829697 are the only 2 of those 13 which contain more than one + * sense. Incidentally, sequence 1582850 is the correct match. + * + * All else being equal, the entry with the smallest sequence number + * is chosen. References in the JMdict file are currently ambiguous, + * and getting this perfect won't be possible until sequence numbers + * are explicitly identified in these references.
See: + * https://github.com/JMdictProject/JMdictIssues/issues/61 + */ +func (meta *jmdictMetadata) FindBestSequence(reference string) sequence { + bestSeq := 0 + lowestIndex := 100000 + bestIsPriority := false + headword, senseNumber, ok := parseReference(reference) + if !ok { + return bestSeq + } + hash := headword.Hash() + for _, seqScore := range meta.hashToSearchValues[hash] { + if meta.seqToSenseCount[seqScore.sequence] < senseNumber { + // entry must contain the specified sense + continue + } else if lowestIndex < seqScore.index { + // lower indices are better + continue + } else if (lowestIndex == seqScore.index) && (bestIsPriority && !seqScore.isPriority) { + // if scores match, check priority + continue + } else if (lowestIndex == seqScore.index) && (bestIsPriority == seqScore.isPriority) && (bestSeq < seqScore.sequence) { + // if scores and priority match, check sequence number. + // lower sequence numbers are better + continue + } else { + lowestIndex = seqScore.index + bestSeq = seqScore.sequence + bestIsPriority = seqScore.isPriority + } + } + return bestSeq +} diff --git a/jmdictTags.go b/jmdictTags.go new file mode 100644 index 0000000..b444c47 --- /dev/null +++ b/jmdictTags.go @@ -0,0 +1,348 @@ +package yomichan + +import ( + "fmt" + "strconv" + + "golang.org/x/exp/slices" +) + +func senseNumberTags(maxSenseCount int) []dbTag { + tags := []dbTag{} + for i := 1; i <= maxSenseCount; i++ { + tag := dbTag{ + Name: strconv.Itoa(i), + Order: -10, // these tags will appear on the left side + Notes: "JMdict Sense #" + strconv.Itoa(i), + } + tags = append(tags, tag) + } + return tags +} + +func newsFrequencyTags() []dbTag { + // 24,000 ranks divided into 24 tags, news1k ... 
news24k + tags := []dbTag{} + for i := 1; i <= 24; i++ { + tagName := "news" + strconv.Itoa(i) + "k" + var startRank string + if i == 1 { + startRank = "1" + } else { + // technically should be ",001", but that looks odd + startRank = strconv.Itoa(i-1) + ",000" + } + endRank := strconv.Itoa(i) + ",000" + tag := dbTag{ + Name: tagName, + Order: -2, + Score: 0, + Category: "frequent", + Notes: "ranked between the top " + startRank + " and " + endRank + " words in a frequency analysis of the Mainichi Shimbun (1990s)", + } + tags = append(tags, tag) + } + return tags +} + +func entityTags(entities map[string]string) []dbTag { + tags := knownEntityTags() + for name, notes := range entities { + idx := slices.IndexFunc(tags, func(t dbTag) bool { return t.Name == name }) + if idx != -1 { + tags[idx].Notes = notes + } else { + fmt.Println("Unknown tag type \"" + name + "\": " + notes) + unknownTag := dbTag{Name: name, Notes: notes} + tags = append(tags, unknownTag) + } + } + return tags +} + +func customDbTags() []dbTag { + return []dbTag{ + dbTag{Name: priorityTagName, Order: -10, Score: 10, Category: "popular", Notes: "high priority term"}, + dbTag{Name: rareKanjiTagName, Order: 0, Score: -5, Category: "archaism", Notes: "rarely-used kanji form of this expression"}, + dbTag{Name: irregularTagName, Order: 0, Score: -5, Category: "archaism", Notes: "irregular form of this expression"}, + dbTag{Name: outdatedTagName, Order: 0, Score: -5, Category: "archaism", Notes: "outdated form of this expression"}, + dbTag{Name: "ichi", Order: -2, Score: 0, Category: "frequent", Notes: "included in Ichimango Goi Bunruishuu (๏ผ‘ไธ‡่ชž่ชžๅฝ™ๅˆ†้กž้›†)"}, + dbTag{Name: "spec", Order: -2, Score: 0, Category: "frequent", Notes: "specified as common by JMdict editors"}, + dbTag{Name: "gai", Order: -2, Score: 0, Category: "frequent", Notes: "common loanword (gairaigoใƒปๅค–ๆฅ่ชž)"}, + dbTag{Name: "forms", Order: 0, Score: 0, Category: "", Notes: "other surface forms and readings"}, + } +} + 
+func knownEntityTags() []dbTag { + return []dbTag{ + // see: https://www.edrdg.org/jmdictdb/cgi-bin/edhelp.py?svc=jmdict&sid=#kwabbr + // additional descriptions at the beginning of the JMdict file + + // reading info + dbTag{Name: "gikun", Order: 0, Score: 0, Category: ""}, // gikun (meaning as reading) or jukujikun (special kanji reading) + dbTag{Name: "ik", Order: 0, Score: -5, Category: ""}, // word containing irregular kana usage + dbTag{Name: "ok", Order: 0, Score: -5, Category: ""}, // out-dated or obsolete kana usage + dbTag{Name: "sk", Order: 0, Score: -5, Category: ""}, // search-only kana form + + // kanji info + /* kanji info also has a "ik" entity that would go here if not already for the re_inf tag */ + dbTag{Name: "ateji", Order: 0, Score: 0, Category: ""}, // ateji (phonetic) reading + dbTag{Name: "iK", Order: 0, Score: -5, Category: ""}, // word containing irregular kanji usage + dbTag{Name: "io", Order: 0, Score: -5, Category: ""}, // irregular okurigana usage + dbTag{Name: "oK", Order: 0, Score: -5, Category: ""}, // word containing out-dated kanji or kanji usage + dbTag{Name: "rK", Order: 0, Score: -5, Category: ""}, // rarely-used kanji form + dbTag{Name: "sK", Order: 0, Score: -5, Category: ""}, // search-only kanji form + + // miscellaneous sense info + dbTag{Name: "abbr", Order: 0, Score: 0, Category: ""}, // abbreviation + dbTag{Name: "arch", Order: -4, Score: 0, Category: "archaism"}, // archaism + dbTag{Name: "char", Order: 0, Score: 0, Category: ""}, // character + dbTag{Name: "chn", Order: 0, Score: 0, Category: ""}, // children's language + dbTag{Name: "col", Order: 0, Score: 0, Category: ""}, // colloquialism + dbTag{Name: "company", Order: 0, Score: 0, Category: ""}, // company name + dbTag{Name: "creat", Order: 0, Score: 0, Category: ""}, // creature + dbTag{Name: "dated", Order: -4, Score: 0, Category: "archaism"}, // dated term + dbTag{Name: "dei", Order: 0, Score: 0, Category: ""}, // deity + dbTag{Name: "derog", Order: 0, 
Score: 0, Category: ""}, // derogatory + dbTag{Name: "doc", Order: 0, Score: 0, Category: ""}, // document + dbTag{Name: "euph", Order: 0, Score: 0, Category: ""}, // euphemistic + dbTag{Name: "ev", Order: 0, Score: 0, Category: ""}, // event + dbTag{Name: "fam", Order: 0, Score: 0, Category: ""}, // familiar language + dbTag{Name: "fem", Order: 0, Score: 0, Category: ""}, // female term or language + dbTag{Name: "fict", Order: 0, Score: 0, Category: ""}, // fiction + dbTag{Name: "form", Order: 0, Score: 0, Category: ""}, // formal or literary term + dbTag{Name: "given", Order: 0, Score: 0, Category: ""}, // given name or forename, gender not specified + dbTag{Name: "group", Order: 0, Score: 0, Category: ""}, // group + dbTag{Name: "hist", Order: 0, Score: 0, Category: ""}, // historical term + dbTag{Name: "hon", Order: 0, Score: 0, Category: ""}, // honorific or respectful (sonkeigo) language + dbTag{Name: "hum", Order: 0, Score: 0, Category: ""}, // humble (kenjougo) language + dbTag{Name: "id", Order: -5, Score: 0, Category: "expression"}, // idiomatic expression + dbTag{Name: "joc", Order: 0, Score: 0, Category: ""}, // jocular, humorous term + dbTag{Name: "leg", Order: 0, Score: 0, Category: ""}, // legend + dbTag{Name: "m-sl", Order: 0, Score: 0, Category: ""}, // manga slang + dbTag{Name: "male", Order: 0, Score: 0, Category: ""}, // male term or language + dbTag{Name: "myth", Order: 0, Score: 0, Category: ""}, // mythology + dbTag{Name: "net-sl", Order: 0, Score: 0, Category: ""}, // Internet slang + dbTag{Name: "obj", Order: 0, Score: 0, Category: ""}, // object + dbTag{Name: "obs", Order: -4, Score: 0, Category: "archaism"}, // obsolete term + dbTag{Name: "on-mim", Order: 0, Score: 0, Category: ""}, // onomatopoeic or mimetic word + dbTag{Name: "organization", Order: 0, Score: 0, Category: ""}, // organization name + dbTag{Name: "oth", Order: 0, Score: 0, Category: ""}, // other + dbTag{Name: "person", Order: 0, Score: 0, Category: ""}, // full name of a 
particular person + dbTag{Name: "place", Order: 0, Score: 0, Category: ""}, // place name + dbTag{Name: "poet", Order: 0, Score: 0, Category: ""}, // poetical term + dbTag{Name: "pol", Order: 0, Score: 0, Category: ""}, // polite (teineigo) language + dbTag{Name: "product", Order: 0, Score: 0, Category: ""}, // product name + dbTag{Name: "proverb", Order: 0, Score: 0, Category: "expression"}, // proverb + dbTag{Name: "quote", Order: 0, Score: 0, Category: "expression"}, // quotation + dbTag{Name: "rare", Order: -4, Score: 0, Category: "archaism"}, // rare + dbTag{Name: "relig", Order: 0, Score: 0, Category: ""}, // religion + dbTag{Name: "sens", Order: 0, Score: 0, Category: ""}, // sensitive + dbTag{Name: "serv", Order: 0, Score: 0, Category: ""}, // service + dbTag{Name: "ship", Order: 0, Score: 0, Category: ""}, // ship name + dbTag{Name: "sl", Order: 0, Score: 0, Category: ""}, // slang + dbTag{Name: "station", Order: 0, Score: 0, Category: ""}, // railway station + dbTag{Name: "surname", Order: 0, Score: 0, Category: ""}, // family or surname + dbTag{Name: "uk", Order: 0, Score: 0, Category: ""}, // word usually written using kana alone + dbTag{Name: "unclass", Order: 0, Score: 0, Category: ""}, // unclassified name + dbTag{Name: "vulg", Order: 0, Score: 0, Category: ""}, // vulgar expression or word + dbTag{Name: "work", Order: 0, Score: 0, Category: ""}, // work of art, literature, music, etc. 
name + dbTag{Name: "X", Order: 0, Score: 0, Category: ""}, // rude or X-rated term (not displayed in educational software) + dbTag{Name: "yoji", Order: 0, Score: 0, Category: ""}, // yojijukugo + + // part-of-speech info + dbTag{Name: "adj-f", Order: -3, Score: 0, Category: "partOfSpeech"}, // noun or verb acting prenominally + dbTag{Name: "adj-i", Order: -3, Score: 0, Category: "partOfSpeech"}, // adjective (keiyoushi) + dbTag{Name: "adj-ix", Order: -3, Score: 0, Category: "partOfSpeech"}, // adjective (keiyoushi) - yoi/ii class + dbTag{Name: "adj-kari", Order: -3, Score: 0, Category: "partOfSpeech"}, // 'kari' adjective (archaic) + dbTag{Name: "adj-ku", Order: -3, Score: 0, Category: "partOfSpeech"}, // 'ku' adjective (archaic) + dbTag{Name: "adj-na", Order: -3, Score: 0, Category: "partOfSpeech"}, // adjectival nouns or quasi-adjectives (keiyodoshi) + dbTag{Name: "adj-nari", Order: -3, Score: 0, Category: "partOfSpeech"}, // archaic/formal form of na-adjective + dbTag{Name: "adj-no", Order: -3, Score: 0, Category: "partOfSpeech"}, // nouns which may take the genitive case particle 'no' + dbTag{Name: "adj-pn", Order: -3, Score: 0, Category: "partOfSpeech"}, // pre-noun adjectival (rentaishi) + dbTag{Name: "adj-shiku", Order: -3, Score: 0, Category: "partOfSpeech"}, // 'shiku' adjective (archaic) + dbTag{Name: "adj-t", Order: -3, Score: 0, Category: "partOfSpeech"}, // 'taru' adjective + dbTag{Name: "adv", Order: -3, Score: 0, Category: "partOfSpeech"}, // adverb (fukushi) + dbTag{Name: "adv-to", Order: -3, Score: 0, Category: "partOfSpeech"}, // adverb taking the 'to' particle + dbTag{Name: "aux", Order: -3, Score: 0, Category: "partOfSpeech"}, // auxiliary + dbTag{Name: "aux-adj", Order: -3, Score: 0, Category: "partOfSpeech"}, // auxiliary adjective + dbTag{Name: "aux-v", Order: -3, Score: 0, Category: "partOfSpeech"}, // auxiliary verb + dbTag{Name: "conj", Order: -3, Score: 0, Category: "partOfSpeech"}, // conjunction + dbTag{Name: "cop", Order: -3, Score: 0, 
Category: "partOfSpeech"}, // copula + dbTag{Name: "ctr", Order: -3, Score: 0, Category: "partOfSpeech"}, // counter + dbTag{Name: "exp", Order: -5, Score: 0, Category: "expression"}, // expressions (phrases, clauses, etc.) + dbTag{Name: "int", Order: -3, Score: 0, Category: "partOfSpeech"}, // interjection (kandoushi) + dbTag{Name: "n", Order: -3, Score: 0, Category: "partOfSpeech"}, // noun (common) (futsuumeishi) + dbTag{Name: "n-adv", Order: -3, Score: 0, Category: "partOfSpeech"}, // adverbial noun (fukushitekimeishi) + dbTag{Name: "n-pr", Order: -3, Score: 0, Category: "partOfSpeech"}, // proper noun + dbTag{Name: "n-pref", Order: -3, Score: 0, Category: "partOfSpeech"}, // noun, used as a prefix + dbTag{Name: "n-suf", Order: -3, Score: 0, Category: "partOfSpeech"}, // noun, used as a suffix + dbTag{Name: "n-t", Order: -3, Score: 0, Category: "partOfSpeech"}, // noun (temporal) (jisoumeishi) + dbTag{Name: "num", Order: -3, Score: 0, Category: "partOfSpeech"}, // numeric + dbTag{Name: "pn", Order: -3, Score: 0, Category: "partOfSpeech"}, // pronoun + dbTag{Name: "pref", Order: -3, Score: 0, Category: "partOfSpeech"}, // prefix + dbTag{Name: "prt", Order: -3, Score: 0, Category: "partOfSpeech"}, // particle + dbTag{Name: "suf", Order: -3, Score: 0, Category: "partOfSpeech"}, // suffix + dbTag{Name: "unc", Order: -3, Score: 0, Category: "partOfSpeech"}, // unclassified + dbTag{Name: "v-unspec", Order: -3, Score: 0, Category: "partOfSpeech"}, // verb unspecified + dbTag{Name: "v1", Order: -3, Score: 0, Category: "partOfSpeech"}, // Ichidan verb + dbTag{Name: "v1-s", Order: -3, Score: 0, Category: "partOfSpeech"}, // Ichidan verb - kureru special class + dbTag{Name: "v2a-s", Order: -3, Score: 0, Category: "partOfSpeech"}, // Nidan verb with 'u' ending (archaic) + dbTag{Name: "v2b-k", Order: -3, Score: 0, Category: "partOfSpeech"}, // Nidan verb (upper class) with 'bu' ending (archaic) + dbTag{Name: "v2b-s", Order: -3, Score: 0, Category: "partOfSpeech"}, // Nidan 
verb (lower class) with 'bu' ending (archaic) + dbTag{Name: "v2d-k", Order: -3, Score: 0, Category: "partOfSpeech"}, // Nidan verb (upper class) with 'dzu' ending (archaic) + dbTag{Name: "v2d-s", Order: -3, Score: 0, Category: "partOfSpeech"}, // Nidan verb (lower class) with 'dzu' ending (archaic) + dbTag{Name: "v2g-k", Order: -3, Score: 0, Category: "partOfSpeech"}, // Nidan verb (upper class) with 'gu' ending (archaic) + dbTag{Name: "v2g-s", Order: -3, Score: 0, Category: "partOfSpeech"}, // Nidan verb (lower class) with 'gu' ending (archaic) + dbTag{Name: "v2h-k", Order: -3, Score: 0, Category: "partOfSpeech"}, // Nidan verb (upper class) with 'hu/fu' ending (archaic) + dbTag{Name: "v2h-s", Order: -3, Score: 0, Category: "partOfSpeech"}, // Nidan verb (lower class) with 'hu/fu' ending (archaic) + dbTag{Name: "v2k-k", Order: -3, Score: 0, Category: "partOfSpeech"}, // Nidan verb (upper class) with 'ku' ending (archaic) + dbTag{Name: "v2k-s", Order: -3, Score: 0, Category: "partOfSpeech"}, // Nidan verb (lower class) with 'ku' ending (archaic) + dbTag{Name: "v2m-k", Order: -3, Score: 0, Category: "partOfSpeech"}, // Nidan verb (upper class) with 'mu' ending (archaic) + dbTag{Name: "v2m-s", Order: -3, Score: 0, Category: "partOfSpeech"}, // Nidan verb (lower class) with 'mu' ending (archaic) + dbTag{Name: "v2n-s", Order: -3, Score: 0, Category: "partOfSpeech"}, // Nidan verb (lower class) with 'nu' ending (archaic) + dbTag{Name: "v2r-k", Order: -3, Score: 0, Category: "partOfSpeech"}, // Nidan verb (upper class) with 'ru' ending (archaic) + dbTag{Name: "v2r-s", Order: -3, Score: 0, Category: "partOfSpeech"}, // Nidan verb (lower class) with 'ru' ending (archaic) + dbTag{Name: "v2s-s", Order: -3, Score: 0, Category: "partOfSpeech"}, // Nidan verb (lower class) with 'su' ending (archaic) + dbTag{Name: "v2t-k", Order: -3, Score: 0, Category: "partOfSpeech"}, // Nidan verb (upper class) with 'tsu' ending (archaic) + dbTag{Name: "v2t-s", Order: -3, Score: 0, Category: 
"partOfSpeech"}, // Nidan verb (lower class) with 'tsu' ending (archaic) + dbTag{Name: "v2w-s", Order: -3, Score: 0, Category: "partOfSpeech"}, // Nidan verb (lower class) with 'u' ending and 'we' conjugation (archaic) + dbTag{Name: "v2y-k", Order: -3, Score: 0, Category: "partOfSpeech"}, // Nidan verb (upper class) with 'yu' ending (archaic) + dbTag{Name: "v2y-s", Order: -3, Score: 0, Category: "partOfSpeech"}, // Nidan verb (lower class) with 'yu' ending (archaic) + dbTag{Name: "v2z-s", Order: -3, Score: 0, Category: "partOfSpeech"}, // Nidan verb (lower class) with 'zu' ending (archaic) + dbTag{Name: "v4b", Order: -3, Score: 0, Category: "partOfSpeech"}, // Yodan verb with 'bu' ending (archaic) + dbTag{Name: "v4g", Order: -3, Score: 0, Category: "partOfSpeech"}, // Yodan verb with 'gu' ending (archaic) + dbTag{Name: "v4h", Order: -3, Score: 0, Category: "partOfSpeech"}, // Yodan verb with 'hu/fu' ending (archaic) + dbTag{Name: "v4k", Order: -3, Score: 0, Category: "partOfSpeech"}, // Yodan verb with 'ku' ending (archaic) + dbTag{Name: "v4m", Order: -3, Score: 0, Category: "partOfSpeech"}, // Yodan verb with 'mu' ending (archaic) + dbTag{Name: "v4n", Order: -3, Score: 0, Category: "partOfSpeech"}, // Yodan verb with 'nu' ending (archaic) + dbTag{Name: "v4r", Order: -3, Score: 0, Category: "partOfSpeech"}, // Yodan verb with 'ru' ending (archaic) + dbTag{Name: "v4s", Order: -3, Score: 0, Category: "partOfSpeech"}, // Yodan verb with 'su' ending (archaic) + dbTag{Name: "v4t", Order: -3, Score: 0, Category: "partOfSpeech"}, // Yodan verb with 'tsu' ending (archaic) + dbTag{Name: "v5aru", Order: -3, Score: 0, Category: "partOfSpeech"}, // Godan verb - -aru special class + dbTag{Name: "v5b", Order: -3, Score: 0, Category: "partOfSpeech"}, // Godan verb with 'bu' ending + dbTag{Name: "v5g", Order: -3, Score: 0, Category: "partOfSpeech"}, // Godan verb with 'gu' ending + dbTag{Name: "v5k", Order: -3, Score: 0, Category: "partOfSpeech"}, // Godan verb with 'ku' ending + 
dbTag{Name: "v5k-s", Order: -3, Score: 0, Category: "partOfSpeech"}, // Godan verb - Iku/Yuku special class + dbTag{Name: "v5m", Order: -3, Score: 0, Category: "partOfSpeech"}, // Godan verb with 'mu' ending + dbTag{Name: "v5n", Order: -3, Score: 0, Category: "partOfSpeech"}, // Godan verb with 'nu' ending + dbTag{Name: "v5r", Order: -3, Score: 0, Category: "partOfSpeech"}, // Godan verb with 'ru' ending + dbTag{Name: "v5r-i", Order: -3, Score: 0, Category: "partOfSpeech"}, // Godan verb with 'ru' ending (irregular verb) + dbTag{Name: "v5s", Order: -3, Score: 0, Category: "partOfSpeech"}, // Godan verb with 'su' ending + dbTag{Name: "v5t", Order: -3, Score: 0, Category: "partOfSpeech"}, // Godan verb with 'tsu' ending + dbTag{Name: "v5u", Order: -3, Score: 0, Category: "partOfSpeech"}, // Godan verb with 'u' ending + dbTag{Name: "v5u-s", Order: -3, Score: 0, Category: "partOfSpeech"}, // Godan verb with 'u' ending (special class) + dbTag{Name: "v5uru", Order: -3, Score: 0, Category: "partOfSpeech"}, // Godan verb - Uru old class verb (old form of Eru) + dbTag{Name: "vi", Order: -3, Score: 0, Category: "partOfSpeech"}, // intransitive verb + dbTag{Name: "vk", Order: -3, Score: 0, Category: "partOfSpeech"}, // Kuru verb - special class + dbTag{Name: "vn", Order: -3, Score: 0, Category: "partOfSpeech"}, // irregular nu verb + dbTag{Name: "vr", Order: -3, Score: 0, Category: "partOfSpeech"}, // irregular ru verb, plain form ends with -ri + dbTag{Name: "vs", Order: -3, Score: 0, Category: "partOfSpeech"}, // noun or participle which takes the aux. 
verb suru + dbTag{Name: "vs-c", Order: -3, Score: 0, Category: "partOfSpeech"}, // su verb - precursor to the modern suru + dbTag{Name: "vs-i", Order: -3, Score: 0, Category: "partOfSpeech"}, // suru verb - included + dbTag{Name: "vs-s", Order: -3, Score: 0, Category: "partOfSpeech"}, // suru verb - special class + dbTag{Name: "vt", Order: -3, Score: 0, Category: "partOfSpeech"}, // transitive verb + dbTag{Name: "vz", Order: -3, Score: 0, Category: "partOfSpeech"}, // Ichidan verb - zuru verb (alternative form of -jiru verbs) + + // usage domain + dbTag{Name: "agric", Order: 0, Score: 0, Category: ""}, // agriculture + dbTag{Name: "anat", Order: 0, Score: 0, Category: ""}, // anatomy + dbTag{Name: "archeol", Order: 0, Score: 0, Category: ""}, // archeology + dbTag{Name: "archit", Order: 0, Score: 0, Category: ""}, // architecture + dbTag{Name: "art", Order: 0, Score: 0, Category: ""}, // art, aesthetics + dbTag{Name: "astron", Order: 0, Score: 0, Category: ""}, // astronomy + dbTag{Name: "audvid", Order: 0, Score: 0, Category: ""}, // audiovisual + dbTag{Name: "aviat", Order: 0, Score: 0, Category: ""}, // aviation + dbTag{Name: "baseb", Order: 0, Score: 0, Category: ""}, // baseball + dbTag{Name: "biochem", Order: 0, Score: 0, Category: ""}, // biochemistry + dbTag{Name: "biol", Order: 0, Score: 0, Category: ""}, // biology + dbTag{Name: "bot", Order: 0, Score: 0, Category: ""}, // botany + dbTag{Name: "Buddh", Order: 0, Score: 0, Category: ""}, // Buddhism + dbTag{Name: "bus", Order: 0, Score: 0, Category: ""}, // business + dbTag{Name: "cards", Order: 0, Score: 0, Category: ""}, // card games + dbTag{Name: "chem", Order: 0, Score: 0, Category: ""}, // chemistry + dbTag{Name: "Christn", Order: 0, Score: 0, Category: ""}, // Christianity + dbTag{Name: "cloth", Order: 0, Score: 0, Category: ""}, // clothing + dbTag{Name: "comp", Order: 0, Score: 0, Category: ""}, // computing + dbTag{Name: "cryst", Order: 0, Score: 0, Category: ""}, // crystallography + dbTag{Name: 
"dent", Order: 0, Score: 0, Category: ""}, // dentistry + dbTag{Name: "ecol", Order: 0, Score: 0, Category: ""}, // ecology + dbTag{Name: "econ", Order: 0, Score: 0, Category: ""}, // economics + dbTag{Name: "elec", Order: 0, Score: 0, Category: ""}, // electricity, elec. eng. + dbTag{Name: "electr", Order: 0, Score: 0, Category: ""}, // electronics + dbTag{Name: "embryo", Order: 0, Score: 0, Category: ""}, // embryology + dbTag{Name: "engr", Order: 0, Score: 0, Category: ""}, // engineering + dbTag{Name: "ent", Order: 0, Score: 0, Category: ""}, // entomology + dbTag{Name: "film", Order: 0, Score: 0, Category: ""}, // film + dbTag{Name: "finc", Order: 0, Score: 0, Category: ""}, // finance + dbTag{Name: "fish", Order: 0, Score: 0, Category: ""}, // fishing + dbTag{Name: "food", Order: 0, Score: 0, Category: ""}, // food, cooking + dbTag{Name: "gardn", Order: 0, Score: 0, Category: ""}, // gardening, horticulture + dbTag{Name: "genet", Order: 0, Score: 0, Category: ""}, // genetics + dbTag{Name: "geogr", Order: 0, Score: 0, Category: ""}, // geography + dbTag{Name: "geol", Order: 0, Score: 0, Category: ""}, // geology + dbTag{Name: "geom", Order: 0, Score: 0, Category: ""}, // geometry + dbTag{Name: "go", Order: 0, Score: 0, Category: ""}, // go (game) + dbTag{Name: "golf", Order: 0, Score: 0, Category: ""}, // golf + dbTag{Name: "gramm", Order: 0, Score: 0, Category: ""}, // grammar + dbTag{Name: "grmyth", Order: 0, Score: 0, Category: ""}, // Greek mythology + dbTag{Name: "hanaf", Order: 0, Score: 0, Category: ""}, // hanafuda + dbTag{Name: "horse", Order: 0, Score: 0, Category: ""}, // horse racing + dbTag{Name: "kabuki", Order: 0, Score: 0, Category: ""}, // kabuki + dbTag{Name: "law", Order: 0, Score: 0, Category: ""}, // law + dbTag{Name: "ling", Order: 0, Score: 0, Category: ""}, // linguistics + dbTag{Name: "logic", Order: 0, Score: 0, Category: ""}, // logic + dbTag{Name: "MA", Order: 0, Score: 0, Category: ""}, // martial arts + dbTag{Name: "mahj", Order: 
0, Score: 0, Category: ""}, // mahjong + dbTag{Name: "manga", Order: 0, Score: 0, Category: ""}, // manga + dbTag{Name: "math", Order: 0, Score: 0, Category: ""}, // mathematics + dbTag{Name: "mech", Order: 0, Score: 0, Category: ""}, // mechanical engineering + dbTag{Name: "med", Order: 0, Score: 0, Category: ""}, // medicine + dbTag{Name: "met", Order: 0, Score: 0, Category: ""}, // meteorology + dbTag{Name: "mil", Order: 0, Score: 0, Category: ""}, // military + dbTag{Name: "mining", Order: 0, Score: 0, Category: ""}, // mining + dbTag{Name: "music", Order: 0, Score: 0, Category: ""}, // music + dbTag{Name: "noh", Order: 0, Score: 0, Category: ""}, // noh + dbTag{Name: "ornith", Order: 0, Score: 0, Category: ""}, // ornithology + dbTag{Name: "paleo", Order: 0, Score: 0, Category: ""}, // paleontology + dbTag{Name: "pathol", Order: 0, Score: 0, Category: ""}, // pathology + dbTag{Name: "pharm", Order: 0, Score: 0, Category: ""}, // pharmacy + dbTag{Name: "phil", Order: 0, Score: 0, Category: ""}, // philosophy + dbTag{Name: "photo", Order: 0, Score: 0, Category: ""}, // photography + dbTag{Name: "physics", Order: 0, Score: 0, Category: ""}, // physics + dbTag{Name: "physiol", Order: 0, Score: 0, Category: ""}, // physiology + dbTag{Name: "politics", Order: 0, Score: 0, Category: ""}, // politics + dbTag{Name: "print", Order: 0, Score: 0, Category: ""}, // printing + dbTag{Name: "psy", Order: 0, Score: 0, Category: ""}, // psychiatry + dbTag{Name: "psyanal", Order: 0, Score: 0, Category: ""}, // psychoanalysis + dbTag{Name: "psych", Order: 0, Score: 0, Category: ""}, // psychology + dbTag{Name: "rail", Order: 0, Score: 0, Category: ""}, // railway + dbTag{Name: "rommyth", Order: 0, Score: 0, Category: ""}, // Roman mythology + dbTag{Name: "Shinto", Order: 0, Score: 0, Category: ""}, // Shinto + dbTag{Name: "shogi", Order: 0, Score: 0, Category: ""}, // shogi + dbTag{Name: "ski", Order: 0, Score: 0, Category: ""}, // skiing + dbTag{Name: "sports", Order: 0, Score: 
0, Category: ""}, // sports + dbTag{Name: "stat", Order: 0, Score: 0, Category: ""}, // statistics + dbTag{Name: "stockm", Order: 0, Score: 0, Category: ""}, // stock market + dbTag{Name: "sumo", Order: 0, Score: 0, Category: ""}, // sumo + dbTag{Name: "telec", Order: 0, Score: 0, Category: ""}, // telecommunications + dbTag{Name: "tradem", Order: 0, Score: 0, Category: ""}, // trademark + dbTag{Name: "tv", Order: 0, Score: 0, Category: ""}, // television + dbTag{Name: "vidg", Order: 0, Score: 0, Category: ""}, // video games + dbTag{Name: "zool", Order: 0, Score: 0, Category: ""}, // zoology + + // dialect + dbTag{Name: "bra", Order: 0, Score: 0, Category: ""}, // Brazilian + dbTag{Name: "hob", Order: 0, Score: 0, Category: ""}, // Hokkaido-ben + dbTag{Name: "ksb", Order: 0, Score: 0, Category: ""}, // Kansai-ben + dbTag{Name: "ktb", Order: 0, Score: 0, Category: ""}, // Kantou-ben + dbTag{Name: "kyb", Order: 0, Score: 0, Category: ""}, // Kyoto-ben + dbTag{Name: "kyu", Order: 0, Score: 0, Category: ""}, // Kyuushuu-ben + dbTag{Name: "nab", Order: 0, Score: 0, Category: ""}, // Nagano-ben + dbTag{Name: "osb", Order: 0, Score: 0, Category: ""}, // Osaka-ben + dbTag{Name: "rkb", Order: 0, Score: 0, Category: ""}, // Ryuukyuu-ben + dbTag{Name: "thb", Order: 0, Score: 0, Category: ""}, // Touhoku-ben + dbTag{Name: "tsb", Order: 0, Score: 0, Category: ""}, // Tosa-ben + dbTag{Name: "tsug", Order: 0, Score: 0, Category: ""}, // Tsugaru-ben + } +} diff --git a/structuredContent.go b/structuredContent.go new file mode 100644 index 0000000..ded8229 --- /dev/null +++ b/structuredContent.go @@ -0,0 +1,192 @@ +package yomichan + +type contentAttr struct { + lang string + fontStyle string // normal, italic + fontWeight string // normal, bold + fontSize string // small, medium, large, smaller, 80%, 125%, etc. 
// contentAttr collects the optional attributes that the content* builders
// copy onto a structured-content node. Zero values mean "not set" and are
// omitted from the generated node. contentRuby and contentInternalLink only
// use lang and data; the style fields are consumed by contentStyle.
type contentAttr struct {
	lang               string
	fontStyle          string   // normal, italic
	fontWeight         string   // normal, bold
	fontSize           string   // small, medium, large, smaller, 80%, 125%, etc.
	textDecorationLine []string // underline, overline, line-through
	verticalAlign      string   // baseline, sub, super, text-top, text-bottom, middle, top, bottom
	textAlign          string   // start, end, left, right, center, justify, justify-all, match-parent
	marginTop          int
	marginLeft         int
	marginRight        int
	marginBottom       int
	listStyleType      string
	data               map[string]string
}

// contentReduce simplifies a list of content items.
//
// If the list contains adjacent strings, they are concatenated.
// ex: ["one", "two", content_structure, "four"] -> ["onetwo", content_structure, "four"]
// If the list only contains strings, a single concatenated string is returned
// (including the case where every string is empty).
// ex: ["one", "two"] -> "onetwo"
// A list that reduces to exactly one item is returned as that item, and an
// empty list is returned as an empty (non-nil) slice.
func contentReduce(contents []any) any {
	if len(contents) == 1 {
		return contents[0]
	}

	// make always yields a non-nil slice, so an empty result is still
	// encoded as a JSON array ([]) rather than null.
	reduced := make([]any, 0, len(contents))
	pending := ""
	onlyStrings := len(contents) > 0
	for _, content := range contents {
		if text, ok := content.(string); ok {
			pending += text
			continue
		}
		onlyStrings = false
		if pending != "" {
			reduced = append(reduced, pending)
			pending = ""
		}
		reduced = append(reduced, content)
	}

	if onlyStrings {
		// Honor the documented contract even when the strings are all empty.
		return pending
	}
	if pending != "" {
		reduced = append(reduced, pending)
	}
	if len(reduced) == 1 {
		return reduced[0]
	}
	return reduced
}

// contentSetLangData copies the lang and data attributes onto node, skipping
// whichever of them is unset.
func contentSetLangData(node map[string]any, attr contentAttr) {
	if attr.lang != "" {
		node["lang"] = attr.lang
	}
	if len(attr.data) != 0 {
		node["data"] = attr.data
	}
}

// contentStructure wraps the reduced contents in the top-level
// "structured-content" node.
func contentStructure(contents ...any) map[string]any {
	return map[string]any{
		"type":    "structured-content",
		"content": contentReduce(contents),
	}
}

// contentRuby builds a "ruby" node: the reduced base contents followed by the
// ruby text in an "rt" node, which is surrounded by "rp" parentheses.
// Only attr.lang and attr.data are applied; style fields are ignored.
func contentRuby(attr contentAttr, ruby string, contents ...any) map[string]any {
	rubyContent := map[string]any{
		"tag": "ruby",
		"content": []any{
			contentReduce(contents),
			map[string]string{"tag": "rp", "content": "("},
			map[string]string{"tag": "rt", "content": ruby},
			map[string]string{"tag": "rp", "content": ")"},
		},
	}
	contentSetLangData(rubyContent, attr)
	return rubyContent
}

// contentInternalLink builds an "a" node whose href is an internal query link
// for query with wildcards disabled. When no contents are given, the query
// itself is used as the link text. Only attr.lang and attr.data are applied.
//
// NOTE(review): query is inserted into the href verbatim (no URL escaping),
// so a query containing '&' or '#' would change the meaning of the link.
func contentInternalLink(attr contentAttr, query string, contents ...any) map[string]any {
	linkContent := map[string]any{
		"tag":  "a",
		"href": "?query=" + query + "&wildcards=off",
	}
	if len(contents) == 0 {
		linkContent["content"] = query
	} else {
		linkContent["content"] = contentReduce(contents)
	}
	contentSetLangData(linkContent, attr)
	return linkContent
}

// contentSpan builds a "span" node.
func contentSpan(attr contentAttr, contents ...any) map[string]any {
	return contentStyledContainer(attr, "span", contents...)
}

// contentDiv builds a "div" node.
func contentDiv(attr contentAttr, contents ...any) map[string]any {
	return contentStyledContainer(attr, "div", contents...)
}

// contentListItem builds an "li" node.
func contentListItem(attr contentAttr, contents ...any) map[string]any {
	return contentStyledContainer(attr, "li", contents...)
}

// contentOrderedList builds an "ol" node.
func contentOrderedList(attr contentAttr, contents ...any) map[string]any {
	return contentStyledContainer(attr, "ol", contents...)
}

// contentUnorderedList builds a "ul" node.
func contentUnorderedList(attr contentAttr, contents ...any) map[string]any {
	return contentStyledContainer(attr, "ul", contents...)
}

// contentTable builds a "table" node.
func contentTable(attr contentAttr, contents ...any) map[string]any {
	return contentStyledContainer(attr, "table", contents...)
}

// contentTableHead builds a "thead" node.
func contentTableHead(attr contentAttr, contents ...any) map[string]any {
	return contentStyledContainer(attr, "thead", contents...)
}

// contentTableBody builds a "tbody" node.
func contentTableBody(attr contentAttr, contents ...any) map[string]any {
	return contentStyledContainer(attr, "tbody", contents...)
}

// contentTableRow builds a "tr" node.
func contentTableRow(attr contentAttr, contents ...any) map[string]any {
	return contentStyledContainer(attr, "tr", contents...)
}

// contentTableHeadCell builds a "th" node.
func contentTableHeadCell(attr contentAttr, contents ...any) map[string]any {
	return contentStyledContainer(attr, "th", contents...)
}

// contentTableCell builds a "td" node.
func contentTableCell(attr contentAttr, contents ...any) map[string]any {
	return contentStyledContainer(attr, "td", contents...)
}

// contentStyledContainer builds a node with the given tag around the reduced
// contents. lang and data are attached when set, and a "style" entry is added
// only when at least one style field of attr is set.
func contentStyledContainer(attr contentAttr, tag string, contents ...any) map[string]any {
	container := map[string]any{
		"tag":     tag,
		"content": contentReduce(contents),
	}
	contentSetLangData(container, attr)
	if style := contentStyle(attr); len(style) != 0 {
		container["style"] = style
	}
	return container
}

// contentStyle converts the style fields of attr into a style map, leaving
// out every field that still has its zero value. The result is empty (but
// non-nil) when no style field is set.
func contentStyle(attr contentAttr) map[string]any {
	style := make(map[string]any)
	setString := func(key, value string) {
		if value != "" {
			style[key] = value
		}
	}
	setInt := func(key string, value int) {
		if value != 0 {
			style[key] = value
		}
	}

	setString("fontStyle", attr.fontStyle)
	setString("fontWeight", attr.fontWeight)
	setString("fontSize", attr.fontSize)
	if len(attr.textDecorationLine) != 0 {
		style["textDecorationLine"] = attr.textDecorationLine
	}
	setString("verticalAlign", attr.verticalAlign)
	setString("textAlign", attr.textAlign)
	setInt("marginTop", attr.marginTop)
	setInt("marginLeft", attr.marginLeft)
	setInt("marginRight", attr.marginRight)
	setInt("marginBottom", attr.marginBottom)
	setString("listStyleType", attr.listStyleType)
	return style
}