From 8281301869a946d6bebe340478068f9e9ff2770a Mon Sep 17 00:00:00 2001 From: stephenmk Date: Wed, 1 Feb 2023 18:55:03 -0600 Subject: [PATCH] New JMnedict version --- enamdict.go | 118 ------------------ jmdict_forms.go | 15 +-- jmdict_tags.go | 25 ++-- jmnedict.go | 132 ++++++++++++++++++++ jmnedict_generic_terms.go | 80 ++++++++++++ jmnedict_text_util.go | 254 ++++++++++++++++++++++++++++++++++++++ 6 files changed, 481 insertions(+), 143 deletions(-) delete mode 100644 enamdict.go create mode 100644 jmnedict.go create mode 100644 jmnedict_generic_terms.go create mode 100644 jmnedict_text_util.go diff --git a/enamdict.go b/enamdict.go deleted file mode 100644 index 78b886d..0000000 --- a/enamdict.go +++ /dev/null @@ -1,118 +0,0 @@ -package yomichan - -import ( - "os" - - "foosoft.net/projects/jmdict" -) - -func jmnedictBuildTagMeta(entities map[string]string) dbTagList { - var tags dbTagList - - for name, value := range entities { - tag := dbTag{Name: name, Notes: value} - - switch name { - case "company", "fem", "given", "masc", "organization", "person", "place", "product", "station", "surname", "unclass", "work": - tag.Category = "name" - tag.Order = 4 - } - - tags = append(tags, tag) - } - - return tags -} - -func jmnedictExtractTerms(enamdictEntry jmdict.JmnedictEntry) []dbTerm { - var terms []dbTerm - - convert := func(reading jmdict.JmnedictReading, kanji *jmdict.JmnedictKanji) { - if kanji != nil && hasString(kanji.Expression, reading.Restrictions) { - return - } - - var term dbTerm - term.Sequence = enamdictEntry.Sequence - term.addTermTags(reading.Information...) - - if kanji == nil { - term.Expression = reading.Reading - } else { - term.Expression = kanji.Expression - term.Reading = reading.Reading - term.addTermTags(kanji.Information...) - - for _, priority := range kanji.Priorities { - if hasString(priority, reading.Priorities) { - term.addTermTags(priority) - } - } - } - - for _, trans := range enamdictEntry.Translations { - for _, translation := range trans.Translations { - term.Glossary = append(term.Glossary, translation) - } - term.addDefinitionTags(trans.NameTypes...) - } - - terms = append(terms, term) - } - - if len(enamdictEntry.Kanji) > 0 { - for _, kanji := range enamdictEntry.Kanji { - for _, reading := range enamdictEntry.Readings { - convert(reading, &kanji) - } - } - } else { - for _, reading := range enamdictEntry.Readings { - convert(reading, nil) - } - } - - return terms -} - -func jmnedictExportDb(inputPath, outputPath, language, title string, stride int, pretty bool) error { - reader, err := os.Open(inputPath) - if err != nil { - return err - } - defer reader.Close() - - dict, entities, err := jmdict.LoadJmnedictNoTransform(reader) - if err != nil { - return err - } - - var terms dbTermList - for _, entry := range dict.Entries { - terms = append(terms, jmnedictExtractTerms(entry)...) - } - - if title == "" { - title = "JMnedict" - } - - recordData := map[string]dbRecordList{ - "term": terms.crush(), - "tag": jmnedictBuildTagMeta(entities).crush(), - } - - index := dbIndex{ - Title: title, - Revision: "jmnedict1", - Sequenced: true, - Attribution: edrdgAttribution, - } - - return writeDb( - outputPath, - index, - recordData, - stride, - pretty, - ) -} diff --git a/jmdict_forms.go b/jmdict_forms.go index 5d01de5..7ca721f 100644 --- a/jmdict_forms.go +++ b/jmdict_forms.go @@ -8,17 +8,6 @@ import ( "golang.org/x/exp/slices" ) -func kata2hira(word string) string { - charMap := func(character rune) rune { - if (character >= 'ァ' && character <= 'ヶ') || (character >= 'ヽ' && character <= 'ヾ') { - return character - 0x60 - } else { - return character - } - } - return strings.Map(charMap, word) -} - func (h *headword) InfoSymbols() string { infoSymbols := []string{} if h.IsPriority { @@ -93,8 +82,8 @@ func needsFormTable(headwords []headword) bool { } else if h.IsKanaOnly() { continue } else if uniqueReading == "" { - uniqueReading = kata2hira(h.Reading) - } else if uniqueReading != kata2hira(h.Reading) { + uniqueReading = katakanaToHiragana(h.Reading) + } else if uniqueReading != katakanaToHiragana(h.Reading) { return true } } diff --git a/jmdict_tags.go b/jmdict_tags.go index b444c47..eb2a012 100644 --- a/jmdict_tags.go +++ b/jmdict_tags.go @@ -99,7 +99,7 @@ func knownEntityTags() []dbTag { dbTag{Name: "char", Order: 0, Score: 0, Category: ""}, // character dbTag{Name: "chn", Order: 0, Score: 0, Category: ""}, // children's language dbTag{Name: "col", Order: 0, Score: 0, Category: ""}, // colloquialism - dbTag{Name: "company", Order: 0, Score: 0, Category: ""}, // company name + dbTag{Name: "company", Order: 4, Score: 0, Category: "name"}, // company name dbTag{Name: "creat", Order: 0, Score: 0, Category: ""}, // creature dbTag{Name: "dated", Order: -4, Score: 0, Category: "archaism"}, // dated term dbTag{Name: "dei", Order: 0, Score: 0, Category: ""}, // deity @@ -108,10 +108,10 @@ func knownEntityTags() []dbTag { dbTag{Name: "euph", Order: 0, Score: 0, Category: ""}, // euphemistic dbTag{Name: "ev", Order: 0, Score: 0, Category: ""}, // event dbTag{Name: "fam", Order: 0, Score: 0, Category: ""}, // familiar language - dbTag{Name: "fem", Order: 0, Score: 0, Category: ""}, // female term or language + dbTag{Name: "fem", Order: 4, Score: 0, Category: "name"}, // female term, language, or name dbTag{Name: "fict", Order: 0, Score: 0, Category: ""}, // fiction dbTag{Name: "form", Order: 0, Score: 0, Category: ""}, // formal or literary term - dbTag{Name: "given", Order: 0, Score: 0, Category: ""}, // given name or forename, gender not specified + dbTag{Name: "given", Order: 4, Score: 0, Category: "name"}, // given name or forename, gender not specified dbTag{Name: "group", Order: 0, Score: 0, Category: ""}, // group dbTag{Name: "hist", Order: 0, Score: 0, Category: ""}, // historical term dbTag{Name: "hon", Order: 0, Score: 0, Category: ""}, // honorific or respectful (sonkeigo) language @@ -120,19 +120,20 @@ func knownEntityTags() []dbTag { dbTag{Name: "joc", Order: 0, Score: 0, Category: ""}, // jocular, humorous term dbTag{Name: "leg", Order: 0, Score: 0, Category: ""}, // legend dbTag{Name: "m-sl", Order: 0, Score: 0, Category: ""}, // manga slang - dbTag{Name: "male", Order: 0, Score: 0, Category: ""}, // male term or language + dbTag{Name: "male", Order: 4, Score: 0, Category: "name"}, // male term, language, or name + dbTag{Name: "masc", Order: 4, Score: 0, Category: "name"}, // male term, language, or name dbTag{Name: "myth", Order: 0, Score: 0, Category: ""}, // mythology dbTag{Name: "net-sl", Order: 0, Score: 0, Category: ""}, // Internet slang dbTag{Name: "obj", Order: 0, Score: 0, Category: ""}, // object dbTag{Name: "obs", Order: -4, Score: 0, Category: "archaism"}, // obsolete term dbTag{Name: "on-mim", Order: 0, Score: 0, Category: ""}, // onomatopoeic or mimetic word - dbTag{Name: "organization", Order: 0, Score: 0, Category: ""}, // organization name + dbTag{Name: "organization", Order: 4, Score: 0, Category: "name"}, // organization name dbTag{Name: "oth", Order: 0, Score: 0, Category: ""}, // other - dbTag{Name: "person", Order: 0, Score: 0, Category: ""}, // full name of a particular person - dbTag{Name: "place", Order: 0, Score: 0, Category: ""}, // place name + dbTag{Name: "person", Order: 4, Score: 0, Category: "name"}, // full name of a particular person + dbTag{Name: "place", Order: 4, Score: 0, Category: "name"}, // place name dbTag{Name: "poet", Order: 0, Score: 0, Category: ""}, // poetical term dbTag{Name: "pol", Order: 0, Score: 0, Category: ""}, // polite (teineigo) language - dbTag{Name: "product", Order: 0, Score: 0, Category: ""}, // product name + dbTag{Name: "product", Order: 4, Score: 0, Category: "name"}, // product name dbTag{Name: "proverb", Order: 0, Score: 0, Category: "expression"}, // proverb dbTag{Name: "quote", Order: 0, Score: 0, Category: "expression"}, // quotation dbTag{Name: "rare", Order: -4, Score: 0, Category: "archaism"}, // rare @@ -141,12 +142,12 @@ func knownEntityTags() []dbTag { dbTag{Name: "serv", Order: 0, Score: 0, Category: ""}, // service dbTag{Name: "ship", Order: 0, Score: 0, Category: ""}, // ship name dbTag{Name: "sl", Order: 0, Score: 0, Category: ""}, // slang - dbTag{Name: "station", Order: 0, Score: 0, Category: ""}, // railway station - dbTag{Name: "surname", Order: 0, Score: 0, Category: ""}, // family or surname + dbTag{Name: "station", Order: 4, Score: 0, Category: "name"}, // railway station + dbTag{Name: "surname", Order: 4, Score: 0, Category: "name"}, // family or surname dbTag{Name: "uk", Order: 0, Score: 0, Category: ""}, // word usually written using kana alone - dbTag{Name: "unclass", Order: 0, Score: 0, Category: ""}, // unclassified name + dbTag{Name: "unclass", Order: 4, Score: 0, Category: "name"}, // unclassified name dbTag{Name: "vulg", Order: 0, Score: 0, Category: ""}, // vulgar expression or word - dbTag{Name: "work", Order: 0, Score: 0, Category: ""}, // work of art, literature, music, etc. name + dbTag{Name: "work", Order: 4, Score: 0, Category: "name"}, // work of art, literature, music, etc. name dbTag{Name: "X", Order: 0, Score: 0, Category: ""}, // rude or X-rated term (not displayed in educational software) dbTag{Name: "yoji", Order: 0, Score: 0, Category: ""}, // yojijukugo diff --git a/jmnedict.go b/jmnedict.go new file mode 100644 index 0000000..636bde3 --- /dev/null +++ b/jmnedict.go @@ -0,0 +1,132 @@ +package yomichan + +import ( + "os" + "regexp" + + "foosoft.net/projects/jmdict" +) + +func jmnedictPublicationDate(dictionary jmdict.Jmnedict) string { + if len(dictionary.Entries) == 0 { + return "unknown" + } + dateEntry := dictionary.Entries[len(dictionary.Entries)-1] + if len(dateEntry.Translations) == 0 || len(dateEntry.Translations[0].Translations) == 0 { + return "unknown" + } + r := regexp.MustCompile(`\d{4}-\d{2}-\d{2}`) + jmnedictDate := r.FindString(dateEntry.Translations[0].Translations[0]) + if jmnedictDate != "" { + return jmnedictDate + } else { + return "unknown" + } +} + +func jmnedictSenseTerm(headword headword, seq sequence, sense jmdict.JmnedictTranslation, senseNumber int) dbTerm { + term := dbTerm{ + Expression: headword.Expression, + Reading: headword.Reading, + Sequence: seq, + } + for _, gloss := range sense.Translations { + term.Glossary = append(term.Glossary, gloss) + } + term.addDefinitionTags(sense.NameTypes...) + term.Score = calculateTermScore(senseNumber, 0, headword) + return term +} + +func jmnedictTerms(headword headword, entry jmdict.JmnedictEntry, g genericTermInfo) []dbTerm { + terms := []dbTerm{} + for idx, sense := range entry.Translations { + if g.IsGenericName(headword, sense.Translations) { + g.AddGlosses(headword.Expression, sense.NameTypes, headword.Reading) + } else { + g.AddUsedSequence(entry.Sequence) + senseTerm := jmnedictSenseTerm(headword, entry.Sequence, sense, idx+1) + terms = append(terms, senseTerm) + } + } + return terms +} + +func jmnedictHeadwords(entry jmdict.JmnedictEntry) (headwords []headword) { + // Note that JMnedict doesn't (currently) use priority tags, + // frequency tags, or any sort of reading/kanji restrictions. + for _, reading := range entry.Readings { + for _, kanji := range entry.Kanji { + h := headword{ + Expression: kanji.Expression, + Reading: reading.Reading, + } + h.Index = len(headwords) + headwords = append(headwords, h) + } + } + if len(entry.Kanji) == 0 { + for _, reading := range entry.Readings { + h := headword{ + Expression: reading.Reading, + Reading: reading.Reading, + } + h.Index = len(headwords) + headwords = append(headwords, h) + } + } + return headwords +} + +func jmnedictExportDb(inputPath, outputPath, language, title string, stride int, pretty bool) error { + reader, err := os.Open(inputPath) + if err != nil { + return err + } + defer reader.Close() + + dictionary, entities, err := jmdict.LoadJmnedictNoTransform(reader) + if err != nil { + return err + } + + genericTermInfo := newGenericTermInfo() + + terms := dbTermList{} + for _, entry := range dictionary.Entries { + headwords := jmnedictHeadwords(entry) + for _, headword := range headwords { + newTerms := jmnedictTerms(headword, entry, genericTermInfo) + terms = append(terms, newTerms...) + } + } + terms = append(terms, genericTermInfo.Terms()...) + + tags := dbTagList{} + tags = append(tags, entityTags(entities)...) + + recordData := map[string]dbRecordList{ + "term": terms.crush(), + "tag": tags.crush(), + } + + if title == "" { + title = "JMnedict" + } + jmnedictDate := jmnedictPublicationDate(dictionary) + + index := dbIndex{ + Title: title, + Revision: "JMnedict." + jmnedictDate, + Sequenced: true, + Attribution: edrdgAttribution, + } + + return writeDb( + outputPath, + index, + recordData, + stride, + pretty, + ) +} diff --git a/jmnedict_generic_terms.go b/jmnedict_generic_terms.go new file mode 100644 index 0000000..e034621 --- /dev/null +++ b/jmnedict_generic_terms.go @@ -0,0 +1,80 @@ +package yomichan + +import ( + "golang.org/x/exp/slices" +) + +type genericTermMap map[string]map[string][]string + +type genericTermInfo struct { + expressionToTagToGlosses genericTermMap + usedSequences map[sequence]bool + currentSequence sequence +} + +func newGenericTermInfo() genericTermInfo { + return genericTermInfo{ + expressionToTagToGlosses: genericTermMap{}, + usedSequences: map[sequence]bool{}, + } +} + +func (i *genericTermInfo) NewSequence() sequence { + seq := i.currentSequence + 1 + for i.usedSequences[seq] { + seq += 1 + } + i.AddUsedSequence(seq) + i.currentSequence = seq + return seq +} + +func (i *genericTermInfo) AddUsedSequence(s sequence) { + i.usedSequences[s] = true +} + +func (i *genericTermInfo) AddGlosses(exp string, tags []string, gloss string) { + if i.expressionToTagToGlosses[exp] == nil { + i.expressionToTagToGlosses[exp] = map[string][]string{} + } + for _, tag := range tags { + glosses := i.expressionToTagToGlosses[exp][tag] + if !slices.Contains(glosses, gloss) { + glosses = append(glosses, gloss) + i.expressionToTagToGlosses[exp][tag] = glosses + } + } +} + +func (i *genericTermInfo) IsGenericName(headword headword, definitions []string) bool { + if headword.IsKanaOnly() { + // No reason to process these terms. + return false + } + isGenericName := true + for _, definition := range definitions { + if !isTransliteration(definition, headword.Reading) { + isGenericName = false + break + } + } + return isGenericName +} + +func (i *genericTermInfo) Terms() (terms []dbTerm) { + for expression, tagToGlosses := range i.expressionToTagToGlosses { + seq := i.NewSequence() + for tag, glosses := range tagToGlosses { + term := dbTerm{ + Expression: expression, + Sequence: seq, + } + for _, gloss := range glosses { + term.Glossary = append(term.Glossary, gloss) + } + term.addDefinitionTags(tag) + terms = append(terms, term) + } + } + return terms +} diff --git a/jmnedict_text_util.go b/jmnedict_text_util.go new file mode 100644 index 0000000..b7b2122 --- /dev/null +++ b/jmnedict_text_util.go @@ -0,0 +1,254 @@ +package yomichan + +import ( + "strings" + + "golang.org/x/exp/slices" +) + +// Returns text with all katakana characters converted into hiragana. +func katakanaToHiragana(text string) string { + f := func(x rune) rune { + if x >= 'ァ' && x <= 'ヶ' || x >= 'ヽ' && x <= 'ヾ' { + return x - 0x60 + } else { + return x + } + } + return strings.Map(f, text) +} + +// Replace hiragana iteration marks with the appropriate characters. +// E.g. "さゝき" -> "ささき"; "たゞの" -> "ただの" +func replaceIterationMarks(text string) string { + iterationMarks := []struct { + char rune + offset rune + }{ + {'ゝ', 0x00}, + {'ゞ', 0x01}, + } + for _, x := range iterationMarks { + for strings.IndexRune(text, x.char) > 0 { + runes := []rune(text) + idx := slices.Index(runes, x.char) + runes[idx] = runes[idx-1] + x.offset + text = string(runes) + } + } + return text +} + +// Returns an array of the input text split into segments. +// E.g. "しょくぎょう" -> ["しょ", "く", "ぎょ", "う"] +// Returns nil if no segmentation is possible. +func makeKanaSegments(kana string) (segments []string) { + hiragana := replaceIterationMarks(katakanaToHiragana(kana)) + kanaRunes := []rune{} + for _, kanaRune := range hiragana { + kanaRunes = append(kanaRunes, kanaRune) + } + kanaRuneCount := len(kanaRunes) + for i := 0; i < kanaRuneCount; i++ { + for j := 0; j < kanaRuneCount-i; j++ { + segment := string(kanaRunes[i : kanaRuneCount-j]) + if _, ok := kanaSegmentToRomajiList[segment]; ok { + segments = append(segments, segment) + i = kanaRuneCount - j - 1 + break + } + if j == kanaRuneCount-i-1 { + return nil + } + } + } + return segments +} + +// Returns a map of ltr substrings of the input text. +// E.g. "nihon" -> ["n", "ni", "nih", "niho", "nihon"] +func makeSubstringMap(text string) map[string]bool { + substrings := make(map[string]bool) + for i := 1; i <= len(text); i++ { + substring := text[:i] + substrings[substring] = true + } + return substrings +} + +// Determines if the input text is a valid romaji representation of +// the input kana. +// +// The strategy is to calculate every possible romaji representation +// of a given string of kana and check if the input text is one of +// them. Since the number of combinations grows very large for long +// strings of kana, we need to prune invalid branches from the +// combination tree along the way. +func isTransliteration(text string, kana string) bool { + romaji := strings.TrimSpace(strings.ToLower(text)) + validSubstrings := makeSubstringMap(romaji) + kanaSegments := makeKanaSegments(kana) + possibilities := []string{""} + for _, segment := range kanaSegments { + newPossibilities := map[string]bool{} + for _, x := range possibilities { + for _, y := range kanaSegmentToRomajiList[segment] { + z := x + y + newPossibilities[z] = true + } + } + possibilities = nil + for z := range newPossibilities { + if validSubstrings[z] { + possibilities = append(possibilities, z) + } + } + if possibilities == nil { + return false + } + } + return slices.Contains(possibilities, romaji) +} + +var kanaSegmentToRomajiList = map[string][]string{ + "ぁ": []string{"", "a"}, + "ぃ": []string{"", "i"}, + "ぅ": []string{"", "u"}, + "ぇ": []string{"", "e"}, + "ぉ": []string{"", "o"}, + "ゃ": []string{"ya"}, + "ゅ": []string{"yu"}, + "ょ": []string{"yo"}, + "ゎ": []string{"wa"}, + "っ": []string{"", "k", "g", "s", "z", "t", "d", "f", "h", "b", "p", "n", "m", "y", "w", "c"}, + "ー": []string{"", "a", "i", "u", "e", "o", "-"}, + "あ": []string{"", "a", "ā", "wa", "wā"}, + "い": []string{"", "i", "ī", "wi", "wī"}, + "う": []string{"", "u", "ū", "wu", "wū"}, + "え": []string{"", "e", "ē", "we", "wē"}, + "お": []string{"", "o", "ō", "wo", "wō"}, + "ゔ": []string{"vu", "vū", "bu", "bū"}, + "か": []string{"ka", "kā"}, + "が": []string{"ga", "gā"}, + "き": []string{"ki", "kī"}, + "ぎ": []string{"gi", "gī"}, + "く": []string{"ku", "kū"}, + "ぐ": []string{"gu", "gū"}, + "け": []string{"ke", "kē"}, + "げ": []string{"ge", "gē"}, + "こ": []string{"ko", "kō"}, + "ご": []string{"go", "gō"}, + "さ": []string{"sa", "sā"}, + "ざ": []string{"za", "zā"}, + "し": []string{"si", "sī", "shi", "shī"}, + "じ": []string{"zi", "zī", "ji", "jī"}, + "す": []string{"su", "sū"}, + "ず": []string{"zu", "zū"}, + "せ": []string{"se", "sē"}, + "ぜ": []string{"ze", "zē"}, + "そ": []string{"so", "sō"}, + "ぞ": []string{"zo", "zō"}, + "た": []string{"ta", "tā"}, + "だ": []string{"da", "dā"}, + "ち": []string{"ti", "tī", "chi", "chī"}, + "ぢ": []string{"di", "dī", "dhi", "dhī", "ji", "jī", "dji", "djī", "dzi", "dzī"}, + "つ": []string{"tu", "tū", "tsu", "tsū"}, + "づ": []string{"du", "dū", "dzu", "dzū", "zu", "zū"}, + "て": []string{"te", "tē"}, + "で": []string{"de", "dē"}, + "と": []string{"to", "tō"}, + "ど": []string{"do", "dō"}, + "な": []string{"na", "nā"}, + "に": []string{"ni", "nī"}, + "ぬ": []string{"nu", "nū"}, + "ね": []string{"ne", "nē"}, + "の": []string{"no", "nō"}, + "は": []string{"ha", "hā", "wa", "wā", "a", "ā"}, + "ば": []string{"ba", "bā"}, + "ぱ": []string{"pa", "pā"}, + "ひ": []string{"hi", "hī", "i", "ī"}, + "び": []string{"bi", "bī"}, + "ぴ": []string{"pi", "pī"}, + "ふ": []string{"hu", "hū", "fu", "fū", "u", "ū"}, + "ぶ": []string{"bu", "bū"}, + "ぷ": []string{"pu", "pū"}, + "へ": []string{"he", "hē", "e", "ē"}, + "べ": []string{"be", "bē"}, + "ぺ": []string{"pe", "pē"}, + "ほ": []string{"ho", "hō", "o", "ō"}, + "ぼ": []string{"bo", "bō"}, + "ぽ": []string{"po", "pō"}, + "ま": []string{"ma", "mā"}, + "み": []string{"mi", "mī"}, + "む": []string{"mu", "mū"}, + "め": []string{"me", "mē"}, + "も": []string{"mo", "mō"}, + "や": []string{"ya", "yā"}, + "ゆ": []string{"yu", "yū"}, + "よ": []string{"yo", "yō"}, + "ら": []string{"ra", "rā"}, + "り": []string{"ri", "rī"}, + "る": []string{"ru", "rū"}, + "れ": []string{"re", "rē"}, + "ろ": []string{"ro", "rō"}, + "わ": []string{"wa", "wā"}, + "ゐ": []string{"wi", "wī", "i", "ī"}, + "ゑ": []string{"we", "wē", "e", "ē"}, + "を": []string{"wo", "wō", "o", "ō"}, + "ん": []string{"n", "n'", "m"}, + "うぁ": []string{"wa", "wā", "ua", "uā"}, + "うぃ": []string{"wi", "wī", "ui", "uī"}, + "うぇ": []string{"we", "wē", "ue", "uē"}, + "うぉ": []string{"wo", "wō", "uo", "uō"}, + "きゃ": []string{"kya", "kyā"}, + "きゅ": []string{"kyu", "kyū"}, + "きょ": []string{"kyo", "kyō"}, + "ぎゃ": []string{"gya", "gyā"}, + "ぎゅ": []string{"gyu", "gyū"}, + "ぎょ": []string{"gyo", "gyō"}, + "くゎ": []string{"kwa", "kwā"}, + "くゅ": []string{"kyu", "kyū"}, + "しぇ": []string{"she", "shē", "shie", "shiē"}, + "しゃ": []string{"sha", "shā", "sya", "syā"}, + "しゅ": []string{"shu", "shū", "syu", "syū"}, + "しょ": []string{"sho", "shō", "syo", "syō"}, + "じぇ": []string{"je", "jē"}, + "じゃ": []string{"ja", "jā", "jya", "jyā"}, + "じゅ": []string{"ju", "jū", "jyu", "jyū"}, + "じょ": []string{"jo", "jō", "jyo", "jyō"}, + "ちぁ": []string{"cha", "chā", "chia", "chiā"}, + "ちぇ": []string{"che", "chē", "chie", "chiē"}, + "ちゃ": []string{"cha", "chā", "tya", "tyā"}, + "ちゅ": []string{"chu", "chū", "tyu", "tyū"}, + "ちょ": []string{"cho", "chō", "tyo", "tyō"}, + "ぢゃ": []string{"ja", "jā", "jya", "jyā", "dya", "dyā"}, + "ぢゅ": []string{"ju", "jū", "jyu", "jyū", "dyu", "dyū"}, + "ぢょ": []string{"jo", "jō", "jyo", "jyō", "dyo", "dyō"}, + "つぁ": []string{"tsa", "tsā", "tsua", "tsuā"}, + "つぇ": []string{"tse", "tsē", "tsue", "tsuē"}, + "てぃ": []string{"ti", "tī", "tei", "teī"}, + "でぃ": []string{"di", "dī", "dei", "deī"}, + "でゅ": []string{"dyu", "dyū", "deyu", "deyū"}, + "にゃ": []string{"nya", "nyā"}, + "にゅ": []string{"nyu", "nyū"}, + "にょ": []string{"nyo", "nyō"}, + "ひゃ": []string{"hya", "hyā"}, + "ひゅ": []string{"hyu", "hyū"}, + "ひょ": []string{"hyo", "hyō"}, + "びゃ": []string{"bya", "byā"}, + "びゅ": []string{"byu", "byū"}, + "びょ": []string{"byo", "byō"}, + "ぴゃ": []string{"pya", "pyā"}, + "ぴゅ": []string{"pyu", "pyū"}, + "ぴょ": []string{"pyo", "pyō"}, + "ふぁ": []string{"fa", "fā"}, + "ふぃ": []string{"fi", "fī"}, + "ふぇ": []string{"fe", "fē"}, + "ふぉ": []string{"fo", "fō"}, + "みゃ": []string{"mya", "myā"}, + "みゅ": []string{"myu", "myū"}, + "みょ": []string{"myo", "myō"}, + "りゃ": []string{"rya", "ryā"}, + "りゅ": []string{"ryu", "ryū"}, + "りょ": []string{"ryo", "ryō"}, +}