1

Compare commits

..

10 Commits

Author SHA1 Message Date
00dc44386e Update maintenance info 2023-02-25 12:43:03 -08:00
Alexei Yatskov
f4da17e228
Merge pull request #41 from stephenmk/master
New version of JMnedict (the proper name dictionary)
2023-02-05 09:57:17 -08:00
stephenmk
ecf22da5a3
Improve readability of publication date functions 2023-02-04 01:42:08 -06:00
stephenmk
a9d85dc720
Simplify string -> runes conversion 2023-02-03 22:07:41 -06:00
stephenmk
70611a51c4
Fix typo 2023-02-03 15:51:52 -06:00
stephenmk
dffbec6337
Designate more JMnedict category tags 2023-02-02 20:15:28 -06:00
stephenmk
5755b79341
Use cached part-of-speech values 2023-02-02 15:50:57 -06:00
stephenmk
7bff70b71c
JMdict: Ensure part-of-speech info is added in non-English versions
Only English-language senses in JMdict contain part-of-speech tags.
This info is displayed to users in definition tags and also used
for deinflecting verbs and adjectives during term lookups.

The old version of Yomichan-Import took the PoS tags from the final
sense in the English version of an entry and applied them to every
sense of every other language. For example, 川・かわ has two senses in
English JMdict: a noun sense and a suffix sense. Therefore every sense
of 川・かわ in every other language was tagged as a suffix.

Instead, I suggest gathering all distinct PoS tags from each English
entry and applying them all to each non-English sense. Every
non-English sense of 川・かわ will therefore be tagged as both a noun
and suffix.
2023-02-02 10:44:16 -06:00
stephenmk
19d6d0bb43
Rename some jmdict functions 2023-02-01 19:14:37 -06:00
stephenmk
3b420f8b6c
Use library implementation of Contains function 2023-02-01 18:57:35 -06:00
9 changed files with 135 additions and 130 deletions

View File

@ -1,5 +1,8 @@
# Yomichan Import
*Note: this project is no longer maintained. Please see [this
post](https://foosoft.net/posts/sunsetting-the-yomichan-project/) for more information.*
Yomichan Import allows users of the [Yomichan](https://foosoft.net/projects/yomichan) extension to import custom
dictionary files. It currently supports the following formats:

View File

@ -9,6 +9,8 @@ import (
"os"
"path/filepath"
"strings"
"golang.org/x/exp/slices"
)
const (
@ -116,7 +118,7 @@ type dbKanjiList []dbKanji
func (kanji *dbKanji) addTags(tags ...string) {
for _, tag := range tags {
if !hasString(tag, kanji.Tags) {
if !slices.Contains(kanji.Tags, tag) {
kanji.Tags = append(kanji.Tags, tag)
}
}
@ -245,7 +247,7 @@ func writeDb(outputPath string, index dbIndex, recordData map[string]dbRecordLis
func appendStringUnique(target []string, source ...string) []string {
for _, str := range source {
if !hasString(str, target) {
if !slices.Contains(target, str) {
target = append(target, str)
}
}
@ -253,16 +255,6 @@ func appendStringUnique(target []string, source ...string) []string {
return target
}
func hasString(needle string, haystack []string) bool {
for _, value := range haystack {
if needle == value {
return true
}
}
return false
}
func intersection(s1, s2 []string) []string {
s := []string{}
m := make(map[string]bool)
@ -337,7 +329,7 @@ func detectFormat(path string) (string, error) {
func ExportDb(inputPath, outputPath, format, language, title string, stride int, pretty bool) error {
handlers := map[string]func(string, string, string, string, int, bool) error{
"edict": jmdExportDb,
"edict": jmdictExportDb,
"forms": formsExportDb,
"enamdict": jmnedictExportDb,
"epwing": epwingExportDb,

View File

@ -8,14 +8,14 @@ import (
)
func frequencyTermsExportDb(inputPath, outputPath, language, title string, stride int, pretty bool) error {
return frequncyExportDb(inputPath, outputPath, language, title, stride, pretty, "term_meta")
return frequencyExportDb(inputPath, outputPath, language, title, stride, pretty, "term_meta")
}
func frequencyKanjiExportDb(inputPath, outputPath, language, title string, stride int, pretty bool) error {
return frequncyExportDb(inputPath, outputPath, language, title, stride, pretty, "kanji_meta")
return frequencyExportDb(inputPath, outputPath, language, title, stride, pretty, "kanji_meta")
}
func frequncyExportDb(inputPath, outputPath, language, title string, stride int, pretty bool, key string) error {
func frequencyExportDb(inputPath, outputPath, language, title string, stride int, pretty bool, key string) error {
reader, err := os.Open(inputPath)
if err != nil {
return err

View File

@ -63,23 +63,26 @@ func doDisplaySenseNumberTag(headword headword, entry jmdict.JmdictEntry, meta j
}
func jmdictPublicationDate(dictionary jmdict.Jmdict) string {
unknownDate := "unknown"
idx := len(dictionary.Entries) - 1
if len(dictionary.Entries) == 0 {
return "unknown"
}
dateEntry := dictionary.Entries[len(dictionary.Entries)-1]
if len(dateEntry.Sense) == 0 || len(dateEntry.Sense[0].Glossary) == 0 {
return "unknown"
return unknownDate
} else if len(dictionary.Entries[idx].Sense) == 0 {
return unknownDate
} else if len(dictionary.Entries[idx].Sense[0].Glossary) == 0 {
return unknownDate
}
dateGloss := dictionary.Entries[idx].Sense[0].Glossary[0].Content
r := regexp.MustCompile(`\d{4}-\d{2}-\d{2}`)
jmdictDate := r.FindString(dateEntry.Sense[0].Glossary[0].Content)
if jmdictDate != "" {
return jmdictDate
date := r.FindString(dateGloss)
if date != "" {
return date
} else {
return "unknown"
return unknownDate
}
}
func createFormsTerm(headword headword, entry jmdict.JmdictEntry, meta jmdictMetadata) (dbTerm, bool) {
func jmdictFormsTerm(headword headword, entry jmdict.JmdictEntry, meta jmdictMetadata) (dbTerm, bool) {
// Don't add "forms" terms to non-English dictionaries.
// Information would be duplicated if users installed more
// than one version.
@ -94,20 +97,21 @@ func createFormsTerm(headword headword, entry jmdict.JmdictEntry, meta jmdictMet
}
}
term := baseFormsTerm(entry)
term := baseFormsTerm(entry, meta)
term.Expression = headword.Expression
term.Reading = headword.Reading
term.addTermTags(headword.TermTags...)
term.addDefinitionTags("forms")
senseNumber := meta.seqToSenseCount[entry.Sequence] + 1
entryDepth := meta.entryDepth[entry.Sequence]
term.Score = calculateTermScore(senseNumber, entryDepth, headword)
return term, true
}
func createSearchTerm(headword headword, entry jmdict.JmdictEntry, meta jmdictMetadata) (dbTerm, bool) {
func jmdictSearchTerm(headword headword, entry jmdict.JmdictEntry, meta jmdictMetadata) (dbTerm, bool) {
// Don't add "search" terms to non-English dictionaries.
// Information would be duplicated if users installed more
// than one version.
@ -119,10 +123,11 @@ func createSearchTerm(headword headword, entry jmdict.JmdictEntry, meta jmdictMe
Expression: headword.Expression,
Sequence: -entry.Sequence,
}
for _, sense := range entry.Sense {
rules := grammarRules(sense.PartsOfSpeech)
term.addRules(rules...)
}
partsOfSpeech := meta.seqToPartsOfSpeech[entry.Sequence]
rules := grammarRules(partsOfSpeech)
term.addRules(rules...)
term.addTermTags(headword.TermTags...)
term.Score = calculateTermScore(1, 0, headword)
@ -140,7 +145,7 @@ func createSearchTerm(headword headword, entry jmdict.JmdictEntry, meta jmdictMe
return term, true
}
func createSenseTerm(sense jmdict.JmdictSense, senseNumber int, headword headword, entry jmdict.JmdictEntry, meta jmdictMetadata) (dbTerm, bool) {
func jmdictSenseTerm(sense jmdict.JmdictSense, senseNumber int, headword headword, entry jmdict.JmdictEntry, meta jmdictMetadata) (dbTerm, bool) {
if sense.RestrictedReadings != nil && !slices.Contains(sense.RestrictedReadings, headword.Reading) {
return dbTerm{}, false
}
@ -162,6 +167,13 @@ func createSenseTerm(sense jmdict.JmdictSense, senseNumber int, headword headwor
senseNumberTag := strconv.Itoa(senseNumber)
term.addDefinitionTags(senseNumberTag)
}
if len(sense.PartsOfSpeech) == 0 && meta.language != "eng" {
// This is a hack to provide part-of-speech info to
// non-English versions of JMdict.
sense.PartsOfSpeech = meta.seqToPartsOfSpeech[entry.Sequence]
}
term.addDefinitionTags(sense.PartsOfSpeech...)
term.addDefinitionTags(sense.Fields...)
term.addDefinitionTags(sense.Misc...)
@ -176,12 +188,12 @@ func createSenseTerm(sense jmdict.JmdictSense, senseNumber int, headword headwor
return term, true
}
func extractTerms(headword headword, entry jmdict.JmdictEntry, meta jmdictMetadata) ([]dbTerm, bool) {
func jmdictTerms(headword headword, entry jmdict.JmdictEntry, meta jmdictMetadata) ([]dbTerm, bool) {
if meta.seqToSenseCount[entry.Sequence] == 0 {
return nil, false
}
if headword.IsSearchOnly {
if searchTerm, ok := createSearchTerm(headword, entry, meta); ok {
if searchTerm, ok := jmdictSearchTerm(headword, entry, meta); ok {
return []dbTerm{searchTerm}, true
} else {
return nil, false
@ -194,20 +206,20 @@ func extractTerms(headword headword, entry jmdict.JmdictEntry, meta jmdictMetada
// Do not increment sense number
continue
}
if senseTerm, ok := createSenseTerm(sense, senseNumber, headword, entry, meta); ok {
if senseTerm, ok := jmdictSenseTerm(sense, senseNumber, headword, entry, meta); ok {
terms = append(terms, senseTerm)
}
senseNumber += 1
}
if formsTerm, ok := createFormsTerm(headword, entry, meta); ok {
if formsTerm, ok := jmdictFormsTerm(headword, entry, meta); ok {
terms = append(terms, formsTerm)
}
return terms, true
}
func jmdExportDb(inputPath string, outputPath string, languageName string, title string, stride int, pretty bool) error {
func jmdictExportDb(inputPath string, outputPath string, languageName string, title string, stride int, pretty bool) error {
if _, ok := langNameToCode[languageName]; !ok {
return errors.New("Unrecognized language parameter: " + languageName)
}
@ -229,7 +241,7 @@ func jmdExportDb(inputPath string, outputPath string, languageName string, title
for _, entry := range dictionary.Entries {
headwords := extractHeadwords(entry)
for _, headword := range headwords {
if newTerms, ok := extractTerms(headword, entry, meta); ok {
if newTerms, ok := jmdictTerms(headword, entry, meta); ok {
terms = append(terms, newTerms...)
}
}

View File

@ -172,18 +172,20 @@ func formsGlossary(headwords []headword) []any {
return glossary
}
func baseFormsTerm(entry jmdict.JmdictEntry) dbTerm {
func baseFormsTerm(entry jmdict.JmdictEntry, meta jmdictMetadata) dbTerm {
term := dbTerm{Sequence: entry.Sequence}
headwords := extractHeadwords(entry)
if needsFormTable(headwords) {
term.Glossary = formsTableGlossary(headwords)
} else {
term.Glossary = formsGlossary(headwords)
}
for _, sense := range entry.Sense {
rules := grammarRules(sense.PartsOfSpeech)
term.addRules(rules...)
}
partsOfSpeech := meta.seqToPartsOfSpeech[entry.Sequence]
rules := grammarRules(partsOfSpeech)
term.addRules(rules...)
return term
}
@ -203,11 +205,11 @@ func formsExportDb(inputPath, outputPath, languageName, title string, stride int
terms := dbTermList{}
for _, entry := range dictionary.Entries {
baseTerm := baseFormsTerm(entry)
baseTerm := baseFormsTerm(entry, meta)
headwords := extractHeadwords(entry)
for _, h := range headwords {
if h.IsSearchOnly {
if term, ok := createSearchTerm(h, entry, meta); ok {
if term, ok := jmdictSearchTerm(h, entry, meta); ok {
terms = append(terms, term)
}
continue

View File

@ -13,6 +13,7 @@ type jmdictMetadata struct {
language string
condensedGlosses map[senseID]string
seqToSenseCount map[sequence]int
seqToPartsOfSpeech map[sequence][]string
seqToMainHeadword map[sequence]headword
expHashToReadings map[hash][]string
headwordHashToSeqs map[hash][]sequence
@ -31,7 +32,7 @@ type senseID struct {
number int
}
func (meta *jmdictMetadata) CalculateEntryDepth(headwords []headword, entrySequence sequence) {
func (meta *jmdictMetadata) CalculateEntryDepth(headwords []headword, seq sequence) {
// This is to ensure that terms are grouped among their
// entries of origin and displayed in correct sequential order
maxDepth := 0
@ -48,39 +49,63 @@ func (meta *jmdictMetadata) CalculateEntryDepth(headwords []headword, entrySeque
}
}
}
meta.entryDepth[entrySequence] = maxDepth
meta.entryDepth[seq] = maxDepth
}
func (meta *jmdictMetadata) AddHeadword(headword headword, entry jmdict.JmdictEntry) {
// Determine how many senses are in this entry for this language
if _, ok := meta.seqToSenseCount[entry.Sequence]; !ok {
senseCount := 0
for _, entrySense := range entry.Sense {
for _, gloss := range entrySense.Glossary {
if glossContainsLanguage(gloss, meta.language) {
senseCount += 1
break
}
func (meta *jmdictMetadata) AddEntry(entry jmdict.JmdictEntry) {
partsOfSpeech := []string{}
senseCount := 0
for _, sense := range entry.Sense {
// Only English-language senses contain part-of-speech info,
// but other languages need them for deinflection rules.
for _, pos := range sense.PartsOfSpeech {
if !slices.Contains(partsOfSpeech, pos) {
partsOfSpeech = append(partsOfSpeech, pos)
}
}
meta.seqToSenseCount[entry.Sequence] = senseCount
}
if meta.seqToSenseCount[entry.Sequence] == 0 {
if glossaryContainsLanguage(sense.Glossary, meta.language) {
senseCount += 1
} else {
continue
}
for _, reference := range sense.References {
meta.references = append(meta.references, reference)
}
for _, antonym := range sense.Antonyms {
meta.references = append(meta.references, antonym)
}
currentSenseID := senseID{entry.Sequence, senseCount}
glosses := []string{}
for _, gloss := range sense.Glossary {
if glossContainsLanguage(gloss, meta.language) && gloss.Type == nil {
glosses = append(glosses, gloss.Content)
}
}
meta.condensedGlosses[currentSenseID] = strings.Join(glosses, "; ")
}
meta.seqToPartsOfSpeech[entry.Sequence] = partsOfSpeech
meta.seqToSenseCount[entry.Sequence] = senseCount
}
func (meta *jmdictMetadata) AddHeadword(headword headword, seq sequence) {
if meta.seqToSenseCount[seq] == 0 {
return
}
// main headwords (first ones that are found in entries).
if _, ok := meta.seqToMainHeadword[entry.Sequence]; !ok {
meta.seqToMainHeadword[entry.Sequence] = headword
if _, ok := meta.seqToMainHeadword[seq]; !ok {
meta.seqToMainHeadword[seq] = headword
}
// hash the term pair so we can determine if it's used
// in more than one JMdict entry later.
headwordHash := headword.Hash()
if !slices.Contains(meta.headwordHashToSeqs[headwordHash], entry.Sequence) {
meta.headwordHashToSeqs[headwordHash] = append(meta.headwordHashToSeqs[headwordHash], entry.Sequence)
if !slices.Contains(meta.headwordHashToSeqs[headwordHash], seq) {
meta.headwordHashToSeqs[headwordHash] =
append(meta.headwordHashToSeqs[headwordHash], seq)
}
// hash the expression so that we can determine if we
@ -88,7 +113,8 @@ func (meta *jmdictMetadata) AddHeadword(headword headword, entry jmdict.JmdictEn
// in reference notes later.
expHash := headword.ExpHash()
if !slices.Contains(meta.expHashToReadings[expHash], headword.Reading) {
meta.expHashToReadings[expHash] = append(meta.expHashToReadings[expHash], headword.Reading)
meta.expHashToReadings[expHash] =
append(meta.expHashToReadings[expHash], headword.Reading)
}
// e.g. for JMdict (English) we expect to end up with
@ -100,48 +126,17 @@ func (meta *jmdictMetadata) AddHeadword(headword headword, entry jmdict.JmdictEn
searchHash{headword.ReadingHash(), headword.IsPriority},
}
for _, x := range searchHashes {
if !slices.Contains(meta.seqToSearchHashes[entry.Sequence], x) {
meta.seqToSearchHashes[entry.Sequence] = append(meta.seqToSearchHashes[entry.Sequence], x)
if !slices.Contains(meta.seqToSearchHashes[seq], x) {
meta.seqToSearchHashes[seq] = append(meta.seqToSearchHashes[seq], x)
}
}
currentSenseNumber := 1
for _, entrySense := range entry.Sense {
if !glossaryContainsLanguage(entrySense.Glossary, meta.language) {
continue
}
if entrySense.RestrictedReadings != nil && !slices.Contains(entrySense.RestrictedReadings, headword.Reading) {
currentSenseNumber += 1
continue
}
if entrySense.RestrictedKanji != nil && !slices.Contains(entrySense.RestrictedKanji, headword.Expression) {
currentSenseNumber += 1
continue
}
allReferences := append(entrySense.References, entrySense.Antonyms...)
for _, reference := range allReferences {
meta.references = append(meta.references, reference)
}
currentSense := senseID{entry.Sequence, currentSenseNumber}
if meta.condensedGlosses[currentSense] == "" {
glosses := []string{}
for _, gloss := range entrySense.Glossary {
if glossContainsLanguage(gloss, meta.language) && gloss.Type == nil {
glosses = append(glosses, gloss.Content)
}
}
meta.condensedGlosses[currentSense] = strings.Join(glosses, "; ")
}
currentSenseNumber += 1
}
}
func newJmdictMetadata(dictionary jmdict.Jmdict, languageName string) jmdictMetadata {
meta := jmdictMetadata{
language: langNameToCode[languageName],
seqToSenseCount: make(map[sequence]int),
seqToPartsOfSpeech: make(map[sequence][]string),
condensedGlosses: make(map[senseID]string),
seqToMainHeadword: make(map[sequence]headword),
expHashToReadings: make(map[hash][]string),
@ -157,10 +152,11 @@ func newJmdictMetadata(dictionary jmdict.Jmdict, languageName string) jmdictMeta
}
for _, entry := range dictionary.Entries {
meta.AddEntry(entry)
headwords := extractHeadwords(entry)
formCount := 0
for _, headword := range headwords {
meta.AddHeadword(headword, entry)
meta.AddHeadword(headword, entry.Sequence)
if !headword.IsSearchOnly {
formCount += 1
}

View File

@ -96,39 +96,39 @@ func knownEntityTags() []dbTag {
// <misc> miscellaneous sense info
dbTag{Name: "abbr", Order: 0, Score: 0, Category: ""}, // abbreviation
dbTag{Name: "arch", Order: -4, Score: 0, Category: "archaism"}, // archaism
dbTag{Name: "char", Order: 0, Score: 0, Category: ""}, // character
dbTag{Name: "char", Order: 4, Score: 0, Category: "name"}, // character
dbTag{Name: "chn", Order: 0, Score: 0, Category: ""}, // children's language
dbTag{Name: "col", Order: 0, Score: 0, Category: ""}, // colloquialism
dbTag{Name: "company", Order: 4, Score: 0, Category: "name"}, // company name
dbTag{Name: "creat", Order: 0, Score: 0, Category: ""}, // creature
dbTag{Name: "creat", Order: 4, Score: 0, Category: "name"}, // creature
dbTag{Name: "dated", Order: -4, Score: 0, Category: "archaism"}, // dated term
dbTag{Name: "dei", Order: 0, Score: 0, Category: ""}, // deity
dbTag{Name: "dei", Order: 4, Score: 0, Category: "name"}, // deity
dbTag{Name: "derog", Order: 0, Score: 0, Category: ""}, // derogatory
dbTag{Name: "doc", Order: 0, Score: 0, Category: ""}, // document
dbTag{Name: "doc", Order: 4, Score: 0, Category: "name"}, // document
dbTag{Name: "euph", Order: 0, Score: 0, Category: ""}, // euphemistic
dbTag{Name: "ev", Order: 0, Score: 0, Category: ""}, // event
dbTag{Name: "ev", Order: 4, Score: 0, Category: "name"}, // event
dbTag{Name: "fam", Order: 0, Score: 0, Category: ""}, // familiar language
dbTag{Name: "fem", Order: 4, Score: 0, Category: "name"}, // female term, language, or name
dbTag{Name: "fict", Order: 0, Score: 0, Category: ""}, // fiction
dbTag{Name: "fict", Order: 4, Score: 0, Category: "name"}, // fiction
dbTag{Name: "form", Order: 0, Score: 0, Category: ""}, // formal or literary term
dbTag{Name: "given", Order: 4, Score: 0, Category: "name"}, // given name or forename, gender not specified
dbTag{Name: "group", Order: 0, Score: 0, Category: ""}, // group
dbTag{Name: "group", Order: 4, Score: 0, Category: "name"}, // group
dbTag{Name: "hist", Order: 0, Score: 0, Category: ""}, // historical term
dbTag{Name: "hon", Order: 0, Score: 0, Category: ""}, // honorific or respectful (sonkeigo) language
dbTag{Name: "hum", Order: 0, Score: 0, Category: ""}, // humble (kenjougo) language
dbTag{Name: "id", Order: -5, Score: 0, Category: "expression"}, // idiomatic expression
dbTag{Name: "joc", Order: 0, Score: 0, Category: ""}, // jocular, humorous term
dbTag{Name: "leg", Order: 0, Score: 0, Category: ""}, // legend
dbTag{Name: "leg", Order: 4, Score: 0, Category: "name"}, // legend
dbTag{Name: "m-sl", Order: 0, Score: 0, Category: ""}, // manga slang
dbTag{Name: "male", Order: 4, Score: 0, Category: "name"}, // male term, language, or name
dbTag{Name: "masc", Order: 4, Score: 0, Category: "name"}, // male term, language, or name
dbTag{Name: "myth", Order: 0, Score: 0, Category: ""}, // mythology
dbTag{Name: "myth", Order: 4, Score: 0, Category: "name"}, // mythology
dbTag{Name: "net-sl", Order: 0, Score: 0, Category: ""}, // Internet slang
dbTag{Name: "obj", Order: 0, Score: 0, Category: ""}, // object
dbTag{Name: "obj", Order: 4, Score: 0, Category: "name"}, // object
dbTag{Name: "obs", Order: -4, Score: 0, Category: "archaism"}, // obsolete term
dbTag{Name: "on-mim", Order: 0, Score: 0, Category: ""}, // onomatopoeic or mimetic word
dbTag{Name: "organization", Order: 4, Score: 0, Category: "name"}, // organization name
dbTag{Name: "oth", Order: 0, Score: 0, Category: ""}, // other
dbTag{Name: "oth", Order: 4, Score: 0, Category: "name"}, // other
dbTag{Name: "person", Order: 4, Score: 0, Category: "name"}, // full name of a particular person
dbTag{Name: "place", Order: 4, Score: 0, Category: "name"}, // place name
dbTag{Name: "poet", Order: 0, Score: 0, Category: ""}, // poetical term
@ -137,10 +137,10 @@ func knownEntityTags() []dbTag {
dbTag{Name: "proverb", Order: 0, Score: 0, Category: "expression"}, // proverb
dbTag{Name: "quote", Order: 0, Score: 0, Category: "expression"}, // quotation
dbTag{Name: "rare", Order: -4, Score: 0, Category: "archaism"}, // rare
dbTag{Name: "relig", Order: 0, Score: 0, Category: ""}, // religion
dbTag{Name: "relig", Order: 4, Score: 0, Category: "name"}, // religion
dbTag{Name: "sens", Order: 0, Score: 0, Category: ""}, // sensitive
dbTag{Name: "serv", Order: 0, Score: 0, Category: ""}, // service
dbTag{Name: "ship", Order: 0, Score: 0, Category: ""}, // ship name
dbTag{Name: "serv", Order: 4, Score: 0, Category: "name"}, // service
dbTag{Name: "ship", Order: 4, Score: 0, Category: "name"}, // ship name
dbTag{Name: "sl", Order: 0, Score: 0, Category: ""}, // slang
dbTag{Name: "station", Order: 4, Score: 0, Category: "name"}, // railway station
dbTag{Name: "surname", Order: 4, Score: 0, Category: "name"}, // family or surname

View File

@ -8,19 +8,22 @@ import (
)
func jmnedictPublicationDate(dictionary jmdict.Jmnedict) string {
unknownDate := "unknown"
idx := len(dictionary.Entries) - 1
if len(dictionary.Entries) == 0 {
return "unknown"
}
dateEntry := dictionary.Entries[len(dictionary.Entries)-1]
if len(dateEntry.Translations) == 0 || len(dateEntry.Translations[0].Translations) == 0 {
return "unknown"
return unknownDate
} else if len(dictionary.Entries[idx].Translations) == 0 {
return unknownDate
} else if len(dictionary.Entries[idx].Translations[0].Translations) == 0 {
return unknownDate
}
dateGloss := dictionary.Entries[idx].Translations[0].Translations[0]
r := regexp.MustCompile(`\d{4}-\d{2}-\d{2}`)
jmnedictDate := r.FindString(dateEntry.Translations[0].Translations[0])
if jmnedictDate != "" {
return jmnedictDate
date := r.FindString(dateGloss)
if date != "" {
return date
} else {
return "unknown"
return unknownDate
}
}

View File

@ -44,10 +44,7 @@ func replaceIterationMarks(text string) string {
// Returns nil if no segmentation is possible.
func makeKanaSegments(kana string) (segments []string) {
hiragana := replaceIterationMarks(katakanaToHiragana(kana))
kanaRunes := []rune{}
for _, kanaRune := range hiragana {
kanaRunes = append(kanaRunes, kanaRune)
}
kanaRunes := []rune(hiragana)
kanaRuneCount := len(kanaRunes)
for i := 0; i < kanaRuneCount; i++ {
for j := 0; j < kanaRuneCount-i; j++ {