1

Add "forms" term in special circumstances

If a headword appears in multiple entries, then each entry needs a
corresponding "forms" term in the output dictionary.

For example, 軽卒 is the only headword in entry 2275730, but 軽卒 also
appears as an irregular form in entry 1252910. If no "forms" term is
included for entry 2275730, the output dictionary will make 軽卒 look
irregular in every sense, including the senses of entry 2275730, where
it is the only headword.
This commit is contained in:
stephenmk 2023-01-25 18:26:47 -06:00
parent 406067eedd
commit 7bd967915c
No known key found for this signature in database
GPG Key ID: B6DA730DB06235F1
2 changed files with 50 additions and 28 deletions

View File

@ -62,7 +62,21 @@ func jmdictPublicationDate(dictionary jmdict.Jmdict) string {
return jmdictDate return jmdictDate
} }
func createFormsTerm(headword headword, entry jmdict.JmdictEntry, meta jmdictMetadata) dbTerm { func createFormsTerm(headword headword, entry jmdict.JmdictEntry, meta jmdictMetadata) (dbTerm, bool) {
// Don't add "forms" terms to non-English dictionaries.
// Information would be duplicated if users installed more
// than one version.
if meta.language != "eng" {
return dbTerm{}, false
}
// Don't need a "forms" term for entries with one unique
// headword which does not appear in any other entries.
if !meta.hasMultipleForms[entry.Sequence] {
if len(meta.headwordHashToSeqs[headword.Hash()]) == 1 {
return dbTerm{}, false
}
}
term := baseFormsTerm(entry) term := baseFormsTerm(entry)
term.Expression = headword.Expression term.Expression = headword.Expression
term.Reading = headword.Reading term.Reading = headword.Reading
@ -72,10 +86,17 @@ func createFormsTerm(headword headword, entry jmdict.JmdictEntry, meta jmdictMet
term.addDefinitionTags("forms") term.addDefinitionTags("forms")
senseNumber := meta.seqToSenseCount[entry.Sequence] + 1 senseNumber := meta.seqToSenseCount[entry.Sequence] + 1
term.Score = calculateTermScore(senseNumber, headword) term.Score = calculateTermScore(senseNumber, headword)
return term return term, true
} }
func createSearchTerm(headword headword, entry jmdict.JmdictEntry, meta jmdictMetadata) dbTerm { func createSearchTerm(headword headword, entry jmdict.JmdictEntry, meta jmdictMetadata) (dbTerm, bool) {
// Don't add "search" terms to non-English dictionaries.
// Information would be duplicated if users installed more
// than one version.
if meta.language != "eng" {
return dbTerm{}, false
}
term := dbTerm{ term := dbTerm{
Expression: headword.Expression, Expression: headword.Expression,
Sequence: -entry.Sequence, Sequence: -entry.Sequence,
@ -98,10 +119,17 @@ func createSearchTerm(headword headword, entry jmdict.JmdictEntry, meta jmdictMe
) )
term.Glossary = []any{contentStructure(content)} term.Glossary = []any{contentStructure(content)}
return term return term, true
} }
func createSenseTerm(sense jmdict.JmdictSense, senseNumber int, headword headword, entry jmdict.JmdictEntry, meta jmdictMetadata) dbTerm { func createSenseTerm(sense jmdict.JmdictSense, senseNumber int, headword headword, entry jmdict.JmdictEntry, meta jmdictMetadata) (dbTerm, bool) {
if sense.RestrictedReadings != nil && !slices.Contains(sense.RestrictedReadings, headword.Reading) {
return dbTerm{}, false
}
if sense.RestrictedKanji != nil && !slices.Contains(sense.RestrictedKanji, headword.Expression) {
return dbTerm{}, false
}
term := dbTerm{ term := dbTerm{
Expression: headword.Expression, Expression: headword.Expression,
Reading: headword.Reading, Reading: headword.Reading,
@ -126,7 +154,7 @@ func createSenseTerm(sense jmdict.JmdictSense, senseNumber int, headword headwor
term.Score = calculateTermScore(senseNumber, headword) term.Score = calculateTermScore(senseNumber, headword)
return term return term, true
} }
func extractTerms(headword headword, entry jmdict.JmdictEntry, meta jmdictMetadata) ([]dbTerm, bool) { func extractTerms(headword headword, entry jmdict.JmdictEntry, meta jmdictMetadata) ([]dbTerm, bool) {
@ -134,8 +162,7 @@ func extractTerms(headword headword, entry jmdict.JmdictEntry, meta jmdictMetada
return nil, false return nil, false
} }
if headword.IsSearchOnly { if headword.IsSearchOnly {
if meta.language == "eng" { if searchTerm, ok := createSearchTerm(headword, entry, meta); ok {
searchTerm := createSearchTerm(headword, entry, meta)
return []dbTerm{searchTerm}, true return []dbTerm{searchTerm}, true
} else { } else {
return nil, false return nil, false
@ -145,25 +172,19 @@ func extractTerms(headword headword, entry jmdict.JmdictEntry, meta jmdictMetada
senseNumber := 1 senseNumber := 1
for _, sense := range entry.Sense { for _, sense := range entry.Sense {
if !glossaryContainsLanguage(sense.Glossary, meta.language) { if !glossaryContainsLanguage(sense.Glossary, meta.language) {
// Do not increment sense number
continue continue
} }
if sense.RestrictedReadings != nil && !slices.Contains(sense.RestrictedReadings, headword.Reading) { if senseTerm, ok := createSenseTerm(sense, senseNumber, headword, entry, meta); ok {
senseNumber += 1 terms = append(terms, senseTerm)
continue
} }
if sense.RestrictedKanji != nil && !slices.Contains(sense.RestrictedKanji, headword.Expression) {
senseNumber += 1
continue
}
senseTerm := createSenseTerm(sense, senseNumber, headword, entry, meta)
senseNumber += 1 senseNumber += 1
terms = append(terms, senseTerm)
} }
if meta.hasMultipleForms[entry.Sequence] && meta.language == "eng" { if formsTerm, ok := createFormsTerm(headword, entry, meta); ok {
formsTerm := createFormsTerm(headword, entry, meta)
terms = append(terms, formsTerm) terms = append(terms, formsTerm)
} }
return terms, true return terms, true
} }

View File

@ -210,23 +210,24 @@ func formsExportDb(inputPath, outputPath, languageName, title string, stride int
return err return err
} }
meta := newJmdictMetadata(dictionary, languageName) meta := newJmdictMetadata(dictionary, "english")
terms := dbTermList{} terms := dbTermList{}
for _, entry := range dictionary.Entries { for _, entry := range dictionary.Entries {
baseTerm := baseFormsTerm(entry) baseTerm := baseFormsTerm(entry)
headwords := extractHeadwords(entry) headwords := extractHeadwords(entry)
for _, h := range headwords { for _, h := range headwords {
var term dbTerm
if h.IsSearchOnly { if h.IsSearchOnly {
term = createSearchTerm(h, entry, meta) if term, ok := createSearchTerm(h, entry, meta); ok {
} else { terms = append(terms, term)
term = baseTerm }
term.Expression = h.Expression continue
term.Reading = h.Reading
term.addTermTags(h.TermTags...)
term.Score = calculateTermScore(1, h)
} }
term := baseTerm
term.Expression = h.Expression
term.Reading = h.Reading
term.addTermTags(h.TermTags...)
term.Score = calculateTermScore(1, h)
terms = append(terms, term) terms = append(terms, term)
} }
} }