1

Add "forms" term in special circumstances

If a headword appears in multiple entries, then each entry needs a
corresponding "forms" term in the output dictionary.

For example, 軽卒 is the only headword in entry 2275730, but 軽卒 also
appears as an irregular form in entry 1252910. If a "forms" term is
not included for the former entry, then the output dictionary will make
it appear that 軽卒 is irregular for all of its senses, including the
senses that belong to entry 2275730.
This commit is contained in:
stephenmk 2023-01-25 18:26:47 -06:00
parent 406067eedd
commit 7bd967915c
No known key found for this signature in database
GPG Key ID: B6DA730DB06235F1
2 changed files with 50 additions and 28 deletions

View File

@ -62,7 +62,21 @@ func jmdictPublicationDate(dictionary jmdict.Jmdict) string {
return jmdictDate
}
func createFormsTerm(headword headword, entry jmdict.JmdictEntry, meta jmdictMetadata) dbTerm {
func createFormsTerm(headword headword, entry jmdict.JmdictEntry, meta jmdictMetadata) (dbTerm, bool) {
// Don't add "forms" terms to non-English dictionaries.
// Information would be duplicated if users installed more
// than one version.
if meta.language != "eng" {
return dbTerm{}, false
}
// Don't need a "forms" term for entries with one unique
// headword which does not appear in any other entries.
if !meta.hasMultipleForms[entry.Sequence] {
if len(meta.headwordHashToSeqs[headword.Hash()]) == 1 {
return dbTerm{}, false
}
}
term := baseFormsTerm(entry)
term.Expression = headword.Expression
term.Reading = headword.Reading
@ -72,10 +86,17 @@ func createFormsTerm(headword headword, entry jmdict.JmdictEntry, meta jmdictMet
term.addDefinitionTags("forms")
senseNumber := meta.seqToSenseCount[entry.Sequence] + 1
term.Score = calculateTermScore(senseNumber, headword)
return term
return term, true
}
func createSearchTerm(headword headword, entry jmdict.JmdictEntry, meta jmdictMetadata) dbTerm {
func createSearchTerm(headword headword, entry jmdict.JmdictEntry, meta jmdictMetadata) (dbTerm, bool) {
// Don't add "search" terms to non-English dictionaries.
// Information would be duplicated if users installed more
// than one version.
if meta.language != "eng" {
return dbTerm{}, false
}
term := dbTerm{
Expression: headword.Expression,
Sequence: -entry.Sequence,
@ -98,10 +119,17 @@ func createSearchTerm(headword headword, entry jmdict.JmdictEntry, meta jmdictMe
)
term.Glossary = []any{contentStructure(content)}
return term
return term, true
}
func createSenseTerm(sense jmdict.JmdictSense, senseNumber int, headword headword, entry jmdict.JmdictEntry, meta jmdictMetadata) dbTerm {
func createSenseTerm(sense jmdict.JmdictSense, senseNumber int, headword headword, entry jmdict.JmdictEntry, meta jmdictMetadata) (dbTerm, bool) {
if sense.RestrictedReadings != nil && !slices.Contains(sense.RestrictedReadings, headword.Reading) {
return dbTerm{}, false
}
if sense.RestrictedKanji != nil && !slices.Contains(sense.RestrictedKanji, headword.Expression) {
return dbTerm{}, false
}
term := dbTerm{
Expression: headword.Expression,
Reading: headword.Reading,
@ -126,7 +154,7 @@ func createSenseTerm(sense jmdict.JmdictSense, senseNumber int, headword headwor
term.Score = calculateTermScore(senseNumber, headword)
return term
return term, true
}
func extractTerms(headword headword, entry jmdict.JmdictEntry, meta jmdictMetadata) ([]dbTerm, bool) {
@ -134,8 +162,7 @@ func extractTerms(headword headword, entry jmdict.JmdictEntry, meta jmdictMetada
return nil, false
}
if headword.IsSearchOnly {
if meta.language == "eng" {
searchTerm := createSearchTerm(headword, entry, meta)
if searchTerm, ok := createSearchTerm(headword, entry, meta); ok {
return []dbTerm{searchTerm}, true
} else {
return nil, false
@ -145,25 +172,19 @@ func extractTerms(headword headword, entry jmdict.JmdictEntry, meta jmdictMetada
senseNumber := 1
for _, sense := range entry.Sense {
if !glossaryContainsLanguage(sense.Glossary, meta.language) {
// Do not increment sense number
continue
}
if sense.RestrictedReadings != nil && !slices.Contains(sense.RestrictedReadings, headword.Reading) {
senseNumber += 1
continue
if senseTerm, ok := createSenseTerm(sense, senseNumber, headword, entry, meta); ok {
terms = append(terms, senseTerm)
}
if sense.RestrictedKanji != nil && !slices.Contains(sense.RestrictedKanji, headword.Expression) {
senseNumber += 1
continue
}
senseTerm := createSenseTerm(sense, senseNumber, headword, entry, meta)
senseNumber += 1
terms = append(terms, senseTerm)
}
if meta.hasMultipleForms[entry.Sequence] && meta.language == "eng" {
formsTerm := createFormsTerm(headword, entry, meta)
if formsTerm, ok := createFormsTerm(headword, entry, meta); ok {
terms = append(terms, formsTerm)
}
return terms, true
}

View File

@ -210,23 +210,24 @@ func formsExportDb(inputPath, outputPath, languageName, title string, stride int
return err
}
meta := newJmdictMetadata(dictionary, languageName)
meta := newJmdictMetadata(dictionary, "english")
terms := dbTermList{}
for _, entry := range dictionary.Entries {
baseTerm := baseFormsTerm(entry)
headwords := extractHeadwords(entry)
for _, h := range headwords {
var term dbTerm
if h.IsSearchOnly {
term = createSearchTerm(h, entry, meta)
} else {
term = baseTerm
term.Expression = h.Expression
term.Reading = h.Reading
term.addTermTags(h.TermTags...)
term.Score = calculateTermScore(1, h)
if term, ok := createSearchTerm(h, entry, meta); ok {
terms = append(terms, term)
}
continue
}
term := baseTerm
term.Expression = h.Expression
term.Reading = h.Reading
term.addTermTags(h.TermTags...)
term.Score = calculateTermScore(1, h)
terms = append(terms, term)
}
}