Add "forms" term in special circumstances
If a headword appears in multiple entries, then each of those entries needs a corresponding "forms" term in the output dictionary. For example, 軽卒 is the only headword in entry 2275730, but 軽卒 also appears as an irregular form in entry 1252910. If a "forms" term is not included for the former entry, then 軽卒 will appear to be irregular for all of its senses in the output dictionary.
This commit is contained in:
parent
406067eedd
commit
7bd967915c
61
jmdict.go
61
jmdict.go
@@ -62,7 +62,21 @@ func jmdictPublicationDate(dictionary jmdict.Jmdict) string {
|
||||
return jmdictDate
|
||||
}
|
||||
|
||||
func createFormsTerm(headword headword, entry jmdict.JmdictEntry, meta jmdictMetadata) dbTerm {
|
||||
func createFormsTerm(headword headword, entry jmdict.JmdictEntry, meta jmdictMetadata) (dbTerm, bool) {
|
||||
// Don't add "forms" terms to non-English dictionaries.
|
||||
// Information would be duplicated if users installed more
|
||||
// than one version.
|
||||
if meta.language != "eng" {
|
||||
return dbTerm{}, false
|
||||
}
|
||||
// Don't need a "forms" term for entries with one unique
|
||||
// headword which does not appear in any other entries.
|
||||
if !meta.hasMultipleForms[entry.Sequence] {
|
||||
if len(meta.headwordHashToSeqs[headword.Hash()]) == 1 {
|
||||
return dbTerm{}, false
|
||||
}
|
||||
}
|
||||
|
||||
term := baseFormsTerm(entry)
|
||||
term.Expression = headword.Expression
|
||||
term.Reading = headword.Reading
|
||||
@@ -72,10 +86,17 @@ func createFormsTerm(headword headword, entry jmdict.JmdictEntry, meta jmdictMet
|
||||
term.addDefinitionTags("forms")
|
||||
senseNumber := meta.seqToSenseCount[entry.Sequence] + 1
|
||||
term.Score = calculateTermScore(senseNumber, headword)
|
||||
return term
|
||||
return term, true
|
||||
}
|
||||
|
||||
func createSearchTerm(headword headword, entry jmdict.JmdictEntry, meta jmdictMetadata) dbTerm {
|
||||
func createSearchTerm(headword headword, entry jmdict.JmdictEntry, meta jmdictMetadata) (dbTerm, bool) {
|
||||
// Don't add "search" terms to non-English dictionaries.
|
||||
// Information would be duplicated if users installed more
|
||||
// than one version.
|
||||
if meta.language != "eng" {
|
||||
return dbTerm{}, false
|
||||
}
|
||||
|
||||
term := dbTerm{
|
||||
Expression: headword.Expression,
|
||||
Sequence: -entry.Sequence,
|
||||
@@ -98,10 +119,17 @@ func createSearchTerm(headword headword, entry jmdict.JmdictEntry, meta jmdictMe
|
||||
)
|
||||
|
||||
term.Glossary = []any{contentStructure(content)}
|
||||
return term
|
||||
return term, true
|
||||
}
|
||||
|
||||
func createSenseTerm(sense jmdict.JmdictSense, senseNumber int, headword headword, entry jmdict.JmdictEntry, meta jmdictMetadata) dbTerm {
|
||||
func createSenseTerm(sense jmdict.JmdictSense, senseNumber int, headword headword, entry jmdict.JmdictEntry, meta jmdictMetadata) (dbTerm, bool) {
|
||||
if sense.RestrictedReadings != nil && !slices.Contains(sense.RestrictedReadings, headword.Reading) {
|
||||
return dbTerm{}, false
|
||||
}
|
||||
if sense.RestrictedKanji != nil && !slices.Contains(sense.RestrictedKanji, headword.Expression) {
|
||||
return dbTerm{}, false
|
||||
}
|
||||
|
||||
term := dbTerm{
|
||||
Expression: headword.Expression,
|
||||
Reading: headword.Reading,
|
||||
@@ -126,7 +154,7 @@ func createSenseTerm(sense jmdict.JmdictSense, senseNumber int, headword headwor
|
||||
|
||||
term.Score = calculateTermScore(senseNumber, headword)
|
||||
|
||||
return term
|
||||
return term, true
|
||||
}
|
||||
|
||||
func extractTerms(headword headword, entry jmdict.JmdictEntry, meta jmdictMetadata) ([]dbTerm, bool) {
|
||||
@@ -134,8 +162,7 @@ func extractTerms(headword headword, entry jmdict.JmdictEntry, meta jmdictMetada
|
||||
return nil, false
|
||||
}
|
||||
if headword.IsSearchOnly {
|
||||
if meta.language == "eng" {
|
||||
searchTerm := createSearchTerm(headword, entry, meta)
|
||||
if searchTerm, ok := createSearchTerm(headword, entry, meta); ok {
|
||||
return []dbTerm{searchTerm}, true
|
||||
} else {
|
||||
return nil, false
|
||||
@@ -145,25 +172,19 @@ func extractTerms(headword headword, entry jmdict.JmdictEntry, meta jmdictMetada
|
||||
senseNumber := 1
|
||||
for _, sense := range entry.Sense {
|
||||
if !glossaryContainsLanguage(sense.Glossary, meta.language) {
|
||||
// Do not increment sense number
|
||||
continue
|
||||
}
|
||||
if sense.RestrictedReadings != nil && !slices.Contains(sense.RestrictedReadings, headword.Reading) {
|
||||
senseNumber += 1
|
||||
continue
|
||||
}
|
||||
if sense.RestrictedKanji != nil && !slices.Contains(sense.RestrictedKanji, headword.Expression) {
|
||||
senseNumber += 1
|
||||
continue
|
||||
}
|
||||
senseTerm := createSenseTerm(sense, senseNumber, headword, entry, meta)
|
||||
senseNumber += 1
|
||||
if senseTerm, ok := createSenseTerm(sense, senseNumber, headword, entry, meta); ok {
|
||||
terms = append(terms, senseTerm)
|
||||
}
|
||||
senseNumber += 1
|
||||
}
|
||||
|
||||
if meta.hasMultipleForms[entry.Sequence] && meta.language == "eng" {
|
||||
formsTerm := createFormsTerm(headword, entry, meta)
|
||||
if formsTerm, ok := createFormsTerm(headword, entry, meta); ok {
|
||||
terms = append(terms, formsTerm)
|
||||
}
|
||||
|
||||
return terms, true
|
||||
}
|
||||
|
||||
|
@@ -210,23 +210,24 @@ func formsExportDb(inputPath, outputPath, languageName, title string, stride int
|
||||
return err
|
||||
}
|
||||
|
||||
meta := newJmdictMetadata(dictionary, languageName)
|
||||
meta := newJmdictMetadata(dictionary, "english")
|
||||
|
||||
terms := dbTermList{}
|
||||
for _, entry := range dictionary.Entries {
|
||||
baseTerm := baseFormsTerm(entry)
|
||||
headwords := extractHeadwords(entry)
|
||||
for _, h := range headwords {
|
||||
var term dbTerm
|
||||
if h.IsSearchOnly {
|
||||
term = createSearchTerm(h, entry, meta)
|
||||
} else {
|
||||
term = baseTerm
|
||||
if term, ok := createSearchTerm(h, entry, meta); ok {
|
||||
terms = append(terms, term)
|
||||
}
|
||||
continue
|
||||
}
|
||||
term := baseTerm
|
||||
term.Expression = h.Expression
|
||||
term.Reading = h.Reading
|
||||
term.addTermTags(h.TermTags...)
|
||||
term.Score = calculateTermScore(1, h)
|
||||
}
|
||||
terms = append(terms, term)
|
||||
}
|
||||
}
|
||||
|
Loading…
Reference in New Issue
Block a user