Add "forms" term in special circumstances
If a headword appears in multiple entries, then each of those entries needs a corresponding "forms" term in the output dictionary. For example, 軽卒 is the only headword in entry 2275730, but 軽卒 also appears as an irregular form in entry 1252910. If a "forms" term is not included for the former entry (2275730), the only "forms" information shown for 軽卒 comes from entry 1252910, so the output dictionary makes it appear that 軽卒 is irregular for all of its senses.
This commit is contained in:
parent
406067eedd
commit
7bd967915c
59
jmdict.go
59
jmdict.go
@ -62,7 +62,21 @@ func jmdictPublicationDate(dictionary jmdict.Jmdict) string {
|
|||||||
return jmdictDate
|
return jmdictDate
|
||||||
}
|
}
|
||||||
|
|
||||||
func createFormsTerm(headword headword, entry jmdict.JmdictEntry, meta jmdictMetadata) dbTerm {
|
func createFormsTerm(headword headword, entry jmdict.JmdictEntry, meta jmdictMetadata) (dbTerm, bool) {
|
||||||
|
// Don't add "forms" terms to non-English dictionaries.
|
||||||
|
// Information would be duplicated if users installed more
|
||||||
|
// than one version.
|
||||||
|
if meta.language != "eng" {
|
||||||
|
return dbTerm{}, false
|
||||||
|
}
|
||||||
|
// Don't need a "forms" term for entries with one unique
|
||||||
|
// headword which does not appear in any other entries.
|
||||||
|
if !meta.hasMultipleForms[entry.Sequence] {
|
||||||
|
if len(meta.headwordHashToSeqs[headword.Hash()]) == 1 {
|
||||||
|
return dbTerm{}, false
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
term := baseFormsTerm(entry)
|
term := baseFormsTerm(entry)
|
||||||
term.Expression = headword.Expression
|
term.Expression = headword.Expression
|
||||||
term.Reading = headword.Reading
|
term.Reading = headword.Reading
|
||||||
@ -72,10 +86,17 @@ func createFormsTerm(headword headword, entry jmdict.JmdictEntry, meta jmdictMet
|
|||||||
term.addDefinitionTags("forms")
|
term.addDefinitionTags("forms")
|
||||||
senseNumber := meta.seqToSenseCount[entry.Sequence] + 1
|
senseNumber := meta.seqToSenseCount[entry.Sequence] + 1
|
||||||
term.Score = calculateTermScore(senseNumber, headword)
|
term.Score = calculateTermScore(senseNumber, headword)
|
||||||
return term
|
return term, true
|
||||||
}
|
}
|
||||||
|
|
||||||
func createSearchTerm(headword headword, entry jmdict.JmdictEntry, meta jmdictMetadata) dbTerm {
|
func createSearchTerm(headword headword, entry jmdict.JmdictEntry, meta jmdictMetadata) (dbTerm, bool) {
|
||||||
|
// Don't add "search" terms to non-English dictionaries.
|
||||||
|
// Information would be duplicated if users installed more
|
||||||
|
// than one version.
|
||||||
|
if meta.language != "eng" {
|
||||||
|
return dbTerm{}, false
|
||||||
|
}
|
||||||
|
|
||||||
term := dbTerm{
|
term := dbTerm{
|
||||||
Expression: headword.Expression,
|
Expression: headword.Expression,
|
||||||
Sequence: -entry.Sequence,
|
Sequence: -entry.Sequence,
|
||||||
@ -98,10 +119,17 @@ func createSearchTerm(headword headword, entry jmdict.JmdictEntry, meta jmdictMe
|
|||||||
)
|
)
|
||||||
|
|
||||||
term.Glossary = []any{contentStructure(content)}
|
term.Glossary = []any{contentStructure(content)}
|
||||||
return term
|
return term, true
|
||||||
}
|
}
|
||||||
|
|
||||||
func createSenseTerm(sense jmdict.JmdictSense, senseNumber int, headword headword, entry jmdict.JmdictEntry, meta jmdictMetadata) dbTerm {
|
func createSenseTerm(sense jmdict.JmdictSense, senseNumber int, headword headword, entry jmdict.JmdictEntry, meta jmdictMetadata) (dbTerm, bool) {
|
||||||
|
if sense.RestrictedReadings != nil && !slices.Contains(sense.RestrictedReadings, headword.Reading) {
|
||||||
|
return dbTerm{}, false
|
||||||
|
}
|
||||||
|
if sense.RestrictedKanji != nil && !slices.Contains(sense.RestrictedKanji, headword.Expression) {
|
||||||
|
return dbTerm{}, false
|
||||||
|
}
|
||||||
|
|
||||||
term := dbTerm{
|
term := dbTerm{
|
||||||
Expression: headword.Expression,
|
Expression: headword.Expression,
|
||||||
Reading: headword.Reading,
|
Reading: headword.Reading,
|
||||||
@ -126,7 +154,7 @@ func createSenseTerm(sense jmdict.JmdictSense, senseNumber int, headword headwor
|
|||||||
|
|
||||||
term.Score = calculateTermScore(senseNumber, headword)
|
term.Score = calculateTermScore(senseNumber, headword)
|
||||||
|
|
||||||
return term
|
return term, true
|
||||||
}
|
}
|
||||||
|
|
||||||
func extractTerms(headword headword, entry jmdict.JmdictEntry, meta jmdictMetadata) ([]dbTerm, bool) {
|
func extractTerms(headword headword, entry jmdict.JmdictEntry, meta jmdictMetadata) ([]dbTerm, bool) {
|
||||||
@ -134,8 +162,7 @@ func extractTerms(headword headword, entry jmdict.JmdictEntry, meta jmdictMetada
|
|||||||
return nil, false
|
return nil, false
|
||||||
}
|
}
|
||||||
if headword.IsSearchOnly {
|
if headword.IsSearchOnly {
|
||||||
if meta.language == "eng" {
|
if searchTerm, ok := createSearchTerm(headword, entry, meta); ok {
|
||||||
searchTerm := createSearchTerm(headword, entry, meta)
|
|
||||||
return []dbTerm{searchTerm}, true
|
return []dbTerm{searchTerm}, true
|
||||||
} else {
|
} else {
|
||||||
return nil, false
|
return nil, false
|
||||||
@ -145,25 +172,19 @@ func extractTerms(headword headword, entry jmdict.JmdictEntry, meta jmdictMetada
|
|||||||
senseNumber := 1
|
senseNumber := 1
|
||||||
for _, sense := range entry.Sense {
|
for _, sense := range entry.Sense {
|
||||||
if !glossaryContainsLanguage(sense.Glossary, meta.language) {
|
if !glossaryContainsLanguage(sense.Glossary, meta.language) {
|
||||||
|
// Do not increment sense number
|
||||||
continue
|
continue
|
||||||
}
|
}
|
||||||
if sense.RestrictedReadings != nil && !slices.Contains(sense.RestrictedReadings, headword.Reading) {
|
if senseTerm, ok := createSenseTerm(sense, senseNumber, headword, entry, meta); ok {
|
||||||
senseNumber += 1
|
terms = append(terms, senseTerm)
|
||||||
continue
|
|
||||||
}
|
}
|
||||||
if sense.RestrictedKanji != nil && !slices.Contains(sense.RestrictedKanji, headword.Expression) {
|
|
||||||
senseNumber += 1
|
|
||||||
continue
|
|
||||||
}
|
|
||||||
senseTerm := createSenseTerm(sense, senseNumber, headword, entry, meta)
|
|
||||||
senseNumber += 1
|
senseNumber += 1
|
||||||
terms = append(terms, senseTerm)
|
|
||||||
}
|
}
|
||||||
|
|
||||||
if meta.hasMultipleForms[entry.Sequence] && meta.language == "eng" {
|
if formsTerm, ok := createFormsTerm(headword, entry, meta); ok {
|
||||||
formsTerm := createFormsTerm(headword, entry, meta)
|
|
||||||
terms = append(terms, formsTerm)
|
terms = append(terms, formsTerm)
|
||||||
}
|
}
|
||||||
|
|
||||||
return terms, true
|
return terms, true
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -210,23 +210,24 @@ func formsExportDb(inputPath, outputPath, languageName, title string, stride int
|
|||||||
return err
|
return err
|
||||||
}
|
}
|
||||||
|
|
||||||
meta := newJmdictMetadata(dictionary, languageName)
|
meta := newJmdictMetadata(dictionary, "english")
|
||||||
|
|
||||||
terms := dbTermList{}
|
terms := dbTermList{}
|
||||||
for _, entry := range dictionary.Entries {
|
for _, entry := range dictionary.Entries {
|
||||||
baseTerm := baseFormsTerm(entry)
|
baseTerm := baseFormsTerm(entry)
|
||||||
headwords := extractHeadwords(entry)
|
headwords := extractHeadwords(entry)
|
||||||
for _, h := range headwords {
|
for _, h := range headwords {
|
||||||
var term dbTerm
|
|
||||||
if h.IsSearchOnly {
|
if h.IsSearchOnly {
|
||||||
term = createSearchTerm(h, entry, meta)
|
if term, ok := createSearchTerm(h, entry, meta); ok {
|
||||||
} else {
|
terms = append(terms, term)
|
||||||
term = baseTerm
|
}
|
||||||
term.Expression = h.Expression
|
continue
|
||||||
term.Reading = h.Reading
|
|
||||||
term.addTermTags(h.TermTags...)
|
|
||||||
term.Score = calculateTermScore(1, h)
|
|
||||||
}
|
}
|
||||||
|
term := baseTerm
|
||||||
|
term.Expression = h.Expression
|
||||||
|
term.Reading = h.Reading
|
||||||
|
term.addTermTags(h.TermTags...)
|
||||||
|
term.Score = calculateTermScore(1, h)
|
||||||
terms = append(terms, term)
|
terms = append(terms, term)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
Loading…
Reference in New Issue
Block a user