1

Fix bug in term score assignments

This commit ensures that terms are grouped among their entries of
origin and displayed in correct sequential order in Yomichan's default
result grouping mode, "Group term-reading pairs."
This commit is contained in:
stephenmk 2023-01-27 19:09:12 -06:00
parent 7bd967915c
commit 517ef3d052
No known key found for this signature in database
GPG Key ID: B6DA730DB06235F1
3 changed files with 34 additions and 7 deletions

View File

@ -29,13 +29,15 @@ func grammarRules(partsOfSpeech []string) []string {
return rules return rules
} }
func calculateTermScore(senseNumber int, headword headword) int { func calculateTermScore(senseNumber int, depth int, headword headword) int {
const senseWeight int = 1 const senseWeight int = 1
const entryPositionWeight int = 100 const depthWeight int = 100
const priorityWeight int = 10000 const entryPositionWeight int = 10000
const priorityWeight int = 1000000
score := 0 score := 0
score -= (senseNumber - 1) * senseWeight score -= (senseNumber - 1) * senseWeight
score -= depth * depthWeight
score -= headword.Index * entryPositionWeight score -= headword.Index * entryPositionWeight
score += headword.Score() * priorityWeight score += headword.Score() * priorityWeight
@ -85,7 +87,8 @@ func createFormsTerm(headword headword, entry jmdict.JmdictEntry, meta jmdictMet
term.addDefinitionTags("forms") term.addDefinitionTags("forms")
senseNumber := meta.seqToSenseCount[entry.Sequence] + 1 senseNumber := meta.seqToSenseCount[entry.Sequence] + 1
term.Score = calculateTermScore(senseNumber, headword) entryDepth := meta.entryDepth[entry.Sequence]
term.Score = calculateTermScore(senseNumber, entryDepth, headword)
return term, true return term, true
} }
@ -106,7 +109,7 @@ func createSearchTerm(headword headword, entry jmdict.JmdictEntry, meta jmdictMe
term.addRules(rules...) term.addRules(rules...)
} }
term.addTermTags(headword.TermTags...) term.addTermTags(headword.TermTags...)
term.Score = calculateTermScore(1, headword) term.Score = calculateTermScore(1, 0, headword)
redirectHeadword := meta.seqToMainHeadword[entry.Sequence] redirectHeadword := meta.seqToMainHeadword[entry.Sequence]
expHash := redirectHeadword.ExpHash() expHash := redirectHeadword.ExpHash()
@ -152,7 +155,8 @@ func createSenseTerm(sense jmdict.JmdictSense, senseNumber int, headword headwor
rules := grammarRules(sense.PartsOfSpeech) rules := grammarRules(sense.PartsOfSpeech)
term.addRules(rules...) term.addRules(rules...)
term.Score = calculateTermScore(senseNumber, headword) entryDepth := meta.entryDepth[entry.Sequence]
term.Score = calculateTermScore(senseNumber, entryDepth, headword)
return term, true return term, true
} }

View File

@ -227,7 +227,7 @@ func formsExportDb(inputPath, outputPath, languageName, title string, stride int
term.Expression = h.Expression term.Expression = h.Expression
term.Reading = h.Reading term.Reading = h.Reading
term.addTermTags(h.TermTags...) term.addTermTags(h.TermTags...)
term.Score = calculateTermScore(1, h) term.Score = calculateTermScore(1, 0, h)
terms = append(terms, term) terms = append(terms, term)
} }
} }

View File

@ -20,6 +20,7 @@ type jmdictMetadata struct {
referenceToSeq map[string]sequence referenceToSeq map[string]sequence
hashToSearchValues map[hash][]searchValue hashToSearchValues map[hash][]searchValue
seqToSearchHashes map[sequence][]searchHash seqToSearchHashes map[sequence][]searchHash
entryDepth map[sequence]int
hasMultipleForms map[sequence]bool hasMultipleForms map[sequence]bool
maxSenseCount int maxSenseCount int
} }
@ -29,6 +30,26 @@ type senseID struct {
number int number int
} }
// CalculateEntryDepth stores a depth value for entrySequence in
// meta.entryDepth.
//
// For every headword given, it looks up the sequences registered under
// that headword's hash in meta.headwordHashToSeqs. Any such sequence
// whose recorded depth is still zero is assigned a depth of 1. The depth
// stored for entrySequence is one greater than the largest depth found
// among those sequences, or 0 when no sequences are found at all.
//
// The depth exists so that terms are grouped among their entries of
// origin and displayed in correct sequential order.
func (meta *jmdictMetadata) CalculateEntryDepth(headwords []headword, entrySequence sequence) {
	deepest := 0
	for _, hw := range headwords {
		for _, relatedSeq := range meta.headwordHashToSeqs[hw.Hash()] {
			depth := meta.entryDepth[relatedSeq]
			if depth == 0 {
				// A zero depth is treated as "not yet assigned";
				// give the sequence the base depth of 1.
				depth = 1
				meta.entryDepth[relatedSeq] = depth
			}
			if candidate := depth + 1; candidate > deepest {
				deepest = candidate
			}
		}
	}
	// Written last, so this overrides any depth assigned to
	// entrySequence inside the loop above.
	meta.entryDepth[entrySequence] = deepest
}
func (meta *jmdictMetadata) AddHeadword(headword headword, entry jmdict.JmdictEntry) { func (meta *jmdictMetadata) AddHeadword(headword headword, entry jmdict.JmdictEntry) {
// Determine how many senses are in this entry for this language // Determine how many senses are in this entry for this language
@ -128,6 +149,7 @@ func newJmdictMetadata(dictionary jmdict.Jmdict, languageName string) jmdictMeta
references: []string{}, references: []string{},
hashToSearchValues: nil, hashToSearchValues: nil,
referenceToSeq: nil, referenceToSeq: nil,
entryDepth: make(map[sequence]int),
hasMultipleForms: make(map[sequence]bool), hasMultipleForms: make(map[sequence]bool),
maxSenseCount: 0, maxSenseCount: 0,
} }
@ -141,6 +163,7 @@ func newJmdictMetadata(dictionary jmdict.Jmdict, languageName string) jmdictMeta
formCount += 1 formCount += 1
} }
} }
meta.CalculateEntryDepth(headwords, entry.Sequence)
meta.hasMultipleForms[entry.Sequence] = (formCount > 1) meta.hasMultipleForms[entry.Sequence] = (formCount > 1)
} }