1

Fix bug in term score assignments

This commit ensures that terms are grouped among their entries of
origin and displayed in correct sequential order in Yomichan's default
result grouping mode, "Group term-reading pairs."
This commit is contained in:
stephenmk 2023-01-27 19:09:12 -06:00
parent 7bd967915c
commit 517ef3d052
No known key found for this signature in database
GPG Key ID: B6DA730DB06235F1
3 changed files with 34 additions and 7 deletions

View File

@ -29,13 +29,15 @@ func grammarRules(partsOfSpeech []string) []string {
return rules
}
func calculateTermScore(senseNumber int, headword headword) int {
func calculateTermScore(senseNumber int, depth int, headword headword) int {
const senseWeight int = 1
const entryPositionWeight int = 100
const priorityWeight int = 10000
const depthWeight int = 100
const entryPositionWeight int = 10000
const priorityWeight int = 1000000
score := 0
score -= (senseNumber - 1) * senseWeight
score -= depth * depthWeight
score -= headword.Index * entryPositionWeight
score += headword.Score() * priorityWeight
@ -85,7 +87,8 @@ func createFormsTerm(headword headword, entry jmdict.JmdictEntry, meta jmdictMet
term.addDefinitionTags("forms")
senseNumber := meta.seqToSenseCount[entry.Sequence] + 1
term.Score = calculateTermScore(senseNumber, headword)
entryDepth := meta.entryDepth[entry.Sequence]
term.Score = calculateTermScore(senseNumber, entryDepth, headword)
return term, true
}
@ -106,7 +109,7 @@ func createSearchTerm(headword headword, entry jmdict.JmdictEntry, meta jmdictMe
term.addRules(rules...)
}
term.addTermTags(headword.TermTags...)
term.Score = calculateTermScore(1, headword)
term.Score = calculateTermScore(1, 0, headword)
redirectHeadword := meta.seqToMainHeadword[entry.Sequence]
expHash := redirectHeadword.ExpHash()
@ -152,7 +155,8 @@ func createSenseTerm(sense jmdict.JmdictSense, senseNumber int, headword headwor
rules := grammarRules(sense.PartsOfSpeech)
term.addRules(rules...)
term.Score = calculateTermScore(senseNumber, headword)
entryDepth := meta.entryDepth[entry.Sequence]
term.Score = calculateTermScore(senseNumber, entryDepth, headword)
return term, true
}

View File

@ -227,7 +227,7 @@ func formsExportDb(inputPath, outputPath, languageName, title string, stride int
term.Expression = h.Expression
term.Reading = h.Reading
term.addTermTags(h.TermTags...)
term.Score = calculateTermScore(1, h)
term.Score = calculateTermScore(1, 0, h)
terms = append(terms, term)
}
}

View File

@ -20,6 +20,7 @@ type jmdictMetadata struct {
referenceToSeq map[string]sequence
hashToSearchValues map[hash][]searchValue
seqToSearchHashes map[sequence][]searchHash
entryDepth map[sequence]int
hasMultipleForms map[sequence]bool
maxSenseCount int
}
@ -29,6 +30,26 @@ type senseID struct {
number int
}
// CalculateEntryDepth assigns a depth to entrySequence and stores it in
// meta.entryDepth.
//
// Every sequence registered in meta.headwordHashToSeqs under the hash of
// one of the given headwords is inspected. A sequence whose stored depth
// is still zero is first given depth 1. The depth recorded for
// entrySequence is one more than the largest depth seen, or 0 when no
// sequence was inspected at all.
//
// The depth exists so that terms are grouped among their entries of
// origin and displayed in correct sequential order.
func (meta *jmdictMetadata) CalculateEntryDepth(headwords []headword, entrySequence sequence) {
	deepest := 0
	for _, hw := range headwords {
		for _, sharedSeq := range meta.headwordHashToSeqs[hw.Hash()] {
			depth := meta.entryDepth[sharedSeq]
			if depth == 0 {
				// Zero means "no depth assigned yet"; start such entries at 1.
				depth = 1
				meta.entryDepth[sharedSeq] = depth
			}
			if next := depth + 1; next > deepest {
				deepest = next
			}
		}
	}
	// Written last, so it overrides any depth given to entrySequence above.
	meta.entryDepth[entrySequence] = deepest
}
func (meta *jmdictMetadata) AddHeadword(headword headword, entry jmdict.JmdictEntry) {
// Determine how many senses are in this entry for this language
@ -128,6 +149,7 @@ func newJmdictMetadata(dictionary jmdict.Jmdict, languageName string) jmdictMeta
references: []string{},
hashToSearchValues: nil,
referenceToSeq: nil,
entryDepth: make(map[sequence]int),
hasMultipleForms: make(map[sequence]bool),
maxSenseCount: 0,
}
@ -141,6 +163,7 @@ func newJmdictMetadata(dictionary jmdict.Jmdict, languageName string) jmdictMeta
formCount += 1
}
}
meta.CalculateEntryDepth(headwords, entry.Sequence)
meta.hasMultipleForms[entry.Sequence] = (formCount > 1)
}