From 517ef3d052541731b0821cd32248c028278a29af Mon Sep 17 00:00:00 2001 From: stephenmk Date: Fri, 27 Jan 2023 19:09:12 -0600 Subject: [PATCH] Fix bug in term score assignments This commit ensures that terms are grouped among their entries of origin and displayed in correct sequential order in Yomichan's default result grouping mode, "Group term-reading pairs." --- jmdict.go | 16 ++++++++++------ jmdictForms.go | 2 +- jmdictMetadata.go | 23 +++++++++++++++++++++++ 3 files changed, 34 insertions(+), 7 deletions(-) diff --git a/jmdict.go b/jmdict.go index 362b14d..4c54db6 100644 --- a/jmdict.go +++ b/jmdict.go @@ -29,13 +29,15 @@ func grammarRules(partsOfSpeech []string) []string { return rules } -func calculateTermScore(senseNumber int, headword headword) int { +func calculateTermScore(senseNumber int, depth int, headword headword) int { const senseWeight int = 1 - const entryPositionWeight int = 100 - const priorityWeight int = 10000 + const depthWeight int = 100 + const entryPositionWeight int = 10000 + const priorityWeight int = 1000000 score := 0 score -= (senseNumber - 1) * senseWeight + score -= depth * depthWeight score -= headword.Index * entryPositionWeight score += headword.Score() * priorityWeight @@ -85,7 +87,8 @@ func createFormsTerm(headword headword, entry jmdict.JmdictEntry, meta jmdictMet term.addDefinitionTags("forms") senseNumber := meta.seqToSenseCount[entry.Sequence] + 1 - term.Score = calculateTermScore(senseNumber, headword) + entryDepth := meta.entryDepth[entry.Sequence] + term.Score = calculateTermScore(senseNumber, entryDepth, headword) return term, true } @@ -106,7 +109,7 @@ func createSearchTerm(headword headword, entry jmdict.JmdictEntry, meta jmdictMe term.addRules(rules...) } term.addTermTags(headword.TermTags...) - term.Score = calculateTermScore(1, headword) + term.Score = calculateTermScore(1, 0, headword) redirectHeadword := meta.seqToMainHeadword[entry.Sequence] expHash := redirectHeadword.ExpHash() @@ -152,7 +155,8 @@ func createSenseTerm(sense jmdict.JmdictSense, senseNumber int, headword headwor rules := grammarRules(sense.PartsOfSpeech) term.addRules(rules...) - term.Score = calculateTermScore(senseNumber, headword) + entryDepth := meta.entryDepth[entry.Sequence] + term.Score = calculateTermScore(senseNumber, entryDepth, headword) return term, true } diff --git a/jmdictForms.go b/jmdictForms.go index af4bba6..4964233 100644 --- a/jmdictForms.go +++ b/jmdictForms.go @@ -227,7 +227,7 @@ func formsExportDb(inputPath, outputPath, languageName, title string, stride int term.Expression = h.Expression term.Reading = h.Reading term.addTermTags(h.TermTags...) - term.Score = calculateTermScore(1, h) + term.Score = calculateTermScore(1, 0, h) terms = append(terms, term) } } diff --git a/jmdictMetadata.go b/jmdictMetadata.go index ec92827..99af862 100644 --- a/jmdictMetadata.go +++ b/jmdictMetadata.go @@ -20,6 +20,7 @@ type jmdictMetadata struct { referenceToSeq map[string]sequence hashToSearchValues map[hash][]searchValue seqToSearchHashes map[sequence][]searchHash + entryDepth map[sequence]int hasMultipleForms map[sequence]bool maxSenseCount int } @@ -29,6 +30,26 @@ type senseID struct { number int } +func (meta *jmdictMetadata) CalculateEntryDepth(headwords []headword, entrySequence sequence) { + // This is to ensure that terms are grouped among their + // entries of origin and displayed in correct sequential order + maxDepth := 0 + for _, headword := range headwords { + hash := headword.Hash() + for _, seq := range meta.headwordHashToSeqs[hash] { + seqDepth := meta.entryDepth[seq] + if seqDepth == 0 { + meta.entryDepth[seq] = 1 + seqDepth = 1 + } + if maxDepth < seqDepth+1 { + maxDepth = seqDepth + 1 + } + } + } + meta.entryDepth[entrySequence] = maxDepth +} + func (meta *jmdictMetadata) AddHeadword(headword headword, entry jmdict.JmdictEntry) { // Determine how many senses are in this entry for this language @@ -128,6 +149,7 @@ func newJmdictMetadata(dictionary jmdict.Jmdict, languageName string) jmdictMeta references: []string{}, hashToSearchValues: nil, referenceToSeq: nil, + entryDepth: make(map[sequence]int), hasMultipleForms: make(map[sequence]bool), maxSenseCount: 0, } @@ -141,6 +163,7 @@ func newJmdictMetadata(dictionary jmdict.Jmdict, languageName string) jmdictMeta formCount += 1 } } + meta.CalculateEntryDepth(headwords, entry.Sequence) meta.hasMultipleForms[entry.Sequence] = (formCount > 1) }