diff --git a/jmdict.go b/jmdict.go index 362b14d..4c54db6 100644 --- a/jmdict.go +++ b/jmdict.go @@ -29,13 +29,15 @@ func grammarRules(partsOfSpeech []string) []string { return rules } -func calculateTermScore(senseNumber int, headword headword) int { +func calculateTermScore(senseNumber int, depth int, headword headword) int { const senseWeight int = 1 - const entryPositionWeight int = 100 - const priorityWeight int = 10000 + const depthWeight int = 100 + const entryPositionWeight int = 10000 + const priorityWeight int = 1000000 score := 0 score -= (senseNumber - 1) * senseWeight + score -= depth * depthWeight score -= headword.Index * entryPositionWeight score += headword.Score() * priorityWeight @@ -85,7 +87,8 @@ func createFormsTerm(headword headword, entry jmdict.JmdictEntry, meta jmdictMet term.addDefinitionTags("forms") senseNumber := meta.seqToSenseCount[entry.Sequence] + 1 - term.Score = calculateTermScore(senseNumber, headword) + entryDepth := meta.entryDepth[entry.Sequence] + term.Score = calculateTermScore(senseNumber, entryDepth, headword) return term, true } @@ -106,7 +109,7 @@ func createSearchTerm(headword headword, entry jmdict.JmdictEntry, meta jmdictMe term.addRules(rules...) } term.addTermTags(headword.TermTags...) - term.Score = calculateTermScore(1, headword) + term.Score = calculateTermScore(1, 0, headword) redirectHeadword := meta.seqToMainHeadword[entry.Sequence] expHash := redirectHeadword.ExpHash() @@ -152,7 +155,8 @@ func createSenseTerm(sense jmdict.JmdictSense, senseNumber int, headword headwor rules := grammarRules(sense.PartsOfSpeech) term.addRules(rules...) - term.Score = calculateTermScore(senseNumber, headword) + entryDepth := meta.entryDepth[entry.Sequence] + term.Score = calculateTermScore(senseNumber, entryDepth, headword) return term, true } diff --git a/jmdictForms.go b/jmdictForms.go index af4bba6..4964233 100644 --- a/jmdictForms.go +++ b/jmdictForms.go @@ -227,7 +227,7 @@ func formsExportDb(inputPath, outputPath, languageName, title string, stride int term.Expression = h.Expression term.Reading = h.Reading term.addTermTags(h.TermTags...) - term.Score = calculateTermScore(1, h) + term.Score = calculateTermScore(1, 0, h) terms = append(terms, term) } } diff --git a/jmdictMetadata.go b/jmdictMetadata.go index ec92827..99af862 100644 --- a/jmdictMetadata.go +++ b/jmdictMetadata.go @@ -20,6 +20,7 @@ type jmdictMetadata struct { referenceToSeq map[string]sequence hashToSearchValues map[hash][]searchValue seqToSearchHashes map[sequence][]searchHash + entryDepth map[sequence]int hasMultipleForms map[sequence]bool maxSenseCount int } @@ -29,6 +30,26 @@ type senseID struct { number int } +func (meta *jmdictMetadata) CalculateEntryDepth(headwords []headword, entrySequence sequence) { + // This is to ensure that terms are grouped among their + // entries of origin and displayed in correct sequential order + maxDepth := 0 + for _, headword := range headwords { + hash := headword.Hash() + for _, seq := range meta.headwordHashToSeqs[hash] { + seqDepth := meta.entryDepth[seq] + if seqDepth == 0 { + meta.entryDepth[seq] = 1 + seqDepth = 1 + } + if maxDepth < seqDepth+1 { + maxDepth = seqDepth + 1 + } + } + } + meta.entryDepth[entrySequence] = maxDepth +} + func (meta *jmdictMetadata) AddHeadword(headword headword, entry jmdict.JmdictEntry) { // Determine how many senses are in this entry for this language @@ -128,6 +149,7 @@ func newJmdictMetadata(dictionary jmdict.Jmdict, languageName string) jmdictMeta references: []string{}, hashToSearchValues: nil, referenceToSeq: nil, + entryDepth: make(map[sequence]int), hasMultipleForms: make(map[sequence]bool), maxSenseCount: 0, } @@ -141,6 +163,7 @@ func newJmdictMetadata(dictionary jmdict.Jmdict, languageName string) jmdictMeta formCount += 1 } } + meta.CalculateEntryDepth(headwords, entry.Sequence) meta.hasMultipleForms[entry.Sequence] = (formCount > 1) }