Add sourceTermExactMatchCount to prioritize exact expression matches (#882)

This commit is contained in:
toasted-nutbread 2020-10-04 18:54:03 -04:00 committed by GitHub
parent 561e36e88d
commit f904b3e11a
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23

View File

@ -295,6 +295,12 @@ class Translator {
return result; return result;
} }
_getSourceTermMatchCountSum(definitions) {
let result = 0;
for (const {sourceTermExactMatchCount} of definitions) { result += sourceTermExactMatchCount; }
return result;
}
async _findTermsGrouped(text, options) { async _findTermsGrouped(text, options) {
const {compactTags, enabledDictionaryMap} = options; const {compactTags, enabledDictionaryMap} = options;
const [definitions, length] = await this._findTermsInternal(text, enabledDictionaryMap, options); const [definitions, length] = await this._findTermsInternal(text, enabledDictionaryMap, options);
@ -979,6 +985,7 @@ class Translator {
const furiganaSegments = jp.distributeFurigana(expression, reading); const furiganaSegments = jp.distributeFurigana(expression, reading);
const termDetailsList = [this._createTermDetails(sourceTerm, expression, reading, furiganaSegments, termTags)]; const termDetailsList = [this._createTermDetails(sourceTerm, expression, reading, furiganaSegments, termTags)];
const sourceTermExactMatchCount = (sourceTerm === expression ? 1 : 0);
return { return {
type: 'term', type: 'term',
@ -1000,8 +1007,9 @@ class Translator {
termTags: termTagsExpanded, termTags: termTagsExpanded,
// definitions // definitions
frequencies: [], frequencies: [],
pitches: [] pitches: [],
// only // only
sourceTermExactMatchCount
}; };
} }
@ -1010,6 +1018,7 @@ class Translator {
const score = this._getMaxDefinitionScore(definitions); const score = this._getMaxDefinitionScore(definitions);
const dictionaryPriority = this._getMaxDictionaryPriority(definitions); const dictionaryPriority = this._getMaxDictionaryPriority(definitions);
const termDetailsList = [this._createTermDetails(sourceTerm, expression, reading, furiganaSegments, termTags)]; const termDetailsList = [this._createTermDetails(sourceTerm, expression, reading, furiganaSegments, termTags)];
const sourceTermExactMatchCount = (sourceTerm === expression ? 1 : 0);
return { return {
type: 'termGrouped', type: 'termGrouped',
// id // id
@ -1030,13 +1039,15 @@ class Translator {
termTags: this._cloneTags(termTags), termTags: this._cloneTags(termTags),
definitions, // type: 'term' definitions, // type: 'term'
frequencies: [], frequencies: [],
pitches: [] pitches: [],
// only // only
sourceTermExactMatchCount
}; };
} }
_createMergedTermDefinition(source, rawSource, definitions, expressions, readings, termDetailsList, reasons, dictionary, score) { _createMergedTermDefinition(source, rawSource, definitions, expressions, readings, termDetailsList, reasons, dictionary, score) {
const dictionaryPriority = this._getMaxDictionaryPriority(definitions); const dictionaryPriority = this._getMaxDictionaryPriority(definitions);
const sourceTermExactMatchCount = this._getSourceTermMatchCountSum(definitions);
return { return {
type: 'termMerged', type: 'termMerged',
// id // id
@ -1057,8 +1068,9 @@ class Translator {
// termTags // termTags
definitions, // type: 'termMergedByGlossary' definitions, // type: 'termMergedByGlossary'
frequencies: [], frequencies: [],
pitches: [] pitches: [],
// only // only
sourceTermExactMatchCount
}; };
} }
@ -1071,6 +1083,8 @@ class Translator {
only.push(...getSetIntersection(readings, allReadings)); only.push(...getSetIntersection(readings, allReadings));
} }
const sourceTermExactMatchCount = this._getSourceTermMatchCountSum(definitions);
const termInfoMap = new Map(); const termInfoMap = new Map();
this._addUniqueTermInfos(definitions, termInfoMap); this._addUniqueTermInfos(definitions, termInfoMap);
const termDetailsList = this._createTermDetailsListFromTermInfoMap(termInfoMap); const termDetailsList = this._createTermDetailsListFromTermInfoMap(termInfoMap);
@ -1102,7 +1116,8 @@ class Translator {
definitions, // type: 'term'; contains duplicate data definitions, // type: 'term'; contains duplicate data
frequencies: [], frequencies: [],
pitches: [], pitches: [],
only only,
sourceTermExactMatchCount
}; };
} }
@ -1153,6 +1168,9 @@ class Translator {
i = v1.reasons.length - v2.reasons.length; i = v1.reasons.length - v2.reasons.length;
if (i !== 0) { return i; } if (i !== 0) { return i; }
i = v2.sourceTermExactMatchCount - v1.sourceTermExactMatchCount;
if (i !== 0) { return i; }
i = v2.score - v1.score; i = v2.score - v1.score;
if (i !== 0) { return i; } if (i !== 0) { return i; }