Translator definition source term (and other info) (#881)

* Add sourceTerm field to 'term' and 'termGrouped' definitions

* Fix comparison of expressions which are always the same (compare `source` lengths instead of `expression` lengths)

* Rename/restructure term info map

* Add source term

* Add sourceTerm to expressions array

* Reuse furiganaSegments

* Add helper function _createExpressionDetailsListFromTermInfoMap

* Add expressions array to termMergedByGlossary

* Add expressions to definition types 'term' and 'termGrouped'

* Rename expressionDetails* to termDetails*

* Correct the source/rawSource/sourceTerm for related sequenced definitions

* Simplify structure of sequencedDefinitions

* Remove TODO
This commit is contained in:
toasted-nutbread 2020-10-04 18:10:10 -04:00 committed by GitHub
parent 7e31dcca39
commit 561e36e88d
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23

View File

@ -148,24 +148,17 @@ class Translator {
for (const definition of definitions) { for (const definition of definitions) {
const {sequence, dictionary} = definition; const {sequence, dictionary} = definition;
if (mainDictionary === dictionary && sequence >= 0) { if (mainDictionary === dictionary && sequence >= 0) {
const {score} = definition;
let sequencedDefinition = sequencedDefinitionMap.get(sequence); let sequencedDefinition = sequencedDefinitionMap.get(sequence);
if (typeof sequencedDefinition === 'undefined') { if (typeof sequencedDefinition === 'undefined') {
const {reasons, source, rawSource} = definition;
sequencedDefinition = { sequencedDefinition = {
reasons, sourceDefinitions: [],
score, relatedDefinitions: []
source,
rawSource,
dictionary,
definitions: []
}; };
sequencedDefinitionMap.set(sequence, sequencedDefinition); sequencedDefinitionMap.set(sequence, sequencedDefinition);
sequencedDefinitions.push(sequencedDefinition); sequencedDefinitions.push(sequencedDefinition);
sequenceList.push(sequence); sequenceList.push(sequence);
} else {
sequencedDefinition.score = Math.max(sequencedDefinition.score, score);
} }
sequencedDefinition.sourceDefinitions.push(definition);
} else { } else {
unsequencedDefinitions.push(definition); unsequencedDefinitions.push(definition);
} }
@ -174,9 +167,10 @@ class Translator {
if (sequenceList.length > 0) { if (sequenceList.length > 0) {
const databaseDefinitions = await this._database.findTermsBySequenceBulk(sequenceList, mainDictionary); const databaseDefinitions = await this._database.findTermsBySequenceBulk(sequenceList, mainDictionary);
for (const databaseDefinition of databaseDefinitions) { for (const databaseDefinition of databaseDefinitions) {
const {definitions: definitions2, source, rawSource, reasons} = sequencedDefinitions[databaseDefinition.index]; const {relatedDefinitions} = sequencedDefinitions[databaseDefinition.index];
const definition = await this._createTermDefinitionFromDatabaseDefinition(databaseDefinition, source, rawSource, reasons, enabledDictionaryMap); const {expression} = databaseDefinition;
definitions2.push(definition); const definition = await this._createTermDefinitionFromDatabaseDefinition(databaseDefinition, expression, expression, expression, [], enabledDictionaryMap);
relatedDefinitions.push(definition);
} }
} }
@ -203,26 +197,27 @@ class Translator {
const definitions = []; const definitions = [];
for (const databaseDefinition of databaseDefinitions) { for (const databaseDefinition of databaseDefinitions) {
const source = expressionList[databaseDefinition.index]; const source = expressionList[databaseDefinition.index];
const definition = await this._createTermDefinitionFromDatabaseDefinition(databaseDefinition, source, source, [], secondarySearchDictionaryMap); const definition = await this._createTermDefinitionFromDatabaseDefinition(databaseDefinition, source, source, source, [], secondarySearchDictionaryMap);
definitions.push(definition); definitions.push(definition);
} }
return definitions; return definitions;
} }
async _getMergedDefinition(sequencedDefinition, unsequencedDefinitions, secondarySearchDictionaryMap, usedDefinitions) { async _getMergedDefinition(sourceDefinitions, relatedDefinitions, unsequencedDefinitions, secondarySearchDictionaryMap, usedDefinitions) {
const {reasons, score, source, rawSource, dictionary, definitions} = sequencedDefinition; const {reasons, source, rawSource, dictionary} = sourceDefinitions[0];
const definitionDetailsMap = new Map(); const score = this._getMaxDefinitionScore(sourceDefinitions);
const termInfoMap = new Map();
const glossaryDefinitions = []; const glossaryDefinitions = [];
const glossaryDefinitionGroupMap = new Map(); const glossaryDefinitionGroupMap = new Map();
this._mergeByGlossary(definitions, glossaryDefinitionGroupMap); this._mergeByGlossary(relatedDefinitions, glossaryDefinitionGroupMap);
this._addDefinitionDetails(definitions, definitionDetailsMap); this._addUniqueTermInfos(relatedDefinitions, termInfoMap);
let secondaryDefinitions = await this._getMergedSecondarySearchResults(definitionDetailsMap, secondarySearchDictionaryMap); let secondaryDefinitions = await this._getMergedSecondarySearchResults(termInfoMap, secondarySearchDictionaryMap);
secondaryDefinitions = [unsequencedDefinitions, ...secondaryDefinitions]; secondaryDefinitions = [unsequencedDefinitions, ...secondaryDefinitions];
this._removeUsedDefinitions(secondaryDefinitions, definitionDetailsMap, usedDefinitions); this._removeUsedDefinitions(secondaryDefinitions, termInfoMap, usedDefinitions);
this._removeDuplicateDefinitions(secondaryDefinitions); this._removeDuplicateDefinitions(secondaryDefinitions);
this._mergeByGlossary(secondaryDefinitions, glossaryDefinitionGroupMap); this._mergeByGlossary(secondaryDefinitions, glossaryDefinitionGroupMap);
@ -249,14 +244,7 @@ class Translator {
this._sortDefinitions(glossaryDefinitions, true); this._sortDefinitions(glossaryDefinitions, true);
const expressionDetailsList = []; const termDetailsList = this._createTermDetailsListFromTermInfoMap(termInfoMap);
for (const [expression, readingMap] of definitionDetailsMap.entries()) {
for (const [reading, termTagsMap] of readingMap.entries()) {
const termTags = [...termTagsMap.values()];
this._sortTags(termTags);
expressionDetailsList.push(this._createExpressionDetails(expression, reading, termTags));
}
}
return this._createMergedTermDefinition( return this._createMergedTermDefinition(
source, source,
@ -264,18 +252,18 @@ class Translator {
glossaryDefinitions, glossaryDefinitions,
[...allExpressions], [...allExpressions],
[...allReadings], [...allReadings],
expressionDetailsList, termDetailsList,
reasons, reasons,
dictionary, dictionary,
score score
); );
} }
_removeUsedDefinitions(definitions, definitionDetailsMap, usedDefinitions) { _removeUsedDefinitions(definitions, termInfoMap, usedDefinitions) {
for (let i = 0, ii = definitions.length; i < ii; ++i) { for (let i = 0, ii = definitions.length; i < ii; ++i) {
const definition = definitions[i]; const definition = definitions[i];
const {expression, reading} = definition; const {expression, reading} = definition;
const expressionMap = definitionDetailsMap.get(expression); const expressionMap = termInfoMap.get(expression);
if ( if (
typeof expressionMap !== 'undefined' && typeof expressionMap !== 'undefined' &&
typeof expressionMap.get(reading) !== 'undefined' typeof expressionMap.get(reading) !== 'undefined'
@ -333,9 +321,10 @@ class Translator {
const definitionsMerged = []; const definitionsMerged = [];
const usedDefinitions = new Set(); const usedDefinitions = new Set();
for (const sequencedDefinition of sequencedDefinitions) { for (const {sourceDefinitions, relatedDefinitions} of sequencedDefinitions) {
const result = await this._getMergedDefinition( const result = await this._getMergedDefinition(
sequencedDefinition, sourceDefinitions,
relatedDefinitions,
unsequencedDefinitions, unsequencedDefinitions,
secondarySearchDictionaryMap, secondarySearchDictionaryMap,
usedDefinitions usedDefinitions
@ -345,15 +334,15 @@ class Translator {
const unusedDefinitions = unsequencedDefinitions.filter((definition) => !usedDefinitions.has(definition)); const unusedDefinitions = unsequencedDefinitions.filter((definition) => !usedDefinitions.has(definition));
for (const groupedDefinition of this._groupTerms(unusedDefinitions, enabledDictionaryMap)) { for (const groupedDefinition of this._groupTerms(unusedDefinitions, enabledDictionaryMap)) {
const {reasons, score, expression, reading, source, rawSource, dictionary, termTags} = groupedDefinition; const {reasons, score, expression, reading, source, rawSource, sourceTerm, dictionary, furiganaSegments, termTags} = groupedDefinition;
const expressionDetails = this._createExpressionDetails(expression, reading, termTags); const termDetailsList = [this._createTermDetails(sourceTerm, expression, reading, furiganaSegments, termTags)];
const compatibilityDefinition = this._createMergedTermDefinition( const compatibilityDefinition = this._createMergedTermDefinition(
source, source,
rawSource, rawSource,
definitions, definitions,
[expression], [expression],
[reading], [reading],
[expressionDetails], termDetailsList,
reasons, reasons,
dictionary, dictionary,
score score
@ -403,11 +392,11 @@ class Translator {
let maxLength = 0; let maxLength = 0;
const definitions = []; const definitions = [];
for (const {databaseDefinitions, source, rawSource, reasons} of deinflections) { for (const {databaseDefinitions, source, rawSource, term, reasons} of deinflections) {
if (databaseDefinitions.length === 0) { continue; } if (databaseDefinitions.length === 0) { continue; }
maxLength = Math.max(maxLength, rawSource.length); maxLength = Math.max(maxLength, rawSource.length);
for (const databaseDefinition of databaseDefinitions) { for (const databaseDefinition of databaseDefinitions) {
const definition = await this._createTermDefinitionFromDatabaseDefinition(databaseDefinition, source, rawSource, reasons, enabledDictionaryMap); const definition = await this._createTermDefinitionFromDatabaseDefinition(databaseDefinition, source, rawSource, term, reasons, enabledDictionaryMap);
definitions.push(definition); definitions.push(definition);
} }
} }
@ -773,7 +762,7 @@ class Translator {
} }
let removeIndex = i; let removeIndex = i;
if (definition.expression.length > existing[1].expression.length) { if (definition.source.length > existing[1].source.length) {
definitionGroups.set(id, [i, definition]); definitionGroups.set(id, [i, definition]);
removeIndex = existing[0]; removeIndex = existing[0];
} }
@ -877,20 +866,25 @@ class Translator {
} }
} }
_addDefinitionDetails(definitions, definitionDetailsMap) { _addUniqueTermInfos(definitions, termInfoMap) {
for (const {expression, reading, termTags} of definitions) { for (const {expression, reading, sourceTerm, furiganaSegments, termTags} of definitions) {
let readingMap = definitionDetailsMap.get(expression); let readingMap = termInfoMap.get(expression);
if (typeof readingMap === 'undefined') { if (typeof readingMap === 'undefined') {
readingMap = new Map(); readingMap = new Map();
definitionDetailsMap.set(expression, readingMap); termInfoMap.set(expression, readingMap);
} }
let termTagsMap = readingMap.get(reading); let termInfo = readingMap.get(reading);
if (typeof termTagsMap === 'undefined') { if (typeof termInfo === 'undefined') {
termTagsMap = new Map(); termInfo = {
readingMap.set(reading, termTagsMap); sourceTerm,
furiganaSegments,
termTagsMap: new Map()
};
readingMap.set(reading, termInfo);
} }
const {termTagsMap} = termInfo;
for (const tag of termTags) { for (const tag of termTags) {
const {name} = tag; const {name} = tag;
if (termTagsMap.has(name)) { continue; } if (termTagsMap.has(name)) { continue; }
@ -973,7 +967,7 @@ class Translator {
}; };
} }
async _createTermDefinitionFromDatabaseDefinition(databaseDefinition, source, rawSource, reasons, enabledDictionaryMap) { async _createTermDefinitionFromDatabaseDefinition(databaseDefinition, source, rawSource, sourceTerm, reasons, enabledDictionaryMap) {
const {expression, reading, definitionTags, termTags, glossary, score, dictionary, id, sequence} = databaseDefinition; const {expression, reading, definitionTags, termTags, glossary, score, dictionary, id, sequence} = databaseDefinition;
const dictionaryPriority = this._getDictionaryPriority(dictionary, enabledDictionaryMap); const dictionaryPriority = this._getDictionaryPriority(dictionary, enabledDictionaryMap);
const termTagsExpanded = await this._expandTags(termTags, dictionary); const termTagsExpanded = await this._expandTags(termTags, dictionary);
@ -984,12 +978,14 @@ class Translator {
this._sortTags(termTagsExpanded); this._sortTags(termTagsExpanded);
const furiganaSegments = jp.distributeFurigana(expression, reading); const furiganaSegments = jp.distributeFurigana(expression, reading);
const termDetailsList = [this._createTermDetails(sourceTerm, expression, reading, furiganaSegments, termTags)];
return { return {
type: 'term', type: 'term',
id, id,
source, source,
rawSource, rawSource,
sourceTerm,
reasons, reasons,
score, score,
sequence, sequence,
@ -997,7 +993,7 @@ class Translator {
dictionaryPriority, dictionaryPriority,
expression, expression,
reading, reading,
// expressions expressions: termDetailsList,
furiganaSegments, furiganaSegments,
glossary, glossary,
definitionTags: definitionTagsExpanded, definitionTags: definitionTagsExpanded,
@ -1010,14 +1006,16 @@ class Translator {
} }
_createGroupedTermDefinition(definitions) { _createGroupedTermDefinition(definitions) {
const {expression, reading, furiganaSegments, reasons, termTags, source, rawSource} = definitions[0]; const {expression, reading, furiganaSegments, reasons, termTags, source, rawSource, sourceTerm} = definitions[0];
const score = this._getMaxDefinitionScore(definitions); const score = this._getMaxDefinitionScore(definitions);
const dictionaryPriority = this._getMaxDictionaryPriority(definitions); const dictionaryPriority = this._getMaxDictionaryPriority(definitions);
const termDetailsList = [this._createTermDetails(sourceTerm, expression, reading, furiganaSegments, termTags)];
return { return {
type: 'termGrouped', type: 'termGrouped',
// id // id
source, source,
rawSource, rawSource,
sourceTerm,
reasons: [...reasons], reasons: [...reasons],
score, score,
// sequence // sequence
@ -1025,7 +1023,7 @@ class Translator {
dictionaryPriority, dictionaryPriority,
expression, expression,
reading, reading,
// expressions expressions: termDetailsList,
furiganaSegments, // Contains duplicate data furiganaSegments, // Contains duplicate data
// glossary // glossary
// definitionTags // definitionTags
@ -1037,13 +1035,14 @@ class Translator {
}; };
} }
_createMergedTermDefinition(source, rawSource, definitions, expressions, readings, expressionDetailsList, reasons, dictionary, score) { _createMergedTermDefinition(source, rawSource, definitions, expressions, readings, termDetailsList, reasons, dictionary, score) {
const dictionaryPriority = this._getMaxDictionaryPriority(definitions); const dictionaryPriority = this._getMaxDictionaryPriority(definitions);
return { return {
type: 'termMerged', type: 'termMerged',
// id // id
source, source,
rawSource, rawSource,
// sourceTerm
reasons, reasons,
score, score,
// sequence // sequence
@ -1051,7 +1050,7 @@ class Translator {
dictionaryPriority, dictionaryPriority,
expression: expressions, expression: expressions,
reading: readings, reading: readings,
expressions: expressionDetailsList, expressions: termDetailsList,
// furiganaSegments // furiganaSegments
// glossary // glossary
// definitionTags // definitionTags
@ -1072,6 +1071,10 @@ class Translator {
only.push(...getSetIntersection(readings, allReadings)); only.push(...getSetIntersection(readings, allReadings));
} }
const termInfoMap = new Map();
this._addUniqueTermInfos(definitions, termInfoMap);
const termDetailsList = this._createTermDetailsListFromTermInfoMap(termInfoMap);
const definitionTags = this._getUniqueDefinitionTags(definitions); const definitionTags = this._getUniqueDefinitionTags(definitions);
this._sortTags(definitionTags); this._sortTags(definitionTags);
@ -1083,6 +1086,7 @@ class Translator {
// id // id
source, source,
rawSource, rawSource,
// sourceTerm
reasons: [], reasons: [],
score, score,
// sequence // sequence
@ -1090,7 +1094,7 @@ class Translator {
dictionaryPriority, dictionaryPriority,
expression: [...expressions], expression: [...expressions],
reading: [...readings], reading: [...readings],
// expressions expressions: termDetailsList,
// furiganaSegments // furiganaSegments
glossary: [...glossary], glossary: [...glossary],
definitionTags, definitionTags,
@ -1102,13 +1106,25 @@ class Translator {
}; };
} }
_createExpressionDetails(expression, reading, termTags) { _createTermDetailsListFromTermInfoMap(termInfoMap) {
const termDetailsList = [];
for (const [expression, readingMap] of termInfoMap.entries()) {
for (const [reading, {termTagsMap, sourceTerm, furiganaSegments}] of readingMap.entries()) {
const termTags = [...termTagsMap.values()];
this._sortTags(termTags);
termDetailsList.push(this._createTermDetails(sourceTerm, expression, reading, furiganaSegments, termTags));
}
}
return termDetailsList;
}
_createTermDetails(sourceTerm, expression, reading, furiganaSegments, termTags) {
const termFrequency = this._scoreToTermFrequency(this._getTermTagsScoreSum(termTags)); const termFrequency = this._scoreToTermFrequency(this._getTermTagsScoreSum(termTags));
const furiganaSegments = jp.distributeFurigana(expression, reading);
return { return {
sourceTerm,
expression, expression,
reading, reading,
furiganaSegments, furiganaSegments, // Contains duplicate data
termTags, termTags,
termFrequency, termFrequency,
frequencies: [], frequencies: [],