Translator definition source term (and other info) (#881)
* Add sourceTerm field to 'term' and 'termGrouped' definitions
* Fix comparison of expressions which are always the same
* Rename/restructure term info map
* Add source term
* Add sourceTerm to expressions array
* Reuse furiganaSegments
* Add helper function _createExpressionDetailsListFromTermInfoMap
* Add expressions array to termMergedByGlossary
* Add expressions to definition types 'term' and 'termGrouped'
* Rename expressionDetails* to termDetails*
* Correct the source/rawSource/sourceTerm for related sequenced definitions
* Simplify structure of sequencedDefinitions
* Remove TODO
This commit is contained in:
parent
7e31dcca39
commit
561e36e88d
@ -148,24 +148,17 @@ class Translator {
|
|||||||
for (const definition of definitions) {
|
for (const definition of definitions) {
|
||||||
const {sequence, dictionary} = definition;
|
const {sequence, dictionary} = definition;
|
||||||
if (mainDictionary === dictionary && sequence >= 0) {
|
if (mainDictionary === dictionary && sequence >= 0) {
|
||||||
const {score} = definition;
|
|
||||||
let sequencedDefinition = sequencedDefinitionMap.get(sequence);
|
let sequencedDefinition = sequencedDefinitionMap.get(sequence);
|
||||||
if (typeof sequencedDefinition === 'undefined') {
|
if (typeof sequencedDefinition === 'undefined') {
|
||||||
const {reasons, source, rawSource} = definition;
|
|
||||||
sequencedDefinition = {
|
sequencedDefinition = {
|
||||||
reasons,
|
sourceDefinitions: [],
|
||||||
score,
|
relatedDefinitions: []
|
||||||
source,
|
|
||||||
rawSource,
|
|
||||||
dictionary,
|
|
||||||
definitions: []
|
|
||||||
};
|
};
|
||||||
sequencedDefinitionMap.set(sequence, sequencedDefinition);
|
sequencedDefinitionMap.set(sequence, sequencedDefinition);
|
||||||
sequencedDefinitions.push(sequencedDefinition);
|
sequencedDefinitions.push(sequencedDefinition);
|
||||||
sequenceList.push(sequence);
|
sequenceList.push(sequence);
|
||||||
} else {
|
|
||||||
sequencedDefinition.score = Math.max(sequencedDefinition.score, score);
|
|
||||||
}
|
}
|
||||||
|
sequencedDefinition.sourceDefinitions.push(definition);
|
||||||
} else {
|
} else {
|
||||||
unsequencedDefinitions.push(definition);
|
unsequencedDefinitions.push(definition);
|
||||||
}
|
}
|
||||||
@ -174,9 +167,10 @@ class Translator {
|
|||||||
if (sequenceList.length > 0) {
|
if (sequenceList.length > 0) {
|
||||||
const databaseDefinitions = await this._database.findTermsBySequenceBulk(sequenceList, mainDictionary);
|
const databaseDefinitions = await this._database.findTermsBySequenceBulk(sequenceList, mainDictionary);
|
||||||
for (const databaseDefinition of databaseDefinitions) {
|
for (const databaseDefinition of databaseDefinitions) {
|
||||||
const {definitions: definitions2, source, rawSource, reasons} = sequencedDefinitions[databaseDefinition.index];
|
const {relatedDefinitions} = sequencedDefinitions[databaseDefinition.index];
|
||||||
const definition = await this._createTermDefinitionFromDatabaseDefinition(databaseDefinition, source, rawSource, reasons, enabledDictionaryMap);
|
const {expression} = databaseDefinition;
|
||||||
definitions2.push(definition);
|
const definition = await this._createTermDefinitionFromDatabaseDefinition(databaseDefinition, expression, expression, expression, [], enabledDictionaryMap);
|
||||||
|
relatedDefinitions.push(definition);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -203,26 +197,27 @@ class Translator {
|
|||||||
const definitions = [];
|
const definitions = [];
|
||||||
for (const databaseDefinition of databaseDefinitions) {
|
for (const databaseDefinition of databaseDefinitions) {
|
||||||
const source = expressionList[databaseDefinition.index];
|
const source = expressionList[databaseDefinition.index];
|
||||||
const definition = await this._createTermDefinitionFromDatabaseDefinition(databaseDefinition, source, source, [], secondarySearchDictionaryMap);
|
const definition = await this._createTermDefinitionFromDatabaseDefinition(databaseDefinition, source, source, source, [], secondarySearchDictionaryMap);
|
||||||
definitions.push(definition);
|
definitions.push(definition);
|
||||||
}
|
}
|
||||||
|
|
||||||
return definitions;
|
return definitions;
|
||||||
}
|
}
|
||||||
|
|
||||||
async _getMergedDefinition(sequencedDefinition, unsequencedDefinitions, secondarySearchDictionaryMap, usedDefinitions) {
|
async _getMergedDefinition(sourceDefinitions, relatedDefinitions, unsequencedDefinitions, secondarySearchDictionaryMap, usedDefinitions) {
|
||||||
const {reasons, score, source, rawSource, dictionary, definitions} = sequencedDefinition;
|
const {reasons, source, rawSource, dictionary} = sourceDefinitions[0];
|
||||||
const definitionDetailsMap = new Map();
|
const score = this._getMaxDefinitionScore(sourceDefinitions);
|
||||||
|
const termInfoMap = new Map();
|
||||||
const glossaryDefinitions = [];
|
const glossaryDefinitions = [];
|
||||||
const glossaryDefinitionGroupMap = new Map();
|
const glossaryDefinitionGroupMap = new Map();
|
||||||
|
|
||||||
this._mergeByGlossary(definitions, glossaryDefinitionGroupMap);
|
this._mergeByGlossary(relatedDefinitions, glossaryDefinitionGroupMap);
|
||||||
this._addDefinitionDetails(definitions, definitionDetailsMap);
|
this._addUniqueTermInfos(relatedDefinitions, termInfoMap);
|
||||||
|
|
||||||
let secondaryDefinitions = await this._getMergedSecondarySearchResults(definitionDetailsMap, secondarySearchDictionaryMap);
|
let secondaryDefinitions = await this._getMergedSecondarySearchResults(termInfoMap, secondarySearchDictionaryMap);
|
||||||
secondaryDefinitions = [unsequencedDefinitions, ...secondaryDefinitions];
|
secondaryDefinitions = [unsequencedDefinitions, ...secondaryDefinitions];
|
||||||
|
|
||||||
this._removeUsedDefinitions(secondaryDefinitions, definitionDetailsMap, usedDefinitions);
|
this._removeUsedDefinitions(secondaryDefinitions, termInfoMap, usedDefinitions);
|
||||||
this._removeDuplicateDefinitions(secondaryDefinitions);
|
this._removeDuplicateDefinitions(secondaryDefinitions);
|
||||||
|
|
||||||
this._mergeByGlossary(secondaryDefinitions, glossaryDefinitionGroupMap);
|
this._mergeByGlossary(secondaryDefinitions, glossaryDefinitionGroupMap);
|
||||||
@ -249,14 +244,7 @@ class Translator {
|
|||||||
|
|
||||||
this._sortDefinitions(glossaryDefinitions, true);
|
this._sortDefinitions(glossaryDefinitions, true);
|
||||||
|
|
||||||
const expressionDetailsList = [];
|
const termDetailsList = this._createTermDetailsListFromTermInfoMap(termInfoMap);
|
||||||
for (const [expression, readingMap] of definitionDetailsMap.entries()) {
|
|
||||||
for (const [reading, termTagsMap] of readingMap.entries()) {
|
|
||||||
const termTags = [...termTagsMap.values()];
|
|
||||||
this._sortTags(termTags);
|
|
||||||
expressionDetailsList.push(this._createExpressionDetails(expression, reading, termTags));
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
return this._createMergedTermDefinition(
|
return this._createMergedTermDefinition(
|
||||||
source,
|
source,
|
||||||
@ -264,18 +252,18 @@ class Translator {
|
|||||||
glossaryDefinitions,
|
glossaryDefinitions,
|
||||||
[...allExpressions],
|
[...allExpressions],
|
||||||
[...allReadings],
|
[...allReadings],
|
||||||
expressionDetailsList,
|
termDetailsList,
|
||||||
reasons,
|
reasons,
|
||||||
dictionary,
|
dictionary,
|
||||||
score
|
score
|
||||||
);
|
);
|
||||||
}
|
}
|
||||||
|
|
||||||
_removeUsedDefinitions(definitions, definitionDetailsMap, usedDefinitions) {
|
_removeUsedDefinitions(definitions, termInfoMap, usedDefinitions) {
|
||||||
for (let i = 0, ii = definitions.length; i < ii; ++i) {
|
for (let i = 0, ii = definitions.length; i < ii; ++i) {
|
||||||
const definition = definitions[i];
|
const definition = definitions[i];
|
||||||
const {expression, reading} = definition;
|
const {expression, reading} = definition;
|
||||||
const expressionMap = definitionDetailsMap.get(expression);
|
const expressionMap = termInfoMap.get(expression);
|
||||||
if (
|
if (
|
||||||
typeof expressionMap !== 'undefined' &&
|
typeof expressionMap !== 'undefined' &&
|
||||||
typeof expressionMap.get(reading) !== 'undefined'
|
typeof expressionMap.get(reading) !== 'undefined'
|
||||||
@ -333,9 +321,10 @@ class Translator {
|
|||||||
const definitionsMerged = [];
|
const definitionsMerged = [];
|
||||||
const usedDefinitions = new Set();
|
const usedDefinitions = new Set();
|
||||||
|
|
||||||
for (const sequencedDefinition of sequencedDefinitions) {
|
for (const {sourceDefinitions, relatedDefinitions} of sequencedDefinitions) {
|
||||||
const result = await this._getMergedDefinition(
|
const result = await this._getMergedDefinition(
|
||||||
sequencedDefinition,
|
sourceDefinitions,
|
||||||
|
relatedDefinitions,
|
||||||
unsequencedDefinitions,
|
unsequencedDefinitions,
|
||||||
secondarySearchDictionaryMap,
|
secondarySearchDictionaryMap,
|
||||||
usedDefinitions
|
usedDefinitions
|
||||||
@ -345,15 +334,15 @@ class Translator {
|
|||||||
|
|
||||||
const unusedDefinitions = unsequencedDefinitions.filter((definition) => !usedDefinitions.has(definition));
|
const unusedDefinitions = unsequencedDefinitions.filter((definition) => !usedDefinitions.has(definition));
|
||||||
for (const groupedDefinition of this._groupTerms(unusedDefinitions, enabledDictionaryMap)) {
|
for (const groupedDefinition of this._groupTerms(unusedDefinitions, enabledDictionaryMap)) {
|
||||||
const {reasons, score, expression, reading, source, rawSource, dictionary, termTags} = groupedDefinition;
|
const {reasons, score, expression, reading, source, rawSource, sourceTerm, dictionary, furiganaSegments, termTags} = groupedDefinition;
|
||||||
const expressionDetails = this._createExpressionDetails(expression, reading, termTags);
|
const termDetailsList = [this._createTermDetails(sourceTerm, expression, reading, furiganaSegments, termTags)];
|
||||||
const compatibilityDefinition = this._createMergedTermDefinition(
|
const compatibilityDefinition = this._createMergedTermDefinition(
|
||||||
source,
|
source,
|
||||||
rawSource,
|
rawSource,
|
||||||
definitions,
|
definitions,
|
||||||
[expression],
|
[expression],
|
||||||
[reading],
|
[reading],
|
||||||
[expressionDetails],
|
termDetailsList,
|
||||||
reasons,
|
reasons,
|
||||||
dictionary,
|
dictionary,
|
||||||
score
|
score
|
||||||
@ -403,11 +392,11 @@ class Translator {
|
|||||||
|
|
||||||
let maxLength = 0;
|
let maxLength = 0;
|
||||||
const definitions = [];
|
const definitions = [];
|
||||||
for (const {databaseDefinitions, source, rawSource, reasons} of deinflections) {
|
for (const {databaseDefinitions, source, rawSource, term, reasons} of deinflections) {
|
||||||
if (databaseDefinitions.length === 0) { continue; }
|
if (databaseDefinitions.length === 0) { continue; }
|
||||||
maxLength = Math.max(maxLength, rawSource.length);
|
maxLength = Math.max(maxLength, rawSource.length);
|
||||||
for (const databaseDefinition of databaseDefinitions) {
|
for (const databaseDefinition of databaseDefinitions) {
|
||||||
const definition = await this._createTermDefinitionFromDatabaseDefinition(databaseDefinition, source, rawSource, reasons, enabledDictionaryMap);
|
const definition = await this._createTermDefinitionFromDatabaseDefinition(databaseDefinition, source, rawSource, term, reasons, enabledDictionaryMap);
|
||||||
definitions.push(definition);
|
definitions.push(definition);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -773,7 +762,7 @@ class Translator {
|
|||||||
}
|
}
|
||||||
|
|
||||||
let removeIndex = i;
|
let removeIndex = i;
|
||||||
if (definition.expression.length > existing[1].expression.length) {
|
if (definition.source.length > existing[1].source.length) {
|
||||||
definitionGroups.set(id, [i, definition]);
|
definitionGroups.set(id, [i, definition]);
|
||||||
removeIndex = existing[0];
|
removeIndex = existing[0];
|
||||||
}
|
}
|
||||||
@ -877,20 +866,25 @@ class Translator {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
_addDefinitionDetails(definitions, definitionDetailsMap) {
|
_addUniqueTermInfos(definitions, termInfoMap) {
|
||||||
for (const {expression, reading, termTags} of definitions) {
|
for (const {expression, reading, sourceTerm, furiganaSegments, termTags} of definitions) {
|
||||||
let readingMap = definitionDetailsMap.get(expression);
|
let readingMap = termInfoMap.get(expression);
|
||||||
if (typeof readingMap === 'undefined') {
|
if (typeof readingMap === 'undefined') {
|
||||||
readingMap = new Map();
|
readingMap = new Map();
|
||||||
definitionDetailsMap.set(expression, readingMap);
|
termInfoMap.set(expression, readingMap);
|
||||||
}
|
}
|
||||||
|
|
||||||
let termTagsMap = readingMap.get(reading);
|
let termInfo = readingMap.get(reading);
|
||||||
if (typeof termTagsMap === 'undefined') {
|
if (typeof termInfo === 'undefined') {
|
||||||
termTagsMap = new Map();
|
termInfo = {
|
||||||
readingMap.set(reading, termTagsMap);
|
sourceTerm,
|
||||||
|
furiganaSegments,
|
||||||
|
termTagsMap: new Map()
|
||||||
|
};
|
||||||
|
readingMap.set(reading, termInfo);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
const {termTagsMap} = termInfo;
|
||||||
for (const tag of termTags) {
|
for (const tag of termTags) {
|
||||||
const {name} = tag;
|
const {name} = tag;
|
||||||
if (termTagsMap.has(name)) { continue; }
|
if (termTagsMap.has(name)) { continue; }
|
||||||
@ -973,7 +967,7 @@ class Translator {
|
|||||||
};
|
};
|
||||||
}
|
}
|
||||||
|
|
||||||
async _createTermDefinitionFromDatabaseDefinition(databaseDefinition, source, rawSource, reasons, enabledDictionaryMap) {
|
async _createTermDefinitionFromDatabaseDefinition(databaseDefinition, source, rawSource, sourceTerm, reasons, enabledDictionaryMap) {
|
||||||
const {expression, reading, definitionTags, termTags, glossary, score, dictionary, id, sequence} = databaseDefinition;
|
const {expression, reading, definitionTags, termTags, glossary, score, dictionary, id, sequence} = databaseDefinition;
|
||||||
const dictionaryPriority = this._getDictionaryPriority(dictionary, enabledDictionaryMap);
|
const dictionaryPriority = this._getDictionaryPriority(dictionary, enabledDictionaryMap);
|
||||||
const termTagsExpanded = await this._expandTags(termTags, dictionary);
|
const termTagsExpanded = await this._expandTags(termTags, dictionary);
|
||||||
@ -984,12 +978,14 @@ class Translator {
|
|||||||
this._sortTags(termTagsExpanded);
|
this._sortTags(termTagsExpanded);
|
||||||
|
|
||||||
const furiganaSegments = jp.distributeFurigana(expression, reading);
|
const furiganaSegments = jp.distributeFurigana(expression, reading);
|
||||||
|
const termDetailsList = [this._createTermDetails(sourceTerm, expression, reading, furiganaSegments, termTags)];
|
||||||
|
|
||||||
return {
|
return {
|
||||||
type: 'term',
|
type: 'term',
|
||||||
id,
|
id,
|
||||||
source,
|
source,
|
||||||
rawSource,
|
rawSource,
|
||||||
|
sourceTerm,
|
||||||
reasons,
|
reasons,
|
||||||
score,
|
score,
|
||||||
sequence,
|
sequence,
|
||||||
@ -997,7 +993,7 @@ class Translator {
|
|||||||
dictionaryPriority,
|
dictionaryPriority,
|
||||||
expression,
|
expression,
|
||||||
reading,
|
reading,
|
||||||
// expressions
|
expressions: termDetailsList,
|
||||||
furiganaSegments,
|
furiganaSegments,
|
||||||
glossary,
|
glossary,
|
||||||
definitionTags: definitionTagsExpanded,
|
definitionTags: definitionTagsExpanded,
|
||||||
@ -1010,14 +1006,16 @@ class Translator {
|
|||||||
}
|
}
|
||||||
|
|
||||||
_createGroupedTermDefinition(definitions) {
|
_createGroupedTermDefinition(definitions) {
|
||||||
const {expression, reading, furiganaSegments, reasons, termTags, source, rawSource} = definitions[0];
|
const {expression, reading, furiganaSegments, reasons, termTags, source, rawSource, sourceTerm} = definitions[0];
|
||||||
const score = this._getMaxDefinitionScore(definitions);
|
const score = this._getMaxDefinitionScore(definitions);
|
||||||
const dictionaryPriority = this._getMaxDictionaryPriority(definitions);
|
const dictionaryPriority = this._getMaxDictionaryPriority(definitions);
|
||||||
|
const termDetailsList = [this._createTermDetails(sourceTerm, expression, reading, furiganaSegments, termTags)];
|
||||||
return {
|
return {
|
||||||
type: 'termGrouped',
|
type: 'termGrouped',
|
||||||
// id
|
// id
|
||||||
source,
|
source,
|
||||||
rawSource,
|
rawSource,
|
||||||
|
sourceTerm,
|
||||||
reasons: [...reasons],
|
reasons: [...reasons],
|
||||||
score,
|
score,
|
||||||
// sequence
|
// sequence
|
||||||
@ -1025,7 +1023,7 @@ class Translator {
|
|||||||
dictionaryPriority,
|
dictionaryPriority,
|
||||||
expression,
|
expression,
|
||||||
reading,
|
reading,
|
||||||
// expressions
|
expressions: termDetailsList,
|
||||||
furiganaSegments, // Contains duplicate data
|
furiganaSegments, // Contains duplicate data
|
||||||
// glossary
|
// glossary
|
||||||
// definitionTags
|
// definitionTags
|
||||||
@ -1037,13 +1035,14 @@ class Translator {
|
|||||||
};
|
};
|
||||||
}
|
}
|
||||||
|
|
||||||
_createMergedTermDefinition(source, rawSource, definitions, expressions, readings, expressionDetailsList, reasons, dictionary, score) {
|
_createMergedTermDefinition(source, rawSource, definitions, expressions, readings, termDetailsList, reasons, dictionary, score) {
|
||||||
const dictionaryPriority = this._getMaxDictionaryPriority(definitions);
|
const dictionaryPriority = this._getMaxDictionaryPriority(definitions);
|
||||||
return {
|
return {
|
||||||
type: 'termMerged',
|
type: 'termMerged',
|
||||||
// id
|
// id
|
||||||
source,
|
source,
|
||||||
rawSource,
|
rawSource,
|
||||||
|
// sourceTerm
|
||||||
reasons,
|
reasons,
|
||||||
score,
|
score,
|
||||||
// sequence
|
// sequence
|
||||||
@ -1051,7 +1050,7 @@ class Translator {
|
|||||||
dictionaryPriority,
|
dictionaryPriority,
|
||||||
expression: expressions,
|
expression: expressions,
|
||||||
reading: readings,
|
reading: readings,
|
||||||
expressions: expressionDetailsList,
|
expressions: termDetailsList,
|
||||||
// furiganaSegments
|
// furiganaSegments
|
||||||
// glossary
|
// glossary
|
||||||
// definitionTags
|
// definitionTags
|
||||||
@ -1072,6 +1071,10 @@ class Translator {
|
|||||||
only.push(...getSetIntersection(readings, allReadings));
|
only.push(...getSetIntersection(readings, allReadings));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
const termInfoMap = new Map();
|
||||||
|
this._addUniqueTermInfos(definitions, termInfoMap);
|
||||||
|
const termDetailsList = this._createTermDetailsListFromTermInfoMap(termInfoMap);
|
||||||
|
|
||||||
const definitionTags = this._getUniqueDefinitionTags(definitions);
|
const definitionTags = this._getUniqueDefinitionTags(definitions);
|
||||||
this._sortTags(definitionTags);
|
this._sortTags(definitionTags);
|
||||||
|
|
||||||
@ -1083,6 +1086,7 @@ class Translator {
|
|||||||
// id
|
// id
|
||||||
source,
|
source,
|
||||||
rawSource,
|
rawSource,
|
||||||
|
// sourceTerm
|
||||||
reasons: [],
|
reasons: [],
|
||||||
score,
|
score,
|
||||||
// sequence
|
// sequence
|
||||||
@ -1090,7 +1094,7 @@ class Translator {
|
|||||||
dictionaryPriority,
|
dictionaryPriority,
|
||||||
expression: [...expressions],
|
expression: [...expressions],
|
||||||
reading: [...readings],
|
reading: [...readings],
|
||||||
// expressions
|
expressions: termDetailsList,
|
||||||
// furiganaSegments
|
// furiganaSegments
|
||||||
glossary: [...glossary],
|
glossary: [...glossary],
|
||||||
definitionTags,
|
definitionTags,
|
||||||
@ -1102,13 +1106,25 @@ class Translator {
|
|||||||
};
|
};
|
||||||
}
|
}
|
||||||
|
|
||||||
_createExpressionDetails(expression, reading, termTags) {
|
_createTermDetailsListFromTermInfoMap(termInfoMap) {
|
||||||
|
const termDetailsList = [];
|
||||||
|
for (const [expression, readingMap] of termInfoMap.entries()) {
|
||||||
|
for (const [reading, {termTagsMap, sourceTerm, furiganaSegments}] of readingMap.entries()) {
|
||||||
|
const termTags = [...termTagsMap.values()];
|
||||||
|
this._sortTags(termTags);
|
||||||
|
termDetailsList.push(this._createTermDetails(sourceTerm, expression, reading, furiganaSegments, termTags));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return termDetailsList;
|
||||||
|
}
|
||||||
|
|
||||||
|
_createTermDetails(sourceTerm, expression, reading, furiganaSegments, termTags) {
|
||||||
const termFrequency = this._scoreToTermFrequency(this._getTermTagsScoreSum(termTags));
|
const termFrequency = this._scoreToTermFrequency(this._getTermTagsScoreSum(termTags));
|
||||||
const furiganaSegments = jp.distributeFurigana(expression, reading);
|
|
||||||
return {
|
return {
|
||||||
|
sourceTerm,
|
||||||
expression,
|
expression,
|
||||||
reading,
|
reading,
|
||||||
furiganaSegments,
|
furiganaSegments, // Contains duplicate data
|
||||||
termTags,
|
termTags,
|
||||||
termFrequency,
|
termFrequency,
|
||||||
frequencies: [],
|
frequencies: [],
|
||||||
|
Loading…
Reference in New Issue
Block a user