Translator refactoring (#878)

* Add helper function _getSecondarySearchDictionaryMap

* Use spreads instead of concat

* Create helper function for assigning disambiguation info

* Add _getTermTagsScoreSum helper

* Rename variables

* Move assignment of definition details

* Convert databaseDefinitions to standard definitions

* Move creation of definitions

* Always pass definitionsByGlossary directly to _mergeByGlossary

* Refactor how used definitions are tracked

* Change _mergeByGlossary parameters

* Simplify _mergeByGlossary

* Generate definition structure at the end of the function

* Add reusable function to clone a tag

* Restructure merged sub-definition creation

* Move setup of totalExpressionSet/totalReadingSet

* Track all definitions with matching glossary

* Move creation of definitionTags

* Update score calculation for multiple definitions

* Disambiguate function name

* Rename variables

* Add helper function _createMergedGlossaryTermDefinition

* Remove rarely used check for secondary search results

It is unclear what purpose the check served, since it was almost never used.

* Update data structure

* Rename function/variable

* Add helper function _createMergedTermDefinition

* Assign expressionDetails to variable

* Add rawSource to grouped definitions

* Use shared function for creating unused definition entries

* Add helper function _createMapKey

* Refactor _createExpressionDetails

* Change argument order

* Add _getTagNamesWithCategory helper

* Optimize _compressDefinitionTags

* Add _createGroupedTermDefinition helper

* Add type information

* Use consistent order for term definition fields, add comments for omitted fields

* Remove unused index field

* Add helper function _createKanjiDefinition

* Update types

* Add frequencies/pitches arrays

* Update terms array setup

* Simplify for loop

* Remove redundant property assignment

* Simplify how deinflections with no definitions are ignored

* Ensure duplicates are removed

* Rename variable

* Add comments about duplicate data

* Refactor variable/parameter names

* Move functions

* Rename functions

* Rename variable

* Add dictionaryPriority field

* Simplify priority comparison

* Rename argument

* Optimize comparison function when dictionary priority is not needed

* Add length check

* Remove unused parameter

* Rename variables/parameters

* Rename dictionaries to enabledDictionaryMap

* Store compactTags/mainDictionary options to variables

* Refactor _getSearchableText
This commit is contained in:
toasted-nutbread 2020-10-04 11:12:15 -04:00 committed by GitHub
parent ef333b6d72
commit 86c64ac4c2
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23

View File

@ -54,45 +54,36 @@ class Translator {
} }
async findKanji(text, options) { async findKanji(text, options) {
const dictionaries = this._getEnabledDictionaryMap(options); const enabledDictionaryMap = this._getEnabledDictionaryMap(options);
const kanjiUnique = new Set(); const kanjiUnique = new Set();
for (const c of text) { for (const c of text) {
kanjiUnique.add(c); kanjiUnique.add(c);
} }
const databaseDefinitions = await this._database.findKanjiBulk([...kanjiUnique], dictionaries); const databaseDefinitions = await this._database.findKanjiBulk([...kanjiUnique], enabledDictionaryMap);
if (databaseDefinitions.length === 0) { return []; } if (databaseDefinitions.length === 0) { return []; }
this._sortDatabaseDefinitionsByIndex(databaseDefinitions); this._sortDatabaseDefinitionsByIndex(databaseDefinitions);
const definitions = []; const definitions = [];
for (const {index, character, onyomi, kunyomi, tags, glossary, stats, dictionary} of databaseDefinitions) { for (const {character, onyomi, kunyomi, tags, glossary, stats, dictionary} of databaseDefinitions) {
const expandedStats = await this._expandStats(stats, dictionary); const expandedStats = await this._expandStats(stats, dictionary);
const expandedTags = await this._expandTags(tags, dictionary); const expandedTags = await this._expandTags(tags, dictionary);
expandedTags.push(this._createDictionaryTag(dictionary)); expandedTags.push(this._createDictionaryTag(dictionary));
this._sortTags(expandedTags); this._sortTags(expandedTags);
definitions.push({ const definition = this._createKanjiDefinition(character, dictionary, onyomi, kunyomi, glossary, expandedTags, expandedStats);
index, definitions.push(definition);
character,
onyomi,
kunyomi,
tags: expandedTags,
glossary,
stats: expandedStats,
dictionary,
frequencies: []
});
} }
await this._buildKanjiMeta(definitions, dictionaries); await this._buildKanjiMeta(definitions, enabledDictionaryMap);
return definitions; return definitions;
} }
// Private // Private
async _getSequencedDefinitions(definitions, mainDictionary) { async _getSequencedDefinitions(definitions, mainDictionary, enabledDictionaryMap) {
const sequenceList = []; const sequenceList = [];
const sequencedDefinitionMap = new Map(); const sequencedDefinitionMap = new Map();
const sequencedDefinitions = []; const sequencedDefinitions = [];
@ -103,13 +94,14 @@ class Translator {
const {score} = definition; const {score} = definition;
let sequencedDefinition = sequencedDefinitionMap.get(sequence); let sequencedDefinition = sequencedDefinitionMap.get(sequence);
if (typeof sequencedDefinition === 'undefined') { if (typeof sequencedDefinition === 'undefined') {
const {reasons, source} = definition; const {reasons, source, rawSource} = definition;
sequencedDefinition = { sequencedDefinition = {
reasons, reasons,
score, score,
source, source,
rawSource,
dictionary, dictionary,
databaseDefinitions: [] definitions: []
}; };
sequencedDefinitionMap.set(sequence, sequencedDefinition); sequencedDefinitionMap.set(sequence, sequencedDefinition);
sequencedDefinitions.push(sequencedDefinition); sequencedDefinitions.push(sequencedDefinition);
@ -122,155 +114,202 @@ class Translator {
} }
} }
if (sequenceList.length > 0) {
const databaseDefinitions = await this._database.findTermsBySequenceBulk(sequenceList, mainDictionary); const databaseDefinitions = await this._database.findTermsBySequenceBulk(sequenceList, mainDictionary);
for (const databaseDefinition of databaseDefinitions) { for (const databaseDefinition of databaseDefinitions) {
sequencedDefinitions[databaseDefinition.index].databaseDefinitions.push(databaseDefinition); const {definitions: definitions2, source, rawSource, reasons} = sequencedDefinitions[databaseDefinition.index];
const definition = await this._createTermDefinitionFromDatabaseDefinition(databaseDefinition, source, rawSource, reasons, enabledDictionaryMap);
definitions2.push(definition);
}
} }
return {sequencedDefinitions, unsequencedDefinitions}; return {sequencedDefinitions, unsequencedDefinitions};
} }
async _getMergedSecondarySearchResults(text, expressionsMap, secondarySearchDictionaries) { async _getMergedSecondarySearchResults(expressionsMap, secondarySearchDictionaryMap) {
if (secondarySearchDictionaries.size === 0) { if (secondarySearchDictionaryMap.size === 0) {
return []; return [];
} }
const expressionList = []; const expressionList = [];
const readingList = []; const readingList = [];
for (const [expression, readingMap] of expressionsMap.entries()) { for (const [expression, readingMap] of expressionsMap.entries()) {
if (expression === text) { continue; }
for (const reading of readingMap.keys()) { for (const reading of readingMap.keys()) {
expressionList.push(expression); expressionList.push(expression);
readingList.push(reading); readingList.push(reading);
} }
} }
const databaseDefinitions = await this._database.findTermsExactBulk(expressionList, readingList, secondarySearchDictionaries); const databaseDefinitions = await this._database.findTermsExactBulk(expressionList, readingList, secondarySearchDictionaryMap);
this._sortDatabaseDefinitionsByIndex(databaseDefinitions); this._sortDatabaseDefinitionsByIndex(databaseDefinitions);
const definitions = []; const definitions = [];
for (const databaseDefinition of databaseDefinitions) { for (const databaseDefinition of databaseDefinitions) {
const source = expressionList[databaseDefinition.index]; const source = expressionList[databaseDefinition.index];
const definition = await this._createTermDefinitionFromDatabaseDefinition(databaseDefinition, source, source, []); const definition = await this._createTermDefinitionFromDatabaseDefinition(databaseDefinition, source, source, [], secondarySearchDictionaryMap);
definitions.push(definition); definitions.push(definition);
} }
return definitions; return definitions;
} }
async _getMergedDefinition(text, dictionaries, sequencedDefinition, defaultDefinitions, secondarySearchDictionaries, mergedByTermIndices) { async _getMergedDefinition(sequencedDefinition, unsequencedDefinitions, secondarySearchDictionaryMap, usedDefinitions) {
const {reasons, score, source, dictionary, databaseDefinitions} = sequencedDefinition; const {reasons, score, source, rawSource, dictionary, definitions} = sequencedDefinition;
const result = { const definitionDetailsMap = new Map();
reasons, const glossaryDefinitions = [];
score, const glossaryDefinitionGroupMap = new Map();
expression: new Set(),
reading: new Set(), this._mergeByGlossary(definitions, glossaryDefinitionGroupMap);
expressions: new Map(), this._addDefinitionDetails(definitions, definitionDetailsMap);
let secondaryDefinitions = await this._getMergedSecondarySearchResults(definitionDetailsMap, secondarySearchDictionaryMap);
secondaryDefinitions = [unsequencedDefinitions, ...secondaryDefinitions];
this._removeUsedDefinitions(secondaryDefinitions, definitionDetailsMap, usedDefinitions);
this._removeDuplicateDefinitions(secondaryDefinitions);
this._mergeByGlossary(secondaryDefinitions, glossaryDefinitionGroupMap);
const allExpressions = new Set();
const allReadings = new Set();
for (const {expressions, readings} of glossaryDefinitionGroupMap.values()) {
for (const expression of expressions) { allExpressions.add(expression); }
for (const reading of readings) { allReadings.add(reading); }
}
for (const {expressions, readings, definitions: definitions2} of glossaryDefinitionGroupMap.values()) {
const glossaryDefinition = this._createMergedGlossaryTermDefinition(
source, source,
dictionary, rawSource,
definitions: [] definitions2,
}; expressions,
readings,
for (const definition of databaseDefinitions) { allExpressions,
const definitionTags = await this._expandTags(definition.definitionTags, definition.dictionary); allReadings
definitionTags.push(this._createDictionaryTag(definition.dictionary)); );
definition.definitionTags = definitionTags; glossaryDefinitions.push(glossaryDefinition);
const termTags = await this._expandTags(definition.termTags, definition.dictionary);
definition.termTags = termTags;
} }
const definitionsByGloss = this._mergeByGlossary(result, databaseDefinitions); this._sortDefinitions(glossaryDefinitions, true);
const secondarySearchResults = await this._getMergedSecondarySearchResults(text, result.expressions, secondarySearchDictionaries);
this._mergeByGlossary(result, defaultDefinitions.concat(secondarySearchResults), definitionsByGloss, mergedByTermIndices); const expressionDetailsList = [];
for (const [expression, readingMap] of definitionDetailsMap.entries()) {
for (const definition of definitionsByGloss.values()) {
this._sortTags(definition.definitionTags);
result.definitions.push(definition);
}
this._sortDefinitions(result.definitions, dictionaries);
const expressions = [];
for (const [expression, readingMap] of result.expressions.entries()) {
for (const [reading, termTagsMap] of readingMap.entries()) { for (const [reading, termTagsMap] of readingMap.entries()) {
const termTags = [...termTagsMap.values()]; const termTags = [...termTagsMap.values()];
const score2 = termTags.map((tag) => tag.score).reduce((p, v) => p + v, 0);
this._sortTags(termTags); this._sortTags(termTags);
expressions.push(this._createExpression(expression, reading, termTags, this._scoreToTermFrequency(score2))); expressionDetailsList.push(this._createExpressionDetails(expression, reading, termTags));
} }
} }
result.expressions = expressions; return this._createMergedTermDefinition(
result.expression = Array.from(result.expression); source,
result.reading = Array.from(result.reading); rawSource,
glossaryDefinitions,
[...allExpressions],
[...allReadings],
expressionDetailsList,
reasons,
dictionary,
score
);
}
_removeUsedDefinitions(definitions, definitionDetailsMap, usedDefinitions) {
for (let i = 0, ii = definitions.length; i < ii; ++i) {
const definition = definitions[i];
const {expression, reading} = definition;
const expressionMap = definitionDetailsMap.get(expression);
if (
typeof expressionMap !== 'undefined' &&
typeof expressionMap.get(reading) !== 'undefined'
) {
usedDefinitions.add(definition);
} else {
definitions.splice(i, 1);
--i;
--ii;
}
}
}
_getUniqueDefinitionTags(definitions) {
const definitionTagsMap = new Map();
for (const {definitionTags} of definitions) {
for (const tag of definitionTags) {
const {name} = tag;
if (definitionTagsMap.has(name)) { continue; }
definitionTagsMap.set(name, this._cloneTag(tag));
}
}
return [...definitionTagsMap.values()];
}
_getTermTagsScoreSum(termTags) {
let result = 0;
for (const {score} of termTags) { result += score; }
return result; return result;
} }
async _findTermsGrouped(text, details, options) { async _findTermsGrouped(text, details, options) {
const dictionaries = this._getEnabledDictionaryMap(options); const {general: {compactTags}} = options;
const [definitions, length] = await this._findTermsInternal(text, dictionaries, details, options); const enabledDictionaryMap = this._getEnabledDictionaryMap(options);
const [definitions, length] = await this._findTermsInternal(text, enabledDictionaryMap, details, options);
const definitionsGrouped = this._groupTerms(definitions, dictionaries); const groupedDefinitions = this._groupTerms(definitions, enabledDictionaryMap);
await this._buildTermMeta(definitionsGrouped, dictionaries); await this._buildTermMeta(groupedDefinitions, enabledDictionaryMap);
this._sortDefinitions(definitionsGrouped, null); this._sortDefinitions(groupedDefinitions, false);
if (options.general.compactTags) { if (compactTags) {
for (const definition of definitionsGrouped) { for (const definition of groupedDefinitions) {
this._compressDefinitionTags(definition.definitions); this._compressDefinitionTags(definition.definitions);
} }
} }
return [definitionsGrouped, length]; return [groupedDefinitions, length];
} }
async _findTermsMerged(text, details, options) { async _findTermsMerged(text, details, options) {
const dictionaries = this._getEnabledDictionaryMap(options); const {general: {compactTags, mainDictionary}} = options;
const secondarySearchDictionaries = new Map(); const enabledDictionaryMap = this._getEnabledDictionaryMap(options);
for (const [title, dictionary] of dictionaries.entries()) { const secondarySearchDictionaryMap = this._getSecondarySearchDictionaryMap(enabledDictionaryMap);
if (!dictionary.allowSecondarySearches) { continue; }
secondarySearchDictionaries.set(title, dictionary);
}
const [definitions, length] = await this._findTermsInternal(text, dictionaries, details, options); const [definitions, length] = await this._findTermsInternal(text, enabledDictionaryMap, details, options);
const {sequencedDefinitions, unsequencedDefinitions} = await this._getSequencedDefinitions(definitions, options.general.mainDictionary); const {sequencedDefinitions, unsequencedDefinitions} = await this._getSequencedDefinitions(definitions, mainDictionary, enabledDictionaryMap);
const definitionsMerged = []; const definitionsMerged = [];
const mergedByTermIndices = new Set(); const usedDefinitions = new Set();
for (const sequencedDefinition of sequencedDefinitions) { for (const sequencedDefinition of sequencedDefinitions) {
const result = await this._getMergedDefinition( const result = await this._getMergedDefinition(
text,
dictionaries,
sequencedDefinition, sequencedDefinition,
unsequencedDefinitions, unsequencedDefinitions,
secondarySearchDictionaries, secondarySearchDictionaryMap,
mergedByTermIndices usedDefinitions
); );
definitionsMerged.push(result); definitionsMerged.push(result);
} }
const strayDefinitions = unsequencedDefinitions.filter((definition, index) => !mergedByTermIndices.has(index)); const unusedDefinitions = unsequencedDefinitions.filter((definition) => !usedDefinitions.has(definition));
for (const groupedDefinition of this._groupTerms(strayDefinitions, dictionaries)) { for (const groupedDefinition of this._groupTerms(unusedDefinitions, enabledDictionaryMap)) {
// from dictTermsMergeBySequence const {reasons, score, expression, reading, source, rawSource, dictionary, termTags} = groupedDefinition;
const {reasons, score, expression, reading, source, dictionary} = groupedDefinition; const expressionDetails = this._createExpressionDetails(expression, reading, termTags);
const compatibilityDefinition = { const compatibilityDefinition = this._createMergedTermDefinition(
reasons,
score,
expression: [expression],
reading: [reading],
expressions: [this._createExpression(groupedDefinition.expression, groupedDefinition.reading)],
source, source,
rawSource,
definitions,
[expression],
[reading],
[expressionDetails],
reasons,
dictionary, dictionary,
definitions: groupedDefinition.definitions score
}; );
definitionsMerged.push(compatibilityDefinition); definitionsMerged.push(compatibilityDefinition);
} }
await this._buildTermMeta(definitionsMerged, dictionaries); await this._buildTermMeta(definitionsMerged, enabledDictionaryMap);
this._sortDefinitions(definitionsMerged, null); this._sortDefinitions(definitionsMerged, false);
if (options.general.compactTags) { if (compactTags) {
for (const definition of definitionsMerged) { for (const definition of definitionsMerged) {
this._compressDefinitionTags(definition.definitions); this._compressDefinitionTags(definition.definitions);
} }
@ -280,38 +319,39 @@ class Translator {
} }
async _findTermsSplit(text, details, options) { async _findTermsSplit(text, details, options) {
const dictionaries = this._getEnabledDictionaryMap(options); const enabledDictionaryMap = this._getEnabledDictionaryMap(options);
const [definitions, length] = await this._findTermsInternal(text, dictionaries, details, options); const [definitions, length] = await this._findTermsInternal(text, enabledDictionaryMap, details, options);
await this._buildTermMeta(definitions, dictionaries); await this._buildTermMeta(definitions, enabledDictionaryMap);
this._sortDefinitions(definitions, dictionaries); this._sortDefinitions(definitions, true);
return [definitions, length]; return [definitions, length];
} }
async _findTermsSimple(text, details, options) { async _findTermsSimple(text, details, options) {
const dictionaries = this._getEnabledDictionaryMap(options); const enabledDictionaryMap = this._getEnabledDictionaryMap(options);
const [definitions, length] = await this._findTermsInternal(text, dictionaries, details, options); const [definitions, length] = await this._findTermsInternal(text, enabledDictionaryMap, details, options);
this._sortDefinitions(definitions, null); this._sortDefinitions(definitions, false);
return [definitions, length]; return [definitions, length];
} }
async _findTermsInternal(text, dictionaries, details, options) { async _findTermsInternal(text, enabledDictionaryMap, details, options) {
text = this._getSearchableText(text, options); text = this._getSearchableText(text, options.scanning.alphanumeric);
if (text.length === 0) { if (text.length === 0) {
return [[], 0]; return [[], 0];
} }
const deinflections = ( const deinflections = (
details.wildcard ? details.wildcard ?
await this._findTermWildcard(text, dictionaries, details.wildcard) : await this._findTermWildcard(text, enabledDictionaryMap, details.wildcard) :
await this._findTermDeinflections(text, dictionaries, options) await this._findTermDeinflections(text, enabledDictionaryMap, options)
); );
let maxLength = 0; let maxLength = 0;
const definitions = []; const definitions = [];
for (const {databaseDefinitions, source, rawSource, reasons} of deinflections) { for (const {databaseDefinitions, source, rawSource, reasons} of deinflections) {
if (databaseDefinitions.length === 0) { continue; }
maxLength = Math.max(maxLength, rawSource.length); maxLength = Math.max(maxLength, rawSource.length);
for (const databaseDefinition of databaseDefinitions) { for (const databaseDefinition of databaseDefinitions) {
const definition = await this._createTermDefinitionFromDatabaseDefinition(databaseDefinition, source, rawSource, reasons); const definition = await this._createTermDefinitionFromDatabaseDefinition(databaseDefinition, source, rawSource, reasons, enabledDictionaryMap);
definitions.push(definition); definitions.push(definition);
} }
} }
@ -320,8 +360,8 @@ class Translator {
return [definitions, maxLength]; return [definitions, maxLength];
} }
async _findTermWildcard(text, dictionaries, wildcard) { async _findTermWildcard(text, enabledDictionaryMap, wildcard) {
const databaseDefinitions = await this._database.findTermsBulk([text], dictionaries, wildcard); const databaseDefinitions = await this._database.findTermsBulk([text], enabledDictionaryMap, wildcard);
if (databaseDefinitions.length === 0) { if (databaseDefinitions.length === 0) {
return []; return [];
} }
@ -336,7 +376,7 @@ class Translator {
}]; }];
} }
async _findTermDeinflections(text, dictionaries, options) { async _findTermDeinflections(text, enabledDictionaryMap, options) {
const deinflections = this._getAllDeinflections(text, options); const deinflections = this._getAllDeinflections(text, options);
if (deinflections.length === 0) { if (deinflections.length === 0) {
@ -358,7 +398,7 @@ class Translator {
deinflectionArray.push(deinflection); deinflectionArray.push(deinflection);
} }
const databaseDefinitions = await this._database.findTermsBulk(uniqueDeinflectionTerms, dictionaries, null); const databaseDefinitions = await this._database.findTermsBulk(uniqueDeinflectionTerms, enabledDictionaryMap, null);
for (const databaseDefinition of databaseDefinitions) { for (const databaseDefinition of databaseDefinitions) {
const definitionRules = Deinflector.rulesToRuleFlags(databaseDefinition.rules); const definitionRules = Deinflector.rulesToRuleFlags(databaseDefinition.rules);
@ -370,7 +410,7 @@ class Translator {
} }
} }
return deinflections.filter((e) => e.databaseDefinitions.length > 0); return deinflections;
} }
_getAllDeinflections(text, options) { _getAllDeinflections(text, options) {
@ -438,13 +478,17 @@ class Translator {
} }
} }
async _buildTermMeta(definitions, dictionaries) { async _buildTermMeta(definitions, enabledDictionaryMap) {
const terms = []; const terms = [];
for (const definition of definitions) { for (const definition of definitions) {
if (definition.expressions) { switch (definition.type) {
terms.push(...definition.expressions); case 'term':
} else { case 'termGrouped':
terms.push(definition); terms.push(definition);
break;
case 'termMerged':
terms.push(...definition.expressions);
break;
} }
} }
@ -456,9 +500,8 @@ class Translator {
const expressionsUnique = []; const expressionsUnique = [];
const termsUnique = []; const termsUnique = [];
const termsUniqueMap = new Map(); const termsUniqueMap = new Map();
for (let i = 0, ii = terms.length; i < ii; ++i) { for (const term of terms) {
const term = terms[i]; const {expression} = term;
const expression = term.expression;
let termList = termsUniqueMap.get(expression); let termList = termsUniqueMap.get(expression);
if (typeof termList === 'undefined') { if (typeof termList === 'undefined') {
termList = []; termList = [];
@ -467,13 +510,9 @@ class Translator {
termsUniqueMap.set(expression, termList); termsUniqueMap.set(expression, termList);
} }
termList.push(term); termList.push(term);
// New data
term.frequencies = [];
term.pitches = [];
} }
const metas = await this._database.findTermMetaBulk(expressionsUnique, dictionaries); const metas = await this._database.findTermMetaBulk(expressionsUnique, enabledDictionaryMap);
for (const {expression, mode, data, dictionary, index} of metas) { for (const {expression, mode, data, dictionary, index} of metas) {
switch (mode) { switch (mode) {
case 'freq': case 'freq':
@ -494,13 +533,13 @@ class Translator {
} }
} }
async _buildKanjiMeta(definitions, dictionaries) { async _buildKanjiMeta(definitions, enabledDictionaryMap) {
const kanjiList = []; const kanjiList = [];
for (const {character} of definitions) { for (const {character} of definitions) {
kanjiList.push(character); kanjiList.push(character);
} }
const metas = await this._database.findKanjiMetaBulk(kanjiList, dictionaries); const metas = await this._database.findKanjiMetaBulk(kanjiList, enabledDictionaryMap);
for (const {character, mode, data, dictionary, index} of metas) { for (const {character, mode, data, dictionary, index} of metas) {
switch (mode) { switch (mode) {
case 'freq': case 'freq':
@ -638,8 +677,11 @@ class Translator {
} }
} }
_getSearchableText(text, options) { _getSearchableText(text, allowAlphanumericCharacters) {
if (!options.scanning.alphanumeric) { if (allowAlphanumericCharacters) {
return text;
}
let newText = ''; let newText = '';
for (const c of text) { for (const c of text) {
if (!jp.isCodePointJapanese(c.codePointAt(0))) { if (!jp.isCodePointJapanese(c.codePointAt(0))) {
@ -647,10 +689,7 @@ class Translator {
} }
newText += c; newText += c;
} }
text = newText; return newText;
}
return text;
} }
async _fetchJsonAsset(url) { async _fetchJsonAsset(url) {
@ -677,6 +716,20 @@ class Translator {
return enabledDictionaryMap; return enabledDictionaryMap;
} }
_getSecondarySearchDictionaryMap(enabledDictionaryMap) {
const secondarySearchDictionaryMap = new Map();
for (const [title, dictionary] of enabledDictionaryMap.entries()) {
if (!dictionary.allowSecondarySearches) { continue; }
secondarySearchDictionaryMap.set(title, dictionary);
}
return secondarySearchDictionaryMap;
}
_getDictionaryPriority(dictionary, enabledDictionaryMap) {
const info = enabledDictionaryMap.get(dictionary);
return typeof info !== 'undefined' ? info.priority : 0;
}
_removeDuplicateDefinitions(definitions) { _removeDuplicateDefinitions(definitions) {
const definitionGroups = new Map(); const definitionGroups = new Map();
for (let i = 0, ii = definitions.length; i < ii; ++i) { for (let i = 0, ii = definitions.length; i < ii; ++i) {
@ -703,43 +756,60 @@ class Translator {
_compressDefinitionTags(definitions) { _compressDefinitionTags(definitions) {
let lastDictionary = ''; let lastDictionary = '';
let lastPartOfSpeech = ''; let lastPartOfSpeech = '';
const removeCategoriesSet = new Set();
for (const definition of definitions) { for (const {definitionTags} of definitions) {
const dictionary = JSON.stringify(definition.definitionTags.filter((tag) => tag.category === 'dictionary').map((tag) => tag.name).sort()); const dictionary = this._createMapKey(this._getTagNamesWithCategory(definitionTags, 'dictionary'));
const partOfSpeech = JSON.stringify(definition.definitionTags.filter((tag) => tag.category === 'partOfSpeech').map((tag) => tag.name).sort()); const partOfSpeech = this._createMapKey(this._getTagNamesWithCategory(definitionTags, 'partOfSpeech'));
const filterOutCategories = [];
if (lastDictionary === dictionary) { if (lastDictionary === dictionary) {
filterOutCategories.push('dictionary'); removeCategoriesSet.add('dictionary');
} else { } else {
lastDictionary = dictionary; lastDictionary = dictionary;
lastPartOfSpeech = ''; lastPartOfSpeech = '';
} }
if (lastPartOfSpeech === partOfSpeech) { if (lastPartOfSpeech === partOfSpeech) {
filterOutCategories.push('partOfSpeech'); removeCategoriesSet.add('partOfSpeech');
} else { } else {
lastPartOfSpeech = partOfSpeech; lastPartOfSpeech = partOfSpeech;
} }
definition.definitionTags = definition.definitionTags.filter((tag) => !filterOutCategories.includes(tag.category)); if (removeCategoriesSet.size > 0) {
this._removeTagsWithCategory(definitionTags, removeCategoriesSet);
removeCategoriesSet.clear();
}
} }
} }
_groupTerms(definitions, dictionaries) { _getTagNamesWithCategory(tags, category) {
const results = [];
for (const tag of tags) {
if (tag.category !== category) { continue; }
results.push(tag.name);
}
results.sort();
return results;
}
_removeTagsWithCategory(tags, removeCategoriesSet) {
for (let i = 0, ii = tags.length; i < ii; ++i) {
const {category} = tags[i];
if (!removeCategoriesSet.has(category)) { continue; }
tags.splice(i, 1);
--i;
--ii;
}
}
_groupTerms(definitions) {
const groups = new Map(); const groups = new Map();
for (const definition of definitions) { for (const definition of definitions) {
const key = [definition.source, definition.expression, ...definition.reasons]; const key = this._createMapKey([definition.source, definition.expression, definition.reading, ...definition.reasons]);
if (definition.reading) { let groupDefinitions = groups.get(key);
key.push(definition.reading);
}
const keyString = key.toString();
let groupDefinitions = groups.get(keyString);
if (typeof groupDefinitions === 'undefined') { if (typeof groupDefinitions === 'undefined') {
groupDefinitions = []; groupDefinitions = [];
groups.set(keyString, groupDefinitions); groups.set(key, groupDefinitions);
} }
groupDefinitions.push(definition); groupDefinitions.push(definition);
@ -747,94 +817,41 @@ class Translator {
const results = []; const results = [];
for (const groupDefinitions of groups.values()) { for (const groupDefinitions of groups.values()) {
const firstDef = groupDefinitions[0]; this._sortDefinitions(groupDefinitions, true);
this._sortDefinitions(groupDefinitions, dictionaries); const definition = this._createGroupedTermDefinition(groupDefinitions);
results.push({ results.push(definition);
definitions: groupDefinitions,
expression: firstDef.expression,
reading: firstDef.reading,
furiganaSegments: firstDef.furiganaSegments,
reasons: firstDef.reasons,
termTags: firstDef.termTags,
score: groupDefinitions.reduce((p, v) => v.score > p ? v.score : p, Number.MIN_SAFE_INTEGER),
source: firstDef.source
});
} }
return results; return results;
} }
_mergeByGlossary(result, definitions, appendTo=null, mergedIndices=null) { _mergeByGlossary(definitions, glossaryDefinitionGroupMap) {
const definitionsByGlossary = appendTo !== null ? appendTo : new Map(); for (const definition of definitions) {
const {expression, reading, dictionary, glossary} = definition;
const resultExpressionsMap = result.expressions; const key = this._createMapKey([dictionary, ...glossary]);
const resultExpressionSet = result.expression; let group = glossaryDefinitionGroupMap.get(key);
const resultReadingSet = result.reading; if (typeof group === 'undefined') {
const resultSource = result.source; group = {
expressions: new Set(),
for (let i = 0, ii = definitions.length; i < ii; ++i) { readings: new Set(),
const definition = definitions[i]; definitions: []
const {expression, reading} = definition;
if (mergedIndices !== null) {
const expressionMap = resultExpressionsMap.get(expression);
if (
typeof expressionMap !== 'undefined' &&
typeof expressionMap.get(reading) !== 'undefined'
) {
mergedIndices.add(i);
} else {
continue;
}
}
const gloss = JSON.stringify(definition.glossary.concat(definition.dictionary));
let glossDefinition = definitionsByGlossary.get(gloss);
if (typeof glossDefinition === 'undefined') {
glossDefinition = {
expression: new Set(),
reading: new Set(),
definitionTags: [],
glossary: definition.glossary,
source: resultSource,
reasons: [],
score: definition.score,
id: definition.id,
dictionary: definition.dictionary
}; };
definitionsByGlossary.set(gloss, glossDefinition); glossaryDefinitionGroupMap.set(key, group);
} }
glossDefinition.expression.add(expression); group.expressions.add(expression);
glossDefinition.reading.add(reading); group.readings.add(reading);
group.definitions.push(definition);
resultExpressionSet.add(expression);
resultReadingSet.add(reading);
for (const tag of definition.definitionTags) {
if (!glossDefinition.definitionTags.find((existingTag) => existingTag.name === tag.name)) {
glossDefinition.definitionTags.push(tag);
} }
} }
if (appendTo === null) { _addDefinitionDetails(definitions, definitionDetailsMap) {
/* for (const {expression, reading, termTags} of definitions) {
Data layout: let readingMap = definitionDetailsMap.get(expression);
resultExpressionsMap = new Map([
[expression, new Map([
[reading, new Map([
[tagName, tagInfo],
...
])],
...
])],
...
]);
*/
let readingMap = resultExpressionsMap.get(expression);
if (typeof readingMap === 'undefined') { if (typeof readingMap === 'undefined') {
readingMap = new Map(); readingMap = new Map();
resultExpressionsMap.set(expression, readingMap); definitionDetailsMap.set(expression, readingMap);
} }
let termTagsMap = readingMap.get(reading); let termTagsMap = readingMap.get(reading);
@ -843,28 +860,45 @@ class Translator {
readingMap.set(reading, termTagsMap); readingMap.set(reading, termTagsMap);
} }
for (const tag of definition.termTags) { for (const tag of termTags) {
if (!termTagsMap.has(tag.name)) { const {name} = tag;
termTagsMap.set(tag.name, tag); if (termTagsMap.has(name)) { continue; }
} termTagsMap.set(name, this._cloneTag(tag));
} }
} }
} }
/**
 * _getMaxDefinitionScore(definitions): returns the largest `score` found among
 * the given definitions. The running maximum starts at Number.MIN_SAFE_INTEGER,
 * so that value is returned when `definitions` yields no entries.
 *
 * NOTE(review): the lines below are side-by-side diff residue. The text on the
 * left of each line (the loop over definitionsByGlossary.values() that assigns
 * `definition.only`) belongs to the removed implementation; only the right-hand
 * text forms this method.
 */
for (const definition of definitionsByGlossary.values()) { _getMaxDefinitionScore(definitions) {
const only = []; let result = Number.MIN_SAFE_INTEGER;
const expressionSet = definition.expression; for (const {score} of definitions) {
const readingSet = definition.reading; if (score > result) { result = score; }
if (!areSetsEqual(expressionSet, resultExpressionSet)) {
only.push(...getSetIntersection(expressionSet, resultExpressionSet));
} }
if (!areSetsEqual(readingSet, resultReadingSet)) { return result;
only.push(...getSetIntersection(readingSet, resultReadingSet));
}
definition.only = only;
} }
/**
 * _getMaxDictionaryPriority(definitions): returns the largest
 * `dictionaryPriority` found among the given definitions. The running maximum
 * starts at Number.MIN_SAFE_INTEGER, so that value is returned when
 * `definitions` yields no entries.
 *
 * NOTE(review): the leading `return definitionsByGlossary;` on the first line
 * is residue from the removed side of a side-by-side diff, not part of this
 * method.
 */
return definitionsByGlossary; _getMaxDictionaryPriority(definitions) {
let result = Number.MIN_SAFE_INTEGER;
for (const {dictionaryPriority} of definitions) {
// Keep the highest priority seen so far.
if (dictionaryPriority > result) { result = dictionaryPriority; }
}
return result;
}
_cloneTag(tag) {
const {name, category, notes, order, score, dictionary} = tag;
return this._createTag(name, category, notes, order, score, dictionary);
}
_cloneTags(tags) {
const results = [];
for (const tag of tags) {
results.push(this._cloneTag(tag));
}
return results;
}
/**
 * Produces a string usable as a Map key by JSON-serializing `array`.
 * @param {Array} array Values that together identify an entry.
 * @returns {string} The result of `JSON.stringify(array)`.
 *
 * NOTE(review): the second `}` on the closing line is residue from the removed
 * side of a side-by-side diff.
 */
_createMapKey(array) {
return JSON.stringify(array);
} }
_createDictionaryTag(name) { _createDictionaryTag(name) {
@ -894,8 +928,23 @@ class Translator {
}; };
} }
/**
 * _createKanjiDefinition(character, dictionary, onyomi, kunyomi, glossary, tags, stats):
 * returns a plain object with `type: 'kanji'`, the seven arguments stored under
 * their own names, and a fresh empty `frequencies` array.
 *
 * NOTE(review): the `async _createTermDefinitionFromDatabaseDefinition(...) {`
 * text at the start of the first line is residue from the removed side of a
 * side-by-side diff, not part of this method.
 */
async _createTermDefinitionFromDatabaseDefinition(databaseDefinition, source, rawSource, reasons) { _createKanjiDefinition(character, dictionary, onyomi, kunyomi, glossary, tags, stats) {
return {
type: 'kanji',
character,
dictionary,
onyomi,
kunyomi,
glossary,
tags,
stats,
frequencies: []
};
}
async _createTermDefinitionFromDatabaseDefinition(databaseDefinition, source, rawSource, reasons, enabledDictionaryMap) {
const {expression, reading, definitionTags, termTags, glossary, score, dictionary, id, sequence} = databaseDefinition; const {expression, reading, definitionTags, termTags, glossary, score, dictionary, id, sequence} = databaseDefinition;
const dictionaryPriority = this._getDictionaryPriority(dictionary, enabledDictionaryMap);
const termTagsExpanded = await this._expandTags(termTags, dictionary); const termTagsExpanded = await this._expandTags(termTags, dictionary);
const definitionTagsExpanded = await this._expandTags(definitionTags, dictionary); const definitionTagsExpanded = await this._expandTags(definitionTags, dictionary);
definitionTagsExpanded.push(this._createDictionaryTag(dictionary)); definitionTagsExpanded.push(this._createDictionaryTag(dictionary));
@ -906,30 +955,133 @@ class Translator {
const furiganaSegments = jp.distributeFurigana(expression, reading); const furiganaSegments = jp.distributeFurigana(expression, reading);
return { return {
type: 'term',
id,
source, source,
rawSource, rawSource,
reasons, reasons,
score, score,
id, sequence,
dictionary, dictionary,
dictionaryPriority,
expression, expression,
reading, reading,
// expressions
furiganaSegments, furiganaSegments,
glossary, glossary,
definitionTags: definitionTagsExpanded, definitionTags: definitionTagsExpanded,
termTags: termTagsExpanded, termTags: termTagsExpanded,
sequence // definitions
frequencies: [],
pitches: []
// only
}; };
} }
/**
 * _createGroupedTermDefinition(definitions): builds a `type: 'termGrouped'`
 * object from a list of definitions.
 *
 * Expression, reading, furigana segments, reasons, term tags, source and
 * rawSource are taken from the first definition. `score` and
 * `dictionaryPriority` are the maxima over all definitions, via
 * `_getMaxDefinitionScore` and `_getMaxDictionaryPriority`. `reasons` is
 * shallow-copied and `termTags` go through `_cloneTags`; the `definitions`
 * array itself is stored as passed in.
 *
 * NOTE(review): reads `definitions[0]`, so presumably callers always pass a
 * non-empty array; confirm. The `_createExpression(...) {` text at the start of
 * the first line is residue from the removed side of a side-by-side diff.
 */
_createExpression(expression, reading, termTags=null, termFrequency=null) { _createGroupedTermDefinition(definitions) {
const {expression, reading, furiganaSegments, reasons, termTags, source, rawSource} = definitions[0];
const score = this._getMaxDefinitionScore(definitions);
const dictionaryPriority = this._getMaxDictionaryPriority(definitions);
return {
type: 'termGrouped',
// id
source,
rawSource,
reasons: [...reasons],
score,
// sequence
// dictionary
dictionaryPriority,
expression,
reading,
// expressions
furiganaSegments, // Contains duplicate data
// glossary
// definitionTags
termTags: this._cloneTags(termTags),
definitions,
frequencies: [],
pitches: []
// only
};
}
_createMergedTermDefinition(source, rawSource, definitions, expressions, readings, expressionDetailsList, reasons, dictionary, score) {
const dictionaryPriority = this._getMaxDictionaryPriority(definitions);
return {
type: 'termMerged',
// id
source,
rawSource,
reasons,
score,
// sequence
dictionary,
dictionaryPriority,
expression: expressions,
reading: readings,
expressions: expressionDetailsList,
// furiganaSegments
// glossary
// definitionTags
// termTags
definitions,
frequencies: [],
pitches: []
// only
};
}
_createMergedGlossaryTermDefinition(source, rawSource, definitions, expressions, readings, allExpressions, allReadings) {
const only = [];
if (!areSetsEqual(expressions, allExpressions)) {
only.push(...getSetIntersection(expressions, allExpressions));
}
if (!areSetsEqual(readings, allReadings)) {
only.push(...getSetIntersection(readings, allReadings));
}
const definitionTags = this._getUniqueDefinitionTags(definitions);
this._sortTags(definitionTags);
const {glossary, dictionary} = definitions[0];
const score = this._getMaxDefinitionScore(definitions);
const dictionaryPriority = this._getMaxDictionaryPriority(definitions);
return {
type: 'termMergedByGlossary',
// id
source,
rawSource,
reasons: [],
score,
// sequence
dictionary,
dictionaryPriority,
expression: [...expressions],
reading: [...readings],
// expressions
// furiganaSegments
glossary: [...glossary],
definitionTags,
// termTags
definitions, // Contains duplicate data
frequencies: [],
pitches: [],
only
};
}
/**
 * Builds the details object for one expression/reading pair.
 *
 * `termFrequency` is the result of passing the summed term tag score
 * (`_getTermTagsScoreSum`) to `_scoreToTermFrequency`. `furiganaSegments` comes
 * from `jp.distributeFurigana(expression, reading)`. `termTags` is stored as
 * passed in, and `frequencies` / `pitches` start out as empty arrays.
 *
 * NOTE(review): several lines below repeat their content twice, or differ only
 * by a trailing comma. That is residue from a side-by-side diff, where the left
 * copy belongs to the removed `_createExpression` implementation.
 */
_createExpressionDetails(expression, reading, termTags) {
const termFrequency = this._scoreToTermFrequency(this._getTermTagsScoreSum(termTags));
const furiganaSegments = jp.distributeFurigana(expression, reading); const furiganaSegments = jp.distributeFurigana(expression, reading);
return { return {
expression, expression,
reading, reading,
furiganaSegments, furiganaSegments,
termTags, termTags,
termFrequency termFrequency,
frequencies: [],
pitches: []
}; };
} }
@ -944,21 +1096,11 @@ class Translator {
}); });
} }
_sortDefinitions(definitions, dictionaries) { _sortDefinitions(definitions, useDictionaryPriority) {
if (definitions.length <= 1) { return; } if (definitions.length <= 1) { return; }
const stringComparer = this._stringComparer; const stringComparer = this._stringComparer;
definitions.sort((v1, v2) => { const compareFunction1 = (v1, v2) => {
let i; let i = v2.source.length - v1.source.length;
if (dictionaries !== null) {
const dictionaryInfo1 = dictionaries.get(v1.dictionary);
const dictionaryInfo2 = dictionaries.get(v2.dictionary);
const priority1 = typeof dictionaryInfo1 !== 'undefined' ? dictionaryInfo1.priority : 0;
const priority2 = typeof dictionaryInfo2 !== 'undefined' ? dictionaryInfo2.priority : 0;
i = priority2 - priority1;
if (i !== 0) { return i; }
}
i = v2.source.length - v1.source.length;
if (i !== 0) { return i; } if (i !== 0) { return i; }
i = v1.reasons.length - v2.reasons.length; i = v1.reasons.length - v2.reasons.length;
@ -973,7 +1115,12 @@ class Translator {
if (i !== 0) { return i; } if (i !== 0) { return i; }
return stringComparer.compare(expression1, expression2); return stringComparer.compare(expression1, expression2);
}); };
const compareFunction2 = (v1, v2) => {
const i = v2.dictionaryPriority - v1.dictionaryPriority;
return (i !== 0) ? i : compareFunction1(v1, v2);
};
definitions.sort(useDictionaryPriority ? compareFunction2 : compareFunction1);
} }
_sortDatabaseDefinitionsByIndex(definitions) { _sortDatabaseDefinitionsByIndex(definitions) {