Translator refactoring (#878)
* Add helper function _getSecondarySearchDictionaryMap * Use spreads instead of concat * Create helper function for assigning disambiguation info * Add _getTermTagsScoreSum helper * Rename variables * Move assignment of definition details * Convert databaseDefinitions to standard definitions * Move creation of definitions * Always pass definitionsByGlossary directly to _mergeByGlossary * Refactor how used definitions are tracked * Change _mergeByGlossary parameters * Simplify _mergeByGlossary * Generate definition structure at the end of the function * Add reusable function to clone a tag * Restructure merged sub-definition creation * Move setup of totalExpressionSet/totalReadingSet * Track all definitions with matching glossary * Move creation of definitionTags * Update score calculation for multiple definitions * Disambiguate function name * Rename variables * Add helper function _createMergedGlossaryTermDefinition * Remove rarely used check for secondary search results Unclear what the purpose was since it was almost never used * Update data structure * Rename function/variable * Add helper functions _createMergedTermDefinition * Assign expressionDetails to variable * Add rawSource to grouped definitions * Use shared function for creating unused definition entries * Add helper function _createMapKey * Refactor _createExpressionDetails * Change argument order * Add _getTagNamesWithCategory helper * Optimize _compressDefinitionTags * Add _createGroupedTermDefinition helper * Add type information * Use consistent order for term definition fields, add comments for omitted fields * Remove unused index field * Add helper function _createKanjiDefinition * Update types * Add frequencies/pitches arrays * Update terms array setup * Simplify for loop * Remove redundant property assignment * Simplify how deinflections with no definitions are ignored * Ensure duplicates are removed * Rename variable * Add comments about duplicate data * Refactor variable/parameter names * Move functions * Rename functions * Rename variable * Add dictionaryPriority field * Simplify priority comparison * Rename argument * Optimize comparison function when dictionary priority is not needed * Add length check * Remove unused parameter * Rename variables/parameters * Rename dictionaries to enabledDictionaryMap * Store compactTags/mainDictionary options to variables * Refactor _getSearchableText
This commit is contained in:
parent
ef333b6d72
commit
86c64ac4c2
@ -54,45 +54,36 @@ class Translator {
|
|||||||
}
|
}
|
||||||
|
|
||||||
async findKanji(text, options) {
|
async findKanji(text, options) {
|
||||||
const dictionaries = this._getEnabledDictionaryMap(options);
|
const enabledDictionaryMap = this._getEnabledDictionaryMap(options);
|
||||||
const kanjiUnique = new Set();
|
const kanjiUnique = new Set();
|
||||||
for (const c of text) {
|
for (const c of text) {
|
||||||
kanjiUnique.add(c);
|
kanjiUnique.add(c);
|
||||||
}
|
}
|
||||||
|
|
||||||
const databaseDefinitions = await this._database.findKanjiBulk([...kanjiUnique], dictionaries);
|
const databaseDefinitions = await this._database.findKanjiBulk([...kanjiUnique], enabledDictionaryMap);
|
||||||
if (databaseDefinitions.length === 0) { return []; }
|
if (databaseDefinitions.length === 0) { return []; }
|
||||||
|
|
||||||
this._sortDatabaseDefinitionsByIndex(databaseDefinitions);
|
this._sortDatabaseDefinitionsByIndex(databaseDefinitions);
|
||||||
|
|
||||||
const definitions = [];
|
const definitions = [];
|
||||||
for (const {index, character, onyomi, kunyomi, tags, glossary, stats, dictionary} of databaseDefinitions) {
|
for (const {character, onyomi, kunyomi, tags, glossary, stats, dictionary} of databaseDefinitions) {
|
||||||
const expandedStats = await this._expandStats(stats, dictionary);
|
const expandedStats = await this._expandStats(stats, dictionary);
|
||||||
const expandedTags = await this._expandTags(tags, dictionary);
|
const expandedTags = await this._expandTags(tags, dictionary);
|
||||||
expandedTags.push(this._createDictionaryTag(dictionary));
|
expandedTags.push(this._createDictionaryTag(dictionary));
|
||||||
this._sortTags(expandedTags);
|
this._sortTags(expandedTags);
|
||||||
|
|
||||||
definitions.push({
|
const definition = this._createKanjiDefinition(character, dictionary, onyomi, kunyomi, glossary, expandedTags, expandedStats);
|
||||||
index,
|
definitions.push(definition);
|
||||||
character,
|
|
||||||
onyomi,
|
|
||||||
kunyomi,
|
|
||||||
tags: expandedTags,
|
|
||||||
glossary,
|
|
||||||
stats: expandedStats,
|
|
||||||
dictionary,
|
|
||||||
frequencies: []
|
|
||||||
});
|
|
||||||
}
|
}
|
||||||
|
|
||||||
await this._buildKanjiMeta(definitions, dictionaries);
|
await this._buildKanjiMeta(definitions, enabledDictionaryMap);
|
||||||
|
|
||||||
return definitions;
|
return definitions;
|
||||||
}
|
}
|
||||||
|
|
||||||
// Private
|
// Private
|
||||||
|
|
||||||
async _getSequencedDefinitions(definitions, mainDictionary) {
|
async _getSequencedDefinitions(definitions, mainDictionary, enabledDictionaryMap) {
|
||||||
const sequenceList = [];
|
const sequenceList = [];
|
||||||
const sequencedDefinitionMap = new Map();
|
const sequencedDefinitionMap = new Map();
|
||||||
const sequencedDefinitions = [];
|
const sequencedDefinitions = [];
|
||||||
@ -103,13 +94,14 @@ class Translator {
|
|||||||
const {score} = definition;
|
const {score} = definition;
|
||||||
let sequencedDefinition = sequencedDefinitionMap.get(sequence);
|
let sequencedDefinition = sequencedDefinitionMap.get(sequence);
|
||||||
if (typeof sequencedDefinition === 'undefined') {
|
if (typeof sequencedDefinition === 'undefined') {
|
||||||
const {reasons, source} = definition;
|
const {reasons, source, rawSource} = definition;
|
||||||
sequencedDefinition = {
|
sequencedDefinition = {
|
||||||
reasons,
|
reasons,
|
||||||
score,
|
score,
|
||||||
source,
|
source,
|
||||||
|
rawSource,
|
||||||
dictionary,
|
dictionary,
|
||||||
databaseDefinitions: []
|
definitions: []
|
||||||
};
|
};
|
||||||
sequencedDefinitionMap.set(sequence, sequencedDefinition);
|
sequencedDefinitionMap.set(sequence, sequencedDefinition);
|
||||||
sequencedDefinitions.push(sequencedDefinition);
|
sequencedDefinitions.push(sequencedDefinition);
|
||||||
@ -122,155 +114,202 @@ class Translator {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if (sequenceList.length > 0) {
|
||||||
const databaseDefinitions = await this._database.findTermsBySequenceBulk(sequenceList, mainDictionary);
|
const databaseDefinitions = await this._database.findTermsBySequenceBulk(sequenceList, mainDictionary);
|
||||||
for (const databaseDefinition of databaseDefinitions) {
|
for (const databaseDefinition of databaseDefinitions) {
|
||||||
sequencedDefinitions[databaseDefinition.index].databaseDefinitions.push(databaseDefinition);
|
const {definitions: definitions2, source, rawSource, reasons} = sequencedDefinitions[databaseDefinition.index];
|
||||||
|
const definition = await this._createTermDefinitionFromDatabaseDefinition(databaseDefinition, source, rawSource, reasons, enabledDictionaryMap);
|
||||||
|
definitions2.push(definition);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
return {sequencedDefinitions, unsequencedDefinitions};
|
return {sequencedDefinitions, unsequencedDefinitions};
|
||||||
}
|
}
|
||||||
|
|
||||||
async _getMergedSecondarySearchResults(text, expressionsMap, secondarySearchDictionaries) {
|
async _getMergedSecondarySearchResults(expressionsMap, secondarySearchDictionaryMap) {
|
||||||
if (secondarySearchDictionaries.size === 0) {
|
if (secondarySearchDictionaryMap.size === 0) {
|
||||||
return [];
|
return [];
|
||||||
}
|
}
|
||||||
|
|
||||||
const expressionList = [];
|
const expressionList = [];
|
||||||
const readingList = [];
|
const readingList = [];
|
||||||
for (const [expression, readingMap] of expressionsMap.entries()) {
|
for (const [expression, readingMap] of expressionsMap.entries()) {
|
||||||
if (expression === text) { continue; }
|
|
||||||
for (const reading of readingMap.keys()) {
|
for (const reading of readingMap.keys()) {
|
||||||
expressionList.push(expression);
|
expressionList.push(expression);
|
||||||
readingList.push(reading);
|
readingList.push(reading);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
const databaseDefinitions = await this._database.findTermsExactBulk(expressionList, readingList, secondarySearchDictionaries);
|
const databaseDefinitions = await this._database.findTermsExactBulk(expressionList, readingList, secondarySearchDictionaryMap);
|
||||||
this._sortDatabaseDefinitionsByIndex(databaseDefinitions);
|
this._sortDatabaseDefinitionsByIndex(databaseDefinitions);
|
||||||
|
|
||||||
const definitions = [];
|
const definitions = [];
|
||||||
for (const databaseDefinition of databaseDefinitions) {
|
for (const databaseDefinition of databaseDefinitions) {
|
||||||
const source = expressionList[databaseDefinition.index];
|
const source = expressionList[databaseDefinition.index];
|
||||||
const definition = await this._createTermDefinitionFromDatabaseDefinition(databaseDefinition, source, source, []);
|
const definition = await this._createTermDefinitionFromDatabaseDefinition(databaseDefinition, source, source, [], secondarySearchDictionaryMap);
|
||||||
definitions.push(definition);
|
definitions.push(definition);
|
||||||
}
|
}
|
||||||
|
|
||||||
return definitions;
|
return definitions;
|
||||||
}
|
}
|
||||||
|
|
||||||
async _getMergedDefinition(text, dictionaries, sequencedDefinition, defaultDefinitions, secondarySearchDictionaries, mergedByTermIndices) {
|
async _getMergedDefinition(sequencedDefinition, unsequencedDefinitions, secondarySearchDictionaryMap, usedDefinitions) {
|
||||||
const {reasons, score, source, dictionary, databaseDefinitions} = sequencedDefinition;
|
const {reasons, score, source, rawSource, dictionary, definitions} = sequencedDefinition;
|
||||||
const result = {
|
const definitionDetailsMap = new Map();
|
||||||
reasons,
|
const glossaryDefinitions = [];
|
||||||
score,
|
const glossaryDefinitionGroupMap = new Map();
|
||||||
expression: new Set(),
|
|
||||||
reading: new Set(),
|
this._mergeByGlossary(definitions, glossaryDefinitionGroupMap);
|
||||||
expressions: new Map(),
|
this._addDefinitionDetails(definitions, definitionDetailsMap);
|
||||||
|
|
||||||
|
let secondaryDefinitions = await this._getMergedSecondarySearchResults(definitionDetailsMap, secondarySearchDictionaryMap);
|
||||||
|
secondaryDefinitions = [unsequencedDefinitions, ...secondaryDefinitions];
|
||||||
|
|
||||||
|
this._removeUsedDefinitions(secondaryDefinitions, definitionDetailsMap, usedDefinitions);
|
||||||
|
this._removeDuplicateDefinitions(secondaryDefinitions);
|
||||||
|
|
||||||
|
this._mergeByGlossary(secondaryDefinitions, glossaryDefinitionGroupMap);
|
||||||
|
|
||||||
|
const allExpressions = new Set();
|
||||||
|
const allReadings = new Set();
|
||||||
|
for (const {expressions, readings} of glossaryDefinitionGroupMap.values()) {
|
||||||
|
for (const expression of expressions) { allExpressions.add(expression); }
|
||||||
|
for (const reading of readings) { allReadings.add(reading); }
|
||||||
|
}
|
||||||
|
|
||||||
|
for (const {expressions, readings, definitions: definitions2} of glossaryDefinitionGroupMap.values()) {
|
||||||
|
const glossaryDefinition = this._createMergedGlossaryTermDefinition(
|
||||||
source,
|
source,
|
||||||
dictionary,
|
rawSource,
|
||||||
definitions: []
|
definitions2,
|
||||||
};
|
expressions,
|
||||||
|
readings,
|
||||||
for (const definition of databaseDefinitions) {
|
allExpressions,
|
||||||
const definitionTags = await this._expandTags(definition.definitionTags, definition.dictionary);
|
allReadings
|
||||||
definitionTags.push(this._createDictionaryTag(definition.dictionary));
|
);
|
||||||
definition.definitionTags = definitionTags;
|
glossaryDefinitions.push(glossaryDefinition);
|
||||||
const termTags = await this._expandTags(definition.termTags, definition.dictionary);
|
|
||||||
definition.termTags = termTags;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
const definitionsByGloss = this._mergeByGlossary(result, databaseDefinitions);
|
this._sortDefinitions(glossaryDefinitions, true);
|
||||||
const secondarySearchResults = await this._getMergedSecondarySearchResults(text, result.expressions, secondarySearchDictionaries);
|
|
||||||
|
|
||||||
this._mergeByGlossary(result, defaultDefinitions.concat(secondarySearchResults), definitionsByGloss, mergedByTermIndices);
|
const expressionDetailsList = [];
|
||||||
|
for (const [expression, readingMap] of definitionDetailsMap.entries()) {
|
||||||
for (const definition of definitionsByGloss.values()) {
|
|
||||||
this._sortTags(definition.definitionTags);
|
|
||||||
result.definitions.push(definition);
|
|
||||||
}
|
|
||||||
|
|
||||||
this._sortDefinitions(result.definitions, dictionaries);
|
|
||||||
|
|
||||||
const expressions = [];
|
|
||||||
for (const [expression, readingMap] of result.expressions.entries()) {
|
|
||||||
for (const [reading, termTagsMap] of readingMap.entries()) {
|
for (const [reading, termTagsMap] of readingMap.entries()) {
|
||||||
const termTags = [...termTagsMap.values()];
|
const termTags = [...termTagsMap.values()];
|
||||||
const score2 = termTags.map((tag) => tag.score).reduce((p, v) => p + v, 0);
|
|
||||||
this._sortTags(termTags);
|
this._sortTags(termTags);
|
||||||
expressions.push(this._createExpression(expression, reading, termTags, this._scoreToTermFrequency(score2)));
|
expressionDetailsList.push(this._createExpressionDetails(expression, reading, termTags));
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
result.expressions = expressions;
|
return this._createMergedTermDefinition(
|
||||||
result.expression = Array.from(result.expression);
|
source,
|
||||||
result.reading = Array.from(result.reading);
|
rawSource,
|
||||||
|
glossaryDefinitions,
|
||||||
|
[...allExpressions],
|
||||||
|
[...allReadings],
|
||||||
|
expressionDetailsList,
|
||||||
|
reasons,
|
||||||
|
dictionary,
|
||||||
|
score
|
||||||
|
);
|
||||||
|
}
|
||||||
|
|
||||||
|
_removeUsedDefinitions(definitions, definitionDetailsMap, usedDefinitions) {
|
||||||
|
for (let i = 0, ii = definitions.length; i < ii; ++i) {
|
||||||
|
const definition = definitions[i];
|
||||||
|
const {expression, reading} = definition;
|
||||||
|
const expressionMap = definitionDetailsMap.get(expression);
|
||||||
|
if (
|
||||||
|
typeof expressionMap !== 'undefined' &&
|
||||||
|
typeof expressionMap.get(reading) !== 'undefined'
|
||||||
|
) {
|
||||||
|
usedDefinitions.add(definition);
|
||||||
|
} else {
|
||||||
|
definitions.splice(i, 1);
|
||||||
|
--i;
|
||||||
|
--ii;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
_getUniqueDefinitionTags(definitions) {
|
||||||
|
const definitionTagsMap = new Map();
|
||||||
|
for (const {definitionTags} of definitions) {
|
||||||
|
for (const tag of definitionTags) {
|
||||||
|
const {name} = tag;
|
||||||
|
if (definitionTagsMap.has(name)) { continue; }
|
||||||
|
definitionTagsMap.set(name, this._cloneTag(tag));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return [...definitionTagsMap.values()];
|
||||||
|
}
|
||||||
|
|
||||||
|
_getTermTagsScoreSum(termTags) {
|
||||||
|
let result = 0;
|
||||||
|
for (const {score} of termTags) { result += score; }
|
||||||
return result;
|
return result;
|
||||||
}
|
}
|
||||||
|
|
||||||
async _findTermsGrouped(text, details, options) {
|
async _findTermsGrouped(text, details, options) {
|
||||||
const dictionaries = this._getEnabledDictionaryMap(options);
|
const {general: {compactTags}} = options;
|
||||||
const [definitions, length] = await this._findTermsInternal(text, dictionaries, details, options);
|
const enabledDictionaryMap = this._getEnabledDictionaryMap(options);
|
||||||
|
const [definitions, length] = await this._findTermsInternal(text, enabledDictionaryMap, details, options);
|
||||||
|
|
||||||
const definitionsGrouped = this._groupTerms(definitions, dictionaries);
|
const groupedDefinitions = this._groupTerms(definitions, enabledDictionaryMap);
|
||||||
await this._buildTermMeta(definitionsGrouped, dictionaries);
|
await this._buildTermMeta(groupedDefinitions, enabledDictionaryMap);
|
||||||
this._sortDefinitions(definitionsGrouped, null);
|
this._sortDefinitions(groupedDefinitions, false);
|
||||||
|
|
||||||
if (options.general.compactTags) {
|
if (compactTags) {
|
||||||
for (const definition of definitionsGrouped) {
|
for (const definition of groupedDefinitions) {
|
||||||
this._compressDefinitionTags(definition.definitions);
|
this._compressDefinitionTags(definition.definitions);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
return [definitionsGrouped, length];
|
return [groupedDefinitions, length];
|
||||||
}
|
}
|
||||||
|
|
||||||
async _findTermsMerged(text, details, options) {
|
async _findTermsMerged(text, details, options) {
|
||||||
const dictionaries = this._getEnabledDictionaryMap(options);
|
const {general: {compactTags, mainDictionary}} = options;
|
||||||
const secondarySearchDictionaries = new Map();
|
const enabledDictionaryMap = this._getEnabledDictionaryMap(options);
|
||||||
for (const [title, dictionary] of dictionaries.entries()) {
|
const secondarySearchDictionaryMap = this._getSecondarySearchDictionaryMap(enabledDictionaryMap);
|
||||||
if (!dictionary.allowSecondarySearches) { continue; }
|
|
||||||
secondarySearchDictionaries.set(title, dictionary);
|
|
||||||
}
|
|
||||||
|
|
||||||
const [definitions, length] = await this._findTermsInternal(text, dictionaries, details, options);
|
const [definitions, length] = await this._findTermsInternal(text, enabledDictionaryMap, details, options);
|
||||||
const {sequencedDefinitions, unsequencedDefinitions} = await this._getSequencedDefinitions(definitions, options.general.mainDictionary);
|
const {sequencedDefinitions, unsequencedDefinitions} = await this._getSequencedDefinitions(definitions, mainDictionary, enabledDictionaryMap);
|
||||||
const definitionsMerged = [];
|
const definitionsMerged = [];
|
||||||
const mergedByTermIndices = new Set();
|
const usedDefinitions = new Set();
|
||||||
|
|
||||||
for (const sequencedDefinition of sequencedDefinitions) {
|
for (const sequencedDefinition of sequencedDefinitions) {
|
||||||
const result = await this._getMergedDefinition(
|
const result = await this._getMergedDefinition(
|
||||||
text,
|
|
||||||
dictionaries,
|
|
||||||
sequencedDefinition,
|
sequencedDefinition,
|
||||||
unsequencedDefinitions,
|
unsequencedDefinitions,
|
||||||
secondarySearchDictionaries,
|
secondarySearchDictionaryMap,
|
||||||
mergedByTermIndices
|
usedDefinitions
|
||||||
);
|
);
|
||||||
definitionsMerged.push(result);
|
definitionsMerged.push(result);
|
||||||
}
|
}
|
||||||
|
|
||||||
const strayDefinitions = unsequencedDefinitions.filter((definition, index) => !mergedByTermIndices.has(index));
|
const unusedDefinitions = unsequencedDefinitions.filter((definition) => !usedDefinitions.has(definition));
|
||||||
for (const groupedDefinition of this._groupTerms(strayDefinitions, dictionaries)) {
|
for (const groupedDefinition of this._groupTerms(unusedDefinitions, enabledDictionaryMap)) {
|
||||||
// from dictTermsMergeBySequence
|
const {reasons, score, expression, reading, source, rawSource, dictionary, termTags} = groupedDefinition;
|
||||||
const {reasons, score, expression, reading, source, dictionary} = groupedDefinition;
|
const expressionDetails = this._createExpressionDetails(expression, reading, termTags);
|
||||||
const compatibilityDefinition = {
|
const compatibilityDefinition = this._createMergedTermDefinition(
|
||||||
reasons,
|
|
||||||
score,
|
|
||||||
expression: [expression],
|
|
||||||
reading: [reading],
|
|
||||||
expressions: [this._createExpression(groupedDefinition.expression, groupedDefinition.reading)],
|
|
||||||
source,
|
source,
|
||||||
|
rawSource,
|
||||||
|
definitions,
|
||||||
|
[expression],
|
||||||
|
[reading],
|
||||||
|
[expressionDetails],
|
||||||
|
reasons,
|
||||||
dictionary,
|
dictionary,
|
||||||
definitions: groupedDefinition.definitions
|
score
|
||||||
};
|
);
|
||||||
definitionsMerged.push(compatibilityDefinition);
|
definitionsMerged.push(compatibilityDefinition);
|
||||||
}
|
}
|
||||||
|
|
||||||
await this._buildTermMeta(definitionsMerged, dictionaries);
|
await this._buildTermMeta(definitionsMerged, enabledDictionaryMap);
|
||||||
this._sortDefinitions(definitionsMerged, null);
|
this._sortDefinitions(definitionsMerged, false);
|
||||||
|
|
||||||
if (options.general.compactTags) {
|
if (compactTags) {
|
||||||
for (const definition of definitionsMerged) {
|
for (const definition of definitionsMerged) {
|
||||||
this._compressDefinitionTags(definition.definitions);
|
this._compressDefinitionTags(definition.definitions);
|
||||||
}
|
}
|
||||||
@ -280,38 +319,39 @@ class Translator {
|
|||||||
}
|
}
|
||||||
|
|
||||||
async _findTermsSplit(text, details, options) {
|
async _findTermsSplit(text, details, options) {
|
||||||
const dictionaries = this._getEnabledDictionaryMap(options);
|
const enabledDictionaryMap = this._getEnabledDictionaryMap(options);
|
||||||
const [definitions, length] = await this._findTermsInternal(text, dictionaries, details, options);
|
const [definitions, length] = await this._findTermsInternal(text, enabledDictionaryMap, details, options);
|
||||||
await this._buildTermMeta(definitions, dictionaries);
|
await this._buildTermMeta(definitions, enabledDictionaryMap);
|
||||||
this._sortDefinitions(definitions, dictionaries);
|
this._sortDefinitions(definitions, true);
|
||||||
return [definitions, length];
|
return [definitions, length];
|
||||||
}
|
}
|
||||||
|
|
||||||
async _findTermsSimple(text, details, options) {
|
async _findTermsSimple(text, details, options) {
|
||||||
const dictionaries = this._getEnabledDictionaryMap(options);
|
const enabledDictionaryMap = this._getEnabledDictionaryMap(options);
|
||||||
const [definitions, length] = await this._findTermsInternal(text, dictionaries, details, options);
|
const [definitions, length] = await this._findTermsInternal(text, enabledDictionaryMap, details, options);
|
||||||
this._sortDefinitions(definitions, null);
|
this._sortDefinitions(definitions, false);
|
||||||
return [definitions, length];
|
return [definitions, length];
|
||||||
}
|
}
|
||||||
|
|
||||||
async _findTermsInternal(text, dictionaries, details, options) {
|
async _findTermsInternal(text, enabledDictionaryMap, details, options) {
|
||||||
text = this._getSearchableText(text, options);
|
text = this._getSearchableText(text, options.scanning.alphanumeric);
|
||||||
if (text.length === 0) {
|
if (text.length === 0) {
|
||||||
return [[], 0];
|
return [[], 0];
|
||||||
}
|
}
|
||||||
|
|
||||||
const deinflections = (
|
const deinflections = (
|
||||||
details.wildcard ?
|
details.wildcard ?
|
||||||
await this._findTermWildcard(text, dictionaries, details.wildcard) :
|
await this._findTermWildcard(text, enabledDictionaryMap, details.wildcard) :
|
||||||
await this._findTermDeinflections(text, dictionaries, options)
|
await this._findTermDeinflections(text, enabledDictionaryMap, options)
|
||||||
);
|
);
|
||||||
|
|
||||||
let maxLength = 0;
|
let maxLength = 0;
|
||||||
const definitions = [];
|
const definitions = [];
|
||||||
for (const {databaseDefinitions, source, rawSource, reasons} of deinflections) {
|
for (const {databaseDefinitions, source, rawSource, reasons} of deinflections) {
|
||||||
|
if (databaseDefinitions.length === 0) { continue; }
|
||||||
maxLength = Math.max(maxLength, rawSource.length);
|
maxLength = Math.max(maxLength, rawSource.length);
|
||||||
for (const databaseDefinition of databaseDefinitions) {
|
for (const databaseDefinition of databaseDefinitions) {
|
||||||
const definition = await this._createTermDefinitionFromDatabaseDefinition(databaseDefinition, source, rawSource, reasons);
|
const definition = await this._createTermDefinitionFromDatabaseDefinition(databaseDefinition, source, rawSource, reasons, enabledDictionaryMap);
|
||||||
definitions.push(definition);
|
definitions.push(definition);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -320,8 +360,8 @@ class Translator {
|
|||||||
return [definitions, maxLength];
|
return [definitions, maxLength];
|
||||||
}
|
}
|
||||||
|
|
||||||
async _findTermWildcard(text, dictionaries, wildcard) {
|
async _findTermWildcard(text, enabledDictionaryMap, wildcard) {
|
||||||
const databaseDefinitions = await this._database.findTermsBulk([text], dictionaries, wildcard);
|
const databaseDefinitions = await this._database.findTermsBulk([text], enabledDictionaryMap, wildcard);
|
||||||
if (databaseDefinitions.length === 0) {
|
if (databaseDefinitions.length === 0) {
|
||||||
return [];
|
return [];
|
||||||
}
|
}
|
||||||
@ -336,7 +376,7 @@ class Translator {
|
|||||||
}];
|
}];
|
||||||
}
|
}
|
||||||
|
|
||||||
async _findTermDeinflections(text, dictionaries, options) {
|
async _findTermDeinflections(text, enabledDictionaryMap, options) {
|
||||||
const deinflections = this._getAllDeinflections(text, options);
|
const deinflections = this._getAllDeinflections(text, options);
|
||||||
|
|
||||||
if (deinflections.length === 0) {
|
if (deinflections.length === 0) {
|
||||||
@ -358,7 +398,7 @@ class Translator {
|
|||||||
deinflectionArray.push(deinflection);
|
deinflectionArray.push(deinflection);
|
||||||
}
|
}
|
||||||
|
|
||||||
const databaseDefinitions = await this._database.findTermsBulk(uniqueDeinflectionTerms, dictionaries, null);
|
const databaseDefinitions = await this._database.findTermsBulk(uniqueDeinflectionTerms, enabledDictionaryMap, null);
|
||||||
|
|
||||||
for (const databaseDefinition of databaseDefinitions) {
|
for (const databaseDefinition of databaseDefinitions) {
|
||||||
const definitionRules = Deinflector.rulesToRuleFlags(databaseDefinition.rules);
|
const definitionRules = Deinflector.rulesToRuleFlags(databaseDefinition.rules);
|
||||||
@ -370,7 +410,7 @@ class Translator {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
return deinflections.filter((e) => e.databaseDefinitions.length > 0);
|
return deinflections;
|
||||||
}
|
}
|
||||||
|
|
||||||
_getAllDeinflections(text, options) {
|
_getAllDeinflections(text, options) {
|
||||||
@ -438,13 +478,17 @@ class Translator {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
async _buildTermMeta(definitions, dictionaries) {
|
async _buildTermMeta(definitions, enabledDictionaryMap) {
|
||||||
const terms = [];
|
const terms = [];
|
||||||
for (const definition of definitions) {
|
for (const definition of definitions) {
|
||||||
if (definition.expressions) {
|
switch (definition.type) {
|
||||||
terms.push(...definition.expressions);
|
case 'term':
|
||||||
} else {
|
case 'termGrouped':
|
||||||
terms.push(definition);
|
terms.push(definition);
|
||||||
|
break;
|
||||||
|
case 'termMerged':
|
||||||
|
terms.push(...definition.expressions);
|
||||||
|
break;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -456,9 +500,8 @@ class Translator {
|
|||||||
const expressionsUnique = [];
|
const expressionsUnique = [];
|
||||||
const termsUnique = [];
|
const termsUnique = [];
|
||||||
const termsUniqueMap = new Map();
|
const termsUniqueMap = new Map();
|
||||||
for (let i = 0, ii = terms.length; i < ii; ++i) {
|
for (const term of terms) {
|
||||||
const term = terms[i];
|
const {expression} = term;
|
||||||
const expression = term.expression;
|
|
||||||
let termList = termsUniqueMap.get(expression);
|
let termList = termsUniqueMap.get(expression);
|
||||||
if (typeof termList === 'undefined') {
|
if (typeof termList === 'undefined') {
|
||||||
termList = [];
|
termList = [];
|
||||||
@ -467,13 +510,9 @@ class Translator {
|
|||||||
termsUniqueMap.set(expression, termList);
|
termsUniqueMap.set(expression, termList);
|
||||||
}
|
}
|
||||||
termList.push(term);
|
termList.push(term);
|
||||||
|
|
||||||
// New data
|
|
||||||
term.frequencies = [];
|
|
||||||
term.pitches = [];
|
|
||||||
}
|
}
|
||||||
|
|
||||||
const metas = await this._database.findTermMetaBulk(expressionsUnique, dictionaries);
|
const metas = await this._database.findTermMetaBulk(expressionsUnique, enabledDictionaryMap);
|
||||||
for (const {expression, mode, data, dictionary, index} of metas) {
|
for (const {expression, mode, data, dictionary, index} of metas) {
|
||||||
switch (mode) {
|
switch (mode) {
|
||||||
case 'freq':
|
case 'freq':
|
||||||
@ -494,13 +533,13 @@ class Translator {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
async _buildKanjiMeta(definitions, dictionaries) {
|
async _buildKanjiMeta(definitions, enabledDictionaryMap) {
|
||||||
const kanjiList = [];
|
const kanjiList = [];
|
||||||
for (const {character} of definitions) {
|
for (const {character} of definitions) {
|
||||||
kanjiList.push(character);
|
kanjiList.push(character);
|
||||||
}
|
}
|
||||||
|
|
||||||
const metas = await this._database.findKanjiMetaBulk(kanjiList, dictionaries);
|
const metas = await this._database.findKanjiMetaBulk(kanjiList, enabledDictionaryMap);
|
||||||
for (const {character, mode, data, dictionary, index} of metas) {
|
for (const {character, mode, data, dictionary, index} of metas) {
|
||||||
switch (mode) {
|
switch (mode) {
|
||||||
case 'freq':
|
case 'freq':
|
||||||
@ -638,8 +677,11 @@ class Translator {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
_getSearchableText(text, options) {
|
_getSearchableText(text, allowAlphanumericCharacters) {
|
||||||
if (!options.scanning.alphanumeric) {
|
if (allowAlphanumericCharacters) {
|
||||||
|
return text;
|
||||||
|
}
|
||||||
|
|
||||||
let newText = '';
|
let newText = '';
|
||||||
for (const c of text) {
|
for (const c of text) {
|
||||||
if (!jp.isCodePointJapanese(c.codePointAt(0))) {
|
if (!jp.isCodePointJapanese(c.codePointAt(0))) {
|
||||||
@ -647,10 +689,7 @@ class Translator {
|
|||||||
}
|
}
|
||||||
newText += c;
|
newText += c;
|
||||||
}
|
}
|
||||||
text = newText;
|
return newText;
|
||||||
}
|
|
||||||
|
|
||||||
return text;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
async _fetchJsonAsset(url) {
|
async _fetchJsonAsset(url) {
|
||||||
@ -677,6 +716,20 @@ class Translator {
|
|||||||
return enabledDictionaryMap;
|
return enabledDictionaryMap;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
_getSecondarySearchDictionaryMap(enabledDictionaryMap) {
|
||||||
|
const secondarySearchDictionaryMap = new Map();
|
||||||
|
for (const [title, dictionary] of enabledDictionaryMap.entries()) {
|
||||||
|
if (!dictionary.allowSecondarySearches) { continue; }
|
||||||
|
secondarySearchDictionaryMap.set(title, dictionary);
|
||||||
|
}
|
||||||
|
return secondarySearchDictionaryMap;
|
||||||
|
}
|
||||||
|
|
||||||
|
_getDictionaryPriority(dictionary, enabledDictionaryMap) {
|
||||||
|
const info = enabledDictionaryMap.get(dictionary);
|
||||||
|
return typeof info !== 'undefined' ? info.priority : 0;
|
||||||
|
}
|
||||||
|
|
||||||
_removeDuplicateDefinitions(definitions) {
|
_removeDuplicateDefinitions(definitions) {
|
||||||
const definitionGroups = new Map();
|
const definitionGroups = new Map();
|
||||||
for (let i = 0, ii = definitions.length; i < ii; ++i) {
|
for (let i = 0, ii = definitions.length; i < ii; ++i) {
|
||||||
@ -703,43 +756,60 @@ class Translator {
|
|||||||
_compressDefinitionTags(definitions) {
|
_compressDefinitionTags(definitions) {
|
||||||
let lastDictionary = '';
|
let lastDictionary = '';
|
||||||
let lastPartOfSpeech = '';
|
let lastPartOfSpeech = '';
|
||||||
|
const removeCategoriesSet = new Set();
|
||||||
|
|
||||||
for (const definition of definitions) {
|
for (const {definitionTags} of definitions) {
|
||||||
const dictionary = JSON.stringify(definition.definitionTags.filter((tag) => tag.category === 'dictionary').map((tag) => tag.name).sort());
|
const dictionary = this._createMapKey(this._getTagNamesWithCategory(definitionTags, 'dictionary'));
|
||||||
const partOfSpeech = JSON.stringify(definition.definitionTags.filter((tag) => tag.category === 'partOfSpeech').map((tag) => tag.name).sort());
|
const partOfSpeech = this._createMapKey(this._getTagNamesWithCategory(definitionTags, 'partOfSpeech'));
|
||||||
|
|
||||||
const filterOutCategories = [];
|
|
||||||
|
|
||||||
if (lastDictionary === dictionary) {
|
if (lastDictionary === dictionary) {
|
||||||
filterOutCategories.push('dictionary');
|
removeCategoriesSet.add('dictionary');
|
||||||
} else {
|
} else {
|
||||||
lastDictionary = dictionary;
|
lastDictionary = dictionary;
|
||||||
lastPartOfSpeech = '';
|
lastPartOfSpeech = '';
|
||||||
}
|
}
|
||||||
|
|
||||||
if (lastPartOfSpeech === partOfSpeech) {
|
if (lastPartOfSpeech === partOfSpeech) {
|
||||||
filterOutCategories.push('partOfSpeech');
|
removeCategoriesSet.add('partOfSpeech');
|
||||||
} else {
|
} else {
|
||||||
lastPartOfSpeech = partOfSpeech;
|
lastPartOfSpeech = partOfSpeech;
|
||||||
}
|
}
|
||||||
|
|
||||||
definition.definitionTags = definition.definitionTags.filter((tag) => !filterOutCategories.includes(tag.category));
|
if (removeCategoriesSet.size > 0) {
|
||||||
|
this._removeTagsWithCategory(definitionTags, removeCategoriesSet);
|
||||||
|
removeCategoriesSet.clear();
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
_groupTerms(definitions, dictionaries) {
|
_getTagNamesWithCategory(tags, category) {
|
||||||
|
const results = [];
|
||||||
|
for (const tag of tags) {
|
||||||
|
if (tag.category !== category) { continue; }
|
||||||
|
results.push(tag.name);
|
||||||
|
}
|
||||||
|
results.sort();
|
||||||
|
return results;
|
||||||
|
}
|
||||||
|
|
||||||
|
_removeTagsWithCategory(tags, removeCategoriesSet) {
|
||||||
|
for (let i = 0, ii = tags.length; i < ii; ++i) {
|
||||||
|
const {category} = tags[i];
|
||||||
|
if (!removeCategoriesSet.has(category)) { continue; }
|
||||||
|
tags.splice(i, 1);
|
||||||
|
--i;
|
||||||
|
--ii;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
_groupTerms(definitions) {
|
||||||
const groups = new Map();
|
const groups = new Map();
|
||||||
for (const definition of definitions) {
|
for (const definition of definitions) {
|
||||||
const key = [definition.source, definition.expression, ...definition.reasons];
|
const key = this._createMapKey([definition.source, definition.expression, definition.reading, ...definition.reasons]);
|
||||||
if (definition.reading) {
|
let groupDefinitions = groups.get(key);
|
||||||
key.push(definition.reading);
|
|
||||||
}
|
|
||||||
|
|
||||||
const keyString = key.toString();
|
|
||||||
let groupDefinitions = groups.get(keyString);
|
|
||||||
if (typeof groupDefinitions === 'undefined') {
|
if (typeof groupDefinitions === 'undefined') {
|
||||||
groupDefinitions = [];
|
groupDefinitions = [];
|
||||||
groups.set(keyString, groupDefinitions);
|
groups.set(key, groupDefinitions);
|
||||||
}
|
}
|
||||||
|
|
||||||
groupDefinitions.push(definition);
|
groupDefinitions.push(definition);
|
||||||
@ -747,94 +817,41 @@ class Translator {
|
|||||||
|
|
||||||
const results = [];
|
const results = [];
|
||||||
for (const groupDefinitions of groups.values()) {
|
for (const groupDefinitions of groups.values()) {
|
||||||
const firstDef = groupDefinitions[0];
|
this._sortDefinitions(groupDefinitions, true);
|
||||||
this._sortDefinitions(groupDefinitions, dictionaries);
|
const definition = this._createGroupedTermDefinition(groupDefinitions);
|
||||||
results.push({
|
results.push(definition);
|
||||||
definitions: groupDefinitions,
|
|
||||||
expression: firstDef.expression,
|
|
||||||
reading: firstDef.reading,
|
|
||||||
furiganaSegments: firstDef.furiganaSegments,
|
|
||||||
reasons: firstDef.reasons,
|
|
||||||
termTags: firstDef.termTags,
|
|
||||||
score: groupDefinitions.reduce((p, v) => v.score > p ? v.score : p, Number.MIN_SAFE_INTEGER),
|
|
||||||
source: firstDef.source
|
|
||||||
});
|
|
||||||
}
|
}
|
||||||
|
|
||||||
return results;
|
return results;
|
||||||
}
|
}
|
||||||
|
|
||||||
_mergeByGlossary(result, definitions, appendTo=null, mergedIndices=null) {
|
_mergeByGlossary(definitions, glossaryDefinitionGroupMap) {
|
||||||
const definitionsByGlossary = appendTo !== null ? appendTo : new Map();
|
for (const definition of definitions) {
|
||||||
|
const {expression, reading, dictionary, glossary} = definition;
|
||||||
|
|
||||||
const resultExpressionsMap = result.expressions;
|
const key = this._createMapKey([dictionary, ...glossary]);
|
||||||
const resultExpressionSet = result.expression;
|
let group = glossaryDefinitionGroupMap.get(key);
|
||||||
const resultReadingSet = result.reading;
|
if (typeof group === 'undefined') {
|
||||||
const resultSource = result.source;
|
group = {
|
||||||
|
expressions: new Set(),
|
||||||
for (let i = 0, ii = definitions.length; i < ii; ++i) {
|
readings: new Set(),
|
||||||
const definition = definitions[i];
|
definitions: []
|
||||||
const {expression, reading} = definition;
|
|
||||||
|
|
||||||
if (mergedIndices !== null) {
|
|
||||||
const expressionMap = resultExpressionsMap.get(expression);
|
|
||||||
if (
|
|
||||||
typeof expressionMap !== 'undefined' &&
|
|
||||||
typeof expressionMap.get(reading) !== 'undefined'
|
|
||||||
) {
|
|
||||||
mergedIndices.add(i);
|
|
||||||
} else {
|
|
||||||
continue;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
const gloss = JSON.stringify(definition.glossary.concat(definition.dictionary));
|
|
||||||
let glossDefinition = definitionsByGlossary.get(gloss);
|
|
||||||
if (typeof glossDefinition === 'undefined') {
|
|
||||||
glossDefinition = {
|
|
||||||
expression: new Set(),
|
|
||||||
reading: new Set(),
|
|
||||||
definitionTags: [],
|
|
||||||
glossary: definition.glossary,
|
|
||||||
source: resultSource,
|
|
||||||
reasons: [],
|
|
||||||
score: definition.score,
|
|
||||||
id: definition.id,
|
|
||||||
dictionary: definition.dictionary
|
|
||||||
};
|
};
|
||||||
definitionsByGlossary.set(gloss, glossDefinition);
|
glossaryDefinitionGroupMap.set(key, group);
|
||||||
}
|
}
|
||||||
|
|
||||||
glossDefinition.expression.add(expression);
|
group.expressions.add(expression);
|
||||||
glossDefinition.reading.add(reading);
|
group.readings.add(reading);
|
||||||
|
group.definitions.push(definition);
|
||||||
resultExpressionSet.add(expression);
|
|
||||||
resultReadingSet.add(reading);
|
|
||||||
|
|
||||||
for (const tag of definition.definitionTags) {
|
|
||||||
if (!glossDefinition.definitionTags.find((existingTag) => existingTag.name === tag.name)) {
|
|
||||||
glossDefinition.definitionTags.push(tag);
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
if (appendTo === null) {
|
_addDefinitionDetails(definitions, definitionDetailsMap) {
|
||||||
/*
|
for (const {expression, reading, termTags} of definitions) {
|
||||||
Data layout:
|
let readingMap = definitionDetailsMap.get(expression);
|
||||||
resultExpressionsMap = new Map([
|
|
||||||
[expression, new Map([
|
|
||||||
[reading, new Map([
|
|
||||||
[tagName, tagInfo],
|
|
||||||
...
|
|
||||||
])],
|
|
||||||
...
|
|
||||||
])],
|
|
||||||
...
|
|
||||||
]);
|
|
||||||
*/
|
|
||||||
let readingMap = resultExpressionsMap.get(expression);
|
|
||||||
if (typeof readingMap === 'undefined') {
|
if (typeof readingMap === 'undefined') {
|
||||||
readingMap = new Map();
|
readingMap = new Map();
|
||||||
resultExpressionsMap.set(expression, readingMap);
|
definitionDetailsMap.set(expression, readingMap);
|
||||||
}
|
}
|
||||||
|
|
||||||
let termTagsMap = readingMap.get(reading);
|
let termTagsMap = readingMap.get(reading);
|
||||||
@ -843,28 +860,45 @@ class Translator {
|
|||||||
readingMap.set(reading, termTagsMap);
|
readingMap.set(reading, termTagsMap);
|
||||||
}
|
}
|
||||||
|
|
||||||
for (const tag of definition.termTags) {
|
for (const tag of termTags) {
|
||||||
if (!termTagsMap.has(tag.name)) {
|
const {name} = tag;
|
||||||
termTagsMap.set(tag.name, tag);
|
if (termTagsMap.has(name)) { continue; }
|
||||||
}
|
termTagsMap.set(name, this._cloneTag(tag));
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
for (const definition of definitionsByGlossary.values()) {
|
_getMaxDefinitionScore(definitions) {
|
||||||
const only = [];
|
let result = Number.MIN_SAFE_INTEGER;
|
||||||
const expressionSet = definition.expression;
|
for (const {score} of definitions) {
|
||||||
const readingSet = definition.reading;
|
if (score > result) { result = score; }
|
||||||
if (!areSetsEqual(expressionSet, resultExpressionSet)) {
|
|
||||||
only.push(...getSetIntersection(expressionSet, resultExpressionSet));
|
|
||||||
}
|
}
|
||||||
if (!areSetsEqual(readingSet, resultReadingSet)) {
|
return result;
|
||||||
only.push(...getSetIntersection(readingSet, resultReadingSet));
|
|
||||||
}
|
|
||||||
definition.only = only;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
return definitionsByGlossary;
|
_getMaxDictionaryPriority(definitions) {
|
||||||
|
let result = Number.MIN_SAFE_INTEGER;
|
||||||
|
for (const {dictionaryPriority} of definitions) {
|
||||||
|
if (dictionaryPriority > result) { result = dictionaryPriority; }
|
||||||
|
}
|
||||||
|
return result;
|
||||||
|
}
|
||||||
|
|
||||||
|
_cloneTag(tag) {
|
||||||
|
const {name, category, notes, order, score, dictionary} = tag;
|
||||||
|
return this._createTag(name, category, notes, order, score, dictionary);
|
||||||
|
}
|
||||||
|
|
||||||
|
_cloneTags(tags) {
|
||||||
|
const results = [];
|
||||||
|
for (const tag of tags) {
|
||||||
|
results.push(this._cloneTag(tag));
|
||||||
|
}
|
||||||
|
return results;
|
||||||
|
}
|
||||||
|
|
||||||
|
_createMapKey(array) {
|
||||||
|
return JSON.stringify(array);
|
||||||
}
|
}
|
||||||
|
|
||||||
_createDictionaryTag(name) {
|
_createDictionaryTag(name) {
|
||||||
@ -894,8 +928,23 @@ class Translator {
|
|||||||
};
|
};
|
||||||
}
|
}
|
||||||
|
|
||||||
async _createTermDefinitionFromDatabaseDefinition(databaseDefinition, source, rawSource, reasons) {
|
_createKanjiDefinition(character, dictionary, onyomi, kunyomi, glossary, tags, stats) {
|
||||||
|
return {
|
||||||
|
type: 'kanji',
|
||||||
|
character,
|
||||||
|
dictionary,
|
||||||
|
onyomi,
|
||||||
|
kunyomi,
|
||||||
|
glossary,
|
||||||
|
tags,
|
||||||
|
stats,
|
||||||
|
frequencies: []
|
||||||
|
};
|
||||||
|
}
|
||||||
|
|
||||||
|
async _createTermDefinitionFromDatabaseDefinition(databaseDefinition, source, rawSource, reasons, enabledDictionaryMap) {
|
||||||
const {expression, reading, definitionTags, termTags, glossary, score, dictionary, id, sequence} = databaseDefinition;
|
const {expression, reading, definitionTags, termTags, glossary, score, dictionary, id, sequence} = databaseDefinition;
|
||||||
|
const dictionaryPriority = this._getDictionaryPriority(dictionary, enabledDictionaryMap);
|
||||||
const termTagsExpanded = await this._expandTags(termTags, dictionary);
|
const termTagsExpanded = await this._expandTags(termTags, dictionary);
|
||||||
const definitionTagsExpanded = await this._expandTags(definitionTags, dictionary);
|
const definitionTagsExpanded = await this._expandTags(definitionTags, dictionary);
|
||||||
definitionTagsExpanded.push(this._createDictionaryTag(dictionary));
|
definitionTagsExpanded.push(this._createDictionaryTag(dictionary));
|
||||||
@ -906,30 +955,133 @@ class Translator {
|
|||||||
const furiganaSegments = jp.distributeFurigana(expression, reading);
|
const furiganaSegments = jp.distributeFurigana(expression, reading);
|
||||||
|
|
||||||
return {
|
return {
|
||||||
|
type: 'term',
|
||||||
|
id,
|
||||||
source,
|
source,
|
||||||
rawSource,
|
rawSource,
|
||||||
reasons,
|
reasons,
|
||||||
score,
|
score,
|
||||||
id,
|
sequence,
|
||||||
dictionary,
|
dictionary,
|
||||||
|
dictionaryPriority,
|
||||||
expression,
|
expression,
|
||||||
reading,
|
reading,
|
||||||
|
// expressions
|
||||||
furiganaSegments,
|
furiganaSegments,
|
||||||
glossary,
|
glossary,
|
||||||
definitionTags: definitionTagsExpanded,
|
definitionTags: definitionTagsExpanded,
|
||||||
termTags: termTagsExpanded,
|
termTags: termTagsExpanded,
|
||||||
sequence
|
// definitions
|
||||||
|
frequencies: [],
|
||||||
|
pitches: []
|
||||||
|
// only
|
||||||
};
|
};
|
||||||
}
|
}
|
||||||
|
|
||||||
_createExpression(expression, reading, termTags=null, termFrequency=null) {
|
_createGroupedTermDefinition(definitions) {
|
||||||
|
const {expression, reading, furiganaSegments, reasons, termTags, source, rawSource} = definitions[0];
|
||||||
|
const score = this._getMaxDefinitionScore(definitions);
|
||||||
|
const dictionaryPriority = this._getMaxDictionaryPriority(definitions);
|
||||||
|
return {
|
||||||
|
type: 'termGrouped',
|
||||||
|
// id
|
||||||
|
source,
|
||||||
|
rawSource,
|
||||||
|
reasons: [...reasons],
|
||||||
|
score,
|
||||||
|
// sequence
|
||||||
|
// dictionary
|
||||||
|
dictionaryPriority,
|
||||||
|
expression,
|
||||||
|
reading,
|
||||||
|
// expressions
|
||||||
|
furiganaSegments, // Contains duplicate data
|
||||||
|
// glossary
|
||||||
|
// definitionTags
|
||||||
|
termTags: this._cloneTags(termTags),
|
||||||
|
definitions,
|
||||||
|
frequencies: [],
|
||||||
|
pitches: []
|
||||||
|
// only
|
||||||
|
};
|
||||||
|
}
|
||||||
|
|
||||||
|
_createMergedTermDefinition(source, rawSource, definitions, expressions, readings, expressionDetailsList, reasons, dictionary, score) {
|
||||||
|
const dictionaryPriority = this._getMaxDictionaryPriority(definitions);
|
||||||
|
return {
|
||||||
|
type: 'termMerged',
|
||||||
|
// id
|
||||||
|
source,
|
||||||
|
rawSource,
|
||||||
|
reasons,
|
||||||
|
score,
|
||||||
|
// sequence
|
||||||
|
dictionary,
|
||||||
|
dictionaryPriority,
|
||||||
|
expression: expressions,
|
||||||
|
reading: readings,
|
||||||
|
expressions: expressionDetailsList,
|
||||||
|
// furiganaSegments
|
||||||
|
// glossary
|
||||||
|
// definitionTags
|
||||||
|
// termTags
|
||||||
|
definitions,
|
||||||
|
frequencies: [],
|
||||||
|
pitches: []
|
||||||
|
// only
|
||||||
|
};
|
||||||
|
}
|
||||||
|
|
||||||
|
_createMergedGlossaryTermDefinition(source, rawSource, definitions, expressions, readings, allExpressions, allReadings) {
|
||||||
|
const only = [];
|
||||||
|
if (!areSetsEqual(expressions, allExpressions)) {
|
||||||
|
only.push(...getSetIntersection(expressions, allExpressions));
|
||||||
|
}
|
||||||
|
if (!areSetsEqual(readings, allReadings)) {
|
||||||
|
only.push(...getSetIntersection(readings, allReadings));
|
||||||
|
}
|
||||||
|
|
||||||
|
const definitionTags = this._getUniqueDefinitionTags(definitions);
|
||||||
|
this._sortTags(definitionTags);
|
||||||
|
|
||||||
|
const {glossary, dictionary} = definitions[0];
|
||||||
|
const score = this._getMaxDefinitionScore(definitions);
|
||||||
|
const dictionaryPriority = this._getMaxDictionaryPriority(definitions);
|
||||||
|
return {
|
||||||
|
type: 'termMergedByGlossary',
|
||||||
|
// id
|
||||||
|
source,
|
||||||
|
rawSource,
|
||||||
|
reasons: [],
|
||||||
|
score,
|
||||||
|
// sequence
|
||||||
|
dictionary,
|
||||||
|
dictionaryPriority,
|
||||||
|
expression: [...expressions],
|
||||||
|
reading: [...readings],
|
||||||
|
// expressions
|
||||||
|
// furiganaSegments
|
||||||
|
glossary: [...glossary],
|
||||||
|
definitionTags,
|
||||||
|
// termTags
|
||||||
|
definitions, // Contains duplicate data
|
||||||
|
frequencies: [],
|
||||||
|
pitches: [],
|
||||||
|
only
|
||||||
|
};
|
||||||
|
}
|
||||||
|
|
||||||
|
_createExpressionDetails(expression, reading, termTags) {
|
||||||
|
const termFrequency = this._scoreToTermFrequency(this._getTermTagsScoreSum(termTags));
|
||||||
const furiganaSegments = jp.distributeFurigana(expression, reading);
|
const furiganaSegments = jp.distributeFurigana(expression, reading);
|
||||||
return {
|
return {
|
||||||
expression,
|
expression,
|
||||||
reading,
|
reading,
|
||||||
furiganaSegments,
|
furiganaSegments,
|
||||||
termTags,
|
termTags,
|
||||||
termFrequency
|
termFrequency,
|
||||||
|
frequencies: [],
|
||||||
|
pitches: []
|
||||||
};
|
};
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -944,21 +1096,11 @@ class Translator {
|
|||||||
});
|
});
|
||||||
}
|
}
|
||||||
|
|
||||||
_sortDefinitions(definitions, dictionaries) {
|
_sortDefinitions(definitions, useDictionaryPriority) {
|
||||||
if (definitions.length <= 1) { return; }
|
if (definitions.length <= 1) { return; }
|
||||||
const stringComparer = this._stringComparer;
|
const stringComparer = this._stringComparer;
|
||||||
definitions.sort((v1, v2) => {
|
const compareFunction1 = (v1, v2) => {
|
||||||
let i;
|
let i = v2.source.length - v1.source.length;
|
||||||
if (dictionaries !== null) {
|
|
||||||
const dictionaryInfo1 = dictionaries.get(v1.dictionary);
|
|
||||||
const dictionaryInfo2 = dictionaries.get(v2.dictionary);
|
|
||||||
const priority1 = typeof dictionaryInfo1 !== 'undefined' ? dictionaryInfo1.priority : 0;
|
|
||||||
const priority2 = typeof dictionaryInfo2 !== 'undefined' ? dictionaryInfo2.priority : 0;
|
|
||||||
i = priority2 - priority1;
|
|
||||||
if (i !== 0) { return i; }
|
|
||||||
}
|
|
||||||
|
|
||||||
i = v2.source.length - v1.source.length;
|
|
||||||
if (i !== 0) { return i; }
|
if (i !== 0) { return i; }
|
||||||
|
|
||||||
i = v1.reasons.length - v2.reasons.length;
|
i = v1.reasons.length - v2.reasons.length;
|
||||||
@ -973,7 +1115,12 @@ class Translator {
|
|||||||
if (i !== 0) { return i; }
|
if (i !== 0) { return i; }
|
||||||
|
|
||||||
return stringComparer.compare(expression1, expression2);
|
return stringComparer.compare(expression1, expression2);
|
||||||
});
|
};
|
||||||
|
const compareFunction2 = (v1, v2) => {
|
||||||
|
const i = v2.dictionaryPriority - v1.dictionaryPriority;
|
||||||
|
return (i !== 0) ? i : compareFunction1(v1, v2);
|
||||||
|
};
|
||||||
|
definitions.sort(useDictionaryPriority ? compareFunction2 : compareFunction1);
|
||||||
}
|
}
|
||||||
|
|
||||||
_sortDatabaseDefinitionsByIndex(definitions) {
|
_sortDatabaseDefinitionsByIndex(definitions) {
|
||||||
|
Loading…
Reference in New Issue
Block a user