Dictionary data structure improvements (#877)
* Simplify object structure of deinflections
* Modify existing array when removing duplicates
* Update _sortDefinitions to not return a value
* Use explicit argument value
* Use a Collator for string comparisons
* Simplify order comparison
* Improve tag creation and sanitization
* Switch .map to a for loop, skip null meta
* Add _createKanjiStat
* Move _sortDefinitions definition
* Fix kanji stat sorting
* Remove return value from _sortTags
* Add _sortKanji
* Add fast exits for sorting
* Add _sortDefinitionsByIndex
* Combine sort function
* Improve creation of kanji definition data
* Use "databaseDefinitions" instead of "definitions" to disambiguate
* Simplify
* Simplify further
* Simplify max length calculation
* More destructuring
* Use databaseDefinitions variable name
* Move _mergeBySequence body into _getSequencedDefinitions
* Use databaseDefinitions field name
* Move maxLength calculation
* Use shared _createTermDefinitionFromDatabaseDefinition
* Simplify map
* Move definition sorts for better consistency and less redundancy
* Move _createExpression function
* Update setup of sequenced definitions
* Simplify for loop
This commit is contained in:
parent
50f2385aaf
commit
ef333b6d72
@ -21,13 +21,14 @@ class Deinflector {
|
|||||||
this.reasons = Deinflector.normalizeReasons(reasons);
|
this.reasons = Deinflector.normalizeReasons(reasons);
|
||||||
}
|
}
|
||||||
|
|
||||||
deinflect(source) {
|
deinflect(source, rawSource) {
|
||||||
const results = [{
|
const results = [{
|
||||||
source,
|
source,
|
||||||
|
rawSource,
|
||||||
term: source,
|
term: source,
|
||||||
rules: 0,
|
rules: 0,
|
||||||
definitions: [],
|
reasons: [],
|
||||||
reasons: []
|
databaseDefinitions: []
|
||||||
}];
|
}];
|
||||||
for (let i = 0; i < results.length; ++i) {
|
for (let i = 0; i < results.length; ++i) {
|
||||||
const {rules, term, reasons} = results[i];
|
const {rules, term, reasons} = results[i];
|
||||||
@ -43,10 +44,11 @@ class Deinflector {
|
|||||||
|
|
||||||
results.push({
|
results.push({
|
||||||
source,
|
source,
|
||||||
|
rawSource,
|
||||||
term: term.substring(0, term.length - kanaIn.length) + kanaOut,
|
term: term.substring(0, term.length - kanaIn.length) + kanaOut,
|
||||||
rules: rulesOut,
|
rules: rulesOut,
|
||||||
definitions: [],
|
reasons: [reason, ...reasons],
|
||||||
reasons: [reason, ...reasons]
|
databaseDefinitions: []
|
||||||
});
|
});
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -26,6 +26,7 @@ class Translator {
|
|||||||
this._database = database;
|
this._database = database;
|
||||||
this._deinflector = null;
|
this._deinflector = null;
|
||||||
this._tagCache = new Map();
|
this._tagCache = new Map();
|
||||||
|
this._stringComparer = new Intl.Collator('en-US'); // Invariant locale
|
||||||
}
|
}
|
||||||
|
|
||||||
async prepare() {
|
async prepare() {
|
||||||
@ -59,24 +60,29 @@ class Translator {
|
|||||||
kanjiUnique.add(c);
|
kanjiUnique.add(c);
|
||||||
}
|
}
|
||||||
|
|
||||||
const definitions = await this._database.findKanjiBulk([...kanjiUnique], dictionaries);
|
const databaseDefinitions = await this._database.findKanjiBulk([...kanjiUnique], dictionaries);
|
||||||
if (definitions.length === 0) {
|
if (databaseDefinitions.length === 0) { return []; }
|
||||||
return definitions;
|
|
||||||
}
|
|
||||||
|
|
||||||
if (definitions.length > 1) {
|
this._sortDatabaseDefinitionsByIndex(databaseDefinitions);
|
||||||
definitions.sort((a, b) => a.index - b.index);
|
|
||||||
}
|
|
||||||
|
|
||||||
for (const definition of definitions) {
|
const definitions = [];
|
||||||
const tags = await this._expandTags(definition.tags, definition.dictionary);
|
for (const {index, character, onyomi, kunyomi, tags, glossary, stats, dictionary} of databaseDefinitions) {
|
||||||
tags.push(this._createDictionaryTag(definition.dictionary));
|
const expandedStats = await this._expandStats(stats, dictionary);
|
||||||
this._sortTags(tags);
|
const expandedTags = await this._expandTags(tags, dictionary);
|
||||||
|
expandedTags.push(this._createDictionaryTag(dictionary));
|
||||||
|
this._sortTags(expandedTags);
|
||||||
|
|
||||||
const stats = await this._expandStats(definition.stats, definition.dictionary);
|
definitions.push({
|
||||||
|
index,
|
||||||
definition.tags = tags;
|
character,
|
||||||
definition.stats = stats;
|
onyomi,
|
||||||
|
kunyomi,
|
||||||
|
tags: expandedTags,
|
||||||
|
glossary,
|
||||||
|
stats: expandedStats,
|
||||||
|
dictionary,
|
||||||
|
frequencies: []
|
||||||
|
});
|
||||||
}
|
}
|
||||||
|
|
||||||
await this._buildKanjiMeta(definitions, dictionaries);
|
await this._buildKanjiMeta(definitions, dictionaries);
|
||||||
@ -87,20 +93,41 @@ class Translator {
|
|||||||
// Private
|
// Private
|
||||||
|
|
||||||
async _getSequencedDefinitions(definitions, mainDictionary) {
|
async _getSequencedDefinitions(definitions, mainDictionary) {
|
||||||
const [definitionsBySequence, defaultDefinitions] = this._mergeBySequence(definitions, mainDictionary);
|
|
||||||
|
|
||||||
const sequenceList = [];
|
const sequenceList = [];
|
||||||
|
const sequencedDefinitionMap = new Map();
|
||||||
const sequencedDefinitions = [];
|
const sequencedDefinitions = [];
|
||||||
for (const [key, value] of definitionsBySequence.entries()) {
|
const unsequencedDefinitions = [];
|
||||||
sequenceList.push(key);
|
for (const definition of definitions) {
|
||||||
sequencedDefinitions.push({definitions: value, rawDefinitions: []});
|
const {sequence, dictionary} = definition;
|
||||||
|
if (mainDictionary === dictionary && sequence >= 0) {
|
||||||
|
const {score} = definition;
|
||||||
|
let sequencedDefinition = sequencedDefinitionMap.get(sequence);
|
||||||
|
if (typeof sequencedDefinition === 'undefined') {
|
||||||
|
const {reasons, source} = definition;
|
||||||
|
sequencedDefinition = {
|
||||||
|
reasons,
|
||||||
|
score,
|
||||||
|
source,
|
||||||
|
dictionary,
|
||||||
|
databaseDefinitions: []
|
||||||
|
};
|
||||||
|
sequencedDefinitionMap.set(sequence, sequencedDefinition);
|
||||||
|
sequencedDefinitions.push(sequencedDefinition);
|
||||||
|
sequenceList.push(sequence);
|
||||||
|
} else {
|
||||||
|
sequencedDefinition.score = Math.max(sequencedDefinition.score, score);
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
unsequencedDefinitions.push(definition);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
for (const definition of await this._database.findTermsBySequenceBulk(sequenceList, mainDictionary)) {
|
const databaseDefinitions = await this._database.findTermsBySequenceBulk(sequenceList, mainDictionary);
|
||||||
sequencedDefinitions[definition.index].rawDefinitions.push(definition);
|
for (const databaseDefinition of databaseDefinitions) {
|
||||||
|
sequencedDefinitions[databaseDefinition.index].databaseDefinitions.push(databaseDefinition);
|
||||||
}
|
}
|
||||||
|
|
||||||
return {sequencedDefinitions, defaultDefinitions};
|
return {sequencedDefinitions, unsequencedDefinitions};
|
||||||
}
|
}
|
||||||
|
|
||||||
async _getMergedSecondarySearchResults(text, expressionsMap, secondarySearchDictionaries) {
|
async _getMergedSecondarySearchResults(text, expressionsMap, secondarySearchDictionaries) {
|
||||||
@ -110,35 +137,41 @@ class Translator {
|
|||||||
|
|
||||||
const expressionList = [];
|
const expressionList = [];
|
||||||
const readingList = [];
|
const readingList = [];
|
||||||
for (const expression of expressionsMap.keys()) {
|
for (const [expression, readingMap] of expressionsMap.entries()) {
|
||||||
if (expression === text) { continue; }
|
if (expression === text) { continue; }
|
||||||
for (const reading of expressionsMap.get(expression).keys()) {
|
for (const reading of readingMap.keys()) {
|
||||||
expressionList.push(expression);
|
expressionList.push(expression);
|
||||||
readingList.push(reading);
|
readingList.push(reading);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
const definitions = await this._database.findTermsExactBulk(expressionList, readingList, secondarySearchDictionaries);
|
const databaseDefinitions = await this._database.findTermsExactBulk(expressionList, readingList, secondarySearchDictionaries);
|
||||||
for (const definition of definitions) {
|
this._sortDatabaseDefinitionsByIndex(databaseDefinitions);
|
||||||
const definitionTags = await this._expandTags(definition.definitionTags, definition.dictionary);
|
|
||||||
definitionTags.push(this._createDictionaryTag(definition.dictionary));
|
|
||||||
definition.definitionTags = definitionTags;
|
|
||||||
const termTags = await this._expandTags(definition.termTags, definition.dictionary);
|
|
||||||
definition.termTags = termTags;
|
|
||||||
}
|
|
||||||
|
|
||||||
if (definitions.length > 1) {
|
const definitions = [];
|
||||||
definitions.sort((a, b) => a.index - b.index);
|
for (const databaseDefinition of databaseDefinitions) {
|
||||||
|
const source = expressionList[databaseDefinition.index];
|
||||||
|
const definition = await this._createTermDefinitionFromDatabaseDefinition(databaseDefinition, source, source, []);
|
||||||
|
definitions.push(definition);
|
||||||
}
|
}
|
||||||
|
|
||||||
return definitions;
|
return definitions;
|
||||||
}
|
}
|
||||||
|
|
||||||
async _getMergedDefinition(text, dictionaries, sequencedDefinition, defaultDefinitions, secondarySearchDictionaries, mergedByTermIndices) {
|
async _getMergedDefinition(text, dictionaries, sequencedDefinition, defaultDefinitions, secondarySearchDictionaries, mergedByTermIndices) {
|
||||||
const result = sequencedDefinition.definitions;
|
const {reasons, score, source, dictionary, databaseDefinitions} = sequencedDefinition;
|
||||||
const rawDefinitionsBySequence = sequencedDefinition.rawDefinitions;
|
const result = {
|
||||||
|
reasons,
|
||||||
|
score,
|
||||||
|
expression: new Set(),
|
||||||
|
reading: new Set(),
|
||||||
|
expressions: new Map(),
|
||||||
|
source,
|
||||||
|
dictionary,
|
||||||
|
definitions: []
|
||||||
|
};
|
||||||
|
|
||||||
for (const definition of rawDefinitionsBySequence) {
|
for (const definition of databaseDefinitions) {
|
||||||
const definitionTags = await this._expandTags(definition.definitionTags, definition.dictionary);
|
const definitionTags = await this._expandTags(definition.definitionTags, definition.dictionary);
|
||||||
definitionTags.push(this._createDictionaryTag(definition.dictionary));
|
definitionTags.push(this._createDictionaryTag(definition.dictionary));
|
||||||
definition.definitionTags = definitionTags;
|
definition.definitionTags = definitionTags;
|
||||||
@ -146,7 +179,7 @@ class Translator {
|
|||||||
definition.termTags = termTags;
|
definition.termTags = termTags;
|
||||||
}
|
}
|
||||||
|
|
||||||
const definitionsByGloss = this._mergeByGlossary(result, rawDefinitionsBySequence);
|
const definitionsByGloss = this._mergeByGlossary(result, databaseDefinitions);
|
||||||
const secondarySearchResults = await this._getMergedSecondarySearchResults(text, result.expressions, secondarySearchDictionaries);
|
const secondarySearchResults = await this._getMergedSecondarySearchResults(text, result.expressions, secondarySearchDictionaries);
|
||||||
|
|
||||||
this._mergeByGlossary(result, defaultDefinitions.concat(secondarySearchResults), definitionsByGloss, mergedByTermIndices);
|
this._mergeByGlossary(result, defaultDefinitions.concat(secondarySearchResults), definitionsByGloss, mergedByTermIndices);
|
||||||
@ -162,8 +195,9 @@ class Translator {
|
|||||||
for (const [expression, readingMap] of result.expressions.entries()) {
|
for (const [expression, readingMap] of result.expressions.entries()) {
|
||||||
for (const [reading, termTagsMap] of readingMap.entries()) {
|
for (const [reading, termTagsMap] of readingMap.entries()) {
|
||||||
const termTags = [...termTagsMap.values()];
|
const termTags = [...termTagsMap.values()];
|
||||||
const score = termTags.map((tag) => tag.score).reduce((p, v) => p + v, 0);
|
const score2 = termTags.map((tag) => tag.score).reduce((p, v) => p + v, 0);
|
||||||
expressions.push(this._createExpression(expression, reading, this._sortTags(termTags), this._scoreToTermFrequency(score)));
|
this._sortTags(termTags);
|
||||||
|
expressions.push(this._createExpression(expression, reading, termTags, this._scoreToTermFrequency(score2)));
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -180,6 +214,7 @@ class Translator {
|
|||||||
|
|
||||||
const definitionsGrouped = this._groupTerms(definitions, dictionaries);
|
const definitionsGrouped = this._groupTerms(definitions, dictionaries);
|
||||||
await this._buildTermMeta(definitionsGrouped, dictionaries);
|
await this._buildTermMeta(definitionsGrouped, dictionaries);
|
||||||
|
this._sortDefinitions(definitionsGrouped, null);
|
||||||
|
|
||||||
if (options.general.compactTags) {
|
if (options.general.compactTags) {
|
||||||
for (const definition of definitionsGrouped) {
|
for (const definition of definitionsGrouped) {
|
||||||
@ -199,7 +234,7 @@ class Translator {
|
|||||||
}
|
}
|
||||||
|
|
||||||
const [definitions, length] = await this._findTermsInternal(text, dictionaries, details, options);
|
const [definitions, length] = await this._findTermsInternal(text, dictionaries, details, options);
|
||||||
const {sequencedDefinitions, defaultDefinitions} = await this._getSequencedDefinitions(definitions, options.general.mainDictionary);
|
const {sequencedDefinitions, unsequencedDefinitions} = await this._getSequencedDefinitions(definitions, options.general.mainDictionary);
|
||||||
const definitionsMerged = [];
|
const definitionsMerged = [];
|
||||||
const mergedByTermIndices = new Set();
|
const mergedByTermIndices = new Set();
|
||||||
|
|
||||||
@ -208,14 +243,14 @@ class Translator {
|
|||||||
text,
|
text,
|
||||||
dictionaries,
|
dictionaries,
|
||||||
sequencedDefinition,
|
sequencedDefinition,
|
||||||
defaultDefinitions,
|
unsequencedDefinitions,
|
||||||
secondarySearchDictionaries,
|
secondarySearchDictionaries,
|
||||||
mergedByTermIndices
|
mergedByTermIndices
|
||||||
);
|
);
|
||||||
definitionsMerged.push(result);
|
definitionsMerged.push(result);
|
||||||
}
|
}
|
||||||
|
|
||||||
const strayDefinitions = defaultDefinitions.filter((definition, index) => !mergedByTermIndices.has(index));
|
const strayDefinitions = unsequencedDefinitions.filter((definition, index) => !mergedByTermIndices.has(index));
|
||||||
for (const groupedDefinition of this._groupTerms(strayDefinitions, dictionaries)) {
|
for (const groupedDefinition of this._groupTerms(strayDefinitions, dictionaries)) {
|
||||||
// from dictTermsMergeBySequence
|
// from dictTermsMergeBySequence
|
||||||
const {reasons, score, expression, reading, source, dictionary} = groupedDefinition;
|
const {reasons, score, expression, reading, source, dictionary} = groupedDefinition;
|
||||||
@ -233,6 +268,7 @@ class Translator {
|
|||||||
}
|
}
|
||||||
|
|
||||||
await this._buildTermMeta(definitionsMerged, dictionaries);
|
await this._buildTermMeta(definitionsMerged, dictionaries);
|
||||||
|
this._sortDefinitions(definitionsMerged, null);
|
||||||
|
|
||||||
if (options.general.compactTags) {
|
if (options.general.compactTags) {
|
||||||
for (const definition of definitionsMerged) {
|
for (const definition of definitionsMerged) {
|
||||||
@ -240,22 +276,21 @@ class Translator {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
return [this._sortDefinitions(definitionsMerged), length];
|
return [definitionsMerged, length];
|
||||||
}
|
}
|
||||||
|
|
||||||
async _findTermsSplit(text, details, options) {
|
async _findTermsSplit(text, details, options) {
|
||||||
const dictionaries = this._getEnabledDictionaryMap(options);
|
const dictionaries = this._getEnabledDictionaryMap(options);
|
||||||
const [definitions, length] = await this._findTermsInternal(text, dictionaries, details, options);
|
const [definitions, length] = await this._findTermsInternal(text, dictionaries, details, options);
|
||||||
|
|
||||||
await this._buildTermMeta(definitions, dictionaries);
|
await this._buildTermMeta(definitions, dictionaries);
|
||||||
|
this._sortDefinitions(definitions, dictionaries);
|
||||||
return [definitions, length];
|
return [definitions, length];
|
||||||
}
|
}
|
||||||
|
|
||||||
async _findTermsSimple(text, details, options) {
|
async _findTermsSimple(text, details, options) {
|
||||||
const dictionaries = this._getEnabledDictionaryMap(options);
|
const dictionaries = this._getEnabledDictionaryMap(options);
|
||||||
const [definitions, length] = await this._findTermsInternal(text, dictionaries, details, options);
|
const [definitions, length] = await this._findTermsInternal(text, dictionaries, details, options);
|
||||||
this._sortDefinitions(definitions);
|
this._sortDefinitions(definitions, null);
|
||||||
return [definitions, length];
|
return [definitions, length];
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -271,48 +306,23 @@ class Translator {
|
|||||||
await this._findTermDeinflections(text, dictionaries, options)
|
await this._findTermDeinflections(text, dictionaries, options)
|
||||||
);
|
);
|
||||||
|
|
||||||
let definitions = [];
|
let maxLength = 0;
|
||||||
for (const deinflection of deinflections) {
|
const definitions = [];
|
||||||
for (const definition of deinflection.definitions) {
|
for (const {databaseDefinitions, source, rawSource, reasons} of deinflections) {
|
||||||
const definitionTags = await this._expandTags(definition.definitionTags, definition.dictionary);
|
maxLength = Math.max(maxLength, rawSource.length);
|
||||||
definitionTags.push(this._createDictionaryTag(definition.dictionary));
|
for (const databaseDefinition of databaseDefinitions) {
|
||||||
const termTags = await this._expandTags(definition.termTags, definition.dictionary);
|
const definition = await this._createTermDefinitionFromDatabaseDefinition(databaseDefinition, source, rawSource, reasons);
|
||||||
|
definitions.push(definition);
|
||||||
const {expression, reading} = definition;
|
|
||||||
const furiganaSegments = jp.distributeFurigana(expression, reading);
|
|
||||||
|
|
||||||
definitions.push({
|
|
||||||
source: deinflection.source,
|
|
||||||
rawSource: deinflection.rawSource,
|
|
||||||
reasons: deinflection.reasons,
|
|
||||||
score: definition.score,
|
|
||||||
id: definition.id,
|
|
||||||
dictionary: definition.dictionary,
|
|
||||||
expression,
|
|
||||||
reading,
|
|
||||||
furiganaSegments,
|
|
||||||
glossary: definition.glossary,
|
|
||||||
definitionTags: this._sortTags(definitionTags),
|
|
||||||
termTags: this._sortTags(termTags),
|
|
||||||
sequence: definition.sequence
|
|
||||||
});
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
definitions = this._removeDuplicateDefinitions(definitions);
|
this._removeDuplicateDefinitions(definitions);
|
||||||
definitions = this._sortDefinitions(definitions, dictionaries);
|
return [definitions, maxLength];
|
||||||
|
|
||||||
let length = 0;
|
|
||||||
for (const definition of definitions) {
|
|
||||||
length = Math.max(length, definition.rawSource.length);
|
|
||||||
}
|
|
||||||
|
|
||||||
return [definitions, length];
|
|
||||||
}
|
}
|
||||||
|
|
||||||
async _findTermWildcard(text, dictionaries, wildcard) {
|
async _findTermWildcard(text, dictionaries, wildcard) {
|
||||||
const definitions = await this._database.findTermsBulk([text], dictionaries, wildcard);
|
const databaseDefinitions = await this._database.findTermsBulk([text], dictionaries, wildcard);
|
||||||
if (definitions.length === 0) {
|
if (databaseDefinitions.length === 0) {
|
||||||
return [];
|
return [];
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -321,8 +331,8 @@ class Translator {
|
|||||||
rawSource: text,
|
rawSource: text,
|
||||||
term: text,
|
term: text,
|
||||||
rules: 0,
|
rules: 0,
|
||||||
definitions,
|
reasons: [],
|
||||||
reasons: []
|
databaseDefinitions
|
||||||
}];
|
}];
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -348,19 +358,19 @@ class Translator {
|
|||||||
deinflectionArray.push(deinflection);
|
deinflectionArray.push(deinflection);
|
||||||
}
|
}
|
||||||
|
|
||||||
const definitions = await this._database.findTermsBulk(uniqueDeinflectionTerms, dictionaries, null);
|
const databaseDefinitions = await this._database.findTermsBulk(uniqueDeinflectionTerms, dictionaries, null);
|
||||||
|
|
||||||
for (const definition of definitions) {
|
for (const databaseDefinition of databaseDefinitions) {
|
||||||
const definitionRules = Deinflector.rulesToRuleFlags(definition.rules);
|
const definitionRules = Deinflector.rulesToRuleFlags(databaseDefinition.rules);
|
||||||
for (const deinflection of uniqueDeinflectionArrays[definition.index]) {
|
for (const deinflection of uniqueDeinflectionArrays[databaseDefinition.index]) {
|
||||||
const deinflectionRules = deinflection.rules;
|
const deinflectionRules = deinflection.rules;
|
||||||
if (deinflectionRules === 0 || (definitionRules & deinflectionRules) !== 0) {
|
if (deinflectionRules === 0 || (definitionRules & deinflectionRules) !== 0) {
|
||||||
deinflection.definitions.push(definition);
|
deinflection.databaseDefinitions.push(databaseDefinition);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
return deinflections.filter((e) => e.definitions.length > 0);
|
return deinflections.filter((e) => e.databaseDefinitions.length > 0);
|
||||||
}
|
}
|
||||||
|
|
||||||
_getAllDeinflections(text, options) {
|
_getAllDeinflections(text, options) {
|
||||||
@ -411,8 +421,8 @@ class Translator {
|
|||||||
const text2Substring = text2.substring(0, i);
|
const text2Substring = text2.substring(0, i);
|
||||||
if (used.has(text2Substring)) { break; }
|
if (used.has(text2Substring)) { break; }
|
||||||
used.add(text2Substring);
|
used.add(text2Substring);
|
||||||
for (const deinflection of this._deinflector.deinflect(text2Substring)) {
|
const rawSource = sourceMap.source.substring(0, sourceMap.getSourceLength(i));
|
||||||
deinflection.rawSource = sourceMap.source.substring(0, sourceMap.getSourceLength(i));
|
for (const deinflection of this._deinflector.deinflect(text2Substring, rawSource)) {
|
||||||
deinflections.push(deinflection);
|
deinflections.push(deinflection);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -486,9 +496,8 @@ class Translator {
|
|||||||
|
|
||||||
async _buildKanjiMeta(definitions, dictionaries) {
|
async _buildKanjiMeta(definitions, dictionaries) {
|
||||||
const kanjiList = [];
|
const kanjiList = [];
|
||||||
for (const definition of definitions) {
|
for (const {character} of definitions) {
|
||||||
kanjiList.push(definition.character);
|
kanjiList.push(character);
|
||||||
definition.frequencies = [];
|
|
||||||
}
|
}
|
||||||
|
|
||||||
const metas = await this._database.findKanjiMetaBulk(kanjiList, dictionaries);
|
const metas = await this._database.findKanjiMetaBulk(kanjiList, dictionaries);
|
||||||
@ -503,11 +512,16 @@ class Translator {
|
|||||||
|
|
||||||
async _expandTags(names, title) {
|
async _expandTags(names, title) {
|
||||||
const tagMetaList = await this._getTagMetaList(names, title);
|
const tagMetaList = await this._getTagMetaList(names, title);
|
||||||
return tagMetaList.map((meta, index) => {
|
const results = [];
|
||||||
const name = names[index];
|
for (let i = 0, ii = tagMetaList.length; i < ii; ++i) {
|
||||||
const tag = this._sanitizeTag(Object.assign({}, meta !== null ? meta : {}, {name}));
|
const meta = tagMetaList[i];
|
||||||
return this._sanitizeTag(tag);
|
if (meta === null) { continue; }
|
||||||
});
|
const name = names[i];
|
||||||
|
const {category, notes, order, score, dictionary} = meta;
|
||||||
|
const tag = this._createTag(name, category, notes, order, score, dictionary);
|
||||||
|
results.push(tag);
|
||||||
|
}
|
||||||
|
return results;
|
||||||
}
|
}
|
||||||
|
|
||||||
async _expandStats(items, title) {
|
async _expandStats(items, title) {
|
||||||
@ -520,21 +534,21 @@ class Translator {
|
|||||||
const meta = tagMetaList[i];
|
const meta = tagMetaList[i];
|
||||||
if (meta === null) { continue; }
|
if (meta === null) { continue; }
|
||||||
|
|
||||||
const category = meta.category;
|
const {category, notes, order, score, dictionary} = meta;
|
||||||
let group = statsGroups.get(category);
|
let group = statsGroups.get(category);
|
||||||
if (typeof group === 'undefined') {
|
if (typeof group === 'undefined') {
|
||||||
group = [];
|
group = [];
|
||||||
statsGroups.set(category, group);
|
statsGroups.set(category, group);
|
||||||
}
|
}
|
||||||
|
|
||||||
const stat = Object.assign({}, meta, {name, value: items[name]});
|
const value = items[name];
|
||||||
group.push(this._sanitizeTag(stat));
|
const stat = this._createKanjiStat(name, category, notes, order, score, dictionary, value);
|
||||||
|
group.push(stat);
|
||||||
}
|
}
|
||||||
|
|
||||||
const stats = {};
|
const stats = {};
|
||||||
const sortCompare = (a, b) => a.notes - b.notes;
|
|
||||||
for (const [category, group] of statsGroups.entries()) {
|
for (const [category, group] of statsGroups.entries()) {
|
||||||
group.sort(sortCompare);
|
this._sortKanjiStats(group);
|
||||||
stats[category] = group;
|
stats[category] = group;
|
||||||
}
|
}
|
||||||
return stats;
|
return stats;
|
||||||
@ -589,17 +603,6 @@ class Translator {
|
|||||||
return {reading, pitches, dictionary};
|
return {reading, pitches, dictionary};
|
||||||
}
|
}
|
||||||
|
|
||||||
_createExpression(expression, reading, termTags=null, termFrequency=null) {
|
|
||||||
const furiganaSegments = jp.distributeFurigana(expression, reading);
|
|
||||||
return {
|
|
||||||
expression,
|
|
||||||
reading,
|
|
||||||
furiganaSegments,
|
|
||||||
termTags,
|
|
||||||
termFrequency
|
|
||||||
};
|
|
||||||
}
|
|
||||||
|
|
||||||
_scoreToTermFrequency(score) {
|
_scoreToTermFrequency(score) {
|
||||||
if (score > 0) {
|
if (score > 0) {
|
||||||
return 'popular';
|
return 'popular';
|
||||||
@ -674,42 +677,27 @@ class Translator {
|
|||||||
return enabledDictionaryMap;
|
return enabledDictionaryMap;
|
||||||
}
|
}
|
||||||
|
|
||||||
_sortDefinitions(definitions, dictionaries=null) {
|
|
||||||
return definitions.sort((v1, v2) => {
|
|
||||||
let i;
|
|
||||||
if (dictionaries !== null) {
|
|
||||||
const dictionaryInfo1 = dictionaries.get(v1.dictionary);
|
|
||||||
const dictionaryInfo2 = dictionaries.get(v2.dictionary);
|
|
||||||
const priority1 = typeof dictionaryInfo1 !== 'undefined' ? dictionaryInfo1.priority : 0;
|
|
||||||
const priority2 = typeof dictionaryInfo2 !== 'undefined' ? dictionaryInfo2.priority : 0;
|
|
||||||
i = priority2 - priority1;
|
|
||||||
if (i !== 0) { return i; }
|
|
||||||
}
|
|
||||||
|
|
||||||
i = v2.source.length - v1.source.length;
|
|
||||||
if (i !== 0) { return i; }
|
|
||||||
|
|
||||||
i = v1.reasons.length - v2.reasons.length;
|
|
||||||
if (i !== 0) { return i; }
|
|
||||||
|
|
||||||
i = v2.score - v1.score;
|
|
||||||
if (i !== 0) { return i; }
|
|
||||||
|
|
||||||
return v2.expression.toString().localeCompare(v1.expression.toString());
|
|
||||||
});
|
|
||||||
}
|
|
||||||
|
|
||||||
_removeDuplicateDefinitions(definitions) {
|
_removeDuplicateDefinitions(definitions) {
|
||||||
const definitionGroups = new Map();
|
const definitionGroups = new Map();
|
||||||
for (const definition of definitions) {
|
for (let i = 0, ii = definitions.length; i < ii; ++i) {
|
||||||
const id = definition.id;
|
const definition = definitions[i];
|
||||||
const definitionExisting = definitionGroups.get(id);
|
const {id} = definition;
|
||||||
if (typeof definitionExisting === 'undefined' || definition.expression.length > definitionExisting.expression.length) {
|
const existing = definitionGroups.get(id);
|
||||||
definitionGroups.set(id, definition);
|
if (typeof existing === 'undefined') {
|
||||||
}
|
definitionGroups.set(id, [i, definition]);
|
||||||
|
continue;
|
||||||
}
|
}
|
||||||
|
|
||||||
return [...definitionGroups.values()];
|
let removeIndex = i;
|
||||||
|
if (definition.expression.length > existing[1].expression.length) {
|
||||||
|
definitionGroups.set(id, [i, definition]);
|
||||||
|
removeIndex = existing[0];
|
||||||
|
}
|
||||||
|
|
||||||
|
definitions.splice(removeIndex, 1);
|
||||||
|
--i;
|
||||||
|
--ii;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
_compressDefinitionTags(definitions) {
|
_compressDefinitionTags(definitions) {
|
||||||
@ -773,37 +761,7 @@ class Translator {
|
|||||||
});
|
});
|
||||||
}
|
}
|
||||||
|
|
||||||
return this._sortDefinitions(results);
|
return results;
|
||||||
}
|
|
||||||
|
|
||||||
_mergeBySequence(definitions, mainDictionary) {
|
|
||||||
const sequencedDefinitions = new Map();
|
|
||||||
const nonSequencedDefinitions = [];
|
|
||||||
for (const definition of definitions) {
|
|
||||||
const sequence = definition.sequence;
|
|
||||||
if (mainDictionary === definition.dictionary && sequence >= 0) {
|
|
||||||
let sequencedDefinition = sequencedDefinitions.get(sequence);
|
|
||||||
if (typeof sequencedDefinition === 'undefined') {
|
|
||||||
sequencedDefinition = {
|
|
||||||
reasons: definition.reasons,
|
|
||||||
score: definition.score,
|
|
||||||
expression: new Set(),
|
|
||||||
reading: new Set(),
|
|
||||||
expressions: new Map(),
|
|
||||||
source: definition.source,
|
|
||||||
dictionary: definition.dictionary,
|
|
||||||
definitions: []
|
|
||||||
};
|
|
||||||
sequencedDefinitions.set(sequence, sequencedDefinition);
|
|
||||||
} else {
|
|
||||||
sequencedDefinition.score = Math.max(sequencedDefinition.score, definition.score);
|
|
||||||
}
|
|
||||||
} else {
|
|
||||||
nonSequencedDefinitions.push(definition);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
return [sequencedDefinitions, nonSequencedDefinitions];
|
|
||||||
}
|
}
|
||||||
|
|
||||||
_mergeByGlossary(result, definitions, appendTo=null, mergedIndices=null) {
|
_mergeByGlossary(result, definitions, appendTo=null, mergedIndices=null) {
|
||||||
@ -814,7 +772,8 @@ class Translator {
|
|||||||
const resultReadingSet = result.reading;
|
const resultReadingSet = result.reading;
|
||||||
const resultSource = result.source;
|
const resultSource = result.source;
|
||||||
|
|
||||||
for (const [index, definition] of definitions.entries()) {
|
for (let i = 0, ii = definitions.length; i < ii; ++i) {
|
||||||
|
const definition = definitions[i];
|
||||||
const {expression, reading} = definition;
|
const {expression, reading} = definition;
|
||||||
|
|
||||||
if (mergedIndices !== null) {
|
if (mergedIndices !== null) {
|
||||||
@ -823,7 +782,7 @@ class Translator {
|
|||||||
typeof expressionMap !== 'undefined' &&
|
typeof expressionMap !== 'undefined' &&
|
||||||
typeof expressionMap.get(reading) !== 'undefined'
|
typeof expressionMap.get(reading) !== 'undefined'
|
||||||
) {
|
) {
|
||||||
mergedIndices.add(index);
|
mergedIndices.add(i);
|
||||||
} else {
|
} else {
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
@ -909,37 +868,127 @@ class Translator {
|
|||||||
}
|
}
|
||||||
|
|
||||||
_createDictionaryTag(name) {
|
_createDictionaryTag(name) {
|
||||||
return this._sanitizeTag({name, category: 'dictionary', order: 100});
|
return this._createTag(name, 'dictionary', '', 100, 0, name);
|
||||||
}
|
}
|
||||||
|
|
||||||
_sanitizeTag(tag) {
|
_createTag(name, category, notes, order, score, dictionary) {
|
||||||
tag.name = tag.name || 'untitled';
|
return {
|
||||||
tag.category = tag.category || 'default';
|
name,
|
||||||
tag.notes = tag.notes || '';
|
category: (typeof category === 'string' && category.length > 0 ? category : 'default'),
|
||||||
tag.order = tag.order || 0;
|
notes: (typeof notes === 'string' ? notes : ''),
|
||||||
tag.score = tag.score || 0;
|
order: (typeof order === 'number' ? order : 0),
|
||||||
return tag;
|
score: (typeof score === 'number' ? score : 0),
|
||||||
|
dictionary: (typeof dictionary === 'string' ? dictionary : null)
|
||||||
|
};
|
||||||
|
}
|
||||||
|
|
||||||
|
_createKanjiStat(name, category, notes, order, score, dictionary, value) {
|
||||||
|
return {
|
||||||
|
name,
|
||||||
|
category: (typeof category === 'string' && category.length > 0 ? category : 'default'),
|
||||||
|
notes: (typeof notes === 'string' ? notes : ''),
|
||||||
|
order: (typeof order === 'number' ? order : 0),
|
||||||
|
score: (typeof score === 'number' ? score : 0),
|
||||||
|
dictionary: (typeof dictionary === 'string' ? dictionary : null),
|
||||||
|
value
|
||||||
|
};
|
||||||
|
}
|
||||||
|
|
||||||
|
async _createTermDefinitionFromDatabaseDefinition(databaseDefinition, source, rawSource, reasons) {
|
||||||
|
const {expression, reading, definitionTags, termTags, glossary, score, dictionary, id, sequence} = databaseDefinition;
|
||||||
|
const termTagsExpanded = await this._expandTags(termTags, dictionary);
|
||||||
|
const definitionTagsExpanded = await this._expandTags(definitionTags, dictionary);
|
||||||
|
definitionTagsExpanded.push(this._createDictionaryTag(dictionary));
|
||||||
|
|
||||||
|
this._sortTags(definitionTagsExpanded);
|
||||||
|
this._sortTags(termTagsExpanded);
|
||||||
|
|
||||||
|
const furiganaSegments = jp.distributeFurigana(expression, reading);
|
||||||
|
|
||||||
|
return {
|
||||||
|
source,
|
||||||
|
rawSource,
|
||||||
|
reasons,
|
||||||
|
score,
|
||||||
|
id,
|
||||||
|
dictionary,
|
||||||
|
expression,
|
||||||
|
reading,
|
||||||
|
furiganaSegments,
|
||||||
|
glossary,
|
||||||
|
definitionTags: definitionTagsExpanded,
|
||||||
|
termTags: termTagsExpanded,
|
||||||
|
sequence
|
||||||
|
};
|
||||||
|
}
|
||||||
|
|
||||||
|
_createExpression(expression, reading, termTags=null, termFrequency=null) {
|
||||||
|
const furiganaSegments = jp.distributeFurigana(expression, reading);
|
||||||
|
return {
|
||||||
|
expression,
|
||||||
|
reading,
|
||||||
|
furiganaSegments,
|
||||||
|
termTags,
|
||||||
|
termFrequency
|
||||||
|
};
|
||||||
}
|
}
|
||||||
|
|
||||||
_sortTags(tags) {
|
_sortTags(tags) {
|
||||||
return tags.sort((v1, v2) => {
|
if (tags.length <= 1) { return; }
|
||||||
const order1 = v1.order;
|
const stringComparer = this._stringComparer;
|
||||||
const order2 = v2.order;
|
tags.sort((v1, v2) => {
|
||||||
if (order1 < order2) {
|
const i = v1.order - v2.order;
|
||||||
return -1;
|
if (i !== 0) { return i; }
|
||||||
} else if (order1 > order2) {
|
|
||||||
return 1;
|
return stringComparer.compare(v1.name, v2.name);
|
||||||
|
});
|
||||||
}
|
}
|
||||||
|
|
||||||
const name1 = v1.name;
|
_sortDefinitions(definitions, dictionaries) {
|
||||||
const name2 = v2.name;
|
if (definitions.length <= 1) { return; }
|
||||||
if (name1 < name2) {
|
const stringComparer = this._stringComparer;
|
||||||
return -1;
|
definitions.sort((v1, v2) => {
|
||||||
} else if (name1 > name2) {
|
let i;
|
||||||
return 1;
|
if (dictionaries !== null) {
|
||||||
|
const dictionaryInfo1 = dictionaries.get(v1.dictionary);
|
||||||
|
const dictionaryInfo2 = dictionaries.get(v2.dictionary);
|
||||||
|
const priority1 = typeof dictionaryInfo1 !== 'undefined' ? dictionaryInfo1.priority : 0;
|
||||||
|
const priority2 = typeof dictionaryInfo2 !== 'undefined' ? dictionaryInfo2.priority : 0;
|
||||||
|
i = priority2 - priority1;
|
||||||
|
if (i !== 0) { return i; }
|
||||||
}
|
}
|
||||||
|
|
||||||
return 0;
|
i = v2.source.length - v1.source.length;
|
||||||
|
if (i !== 0) { return i; }
|
||||||
|
|
||||||
|
i = v1.reasons.length - v2.reasons.length;
|
||||||
|
if (i !== 0) { return i; }
|
||||||
|
|
||||||
|
i = v2.score - v1.score;
|
||||||
|
if (i !== 0) { return i; }
|
||||||
|
|
||||||
|
const expression1 = v1.expression;
|
||||||
|
const expression2 = v2.expression;
|
||||||
|
i = expression2.length - expression1.length;
|
||||||
|
if (i !== 0) { return i; }
|
||||||
|
|
||||||
|
return stringComparer.compare(expression1, expression2);
|
||||||
|
});
|
||||||
|
}
|
||||||
|
|
||||||
|
_sortDatabaseDefinitionsByIndex(definitions) {
|
||||||
|
if (definitions.length <= 1) { return; }
|
||||||
|
definitions.sort((a, b) => a.index - b.index);
|
||||||
|
}
|
||||||
|
|
||||||
|
_sortKanjiStats(stats) {
|
||||||
|
if (stats.length <= 1) { return; }
|
||||||
|
const stringComparer = this._stringComparer;
|
||||||
|
stats.sort((v1, v2) => {
|
||||||
|
const i = v1.order - v2.order;
|
||||||
|
if (i !== 0) { return i; }
|
||||||
|
|
||||||
|
return stringComparer.compare(v1.notes, v2.notes);
|
||||||
});
|
});
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
Loading…
Reference in New Issue
Block a user