Dictionary data structure improvements (#877)

* Simplify object structure of deinflections

* Modify existing array when removing duplicates

* Update _sortDefinitions to not return a value

* Use explicit argument value

* Use a Collator for string comparisons

* Simplify order comparison

* Improve tag creation and sanitization

* Switch .map to a for loop, skip null meta

* Add _createKanjiStat

* Move _sortDefinitions definition

* Fix kanji stat sorting

* Remove return value from _sortTags

* Add _sortKanji

* Add fast exits for sorting

* Add _sortDefinitionsByIndex

* Combine sort function

* Improve creation of kanji definition data

* Use "databaseDefinitions" instead of "definitions" to disambiguate

* Simplify

* Simplify further

* Simplify max length calculation

* More destructuring

* Use databaseDefinitions variable name

* Move _mergeBySequence body into _getSequencedDefinitions

* Use databaseDefinitions field name

* Move maxLength calculation

* Use shared _createTermDefinitionFromDatabaseDefinition

* Simplify map

* Move definition sorts for better consistency and less redundancy

* Move _createExpression function

* Update setup of sequenced definitions

* Simplify for loop
This commit is contained in:
toasted-nutbread 2020-10-02 17:59:14 -04:00 committed by GitHub
parent 50f2385aaf
commit ef333b6d72
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
2 changed files with 262 additions and 211 deletions

View File

@ -21,13 +21,14 @@ class Deinflector {
this.reasons = Deinflector.normalizeReasons(reasons); this.reasons = Deinflector.normalizeReasons(reasons);
} }
deinflect(source) { deinflect(source, rawSource) {
const results = [{ const results = [{
source, source,
rawSource,
term: source, term: source,
rules: 0, rules: 0,
definitions: [], reasons: [],
reasons: [] databaseDefinitions: []
}]; }];
for (let i = 0; i < results.length; ++i) { for (let i = 0; i < results.length; ++i) {
const {rules, term, reasons} = results[i]; const {rules, term, reasons} = results[i];
@ -43,10 +44,11 @@ class Deinflector {
results.push({ results.push({
source, source,
rawSource,
term: term.substring(0, term.length - kanaIn.length) + kanaOut, term: term.substring(0, term.length - kanaIn.length) + kanaOut,
rules: rulesOut, rules: rulesOut,
definitions: [], reasons: [reason, ...reasons],
reasons: [reason, ...reasons] databaseDefinitions: []
}); });
} }
} }

View File

@ -26,6 +26,7 @@ class Translator {
this._database = database; this._database = database;
this._deinflector = null; this._deinflector = null;
this._tagCache = new Map(); this._tagCache = new Map();
this._stringComparer = new Intl.Collator('en-US'); // Invariant locale
} }
async prepare() { async prepare() {
@ -59,24 +60,29 @@ class Translator {
kanjiUnique.add(c); kanjiUnique.add(c);
} }
const definitions = await this._database.findKanjiBulk([...kanjiUnique], dictionaries); const databaseDefinitions = await this._database.findKanjiBulk([...kanjiUnique], dictionaries);
if (definitions.length === 0) { if (databaseDefinitions.length === 0) { return []; }
return definitions;
}
if (definitions.length > 1) { this._sortDatabaseDefinitionsByIndex(databaseDefinitions);
definitions.sort((a, b) => a.index - b.index);
}
for (const definition of definitions) { const definitions = [];
const tags = await this._expandTags(definition.tags, definition.dictionary); for (const {index, character, onyomi, kunyomi, tags, glossary, stats, dictionary} of databaseDefinitions) {
tags.push(this._createDictionaryTag(definition.dictionary)); const expandedStats = await this._expandStats(stats, dictionary);
this._sortTags(tags); const expandedTags = await this._expandTags(tags, dictionary);
expandedTags.push(this._createDictionaryTag(dictionary));
this._sortTags(expandedTags);
const stats = await this._expandStats(definition.stats, definition.dictionary); definitions.push({
index,
definition.tags = tags; character,
definition.stats = stats; onyomi,
kunyomi,
tags: expandedTags,
glossary,
stats: expandedStats,
dictionary,
frequencies: []
});
} }
await this._buildKanjiMeta(definitions, dictionaries); await this._buildKanjiMeta(definitions, dictionaries);
@ -87,20 +93,41 @@ class Translator {
// Private // Private
async _getSequencedDefinitions(definitions, mainDictionary) { async _getSequencedDefinitions(definitions, mainDictionary) {
const [definitionsBySequence, defaultDefinitions] = this._mergeBySequence(definitions, mainDictionary);
const sequenceList = []; const sequenceList = [];
const sequencedDefinitionMap = new Map();
const sequencedDefinitions = []; const sequencedDefinitions = [];
for (const [key, value] of definitionsBySequence.entries()) { const unsequencedDefinitions = [];
sequenceList.push(key); for (const definition of definitions) {
sequencedDefinitions.push({definitions: value, rawDefinitions: []}); const {sequence, dictionary} = definition;
if (mainDictionary === dictionary && sequence >= 0) {
const {score} = definition;
let sequencedDefinition = sequencedDefinitionMap.get(sequence);
if (typeof sequencedDefinition === 'undefined') {
const {reasons, source} = definition;
sequencedDefinition = {
reasons,
score,
source,
dictionary,
databaseDefinitions: []
};
sequencedDefinitionMap.set(sequence, sequencedDefinition);
sequencedDefinitions.push(sequencedDefinition);
sequenceList.push(sequence);
} else {
sequencedDefinition.score = Math.max(sequencedDefinition.score, score);
}
} else {
unsequencedDefinitions.push(definition);
}
} }
for (const definition of await this._database.findTermsBySequenceBulk(sequenceList, mainDictionary)) { const databaseDefinitions = await this._database.findTermsBySequenceBulk(sequenceList, mainDictionary);
sequencedDefinitions[definition.index].rawDefinitions.push(definition); for (const databaseDefinition of databaseDefinitions) {
sequencedDefinitions[databaseDefinition.index].databaseDefinitions.push(databaseDefinition);
} }
return {sequencedDefinitions, defaultDefinitions}; return {sequencedDefinitions, unsequencedDefinitions};
} }
async _getMergedSecondarySearchResults(text, expressionsMap, secondarySearchDictionaries) { async _getMergedSecondarySearchResults(text, expressionsMap, secondarySearchDictionaries) {
@ -110,35 +137,41 @@ class Translator {
const expressionList = []; const expressionList = [];
const readingList = []; const readingList = [];
for (const expression of expressionsMap.keys()) { for (const [expression, readingMap] of expressionsMap.entries()) {
if (expression === text) { continue; } if (expression === text) { continue; }
for (const reading of expressionsMap.get(expression).keys()) { for (const reading of readingMap.keys()) {
expressionList.push(expression); expressionList.push(expression);
readingList.push(reading); readingList.push(reading);
} }
} }
const definitions = await this._database.findTermsExactBulk(expressionList, readingList, secondarySearchDictionaries); const databaseDefinitions = await this._database.findTermsExactBulk(expressionList, readingList, secondarySearchDictionaries);
for (const definition of definitions) { this._sortDatabaseDefinitionsByIndex(databaseDefinitions);
const definitionTags = await this._expandTags(definition.definitionTags, definition.dictionary);
definitionTags.push(this._createDictionaryTag(definition.dictionary));
definition.definitionTags = definitionTags;
const termTags = await this._expandTags(definition.termTags, definition.dictionary);
definition.termTags = termTags;
}
if (definitions.length > 1) { const definitions = [];
definitions.sort((a, b) => a.index - b.index); for (const databaseDefinition of databaseDefinitions) {
const source = expressionList[databaseDefinition.index];
const definition = await this._createTermDefinitionFromDatabaseDefinition(databaseDefinition, source, source, []);
definitions.push(definition);
} }
return definitions; return definitions;
} }
async _getMergedDefinition(text, dictionaries, sequencedDefinition, defaultDefinitions, secondarySearchDictionaries, mergedByTermIndices) { async _getMergedDefinition(text, dictionaries, sequencedDefinition, defaultDefinitions, secondarySearchDictionaries, mergedByTermIndices) {
const result = sequencedDefinition.definitions; const {reasons, score, source, dictionary, databaseDefinitions} = sequencedDefinition;
const rawDefinitionsBySequence = sequencedDefinition.rawDefinitions; const result = {
reasons,
score,
expression: new Set(),
reading: new Set(),
expressions: new Map(),
source,
dictionary,
definitions: []
};
for (const definition of rawDefinitionsBySequence) { for (const definition of databaseDefinitions) {
const definitionTags = await this._expandTags(definition.definitionTags, definition.dictionary); const definitionTags = await this._expandTags(definition.definitionTags, definition.dictionary);
definitionTags.push(this._createDictionaryTag(definition.dictionary)); definitionTags.push(this._createDictionaryTag(definition.dictionary));
definition.definitionTags = definitionTags; definition.definitionTags = definitionTags;
@ -146,7 +179,7 @@ class Translator {
definition.termTags = termTags; definition.termTags = termTags;
} }
const definitionsByGloss = this._mergeByGlossary(result, rawDefinitionsBySequence); const definitionsByGloss = this._mergeByGlossary(result, databaseDefinitions);
const secondarySearchResults = await this._getMergedSecondarySearchResults(text, result.expressions, secondarySearchDictionaries); const secondarySearchResults = await this._getMergedSecondarySearchResults(text, result.expressions, secondarySearchDictionaries);
this._mergeByGlossary(result, defaultDefinitions.concat(secondarySearchResults), definitionsByGloss, mergedByTermIndices); this._mergeByGlossary(result, defaultDefinitions.concat(secondarySearchResults), definitionsByGloss, mergedByTermIndices);
@ -162,8 +195,9 @@ class Translator {
for (const [expression, readingMap] of result.expressions.entries()) { for (const [expression, readingMap] of result.expressions.entries()) {
for (const [reading, termTagsMap] of readingMap.entries()) { for (const [reading, termTagsMap] of readingMap.entries()) {
const termTags = [...termTagsMap.values()]; const termTags = [...termTagsMap.values()];
const score = termTags.map((tag) => tag.score).reduce((p, v) => p + v, 0); const score2 = termTags.map((tag) => tag.score).reduce((p, v) => p + v, 0);
expressions.push(this._createExpression(expression, reading, this._sortTags(termTags), this._scoreToTermFrequency(score))); this._sortTags(termTags);
expressions.push(this._createExpression(expression, reading, termTags, this._scoreToTermFrequency(score2)));
} }
} }
@ -180,6 +214,7 @@ class Translator {
const definitionsGrouped = this._groupTerms(definitions, dictionaries); const definitionsGrouped = this._groupTerms(definitions, dictionaries);
await this._buildTermMeta(definitionsGrouped, dictionaries); await this._buildTermMeta(definitionsGrouped, dictionaries);
this._sortDefinitions(definitionsGrouped, null);
if (options.general.compactTags) { if (options.general.compactTags) {
for (const definition of definitionsGrouped) { for (const definition of definitionsGrouped) {
@ -199,7 +234,7 @@ class Translator {
} }
const [definitions, length] = await this._findTermsInternal(text, dictionaries, details, options); const [definitions, length] = await this._findTermsInternal(text, dictionaries, details, options);
const {sequencedDefinitions, defaultDefinitions} = await this._getSequencedDefinitions(definitions, options.general.mainDictionary); const {sequencedDefinitions, unsequencedDefinitions} = await this._getSequencedDefinitions(definitions, options.general.mainDictionary);
const definitionsMerged = []; const definitionsMerged = [];
const mergedByTermIndices = new Set(); const mergedByTermIndices = new Set();
@ -208,14 +243,14 @@ class Translator {
text, text,
dictionaries, dictionaries,
sequencedDefinition, sequencedDefinition,
defaultDefinitions, unsequencedDefinitions,
secondarySearchDictionaries, secondarySearchDictionaries,
mergedByTermIndices mergedByTermIndices
); );
definitionsMerged.push(result); definitionsMerged.push(result);
} }
const strayDefinitions = defaultDefinitions.filter((definition, index) => !mergedByTermIndices.has(index)); const strayDefinitions = unsequencedDefinitions.filter((definition, index) => !mergedByTermIndices.has(index));
for (const groupedDefinition of this._groupTerms(strayDefinitions, dictionaries)) { for (const groupedDefinition of this._groupTerms(strayDefinitions, dictionaries)) {
// from dictTermsMergeBySequence // from dictTermsMergeBySequence
const {reasons, score, expression, reading, source, dictionary} = groupedDefinition; const {reasons, score, expression, reading, source, dictionary} = groupedDefinition;
@ -233,6 +268,7 @@ class Translator {
} }
await this._buildTermMeta(definitionsMerged, dictionaries); await this._buildTermMeta(definitionsMerged, dictionaries);
this._sortDefinitions(definitionsMerged, null);
if (options.general.compactTags) { if (options.general.compactTags) {
for (const definition of definitionsMerged) { for (const definition of definitionsMerged) {
@ -240,22 +276,21 @@ class Translator {
} }
} }
return [this._sortDefinitions(definitionsMerged), length]; return [definitionsMerged, length];
} }
async _findTermsSplit(text, details, options) { async _findTermsSplit(text, details, options) {
const dictionaries = this._getEnabledDictionaryMap(options); const dictionaries = this._getEnabledDictionaryMap(options);
const [definitions, length] = await this._findTermsInternal(text, dictionaries, details, options); const [definitions, length] = await this._findTermsInternal(text, dictionaries, details, options);
await this._buildTermMeta(definitions, dictionaries); await this._buildTermMeta(definitions, dictionaries);
this._sortDefinitions(definitions, dictionaries);
return [definitions, length]; return [definitions, length];
} }
async _findTermsSimple(text, details, options) { async _findTermsSimple(text, details, options) {
const dictionaries = this._getEnabledDictionaryMap(options); const dictionaries = this._getEnabledDictionaryMap(options);
const [definitions, length] = await this._findTermsInternal(text, dictionaries, details, options); const [definitions, length] = await this._findTermsInternal(text, dictionaries, details, options);
this._sortDefinitions(definitions); this._sortDefinitions(definitions, null);
return [definitions, length]; return [definitions, length];
} }
@ -271,48 +306,23 @@ class Translator {
await this._findTermDeinflections(text, dictionaries, options) await this._findTermDeinflections(text, dictionaries, options)
); );
let definitions = []; let maxLength = 0;
for (const deinflection of deinflections) { const definitions = [];
for (const definition of deinflection.definitions) { for (const {databaseDefinitions, source, rawSource, reasons} of deinflections) {
const definitionTags = await this._expandTags(definition.definitionTags, definition.dictionary); maxLength = Math.max(maxLength, rawSource.length);
definitionTags.push(this._createDictionaryTag(definition.dictionary)); for (const databaseDefinition of databaseDefinitions) {
const termTags = await this._expandTags(definition.termTags, definition.dictionary); const definition = await this._createTermDefinitionFromDatabaseDefinition(databaseDefinition, source, rawSource, reasons);
definitions.push(definition);
const {expression, reading} = definition;
const furiganaSegments = jp.distributeFurigana(expression, reading);
definitions.push({
source: deinflection.source,
rawSource: deinflection.rawSource,
reasons: deinflection.reasons,
score: definition.score,
id: definition.id,
dictionary: definition.dictionary,
expression,
reading,
furiganaSegments,
glossary: definition.glossary,
definitionTags: this._sortTags(definitionTags),
termTags: this._sortTags(termTags),
sequence: definition.sequence
});
} }
} }
definitions = this._removeDuplicateDefinitions(definitions); this._removeDuplicateDefinitions(definitions);
definitions = this._sortDefinitions(definitions, dictionaries); return [definitions, maxLength];
let length = 0;
for (const definition of definitions) {
length = Math.max(length, definition.rawSource.length);
}
return [definitions, length];
} }
async _findTermWildcard(text, dictionaries, wildcard) { async _findTermWildcard(text, dictionaries, wildcard) {
const definitions = await this._database.findTermsBulk([text], dictionaries, wildcard); const databaseDefinitions = await this._database.findTermsBulk([text], dictionaries, wildcard);
if (definitions.length === 0) { if (databaseDefinitions.length === 0) {
return []; return [];
} }
@ -321,8 +331,8 @@ class Translator {
rawSource: text, rawSource: text,
term: text, term: text,
rules: 0, rules: 0,
definitions, reasons: [],
reasons: [] databaseDefinitions
}]; }];
} }
@ -348,19 +358,19 @@ class Translator {
deinflectionArray.push(deinflection); deinflectionArray.push(deinflection);
} }
const definitions = await this._database.findTermsBulk(uniqueDeinflectionTerms, dictionaries, null); const databaseDefinitions = await this._database.findTermsBulk(uniqueDeinflectionTerms, dictionaries, null);
for (const definition of definitions) { for (const databaseDefinition of databaseDefinitions) {
const definitionRules = Deinflector.rulesToRuleFlags(definition.rules); const definitionRules = Deinflector.rulesToRuleFlags(databaseDefinition.rules);
for (const deinflection of uniqueDeinflectionArrays[definition.index]) { for (const deinflection of uniqueDeinflectionArrays[databaseDefinition.index]) {
const deinflectionRules = deinflection.rules; const deinflectionRules = deinflection.rules;
if (deinflectionRules === 0 || (definitionRules & deinflectionRules) !== 0) { if (deinflectionRules === 0 || (definitionRules & deinflectionRules) !== 0) {
deinflection.definitions.push(definition); deinflection.databaseDefinitions.push(databaseDefinition);
} }
} }
} }
return deinflections.filter((e) => e.definitions.length > 0); return deinflections.filter((e) => e.databaseDefinitions.length > 0);
} }
_getAllDeinflections(text, options) { _getAllDeinflections(text, options) {
@ -411,8 +421,8 @@ class Translator {
const text2Substring = text2.substring(0, i); const text2Substring = text2.substring(0, i);
if (used.has(text2Substring)) { break; } if (used.has(text2Substring)) { break; }
used.add(text2Substring); used.add(text2Substring);
for (const deinflection of this._deinflector.deinflect(text2Substring)) { const rawSource = sourceMap.source.substring(0, sourceMap.getSourceLength(i));
deinflection.rawSource = sourceMap.source.substring(0, sourceMap.getSourceLength(i)); for (const deinflection of this._deinflector.deinflect(text2Substring, rawSource)) {
deinflections.push(deinflection); deinflections.push(deinflection);
} }
} }
@ -486,9 +496,8 @@ class Translator {
async _buildKanjiMeta(definitions, dictionaries) { async _buildKanjiMeta(definitions, dictionaries) {
const kanjiList = []; const kanjiList = [];
for (const definition of definitions) { for (const {character} of definitions) {
kanjiList.push(definition.character); kanjiList.push(character);
definition.frequencies = [];
} }
const metas = await this._database.findKanjiMetaBulk(kanjiList, dictionaries); const metas = await this._database.findKanjiMetaBulk(kanjiList, dictionaries);
@ -503,11 +512,16 @@ class Translator {
async _expandTags(names, title) { async _expandTags(names, title) {
const tagMetaList = await this._getTagMetaList(names, title); const tagMetaList = await this._getTagMetaList(names, title);
return tagMetaList.map((meta, index) => { const results = [];
const name = names[index]; for (let i = 0, ii = tagMetaList.length; i < ii; ++i) {
const tag = this._sanitizeTag(Object.assign({}, meta !== null ? meta : {}, {name})); const meta = tagMetaList[i];
return this._sanitizeTag(tag); if (meta === null) { continue; }
}); const name = names[i];
const {category, notes, order, score, dictionary} = meta;
const tag = this._createTag(name, category, notes, order, score, dictionary);
results.push(tag);
}
return results;
} }
async _expandStats(items, title) { async _expandStats(items, title) {
@ -520,21 +534,21 @@ class Translator {
const meta = tagMetaList[i]; const meta = tagMetaList[i];
if (meta === null) { continue; } if (meta === null) { continue; }
const category = meta.category; const {category, notes, order, score, dictionary} = meta;
let group = statsGroups.get(category); let group = statsGroups.get(category);
if (typeof group === 'undefined') { if (typeof group === 'undefined') {
group = []; group = [];
statsGroups.set(category, group); statsGroups.set(category, group);
} }
const stat = Object.assign({}, meta, {name, value: items[name]}); const value = items[name];
group.push(this._sanitizeTag(stat)); const stat = this._createKanjiStat(name, category, notes, order, score, dictionary, value);
group.push(stat);
} }
const stats = {}; const stats = {};
const sortCompare = (a, b) => a.notes - b.notes;
for (const [category, group] of statsGroups.entries()) { for (const [category, group] of statsGroups.entries()) {
group.sort(sortCompare); this._sortKanjiStats(group);
stats[category] = group; stats[category] = group;
} }
return stats; return stats;
@ -589,17 +603,6 @@ class Translator {
return {reading, pitches, dictionary}; return {reading, pitches, dictionary};
} }
_createExpression(expression, reading, termTags=null, termFrequency=null) {
const furiganaSegments = jp.distributeFurigana(expression, reading);
return {
expression,
reading,
furiganaSegments,
termTags,
termFrequency
};
}
_scoreToTermFrequency(score) { _scoreToTermFrequency(score) {
if (score > 0) { if (score > 0) {
return 'popular'; return 'popular';
@ -674,42 +677,27 @@ class Translator {
return enabledDictionaryMap; return enabledDictionaryMap;
} }
_sortDefinitions(definitions, dictionaries=null) {
return definitions.sort((v1, v2) => {
let i;
if (dictionaries !== null) {
const dictionaryInfo1 = dictionaries.get(v1.dictionary);
const dictionaryInfo2 = dictionaries.get(v2.dictionary);
const priority1 = typeof dictionaryInfo1 !== 'undefined' ? dictionaryInfo1.priority : 0;
const priority2 = typeof dictionaryInfo2 !== 'undefined' ? dictionaryInfo2.priority : 0;
i = priority2 - priority1;
if (i !== 0) { return i; }
}
i = v2.source.length - v1.source.length;
if (i !== 0) { return i; }
i = v1.reasons.length - v2.reasons.length;
if (i !== 0) { return i; }
i = v2.score - v1.score;
if (i !== 0) { return i; }
return v2.expression.toString().localeCompare(v1.expression.toString());
});
}
_removeDuplicateDefinitions(definitions) { _removeDuplicateDefinitions(definitions) {
const definitionGroups = new Map(); const definitionGroups = new Map();
for (const definition of definitions) { for (let i = 0, ii = definitions.length; i < ii; ++i) {
const id = definition.id; const definition = definitions[i];
const definitionExisting = definitionGroups.get(id); const {id} = definition;
if (typeof definitionExisting === 'undefined' || definition.expression.length > definitionExisting.expression.length) { const existing = definitionGroups.get(id);
definitionGroups.set(id, definition); if (typeof existing === 'undefined') {
} definitionGroups.set(id, [i, definition]);
continue;
} }
return [...definitionGroups.values()]; let removeIndex = i;
if (definition.expression.length > existing[1].expression.length) {
definitionGroups.set(id, [i, definition]);
removeIndex = existing[0];
}
definitions.splice(removeIndex, 1);
--i;
--ii;
}
} }
_compressDefinitionTags(definitions) { _compressDefinitionTags(definitions) {
@ -773,37 +761,7 @@ class Translator {
}); });
} }
return this._sortDefinitions(results); return results;
}
_mergeBySequence(definitions, mainDictionary) {
const sequencedDefinitions = new Map();
const nonSequencedDefinitions = [];
for (const definition of definitions) {
const sequence = definition.sequence;
if (mainDictionary === definition.dictionary && sequence >= 0) {
let sequencedDefinition = sequencedDefinitions.get(sequence);
if (typeof sequencedDefinition === 'undefined') {
sequencedDefinition = {
reasons: definition.reasons,
score: definition.score,
expression: new Set(),
reading: new Set(),
expressions: new Map(),
source: definition.source,
dictionary: definition.dictionary,
definitions: []
};
sequencedDefinitions.set(sequence, sequencedDefinition);
} else {
sequencedDefinition.score = Math.max(sequencedDefinition.score, definition.score);
}
} else {
nonSequencedDefinitions.push(definition);
}
}
return [sequencedDefinitions, nonSequencedDefinitions];
} }
_mergeByGlossary(result, definitions, appendTo=null, mergedIndices=null) { _mergeByGlossary(result, definitions, appendTo=null, mergedIndices=null) {
@ -814,7 +772,8 @@ class Translator {
const resultReadingSet = result.reading; const resultReadingSet = result.reading;
const resultSource = result.source; const resultSource = result.source;
for (const [index, definition] of definitions.entries()) { for (let i = 0, ii = definitions.length; i < ii; ++i) {
const definition = definitions[i];
const {expression, reading} = definition; const {expression, reading} = definition;
if (mergedIndices !== null) { if (mergedIndices !== null) {
@ -823,7 +782,7 @@ class Translator {
typeof expressionMap !== 'undefined' && typeof expressionMap !== 'undefined' &&
typeof expressionMap.get(reading) !== 'undefined' typeof expressionMap.get(reading) !== 'undefined'
) { ) {
mergedIndices.add(index); mergedIndices.add(i);
} else { } else {
continue; continue;
} }
@ -909,37 +868,127 @@ class Translator {
} }
_createDictionaryTag(name) { _createDictionaryTag(name) {
return this._sanitizeTag({name, category: 'dictionary', order: 100}); return this._createTag(name, 'dictionary', '', 100, 0, name);
} }
_sanitizeTag(tag) { _createTag(name, category, notes, order, score, dictionary) {
tag.name = tag.name || 'untitled'; return {
tag.category = tag.category || 'default'; name,
tag.notes = tag.notes || ''; category: (typeof category === 'string' && category.length > 0 ? category : 'default'),
tag.order = tag.order || 0; notes: (typeof notes === 'string' ? notes : ''),
tag.score = tag.score || 0; order: (typeof order === 'number' ? order : 0),
return tag; score: (typeof score === 'number' ? score : 0),
dictionary: (typeof dictionary === 'string' ? dictionary : null)
};
}
_createKanjiStat(name, category, notes, order, score, dictionary, value) {
return {
name,
category: (typeof category === 'string' && category.length > 0 ? category : 'default'),
notes: (typeof notes === 'string' ? notes : ''),
order: (typeof order === 'number' ? order : 0),
score: (typeof score === 'number' ? score : 0),
dictionary: (typeof dictionary === 'string' ? dictionary : null),
value
};
}
async _createTermDefinitionFromDatabaseDefinition(databaseDefinition, source, rawSource, reasons) {
const {expression, reading, definitionTags, termTags, glossary, score, dictionary, id, sequence} = databaseDefinition;
const termTagsExpanded = await this._expandTags(termTags, dictionary);
const definitionTagsExpanded = await this._expandTags(definitionTags, dictionary);
definitionTagsExpanded.push(this._createDictionaryTag(dictionary));
this._sortTags(definitionTagsExpanded);
this._sortTags(termTagsExpanded);
const furiganaSegments = jp.distributeFurigana(expression, reading);
return {
source,
rawSource,
reasons,
score,
id,
dictionary,
expression,
reading,
furiganaSegments,
glossary,
definitionTags: definitionTagsExpanded,
termTags: termTagsExpanded,
sequence
};
}
_createExpression(expression, reading, termTags=null, termFrequency=null) {
const furiganaSegments = jp.distributeFurigana(expression, reading);
return {
expression,
reading,
furiganaSegments,
termTags,
termFrequency
};
} }
_sortTags(tags) { _sortTags(tags) {
return tags.sort((v1, v2) => { if (tags.length <= 1) { return; }
const order1 = v1.order; const stringComparer = this._stringComparer;
const order2 = v2.order; tags.sort((v1, v2) => {
if (order1 < order2) { const i = v1.order - v2.order;
return -1; if (i !== 0) { return i; }
} else if (order1 > order2) {
return 1; return stringComparer.compare(v1.name, v2.name);
});
} }
const name1 = v1.name; _sortDefinitions(definitions, dictionaries) {
const name2 = v2.name; if (definitions.length <= 1) { return; }
if (name1 < name2) { const stringComparer = this._stringComparer;
return -1; definitions.sort((v1, v2) => {
} else if (name1 > name2) { let i;
return 1; if (dictionaries !== null) {
const dictionaryInfo1 = dictionaries.get(v1.dictionary);
const dictionaryInfo2 = dictionaries.get(v2.dictionary);
const priority1 = typeof dictionaryInfo1 !== 'undefined' ? dictionaryInfo1.priority : 0;
const priority2 = typeof dictionaryInfo2 !== 'undefined' ? dictionaryInfo2.priority : 0;
i = priority2 - priority1;
if (i !== 0) { return i; }
} }
return 0; i = v2.source.length - v1.source.length;
if (i !== 0) { return i; }
i = v1.reasons.length - v2.reasons.length;
if (i !== 0) { return i; }
i = v2.score - v1.score;
if (i !== 0) { return i; }
const expression1 = v1.expression;
const expression2 = v2.expression;
i = expression2.length - expression1.length;
if (i !== 0) { return i; }
return stringComparer.compare(expression1, expression2);
});
}
_sortDatabaseDefinitionsByIndex(definitions) {
if (definitions.length <= 1) { return; }
definitions.sort((a, b) => a.index - b.index);
}
_sortKanjiStats(stats) {
if (stats.length <= 1) { return; }
const stringComparer = this._stringComparer;
stats.sort((v1, v2) => {
const i = v1.order - v2.order;
if (i !== 0) { return i; }
return stringComparer.compare(v1.notes, v2.notes);
}); });
} }
} }