Translator refactor (#720)

* Move simple find sort into translator.js

* Remove unused

* Use direct reference to database

* Make field private

* Remove statics

* Convert functions to private

* Organize by public/private
This commit is contained in:
toasted-nutbread 2020-08-09 13:21:14 -04:00 committed by GitHub
parent 427cf99b9f
commit 480e0e15e3
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
3 changed files with 126 additions and 134 deletions

View File

@ -32,7 +32,6 @@
* TemplateRenderer
* Translator
* conditionsTestValue
* dictTermsSort
* jp
* profileConditionsDescriptor
* profileConditionsDescriptorPromise
@ -707,11 +706,11 @@ class Backend {
}
async _onApiGetDictionaryInfo() {
return await this._translator.database.getDictionaryInfo();
return await this._dictionaryDatabase.getDictionaryInfo();
}
async _onApiGetDictionaryCounts({dictionaryNames, getTotal}) {
return await this._translator.database.getDictionaryCounts(dictionaryNames, getTotal);
return await this._dictionaryDatabase.getDictionaryCounts(dictionaryNames, getTotal);
}
async _onApiPurgeDatabase() {
@ -1064,7 +1063,6 @@ class Backend {
options
);
if (definitions.length > 0 && sourceLength > 0) {
dictTermsSort(definitions);
const {expression, reading} = definitions[0];
const source = text.substring(0, sourceLength);
for (const {text: text2, furigana} of jp.distributeFuriganaInflected(expression, reading, source)) {

View File

@ -24,16 +24,6 @@ function dictEnabledSet(options) {
return enabledDictionaryMap;
}
function dictConfigured(options) {
for (const {enabled} of Object.values(options.dictionaries)) {
if (enabled) {
return true;
}
}
return false;
}
function dictTermsSort(definitions, dictionaries=null) {
return definitions.sort((v1, v2) => {
let i;

View File

@ -33,21 +33,70 @@
class Translator {
constructor(database) {
this.database = database;
this.deinflector = null;
this.tagCache = new Map();
this._database = database;
this._deinflector = null;
this._tagCache = new Map();
}
async prepare() {
const reasons = await this._fetchJsonAsset('/bg/lang/deinflect.json');
this.deinflector = new Deinflector(reasons);
this._deinflector = new Deinflector(reasons);
}
clearDatabaseCaches() {
this.tagCache.clear();
this._tagCache.clear();
}
async getSequencedDefinitions(definitions, mainDictionary) {
async findTerms(mode, text, details, options) {
switch (mode) {
case 'group':
return await this._findTermsGrouped(text, details, options);
case 'merge':
return await this._findTermsMerged(text, details, options);
case 'split':
return await this._findTermsSplit(text, details, options);
case 'simple':
return await this._findTermsSimple(text, details, options);
default:
return [[], 0];
}
}
async findKanji(text, options) {
const dictionaries = dictEnabledSet(options);
const kanjiUnique = new Set();
for (const c of text) {
kanjiUnique.add(c);
}
const definitions = await this._database.findKanjiBulk([...kanjiUnique], dictionaries);
if (definitions.length === 0) {
return definitions;
}
if (definitions.length > 1) {
definitions.sort((a, b) => a.index - b.index);
}
for (const definition of definitions) {
const tags = await this._expandTags(definition.tags, definition.dictionary);
tags.push(dictTagBuildSource(definition.dictionary));
dictTagsSort(tags);
const stats = await this._expandStats(definition.stats, definition.dictionary);
definition.tags = tags;
definition.stats = stats;
}
await this._buildKanjiMeta(definitions, dictionaries);
return definitions;
}
// Private
async _getSequencedDefinitions(definitions, mainDictionary) {
const [definitionsBySequence, defaultDefinitions] = dictTermsMergeBySequence(definitions, mainDictionary);
const sequenceList = [];
@ -57,14 +106,14 @@ class Translator {
sequencedDefinitions.push({definitions: value, rawDefinitions: []});
}
for (const definition of await this.database.findTermsBySequenceBulk(sequenceList, mainDictionary)) {
for (const definition of await this._database.findTermsBySequenceBulk(sequenceList, mainDictionary)) {
sequencedDefinitions[definition.index].rawDefinitions.push(definition);
}
return {sequencedDefinitions, defaultDefinitions};
}
async getMergedSecondarySearchResults(text, expressionsMap, secondarySearchDictionaries) {
async _getMergedSecondarySearchResults(text, expressionsMap, secondarySearchDictionaries) {
if (secondarySearchDictionaries.size === 0) {
return [];
}
@ -79,12 +128,12 @@ class Translator {
}
}
const definitions = await this.database.findTermsExactBulk(expressionList, readingList, secondarySearchDictionaries);
const definitions = await this._database.findTermsExactBulk(expressionList, readingList, secondarySearchDictionaries);
for (const definition of definitions) {
const definitionTags = await this.expandTags(definition.definitionTags, definition.dictionary);
const definitionTags = await this._expandTags(definition.definitionTags, definition.dictionary);
definitionTags.push(dictTagBuildSource(definition.dictionary));
definition.definitionTags = definitionTags;
const termTags = await this.expandTags(definition.termTags, definition.dictionary);
const termTags = await this._expandTags(definition.termTags, definition.dictionary);
definition.termTags = termTags;
}
@ -95,20 +144,20 @@ class Translator {
return definitions;
}
async getMergedDefinition(text, dictionaries, sequencedDefinition, defaultDefinitions, secondarySearchDictionaries, mergedByTermIndices) {
async _getMergedDefinition(text, dictionaries, sequencedDefinition, defaultDefinitions, secondarySearchDictionaries, mergedByTermIndices) {
const result = sequencedDefinition.definitions;
const rawDefinitionsBySequence = sequencedDefinition.rawDefinitions;
for (const definition of rawDefinitionsBySequence) {
const definitionTags = await this.expandTags(definition.definitionTags, definition.dictionary);
const definitionTags = await this._expandTags(definition.definitionTags, definition.dictionary);
definitionTags.push(dictTagBuildSource(definition.dictionary));
definition.definitionTags = definitionTags;
const termTags = await this.expandTags(definition.termTags, definition.dictionary);
const termTags = await this._expandTags(definition.termTags, definition.dictionary);
definition.termTags = termTags;
}
const definitionsByGloss = dictTermsMergeByGloss(result, rawDefinitionsBySequence);
const secondarySearchResults = await this.getMergedSecondarySearchResults(text, result.expressions, secondarySearchDictionaries);
const secondarySearchResults = await this._getMergedSecondarySearchResults(text, result.expressions, secondarySearchDictionaries);
dictTermsMergeByGloss(result, defaultDefinitions.concat(secondarySearchResults), definitionsByGloss, mergedByTermIndices);
@ -124,7 +173,7 @@ class Translator {
for (const [reading, termTagsMap] of readingMap.entries()) {
const termTags = [...termTagsMap.values()];
const score = termTags.map((tag) => tag.score).reduce((p, v) => p + v, 0);
expressions.push(Translator.createExpression(expression, reading, dictTagsSort(termTags), Translator.scoreToTermFrequency(score)));
expressions.push(this._createExpression(expression, reading, dictTagsSort(termTags), this._scoreToTermFrequency(score)));
}
}
@ -135,27 +184,12 @@ class Translator {
return result;
}
async findTerms(mode, text, details, options) {
switch (mode) {
case 'group':
return await this.findTermsGrouped(text, details, options);
case 'merge':
return await this.findTermsMerged(text, details, options);
case 'split':
return await this.findTermsSplit(text, details, options);
case 'simple':
return await this.findTermsSimple(text, details, options);
default:
return [[], 0];
}
}
async findTermsGrouped(text, details, options) {
async _findTermsGrouped(text, details, options) {
const dictionaries = dictEnabledSet(options);
const [definitions, length] = await this.findTermsInternal(text, dictionaries, details, options);
const [definitions, length] = await this._findTermsInternal(text, dictionaries, details, options);
const definitionsGrouped = dictTermsGroup(definitions, dictionaries);
await this.buildTermMeta(definitionsGrouped, dictionaries);
await this._buildTermMeta(definitionsGrouped, dictionaries);
if (options.general.compactTags) {
for (const definition of definitionsGrouped) {
@ -166,7 +200,7 @@ class Translator {
return [definitionsGrouped, length];
}
async findTermsMerged(text, details, options) {
async _findTermsMerged(text, details, options) {
const dictionaries = dictEnabledSet(options);
const secondarySearchDictionaries = new Map();
for (const [title, dictionary] of dictionaries.entries()) {
@ -174,13 +208,13 @@ class Translator {
secondarySearchDictionaries.set(title, dictionary);
}
const [definitions, length] = await this.findTermsInternal(text, dictionaries, details, options);
const {sequencedDefinitions, defaultDefinitions} = await this.getSequencedDefinitions(definitions, options.general.mainDictionary);
const [definitions, length] = await this._findTermsInternal(text, dictionaries, details, options);
const {sequencedDefinitions, defaultDefinitions} = await this._getSequencedDefinitions(definitions, options.general.mainDictionary);
const definitionsMerged = [];
const mergedByTermIndices = new Set();
for (const sequencedDefinition of sequencedDefinitions) {
const result = await this.getMergedDefinition(
const result = await this._getMergedDefinition(
text,
dictionaries,
sequencedDefinition,
@ -200,7 +234,7 @@ class Translator {
score,
expression: [expression],
reading: [reading],
expressions: [Translator.createExpression(groupedDefinition.expression, groupedDefinition.reading)],
expressions: [this._createExpression(groupedDefinition.expression, groupedDefinition.reading)],
source,
dictionary,
definitions: groupedDefinition.definitions
@ -208,7 +242,7 @@ class Translator {
definitionsMerged.push(compatibilityDefinition);
}
await this.buildTermMeta(definitionsMerged, dictionaries);
await this._buildTermMeta(definitionsMerged, dictionaries);
if (options.general.compactTags) {
for (const definition of definitionsMerged) {
@ -219,38 +253,40 @@ class Translator {
return [dictTermsSort(definitionsMerged), length];
}
async findTermsSplit(text, details, options) {
async _findTermsSplit(text, details, options) {
const dictionaries = dictEnabledSet(options);
const [definitions, length] = await this.findTermsInternal(text, dictionaries, details, options);
const [definitions, length] = await this._findTermsInternal(text, dictionaries, details, options);
await this.buildTermMeta(definitions, dictionaries);
await this._buildTermMeta(definitions, dictionaries);
return [definitions, length];
}
async findTermsSimple(text, details, options) {
async _findTermsSimple(text, details, options) {
const dictionaries = dictEnabledSet(options);
return await this.findTermsInternal(text, dictionaries, details, options);
const [definitions, length] = await this._findTermsInternal(text, dictionaries, details, options);
dictTermsSort(definitions);
return [definitions, length];
}
async findTermsInternal(text, dictionaries, details, options) {
text = Translator.getSearchableText(text, options);
async _findTermsInternal(text, dictionaries, details, options) {
text = this._getSearchableText(text, options);
if (text.length === 0) {
return [[], 0];
}
const deinflections = (
details.wildcard ?
await this.findTermWildcard(text, dictionaries, details.wildcard) :
await this.findTermDeinflections(text, dictionaries, options)
await this._findTermWildcard(text, dictionaries, details.wildcard) :
await this._findTermDeinflections(text, dictionaries, options)
);
let definitions = [];
for (const deinflection of deinflections) {
for (const definition of deinflection.definitions) {
const definitionTags = await this.expandTags(definition.definitionTags, definition.dictionary);
const definitionTags = await this._expandTags(definition.definitionTags, definition.dictionary);
definitionTags.push(dictTagBuildSource(definition.dictionary));
const termTags = await this.expandTags(definition.termTags, definition.dictionary);
const termTags = await this._expandTags(definition.termTags, definition.dictionary);
const {expression, reading} = definition;
const furiganaSegments = jp.distributeFurigana(expression, reading);
@ -284,8 +320,8 @@ class Translator {
return [definitions, length];
}
async findTermWildcard(text, dictionaries, wildcard) {
const definitions = await this.database.findTermsBulk([text], dictionaries, wildcard);
async _findTermWildcard(text, dictionaries, wildcard) {
const definitions = await this._database.findTermsBulk([text], dictionaries, wildcard);
if (definitions.length === 0) {
return [];
}
@ -300,8 +336,8 @@ class Translator {
}];
}
async findTermDeinflections(text, dictionaries, options) {
const deinflections = this.getAllDeinflections(text, options);
async _findTermDeinflections(text, dictionaries, options) {
const deinflections = this._getAllDeinflections(text, options);
if (deinflections.length === 0) {
return [];
@ -322,7 +358,7 @@ class Translator {
deinflectionArray.push(deinflection);
}
const definitions = await this.database.findTermsBulk(uniqueDeinflectionTerms, dictionaries, null);
const definitions = await this._database.findTermsBulk(uniqueDeinflectionTerms, dictionaries, null);
for (const definition of definitions) {
const definitionRules = Deinflector.rulesToRuleFlags(definition.rules);
@ -337,7 +373,7 @@ class Translator {
return deinflections.filter((e) => e.definitions.length > 0);
}
getAllDeinflections(text, options) {
_getAllDeinflections(text, options) {
const translationOptions = options.translation;
const collapseEmphaticOptions = [[false, false]];
switch (translationOptions.collapseEmphaticSequences) {
@ -349,17 +385,17 @@ class Translator {
break;
}
const textOptionVariantArray = [
Translator.getTextOptionEntryVariants(translationOptions.convertHalfWidthCharacters),
Translator.getTextOptionEntryVariants(translationOptions.convertNumericCharacters),
Translator.getTextOptionEntryVariants(translationOptions.convertAlphabeticCharacters),
Translator.getTextOptionEntryVariants(translationOptions.convertHiraganaToKatakana),
Translator.getTextOptionEntryVariants(translationOptions.convertKatakanaToHiragana),
this._getTextOptionEntryVariants(translationOptions.convertHalfWidthCharacters),
this._getTextOptionEntryVariants(translationOptions.convertNumericCharacters),
this._getTextOptionEntryVariants(translationOptions.convertAlphabeticCharacters),
this._getTextOptionEntryVariants(translationOptions.convertHiraganaToKatakana),
this._getTextOptionEntryVariants(translationOptions.convertKatakanaToHiragana),
collapseEmphaticOptions
];
const deinflections = [];
const used = new Set();
for (const [halfWidth, numeric, alphabetic, katakana, hiragana, [collapseEmphatic, collapseEmphaticFull]] of Translator.getArrayVariants(textOptionVariantArray)) {
for (const [halfWidth, numeric, alphabetic, katakana, hiragana, [collapseEmphatic, collapseEmphaticFull]] of this._getArrayVariants(textOptionVariantArray)) {
let text2 = text;
const sourceMap = new TextSourceMap(text2);
if (halfWidth) {
@ -385,7 +421,7 @@ class Translator {
const text2Substring = text2.substring(0, i);
if (used.has(text2Substring)) { break; }
used.add(text2Substring);
for (const deinflection of this.deinflector.deinflect(text2Substring)) {
for (const deinflection of this._deinflector.deinflect(text2Substring)) {
deinflection.rawSource = sourceMap.source.substring(0, sourceMap.getSourceLength(i));
deinflections.push(deinflection);
}
@ -394,7 +430,7 @@ class Translator {
return deinflections;
}
static getTextOptionEntryVariants(value) {
_getTextOptionEntryVariants(value) {
switch (value) {
case 'true': return [true];
case 'variant': return [false, true];
@ -402,39 +438,7 @@ class Translator {
}
}
async findKanji(text, options) {
const dictionaries = dictEnabledSet(options);
const kanjiUnique = new Set();
for (const c of text) {
kanjiUnique.add(c);
}
const definitions = await this.database.findKanjiBulk([...kanjiUnique], dictionaries);
if (definitions.length === 0) {
return definitions;
}
if (definitions.length > 1) {
definitions.sort((a, b) => a.index - b.index);
}
for (const definition of definitions) {
const tags = await this.expandTags(definition.tags, definition.dictionary);
tags.push(dictTagBuildSource(definition.dictionary));
dictTagsSort(tags);
const stats = await this.expandStats(definition.stats, definition.dictionary);
definition.tags = tags;
definition.stats = stats;
}
await this.buildKanjiMeta(definitions, dictionaries);
return definitions;
}
async buildTermMeta(definitions, dictionaries) {
async _buildTermMeta(definitions, dictionaries) {
const terms = [];
for (const definition of definitions) {
if (definition.expressions) {
@ -469,19 +473,19 @@ class Translator {
term.pitches = [];
}
const metas = await this.database.findTermMetaBulk(expressionsUnique, dictionaries);
const metas = await this._database.findTermMetaBulk(expressionsUnique, dictionaries);
for (const {expression, mode, data, dictionary, index} of metas) {
switch (mode) {
case 'freq':
for (const term of termsUnique[index]) {
const frequencyData = this.getFrequencyData(expression, data, dictionary, term);
const frequencyData = this._getFrequencyData(expression, data, dictionary, term);
if (frequencyData === null) { continue; }
term.frequencies.push(frequencyData);
}
break;
case 'pitch':
for (const term of termsUnique[index]) {
const pitchData = await this.getPitchData(expression, data, dictionary, term);
const pitchData = await this._getPitchData(expression, data, dictionary, term);
if (pitchData === null) { continue; }
term.pitches.push(pitchData);
}
@ -490,14 +494,14 @@ class Translator {
}
}
async buildKanjiMeta(definitions, dictionaries) {
async _buildKanjiMeta(definitions, dictionaries) {
const kanjiList = [];
for (const definition of definitions) {
kanjiList.push(definition.character);
definition.frequencies = [];
}
const metas = await this.database.findKanjiMetaBulk(kanjiList, dictionaries);
const metas = await this._database.findKanjiMetaBulk(kanjiList, dictionaries);
for (const {character, mode, data, dictionary, index} of metas) {
switch (mode) {
case 'freq':
@ -507,8 +511,8 @@ class Translator {
}
}
async expandTags(names, title) {
const tagMetaList = await this.getTagMetaList(names, title);
async _expandTags(names, title) {
const tagMetaList = await this._getTagMetaList(names, title);
return tagMetaList.map((meta, index) => {
const name = names[index];
const tag = dictTagSanitize(Object.assign({}, meta !== null ? meta : {}, {name}));
@ -516,9 +520,9 @@ class Translator {
});
}
async expandStats(items, title) {
async _expandStats(items, title) {
const names = Object.keys(items);
const tagMetaList = await this.getTagMetaList(names, title);
const tagMetaList = await this._getTagMetaList(names, title);
const statsGroups = new Map();
for (let i = 0; i < names.length; ++i) {
@ -546,20 +550,20 @@ class Translator {
return stats;
}
async getTagMetaList(names, title) {
async _getTagMetaList(names, title) {
const tagMetaList = [];
let cache = this.tagCache.get(title);
let cache = this._tagCache.get(title);
if (typeof cache === 'undefined') {
cache = new Map();
this.tagCache.set(title, cache);
this._tagCache.set(title, cache);
}
for (const name of names) {
const base = Translator.getNameBase(name);
const base = this._getNameBase(name);
let tagMeta = cache.get(base);
if (typeof tagMeta === 'undefined') {
tagMeta = await this.database.findTagForTitle(base, title);
tagMeta = await this._database.findTagForTitle(base, title);
cache.set(base, tagMeta);
}
@ -569,7 +573,7 @@ class Translator {
return tagMetaList;
}
getFrequencyData(expression, data, dictionary, term) {
_getFrequencyData(expression, data, dictionary, term) {
if (data !== null && typeof data === 'object') {
const {frequency, reading} = data;
@ -581,21 +585,21 @@ class Translator {
return {expression, frequency: data, dictionary};
}
async getPitchData(expression, data, dictionary, term) {
async _getPitchData(expression, data, dictionary, term) {
const reading = data.reading;
const termReading = term.reading || expression;
if (reading !== termReading) { return null; }
const pitches = [];
for (let {position, tags} of data.pitches) {
tags = Array.isArray(tags) ? await this.getTagMetaList(tags, dictionary) : [];
tags = Array.isArray(tags) ? await this._getTagMetaList(tags, dictionary) : [];
pitches.push({position, tags});
}
return {reading, pitches, dictionary};
}
static createExpression(expression, reading, termTags=null, termFrequency=null) {
_createExpression(expression, reading, termTags=null, termFrequency=null) {
const furiganaSegments = jp.distributeFurigana(expression, reading);
return {
expression,
@ -606,7 +610,7 @@ class Translator {
};
}
static scoreToTermFrequency(score) {
_scoreToTermFrequency(score) {
if (score > 0) {
return 'popular';
} else if (score < 0) {
@ -616,12 +620,12 @@ class Translator {
}
}
static getNameBase(name) {
_getNameBase(name) {
const pos = name.indexOf(':');
return (pos >= 0 ? name.substring(0, pos) : name);
}
static *getArrayVariants(arrayVariants) {
*_getArrayVariants(arrayVariants) {
const ii = arrayVariants.length;
let total = 1;
@ -641,7 +645,7 @@ class Translator {
}
}
static getSearchableText(text, options) {
_getSearchableText(text, options) {
if (!options.scanning.alphanumeric) {
let newText = '';
for (const c of text) {