Translator options refactor (#879)

* Refactor internal options for findTerms to not use the settings object

* Move findTerms/findKanji options creation

* Deconstruct used options values to variables before any await calls

* Rename findTermsOptions to just options

* Add documentation comments

* Add type information about definitions
This commit is contained in:
toasted-nutbread 2020-10-04 12:54:55 -04:00 committed by GitHub
parent 86c64ac4c2
commit 2bd82353e4
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
2 changed files with 149 additions and 54 deletions

View File

@ -376,16 +376,19 @@ class Backend {
async _onApiKanjiFind({text, optionsContext}) { async _onApiKanjiFind({text, optionsContext}) {
const options = this.getOptions(optionsContext); const options = this.getOptions(optionsContext);
const definitions = await this._translator.findKanji(text, options); const {general: {maxResults}} = options;
definitions.splice(options.general.maxResults); const findKanjiOptions = this._getTranslatorFindKanjiOptions(options);
const definitions = await this._translator.findKanji(text, findKanjiOptions);
definitions.splice(maxResults);
return definitions; return definitions;
} }
async _onApiTermsFind({text, details, optionsContext}) { async _onApiTermsFind({text, details, optionsContext}) {
const options = this.getOptions(optionsContext); const options = this.getOptions(optionsContext);
const mode = options.general.resultOutputMode; const {general: {resultOutputMode: mode, maxResults}} = options;
const [definitions, length] = await this._translator.findTerms(mode, text, details, options); const findTermsOptions = this._getTranslatorFindTermsOptions(details, options);
definitions.splice(options.general.maxResults); const [definitions, length] = await this._translator.findTerms(mode, text, findTermsOptions);
definitions.splice(maxResults);
return {length, definitions}; return {length, definitions};
} }
@ -948,25 +951,26 @@ class Backend {
} }
async _textParseScanning(text, options) { async _textParseScanning(text, options) {
const {scanning: {length: scanningLength}, parsing: {readingMode}} = options;
const findTermsOptions = this._getTranslatorFindTermsOptions({wildcard: null}, options);
const results = []; const results = [];
while (text.length > 0) { while (text.length > 0) {
const term = []; const term = [];
const [definitions, sourceLength] = await this._translator.findTerms( const [definitions, sourceLength] = await this._translator.findTerms(
'simple', 'simple',
text.substring(0, options.scanning.length), text.substring(0, scanningLength),
{}, findTermsOptions
options
); );
if (definitions.length > 0 && sourceLength > 0) { if (definitions.length > 0 && sourceLength > 0) {
const {expression, reading} = definitions[0]; const {expression, reading} = definitions[0];
const source = text.substring(0, sourceLength); const source = text.substring(0, sourceLength);
for (const {text: text2, furigana} of jp.distributeFuriganaInflected(expression, reading, source)) { for (const {text: text2, furigana} of jp.distributeFuriganaInflected(expression, reading, source)) {
const reading2 = jp.convertReading(text2, furigana, options.parsing.readingMode); const reading2 = jp.convertReading(text2, furigana, readingMode);
term.push({text: text2, reading: reading2}); term.push({text: text2, reading: reading2});
} }
text = text.substring(source.length); text = text.substring(source.length);
} else { } else {
const reading = jp.convertReading(text[0], '', options.parsing.readingMode); const reading = jp.convertReading(text[0], '', readingMode);
term.push({text: text[0], reading}); term.push({text: text[0], reading});
text = text.substring(1); text = text.substring(1);
} }
@ -976,6 +980,7 @@ class Backend {
} }
async _textParseMecab(text, options) { async _textParseMecab(text, options) {
const {parsing: {readingMode}} = options;
const results = []; const results = [];
const rawResults = await this._mecab.parseText(text); const rawResults = await this._mecab.parseText(text);
for (const [mecabName, parsedLines] of Object.entries(rawResults)) { for (const [mecabName, parsedLines] of Object.entries(rawResults)) {
@ -988,7 +993,7 @@ class Backend {
jp.convertKatakanaToHiragana(reading), jp.convertKatakanaToHiragana(reading),
source source
)) { )) {
const reading2 = jp.convertReading(text2, furigana, options.parsing.readingMode); const reading2 = jp.convertReading(text2, furigana, readingMode);
term.push({text: text2, reading: reading2}); term.push({text: text2, reading: reading2});
} }
result.push(term); result.push(term);
@ -1660,4 +1665,48 @@ class Backend {
await this._optionsUtil.save(options); await this._optionsUtil.save(options);
this._applyOptions(source); this._applyOptions(source);
} }
_getTranslatorFindTermsOptions(details, options) {
const {wildcard} = details;
const enabledDictionaryMap = this._getTranslatorEnabledDictionaryMap(options);
const {
general: {compactTags, mainDictionary},
scanning: {alphanumeric},
translation: {
convertHalfWidthCharacters,
convertNumericCharacters,
convertAlphabeticCharacters,
convertHiraganaToKatakana,
convertKatakanaToHiragana,
collapseEmphaticSequences
}
} = options;
return {
wildcard,
compactTags,
mainDictionary,
alphanumeric,
convertHalfWidthCharacters,
convertNumericCharacters,
convertAlphabeticCharacters,
convertHiraganaToKatakana,
convertKatakanaToHiragana,
collapseEmphaticSequences,
enabledDictionaryMap
};
}
_getTranslatorFindKanjiOptions(options) {
const enabledDictionaryMap = this._getTranslatorEnabledDictionaryMap(options);
return {enabledDictionaryMap};
}
_getTranslatorEnabledDictionaryMap(options) {
const enabledDictionaryMap = new Map();
for (const [title, {enabled, priority, allowSecondarySearches}] of Object.entries(options.dictionaries)) {
if (!enabled) { continue; }
enabledDictionaryMap.set(title, {priority, allowSecondarySearches});
}
return enabledDictionaryMap;
}
} }

View File

@ -21,7 +21,14 @@
* jp * jp
*/ */
/**
* Class which finds term and kanji definitions for text.
*/
class Translator { class Translator {
/**
* Creates a new Translator instance.
* @param database An instance of DictionaryDatabase.
*/
constructor(database) { constructor(database) {
this._database = database; this._database = database;
this._deinflector = null; this._deinflector = null;
@ -29,32 +36,82 @@ class Translator {
this._stringComparer = new Intl.Collator('en-US'); // Invariant locale this._stringComparer = new Intl.Collator('en-US'); // Invariant locale
} }
/**
* Initializes the instance for use. The public API should not be used until
* this function has been called and await'd.
*/
async prepare() { async prepare() {
const reasons = await this._fetchJsonAsset('/bg/lang/deinflect.json'); const reasons = await this._fetchJsonAsset('/bg/lang/deinflect.json');
this._deinflector = new Deinflector(reasons); this._deinflector = new Deinflector(reasons);
} }
/**
* Clears the database tag cache. This should be executed if the database is changed.
*/
clearDatabaseCaches() { clearDatabaseCaches() {
this._tagCache.clear(); this._tagCache.clear();
} }
async findTerms(mode, text, details, options) { /**
* Finds term definitions for the given text.
* @param mode The mode to use for finding terms, which determines the format of the resulting array.
* @param text The text to find terms for.
* @param options An object using the following structure:
* {
* wildcard: (null or string),
* compactTags: (boolean),
* mainDictionary: (string),
* alphanumeric: (boolean),
* convertHalfWidthCharacters: (boolean),
* convertNumericCharacters: (boolean),
* convertAlphabeticCharacters: (boolean),
* convertHiraganaToKatakana: (boolean),
* convertKatakanaToHiragana: (boolean),
* collapseEmphaticSequences: (string mode, e.g. 'true'; see _getAllDeinflections()),
* enabledDictionaryMap: (Map of [
* (string),
* {
* priority: (number),
* allowSecondarySearches: (boolean)
* }
* ])
* }
* @returns An array of [definitions, textLength]. The structure of each definition depends on the
* mode parameter, see the _create?TermDefinition?() functions for structure details.
*/
async findTerms(mode, text, options) {
switch (mode) { switch (mode) {
case 'group': case 'group':
return await this._findTermsGrouped(text, details, options); return await this._findTermsGrouped(text, options);
case 'merge': case 'merge':
return await this._findTermsMerged(text, details, options); return await this._findTermsMerged(text, options);
case 'split': case 'split':
return await this._findTermsSplit(text, details, options); return await this._findTermsSplit(text, options);
case 'simple': case 'simple':
return await this._findTermsSimple(text, details, options); return await this._findTermsSimple(text, options);
default: default:
return [[], 0]; return [[], 0];
} }
} }
/**
* Finds kanji definitions for the given text.
* @param text The text to find kanji definitions for. This string can be of any length,
* but is typically just one character, which is a single kanji. If the string is multiple
* characters long, each character will be searched in the database.
* @param options An object using the following structure:
* {
* enabledDictionaryMap: (Map of [
* (string),
* {
* priority: (number)
* }
* ])
* }
* @returns An array of definitions. See the _createKanjiDefinition() function for structure details.
*/
async findKanji(text, options) { async findKanji(text, options) {
const enabledDictionaryMap = this._getEnabledDictionaryMap(options); const {enabledDictionaryMap} = options;
const kanjiUnique = new Set(); const kanjiUnique = new Set();
for (const c of text) { for (const c of text) {
kanjiUnique.add(c); kanjiUnique.add(c);
@ -250,10 +307,9 @@ class Translator {
return result; return result;
} }
async _findTermsGrouped(text, details, options) { async _findTermsGrouped(text, options) {
const {general: {compactTags}} = options; const {compactTags, enabledDictionaryMap} = options;
const enabledDictionaryMap = this._getEnabledDictionaryMap(options); const [definitions, length] = await this._findTermsInternal(text, enabledDictionaryMap, options);
const [definitions, length] = await this._findTermsInternal(text, enabledDictionaryMap, details, options);
const groupedDefinitions = this._groupTerms(definitions, enabledDictionaryMap); const groupedDefinitions = this._groupTerms(definitions, enabledDictionaryMap);
await this._buildTermMeta(groupedDefinitions, enabledDictionaryMap); await this._buildTermMeta(groupedDefinitions, enabledDictionaryMap);
@ -268,12 +324,11 @@ class Translator {
return [groupedDefinitions, length]; return [groupedDefinitions, length];
} }
async _findTermsMerged(text, details, options) { async _findTermsMerged(text, options) {
const {general: {compactTags, mainDictionary}} = options; const {compactTags, mainDictionary, enabledDictionaryMap} = options;
const enabledDictionaryMap = this._getEnabledDictionaryMap(options);
const secondarySearchDictionaryMap = this._getSecondarySearchDictionaryMap(enabledDictionaryMap); const secondarySearchDictionaryMap = this._getSecondarySearchDictionaryMap(enabledDictionaryMap);
const [definitions, length] = await this._findTermsInternal(text, enabledDictionaryMap, details, options); const [definitions, length] = await this._findTermsInternal(text, enabledDictionaryMap, options);
const {sequencedDefinitions, unsequencedDefinitions} = await this._getSequencedDefinitions(definitions, mainDictionary, enabledDictionaryMap); const {sequencedDefinitions, unsequencedDefinitions} = await this._getSequencedDefinitions(definitions, mainDictionary, enabledDictionaryMap);
const definitionsMerged = []; const definitionsMerged = [];
const usedDefinitions = new Set(); const usedDefinitions = new Set();
@ -318,30 +373,31 @@ class Translator {
return [definitionsMerged, length]; return [definitionsMerged, length];
} }
async _findTermsSplit(text, details, options) { async _findTermsSplit(text, options) {
const enabledDictionaryMap = this._getEnabledDictionaryMap(options); const {enabledDictionaryMap} = options;
const [definitions, length] = await this._findTermsInternal(text, enabledDictionaryMap, details, options); const [definitions, length] = await this._findTermsInternal(text, enabledDictionaryMap, options);
await this._buildTermMeta(definitions, enabledDictionaryMap); await this._buildTermMeta(definitions, enabledDictionaryMap);
this._sortDefinitions(definitions, true); this._sortDefinitions(definitions, true);
return [definitions, length]; return [definitions, length];
} }
async _findTermsSimple(text, details, options) { async _findTermsSimple(text, options) {
const enabledDictionaryMap = this._getEnabledDictionaryMap(options); const {enabledDictionaryMap} = options;
const [definitions, length] = await this._findTermsInternal(text, enabledDictionaryMap, details, options); const [definitions, length] = await this._findTermsInternal(text, enabledDictionaryMap, options);
this._sortDefinitions(definitions, false); this._sortDefinitions(definitions, false);
return [definitions, length]; return [definitions, length];
} }
async _findTermsInternal(text, enabledDictionaryMap, details, options) { async _findTermsInternal(text, enabledDictionaryMap, options) {
text = this._getSearchableText(text, options.scanning.alphanumeric); const {alphanumeric, wildcard} = options;
text = this._getSearchableText(text, alphanumeric);
if (text.length === 0) { if (text.length === 0) {
return [[], 0]; return [[], 0];
} }
const deinflections = ( const deinflections = (
details.wildcard ? wildcard ?
await this._findTermWildcard(text, enabledDictionaryMap, details.wildcard) : await this._findTermWildcard(text, enabledDictionaryMap, wildcard) :
await this._findTermDeinflections(text, enabledDictionaryMap, options) await this._findTermDeinflections(text, enabledDictionaryMap, options)
); );
@ -414,9 +470,8 @@ class Translator {
} }
_getAllDeinflections(text, options) { _getAllDeinflections(text, options) {
const translationOptions = options.translation;
const collapseEmphaticOptions = [[false, false]]; const collapseEmphaticOptions = [[false, false]];
switch (translationOptions.collapseEmphaticSequences) { switch (options.collapseEmphaticSequences) {
case 'true': case 'true':
collapseEmphaticOptions.push([true, false]); collapseEmphaticOptions.push([true, false]);
break; break;
@ -425,11 +480,11 @@ class Translator {
break; break;
} }
const textOptionVariantArray = [ const textOptionVariantArray = [
this._getTextOptionEntryVariants(translationOptions.convertHalfWidthCharacters), this._getTextOptionEntryVariants(options.convertHalfWidthCharacters),
this._getTextOptionEntryVariants(translationOptions.convertNumericCharacters), this._getTextOptionEntryVariants(options.convertNumericCharacters),
this._getTextOptionEntryVariants(translationOptions.convertAlphabeticCharacters), this._getTextOptionEntryVariants(options.convertAlphabeticCharacters),
this._getTextOptionEntryVariants(translationOptions.convertHiraganaToKatakana), this._getTextOptionEntryVariants(options.convertHiraganaToKatakana),
this._getTextOptionEntryVariants(translationOptions.convertKatakanaToHiragana), this._getTextOptionEntryVariants(options.convertKatakanaToHiragana),
collapseEmphaticOptions collapseEmphaticOptions
]; ];
@ -707,15 +762,6 @@ class Translator {
return await response.json(); return await response.json();
} }
_getEnabledDictionaryMap(options) {
const enabledDictionaryMap = new Map();
for (const [title, {enabled, priority, allowSecondarySearches}] of Object.entries(options.dictionaries)) {
if (!enabled) { continue; }
enabledDictionaryMap.set(title, {priority, allowSecondarySearches});
}
return enabledDictionaryMap;
}
_getSecondarySearchDictionaryMap(enabledDictionaryMap) { _getSecondarySearchDictionaryMap(enabledDictionaryMap) {
const secondarySearchDictionaryMap = new Map(); const secondarySearchDictionaryMap = new Map();
for (const [title, dictionary] of enabledDictionaryMap.entries()) { for (const [title, dictionary] of enabledDictionaryMap.entries()) {
@ -999,7 +1045,7 @@ class Translator {
// glossary // glossary
// definitionTags // definitionTags
termTags: this._cloneTags(termTags), termTags: this._cloneTags(termTags),
definitions, definitions, // type: 'term'
frequencies: [], frequencies: [],
pitches: [] pitches: []
// only // only
@ -1025,7 +1071,7 @@ class Translator {
// glossary // glossary
// definitionTags // definitionTags
// termTags // termTags
definitions, definitions, // type: 'termMergedByGlossary'
frequencies: [], frequencies: [],
pitches: [] pitches: []
// only // only
@ -1064,7 +1110,7 @@ class Translator {
glossary: [...glossary], glossary: [...glossary],
definitionTags, definitionTags,
// termTags // termTags
definitions, // Contains duplicate data definitions, // type: 'term'; contains duplicate data
frequencies: [], frequencies: [],
pitches: [], pitches: [],
only only