From 2bd82353e46ecc8c16f2b55f81c8daae6f73e12e Mon Sep 17 00:00:00 2001 From: toasted-nutbread Date: Sun, 4 Oct 2020 12:54:55 -0400 Subject: [PATCH] Translator options refactor (#879) * Refactor internal options for findTerms to not use the settings object * Move findTerms/findKanji options creation * Deconstruct used options values to variables before any await calls * Rename findTermsOptions to just options * Add documentation comments * Add type information about definitions --- ext/bg/js/backend.js | 71 +++++++++++++++++---- ext/bg/js/translator.js | 132 +++++++++++++++++++++++++++------------- 2 files changed, 149 insertions(+), 54 deletions(-) diff --git a/ext/bg/js/backend.js b/ext/bg/js/backend.js index a268396d..ebdd2cda 100644 --- a/ext/bg/js/backend.js +++ b/ext/bg/js/backend.js @@ -376,16 +376,19 @@ class Backend { async _onApiKanjiFind({text, optionsContext}) { const options = this.getOptions(optionsContext); - const definitions = await this._translator.findKanji(text, options); - definitions.splice(options.general.maxResults); + const {general: {maxResults}} = options; + const findKanjiOptions = this._getTranslatorFindKanjiOptions(options); + const definitions = await this._translator.findKanji(text, findKanjiOptions); + definitions.splice(maxResults); return definitions; } async _onApiTermsFind({text, details, optionsContext}) { const options = this.getOptions(optionsContext); - const mode = options.general.resultOutputMode; - const [definitions, length] = await this._translator.findTerms(mode, text, details, options); - definitions.splice(options.general.maxResults); + const {general: {resultOutputMode: mode, maxResults}} = options; + const findTermsOptions = this._getTranslatorFindTermsOptions(details, options); + const [definitions, length] = await this._translator.findTerms(mode, text, findTermsOptions); + definitions.splice(maxResults); return {length, definitions}; } @@ -948,25 +951,26 @@ class Backend { } async _textParseScanning(text, options) { + const {scanning: {length: scanningLength}, parsing: {readingMode}} = options; + const findTermsOptions = this._getTranslatorFindTermsOptions({wildcard: null}, options); const results = []; while (text.length > 0) { const term = []; const [definitions, sourceLength] = await this._translator.findTerms( 'simple', - text.substring(0, options.scanning.length), - {}, - options + text.substring(0, scanningLength), + findTermsOptions ); if (definitions.length > 0 && sourceLength > 0) { const {expression, reading} = definitions[0]; const source = text.substring(0, sourceLength); for (const {text: text2, furigana} of jp.distributeFuriganaInflected(expression, reading, source)) { - const reading2 = jp.convertReading(text2, furigana, options.parsing.readingMode); + const reading2 = jp.convertReading(text2, furigana, readingMode); term.push({text: text2, reading: reading2}); } text = text.substring(source.length); } else { - const reading = jp.convertReading(text[0], '', options.parsing.readingMode); + const reading = jp.convertReading(text[0], '', readingMode); term.push({text: text[0], reading}); text = text.substring(1); } @@ -976,6 +980,7 @@ class Backend { } async _textParseMecab(text, options) { + const {parsing: {readingMode}} = options; const results = []; const rawResults = await this._mecab.parseText(text); for (const [mecabName, parsedLines] of Object.entries(rawResults)) { @@ -988,7 +993,7 @@ class Backend { jp.convertKatakanaToHiragana(reading), source )) { - const reading2 = jp.convertReading(text2, furigana, options.parsing.readingMode); + const reading2 = jp.convertReading(text2, furigana, readingMode); term.push({text: text2, reading: reading2}); } result.push(term); @@ -1660,4 +1665,48 @@ class Backend { await this._optionsUtil.save(options); this._applyOptions(source); } + + _getTranslatorFindTermsOptions(details, options) { + const {wildcard} = details; + const enabledDictionaryMap = this._getTranslatorEnabledDictionaryMap(options); + const { + general: {compactTags, mainDictionary}, + scanning: {alphanumeric}, + translation: { + convertHalfWidthCharacters, + convertNumericCharacters, + convertAlphabeticCharacters, + convertHiraganaToKatakana, + convertKatakanaToHiragana, + collapseEmphaticSequences + } + } = options; + return { + wildcard, + compactTags, + mainDictionary, + alphanumeric, + convertHalfWidthCharacters, + convertNumericCharacters, + convertAlphabeticCharacters, + convertHiraganaToKatakana, + convertKatakanaToHiragana, + collapseEmphaticSequences, + enabledDictionaryMap + }; + } + + _getTranslatorFindKanjiOptions(options) { + const enabledDictionaryMap = this._getTranslatorEnabledDictionaryMap(options); + return {enabledDictionaryMap}; + } + + _getTranslatorEnabledDictionaryMap(options) { + const enabledDictionaryMap = new Map(); + for (const [title, {enabled, priority, allowSecondarySearches}] of Object.entries(options.dictionaries)) { + if (!enabled) { continue; } + enabledDictionaryMap.set(title, {priority, allowSecondarySearches}); + } + return enabledDictionaryMap; + } } diff --git a/ext/bg/js/translator.js b/ext/bg/js/translator.js index 34b6ca34..7af3f61f 100644 --- a/ext/bg/js/translator.js +++ b/ext/bg/js/translator.js @@ -21,7 +21,14 @@ * jp */ +/** + * Class which finds term and kanji definitions for text. + */ class Translator { + /** + * Creates a new Translator instance. + * @param database An instance of DictionaryDatabase. + */ constructor(database) { this._database = database; this._deinflector = null; @@ -29,32 +36,82 @@ class Translator { this._stringComparer = new Intl.Collator('en-US'); // Invariant locale } + /** + * Initializes the instance for use. The public API should not be used until + * this function has been called and await'd. + */ async prepare() { const reasons = await this._fetchJsonAsset('/bg/lang/deinflect.json'); this._deinflector = new Deinflector(reasons); } + /** + * Clears the database tag cache. This should be executed if the database is changed. + */ clearDatabaseCaches() { this._tagCache.clear(); } - async findTerms(mode, text, details, options) { + /** + * Finds term definitions for the given text. + * @param mode The mode to use for finding terms, which determines the format of the resulting array. + * @param text The text to find terms for. + * @param options An object using the following structure: + * { + * wildcard: (null or string), + * compactTags: (boolean), + * mainDictionary: (string), + * alphanumeric: (boolean), + * convertHalfWidthCharacters: (boolean), + * convertNumericCharacters: (boolean), + * convertAlphabeticCharacters: (boolean), + * convertHiraganaToKatakana: (boolean), + * convertKatakanaToHiragana: (boolean), + * collapseEmphaticSequences: (boolean), + * enabledDictionaryMap: (Map of [ + * (string), + * { + * priority: (number), + * allowSecondarySearches: (boolean) + * } + * ]) + * } + * @returns An array of [definitions, textLength]. The structure of each definition depends on the + * mode parameter, see the _create?TermDefinition?() functions for structure details. + */ + async findTerms(mode, text, options) { switch (mode) { case 'group': - return await this._findTermsGrouped(text, details, options); + return await this._findTermsGrouped(text, options); case 'merge': - return await this._findTermsMerged(text, details, options); + return await this._findTermsMerged(text, options); case 'split': - return await this._findTermsSplit(text, details, options); + return await this._findTermsSplit(text, options); case 'simple': - return await this._findTermsSimple(text, details, options); + return await this._findTermsSimple(text, options); default: return [[], 0]; } } + /** + * Finds kanji definitions for the given text. + * @param text The text to find kanji definitions for. This string can be of any length, + * but is typically just one character, which is a single kanji. If the string is multiple + * characters long, each character will be searched in the database. + * @param options An object using the following structure: + * { + * enabledDictionaryMap: (Map of [ + * (string), + * { + * priority: (number) + * } + * ]) + * } + * @returns An array of definitions. See the _createKanjiDefinition() function for structure details. + */ async findKanji(text, options) { - const enabledDictionaryMap = this._getEnabledDictionaryMap(options); + const {enabledDictionaryMap} = options; const kanjiUnique = new Set(); for (const c of text) { kanjiUnique.add(c); @@ -250,10 +307,9 @@ class Translator { return result; } - async _findTermsGrouped(text, details, options) { - const {general: {compactTags}} = options; - const enabledDictionaryMap = this._getEnabledDictionaryMap(options); - const [definitions, length] = await this._findTermsInternal(text, enabledDictionaryMap, details, options); + async _findTermsGrouped(text, options) { + const {compactTags, enabledDictionaryMap} = options; + const [definitions, length] = await this._findTermsInternal(text, enabledDictionaryMap, options); const groupedDefinitions = this._groupTerms(definitions, enabledDictionaryMap); await this._buildTermMeta(groupedDefinitions, enabledDictionaryMap); @@ -268,12 +324,11 @@ class Translator { return [groupedDefinitions, length]; } - async _findTermsMerged(text, details, options) { - const {general: {compactTags, mainDictionary}} = options; - const enabledDictionaryMap = this._getEnabledDictionaryMap(options); + async _findTermsMerged(text, options) { + const {compactTags, mainDictionary, enabledDictionaryMap} = options; const secondarySearchDictionaryMap = this._getSecondarySearchDictionaryMap(enabledDictionaryMap); - const [definitions, length] = await this._findTermsInternal(text, enabledDictionaryMap, details, options); + const [definitions, length] = await this._findTermsInternal(text, enabledDictionaryMap, options); const {sequencedDefinitions, unsequencedDefinitions} = await this._getSequencedDefinitions(definitions, mainDictionary, enabledDictionaryMap); const definitionsMerged = []; const usedDefinitions = new Set(); @@ -318,30 +373,31 @@ class Translator { return [definitionsMerged, length]; } - async _findTermsSplit(text, details, options) { - const enabledDictionaryMap = this._getEnabledDictionaryMap(options); - const [definitions, length] = await this._findTermsInternal(text, enabledDictionaryMap, details, options); + async _findTermsSplit(text, options) { + const {enabledDictionaryMap} = options; + const [definitions, length] = await this._findTermsInternal(text, enabledDictionaryMap, options); await this._buildTermMeta(definitions, enabledDictionaryMap); this._sortDefinitions(definitions, true); return [definitions, length]; } - async _findTermsSimple(text, details, options) { - const enabledDictionaryMap = this._getEnabledDictionaryMap(options); - const [definitions, length] = await this._findTermsInternal(text, enabledDictionaryMap, details, options); + async _findTermsSimple(text, options) { + const {enabledDictionaryMap} = options; + const [definitions, length] = await this._findTermsInternal(text, enabledDictionaryMap, options); this._sortDefinitions(definitions, false); return [definitions, length]; } - async _findTermsInternal(text, enabledDictionaryMap, details, options) { - text = this._getSearchableText(text, options.scanning.alphanumeric); + async _findTermsInternal(text, enabledDictionaryMap, options) { + const {alphanumeric, wildcard} = options; + text = this._getSearchableText(text, alphanumeric); if (text.length === 0) { return [[], 0]; } const deinflections = ( - details.wildcard ? - await this._findTermWildcard(text, enabledDictionaryMap, details.wildcard) : + wildcard ? + await this._findTermWildcard(text, enabledDictionaryMap, wildcard) : await this._findTermDeinflections(text, enabledDictionaryMap, options) ); @@ -414,9 +470,8 @@ class Translator { } _getAllDeinflections(text, options) { - const translationOptions = options.translation; const collapseEmphaticOptions = [[false, false]]; - switch (translationOptions.collapseEmphaticSequences) { + switch (options.collapseEmphaticSequences) { case 'true': collapseEmphaticOptions.push([true, false]); break; @@ -425,11 +480,11 @@ class Translator { break; } const textOptionVariantArray = [ - this._getTextOptionEntryVariants(translationOptions.convertHalfWidthCharacters), - this._getTextOptionEntryVariants(translationOptions.convertNumericCharacters), - this._getTextOptionEntryVariants(translationOptions.convertAlphabeticCharacters), - this._getTextOptionEntryVariants(translationOptions.convertHiraganaToKatakana), - this._getTextOptionEntryVariants(translationOptions.convertKatakanaToHiragana), + this._getTextOptionEntryVariants(options.convertHalfWidthCharacters), + this._getTextOptionEntryVariants(options.convertNumericCharacters), + this._getTextOptionEntryVariants(options.convertAlphabeticCharacters), + this._getTextOptionEntryVariants(options.convertHiraganaToKatakana), + this._getTextOptionEntryVariants(options.convertKatakanaToHiragana), collapseEmphaticOptions ]; @@ -707,15 +762,6 @@ class Translator { return await response.json(); } - _getEnabledDictionaryMap(options) { - const enabledDictionaryMap = new Map(); - for (const [title, {enabled, priority, allowSecondarySearches}] of Object.entries(options.dictionaries)) { - if (!enabled) { continue; } - enabledDictionaryMap.set(title, {priority, allowSecondarySearches}); - } - return enabledDictionaryMap; - } - _getSecondarySearchDictionaryMap(enabledDictionaryMap) { const secondarySearchDictionaryMap = new Map(); for (const [title, dictionary] of enabledDictionaryMap.entries()) { @@ -999,7 +1045,7 @@ class Translator { // glossary // definitionTags termTags: this._cloneTags(termTags), - definitions, + definitions, // type: 'term' frequencies: [], pitches: [] // only @@ -1025,7 +1071,7 @@ class Translator { // glossary // definitionTags // termTags - definitions, + definitions, // type: 'termMergedByGlossary' frequencies: [], pitches: [] // only @@ -1064,7 +1110,7 @@ class Translator { glossary: [...glossary], definitionTags, // termTags - definitions, // Contains duplicate data + definitions, // type: 'term'; contains duplicate data frequencies: [], pitches: [], only