Translator options refactor (#879)

* Refactor internal options for findTerms to not use the settings object

* Move findTerms/findKanji options creation

* Deconstruct used options values to variables before any await calls

* Rename findTermsOptions to just options

* Add documentation comments

* Add type information about definitions
This commit is contained in:
toasted-nutbread 2020-10-04 12:54:55 -04:00 committed by GitHub
parent 86c64ac4c2
commit 2bd82353e4
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
2 changed files with 149 additions and 54 deletions

View File

@ -376,16 +376,19 @@ class Backend {
async _onApiKanjiFind({text, optionsContext}) {
const options = this.getOptions(optionsContext);
const definitions = await this._translator.findKanji(text, options);
definitions.splice(options.general.maxResults);
const {general: {maxResults}} = options;
const findKanjiOptions = this._getTranslatorFindKanjiOptions(options);
const definitions = await this._translator.findKanji(text, findKanjiOptions);
definitions.splice(maxResults);
return definitions;
}
async _onApiTermsFind({text, details, optionsContext}) {
const options = this.getOptions(optionsContext);
const mode = options.general.resultOutputMode;
const [definitions, length] = await this._translator.findTerms(mode, text, details, options);
definitions.splice(options.general.maxResults);
const {general: {resultOutputMode: mode, maxResults}} = options;
const findTermsOptions = this._getTranslatorFindTermsOptions(details, options);
const [definitions, length] = await this._translator.findTerms(mode, text, findTermsOptions);
definitions.splice(maxResults);
return {length, definitions};
}
@ -948,25 +951,26 @@ class Backend {
}
async _textParseScanning(text, options) {
const {scanning: {length: scanningLength}, parsing: {readingMode}} = options;
const findTermsOptions = this._getTranslatorFindTermsOptions({wildcard: null}, options);
const results = [];
while (text.length > 0) {
const term = [];
const [definitions, sourceLength] = await this._translator.findTerms(
'simple',
text.substring(0, options.scanning.length),
{},
options
text.substring(0, scanningLength),
findTermsOptions
);
if (definitions.length > 0 && sourceLength > 0) {
const {expression, reading} = definitions[0];
const source = text.substring(0, sourceLength);
for (const {text: text2, furigana} of jp.distributeFuriganaInflected(expression, reading, source)) {
const reading2 = jp.convertReading(text2, furigana, options.parsing.readingMode);
const reading2 = jp.convertReading(text2, furigana, readingMode);
term.push({text: text2, reading: reading2});
}
text = text.substring(source.length);
} else {
const reading = jp.convertReading(text[0], '', options.parsing.readingMode);
const reading = jp.convertReading(text[0], '', readingMode);
term.push({text: text[0], reading});
text = text.substring(1);
}
@ -976,6 +980,7 @@ class Backend {
}
async _textParseMecab(text, options) {
const {parsing: {readingMode}} = options;
const results = [];
const rawResults = await this._mecab.parseText(text);
for (const [mecabName, parsedLines] of Object.entries(rawResults)) {
@ -988,7 +993,7 @@ class Backend {
jp.convertKatakanaToHiragana(reading),
source
)) {
const reading2 = jp.convertReading(text2, furigana, options.parsing.readingMode);
const reading2 = jp.convertReading(text2, furigana, readingMode);
term.push({text: text2, reading: reading2});
}
result.push(term);
@ -1660,4 +1665,48 @@ class Backend {
await this._optionsUtil.save(options);
this._applyOptions(source);
}
_getTranslatorFindTermsOptions(details, options) {
const {wildcard} = details;
const enabledDictionaryMap = this._getTranslatorEnabledDictionaryMap(options);
const {
general: {compactTags, mainDictionary},
scanning: {alphanumeric},
translation: {
convertHalfWidthCharacters,
convertNumericCharacters,
convertAlphabeticCharacters,
convertHiraganaToKatakana,
convertKatakanaToHiragana,
collapseEmphaticSequences
}
} = options;
return {
wildcard,
compactTags,
mainDictionary,
alphanumeric,
convertHalfWidthCharacters,
convertNumericCharacters,
convertAlphabeticCharacters,
convertHiraganaToKatakana,
convertKatakanaToHiragana,
collapseEmphaticSequences,
enabledDictionaryMap
};
}
_getTranslatorFindKanjiOptions(options) {
const enabledDictionaryMap = this._getTranslatorEnabledDictionaryMap(options);
return {enabledDictionaryMap};
}
_getTranslatorEnabledDictionaryMap(options) {
const enabledDictionaryMap = new Map();
for (const [title, {enabled, priority, allowSecondarySearches}] of Object.entries(options.dictionaries)) {
if (!enabled) { continue; }
enabledDictionaryMap.set(title, {priority, allowSecondarySearches});
}
return enabledDictionaryMap;
}
}

View File

@ -21,7 +21,14 @@
* jp
*/
/**
* Class which finds term and kanji definitions for text.
*/
class Translator {
/**
* Creates a new Translator instance.
* @param database An instance of DictionaryDatabase.
*/
constructor(database) {
this._database = database;
this._deinflector = null;
@ -29,32 +36,82 @@ class Translator {
this._stringComparer = new Intl.Collator('en-US'); // Invariant locale
}
/**
* Initializes the instance for use. The public API should not be used until
* this function has been called and await'd.
*/
async prepare() {
const reasons = await this._fetchJsonAsset('/bg/lang/deinflect.json');
this._deinflector = new Deinflector(reasons);
}
/**
* Clears the database tag cache. This should be executed if the database is changed.
*/
clearDatabaseCaches() {
this._tagCache.clear();
}
async findTerms(mode, text, details, options) {
/**
* Finds term definitions for the given text.
* @param mode The mode to use for finding terms, which determines the format of the resulting array.
* @param text The text to find terms for.
* @param options An object using the following structure:
* {
* wildcard: (null or string),
* compactTags: (boolean),
* mainDictionary: (string),
* alphanumeric: (boolean),
* convertHalfWidthCharacters: (boolean),
* convertNumericCharacters: (boolean),
* convertAlphabeticCharacters: (boolean),
* convertHiraganaToKatakana: (boolean),
* convertKatakanaToHiragana: (boolean),
* collapseEmphaticSequences: (boolean),
* enabledDictionaryMap: (Map of [
* (string),
* {
* priority: (number),
* allowSecondarySearches: (boolean)
* }
* ])
* }
* @returns An array of [definitions, textLength]. The structure of each definition depends on the
* mode parameter, see the _create?TermDefinition?() functions for structure details.
*/
async findTerms(mode, text, options) {
switch (mode) {
case 'group':
return await this._findTermsGrouped(text, details, options);
return await this._findTermsGrouped(text, options);
case 'merge':
return await this._findTermsMerged(text, details, options);
return await this._findTermsMerged(text, options);
case 'split':
return await this._findTermsSplit(text, details, options);
return await this._findTermsSplit(text, options);
case 'simple':
return await this._findTermsSimple(text, details, options);
return await this._findTermsSimple(text, options);
default:
return [[], 0];
}
}
/**
* Finds kanji definitions for the given text.
* @param text The text to find kanji definitions for. This string can be of any length,
* but is typically just one character, which is a single kanji. If the string is multiple
* characters long, each character will be searched in the database.
* @param options An object using the following structure:
* {
* enabledDictionaryMap: (Map of [
* (string),
* {
* priority: (number)
* }
* ])
* }
* @returns An array of definitions. See the _createKanjiDefinition() function for structure details.
*/
async findKanji(text, options) {
const enabledDictionaryMap = this._getEnabledDictionaryMap(options);
const {enabledDictionaryMap} = options;
const kanjiUnique = new Set();
for (const c of text) {
kanjiUnique.add(c);
@ -250,10 +307,9 @@ class Translator {
return result;
}
async _findTermsGrouped(text, details, options) {
const {general: {compactTags}} = options;
const enabledDictionaryMap = this._getEnabledDictionaryMap(options);
const [definitions, length] = await this._findTermsInternal(text, enabledDictionaryMap, details, options);
async _findTermsGrouped(text, options) {
const {compactTags, enabledDictionaryMap} = options;
const [definitions, length] = await this._findTermsInternal(text, enabledDictionaryMap, options);
const groupedDefinitions = this._groupTerms(definitions, enabledDictionaryMap);
await this._buildTermMeta(groupedDefinitions, enabledDictionaryMap);
@ -268,12 +324,11 @@ class Translator {
return [groupedDefinitions, length];
}
async _findTermsMerged(text, details, options) {
const {general: {compactTags, mainDictionary}} = options;
const enabledDictionaryMap = this._getEnabledDictionaryMap(options);
async _findTermsMerged(text, options) {
const {compactTags, mainDictionary, enabledDictionaryMap} = options;
const secondarySearchDictionaryMap = this._getSecondarySearchDictionaryMap(enabledDictionaryMap);
const [definitions, length] = await this._findTermsInternal(text, enabledDictionaryMap, details, options);
const [definitions, length] = await this._findTermsInternal(text, enabledDictionaryMap, options);
const {sequencedDefinitions, unsequencedDefinitions} = await this._getSequencedDefinitions(definitions, mainDictionary, enabledDictionaryMap);
const definitionsMerged = [];
const usedDefinitions = new Set();
@ -318,30 +373,31 @@ class Translator {
return [definitionsMerged, length];
}
async _findTermsSplit(text, details, options) {
const enabledDictionaryMap = this._getEnabledDictionaryMap(options);
const [definitions, length] = await this._findTermsInternal(text, enabledDictionaryMap, details, options);
async _findTermsSplit(text, options) {
const {enabledDictionaryMap} = options;
const [definitions, length] = await this._findTermsInternal(text, enabledDictionaryMap, options);
await this._buildTermMeta(definitions, enabledDictionaryMap);
this._sortDefinitions(definitions, true);
return [definitions, length];
}
async _findTermsSimple(text, details, options) {
const enabledDictionaryMap = this._getEnabledDictionaryMap(options);
const [definitions, length] = await this._findTermsInternal(text, enabledDictionaryMap, details, options);
async _findTermsSimple(text, options) {
const {enabledDictionaryMap} = options;
const [definitions, length] = await this._findTermsInternal(text, enabledDictionaryMap, options);
this._sortDefinitions(definitions, false);
return [definitions, length];
}
async _findTermsInternal(text, enabledDictionaryMap, details, options) {
text = this._getSearchableText(text, options.scanning.alphanumeric);
async _findTermsInternal(text, enabledDictionaryMap, options) {
const {alphanumeric, wildcard} = options;
text = this._getSearchableText(text, alphanumeric);
if (text.length === 0) {
return [[], 0];
}
const deinflections = (
details.wildcard ?
await this._findTermWildcard(text, enabledDictionaryMap, details.wildcard) :
wildcard ?
await this._findTermWildcard(text, enabledDictionaryMap, wildcard) :
await this._findTermDeinflections(text, enabledDictionaryMap, options)
);
@ -414,9 +470,8 @@ class Translator {
}
_getAllDeinflections(text, options) {
const translationOptions = options.translation;
const collapseEmphaticOptions = [[false, false]];
switch (translationOptions.collapseEmphaticSequences) {
switch (options.collapseEmphaticSequences) {
case 'true':
collapseEmphaticOptions.push([true, false]);
break;
@ -425,11 +480,11 @@ class Translator {
break;
}
const textOptionVariantArray = [
this._getTextOptionEntryVariants(translationOptions.convertHalfWidthCharacters),
this._getTextOptionEntryVariants(translationOptions.convertNumericCharacters),
this._getTextOptionEntryVariants(translationOptions.convertAlphabeticCharacters),
this._getTextOptionEntryVariants(translationOptions.convertHiraganaToKatakana),
this._getTextOptionEntryVariants(translationOptions.convertKatakanaToHiragana),
this._getTextOptionEntryVariants(options.convertHalfWidthCharacters),
this._getTextOptionEntryVariants(options.convertNumericCharacters),
this._getTextOptionEntryVariants(options.convertAlphabeticCharacters),
this._getTextOptionEntryVariants(options.convertHiraganaToKatakana),
this._getTextOptionEntryVariants(options.convertKatakanaToHiragana),
collapseEmphaticOptions
];
@ -707,15 +762,6 @@ class Translator {
return await response.json();
}
_getEnabledDictionaryMap(options) {
const enabledDictionaryMap = new Map();
for (const [title, {enabled, priority, allowSecondarySearches}] of Object.entries(options.dictionaries)) {
if (!enabled) { continue; }
enabledDictionaryMap.set(title, {priority, allowSecondarySearches});
}
return enabledDictionaryMap;
}
_getSecondarySearchDictionaryMap(enabledDictionaryMap) {
const secondarySearchDictionaryMap = new Map();
for (const [title, dictionary] of enabledDictionaryMap.entries()) {
@ -999,7 +1045,7 @@ class Translator {
// glossary
// definitionTags
termTags: this._cloneTags(termTags),
definitions,
definitions, // type: 'term'
frequencies: [],
pitches: []
// only
@ -1025,7 +1071,7 @@ class Translator {
// glossary
// definitionTags
// termTags
definitions,
definitions, // type: 'termMergedByGlossary'
frequencies: [],
pitches: []
// only
@ -1064,7 +1110,7 @@ class Translator {
glossary: [...glossary],
definitionTags,
// termTags
definitions, // Contains duplicate data
definitions, // type: 'term'; contains duplicate data
frequencies: [],
pitches: [],
only