Translator options refactor (#879)
* Refactor internal options for findTerms to not use the settings object
* Move findTerms/findKanji options creation
* Deconstruct used options values to variables before any await calls
* Rename findTermsOptions to just options
* Add documentation comments
* Add type information about definitions
parent 86c64ac4c2
commit 2bd82353e4
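In short (paraphrasing the diff that follows): call sites that previously handed the whole settings object to the Translator now build a small, purpose-specific options object first. The sketch below only excerpts that new call pattern from the hunks beneath it; mode, text, details and options are whatever the surrounding Backend method already has in scope.

// Sketch of the new call pattern used throughout the diff below.
const findTermsOptions = this._getTranslatorFindTermsOptions(details, options);
const [definitions, length] = await this._translator.findTerms(mode, text, findTermsOptions);

const findKanjiOptions = this._getTranslatorFindKanjiOptions(options);
const kanjiDefinitions = await this._translator.findKanji(text, findKanjiOptions);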
@@ -376,16 +376,19 @@ class Backend {
     async _onApiKanjiFind({text, optionsContext}) {
         const options = this.getOptions(optionsContext);
-        const definitions = await this._translator.findKanji(text, options);
-        definitions.splice(options.general.maxResults);
+        const {general: {maxResults}} = options;
+        const findKanjiOptions = this._getTranslatorFindKanjiOptions(options);
+        const definitions = await this._translator.findKanji(text, findKanjiOptions);
+        definitions.splice(maxResults);
         return definitions;
     }

     async _onApiTermsFind({text, details, optionsContext}) {
         const options = this.getOptions(optionsContext);
-        const mode = options.general.resultOutputMode;
-        const [definitions, length] = await this._translator.findTerms(mode, text, details, options);
-        definitions.splice(options.general.maxResults);
+        const {general: {resultOutputMode: mode, maxResults}} = options;
+        const findTermsOptions = this._getTranslatorFindTermsOptions(details, options);
+        const [definitions, length] = await this._translator.findTerms(mode, text, findTermsOptions);
+        definitions.splice(maxResults);
         return {length, definitions};
     }
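One plausible reading of the "deconstruct used options values to variables before any await calls" bullet, shown as a self-contained sketch (the function name and arguments are invented; the commit message does not state the motivation explicitly): destructuring maxResults before the await means the handler keeps using the value it read at entry, even if the settings object is updated while the asynchronous lookup is pending.

// Hypothetical illustration only; not code from this commit.
async function findAndTrim(translator, findTermsOptions, options, text) {
    const {general: {maxResults}} = options; // value captured before suspending
    const [definitions] = await translator.findTerms('split', text, findTermsOptions);
    definitions.splice(maxResults); // uses the captured value, not a re-read of the settings object
    return definitions;
}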
@@ -948,25 +951,26 @@ class Backend {
     }

     async _textParseScanning(text, options) {
+        const {scanning: {length: scanningLength}, parsing: {readingMode}} = options;
+        const findTermsOptions = this._getTranslatorFindTermsOptions({wildcard: null}, options);
         const results = [];
         while (text.length > 0) {
             const term = [];
             const [definitions, sourceLength] = await this._translator.findTerms(
                 'simple',
-                text.substring(0, options.scanning.length),
-                {},
-                options
+                text.substring(0, scanningLength),
+                findTermsOptions
             );
             if (definitions.length > 0 && sourceLength > 0) {
                 const {expression, reading} = definitions[0];
                 const source = text.substring(0, sourceLength);
                 for (const {text: text2, furigana} of jp.distributeFuriganaInflected(expression, reading, source)) {
-                    const reading2 = jp.convertReading(text2, furigana, options.parsing.readingMode);
+                    const reading2 = jp.convertReading(text2, furigana, readingMode);
                     term.push({text: text2, reading: reading2});
                 }
                 text = text.substring(source.length);
             } else {
-                const reading = jp.convertReading(text[0], '', options.parsing.readingMode);
+                const reading = jp.convertReading(text[0], '', readingMode);
                 term.push({text: text[0], reading});
                 text = text.substring(1);
             }
@@ -976,6 +980,7 @@ class Backend {
     }

     async _textParseMecab(text, options) {
+        const {parsing: {readingMode}} = options;
         const results = [];
         const rawResults = await this._mecab.parseText(text);
         for (const [mecabName, parsedLines] of Object.entries(rawResults)) {
@@ -988,7 +993,7 @@ class Backend {
                     jp.convertKatakanaToHiragana(reading),
                     source
                 )) {
-                    const reading2 = jp.convertReading(text2, furigana, options.parsing.readingMode);
+                    const reading2 = jp.convertReading(text2, furigana, readingMode);
                     term.push({text: text2, reading: reading2});
                 }
                 result.push(term);
@@ -1660,4 +1665,48 @@ class Backend {
         await this._optionsUtil.save(options);
         this._applyOptions(source);
     }
+
+    _getTranslatorFindTermsOptions(details, options) {
+        const {wildcard} = details;
+        const enabledDictionaryMap = this._getTranslatorEnabledDictionaryMap(options);
+        const {
+            general: {compactTags, mainDictionary},
+            scanning: {alphanumeric},
+            translation: {
+                convertHalfWidthCharacters,
+                convertNumericCharacters,
+                convertAlphabeticCharacters,
+                convertHiraganaToKatakana,
+                convertKatakanaToHiragana,
+                collapseEmphaticSequences
+            }
+        } = options;
+        return {
+            wildcard,
+            compactTags,
+            mainDictionary,
+            alphanumeric,
+            convertHalfWidthCharacters,
+            convertNumericCharacters,
+            convertAlphabeticCharacters,
+            convertHiraganaToKatakana,
+            convertKatakanaToHiragana,
+            collapseEmphaticSequences,
+            enabledDictionaryMap
+        };
+    }
+
+    _getTranslatorFindKanjiOptions(options) {
+        const enabledDictionaryMap = this._getTranslatorEnabledDictionaryMap(options);
+        return {enabledDictionaryMap};
+    }
+
+    _getTranslatorEnabledDictionaryMap(options) {
+        const enabledDictionaryMap = new Map();
+        for (const [title, {enabled, priority, allowSecondarySearches}] of Object.entries(options.dictionaries)) {
+            if (!enabled) { continue; }
+            enabledDictionaryMap.set(title, {priority, allowSecondarySearches});
+        }
+        return enabledDictionaryMap;
+    }
 }
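As a quick illustration of the new _getTranslatorEnabledDictionaryMap helper above, a hedged input/output sketch; the dictionary titles and values are invented, and only the transformation visible in the method body is assumed.

// Hypothetical illustration only; dictionary titles are invented.
const exampleOptions = {
    dictionaries: {
        'Dictionary A': {enabled: true, priority: 1, allowSecondarySearches: true},
        'Dictionary B': {enabled: false, priority: 0, allowSecondarySearches: false}
    }
};
// _getTranslatorEnabledDictionaryMap(exampleOptions) skips the disabled entry and yields:
//     Map { 'Dictionary A' => {priority: 1, allowSecondarySearches: true} }
// _getTranslatorFindKanjiOptions(exampleOptions) then wraps that map as {enabledDictionaryMap}.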
@@ -21,7 +21,14 @@
  * jp
  */

+/**
+ * Class which finds term and kanji definitions for text.
+ */
 class Translator {
+    /**
+     * Creates a new Translator instance.
+     * @param database An instance of DictionaryDatabase.
+     */
     constructor(database) {
         this._database = database;
         this._deinflector = null;
@@ -29,32 +36,82 @@ class Translator {
         this._stringComparer = new Intl.Collator('en-US'); // Invariant locale
     }

+    /**
+     * Initializes the instance for use. The public API should not be used until
+     * this function has been called and await'd.
+     */
     async prepare() {
         const reasons = await this._fetchJsonAsset('/bg/lang/deinflect.json');
         this._deinflector = new Deinflector(reasons);
     }

+    /**
+     * Clears the database tag cache. This should be executed if the database is changed.
+     */
     clearDatabaseCaches() {
         this._tagCache.clear();
     }

-    async findTerms(mode, text, details, options) {
+    /**
+     * Finds term definitions for the given text.
+     * @param mode The mode to use for finding terms, which determines the format of the resulting array.
+     * @param text The text to find terms for.
+     * @param options An object using the following structure:
+     *   {
+     *     wildcard: (null or string),
+     *     compactTags: (boolean),
+     *     mainDictionary: (string),
+     *     alphanumeric: (boolean),
+     *     convertHalfWidthCharacters: (boolean),
+     *     convertNumericCharacters: (boolean),
+     *     convertAlphabeticCharacters: (boolean),
+     *     convertHiraganaToKatakana: (boolean),
+     *     convertKatakanaToHiragana: (boolean),
+     *     collapseEmphaticSequences: (boolean),
+     *     enabledDictionaryMap: (Map of [
+     *       (string),
+     *       {
+     *         priority: (number),
+     *         allowSecondarySearches: (boolean)
+     *       }
+     *     ])
+     *   }
+     * @returns An array of [definitions, textLength]. The structure of each definition depends on the
+     *   mode parameter, see the _create?TermDefinition?() functions for structure details.
+     */
+    async findTerms(mode, text, options) {
         switch (mode) {
             case 'group':
-                return await this._findTermsGrouped(text, details, options);
+                return await this._findTermsGrouped(text, options);
             case 'merge':
-                return await this._findTermsMerged(text, details, options);
+                return await this._findTermsMerged(text, options);
             case 'split':
-                return await this._findTermsSplit(text, details, options);
+                return await this._findTermsSplit(text, options);
             case 'simple':
-                return await this._findTermsSimple(text, details, options);
+                return await this._findTermsSimple(text, options);
             default:
                 return [[], 0];
         }
     }

+    /**
+     * Finds kanji definitions for the given text.
+     * @param text The text to find kanji definitions for. This string can be of any length,
+     *   but is typically just one character, which is a single kanji. If the string is multiple
+     *   characters long, each character will be searched in the database.
+     * @param options An object using the following structure:
+     *   {
+     *     enabledDictionaryMap: (Map of [
+     *       (string),
+     *       {
+     *         priority: (number)
+     *       }
+     *     ])
+     *   }
+     * @returns An array of definitions. See the _createKanjiDefinition() function for structure details.
+     */
     async findKanji(text, options) {
-        const enabledDictionaryMap = this._getEnabledDictionaryMap(options);
+        const {enabledDictionaryMap} = options;
         const kanjiUnique = new Set();
         for (const c of text) {
             kanjiUnique.add(c);
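To make the new public signatures concrete, a hedged usage sketch assuming a prepared Translator instance named translator; the dictionary title and input text are invented, and the option values simply follow the types listed in the doc comments above rather than meaningful defaults.

// Illustrative only; not code from this commit.
const enabledDictionaryMap = new Map([
    ['Example Dictionary', {priority: 0, allowSecondarySearches: false}] // hypothetical title
]);

const [definitions, length] = await translator.findTerms('split', '食べました', {
    wildcard: null,
    compactTags: false,
    mainDictionary: 'Example Dictionary',
    alphanumeric: true,
    convertHalfWidthCharacters: false,
    convertNumericCharacters: false,
    convertAlphabeticCharacters: false,
    convertHiraganaToKatakana: false,
    convertKatakanaToHiragana: false,
    collapseEmphaticSequences: false,
    enabledDictionaryMap
});

const kanjiDefinitions = await translator.findKanji('食', {enabledDictionaryMap});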
@@ -250,10 +307,9 @@ class Translator {
         return result;
     }

-    async _findTermsGrouped(text, details, options) {
-        const {general: {compactTags}} = options;
-        const enabledDictionaryMap = this._getEnabledDictionaryMap(options);
-        const [definitions, length] = await this._findTermsInternal(text, enabledDictionaryMap, details, options);
+    async _findTermsGrouped(text, options) {
+        const {compactTags, enabledDictionaryMap} = options;
+        const [definitions, length] = await this._findTermsInternal(text, enabledDictionaryMap, options);

         const groupedDefinitions = this._groupTerms(definitions, enabledDictionaryMap);
         await this._buildTermMeta(groupedDefinitions, enabledDictionaryMap);
@@ -268,12 +324,11 @@ class Translator {
         return [groupedDefinitions, length];
     }

-    async _findTermsMerged(text, details, options) {
-        const {general: {compactTags, mainDictionary}} = options;
-        const enabledDictionaryMap = this._getEnabledDictionaryMap(options);
+    async _findTermsMerged(text, options) {
+        const {compactTags, mainDictionary, enabledDictionaryMap} = options;
         const secondarySearchDictionaryMap = this._getSecondarySearchDictionaryMap(enabledDictionaryMap);

-        const [definitions, length] = await this._findTermsInternal(text, enabledDictionaryMap, details, options);
+        const [definitions, length] = await this._findTermsInternal(text, enabledDictionaryMap, options);
         const {sequencedDefinitions, unsequencedDefinitions} = await this._getSequencedDefinitions(definitions, mainDictionary, enabledDictionaryMap);
         const definitionsMerged = [];
         const usedDefinitions = new Set();
@@ -318,30 +373,31 @@ class Translator {
         return [definitionsMerged, length];
     }

-    async _findTermsSplit(text, details, options) {
-        const enabledDictionaryMap = this._getEnabledDictionaryMap(options);
-        const [definitions, length] = await this._findTermsInternal(text, enabledDictionaryMap, details, options);
+    async _findTermsSplit(text, options) {
+        const {enabledDictionaryMap} = options;
+        const [definitions, length] = await this._findTermsInternal(text, enabledDictionaryMap, options);
         await this._buildTermMeta(definitions, enabledDictionaryMap);
         this._sortDefinitions(definitions, true);
         return [definitions, length];
     }

-    async _findTermsSimple(text, details, options) {
-        const enabledDictionaryMap = this._getEnabledDictionaryMap(options);
-        const [definitions, length] = await this._findTermsInternal(text, enabledDictionaryMap, details, options);
+    async _findTermsSimple(text, options) {
+        const {enabledDictionaryMap} = options;
+        const [definitions, length] = await this._findTermsInternal(text, enabledDictionaryMap, options);
         this._sortDefinitions(definitions, false);
         return [definitions, length];
     }

-    async _findTermsInternal(text, enabledDictionaryMap, details, options) {
-        text = this._getSearchableText(text, options.scanning.alphanumeric);
+    async _findTermsInternal(text, enabledDictionaryMap, options) {
+        const {alphanumeric, wildcard} = options;
+        text = this._getSearchableText(text, alphanumeric);
         if (text.length === 0) {
             return [[], 0];
         }

         const deinflections = (
-            details.wildcard ?
-                await this._findTermWildcard(text, enabledDictionaryMap, details.wildcard) :
+            wildcard ?
+                await this._findTermWildcard(text, enabledDictionaryMap, wildcard) :
                 await this._findTermDeinflections(text, enabledDictionaryMap, options)
         );
@@ -414,9 +470,8 @@ class Translator {
     }

     _getAllDeinflections(text, options) {
-        const translationOptions = options.translation;
         const collapseEmphaticOptions = [[false, false]];
-        switch (translationOptions.collapseEmphaticSequences) {
+        switch (options.collapseEmphaticSequences) {
             case 'true':
                 collapseEmphaticOptions.push([true, false]);
                 break;
@@ -425,11 +480,11 @@ class Translator {
                 break;
         }
         const textOptionVariantArray = [
-            this._getTextOptionEntryVariants(translationOptions.convertHalfWidthCharacters),
-            this._getTextOptionEntryVariants(translationOptions.convertNumericCharacters),
-            this._getTextOptionEntryVariants(translationOptions.convertAlphabeticCharacters),
-            this._getTextOptionEntryVariants(translationOptions.convertHiraganaToKatakana),
-            this._getTextOptionEntryVariants(translationOptions.convertKatakanaToHiragana),
+            this._getTextOptionEntryVariants(options.convertHalfWidthCharacters),
+            this._getTextOptionEntryVariants(options.convertNumericCharacters),
+            this._getTextOptionEntryVariants(options.convertAlphabeticCharacters),
+            this._getTextOptionEntryVariants(options.convertHiraganaToKatakana),
+            this._getTextOptionEntryVariants(options.convertKatakanaToHiragana),
             collapseEmphaticOptions
         ];
@@ -707,15 +762,6 @@ class Translator {
         return await response.json();
     }

-    _getEnabledDictionaryMap(options) {
-        const enabledDictionaryMap = new Map();
-        for (const [title, {enabled, priority, allowSecondarySearches}] of Object.entries(options.dictionaries)) {
-            if (!enabled) { continue; }
-            enabledDictionaryMap.set(title, {priority, allowSecondarySearches});
-        }
-        return enabledDictionaryMap;
-    }
-
     _getSecondarySearchDictionaryMap(enabledDictionaryMap) {
         const secondarySearchDictionaryMap = new Map();
         for (const [title, dictionary] of enabledDictionaryMap.entries()) {
@@ -999,7 +1045,7 @@ class Translator {
                 // glossary
                 // definitionTags
                 termTags: this._cloneTags(termTags),
-                definitions,
+                definitions, // type: 'term'
                 frequencies: [],
                 pitches: []
                 // only
@@ -1025,7 +1071,7 @@ class Translator {
                 // glossary
                 // definitionTags
                 // termTags
-                definitions,
+                definitions, // type: 'termMergedByGlossary'
                 frequencies: [],
                 pitches: []
                 // only
@@ -1064,7 +1110,7 @@ class Translator {
             glossary: [...glossary],
             definitionTags,
             // termTags
-            definitions, // Contains duplicate data
+            definitions, // type: 'term'; contains duplicate data
             frequencies: [],
             pitches: [],
             only