From 057283245e2a2ce55f89cacb42067b8c93dd28cd Mon Sep 17 00:00:00 2001
From: toasted-nutbread
Date: Sat, 5 Jun 2021 13:35:23 -0400
Subject: [PATCH] Add support for definitionless main dictionary (#1729)

---
 dev/translator-vm.js          |   6 ++
 ext/js/background/backend.js  |  22 +++++--
 ext/js/language/translator.js | 106 +++++++++++++++++++++++++++++++++-
 3 files changed, 127 insertions(+), 7 deletions(-)

diff --git a/dev/translator-vm.js b/dev/translator-vm.js
index 8397b65e..f68b25db 100644
--- a/dev/translator-vm.js
+++ b/dev/translator-vm.js
@@ -170,6 +170,12 @@ class TranslatorVM extends DatabaseVM {
             enabledDictionaryMap = new Map(enabledDictionaryMap);
             options.enabledDictionaryMap = enabledDictionaryMap;
         }
+        const {excludeDictionaryDefinitions} = options;
+        options.excludeDictionaryDefinitions = (
+            Array.isArray(excludeDictionaryDefinitions) ?
+            new Set(excludeDictionaryDefinitions) :
+            null
+        );
         return options;
     }
 
diff --git a/ext/js/background/backend.js b/ext/js/background/backend.js
index 42b03c59..33650319 100644
--- a/ext/js/background/backend.js
+++ b/ext/js/background/backend.js
@@ -411,7 +411,7 @@ class Backend {
     async _onApiTermsFind({text, details, optionsContext}) {
         const options = this._getProfileOptions(optionsContext);
         const {general: {resultOutputMode: mode, maxResults}} = options;
-        const findTermsOptions = this._getTranslatorFindTermsOptions(details, options);
+        const findTermsOptions = this._getTranslatorFindTermsOptions(mode, details, options);
         const {dictionaryEntries, originalTextLength} = await this._translator.findTerms(mode, text, findTermsOptions);
         dictionaryEntries.splice(maxResults);
         return {dictionaryEntries, originalTextLength};
@@ -1044,14 +1044,15 @@ class Backend {
     async _textParseScanning(text, options) {
         const jp = this._japaneseUtil;
         const {scanning: {length: scanningLength}, parsing: {readingMode}} = options;
-        const findTermsOptions = this._getTranslatorFindTermsOptions({wildcard: null}, options);
+        const mode = 'simple';
+        const findTermsOptions = this._getTranslatorFindTermsOptions(mode, {wildcard: null}, options);
         const results = [];
         let previousUngroupedSegment = null;
         let i = 0;
         const ii = text.length;
         while (i < ii) {
             const {dictionaryEntries, originalTextLength} = await this._translator.findTerms(
-                'simple',
+                mode,
                 text.substring(i, i + scanningLength),
                 findTermsOptions
             );
@@ -1869,7 +1870,7 @@ class Backend {
         this._applyOptions(source);
     }
 
-    _getTranslatorFindTermsOptions(details, options) {
+    _getTranslatorFindTermsOptions(mode, details, options) {
         const {wildcard} = details;
         const enabledDictionaryMap = this._getTranslatorEnabledDictionaryMap(options);
         const {
@@ -1886,6 +1887,16 @@ class Backend {
             }
         } = options;
         const textReplacements = this._getTranslatorTextReplacements(textReplacementsOptions);
+        let excludeDictionaryDefinitions = null;
+        if (mode === 'merge' && !enabledDictionaryMap.has(mainDictionary)) {
+            enabledDictionaryMap.set(mainDictionary, {
+                index: enabledDictionaryMap.size,
+                priority: 0,
+                allowSecondarySearches: false
+            });
+            excludeDictionaryDefinitions = new Set();
+            excludeDictionaryDefinitions.add(mainDictionary);
+        }
         return {
             wildcard,
             mainDictionary,
@@ -1897,7 +1908,8 @@ class Backend {
             convertKatakanaToHiragana,
             collapseEmphaticSequences,
             textReplacements,
-            enabledDictionaryMap
+            enabledDictionaryMap,
+            excludeDictionaryDefinitions
         };
     }
 
diff --git a/ext/js/language/translator.js b/ext/js/language/translator.js
index d416d405..bf2c7322 100644
--- a/ext/js/language/translator.js
+++ b/ext/js/language/translator.js
@@ -85,13 +85,14 @@ class Translator {
      *       priority: (number),
      *       allowSecondarySearches: (boolean)
      *     }
-     *   ])
+     *   ]),
+     *   excludeDictionaryDefinitions: (Set of (string) or null)
      * }
      * ```
      * @returns An object of the structure `{dictionaryEntries, originalTextLength}`.
      */
     async findTerms(mode, text, options) {
-        const {enabledDictionaryMap} = options;
+        const {enabledDictionaryMap, excludeDictionaryDefinitions} = options;
         let {dictionaryEntries, originalTextLength} = await this._findTermsInternal(text, enabledDictionaryMap, options);
 
         switch (mode) {
@@ -103,6 +104,10 @@ class Translator {
                 break;
         }
 
+        if (excludeDictionaryDefinitions !== null) {
+            this._removeExcludedDefinitions(dictionaryEntries, excludeDictionaryDefinitions);
+        }
+
         if (dictionaryEntries.length > 1) {
             this._sortTermDictionaryEntries(dictionaryEntries);
         }
@@ -492,6 +497,103 @@ class Translator {
         return newDictionaryEntries;
     }
 
+    _removeExcludedDefinitions(dictionaryEntries, excludeDictionaryDefinitions) {
+        for (let i = dictionaryEntries.length - 1; i >= 0; --i) {
+            const dictionaryEntry = dictionaryEntries[i];
+            const {definitions, pronunciations, frequencies, headwords} = dictionaryEntry;
+            const definitionsChanged = this._removeArrayItemsWithDictionary(definitions, excludeDictionaryDefinitions);
+            this._removeArrayItemsWithDictionary(pronunciations, excludeDictionaryDefinitions);
+            this._removeArrayItemsWithDictionary(frequencies, excludeDictionaryDefinitions);
+            this._removeTagGroupsWithDictionary(definitions, excludeDictionaryDefinitions);
+            this._removeTagGroupsWithDictionary(headwords, excludeDictionaryDefinitions);
+
+            if (!definitionsChanged) { continue; }
+
+            if (definitions.length === 0) {
+                dictionaryEntries.splice(i, 1);
+            } else {
+                this._removeUnusedHeadwords(dictionaryEntry);
+            }
+        }
+    }
+
+    _removeUnusedHeadwords(dictionaryEntry) {
+        const {definitions, pronunciations, frequencies, headwords} = dictionaryEntry;
+        const removeHeadwordIndices = new Set();
+        for (let i = 0, ii = headwords.length; i < ii; ++i) {
+            removeHeadwordIndices.add(i);
+        }
+        for (const {headwordIndices} of definitions) {
+            for (const headwordIndex of headwordIndices) {
+                removeHeadwordIndices.delete(headwordIndex);
+            }
+        }
+
+        if (removeHeadwordIndices.size === 0) { return; }
+
+        const indexRemap = new Map();
+        let oldIndex = 0;
+        for (let i = 0, ii = headwords.length; i < ii; ++i) {
+            // i follows the live array position and is rewound after every splice, while oldIndex keeps
+            // counting original positions; removeHeadwordIndices holds original positions, so test oldIndex.
+            if (removeHeadwordIndices.has(oldIndex)) {
+                headwords.splice(i, 1);
+                --i;
+                --ii;
+            } else {
+                indexRemap.set(oldIndex, indexRemap.size);
+            }
+            ++oldIndex;
+        }
+
+        this._updateDefinitionHeadwordIndices(definitions, indexRemap);
+        this._updateArrayItemsHeadwordIndex(pronunciations, indexRemap);
+        this._updateArrayItemsHeadwordIndex(frequencies, indexRemap);
+    }
+
+    _updateDefinitionHeadwordIndices(definitions, indexRemap) {
+        for (const {headwordIndices} of definitions) {
+            for (let i = headwordIndices.length - 1; i >= 0; --i) {
+                const newHeadwordIndex = indexRemap.get(headwordIndices[i]);
+                if (typeof newHeadwordIndex === 'undefined') {
+                    headwordIndices.splice(i, 1);
+                } else {
+                    headwordIndices[i] = newHeadwordIndex;
+                }
+            }
+        }
+    }
+
+    _updateArrayItemsHeadwordIndex(array, indexRemap) {
+        for (let i = array.length - 1; i >= 0; --i) {
+            const item = array[i];
+            const {headwordIndex} = item;
+            const newHeadwordIndex = indexRemap.get(headwordIndex);
+            if (typeof newHeadwordIndex === 'undefined') {
+                array.splice(i, 1);
+            } else {
+                item.headwordIndex = newHeadwordIndex;
+            }
+        }
+    }
+
+    _removeArrayItemsWithDictionary(array, excludeDictionaryDefinitions) {
+        let changed = false;
+        for (let j = array.length - 1; j >= 0; --j) {
+            const {dictionary} = array[j];
+            if (!excludeDictionaryDefinitions.has(dictionary)) { continue; }
+            array.splice(j, 1);
+            changed = true;
+        }
+        return changed;
+    }
+
+    _removeTagGroupsWithDictionary(array, excludeDictionaryDefinitions) {
+        for (const {tags} of array) {
+            this._removeArrayItemsWithDictionary(tags, excludeDictionaryDefinitions);
+        }
+    }
+
     // Tags
 
     _getTermTagTargets(dictionaryEntries) {