Add support for definitionless main dictionary (#1729)

This commit is contained in:
toasted-nutbread 2021-06-05 13:35:23 -04:00 committed by GitHub
parent 57fb496fbc
commit 057283245e
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
3 changed files with 125 additions and 7 deletions

View File

@ -170,6 +170,12 @@ class TranslatorVM extends DatabaseVM {
enabledDictionaryMap = new Map(enabledDictionaryMap); enabledDictionaryMap = new Map(enabledDictionaryMap);
options.enabledDictionaryMap = enabledDictionaryMap; options.enabledDictionaryMap = enabledDictionaryMap;
} }
const {excludeDictionaryDefinitions} = options;
options.excludeDictionaryDefinitions = (
Array.isArray(excludeDictionaryDefinitions) ?
new Set(excludeDictionaryDefinitions) :
null
);
return options; return options;
} }

View File

@ -411,7 +411,7 @@ class Backend {
async _onApiTermsFind({text, details, optionsContext}) { async _onApiTermsFind({text, details, optionsContext}) {
const options = this._getProfileOptions(optionsContext); const options = this._getProfileOptions(optionsContext);
const {general: {resultOutputMode: mode, maxResults}} = options; const {general: {resultOutputMode: mode, maxResults}} = options;
const findTermsOptions = this._getTranslatorFindTermsOptions(details, options); const findTermsOptions = this._getTranslatorFindTermsOptions(mode, details, options);
const {dictionaryEntries, originalTextLength} = await this._translator.findTerms(mode, text, findTermsOptions); const {dictionaryEntries, originalTextLength} = await this._translator.findTerms(mode, text, findTermsOptions);
dictionaryEntries.splice(maxResults); dictionaryEntries.splice(maxResults);
return {dictionaryEntries, originalTextLength}; return {dictionaryEntries, originalTextLength};
@ -1044,14 +1044,15 @@ class Backend {
async _textParseScanning(text, options) { async _textParseScanning(text, options) {
const jp = this._japaneseUtil; const jp = this._japaneseUtil;
const {scanning: {length: scanningLength}, parsing: {readingMode}} = options; const {scanning: {length: scanningLength}, parsing: {readingMode}} = options;
const findTermsOptions = this._getTranslatorFindTermsOptions({wildcard: null}, options); const mode = 'simple';
const findTermsOptions = this._getTranslatorFindTermsOptions(mode, {wildcard: null}, options);
const results = []; const results = [];
let previousUngroupedSegment = null; let previousUngroupedSegment = null;
let i = 0; let i = 0;
const ii = text.length; const ii = text.length;
while (i < ii) { while (i < ii) {
const {dictionaryEntries, originalTextLength} = await this._translator.findTerms( const {dictionaryEntries, originalTextLength} = await this._translator.findTerms(
'simple', mode,
text.substring(i, i + scanningLength), text.substring(i, i + scanningLength),
findTermsOptions findTermsOptions
); );
@ -1869,7 +1870,7 @@ class Backend {
this._applyOptions(source); this._applyOptions(source);
} }
_getTranslatorFindTermsOptions(details, options) { _getTranslatorFindTermsOptions(mode, details, options) {
const {wildcard} = details; const {wildcard} = details;
const enabledDictionaryMap = this._getTranslatorEnabledDictionaryMap(options); const enabledDictionaryMap = this._getTranslatorEnabledDictionaryMap(options);
const { const {
@ -1886,6 +1887,16 @@ class Backend {
} }
} = options; } = options;
const textReplacements = this._getTranslatorTextReplacements(textReplacementsOptions); const textReplacements = this._getTranslatorTextReplacements(textReplacementsOptions);
let excludeDictionaryDefinitions = null;
if (mode === 'merge' && !enabledDictionaryMap.has(mainDictionary)) {
enabledDictionaryMap.set(mainDictionary, {
index: enabledDictionaryMap.size,
priority: 0,
allowSecondarySearches: false
});
excludeDictionaryDefinitions = new Set();
excludeDictionaryDefinitions.add(mainDictionary);
}
return { return {
wildcard, wildcard,
mainDictionary, mainDictionary,
@ -1897,7 +1908,8 @@ class Backend {
convertKatakanaToHiragana, convertKatakanaToHiragana,
collapseEmphaticSequences, collapseEmphaticSequences,
textReplacements, textReplacements,
enabledDictionaryMap enabledDictionaryMap,
excludeDictionaryDefinitions
}; };
} }

View File

@ -85,13 +85,14 @@ class Translator {
* priority: (number), * priority: (number),
* allowSecondarySearches: (boolean) * allowSecondarySearches: (boolean)
* } * }
* ]) * ]),
* excludeDictionaryDefinitions: (Set of (string) or null)
* } * }
* ``` * ```
* @returns An object of the structure `{dictionaryEntries, originalTextLength}`. * @returns An object of the structure `{dictionaryEntries, originalTextLength}`.
*/ */
async findTerms(mode, text, options) { async findTerms(mode, text, options) {
const {enabledDictionaryMap} = options; const {enabledDictionaryMap, excludeDictionaryDefinitions} = options;
let {dictionaryEntries, originalTextLength} = await this._findTermsInternal(text, enabledDictionaryMap, options); let {dictionaryEntries, originalTextLength} = await this._findTermsInternal(text, enabledDictionaryMap, options);
switch (mode) { switch (mode) {
@ -103,6 +104,10 @@ class Translator {
break; break;
} }
if (excludeDictionaryDefinitions !== null) {
this._removeExcludedDefinitions(dictionaryEntries, excludeDictionaryDefinitions);
}
if (dictionaryEntries.length > 1) { if (dictionaryEntries.length > 1) {
this._sortTermDictionaryEntries(dictionaryEntries); this._sortTermDictionaryEntries(dictionaryEntries);
} }
@ -492,6 +497,101 @@ class Translator {
return newDictionaryEntries; return newDictionaryEntries;
} }
_removeExcludedDefinitions(dictionaryEntries, excludeDictionaryDefinitions) {
for (let i = dictionaryEntries.length - 1; i >= 0; --i) {
const dictionaryEntry = dictionaryEntries[i];
const {definitions, pronunciations, frequencies, headwords} = dictionaryEntry;
const definitionsChanged = this._removeArrayItemsWithDictionary(definitions, excludeDictionaryDefinitions);
this._removeArrayItemsWithDictionary(pronunciations, excludeDictionaryDefinitions);
this._removeArrayItemsWithDictionary(frequencies, excludeDictionaryDefinitions);
this._removeTagGroupsWithDictionary(definitions, excludeDictionaryDefinitions);
this._removeTagGroupsWithDictionary(headwords, excludeDictionaryDefinitions);
if (!definitionsChanged) { continue; }
if (definitions.length === 0) {
dictionaryEntries.splice(i, 1);
} else {
this._removeUnusedHeadwords(dictionaryEntry);
}
}
}
_removeUnusedHeadwords(dictionaryEntry) {
const {definitions, pronunciations, frequencies, headwords} = dictionaryEntry;
const removeHeadwordIndices = new Set();
for (let i = 0, ii = headwords.length; i < ii; ++i) {
removeHeadwordIndices.add(i);
}
for (const {headwordIndices} of definitions) {
for (const headwordIndex of headwordIndices) {
removeHeadwordIndices.delete(headwordIndex);
}
}
if (removeHeadwordIndices.size === 0) { return; }
const indexRemap = new Map();
let oldIndex = 0;
for (let i = 0, ii = headwords.length; i < ii; ++i) {
if (removeHeadwordIndices.has(i)) {
headwords.splice(i, 1);
--i;
--ii;
} else {
indexRemap.set(oldIndex, indexRemap.size);
}
++oldIndex;
}
this._updateDefinitionHeadwordIndices(definitions, indexRemap);
this._updateArrayItemsHeadwordIndex(pronunciations, indexRemap);
this._updateArrayItemsHeadwordIndex(frequencies, indexRemap);
}
_updateDefinitionHeadwordIndices(definitions, indexRemap) {
for (const {headwordIndices} of definitions) {
for (let i = headwordIndices.length - 1; i >= 0; --i) {
const newHeadwordIndex = indexRemap.get(headwordIndices[i]);
if (typeof newHeadwordIndex === 'undefined') {
headwordIndices.splice(i, 1);
} else {
headwordIndices[i] = newHeadwordIndex;
}
}
}
}
_updateArrayItemsHeadwordIndex(array, indexRemap) {
for (let i = array.length - 1; i >= 0; --i) {
const item = array[i];
const {headwordIndex} = item;
const newHeadwordIndex = indexRemap.get(headwordIndex);
if (typeof newHeadwordIndex === 'undefined') {
array.splice(i, 1);
} else {
item.headwordIndex = newHeadwordIndex;
}
}
}
_removeArrayItemsWithDictionary(array, excludeDictionaryDefinitions) {
let changed = false;
for (let j = array.length - 1; j >= 0; --j) {
const {dictionary} = array[j];
if (!excludeDictionaryDefinitions.has(dictionary)) { continue; }
array.splice(j, 1);
changed = true;
}
return changed;
}
_removeTagGroupsWithDictionary(array, excludeDictionaryDefinitions) {
for (const {tags} of array) {
this._removeArrayItemsWithDictionary(tags, excludeDictionaryDefinitions);
}
}
// Tags // Tags
_getTermTagTargets(dictionaryEntries) { _getTermTagTargets(dictionaryEntries) {