Add support for definitionless main dictionary (#1729)
This commit is contained in:
parent
57fb496fbc
commit
057283245e
@ -170,6 +170,12 @@ class TranslatorVM extends DatabaseVM {
|
||||
enabledDictionaryMap = new Map(enabledDictionaryMap);
|
||||
options.enabledDictionaryMap = enabledDictionaryMap;
|
||||
}
|
||||
const {excludeDictionaryDefinitions} = options;
|
||||
options.excludeDictionaryDefinitions = (
|
||||
Array.isArray(excludeDictionaryDefinitions) ?
|
||||
new Set(excludeDictionaryDefinitions) :
|
||||
null
|
||||
);
|
||||
|
||||
return options;
|
||||
}
|
||||
|
@ -411,7 +411,7 @@ class Backend {
|
||||
async _onApiTermsFind({text, details, optionsContext}) {
|
||||
const options = this._getProfileOptions(optionsContext);
|
||||
const {general: {resultOutputMode: mode, maxResults}} = options;
|
||||
const findTermsOptions = this._getTranslatorFindTermsOptions(details, options);
|
||||
const findTermsOptions = this._getTranslatorFindTermsOptions(mode, details, options);
|
||||
const {dictionaryEntries, originalTextLength} = await this._translator.findTerms(mode, text, findTermsOptions);
|
||||
dictionaryEntries.splice(maxResults);
|
||||
return {dictionaryEntries, originalTextLength};
|
||||
@ -1044,14 +1044,15 @@ class Backend {
|
||||
async _textParseScanning(text, options) {
|
||||
const jp = this._japaneseUtil;
|
||||
const {scanning: {length: scanningLength}, parsing: {readingMode}} = options;
|
||||
const findTermsOptions = this._getTranslatorFindTermsOptions({wildcard: null}, options);
|
||||
const mode = 'simple';
|
||||
const findTermsOptions = this._getTranslatorFindTermsOptions(mode, {wildcard: null}, options);
|
||||
const results = [];
|
||||
let previousUngroupedSegment = null;
|
||||
let i = 0;
|
||||
const ii = text.length;
|
||||
while (i < ii) {
|
||||
const {dictionaryEntries, originalTextLength} = await this._translator.findTerms(
|
||||
'simple',
|
||||
mode,
|
||||
text.substring(i, i + scanningLength),
|
||||
findTermsOptions
|
||||
);
|
||||
@ -1869,7 +1870,7 @@ class Backend {
|
||||
this._applyOptions(source);
|
||||
}
|
||||
|
||||
_getTranslatorFindTermsOptions(details, options) {
|
||||
_getTranslatorFindTermsOptions(mode, details, options) {
|
||||
const {wildcard} = details;
|
||||
const enabledDictionaryMap = this._getTranslatorEnabledDictionaryMap(options);
|
||||
const {
|
||||
@ -1886,6 +1887,16 @@ class Backend {
|
||||
}
|
||||
} = options;
|
||||
const textReplacements = this._getTranslatorTextReplacements(textReplacementsOptions);
|
||||
let excludeDictionaryDefinitions = null;
|
||||
if (mode === 'merge' && !enabledDictionaryMap.has(mainDictionary)) {
|
||||
enabledDictionaryMap.set(mainDictionary, {
|
||||
index: enabledDictionaryMap.size,
|
||||
priority: 0,
|
||||
allowSecondarySearches: false
|
||||
});
|
||||
excludeDictionaryDefinitions = new Set();
|
||||
excludeDictionaryDefinitions.add(mainDictionary);
|
||||
}
|
||||
return {
|
||||
wildcard,
|
||||
mainDictionary,
|
||||
@ -1897,7 +1908,8 @@ class Backend {
|
||||
convertKatakanaToHiragana,
|
||||
collapseEmphaticSequences,
|
||||
textReplacements,
|
||||
enabledDictionaryMap
|
||||
enabledDictionaryMap,
|
||||
excludeDictionaryDefinitions
|
||||
};
|
||||
}
|
||||
|
||||
|
@ -85,13 +85,14 @@ class Translator {
|
||||
* priority: (number),
|
||||
* allowSecondarySearches: (boolean)
|
||||
* }
|
||||
* ])
|
||||
* ]),
|
||||
* excludeDictionaryDefinitions: (Set of (string) or null)
|
||||
* }
|
||||
* ```
|
||||
* @returns An object of the structure `{dictionaryEntries, originalTextLength}`.
|
||||
*/
|
||||
async findTerms(mode, text, options) {
|
||||
const {enabledDictionaryMap} = options;
|
||||
const {enabledDictionaryMap, excludeDictionaryDefinitions} = options;
|
||||
let {dictionaryEntries, originalTextLength} = await this._findTermsInternal(text, enabledDictionaryMap, options);
|
||||
|
||||
switch (mode) {
|
||||
@ -103,6 +104,10 @@ class Translator {
|
||||
break;
|
||||
}
|
||||
|
||||
if (excludeDictionaryDefinitions !== null) {
|
||||
this._removeExcludedDefinitions(dictionaryEntries, excludeDictionaryDefinitions);
|
||||
}
|
||||
|
||||
if (dictionaryEntries.length > 1) {
|
||||
this._sortTermDictionaryEntries(dictionaryEntries);
|
||||
}
|
||||
@ -492,6 +497,101 @@ class Translator {
|
||||
return newDictionaryEntries;
|
||||
}
|
||||
|
||||
_removeExcludedDefinitions(dictionaryEntries, excludeDictionaryDefinitions) {
|
||||
for (let i = dictionaryEntries.length - 1; i >= 0; --i) {
|
||||
const dictionaryEntry = dictionaryEntries[i];
|
||||
const {definitions, pronunciations, frequencies, headwords} = dictionaryEntry;
|
||||
const definitionsChanged = this._removeArrayItemsWithDictionary(definitions, excludeDictionaryDefinitions);
|
||||
this._removeArrayItemsWithDictionary(pronunciations, excludeDictionaryDefinitions);
|
||||
this._removeArrayItemsWithDictionary(frequencies, excludeDictionaryDefinitions);
|
||||
this._removeTagGroupsWithDictionary(definitions, excludeDictionaryDefinitions);
|
||||
this._removeTagGroupsWithDictionary(headwords, excludeDictionaryDefinitions);
|
||||
|
||||
if (!definitionsChanged) { continue; }
|
||||
|
||||
if (definitions.length === 0) {
|
||||
dictionaryEntries.splice(i, 1);
|
||||
} else {
|
||||
this._removeUnusedHeadwords(dictionaryEntry);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
_removeUnusedHeadwords(dictionaryEntry) {
|
||||
const {definitions, pronunciations, frequencies, headwords} = dictionaryEntry;
|
||||
const removeHeadwordIndices = new Set();
|
||||
for (let i = 0, ii = headwords.length; i < ii; ++i) {
|
||||
removeHeadwordIndices.add(i);
|
||||
}
|
||||
for (const {headwordIndices} of definitions) {
|
||||
for (const headwordIndex of headwordIndices) {
|
||||
removeHeadwordIndices.delete(headwordIndex);
|
||||
}
|
||||
}
|
||||
|
||||
if (removeHeadwordIndices.size === 0) { return; }
|
||||
|
||||
const indexRemap = new Map();
|
||||
let oldIndex = 0;
|
||||
for (let i = 0, ii = headwords.length; i < ii; ++i) {
|
||||
if (removeHeadwordIndices.has(i)) {
|
||||
headwords.splice(i, 1);
|
||||
--i;
|
||||
--ii;
|
||||
} else {
|
||||
indexRemap.set(oldIndex, indexRemap.size);
|
||||
}
|
||||
++oldIndex;
|
||||
}
|
||||
|
||||
this._updateDefinitionHeadwordIndices(definitions, indexRemap);
|
||||
this._updateArrayItemsHeadwordIndex(pronunciations, indexRemap);
|
||||
this._updateArrayItemsHeadwordIndex(frequencies, indexRemap);
|
||||
}
|
||||
|
||||
_updateDefinitionHeadwordIndices(definitions, indexRemap) {
|
||||
for (const {headwordIndices} of definitions) {
|
||||
for (let i = headwordIndices.length - 1; i >= 0; --i) {
|
||||
const newHeadwordIndex = indexRemap.get(headwordIndices[i]);
|
||||
if (typeof newHeadwordIndex === 'undefined') {
|
||||
headwordIndices.splice(i, 1);
|
||||
} else {
|
||||
headwordIndices[i] = newHeadwordIndex;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
_updateArrayItemsHeadwordIndex(array, indexRemap) {
|
||||
for (let i = array.length - 1; i >= 0; --i) {
|
||||
const item = array[i];
|
||||
const {headwordIndex} = item;
|
||||
const newHeadwordIndex = indexRemap.get(headwordIndex);
|
||||
if (typeof newHeadwordIndex === 'undefined') {
|
||||
array.splice(i, 1);
|
||||
} else {
|
||||
item.headwordIndex = newHeadwordIndex;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
_removeArrayItemsWithDictionary(array, excludeDictionaryDefinitions) {
|
||||
let changed = false;
|
||||
for (let j = array.length - 1; j >= 0; --j) {
|
||||
const {dictionary} = array[j];
|
||||
if (!excludeDictionaryDefinitions.has(dictionary)) { continue; }
|
||||
array.splice(j, 1);
|
||||
changed = true;
|
||||
}
|
||||
return changed;
|
||||
}
|
||||
|
||||
_removeTagGroupsWithDictionary(array, excludeDictionaryDefinitions) {
|
||||
for (const {tags} of array) {
|
||||
this._removeArrayItemsWithDictionary(tags, excludeDictionaryDefinitions);
|
||||
}
|
||||
}
|
||||
|
||||
// Tags
|
||||
|
||||
_getTermTagTargets(dictionaryEntries) {
|
||||
|
Loading…
x
Reference in New Issue
Block a user