Merge dictionary.js functions into Translator class (#726)
* Move dictFieldSplit
* Copy dictionary.js definitions into translator.js
* Convert to member functions
* Refactor names
* Remove dictionary.js
* Rename
This commit is contained in:
parent
f502dd4f21
commit
9f8f83508e
@ -35,7 +35,6 @@
|
|||||||
<script src="/bg/js/dictionary-database.js"></script>
|
<script src="/bg/js/dictionary-database.js"></script>
|
||||||
<script src="/bg/js/dictionary-importer.js"></script>
|
<script src="/bg/js/dictionary-importer.js"></script>
|
||||||
<script src="/bg/js/deinflector.js"></script>
|
<script src="/bg/js/deinflector.js"></script>
|
||||||
<script src="/bg/js/dictionary.js"></script>
|
|
||||||
<script src="/bg/js/json-schema.js"></script>
|
<script src="/bg/js/json-schema.js"></script>
|
||||||
<script src="/bg/js/media-utility.js"></script>
|
<script src="/bg/js/media-utility.js"></script>
|
||||||
<script src="/bg/js/options.js"></script>
|
<script src="/bg/js/options.js"></script>
|
||||||
|
@ -17,7 +17,6 @@
|
|||||||
|
|
||||||
/* global
|
/* global
|
||||||
* Database
|
* Database
|
||||||
* dictFieldSplit
|
|
||||||
*/
|
*/
|
||||||
|
|
||||||
class DictionaryDatabase {
|
class DictionaryDatabase {
|
||||||
@ -436,9 +435,9 @@ class DictionaryDatabase {
|
|||||||
index,
|
index,
|
||||||
expression: row.expression,
|
expression: row.expression,
|
||||||
reading: row.reading,
|
reading: row.reading,
|
||||||
definitionTags: dictFieldSplit(row.definitionTags || row.tags || ''),
|
definitionTags: this._splitField(row.definitionTags || row.tags || ''),
|
||||||
termTags: dictFieldSplit(row.termTags || ''),
|
termTags: this._splitField(row.termTags || ''),
|
||||||
rules: dictFieldSplit(row.rules),
|
rules: this._splitField(row.rules),
|
||||||
glossary: row.glossary,
|
glossary: row.glossary,
|
||||||
score: row.score,
|
score: row.score,
|
||||||
dictionary: row.dictionary,
|
dictionary: row.dictionary,
|
||||||
@ -451,9 +450,9 @@ class DictionaryDatabase {
|
|||||||
return {
|
return {
|
||||||
index,
|
index,
|
||||||
character: row.character,
|
character: row.character,
|
||||||
onyomi: dictFieldSplit(row.onyomi),
|
onyomi: this._splitField(row.onyomi),
|
||||||
kunyomi: dictFieldSplit(row.kunyomi),
|
kunyomi: this._splitField(row.kunyomi),
|
||||||
tags: dictFieldSplit(row.tags),
|
tags: this._splitField(row.tags),
|
||||||
glossary: row.meanings,
|
glossary: row.meanings,
|
||||||
stats: row.stats,
|
stats: row.stats,
|
||||||
dictionary: row.dictionary
|
dictionary: row.dictionary
|
||||||
@ -471,4 +470,8 @@ class DictionaryDatabase {
|
|||||||
_createMedia(row, index) {
|
_createMedia(row, index) {
|
||||||
return Object.assign({}, row, {index});
|
return Object.assign({}, row, {index});
|
||||||
}
|
}
|
||||||
|
|
||||||
|
_splitField(field) {
|
||||||
|
return field.length === 0 ? [] : field.split(' ');
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
@ -1,298 +0,0 @@
|
|||||||
/*
|
|
||||||
* Copyright (C) 2016-2020 Yomichan Authors
|
|
||||||
*
|
|
||||||
* This program is free software: you can redistribute it and/or modify
|
|
||||||
* it under the terms of the GNU General Public License as published by
|
|
||||||
* the Free Software Foundation, either version 3 of the License, or
|
|
||||||
* (at your option) any later version.
|
|
||||||
*
|
|
||||||
* This program is distributed in the hope that it will be useful,
|
|
||||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
||||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
||||||
* GNU General Public License for more details.
|
|
||||||
*
|
|
||||||
* You should have received a copy of the GNU General Public License
|
|
||||||
* along with this program. If not, see <https://www.gnu.org/licenses/>.
|
|
||||||
*/
|
|
||||||
|
|
||||||
/**
 * Collects the enabled dictionaries from an options object.
 * @param {object} options Object whose `dictionaries` property maps each dictionary title to
 *   an object with `enabled`, `priority` and `allowSecondarySearches` properties.
 * @returns {Map<string, {priority: *, allowSecondarySearches: *}>} One entry per dictionary whose
 *   `enabled` value is truthy, in `Object.entries` order.
 */
function dictEnabledSet(options) {
    const result = new Map();
    Object.entries(options.dictionaries).forEach(([title, settings]) => {
        const {enabled, priority, allowSecondarySearches} = settings;
        if (enabled) {
            result.set(title, {priority, allowSecondarySearches});
        }
    });
    return result;
}
|
|
||||||
|
|
||||||
/**
 * Sorts definitions in place and returns the same array.
 *
 * Criteria, applied in order until one yields a non-zero result:
 *   1. (only when `dictionaries` is not null) higher dictionary priority first; a dictionary
 *      missing from the map counts as priority 0;
 *   2. longer `source` first;
 *   3. fewer `reasons` first;
 *   4. higher `score` first;
 *   5. `expression` strings in reverse `localeCompare` order.
 *
 * @param {object[]} definitions Items with `source`, `reasons`, `score`, `expression` and
 *   (when `dictionaries` is given) `dictionary` properties.
 * @param {?Map} dictionaries Map from dictionary title to an object with a `priority` property.
 * @returns {object[]} The `definitions` array itself.
 */
function dictTermsSort(definitions, dictionaries=null) {
    const priorityOf = (definition) => {
        const info = dictionaries.get(definition.dictionary);
        return info === undefined ? 0 : info.priority;
    };

    const criteria = [
        (a, b) => b.source.length - a.source.length,
        (a, b) => a.reasons.length - b.reasons.length,
        (a, b) => b.score - a.score,
        (a, b) => b.expression.toString().localeCompare(a.expression.toString())
    ];
    if (dictionaries !== null) {
        criteria.unshift((a, b) => priorityOf(b) - priorityOf(a));
    }

    return definitions.sort((a, b) => {
        // Criteria are evaluated lazily; the strict `!== 0` test is deliberate so that a
        // non-numeric difference ends the comparison exactly where it occurs.
        for (const compare of criteria) {
            const order = compare(a, b);
            if (order !== 0) { return order; }
        }
        return 0;
    });
}
|
|
||||||
|
|
||||||
/**
 * Removes definitions that share an `id`, keeping one definition per id.
 * For each id the definition with the longest `expression` wins; on equal length the
 * earliest one is kept. Output order follows the first appearance of each id.
 * @param {object[]} definitions Items with `id` and `expression` properties.
 * @returns {object[]} A new array with one definition per distinct id.
 */
function dictTermsUndupe(definitions) {
    const bestById = new Map();
    for (const candidate of definitions) {
        const current = bestById.get(candidate.id);
        if (current !== undefined && !(candidate.expression.length > current.expression.length)) {
            continue;
        }
        bestById.set(candidate.id, candidate);
    }
    return Array.from(bestById.values());
}
|
|
||||||
|
|
||||||
/**
 * Removes repeated `dictionary` and `partOfSpeech` tags from consecutive definitions.
 *
 * Walking the list in order, a definition's `dictionary`-category tags are dropped when the
 * sorted set of their names equals that of the previous definition; the same is done for
 * `partOfSpeech`-category tags. A change of dictionary also resets the part-of-speech
 * comparison, so the first definition of each dictionary run keeps its part-of-speech tags.
 *
 * Each definition's `definitionTags` property is replaced with the filtered array.
 * @param {object[]} definitions Items whose `definitionTags` is an array of tags with
 *   `category` and `name` properties.
 */
function dictTermsCompressTags(definitions) {
    const serializeNames = (tags, category) => {
        const names = [];
        for (const tag of tags) {
            if (tag.category === category) { names.push(tag.name); }
        }
        return JSON.stringify(names.sort());
    };

    let previousDictionary = '';
    let previousPartOfSpeech = '';

    for (const definition of definitions) {
        const tags = definition.definitionTags;
        const dictionary = serializeNames(tags, 'dictionary');
        const partOfSpeech = serializeNames(tags, 'partOfSpeech');

        const sameDictionary = (previousDictionary === dictionary);
        if (!sameDictionary) {
            previousDictionary = dictionary;
            previousPartOfSpeech = '';
        }

        const samePartOfSpeech = (previousPartOfSpeech === partOfSpeech);
        if (!samePartOfSpeech) {
            previousPartOfSpeech = partOfSpeech;
        }

        definition.definitionTags = tags.filter((tag) => {
            if (sameDictionary && tag.category === 'dictionary') { return false; }
            if (samePartOfSpeech && tag.category === 'partOfSpeech') { return false; }
            return true;
        });
    }
}
|
|
||||||
|
|
||||||
/**
 * Groups definitions that share the same source, expression, reasons and reading.
 *
 * Each group becomes one result object carrying the group's definitions (sorted with
 * `dictTermsSort` using `dictionaries`), the highest `score` in the group, and the
 * `expression`, `reading`, `furiganaSegments`, `reasons`, `termTags` and `source` of the
 * definition that was inserted into the group first (taken before sorting).
 *
 * @param {object[]} definitions Definitions to group.
 * @param {?Map} dictionaries Passed through to `dictTermsSort` when sorting each group.
 * @returns {object[]} The grouped results, sorted with `dictTermsSort` (no dictionary priority).
 */
function dictTermsGroup(definitions, dictionaries) {
    const groups = new Map();
    for (const definition of definitions) {
        const {source, expression, reasons, reading} = definition;

        // The key is JSON-encoded rather than comma-joined so that field boundaries survive:
        // a comma inside `source`/`expression`, or a reason that happens to equal a reading,
        // can no longer make two distinct groups collide. A falsy reading is normalized to
        // null so definitions without a reading keep grouping together.
        const keyString = JSON.stringify([source, expression, reasons, reading ? reading : null]);

        let groupDefinitions = groups.get(keyString);
        if (typeof groupDefinitions === 'undefined') {
            groupDefinitions = [];
            groups.set(keyString, groupDefinitions);
        }

        groupDefinitions.push(definition);
    }

    const results = [];
    for (const groupDefinitions of groups.values()) {
        // Captured before the in-place sort below: the representative is the first definition
        // encountered for this group, not the first one after sorting.
        const firstDef = groupDefinitions[0];
        dictTermsSort(groupDefinitions, dictionaries);
        results.push({
            definitions: groupDefinitions,
            expression: firstDef.expression,
            reading: firstDef.reading,
            furiganaSegments: firstDef.furiganaSegments,
            reasons: firstDef.reasons,
            termTags: firstDef.termTags,
            score: groupDefinitions.reduce((p, v) => v.score > p ? v.score : p, Number.MIN_SAFE_INTEGER),
            source: firstDef.source
        });
    }

    return dictTermsSort(results);
}
|
|
||||||
|
|
||||||
/**
 * Partitions definitions into per-sequence accumulators and a remainder list.
 *
 * A definition is "sequenced" when its `dictionary` equals `mainDictionary` and its
 * `sequence` is `>= 0`. The first sequenced definition for a sequence number creates an
 * accumulator seeded from it (with empty `expression`/`reading` sets, an empty `expressions`
 * map and an empty `definitions` array); later ones only raise the accumulator's `score`.
 * All other definitions are returned unchanged in the second list.
 *
 * @param {object[]} definitions Definitions to partition.
 * @param {*} mainDictionary Value compared (with `===`) against each `definition.dictionary`.
 * @returns {[Map, object[]]} `[accumulatorsBySequence, nonSequencedDefinitions]`.
 */
function dictTermsMergeBySequence(definitions, mainDictionary) {
    const bySequence = new Map();
    const remainder = [];

    for (const definition of definitions) {
        const sequence = definition.sequence;
        const isSequenced = (mainDictionary === definition.dictionary && sequence >= 0);
        if (!isSequenced) {
            remainder.push(definition);
            continue;
        }

        const existing = bySequence.get(sequence);
        if (existing !== undefined) {
            existing.score = Math.max(existing.score, definition.score);
            continue;
        }

        bySequence.set(sequence, {
            reasons: definition.reasons,
            score: definition.score,
            expression: new Set(),
            reading: new Set(),
            expressions: new Map(),
            source: definition.source,
            dictionary: definition.dictionary,
            definitions: []
        });
    }

    return [bySequence, remainder];
}
|
|
||||||
|
|
||||||
/**
 * Merges definitions that have the same glossary and dictionary into combined entries.
 *
 * Entries are keyed by the JSON encoding of `glossary.concat(dictionary)`. Every accepted
 * definition contributes its expression and reading to its entry and to the sets on
 * `result`, and its definition tags (deduplicated by tag name) to the entry.
 *
 * Two modes, selected by the optional arguments:
 *   - `appendTo === null`: a new map is created, and `result.expressions` is populated as
 *     expression -> reading -> (term tag name -> term tag).
 *   - `appendTo !== null`: entries are added to that map and `result.expressions` is left alone.
 *   - `mergedIndices !== null`: a definition is accepted only if `result.expressions` already
 *     holds its expression/reading pair; the index of each accepted definition is added to
 *     `mergedIndices`, and the others are skipped.
 *
 * Finally each entry in the map gets an `only` array, computed with the external helpers
 * `areSetsEqual` and `getSetIntersection`: when the entry's expression (or reading) set
 * differs from the overall set on `result`, the intersection of the two is appended.
 *
 * @param {object} result Accumulator with `expressions` (Map), `expression` (Set),
 *   `reading` (Set) and `source` properties; the first three are mutated.
 * @param {object[]} definitions Definitions to merge.
 * @param {?Map} appendTo Existing map of merged entries to extend, or null to start a new one.
 * @param {?Set} mergedIndices Receives the indices of accepted definitions, or null.
 * @returns {Map} The map of merged entries (`appendTo` itself when it was provided).
 */
function dictTermsMergeByGloss(result, definitions, appendTo=null, mergedIndices=null) {
    const isInitialPass = (appendTo === null);
    const mergedEntries = isInitialPass ? new Map() : appendTo;

    const termTagsByExpression = result.expressions;
    const allExpressions = result.expression;
    const allReadings = result.reading;
    const source = result.source;

    for (const [index, definition] of definitions.entries()) {
        const {expression, reading} = definition;

        if (mergedIndices !== null) {
            const knownReadings = termTagsByExpression.get(expression);
            const isKnownPair = (
                knownReadings !== undefined &&
                knownReadings.get(reading) !== undefined
            );
            if (!isKnownPair) { continue; }
            mergedIndices.add(index);
        }

        const key = JSON.stringify(definition.glossary.concat(definition.dictionary));
        let entry = mergedEntries.get(key);
        if (entry === undefined) {
            entry = {
                expression: new Set(),
                reading: new Set(),
                definitionTags: [],
                glossary: definition.glossary,
                source,
                reasons: [],
                score: definition.score,
                id: definition.id,
                dictionary: definition.dictionary
            };
            mergedEntries.set(key, entry);
        }

        entry.expression.add(expression);
        entry.reading.add(reading);

        allExpressions.add(expression);
        allReadings.add(reading);

        for (const tag of definition.definitionTags) {
            const alreadyPresent = entry.definitionTags.some((existingTag) => existingTag.name === tag.name);
            if (!alreadyPresent) {
                entry.definitionTags.push(tag);
            }
        }

        if (!isInitialPass) { continue; }

        /*
            Data layout:
            termTagsByExpression = new Map([
                [expression, new Map([
                    [reading, new Map([
                        [tagName, tagInfo],
                        ...
                    ])],
                    ...
                ])],
                ...
            ]);
        */
        let readingMap = termTagsByExpression.get(expression);
        if (readingMap === undefined) {
            readingMap = new Map();
            termTagsByExpression.set(expression, readingMap);
        }

        let termTagsMap = readingMap.get(reading);
        if (termTagsMap === undefined) {
            termTagsMap = new Map();
            readingMap.set(reading, termTagsMap);
        }

        for (const tag of definition.termTags) {
            if (termTagsMap.has(tag.name)) { continue; }
            termTagsMap.set(tag.name, tag);
        }
    }

    for (const entry of mergedEntries.values()) {
        const only = [];
        const comparisons = [
            [entry.expression, allExpressions],
            [entry.reading, allReadings]
        ];
        for (const [own, all] of comparisons) {
            if (!areSetsEqual(own, all)) {
                only.push(...getSetIntersection(own, all));
            }
        }
        entry.only = only;
    }

    return mergedEntries;
}
|
|
||||||
|
|
||||||
/**
 * Creates the tag that identifies which dictionary a definition came from.
 * @param {string} name Used as the tag name.
 * @returns {object} The result of `dictTagSanitize` applied to a tag with the given name,
 *   category `'dictionary'` and order `100`.
 */
function dictTagBuildSource(name) {
    const sourceTag = {name, category: 'dictionary', order: 100};
    return dictTagSanitize(sourceTag);
}
|
|
||||||
|
|
||||||
/**
 * Fills in default values for the standard tag properties.
 * The tag is modified in place; any falsy value (including missing) is replaced:
 * `name` -> 'untitled', `category` -> 'default', `notes` -> '', `order` -> 0, `score` -> 0.
 * @param {object} tag The tag to normalize.
 * @returns {object} The same `tag` object.
 */
function dictTagSanitize(tag) {
    const fallbacks = [
        ['name', 'untitled'],
        ['category', 'default'],
        ['notes', ''],
        ['order', 0],
        ['score', 0]
    ];
    for (const [property, fallback] of fallbacks) {
        tag[property] = tag[property] || fallback;
    }
    return tag;
}
|
|
||||||
|
|
||||||
/**
 * Sorts tags in place by ascending `order`, then by ascending `name`.
 * Both properties are compared with the `<` and `>` operators.
 * @param {object[]} tags Tags with `order` and `name` properties.
 * @returns {object[]} The `tags` array itself.
 */
function dictTagsSort(tags) {
    const compareValues = (a, b) => {
        if (a < b) { return -1; }
        if (a > b) { return 1; }
        return 0;
    };
    return tags.sort((tag1, tag2) => {
        const byOrder = compareValues(tag1.order, tag2.order);
        return byOrder !== 0 ? byOrder : compareValues(tag1.name, tag2.name);
    });
}
|
|
||||||
|
|
||||||
/**
 * Splits a space-separated field into its parts.
 * @param {?string} field The raw field value. `null`, `undefined` and the empty string
 *   all produce an empty array instead of throwing or yielding `['']`.
 * @returns {string[]} The parts obtained by splitting on a single space character.
 */
function dictFieldSplit(field) {
    // Several callers pass row properties directly (e.g. `row.rules`), which may be absent.
    if (field === null || typeof field === 'undefined' || field.length === 0) {
        return [];
    }
    return field.split(' ');
}
|
|
@ -18,16 +18,6 @@
|
|||||||
/* global
|
/* global
|
||||||
* Deinflector
|
* Deinflector
|
||||||
* TextSourceMap
|
* TextSourceMap
|
||||||
* dictEnabledSet
|
|
||||||
* dictTagBuildSource
|
|
||||||
* dictTagSanitize
|
|
||||||
* dictTagsSort
|
|
||||||
* dictTermsCompressTags
|
|
||||||
* dictTermsGroup
|
|
||||||
* dictTermsMergeByGloss
|
|
||||||
* dictTermsMergeBySequence
|
|
||||||
* dictTermsSort
|
|
||||||
* dictTermsUndupe
|
|
||||||
* jp
|
* jp
|
||||||
*/
|
*/
|
||||||
|
|
||||||
@ -63,7 +53,7 @@ class Translator {
|
|||||||
}
|
}
|
||||||
|
|
||||||
async findKanji(text, options) {
|
async findKanji(text, options) {
|
||||||
const dictionaries = dictEnabledSet(options);
|
const dictionaries = this._getEnabledDictionaryMap(options);
|
||||||
const kanjiUnique = new Set();
|
const kanjiUnique = new Set();
|
||||||
for (const c of text) {
|
for (const c of text) {
|
||||||
kanjiUnique.add(c);
|
kanjiUnique.add(c);
|
||||||
@ -80,8 +70,8 @@ class Translator {
|
|||||||
|
|
||||||
for (const definition of definitions) {
|
for (const definition of definitions) {
|
||||||
const tags = await this._expandTags(definition.tags, definition.dictionary);
|
const tags = await this._expandTags(definition.tags, definition.dictionary);
|
||||||
tags.push(dictTagBuildSource(definition.dictionary));
|
tags.push(this._createDictionaryTag(definition.dictionary));
|
||||||
dictTagsSort(tags);
|
this._sortTags(tags);
|
||||||
|
|
||||||
const stats = await this._expandStats(definition.stats, definition.dictionary);
|
const stats = await this._expandStats(definition.stats, definition.dictionary);
|
||||||
|
|
||||||
@ -97,7 +87,7 @@ class Translator {
|
|||||||
// Private
|
// Private
|
||||||
|
|
||||||
async _getSequencedDefinitions(definitions, mainDictionary) {
|
async _getSequencedDefinitions(definitions, mainDictionary) {
|
||||||
const [definitionsBySequence, defaultDefinitions] = dictTermsMergeBySequence(definitions, mainDictionary);
|
const [definitionsBySequence, defaultDefinitions] = this._mergeBySequence(definitions, mainDictionary);
|
||||||
|
|
||||||
const sequenceList = [];
|
const sequenceList = [];
|
||||||
const sequencedDefinitions = [];
|
const sequencedDefinitions = [];
|
||||||
@ -131,7 +121,7 @@ class Translator {
|
|||||||
const definitions = await this._database.findTermsExactBulk(expressionList, readingList, secondarySearchDictionaries);
|
const definitions = await this._database.findTermsExactBulk(expressionList, readingList, secondarySearchDictionaries);
|
||||||
for (const definition of definitions) {
|
for (const definition of definitions) {
|
||||||
const definitionTags = await this._expandTags(definition.definitionTags, definition.dictionary);
|
const definitionTags = await this._expandTags(definition.definitionTags, definition.dictionary);
|
||||||
definitionTags.push(dictTagBuildSource(definition.dictionary));
|
definitionTags.push(this._createDictionaryTag(definition.dictionary));
|
||||||
definition.definitionTags = definitionTags;
|
definition.definitionTags = definitionTags;
|
||||||
const termTags = await this._expandTags(definition.termTags, definition.dictionary);
|
const termTags = await this._expandTags(definition.termTags, definition.dictionary);
|
||||||
definition.termTags = termTags;
|
definition.termTags = termTags;
|
||||||
@ -150,30 +140,30 @@ class Translator {
|
|||||||
|
|
||||||
for (const definition of rawDefinitionsBySequence) {
|
for (const definition of rawDefinitionsBySequence) {
|
||||||
const definitionTags = await this._expandTags(definition.definitionTags, definition.dictionary);
|
const definitionTags = await this._expandTags(definition.definitionTags, definition.dictionary);
|
||||||
definitionTags.push(dictTagBuildSource(definition.dictionary));
|
definitionTags.push(this._createDictionaryTag(definition.dictionary));
|
||||||
definition.definitionTags = definitionTags;
|
definition.definitionTags = definitionTags;
|
||||||
const termTags = await this._expandTags(definition.termTags, definition.dictionary);
|
const termTags = await this._expandTags(definition.termTags, definition.dictionary);
|
||||||
definition.termTags = termTags;
|
definition.termTags = termTags;
|
||||||
}
|
}
|
||||||
|
|
||||||
const definitionsByGloss = dictTermsMergeByGloss(result, rawDefinitionsBySequence);
|
const definitionsByGloss = this._mergeByGlossary(result, rawDefinitionsBySequence);
|
||||||
const secondarySearchResults = await this._getMergedSecondarySearchResults(text, result.expressions, secondarySearchDictionaries);
|
const secondarySearchResults = await this._getMergedSecondarySearchResults(text, result.expressions, secondarySearchDictionaries);
|
||||||
|
|
||||||
dictTermsMergeByGloss(result, defaultDefinitions.concat(secondarySearchResults), definitionsByGloss, mergedByTermIndices);
|
this._mergeByGlossary(result, defaultDefinitions.concat(secondarySearchResults), definitionsByGloss, mergedByTermIndices);
|
||||||
|
|
||||||
for (const definition of definitionsByGloss.values()) {
|
for (const definition of definitionsByGloss.values()) {
|
||||||
dictTagsSort(definition.definitionTags);
|
this._sortTags(definition.definitionTags);
|
||||||
result.definitions.push(definition);
|
result.definitions.push(definition);
|
||||||
}
|
}
|
||||||
|
|
||||||
dictTermsSort(result.definitions, dictionaries);
|
this._sortDefinitions(result.definitions, dictionaries);
|
||||||
|
|
||||||
const expressions = [];
|
const expressions = [];
|
||||||
for (const [expression, readingMap] of result.expressions.entries()) {
|
for (const [expression, readingMap] of result.expressions.entries()) {
|
||||||
for (const [reading, termTagsMap] of readingMap.entries()) {
|
for (const [reading, termTagsMap] of readingMap.entries()) {
|
||||||
const termTags = [...termTagsMap.values()];
|
const termTags = [...termTagsMap.values()];
|
||||||
const score = termTags.map((tag) => tag.score).reduce((p, v) => p + v, 0);
|
const score = termTags.map((tag) => tag.score).reduce((p, v) => p + v, 0);
|
||||||
expressions.push(this._createExpression(expression, reading, dictTagsSort(termTags), this._scoreToTermFrequency(score)));
|
expressions.push(this._createExpression(expression, reading, this._sortTags(termTags), this._scoreToTermFrequency(score)));
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -185,15 +175,15 @@ class Translator {
|
|||||||
}
|
}
|
||||||
|
|
||||||
async _findTermsGrouped(text, details, options) {
|
async _findTermsGrouped(text, details, options) {
|
||||||
const dictionaries = dictEnabledSet(options);
|
const dictionaries = this._getEnabledDictionaryMap(options);
|
||||||
const [definitions, length] = await this._findTermsInternal(text, dictionaries, details, options);
|
const [definitions, length] = await this._findTermsInternal(text, dictionaries, details, options);
|
||||||
|
|
||||||
const definitionsGrouped = dictTermsGroup(definitions, dictionaries);
|
const definitionsGrouped = this._groupTerms(definitions, dictionaries);
|
||||||
await this._buildTermMeta(definitionsGrouped, dictionaries);
|
await this._buildTermMeta(definitionsGrouped, dictionaries);
|
||||||
|
|
||||||
if (options.general.compactTags) {
|
if (options.general.compactTags) {
|
||||||
for (const definition of definitionsGrouped) {
|
for (const definition of definitionsGrouped) {
|
||||||
dictTermsCompressTags(definition.definitions);
|
this._compressDefinitionTags(definition.definitions);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -201,7 +191,7 @@ class Translator {
|
|||||||
}
|
}
|
||||||
|
|
||||||
async _findTermsMerged(text, details, options) {
|
async _findTermsMerged(text, details, options) {
|
||||||
const dictionaries = dictEnabledSet(options);
|
const dictionaries = this._getEnabledDictionaryMap(options);
|
||||||
const secondarySearchDictionaries = new Map();
|
const secondarySearchDictionaries = new Map();
|
||||||
for (const [title, dictionary] of dictionaries.entries()) {
|
for (const [title, dictionary] of dictionaries.entries()) {
|
||||||
if (!dictionary.allowSecondarySearches) { continue; }
|
if (!dictionary.allowSecondarySearches) { continue; }
|
||||||
@ -226,7 +216,7 @@ class Translator {
|
|||||||
}
|
}
|
||||||
|
|
||||||
const strayDefinitions = defaultDefinitions.filter((definition, index) => !mergedByTermIndices.has(index));
|
const strayDefinitions = defaultDefinitions.filter((definition, index) => !mergedByTermIndices.has(index));
|
||||||
for (const groupedDefinition of dictTermsGroup(strayDefinitions, dictionaries)) {
|
for (const groupedDefinition of this._groupTerms(strayDefinitions, dictionaries)) {
|
||||||
// from dictTermsMergeBySequence
|
// from dictTermsMergeBySequence
|
||||||
const {reasons, score, expression, reading, source, dictionary} = groupedDefinition;
|
const {reasons, score, expression, reading, source, dictionary} = groupedDefinition;
|
||||||
const compatibilityDefinition = {
|
const compatibilityDefinition = {
|
||||||
@ -246,15 +236,15 @@ class Translator {
|
|||||||
|
|
||||||
if (options.general.compactTags) {
|
if (options.general.compactTags) {
|
||||||
for (const definition of definitionsMerged) {
|
for (const definition of definitionsMerged) {
|
||||||
dictTermsCompressTags(definition.definitions);
|
this._compressDefinitionTags(definition.definitions);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
return [dictTermsSort(definitionsMerged), length];
|
return [this._sortDefinitions(definitionsMerged), length];
|
||||||
}
|
}
|
||||||
|
|
||||||
async _findTermsSplit(text, details, options) {
|
async _findTermsSplit(text, details, options) {
|
||||||
const dictionaries = dictEnabledSet(options);
|
const dictionaries = this._getEnabledDictionaryMap(options);
|
||||||
const [definitions, length] = await this._findTermsInternal(text, dictionaries, details, options);
|
const [definitions, length] = await this._findTermsInternal(text, dictionaries, details, options);
|
||||||
|
|
||||||
await this._buildTermMeta(definitions, dictionaries);
|
await this._buildTermMeta(definitions, dictionaries);
|
||||||
@ -263,9 +253,9 @@ class Translator {
|
|||||||
}
|
}
|
||||||
|
|
||||||
async _findTermsSimple(text, details, options) {
|
async _findTermsSimple(text, details, options) {
|
||||||
const dictionaries = dictEnabledSet(options);
|
const dictionaries = this._getEnabledDictionaryMap(options);
|
||||||
const [definitions, length] = await this._findTermsInternal(text, dictionaries, details, options);
|
const [definitions, length] = await this._findTermsInternal(text, dictionaries, details, options);
|
||||||
dictTermsSort(definitions);
|
this._sortDefinitions(definitions);
|
||||||
return [definitions, length];
|
return [definitions, length];
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -285,7 +275,7 @@ class Translator {
|
|||||||
for (const deinflection of deinflections) {
|
for (const deinflection of deinflections) {
|
||||||
for (const definition of deinflection.definitions) {
|
for (const definition of deinflection.definitions) {
|
||||||
const definitionTags = await this._expandTags(definition.definitionTags, definition.dictionary);
|
const definitionTags = await this._expandTags(definition.definitionTags, definition.dictionary);
|
||||||
definitionTags.push(dictTagBuildSource(definition.dictionary));
|
definitionTags.push(this._createDictionaryTag(definition.dictionary));
|
||||||
const termTags = await this._expandTags(definition.termTags, definition.dictionary);
|
const termTags = await this._expandTags(definition.termTags, definition.dictionary);
|
||||||
|
|
||||||
const {expression, reading} = definition;
|
const {expression, reading} = definition;
|
||||||
@ -302,15 +292,15 @@ class Translator {
|
|||||||
reading,
|
reading,
|
||||||
furiganaSegments,
|
furiganaSegments,
|
||||||
glossary: definition.glossary,
|
glossary: definition.glossary,
|
||||||
definitionTags: dictTagsSort(definitionTags),
|
definitionTags: this._sortTags(definitionTags),
|
||||||
termTags: dictTagsSort(termTags),
|
termTags: this._sortTags(termTags),
|
||||||
sequence: definition.sequence
|
sequence: definition.sequence
|
||||||
});
|
});
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
definitions = dictTermsUndupe(definitions);
|
definitions = this._removeDuplicateDefinitions(definitions);
|
||||||
definitions = dictTermsSort(definitions, dictionaries);
|
definitions = this._sortDefinitions(definitions, dictionaries);
|
||||||
|
|
||||||
let length = 0;
|
let length = 0;
|
||||||
for (const definition of definitions) {
|
for (const definition of definitions) {
|
||||||
@ -515,8 +505,8 @@ class Translator {
|
|||||||
const tagMetaList = await this._getTagMetaList(names, title);
|
const tagMetaList = await this._getTagMetaList(names, title);
|
||||||
return tagMetaList.map((meta, index) => {
|
return tagMetaList.map((meta, index) => {
|
||||||
const name = names[index];
|
const name = names[index];
|
||||||
const tag = dictTagSanitize(Object.assign({}, meta !== null ? meta : {}, {name}));
|
const tag = this._sanitizeTag(Object.assign({}, meta !== null ? meta : {}, {name}));
|
||||||
return dictTagSanitize(tag);
|
return this._sanitizeTag(tag);
|
||||||
});
|
});
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -538,7 +528,7 @@ class Translator {
|
|||||||
}
|
}
|
||||||
|
|
||||||
const stat = Object.assign({}, meta, {name, value: items[name]});
|
const stat = Object.assign({}, meta, {name, value: items[name]});
|
||||||
group.push(dictTagSanitize(stat));
|
group.push(this._sanitizeTag(stat));
|
||||||
}
|
}
|
||||||
|
|
||||||
const stats = {};
|
const stats = {};
|
||||||
@ -674,4 +664,282 @@ class Translator {
|
|||||||
}
|
}
|
||||||
return await response.json();
|
return await response.json();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
_getEnabledDictionaryMap(options) {
|
||||||
|
const enabledDictionaryMap = new Map();
|
||||||
|
for (const [title, {enabled, priority, allowSecondarySearches}] of Object.entries(options.dictionaries)) {
|
||||||
|
if (!enabled) { continue; }
|
||||||
|
enabledDictionaryMap.set(title, {priority, allowSecondarySearches});
|
||||||
|
}
|
||||||
|
return enabledDictionaryMap;
|
||||||
|
}
|
||||||
|
|
||||||
|
_sortDefinitions(definitions, dictionaries=null) {
|
||||||
|
return definitions.sort((v1, v2) => {
|
||||||
|
let i;
|
||||||
|
if (dictionaries !== null) {
|
||||||
|
const dictionaryInfo1 = dictionaries.get(v1.dictionary);
|
||||||
|
const dictionaryInfo2 = dictionaries.get(v2.dictionary);
|
||||||
|
const priority1 = typeof dictionaryInfo1 !== 'undefined' ? dictionaryInfo1.priority : 0;
|
||||||
|
const priority2 = typeof dictionaryInfo2 !== 'undefined' ? dictionaryInfo2.priority : 0;
|
||||||
|
i = priority2 - priority1;
|
||||||
|
if (i !== 0) { return i; }
|
||||||
|
}
|
||||||
|
|
||||||
|
i = v2.source.length - v1.source.length;
|
||||||
|
if (i !== 0) { return i; }
|
||||||
|
|
||||||
|
i = v1.reasons.length - v2.reasons.length;
|
||||||
|
if (i !== 0) { return i; }
|
||||||
|
|
||||||
|
i = v2.score - v1.score;
|
||||||
|
if (i !== 0) { return i; }
|
||||||
|
|
||||||
|
return v2.expression.toString().localeCompare(v1.expression.toString());
|
||||||
|
});
|
||||||
|
}
|
||||||
|
|
||||||
|
_removeDuplicateDefinitions(definitions) {
|
||||||
|
const definitionGroups = new Map();
|
||||||
|
for (const definition of definitions) {
|
||||||
|
const id = definition.id;
|
||||||
|
const definitionExisting = definitionGroups.get(id);
|
||||||
|
if (typeof definitionExisting === 'undefined' || definition.expression.length > definitionExisting.expression.length) {
|
||||||
|
definitionGroups.set(id, definition);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
return [...definitionGroups.values()];
|
||||||
|
}
|
||||||
|
|
||||||
|
_compressDefinitionTags(definitions) {
|
||||||
|
let lastDictionary = '';
|
||||||
|
let lastPartOfSpeech = '';
|
||||||
|
|
||||||
|
for (const definition of definitions) {
|
||||||
|
const dictionary = JSON.stringify(definition.definitionTags.filter((tag) => tag.category === 'dictionary').map((tag) => tag.name).sort());
|
||||||
|
const partOfSpeech = JSON.stringify(definition.definitionTags.filter((tag) => tag.category === 'partOfSpeech').map((tag) => tag.name).sort());
|
||||||
|
|
||||||
|
const filterOutCategories = [];
|
||||||
|
|
||||||
|
if (lastDictionary === dictionary) {
|
||||||
|
filterOutCategories.push('dictionary');
|
||||||
|
} else {
|
||||||
|
lastDictionary = dictionary;
|
||||||
|
lastPartOfSpeech = '';
|
||||||
|
}
|
||||||
|
|
||||||
|
if (lastPartOfSpeech === partOfSpeech) {
|
||||||
|
filterOutCategories.push('partOfSpeech');
|
||||||
|
} else {
|
||||||
|
lastPartOfSpeech = partOfSpeech;
|
||||||
|
}
|
||||||
|
|
||||||
|
definition.definitionTags = definition.definitionTags.filter((tag) => !filterOutCategories.includes(tag.category));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
_groupTerms(definitions, dictionaries) {
|
||||||
|
const groups = new Map();
|
||||||
|
for (const definition of definitions) {
|
||||||
|
const key = [definition.source, definition.expression, ...definition.reasons];
|
||||||
|
if (definition.reading) {
|
||||||
|
key.push(definition.reading);
|
||||||
|
}
|
||||||
|
|
||||||
|
const keyString = key.toString();
|
||||||
|
let groupDefinitions = groups.get(keyString);
|
||||||
|
if (typeof groupDefinitions === 'undefined') {
|
||||||
|
groupDefinitions = [];
|
||||||
|
groups.set(keyString, groupDefinitions);
|
||||||
|
}
|
||||||
|
|
||||||
|
groupDefinitions.push(definition);
|
||||||
|
}
|
||||||
|
|
||||||
|
const results = [];
|
||||||
|
for (const groupDefinitions of groups.values()) {
|
||||||
|
const firstDef = groupDefinitions[0];
|
||||||
|
this._sortDefinitions(groupDefinitions, dictionaries);
|
||||||
|
results.push({
|
||||||
|
definitions: groupDefinitions,
|
||||||
|
expression: firstDef.expression,
|
||||||
|
reading: firstDef.reading,
|
||||||
|
furiganaSegments: firstDef.furiganaSegments,
|
||||||
|
reasons: firstDef.reasons,
|
||||||
|
termTags: firstDef.termTags,
|
||||||
|
score: groupDefinitions.reduce((p, v) => v.score > p ? v.score : p, Number.MIN_SAFE_INTEGER),
|
||||||
|
source: firstDef.source
|
||||||
|
});
|
||||||
|
}
|
||||||
|
|
||||||
|
return this._sortDefinitions(results);
|
||||||
|
}
|
||||||
|
|
||||||
|
_mergeBySequence(definitions, mainDictionary) {
|
||||||
|
const sequencedDefinitions = new Map();
|
||||||
|
const nonSequencedDefinitions = [];
|
||||||
|
for (const definition of definitions) {
|
||||||
|
const sequence = definition.sequence;
|
||||||
|
if (mainDictionary === definition.dictionary && sequence >= 0) {
|
||||||
|
let sequencedDefinition = sequencedDefinitions.get(sequence);
|
||||||
|
if (typeof sequencedDefinition === 'undefined') {
|
||||||
|
sequencedDefinition = {
|
||||||
|
reasons: definition.reasons,
|
||||||
|
score: definition.score,
|
||||||
|
expression: new Set(),
|
||||||
|
reading: new Set(),
|
||||||
|
expressions: new Map(),
|
||||||
|
source: definition.source,
|
||||||
|
dictionary: definition.dictionary,
|
||||||
|
definitions: []
|
||||||
|
};
|
||||||
|
sequencedDefinitions.set(sequence, sequencedDefinition);
|
||||||
|
} else {
|
||||||
|
sequencedDefinition.score = Math.max(sequencedDefinition.score, definition.score);
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
nonSequencedDefinitions.push(definition);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
return [sequencedDefinitions, nonSequencedDefinitions];
|
||||||
|
}
|
||||||
|
|
||||||
|
_mergeByGlossary(result, definitions, appendTo=null, mergedIndices=null) {
|
||||||
|
const definitionsByGlossary = appendTo !== null ? appendTo : new Map();
|
||||||
|
|
||||||
|
const resultExpressionsMap = result.expressions;
|
||||||
|
const resultExpressionSet = result.expression;
|
||||||
|
const resultReadingSet = result.reading;
|
||||||
|
const resultSource = result.source;
|
||||||
|
|
||||||
|
for (const [index, definition] of definitions.entries()) {
|
||||||
|
const {expression, reading} = definition;
|
||||||
|
|
||||||
|
if (mergedIndices !== null) {
|
||||||
|
const expressionMap = resultExpressionsMap.get(expression);
|
||||||
|
if (
|
||||||
|
typeof expressionMap !== 'undefined' &&
|
||||||
|
typeof expressionMap.get(reading) !== 'undefined'
|
||||||
|
) {
|
||||||
|
mergedIndices.add(index);
|
||||||
|
} else {
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
const gloss = JSON.stringify(definition.glossary.concat(definition.dictionary));
|
||||||
|
let glossDefinition = definitionsByGlossary.get(gloss);
|
||||||
|
if (typeof glossDefinition === 'undefined') {
|
||||||
|
glossDefinition = {
|
||||||
|
expression: new Set(),
|
||||||
|
reading: new Set(),
|
||||||
|
definitionTags: [],
|
||||||
|
glossary: definition.glossary,
|
||||||
|
source: resultSource,
|
||||||
|
reasons: [],
|
||||||
|
score: definition.score,
|
||||||
|
id: definition.id,
|
||||||
|
dictionary: definition.dictionary
|
||||||
|
};
|
||||||
|
definitionsByGlossary.set(gloss, glossDefinition);
|
||||||
|
}
|
||||||
|
|
||||||
|
glossDefinition.expression.add(expression);
|
||||||
|
glossDefinition.reading.add(reading);
|
||||||
|
|
||||||
|
resultExpressionSet.add(expression);
|
||||||
|
resultReadingSet.add(reading);
|
||||||
|
|
||||||
|
for (const tag of definition.definitionTags) {
|
||||||
|
if (!glossDefinition.definitionTags.find((existingTag) => existingTag.name === tag.name)) {
|
||||||
|
glossDefinition.definitionTags.push(tag);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if (appendTo === null) {
|
||||||
|
/*
|
||||||
|
Data layout:
|
||||||
|
resultExpressionsMap = new Map([
|
||||||
|
[expression, new Map([
|
||||||
|
[reading, new Map([
|
||||||
|
[tagName, tagInfo],
|
||||||
|
...
|
||||||
|
])],
|
||||||
|
...
|
||||||
|
])],
|
||||||
|
...
|
||||||
|
]);
|
||||||
|
*/
|
||||||
|
let readingMap = resultExpressionsMap.get(expression);
|
||||||
|
if (typeof readingMap === 'undefined') {
|
||||||
|
readingMap = new Map();
|
||||||
|
resultExpressionsMap.set(expression, readingMap);
|
||||||
|
}
|
||||||
|
|
||||||
|
let termTagsMap = readingMap.get(reading);
|
||||||
|
if (typeof termTagsMap === 'undefined') {
|
||||||
|
termTagsMap = new Map();
|
||||||
|
readingMap.set(reading, termTagsMap);
|
||||||
|
}
|
||||||
|
|
||||||
|
for (const tag of definition.termTags) {
|
||||||
|
if (!termTagsMap.has(tag.name)) {
|
||||||
|
termTagsMap.set(tag.name, tag);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
for (const definition of definitionsByGlossary.values()) {
|
||||||
|
const only = [];
|
||||||
|
const expressionSet = definition.expression;
|
||||||
|
const readingSet = definition.reading;
|
||||||
|
if (!areSetsEqual(expressionSet, resultExpressionSet)) {
|
||||||
|
only.push(...getSetIntersection(expressionSet, resultExpressionSet));
|
||||||
|
}
|
||||||
|
if (!areSetsEqual(readingSet, resultReadingSet)) {
|
||||||
|
only.push(...getSetIntersection(readingSet, resultReadingSet));
|
||||||
|
}
|
||||||
|
definition.only = only;
|
||||||
|
}
|
||||||
|
|
||||||
|
return definitionsByGlossary;
|
||||||
|
}
|
||||||
|
|
||||||
|
_createDictionaryTag(name) {
|
||||||
|
return this._sanitizeTag({name, category: 'dictionary', order: 100});
|
||||||
|
}
|
||||||
|
|
||||||
|
_sanitizeTag(tag) {
|
||||||
|
tag.name = tag.name || 'untitled';
|
||||||
|
tag.category = tag.category || 'default';
|
||||||
|
tag.notes = tag.notes || '';
|
||||||
|
tag.order = tag.order || 0;
|
||||||
|
tag.score = tag.score || 0;
|
||||||
|
return tag;
|
||||||
|
}
|
||||||
|
|
||||||
|
_sortTags(tags) {
|
||||||
|
return tags.sort((v1, v2) => {
|
||||||
|
const order1 = v1.order;
|
||||||
|
const order2 = v2.order;
|
||||||
|
if (order1 < order2) {
|
||||||
|
return -1;
|
||||||
|
} else if (order1 > order2) {
|
||||||
|
return 1;
|
||||||
|
}
|
||||||
|
|
||||||
|
const name1 = v1.name;
|
||||||
|
const name2 = v2.name;
|
||||||
|
if (name1 < name2) {
|
||||||
|
return -1;
|
||||||
|
} else if (name1 > name2) {
|
||||||
|
return 1;
|
||||||
|
}
|
||||||
|
|
||||||
|
return 0;
|
||||||
|
});
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
@ -77,7 +77,6 @@
|
|||||||
<script src="/mixed/js/api.js"></script>
|
<script src="/mixed/js/api.js"></script>
|
||||||
<script src="/mixed/js/japanese.js"></script>
|
<script src="/mixed/js/japanese.js"></script>
|
||||||
|
|
||||||
<script src="/bg/js/dictionary.js"></script>
|
|
||||||
<script src="/bg/js/handlebars.js"></script>
|
<script src="/bg/js/handlebars.js"></script>
|
||||||
<script src="/fg/js/document-util.js"></script>
|
<script src="/fg/js/document-util.js"></script>
|
||||||
<script src="/fg/js/dom-text-scanner.js"></script>
|
<script src="/fg/js/dom-text-scanner.js"></script>
|
||||||
|
@ -1143,7 +1143,6 @@
|
|||||||
<script src="/bg/js/anki.js"></script>
|
<script src="/bg/js/anki.js"></script>
|
||||||
<script src="/bg/js/anki-note-builder.js"></script>
|
<script src="/bg/js/anki-note-builder.js"></script>
|
||||||
<script src="/bg/js/conditions.js"></script>
|
<script src="/bg/js/conditions.js"></script>
|
||||||
<script src="/bg/js/dictionary.js"></script>
|
|
||||||
<script src="/bg/js/handlebars.js"></script>
|
<script src="/bg/js/handlebars.js"></script>
|
||||||
<script src="/bg/js/options.js"></script>
|
<script src="/bg/js/options.js"></script>
|
||||||
<script src="/bg/js/profile-conditions.js"></script>
|
<script src="/bg/js/profile-conditions.js"></script>
|
||||||
|
@ -116,7 +116,6 @@ vm.execute([
|
|||||||
'mixed/js/core.js',
|
'mixed/js/core.js',
|
||||||
'mixed/js/cache-map.js',
|
'mixed/js/cache-map.js',
|
||||||
'bg/js/json-schema.js',
|
'bg/js/json-schema.js',
|
||||||
'bg/js/dictionary.js',
|
|
||||||
'bg/js/media-utility.js',
|
'bg/js/media-utility.js',
|
||||||
'bg/js/dictionary-importer.js',
|
'bg/js/dictionary-importer.js',
|
||||||
'bg/js/database.js',
|
'bg/js/database.js',
|
||||||
|
Loading…
Reference in New Issue
Block a user