yomichan/ext/bg/js/dictionary.js

334 lines
11 KiB
JavaScript
Raw Normal View History

/*
 * Copyright (C) 2016-2020  Alex Yatskov <alex@foosoft.net>
 * Author: Alex Yatskov <alex@foosoft.net>
 *
 * This program is free software: you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program.  If not, see <https://www.gnu.org/licenses/>.
 */
/**
 * Builds a map of the dictionaries that are enabled in the given options.
 *
 * @param {object} options Options object; `options.dictionaries` is read as an
 *   object keyed by dictionary title whose values expose `enabled`, `priority`
 *   and `allowSecondarySearches`.
 * @returns {Map<string, {priority: *, allowSecondarySearches: *}>} One entry per
 *   enabled dictionary, keyed by title, in `Object.entries` order.
 */
function dictEnabledSet(options) {
    const enabledDictionaryMap = new Map();
    for (const [title, {enabled, priority, allowSecondarySearches}] of Object.entries(options.dictionaries)) {
        // Disabled dictionaries are left out of the map entirely.
        if (!enabled) { continue; }
        enabledDictionaryMap.set(title, {priority, allowSecondarySearches});
    }
    return enabledDictionaryMap;
}
/**
 * Reports whether at least one dictionary is enabled.
 *
 * @param {object} options Options object; the values of `options.dictionaries`
 *   are inspected for a truthy `enabled` property.
 * @returns {boolean} `true` if any dictionary entry is enabled, otherwise `false`.
 */
function dictConfigured(options) {
    return Object.values(options.dictionaries).some(({enabled}) => enabled);
}
/**
 * Sorts term definitions in place and returns the same array.
 *
 * Ordering criteria, applied in sequence until one differs:
 *   1. (only when `dictionaries` is given) higher dictionary priority first;
 *      a dictionary missing from the map counts as priority 0,
 *   2. longer `source` first,
 *   3. fewer `reasons` first,
 *   4. higher `score` first,
 *   5. `expression` in descending `localeCompare` order.
 *
 * @param {Array<object>} definitions Items exposing `source`, `reasons`, `score`
 *   and `expression` (and `dictionary` when `dictionaries` is supplied).
 * @param {?Map<string, {priority: number}>} [dictionaries=null] Optional map
 *   from dictionary title to an object carrying its `priority`.
 * @returns {Array<object>} The `definitions` array itself, now sorted.
 */
function dictTermsSort(definitions, dictionaries=null) {
    return definitions.sort((v1, v2) => {
        let i;
        if (dictionaries !== null) {
            const dictionaryInfo1 = dictionaries.get(v1.dictionary);
            const dictionaryInfo2 = dictionaries.get(v2.dictionary);
            const priority1 = typeof dictionaryInfo1 !== 'undefined' ? dictionaryInfo1.priority : 0;
            const priority2 = typeof dictionaryInfo2 !== 'undefined' ? dictionaryInfo2.priority : 0;
            i = priority2 - priority1;
            if (i !== 0) { return i; }
        }

        i = v2.source.length - v1.source.length;
        if (i !== 0) { return i; }

        i = v1.reasons.length - v2.reasons.length;
        if (i !== 0) { return i; }

        i = v2.score - v1.score;
        if (i !== 0) { return i; }

        // toString() lets the comparison work when expression is not a plain string.
        return v2.expression.toString().localeCompare(v1.expression.toString());
    });
}
/**
 * Removes duplicate definitions that share the same `id`.
 *
 * For each id, the definition with the strictly longest `expression` is kept;
 * on a tie the earliest one seen wins. Output order follows the first
 * appearance of each id in the input.
 *
 * @param {Iterable<object>} definitions Items exposing `id` and `expression`.
 * @returns {Array<object>} A new array with one definition per distinct id.
 */
function dictTermsUndupe(definitions) {
    const definitionGroups = new Map();
    for (const definition of definitions) {
        const id = definition.id;
        const definitionExisting = definitionGroups.get(id);
        if (typeof definitionExisting === 'undefined' || definition.expression.length > definitionExisting.expression.length) {
            definitionGroups.set(id, definition);
        }
    }
    return [...definitionGroups.values()];
}
/**
 * Strips repeated 'dictionary' and 'partOfSpeech' tags from consecutive
 * definitions, so that a run of definitions sharing the same tag names only
 * carries those tags on its first member.
 *
 * Each definition's `definitionTags` property is reassigned to a filtered
 * array; nothing is returned.
 *
 * @param {Iterable<object>} definitions Items exposing `definitionTags`, an
 *   array of tags with `name` and `category`.
 * @returns {void}
 */
function dictTermsCompressTags(definitions) {
    // Serialized, sorted list of the tag names belonging to one category.
    const serializeTagNames = (tags, category) => JSON.stringify(
        tags.filter((tag) => tag.category === category).map((tag) => tag.name).sort()
    );

    let lastDictionary = '';
    let lastPartOfSpeech = '';

    for (const definition of definitions) {
        const dictionary = serializeTagNames(definition.definitionTags, 'dictionary');
        const partOfSpeech = serializeTagNames(definition.definitionTags, 'partOfSpeech');

        const filterOutCategories = [];

        if (lastDictionary === dictionary) {
            filterOutCategories.push('dictionary');
        } else {
            lastDictionary = dictionary;
            // A change of dictionary always restarts part-of-speech tracking.
            lastPartOfSpeech = '';
        }

        if (lastPartOfSpeech === partOfSpeech) {
            filterOutCategories.push('partOfSpeech');
        } else {
            lastPartOfSpeech = partOfSpeech;
        }

        definition.definitionTags = definition.definitionTags.filter((tag) => !filterOutCategories.includes(tag.category));
    }
}
/**
 * Groups definitions that share the same source, expression, reasons and
 * (when truthy) reading, and returns one summary object per group.
 *
 * Each group's definitions are sorted with `dictTermsSort` using
 * `dictionaries`; the resulting group list is then sorted with
 * `dictTermsSort` without dictionary priorities.
 *
 * @param {Iterable<object>} definitions Term definitions to group.
 * @param {?Map<string, {priority: number}>} dictionaries Passed through to
 *   `dictTermsSort` when ordering the definitions inside each group.
 * @returns {Array<object>} Sorted group objects with `definitions`,
 *   `expression`, `reading`, `furiganaSegments`, `reasons`, `termTags`,
 *   `score` (maximum score in the group) and `source`.
 */
function dictTermsGroup(definitions, dictionaries) {
    const groups = new Map();
    for (const definition of definitions) {
        const key = [definition.source, definition.expression, ...definition.reasons];
        if (definition.reading) {
            key.push(definition.reading);
        }

        const keyString = key.toString();
        let groupDefinitions = groups.get(keyString);
        if (typeof groupDefinitions === 'undefined') {
            groupDefinitions = [];
            groups.set(keyString, groupDefinitions);
        }

        groupDefinitions.push(definition);
    }

    const results = [];
    for (const groupDefinitions of groups.values()) {
        // The representative is taken BEFORE sorting, i.e. it is the first
        // definition of the group in input order.
        const firstDef = groupDefinitions[0];
        dictTermsSort(groupDefinitions, dictionaries);
        results.push({
            definitions: groupDefinitions,
            expression: firstDef.expression,
            reading: firstDef.reading,
            furiganaSegments: firstDef.furiganaSegments,
            reasons: firstDef.reasons,
            termTags: firstDef.termTags,
            score: groupDefinitions.reduce((p, v) => v.score > p ? v.score : p, Number.MIN_SAFE_INTEGER),
            source: firstDef.source
        });
    }

    return dictTermsSort(results);
}
/**
 * Checks whether two sets contain exactly the same values.
 *
 * @param {Set<*>} set1 First set.
 * @param {Set<*>} set2 Second set.
 * @returns {boolean} `true` when both sets have the same size and every value
 *   of `set1` is present in `set2`.
 */
function dictAreSetsEqual(set1, set2) {
    // With equal sizes, one-directional containment is enough.
    return set1.size === set2.size && [...set1].every((item) => set2.has(item));
}
/**
 * Collects the values of `set1` that are also present in `set2`.
 *
 * @param {Iterable<*>} set1 Values to test, iterated in order.
 * @param {Set<*>} set2 Set used for membership checks via `has`.
 * @returns {Array<*>} New array of shared values, in `set1` iteration order.
 */
function dictGetSetIntersection(set1, set2) {
    return [...set1].filter((item) => set2.has(item));
}
/**
 * Splits definitions into per-sequence merge containers and leftovers.
 *
 * A definition counts as sequenced when it belongs to `mainDictionary` and
 * its `sequence` is >= 0. The first sequenced definition of each sequence
 * creates an (otherwise empty) container seeded with its `reasons`, `score`,
 * `source` and `dictionary`; later ones only raise the container's `score`
 * to the maximum seen. This function does not add the sequenced definitions
 * themselves to the container's `definitions` array.
 *
 * @param {Iterable<object>} definitions Term definitions to partition.
 * @param {string} mainDictionary Title of the dictionary whose sequence
 *   numbers are used.
 * @returns {[Map<number, object>, Array<object>]} The containers keyed by
 *   sequence, and the definitions that were not sequenced.
 */
function dictTermsMergeBySequence(definitions, mainDictionary) {
    const sequencedDefinitions = new Map();
    const nonSequencedDefinitions = [];
    for (const definition of definitions) {
        const sequence = definition.sequence;
        if (mainDictionary === definition.dictionary && sequence >= 0) {
            let sequencedDefinition = sequencedDefinitions.get(sequence);
            if (typeof sequencedDefinition === 'undefined') {
                sequencedDefinition = {
                    reasons: definition.reasons,
                    score: definition.score,
                    expression: new Set(),
                    reading: new Set(),
                    expressions: new Map(),
                    source: definition.source,
                    dictionary: definition.dictionary,
                    definitions: []
                };
                sequencedDefinitions.set(sequence, sequencedDefinition);
            } else {
                sequencedDefinition.score = Math.max(sequencedDefinition.score, definition.score);
            }
        } else {
            nonSequencedDefinitions.push(definition);
        }
    }

    return [sequencedDefinitions, nonSequencedDefinitions];
}
/**
 * Merges definitions that share the same glossary and dictionary into
 * combined "gloss definitions", updating `result` along the way.
 *
 * Side effects on `result`: every processed expression/reading is added to
 * `result.expression` / `result.reading`; when `appendTo` is null, the term
 * tags of each definition are also recorded in `result.expressions`.
 *
 * @param {object} result Merge container exposing `expressions` (Map),
 *   `expression` (Set), `reading` (Set) and `source`.
 * @param {Array<object>} definitions Definitions to merge; each exposes
 *   `expression`, `reading`, `glossary`, `dictionary`, `definitionTags`,
 *   `termTags`, `score` and `id`.
 * @param {?Map<string, object>} [appendTo=null] Existing gloss map to extend;
 *   a new Map is created when null.
 * @param {?Set<number>} [mergedIndices=null] When given, only definitions
 *   whose expression/reading pair already exists in `result.expressions` are
 *   processed, and their indices are added to this set; the rest are skipped.
 * @returns {Map<string, object>} Gloss definitions keyed by the JSON of
 *   glossary + dictionary. Each gets an `only` array listing its expressions
 *   and/or readings when they do not cover all of those in `result`.
 */
function dictTermsMergeByGloss(result, definitions, appendTo=null, mergedIndices=null) {
    const definitionsByGloss = appendTo !== null ? appendTo : new Map();

    const resultExpressionsMap = result.expressions;
    const resultExpressionSet = result.expression;
    const resultReadingSet = result.reading;
    const resultSource = result.source;

    for (const [index, definition] of definitions.entries()) {
        const {expression, reading} = definition;

        if (mergedIndices !== null) {
            const expressionMap = resultExpressionsMap.get(expression);
            if (
                typeof expressionMap !== 'undefined' &&
                typeof expressionMap.get(reading) !== 'undefined'
            ) {
                mergedIndices.add(index);
            } else {
                // Unknown expression/reading pair: leave it for the caller.
                continue;
            }
        }

        const gloss = JSON.stringify(definition.glossary.concat(definition.dictionary));
        let glossDefinition = definitionsByGloss.get(gloss);
        if (typeof glossDefinition === 'undefined') {
            glossDefinition = {
                expression: new Set(),
                reading: new Set(),
                definitionTags: [],
                glossary: definition.glossary,
                source: resultSource,
                reasons: [],
                score: definition.score,
                id: definition.id,
                dictionary: definition.dictionary
            };
            definitionsByGloss.set(gloss, glossDefinition);
        }

        glossDefinition.expression.add(expression);
        glossDefinition.reading.add(reading);

        resultExpressionSet.add(expression);
        resultReadingSet.add(reading);

        // Tags are de-duplicated by name; the first tag with a given name wins.
        for (const tag of definition.definitionTags) {
            if (!glossDefinition.definitionTags.some((existingTag) => existingTag.name === tag.name)) {
                glossDefinition.definitionTags.push(tag);
            }
        }

        if (appendTo === null) {
            /*
                Data layout:
                resultExpressionsMap = new Map([
                    [expression, new Map([
                        [reading, new Map([
                            [tagName, tagInfo],
                            ...
                        ])],
                        ...
                    ])],
                    ...
                ]);
            */
            let readingMap = resultExpressionsMap.get(expression);
            if (typeof readingMap === 'undefined') {
                readingMap = new Map();
                resultExpressionsMap.set(expression, readingMap);
            }

            let termTagsMap = readingMap.get(reading);
            if (typeof termTagsMap === 'undefined') {
                termTagsMap = new Map();
                readingMap.set(reading, termTagsMap);
            }

            for (const tag of definition.termTags) {
                if (!termTagsMap.has(tag.name)) {
                    termTagsMap.set(tag.name, tag);
                }
            }
        }
    }

    // Runs over every entry of the map, including ones supplied via appendTo.
    for (const definition of definitionsByGloss.values()) {
        const only = [];
        const expressionSet = definition.expression;
        const readingSet = definition.reading;
        if (!dictAreSetsEqual(expressionSet, resultExpressionSet)) {
            only.push(...dictGetSetIntersection(expressionSet, resultExpressionSet));
        }
        if (!dictAreSetsEqual(readingSet, resultReadingSet)) {
            only.push(...dictGetSetIntersection(readingSet, resultReadingSet));
        }
        definition.only = only;
    }

    return definitionsByGloss;
}
/**
 * Creates the tag that identifies a dictionary as the source of an entry.
 *
 * @param {string} name Tag name.
 * @returns {object} Tag with category 'dictionary' and order 100, passed
 *   through `dictTagSanitize`.
 */
function dictTagBuildSource(name) {
    const sourceTag = {name, category: 'dictionary', order: 100};
    return dictTagSanitize(sourceTag);
}
/**
 * Fills in defaults for missing tag fields. The tag is modified in place.
 *
 * Any falsy field is replaced by its default: `name` -> 'untitled',
 * `category` -> 'default', `notes` -> '', `order` -> 0, `score` -> 0.
 *
 * @param {object} tag Tag object to normalize.
 * @returns {object} The same `tag` object.
 */
function dictTagSanitize(tag) {
    tag.name = tag.name || 'untitled';
    tag.category = tag.category || 'default';
    tag.notes = tag.notes || '';
    tag.order = tag.order || 0;
    tag.score = tag.score || 0;
    return tag;
}
/**
 * Sorts tags in place, ascending by `order` and then ascending by `name`.
 *
 * @param {Array<object>} tags Tags exposing `order` and `name`.
 * @returns {Array<object>} The `tags` array itself, now sorted.
 */
function dictTagsSort(tags) {
    // Three-way comparison built on < and > only.
    const compareValues = (a, b) => {
        if (a < b) { return -1; }
        if (a > b) { return 1; }
        return 0;
    };

    return tags.sort((tagA, tagB) => {
        const byOrder = compareValues(tagA.order, tagB.order);
        return byOrder !== 0 ? byOrder : compareValues(tagA.name, tagB.name);
    });
}
/**
 * Splits a space-separated field into its parts.
 *
 * @param {string} field Field text.
 * @returns {Array<string>} Empty array for an empty field, otherwise the
 *   result of splitting on single spaces.
 */
function dictFieldSplit(field) {
    if (field.length === 0) {
        return [];
    }
    return field.split(' ');
}