2016-03-20 17:52:27 +00:00
|
|
|
/*
|
2021-01-01 19:50:41 +00:00
|
|
|
* Copyright (C) 2016-2021 Yomichan Authors
|
2016-03-20 17:52:27 +00:00
|
|
|
*
|
|
|
|
* This program is free software: you can redistribute it and/or modify
|
|
|
|
* it under the terms of the GNU General Public License as published by
|
|
|
|
* the Free Software Foundation, either version 3 of the License, or
|
|
|
|
* (at your option) any later version.
|
|
|
|
*
|
|
|
|
* This program is distributed in the hope that it will be useful,
|
|
|
|
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
|
|
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
|
|
* GNU General Public License for more details.
|
|
|
|
*
|
|
|
|
* You should have received a copy of the GNU General Public License
|
2020-01-01 17:00:31 +00:00
|
|
|
* along with this program. If not, see <https://www.gnu.org/licenses/>.
|
2016-03-20 17:52:27 +00:00
|
|
|
*/
|
|
|
|
|
2020-03-11 02:30:36 +00:00
|
|
|
/* global
|
|
|
|
* Deinflector
|
2020-03-28 21:51:58 +00:00
|
|
|
* TextSourceMap
|
2020-03-11 02:30:36 +00:00
|
|
|
*/
|
2016-03-24 02:25:32 +00:00
|
|
|
|
2020-10-04 16:54:55 +00:00
|
|
|
/**
|
|
|
|
* Class which finds term and kanji definitions for text.
|
|
|
|
*/
|
2016-03-20 17:52:27 +00:00
|
|
|
class Translator {
|
2020-10-04 16:54:55 +00:00
|
|
|
/**
|
|
|
|
* Creates a new Translator instance.
|
|
|
|
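* @param japaneseUtil The Japanese text utility object used to convert search text.
* @param database An instance of DictionaryDatabase.
* Both values are passed as properties of a single object argument.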
|
|
|
|
*/
|
2020-11-29 18:09:02 +00:00
|
|
|
constructor({japaneseUtil, database}) {
|
|
|
|
this._japaneseUtil = japaneseUtil;
|
2020-08-09 17:21:14 +00:00
|
|
|
this._database = database;
|
|
|
|
this._deinflector = null;
|
|
|
|
this._tagCache = new Map();
|
2020-10-02 21:59:14 +00:00
|
|
|
this._stringComparer = new Intl.Collator('en-US'); // Invariant locale
|
2016-03-26 21:16:21 +00:00
|
|
|
}
|
|
|
|
|
2020-10-04 16:54:55 +00:00
|
|
|
/**
|
|
|
|
* Initializes the instance for use. The public API should not be used until
|
2020-10-04 17:09:04 +00:00
|
|
|
* this function has been called.
|
|
|
|
* @param deinflectionReasons The raw deinflection reasons data that the Deinflector uses.
|
2020-10-04 16:54:55 +00:00
|
|
|
*/
|
2020-10-04 17:09:04 +00:00
|
|
|
prepare(deinflectionReasons) {
|
|
|
|
this._deinflector = new Deinflector(deinflectionReasons);
|
2016-03-20 17:52:27 +00:00
|
|
|
}
|
2016-03-21 01:45:37 +00:00
|
|
|
|
2020-10-04 16:54:55 +00:00
|
|
|
/**
|
|
|
|
* Clears the database tag cache. This should be executed if the database is changed.
|
|
|
|
*/
|
2020-05-06 23:28:26 +00:00
|
|
|
clearDatabaseCaches() {
|
2020-08-09 17:21:14 +00:00
|
|
|
this._tagCache.clear();
|
2019-11-02 20:21:06 +00:00
|
|
|
}
|
|
|
|
|
2020-10-04 16:54:55 +00:00
|
|
|
/**
|
|
|
|
* Finds term definitions for the given text.
|
|
|
|
* @param mode The mode to use for finding terms, which determines the format of the resulting array.
|
2020-12-09 01:27:36 +00:00
|
|
|
* One of: 'group', 'merge', 'split', 'simple'
|
2020-10-04 16:54:55 +00:00
|
|
|
* @param text The text to find terms for.
|
|
|
|
* @param options An object using the following structure:
|
|
|
|
* {
|
2020-12-09 01:27:36 +00:00
|
|
|
* wildcard: (enum: null, 'prefix', 'suffix'),
|
2020-10-04 16:54:55 +00:00
|
|
|
* mainDictionary: (string),
|
|
|
|
* alphanumeric: (boolean),
|
2020-10-04 23:36:21 +00:00
|
|
|
* convertHalfWidthCharacters: (enum: 'false', 'true', 'variant'),
|
|
|
|
* convertNumericCharacters: (enum: 'false', 'true', 'variant'),
|
|
|
|
* convertAlphabeticCharacters: (enum: 'false', 'true', 'variant'),
|
|
|
|
* convertHiraganaToKatakana: (enum: 'false', 'true', 'variant'),
|
|
|
|
* convertKatakanaToHiragana: (enum: 'false', 'true', 'variant'),
|
|
|
|
* collapseEmphaticSequences: (enum: 'false', 'true', 'full'),
|
2021-01-03 17:12:55 +00:00
|
|
|
* textReplacements: [
|
|
|
|
* (null or [
|
|
|
|
* {pattern: (RegExp), replacement: (string)}
|
|
|
|
* ...
|
|
|
|
* ])
|
|
|
|
* ...
|
|
|
|
* ],
|
2020-10-04 16:54:55 +00:00
|
|
|
* enabledDictionaryMap: (Map of [
|
|
|
|
* (string),
|
|
|
|
* {
|
2021-03-06 18:04:50 +00:00
|
|
|
* index: (number),
|
|
|
|
* priority: (number),
|
2020-10-04 16:54:55 +00:00
|
|
|
* allowSecondarySearches: (boolean)
|
|
|
|
* }
|
|
|
|
* ])
|
|
|
|
* }
|
|
|
|
* @returns An array of [definitions, textLength]. The structure of each definition depends on the
|
|
|
|
* mode parameter, see the _create?TermDefinition?() functions for structure details.
|
|
|
|
*/
|
|
|
|
async findTerms(mode, text, options) {
|
2020-08-09 17:21:14 +00:00
|
|
|
switch (mode) {
|
|
|
|
case 'group':
|
2020-10-04 16:54:55 +00:00
|
|
|
return await this._findTermsGrouped(text, options);
|
2020-08-09 17:21:14 +00:00
|
|
|
case 'merge':
|
2020-10-04 16:54:55 +00:00
|
|
|
return await this._findTermsMerged(text, options);
|
2020-08-09 17:21:14 +00:00
|
|
|
case 'split':
|
2020-10-04 16:54:55 +00:00
|
|
|
return await this._findTermsSplit(text, options);
|
2020-08-09 17:21:14 +00:00
|
|
|
case 'simple':
|
2020-10-04 16:54:55 +00:00
|
|
|
return await this._findTermsSimple(text, options);
|
2020-08-09 17:21:14 +00:00
|
|
|
default:
|
|
|
|
return [[], 0];
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2020-10-04 16:54:55 +00:00
|
|
|
/**
|
|
|
|
* Finds kanji definitions for the given text.
|
|
|
|
* @param text The text to find kanji definitions for. This string can be of any length,
|
|
|
|
* but is typically just one character, which is a single kanji. If the string is multiple
|
|
|
|
* characters long, each character will be searched in the database.
|
|
|
|
* @param options An object using the following structure:
|
|
|
|
* {
|
|
|
|
* enabledDictionaryMap: (Map of [
|
|
|
|
* (string),
|
|
|
|
* {
|
2021-03-06 18:04:50 +00:00
|
|
|
* index: (number),
|
|
|
|
* priority: (number)
|
2020-10-04 16:54:55 +00:00
|
|
|
* }
|
|
|
|
* ])
|
|
|
|
* }
|
|
|
|
* @returns An array of definitions. See the _createKanjiDefinition() function for structure details.
|
|
|
|
*/
|
2020-08-09 17:21:14 +00:00
|
|
|
async findKanji(text, options) {
|
2020-10-04 16:54:55 +00:00
|
|
|
const {enabledDictionaryMap} = options;
|
2020-08-09 17:21:14 +00:00
|
|
|
const kanjiUnique = new Set();
|
|
|
|
for (const c of text) {
|
|
|
|
kanjiUnique.add(c);
|
|
|
|
}
|
|
|
|
|
2020-10-04 15:12:15 +00:00
|
|
|
const databaseDefinitions = await this._database.findKanjiBulk([...kanjiUnique], enabledDictionaryMap);
|
2020-10-02 21:59:14 +00:00
|
|
|
if (databaseDefinitions.length === 0) { return []; }
|
2020-08-09 17:21:14 +00:00
|
|
|
|
2020-10-02 21:59:14 +00:00
|
|
|
this._sortDatabaseDefinitionsByIndex(databaseDefinitions);
|
2020-08-09 17:21:14 +00:00
|
|
|
|
2020-10-02 21:59:14 +00:00
|
|
|
const definitions = [];
|
2020-10-04 15:12:15 +00:00
|
|
|
for (const {character, onyomi, kunyomi, tags, glossary, stats, dictionary} of databaseDefinitions) {
|
2020-10-02 21:59:14 +00:00
|
|
|
const expandedStats = await this._expandStats(stats, dictionary);
|
|
|
|
const expandedTags = await this._expandTags(tags, dictionary);
|
|
|
|
this._sortTags(expandedTags);
|
2020-08-09 17:21:14 +00:00
|
|
|
|
2020-10-04 15:12:15 +00:00
|
|
|
const definition = this._createKanjiDefinition(character, dictionary, onyomi, kunyomi, glossary, expandedTags, expandedStats);
|
|
|
|
definitions.push(definition);
|
2020-08-09 17:21:14 +00:00
|
|
|
}
|
|
|
|
|
2020-10-04 15:12:15 +00:00
|
|
|
await this._buildKanjiMeta(definitions, enabledDictionaryMap);
|
2020-08-09 17:21:14 +00:00
|
|
|
|
|
|
|
return definitions;
|
|
|
|
}
|
|
|
|
|
2020-10-04 23:33:22 +00:00
|
|
|
// Find terms core functions
|
|
|
|
|
|
|
|
async _findTermsSimple(text, options) {
|
|
|
|
const {enabledDictionaryMap} = options;
|
|
|
|
const [definitions, length] = await this._findTermsInternal(text, enabledDictionaryMap, options);
|
2021-02-27 04:55:32 +00:00
|
|
|
this._sortDefinitions(definitions);
|
2020-10-04 23:33:22 +00:00
|
|
|
return [definitions, length];
|
|
|
|
}
|
|
|
|
|
|
|
|
async _findTermsSplit(text, options) {
|
|
|
|
const {enabledDictionaryMap} = options;
|
|
|
|
const [definitions, length] = await this._findTermsInternal(text, enabledDictionaryMap, options);
|
|
|
|
await this._buildTermMeta(definitions, enabledDictionaryMap);
|
2021-02-27 04:55:32 +00:00
|
|
|
this._sortDefinitions(definitions);
|
2020-10-04 23:33:22 +00:00
|
|
|
return [definitions, length];
|
|
|
|
}
|
|
|
|
|
|
|
|
async _findTermsGrouped(text, options) {
|
2020-11-13 01:34:11 +00:00
|
|
|
const {enabledDictionaryMap} = options;
|
2020-10-04 23:33:22 +00:00
|
|
|
const [definitions, length] = await this._findTermsInternal(text, enabledDictionaryMap, options);
|
|
|
|
|
|
|
|
const groupedDefinitions = this._groupTerms(definitions, enabledDictionaryMap);
|
|
|
|
await this._buildTermMeta(groupedDefinitions, enabledDictionaryMap);
|
2021-02-27 04:55:32 +00:00
|
|
|
this._sortDefinitions(groupedDefinitions);
|
2020-10-04 23:33:22 +00:00
|
|
|
|
2020-11-13 01:34:11 +00:00
|
|
|
for (const definition of groupedDefinitions) {
|
|
|
|
this._flagRedundantDefinitionTags(definition.definitions);
|
2020-10-04 23:33:22 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
return [groupedDefinitions, length];
|
|
|
|
}
|
|
|
|
|
|
|
|
async _findTermsMerged(text, options) {
|
2020-11-13 01:34:11 +00:00
|
|
|
const {mainDictionary, enabledDictionaryMap} = options;
|
2020-10-04 23:33:22 +00:00
|
|
|
const [definitions, length] = await this._findTermsInternal(text, enabledDictionaryMap, options);
|
|
|
|
const {sequencedDefinitions, unsequencedDefinitions} = await this._getSequencedDefinitions(definitions, mainDictionary, enabledDictionaryMap);
|
|
|
|
const definitionsMerged = [];
|
2021-03-02 03:17:23 +00:00
|
|
|
|
|
|
|
for (const {relatedDefinitions, secondaryDefinitions} of sequencedDefinitions) {
|
|
|
|
const mergedDefinition = this._getMergedDefinition(relatedDefinitions, secondaryDefinitions);
|
|
|
|
definitionsMerged.push(mergedDefinition);
|
2020-10-04 23:33:22 +00:00
|
|
|
}
|
|
|
|
|
2021-03-02 03:17:23 +00:00
|
|
|
for (const groupedDefinition of this._groupTerms(unsequencedDefinitions, enabledDictionaryMap)) {
|
2020-10-07 00:28:49 +00:00
|
|
|
const {reasons, score, expression, reading, source, rawSource, sourceTerm, furiganaSegments, termTags, definitions: definitions2} = groupedDefinition;
|
2020-10-04 23:33:22 +00:00
|
|
|
const termDetailsList = [this._createTermDetails(sourceTerm, expression, reading, furiganaSegments, termTags)];
|
|
|
|
const compatibilityDefinition = this._createMergedTermDefinition(
|
|
|
|
source,
|
|
|
|
rawSource,
|
2020-10-06 23:31:53 +00:00
|
|
|
this._convertTermDefinitionsToMergedGlossaryTermDefinitions(definitions2),
|
2020-10-04 23:33:22 +00:00
|
|
|
[expression],
|
|
|
|
[reading],
|
|
|
|
termDetailsList,
|
|
|
|
reasons,
|
|
|
|
score
|
|
|
|
);
|
|
|
|
definitionsMerged.push(compatibilityDefinition);
|
|
|
|
}
|
|
|
|
|
|
|
|
await this._buildTermMeta(definitionsMerged, enabledDictionaryMap);
|
2021-02-27 04:55:32 +00:00
|
|
|
this._sortDefinitions(definitionsMerged);
|
2020-10-04 23:33:22 +00:00
|
|
|
|
2020-11-13 01:34:11 +00:00
|
|
|
for (const definition of definitionsMerged) {
|
|
|
|
this._flagRedundantDefinitionTags(definition.definitions);
|
2020-10-04 23:33:22 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
return [definitionsMerged, length];
|
|
|
|
}
|
|
|
|
|
|
|
|
// Find terms internal implementation
|
|
|
|
|
|
|
|
async _findTermsInternal(text, enabledDictionaryMap, options) {
|
|
|
|
const {alphanumeric, wildcard} = options;
|
|
|
|
text = this._getSearchableText(text, alphanumeric);
|
|
|
|
if (text.length === 0) {
|
|
|
|
return [[], 0];
|
|
|
|
}
|
|
|
|
|
|
|
|
const deinflections = (
|
|
|
|
wildcard ?
|
|
|
|
await this._findTermWildcard(text, enabledDictionaryMap, wildcard) :
|
|
|
|
await this._findTermDeinflections(text, enabledDictionaryMap, options)
|
|
|
|
);
|
|
|
|
|
|
|
|
let maxLength = 0;
|
|
|
|
const definitions = [];
|
2021-03-02 03:17:23 +00:00
|
|
|
const definitionIds = new Set();
|
2020-10-04 23:33:22 +00:00
|
|
|
for (const {databaseDefinitions, source, rawSource, term, reasons} of deinflections) {
|
|
|
|
if (databaseDefinitions.length === 0) { continue; }
|
|
|
|
maxLength = Math.max(maxLength, rawSource.length);
|
|
|
|
for (const databaseDefinition of databaseDefinitions) {
|
2021-03-02 03:17:23 +00:00
|
|
|
const {id} = databaseDefinition;
|
|
|
|
if (definitionIds.has(id)) { continue; }
|
2021-03-02 00:01:30 +00:00
|
|
|
const definition = await this._createTermDefinitionFromDatabaseDefinition(databaseDefinition, source, rawSource, term, reasons, true, enabledDictionaryMap);
|
2020-10-04 23:33:22 +00:00
|
|
|
definitions.push(definition);
|
2021-03-02 03:17:23 +00:00
|
|
|
definitionIds.add(id);
|
2020-10-04 23:33:22 +00:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
return [definitions, maxLength];
|
|
|
|
}
|
|
|
|
|
|
|
|
async _findTermWildcard(text, enabledDictionaryMap, wildcard) {
|
|
|
|
const databaseDefinitions = await this._database.findTermsBulk([text], enabledDictionaryMap, wildcard);
|
|
|
|
if (databaseDefinitions.length === 0) {
|
|
|
|
return [];
|
|
|
|
}
|
|
|
|
|
|
|
|
return [{
|
|
|
|
source: text,
|
|
|
|
rawSource: text,
|
|
|
|
term: text,
|
|
|
|
rules: 0,
|
|
|
|
reasons: [],
|
|
|
|
databaseDefinitions
|
|
|
|
}];
|
|
|
|
}
|
|
|
|
|
|
|
|
async _findTermDeinflections(text, enabledDictionaryMap, options) {
|
|
|
|
const deinflections = this._getAllDeinflections(text, options);
|
|
|
|
|
|
|
|
if (deinflections.length === 0) {
|
|
|
|
return [];
|
|
|
|
}
|
|
|
|
|
|
|
|
const uniqueDeinflectionTerms = [];
|
|
|
|
const uniqueDeinflectionArrays = [];
|
|
|
|
const uniqueDeinflectionsMap = new Map();
|
|
|
|
for (const deinflection of deinflections) {
|
|
|
|
const term = deinflection.term;
|
|
|
|
let deinflectionArray = uniqueDeinflectionsMap.get(term);
|
|
|
|
if (typeof deinflectionArray === 'undefined') {
|
|
|
|
deinflectionArray = [];
|
|
|
|
uniqueDeinflectionTerms.push(term);
|
|
|
|
uniqueDeinflectionArrays.push(deinflectionArray);
|
|
|
|
uniqueDeinflectionsMap.set(term, deinflectionArray);
|
|
|
|
}
|
|
|
|
deinflectionArray.push(deinflection);
|
|
|
|
}
|
|
|
|
|
|
|
|
const databaseDefinitions = await this._database.findTermsBulk(uniqueDeinflectionTerms, enabledDictionaryMap, null);
|
|
|
|
|
|
|
|
for (const databaseDefinition of databaseDefinitions) {
|
|
|
|
const definitionRules = Deinflector.rulesToRuleFlags(databaseDefinition.rules);
|
|
|
|
for (const deinflection of uniqueDeinflectionArrays[databaseDefinition.index]) {
|
|
|
|
const deinflectionRules = deinflection.rules;
|
|
|
|
if (deinflectionRules === 0 || (definitionRules & deinflectionRules) !== 0) {
|
|
|
|
deinflection.databaseDefinitions.push(databaseDefinition);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
return deinflections;
|
|
|
|
}
|
|
|
|
|
|
|
|
_getAllDeinflections(text, options) {
|
|
|
|
const textOptionVariantArray = [
|
2021-01-03 17:12:55 +00:00
|
|
|
this._getTextReplacementsVariants(options),
|
2020-10-04 23:33:22 +00:00
|
|
|
this._getTextOptionEntryVariants(options.convertHalfWidthCharacters),
|
|
|
|
this._getTextOptionEntryVariants(options.convertNumericCharacters),
|
|
|
|
this._getTextOptionEntryVariants(options.convertAlphabeticCharacters),
|
|
|
|
this._getTextOptionEntryVariants(options.convertHiraganaToKatakana),
|
|
|
|
this._getTextOptionEntryVariants(options.convertKatakanaToHiragana),
|
2021-01-02 04:16:44 +00:00
|
|
|
this._getCollapseEmphaticOptions(options)
|
2020-10-04 23:33:22 +00:00
|
|
|
];
|
|
|
|
|
2020-11-29 18:09:02 +00:00
|
|
|
const jp = this._japaneseUtil;
|
2020-10-04 23:33:22 +00:00
|
|
|
const deinflections = [];
|
|
|
|
const used = new Set();
|
2021-01-03 17:12:55 +00:00
|
|
|
for (const [textReplacements, halfWidth, numeric, alphabetic, katakana, hiragana, [collapseEmphatic, collapseEmphaticFull]] of this._getArrayVariants(textOptionVariantArray)) {
|
2020-10-04 23:33:22 +00:00
|
|
|
let text2 = text;
|
|
|
|
const sourceMap = new TextSourceMap(text2);
|
2021-01-03 17:12:55 +00:00
|
|
|
if (textReplacements !== null) {
|
|
|
|
text2 = this._applyTextReplacements(text2, sourceMap, textReplacements);
|
|
|
|
}
|
2020-10-04 23:33:22 +00:00
|
|
|
if (halfWidth) {
|
|
|
|
text2 = jp.convertHalfWidthKanaToFullWidth(text2, sourceMap);
|
|
|
|
}
|
|
|
|
if (numeric) {
|
|
|
|
text2 = jp.convertNumericToFullWidth(text2);
|
|
|
|
}
|
|
|
|
if (alphabetic) {
|
|
|
|
text2 = jp.convertAlphabeticToKana(text2, sourceMap);
|
|
|
|
}
|
|
|
|
if (katakana) {
|
|
|
|
text2 = jp.convertHiraganaToKatakana(text2);
|
|
|
|
}
|
|
|
|
if (hiragana) {
|
|
|
|
text2 = jp.convertKatakanaToHiragana(text2);
|
|
|
|
}
|
|
|
|
if (collapseEmphatic) {
|
|
|
|
text2 = jp.collapseEmphaticSequences(text2, collapseEmphaticFull, sourceMap);
|
|
|
|
}
|
|
|
|
|
|
|
|
for (let i = text2.length; i > 0; --i) {
|
|
|
|
const text2Substring = text2.substring(0, i);
|
|
|
|
if (used.has(text2Substring)) { break; }
|
|
|
|
used.add(text2Substring);
|
|
|
|
const rawSource = sourceMap.source.substring(0, sourceMap.getSourceLength(i));
|
|
|
|
for (const deinflection of this._deinflector.deinflect(text2Substring, rawSource)) {
|
|
|
|
deinflections.push(deinflection);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
return deinflections;
|
|
|
|
}
|
2020-08-09 17:21:14 +00:00
|
|
|
|
2021-03-02 00:01:30 +00:00
|
|
|
/**
|
|
|
|
* @param definitions An array of 'term' definitions.
|
|
|
|
* @param mainDictionary The name of the main dictionary.
|
|
|
|
* @param enabledDictionaryMap The map of enabled dictionaries and their settings.
|
|
|
|
*/
|
2020-10-04 15:12:15 +00:00
|
|
|
async _getSequencedDefinitions(definitions, mainDictionary, enabledDictionaryMap) {
|
2021-03-02 03:17:23 +00:00
|
|
|
const secondarySearchDictionaryMap = this._getSecondarySearchDictionaryMap(enabledDictionaryMap);
|
2020-01-28 23:58:14 +00:00
|
|
|
const sequenceList = [];
|
2020-10-02 21:59:14 +00:00
|
|
|
const sequencedDefinitionMap = new Map();
|
2020-01-28 23:58:14 +00:00
|
|
|
const sequencedDefinitions = [];
|
2021-03-02 03:17:23 +00:00
|
|
|
const unsequencedDefinitions = new Map();
|
2020-10-02 21:59:14 +00:00
|
|
|
for (const definition of definitions) {
|
2021-03-02 03:17:23 +00:00
|
|
|
const {sequence, dictionary, id} = definition;
|
2020-10-02 21:59:14 +00:00
|
|
|
if (mainDictionary === dictionary && sequence >= 0) {
|
|
|
|
let sequencedDefinition = sequencedDefinitionMap.get(sequence);
|
|
|
|
if (typeof sequencedDefinition === 'undefined') {
|
|
|
|
sequencedDefinition = {
|
2020-10-05 02:04:44 +00:00
|
|
|
relatedDefinitions: [],
|
2021-03-02 03:17:23 +00:00
|
|
|
definitionIds: new Set(),
|
|
|
|
secondaryDefinitions: []
|
2020-10-02 21:59:14 +00:00
|
|
|
};
|
|
|
|
sequencedDefinitionMap.set(sequence, sequencedDefinition);
|
|
|
|
sequencedDefinitions.push(sequencedDefinition);
|
|
|
|
sequenceList.push(sequence);
|
|
|
|
}
|
2020-10-05 02:04:44 +00:00
|
|
|
sequencedDefinition.relatedDefinitions.push(definition);
|
2021-03-02 03:17:23 +00:00
|
|
|
sequencedDefinition.definitionIds.add(id);
|
2020-10-02 21:59:14 +00:00
|
|
|
} else {
|
2021-03-02 03:17:23 +00:00
|
|
|
unsequencedDefinitions.set(id, definition);
|
2020-10-02 21:59:14 +00:00
|
|
|
}
|
2020-01-28 23:58:14 +00:00
|
|
|
}
|
2019-10-19 15:34:12 +00:00
|
|
|
|
2020-10-04 15:12:15 +00:00
|
|
|
if (sequenceList.length > 0) {
|
2021-03-02 03:17:23 +00:00
|
|
|
await this._addRelatedDefinitions(sequencedDefinitions, unsequencedDefinitions, sequenceList, mainDictionary, enabledDictionaryMap);
|
|
|
|
await this._addSecondaryDefinitions(sequencedDefinitions, unsequencedDefinitions, enabledDictionaryMap, secondarySearchDictionaryMap);
|
2019-10-19 15:34:12 +00:00
|
|
|
}
|
|
|
|
|
2020-10-05 02:04:44 +00:00
|
|
|
for (const {relatedDefinitions} of sequencedDefinitions) {
|
|
|
|
this._sortDefinitionsById(relatedDefinitions);
|
|
|
|
}
|
|
|
|
|
2021-03-02 03:17:23 +00:00
|
|
|
return {sequencedDefinitions, unsequencedDefinitions: [...unsequencedDefinitions.values()]};
|
2019-10-19 15:34:12 +00:00
|
|
|
}
|
|
|
|
|
2021-03-02 03:17:23 +00:00
|
|
|
async _addRelatedDefinitions(sequencedDefinitions, unsequencedDefinitions, sequenceList, mainDictionary, enabledDictionaryMap) {
|
|
|
|
const databaseDefinitions = await this._database.findTermsBySequenceBulk(sequenceList, mainDictionary);
|
|
|
|
for (const databaseDefinition of databaseDefinitions) {
|
|
|
|
const {relatedDefinitions, definitionIds} = sequencedDefinitions[databaseDefinition.index];
|
|
|
|
const {id} = databaseDefinition;
|
|
|
|
if (definitionIds.has(id)) { continue; }
|
|
|
|
|
|
|
|
const {source, rawSource, sourceTerm} = relatedDefinitions[0];
|
|
|
|
const definition = await this._createTermDefinitionFromDatabaseDefinition(databaseDefinition, source, rawSource, sourceTerm, [], false, enabledDictionaryMap);
|
|
|
|
relatedDefinitions.push(definition);
|
|
|
|
definitionIds.add(id);
|
|
|
|
unsequencedDefinitions.delete(id);
|
2019-10-19 16:24:38 +00:00
|
|
|
}
|
2021-03-02 03:17:23 +00:00
|
|
|
}
|
2019-10-19 16:24:38 +00:00
|
|
|
|
2021-03-02 03:17:23 +00:00
|
|
|
async _addSecondaryDefinitions(sequencedDefinitions, unsequencedDefinitions, enabledDictionaryMap, secondarySearchDictionaryMap) {
|
|
|
|
if (unsequencedDefinitions.length === 0 && secondarySearchDictionaryMap.size === 0) { return; }
|
|
|
|
|
|
|
|
// Prepare grouping info
|
2019-10-19 16:42:26 +00:00
|
|
|
const expressionList = [];
|
|
|
|
const readingList = [];
|
2021-03-02 03:17:23 +00:00
|
|
|
const targetList = [];
|
|
|
|
const targetMap = new Map();
|
|
|
|
|
|
|
|
for (const sequencedDefinition of sequencedDefinitions) {
|
|
|
|
const {relatedDefinitions} = sequencedDefinition;
|
|
|
|
for (const definition of relatedDefinitions) {
|
|
|
|
const {expressions: [{expression, reading}]} = definition;
|
|
|
|
const key = this._createMapKey([expression, reading]);
|
|
|
|
let target = targetMap.get(key);
|
|
|
|
if (typeof target === 'undefined') {
|
|
|
|
target = {
|
|
|
|
sequencedDefinitions: [],
|
|
|
|
searchSecondary: false
|
|
|
|
};
|
|
|
|
targetMap.set(key, target);
|
|
|
|
}
|
|
|
|
target.sequencedDefinitions.push(sequencedDefinition);
|
|
|
|
if (!definition.isPrimary && !target.searchSecondary) {
|
|
|
|
target.searchSecondary = true;
|
|
|
|
expressionList.push(expression);
|
|
|
|
readingList.push(reading);
|
|
|
|
targetList.push(target);
|
|
|
|
}
|
2019-10-19 16:24:38 +00:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2021-03-02 03:17:23 +00:00
|
|
|
// Group unsequenced definitions with sequenced definitions that have a matching [expression, reading].
|
|
|
|
for (const [id, definition] of unsequencedDefinitions.entries()) {
|
|
|
|
const {expressions: [{expression, reading}]} = definition;
|
|
|
|
const key = this._createMapKey([expression, reading]);
|
|
|
|
const target = targetMap.get(key);
|
|
|
|
if (typeof target === 'undefined') { continue; }
|
|
|
|
|
|
|
|
for (const {definitionIds, secondaryDefinitions} of target.sequencedDefinitions) {
|
|
|
|
if (definitionIds.has(id)) { continue; }
|
|
|
|
|
|
|
|
secondaryDefinitions.push(definition);
|
|
|
|
definitionIds.add(id);
|
|
|
|
unsequencedDefinitions.delete(id);
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
// Search database for additional secondary terms
|
|
|
|
if (expressionList.length === 0 || secondarySearchDictionaryMap.size === 0) { return; }
|
|
|
|
|
2020-10-04 15:12:15 +00:00
|
|
|
const databaseDefinitions = await this._database.findTermsExactBulk(expressionList, readingList, secondarySearchDictionaryMap);
|
2020-10-02 21:59:14 +00:00
|
|
|
this._sortDatabaseDefinitionsByIndex(databaseDefinitions);
|
2019-10-19 16:42:26 +00:00
|
|
|
|
2020-10-02 21:59:14 +00:00
|
|
|
for (const databaseDefinition of databaseDefinitions) {
|
2021-03-02 03:17:23 +00:00
|
|
|
const {index, id} = databaseDefinition;
|
|
|
|
const source = expressionList[index];
|
|
|
|
const target = targetList[index];
|
|
|
|
for (const {definitionIds, secondaryDefinitions} of target.sequencedDefinitions) {
|
|
|
|
if (definitionIds.has(id)) { continue; }
|
|
|
|
|
|
|
|
const definition = await this._createTermDefinitionFromDatabaseDefinition(databaseDefinition, source, source, source, [], false, enabledDictionaryMap);
|
|
|
|
secondaryDefinitions.push(definition);
|
|
|
|
definitionIds.add(id);
|
|
|
|
unsequencedDefinitions.delete(id);
|
|
|
|
}
|
2019-10-19 16:42:26 +00:00
|
|
|
}
|
2019-10-19 16:24:38 +00:00
|
|
|
}
|
|
|
|
|
2021-03-02 03:17:23 +00:00
|
|
|
_getMergedDefinition(relatedDefinitions, secondaryDefinitions) {
|
|
|
|
const {reasons, source, rawSource} = relatedDefinitions[0];
|
|
|
|
const allDefinitions = secondaryDefinitions.length > 0 ? [...relatedDefinitions, ...secondaryDefinitions] : relatedDefinitions;
|
|
|
|
const score = this._getMaxPrimaryDefinitionScore(allDefinitions);
|
2017-10-03 04:20:02 +00:00
|
|
|
|
2021-03-02 03:17:23 +00:00
|
|
|
// Merge by glossary
|
2020-10-04 15:12:15 +00:00
|
|
|
const allExpressions = new Set();
|
|
|
|
const allReadings = new Set();
|
2021-03-02 03:17:23 +00:00
|
|
|
const glossaryDefinitionGroupMap = new Map();
|
|
|
|
for (const definition of allDefinitions) {
|
|
|
|
const {dictionary, glossary, expressions: [{expression, reading}]} = definition;
|
|
|
|
|
|
|
|
const key = this._createMapKey([dictionary, ...glossary]);
|
|
|
|
let group = glossaryDefinitionGroupMap.get(key);
|
|
|
|
if (typeof group === 'undefined') {
|
|
|
|
group = {
|
|
|
|
expressions: new Set(),
|
|
|
|
readings: new Set(),
|
|
|
|
definitions: []
|
|
|
|
};
|
|
|
|
glossaryDefinitionGroupMap.set(key, group);
|
|
|
|
}
|
|
|
|
|
|
|
|
allExpressions.add(expression);
|
|
|
|
allReadings.add(reading);
|
|
|
|
group.expressions.add(expression);
|
|
|
|
group.readings.add(reading);
|
|
|
|
group.definitions.push(definition);
|
2019-10-19 16:16:38 +00:00
|
|
|
}
|
2017-10-03 04:20:02 +00:00
|
|
|
|
2021-03-02 03:17:23 +00:00
|
|
|
const glossaryDefinitions = [];
|
2020-10-07 00:28:49 +00:00
|
|
|
for (const {expressions, readings, definitions} of glossaryDefinitionGroupMap.values()) {
|
2020-10-04 15:12:15 +00:00
|
|
|
const glossaryDefinition = this._createMergedGlossaryTermDefinition(
|
|
|
|
source,
|
|
|
|
rawSource,
|
2020-10-07 00:28:49 +00:00
|
|
|
definitions,
|
2020-10-04 15:12:15 +00:00
|
|
|
expressions,
|
|
|
|
readings,
|
|
|
|
allExpressions,
|
|
|
|
allReadings
|
|
|
|
);
|
|
|
|
glossaryDefinitions.push(glossaryDefinition);
|
|
|
|
}
|
2021-02-27 04:55:32 +00:00
|
|
|
this._sortDefinitions(glossaryDefinitions);
|
2020-10-04 15:12:15 +00:00
|
|
|
|
2021-03-02 03:17:23 +00:00
|
|
|
const termDetailsList = this._createTermDetailsList(allDefinitions);
|
2020-10-04 15:12:15 +00:00
|
|
|
|
|
|
|
return this._createMergedTermDefinition(
|
|
|
|
source,
|
|
|
|
rawSource,
|
|
|
|
glossaryDefinitions,
|
|
|
|
[...allExpressions],
|
|
|
|
[...allReadings],
|
2020-10-04 22:10:10 +00:00
|
|
|
termDetailsList,
|
2020-10-04 15:12:15 +00:00
|
|
|
reasons,
|
|
|
|
score
|
|
|
|
);
|
|
|
|
}
|
|
|
|
|
2020-10-04 22:10:10 +00:00
|
|
|
_removeUsedDefinitions(definitions, termInfoMap, usedDefinitions) {
|
2020-10-04 15:12:15 +00:00
|
|
|
for (let i = 0, ii = definitions.length; i < ii; ++i) {
|
|
|
|
const definition = definitions[i];
|
|
|
|
const {expression, reading} = definition;
|
2020-10-04 22:10:10 +00:00
|
|
|
const expressionMap = termInfoMap.get(expression);
|
2020-10-04 15:12:15 +00:00
|
|
|
if (
|
|
|
|
typeof expressionMap !== 'undefined' &&
|
|
|
|
typeof expressionMap.get(reading) !== 'undefined'
|
|
|
|
) {
|
|
|
|
usedDefinitions.add(definition);
|
|
|
|
} else {
|
|
|
|
definitions.splice(i, 1);
|
|
|
|
--i;
|
|
|
|
--ii;
|
2017-10-01 01:17:02 +00:00
|
|
|
}
|
2019-10-19 16:16:38 +00:00
|
|
|
}
|
2020-10-04 15:12:15 +00:00
|
|
|
}
|
2017-10-03 04:20:02 +00:00
|
|
|
|
2020-10-04 15:12:15 +00:00
|
|
|
_getUniqueDefinitionTags(definitions) {
|
|
|
|
const definitionTagsMap = new Map();
|
|
|
|
for (const {definitionTags} of definitions) {
|
|
|
|
for (const tag of definitionTags) {
|
|
|
|
const {name} = tag;
|
|
|
|
if (definitionTagsMap.has(name)) { continue; }
|
|
|
|
definitionTagsMap.set(name, this._cloneTag(tag));
|
|
|
|
}
|
|
|
|
}
|
|
|
|
return [...definitionTagsMap.values()];
|
|
|
|
}
|
2017-10-01 01:17:02 +00:00
|
|
|
|
2020-11-13 01:34:11 +00:00
|
|
|
_flagRedundantDefinitionTags(definitions) {
|
2021-01-29 02:33:30 +00:00
|
|
|
let lastDictionary = null;
|
2020-10-04 23:33:22 +00:00
|
|
|
let lastPartOfSpeech = '';
|
|
|
|
const removeCategoriesSet = new Set();
|
2017-04-22 20:02:06 +00:00
|
|
|
|
2021-01-29 02:33:30 +00:00
|
|
|
for (const {dictionary, definitionTags} of definitions) {
|
2020-10-04 23:33:22 +00:00
|
|
|
const partOfSpeech = this._createMapKey(this._getTagNamesWithCategory(definitionTags, 'partOfSpeech'));
|
2016-03-21 03:34:50 +00:00
|
|
|
|
2021-01-29 02:33:30 +00:00
|
|
|
if (lastDictionary !== dictionary) {
|
2020-10-04 23:33:22 +00:00
|
|
|
lastDictionary = dictionary;
|
|
|
|
lastPartOfSpeech = '';
|
2017-01-08 19:18:55 +00:00
|
|
|
}
|
2017-07-10 21:10:58 +00:00
|
|
|
|
2020-10-04 23:33:22 +00:00
|
|
|
if (lastPartOfSpeech === partOfSpeech) {
|
|
|
|
removeCategoriesSet.add('partOfSpeech');
|
|
|
|
} else {
|
|
|
|
lastPartOfSpeech = partOfSpeech;
|
|
|
|
}
|
2016-03-21 01:45:37 +00:00
|
|
|
|
2020-10-04 23:33:22 +00:00
|
|
|
if (removeCategoriesSet.size > 0) {
|
2020-11-13 01:34:11 +00:00
|
|
|
this._flagTagsWithCategoryAsRedundant(definitionTags, removeCategoriesSet);
|
2020-10-04 23:33:22 +00:00
|
|
|
removeCategoriesSet.clear();
|
|
|
|
}
|
2019-11-05 01:52:08 +00:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2021-03-02 00:01:30 +00:00
|
|
|
/**
|
|
|
|
* Groups definitions with the same [source, expression, reading, reasons].
|
|
|
|
* @param definitions An array of 'term' definitions.
|
|
|
|
* @returns An array of 'termGrouped' definitions.
|
|
|
|
*/
|
2020-10-04 23:33:22 +00:00
|
|
|
_groupTerms(definitions) {
|
|
|
|
const groups = new Map();
|
|
|
|
for (const definition of definitions) {
|
2021-03-02 00:01:30 +00:00
|
|
|
const {source, reasons, expressions: [{expression, reading}]} = definition;
|
|
|
|
const key = this._createMapKey([source, expression, reading, ...reasons]);
|
2020-10-04 23:33:22 +00:00
|
|
|
let groupDefinitions = groups.get(key);
|
|
|
|
if (typeof groupDefinitions === 'undefined') {
|
|
|
|
groupDefinitions = [];
|
|
|
|
groups.set(key, groupDefinitions);
|
2019-10-01 02:09:16 +00:00
|
|
|
}
|
|
|
|
|
2020-10-04 23:33:22 +00:00
|
|
|
groupDefinitions.push(definition);
|
|
|
|
}
|
2019-08-31 01:06:21 +00:00
|
|
|
|
2020-10-04 23:33:22 +00:00
|
|
|
const results = [];
|
|
|
|
for (const groupDefinitions of groups.values()) {
|
2021-02-27 04:55:32 +00:00
|
|
|
this._sortDefinitions(groupDefinitions);
|
2020-10-04 23:33:22 +00:00
|
|
|
const definition = this._createGroupedTermDefinition(groupDefinitions);
|
|
|
|
results.push(definition);
|
2019-08-31 01:06:21 +00:00
|
|
|
}
|
|
|
|
|
2020-10-04 23:33:22 +00:00
|
|
|
return results;
|
2019-08-31 01:06:21 +00:00
|
|
|
}
|
|
|
|
|
2020-10-06 23:31:53 +00:00
|
|
|
_convertTermDefinitionsToMergedGlossaryTermDefinitions(definitions) {
|
|
|
|
const convertedDefinitions = [];
|
|
|
|
for (const definition of definitions) {
|
|
|
|
const {source, rawSource, expression, reading} = definition;
|
|
|
|
const expressions = new Set([expression]);
|
|
|
|
const readings = new Set([reading]);
|
|
|
|
const convertedDefinition = this._createMergedGlossaryTermDefinition(source, rawSource, [definition], expressions, readings, expressions, readings);
|
|
|
|
convertedDefinitions.push(convertedDefinition);
|
|
|
|
}
|
|
|
|
return convertedDefinitions;
|
|
|
|
}
|
|
|
|
|
2020-10-04 23:33:22 +00:00
|
|
|
// Metadata building
|
2019-12-22 19:07:30 +00:00
|
|
|
|
2020-10-04 15:12:15 +00:00
|
|
|
async _buildTermMeta(definitions, enabledDictionaryMap) {
|
2021-02-28 03:27:00 +00:00
|
|
|
const allDefinitions = this._getAllDefinitions(definitions);
|
|
|
|
const expressionMap = new Map();
|
|
|
|
const expressionValues = [];
|
|
|
|
const expressionKeys = [];
|
|
|
|
|
|
|
|
for (const {expressions, frequencies: frequencies1, pitches: pitches1} of allDefinitions) {
|
|
|
|
for (let i = 0, ii = expressions.length; i < ii; ++i) {
|
2021-03-06 00:25:31 +00:00
|
|
|
let {expression, reading, frequencies: frequencies2, pitches: pitches2} = expressions[i];
|
|
|
|
if (reading.length === 0) { reading = expression; }
|
2021-02-28 03:27:00 +00:00
|
|
|
let readingMap = expressionMap.get(expression);
|
|
|
|
if (typeof readingMap === 'undefined') {
|
|
|
|
readingMap = new Map();
|
|
|
|
expressionMap.set(expression, readingMap);
|
|
|
|
expressionValues.push(readingMap);
|
|
|
|
expressionKeys.push(expression);
|
2020-11-02 02:24:35 +00:00
|
|
|
}
|
2021-02-28 03:27:00 +00:00
|
|
|
let targets = readingMap.get(reading);
|
|
|
|
if (typeof targets === 'undefined') {
|
|
|
|
targets = [];
|
|
|
|
readingMap.set(reading, targets);
|
2020-11-02 02:24:35 +00:00
|
|
|
}
|
2021-02-28 03:27:00 +00:00
|
|
|
targets.push(
|
|
|
|
{frequencies: frequencies1, pitches: pitches1, index: i},
|
|
|
|
{frequencies: frequencies2, pitches: pitches2, index: i}
|
|
|
|
);
|
2019-08-31 01:06:21 +00:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2021-02-28 03:27:00 +00:00
|
|
|
const metas = await this._database.findTermMetaBulk(expressionKeys, enabledDictionaryMap);
|
2020-01-25 03:24:05 +00:00
|
|
|
for (const {expression, mode, data, dictionary, index} of metas) {
|
2021-02-28 04:11:41 +00:00
|
|
|
const dictionaryOrder = this._getDictionaryOrder(dictionary, enabledDictionaryMap);
|
2021-02-28 03:27:00 +00:00
|
|
|
const map2 = expressionValues[index];
|
|
|
|
for (const [reading, targets] of map2.entries()) {
|
2020-11-02 02:24:35 +00:00
|
|
|
switch (mode) {
|
|
|
|
case 'freq':
|
|
|
|
{
|
2021-02-28 03:27:00 +00:00
|
|
|
let frequency = data;
|
|
|
|
const hasReading = (data !== null && typeof data === 'object');
|
|
|
|
if (hasReading) {
|
|
|
|
if (data.reading !== reading) { continue; }
|
|
|
|
frequency = data.frequency;
|
|
|
|
}
|
|
|
|
for (const {frequencies, index: expressionIndex} of targets) {
|
2021-02-28 04:11:41 +00:00
|
|
|
frequencies.push({index: frequencies.length, expressionIndex, dictionary, dictionaryOrder, expression, reading, hasReading, frequency});
|
2021-02-28 03:27:00 +00:00
|
|
|
}
|
2020-11-02 02:24:35 +00:00
|
|
|
}
|
|
|
|
break;
|
|
|
|
case 'pitch':
|
|
|
|
{
|
2021-02-28 03:27:00 +00:00
|
|
|
if (data.reading !== reading) { continue; }
|
|
|
|
const pitches2 = [];
|
|
|
|
for (let {position, tags} of data.pitches) {
|
|
|
|
tags = Array.isArray(tags) ? await this._expandTags(tags, dictionary) : [];
|
|
|
|
pitches2.push({position, tags});
|
|
|
|
}
|
|
|
|
for (const {pitches, index: expressionIndex} of targets) {
|
2021-02-28 04:11:41 +00:00
|
|
|
pitches.push({index: pitches.length, expressionIndex, dictionary, dictionaryOrder, expression, reading, pitches: pitches2});
|
2021-02-28 03:27:00 +00:00
|
|
|
}
|
2020-11-02 02:24:35 +00:00
|
|
|
}
|
|
|
|
break;
|
|
|
|
}
|
2017-09-23 15:46:34 +00:00
|
|
|
}
|
2017-09-14 06:22:41 +00:00
|
|
|
}
|
2021-02-28 03:27:00 +00:00
|
|
|
|
|
|
|
for (const definition of allDefinitions) {
|
|
|
|
this._sortTermDefinitionMeta(definition);
|
|
|
|
}
|
2017-09-14 06:22:41 +00:00
|
|
|
}
|
|
|
|
|
2020-10-04 15:12:15 +00:00
|
|
|
async _buildKanjiMeta(definitions, enabledDictionaryMap) {
|
2020-01-25 03:27:25 +00:00
|
|
|
const kanjiList = [];
|
2020-10-02 21:59:14 +00:00
|
|
|
for (const {character} of definitions) {
|
|
|
|
kanjiList.push(character);
|
2020-01-25 03:27:25 +00:00
|
|
|
}
|
|
|
|
|
2020-10-04 15:12:15 +00:00
|
|
|
const metas = await this._database.findKanjiMetaBulk(kanjiList, enabledDictionaryMap);
|
2020-01-25 03:27:25 +00:00
|
|
|
for (const {character, mode, data, dictionary, index} of metas) {
|
2021-02-28 04:11:41 +00:00
|
|
|
const dictionaryOrder = this._getDictionaryOrder(dictionary, enabledDictionaryMap);
|
2020-01-25 03:27:25 +00:00
|
|
|
switch (mode) {
|
|
|
|
case 'freq':
|
2020-12-30 20:12:08 +00:00
|
|
|
{
|
2021-02-28 03:27:00 +00:00
|
|
|
const {frequencies} = definitions[index];
|
2021-02-28 04:11:41 +00:00
|
|
|
frequencies.push({index: frequencies.length, dictionary, dictionaryOrder, character, frequency: data});
|
2020-12-30 20:12:08 +00:00
|
|
|
}
|
2020-01-25 03:27:25 +00:00
|
|
|
break;
|
|
|
|
}
|
|
|
|
}
|
2021-02-28 03:27:00 +00:00
|
|
|
|
|
|
|
for (const definition of definitions) {
|
|
|
|
this._sortKanjiDefinitionMeta(definition);
|
|
|
|
}
|
2020-01-25 03:27:25 +00:00
|
|
|
}
|
|
|
|
|
2020-10-07 00:28:49 +00:00
|
|
|
async _expandTags(names, dictionary) {
|
|
|
|
const tagMetaList = await this._getTagMetaList(names, dictionary);
|
2020-10-02 21:59:14 +00:00
|
|
|
const results = [];
|
|
|
|
for (let i = 0, ii = tagMetaList.length; i < ii; ++i) {
|
|
|
|
const meta = tagMetaList[i];
|
|
|
|
const name = names[i];
|
2020-10-07 00:28:49 +00:00
|
|
|
const {category, notes, order, score} = (meta !== null ? meta : {});
|
2020-11-13 01:34:11 +00:00
|
|
|
const tag = this._createTag(name, category, notes, order, score, dictionary, false);
|
2020-10-02 21:59:14 +00:00
|
|
|
results.push(tag);
|
|
|
|
}
|
|
|
|
return results;
|
2017-09-13 23:42:04 +00:00
|
|
|
}
|
2017-09-18 02:16:08 +00:00
|
|
|
|
2020-10-07 00:28:49 +00:00
|
|
|
async _expandStats(items, dictionary) {
|
2019-10-19 17:32:05 +00:00
|
|
|
const names = Object.keys(items);
|
2020-10-07 00:28:49 +00:00
|
|
|
const tagMetaList = await this._getTagMetaList(names, dictionary);
|
2017-09-18 02:16:08 +00:00
|
|
|
|
2020-02-15 18:12:03 +00:00
|
|
|
const statsGroups = new Map();
|
2019-10-19 17:32:05 +00:00
|
|
|
for (let i = 0; i < names.length; ++i) {
|
|
|
|
const name = names[i];
|
|
|
|
const meta = tagMetaList[i];
|
|
|
|
if (meta === null) { continue; }
|
|
|
|
|
2020-10-07 00:28:49 +00:00
|
|
|
const {category, notes, order, score} = meta;
|
2020-02-15 18:12:03 +00:00
|
|
|
let group = statsGroups.get(category);
|
|
|
|
if (typeof group === 'undefined') {
|
|
|
|
group = [];
|
|
|
|
statsGroups.set(category, group);
|
|
|
|
}
|
2019-08-30 23:38:36 +00:00
|
|
|
|
2020-10-02 21:59:14 +00:00
|
|
|
const value = items[name];
|
|
|
|
const stat = this._createKanjiStat(name, category, notes, order, score, dictionary, value);
|
|
|
|
group.push(stat);
|
2017-09-18 02:16:08 +00:00
|
|
|
}
|
|
|
|
|
2020-02-15 18:12:03 +00:00
|
|
|
const stats = {};
|
|
|
|
for (const [category, group] of statsGroups.entries()) {
|
2020-10-02 21:59:14 +00:00
|
|
|
this._sortKanjiStats(group);
|
2020-02-15 18:12:03 +00:00
|
|
|
stats[category] = group;
|
2017-09-22 06:20:51 +00:00
|
|
|
}
|
2017-09-18 02:57:39 +00:00
|
|
|
return stats;
|
2017-09-18 02:16:08 +00:00
|
|
|
}
|
2019-08-11 18:12:01 +00:00
|
|
|
|
2020-10-07 00:28:49 +00:00
|
|
|
async _getTagMetaList(names, dictionary) {
|
2019-10-19 17:32:05 +00:00
|
|
|
const tagMetaList = [];
|
2020-10-07 00:28:49 +00:00
|
|
|
let cache = this._tagCache.get(dictionary);
|
2020-02-14 01:24:54 +00:00
|
|
|
if (typeof cache === 'undefined') {
|
|
|
|
cache = new Map();
|
2020-10-07 00:28:49 +00:00
|
|
|
this._tagCache.set(dictionary, cache);
|
2020-02-14 01:24:54 +00:00
|
|
|
}
|
2019-10-19 17:32:05 +00:00
|
|
|
|
|
|
|
for (const name of names) {
|
2020-08-09 17:21:14 +00:00
|
|
|
const base = this._getNameBase(name);
|
2019-10-19 17:32:05 +00:00
|
|
|
|
2020-02-14 01:24:54 +00:00
|
|
|
let tagMeta = cache.get(base);
|
|
|
|
if (typeof tagMeta === 'undefined') {
|
2020-10-07 00:28:49 +00:00
|
|
|
tagMeta = await this._database.findTagForTitle(base, dictionary);
|
2020-02-14 01:24:54 +00:00
|
|
|
cache.set(base, tagMeta);
|
2019-10-19 17:32:05 +00:00
|
|
|
}
|
2020-02-14 01:24:54 +00:00
|
|
|
|
|
|
|
tagMetaList.push(tagMeta);
|
2019-10-19 17:32:05 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
return tagMetaList;
|
|
|
|
}
|
|
|
|
|
2020-10-04 23:33:22 +00:00
|
|
|
// Simple helpers
|
|
|
|
|
2020-08-09 17:21:14 +00:00
|
|
|
_scoreToTermFrequency(score) {
|
2019-10-19 18:18:57 +00:00
|
|
|
if (score > 0) {
|
|
|
|
return 'popular';
|
|
|
|
} else if (score < 0) {
|
|
|
|
return 'rare';
|
|
|
|
} else {
|
|
|
|
return 'normal';
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2020-08-09 17:21:14 +00:00
|
|
|
_getNameBase(name) {
|
2019-08-11 18:12:01 +00:00
|
|
|
const pos = name.indexOf(':');
|
2019-11-24 16:02:52 +00:00
|
|
|
return (pos >= 0 ? name.substring(0, pos) : name);
|
2019-08-11 18:12:01 +00:00
|
|
|
}
|
2019-12-22 18:09:35 +00:00
|
|
|
|
2020-10-04 15:12:15 +00:00
|
|
|
_getSearchableText(text, allowAlphanumericCharacters) {
|
|
|
|
if (allowAlphanumericCharacters) {
|
|
|
|
return text;
|
2019-12-22 23:26:27 +00:00
|
|
|
}
|
|
|
|
|
2020-11-29 18:09:02 +00:00
|
|
|
const jp = this._japaneseUtil;
|
2020-10-04 15:12:15 +00:00
|
|
|
let newText = '';
|
|
|
|
for (const c of text) {
|
|
|
|
if (!jp.isCodePointJapanese(c.codePointAt(0))) {
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
newText += c;
|
|
|
|
}
|
|
|
|
return newText;
|
2019-12-22 23:26:27 +00:00
|
|
|
}
|
2020-08-02 17:30:55 +00:00
|
|
|
|
2020-10-04 23:33:22 +00:00
|
|
|
_getTextOptionEntryVariants(value) {
|
|
|
|
switch (value) {
|
|
|
|
case 'true': return [true];
|
|
|
|
case 'variant': return [false, true];
|
|
|
|
default: return [false];
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2021-01-02 04:16:44 +00:00
|
|
|
_getCollapseEmphaticOptions(options) {
|
|
|
|
const collapseEmphaticOptions = [[false, false]];
|
|
|
|
switch (options.collapseEmphaticSequences) {
|
|
|
|
case 'true':
|
|
|
|
collapseEmphaticOptions.push([true, false]);
|
|
|
|
break;
|
|
|
|
case 'full':
|
|
|
|
collapseEmphaticOptions.push([true, false], [true, true]);
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
return collapseEmphaticOptions;
|
|
|
|
}
|
|
|
|
|
2021-01-03 17:12:55 +00:00
|
|
|
_getTextReplacementsVariants(options) {
|
|
|
|
return options.textReplacements;
|
|
|
|
}
|
|
|
|
|
2020-10-04 15:12:15 +00:00
|
|
|
_getSecondarySearchDictionaryMap(enabledDictionaryMap) {
|
|
|
|
const secondarySearchDictionaryMap = new Map();
|
2020-10-07 00:28:49 +00:00
|
|
|
for (const [dictionary, details] of enabledDictionaryMap.entries()) {
|
|
|
|
if (!details.allowSecondarySearches) { continue; }
|
|
|
|
secondarySearchDictionaryMap.set(dictionary, details);
|
2020-10-04 15:12:15 +00:00
|
|
|
}
|
|
|
|
return secondarySearchDictionaryMap;
|
|
|
|
}
|
|
|
|
|
2021-02-28 04:11:41 +00:00
|
|
|
_getDictionaryOrder(dictionary, enabledDictionaryMap) {
|
2020-10-04 15:12:15 +00:00
|
|
|
const info = enabledDictionaryMap.get(dictionary);
|
2021-03-06 18:04:50 +00:00
|
|
|
const {index, priority} = typeof info !== 'undefined' ? info : {index: enabledDictionaryMap.size, priority: 0};
|
|
|
|
return {index, priority};
|
2020-10-04 15:12:15 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
_getTagNamesWithCategory(tags, category) {
|
|
|
|
const results = [];
|
|
|
|
for (const tag of tags) {
|
|
|
|
if (tag.category !== category) { continue; }
|
|
|
|
results.push(tag.name);
|
|
|
|
}
|
|
|
|
results.sort();
|
|
|
|
return results;
|
|
|
|
}
|
|
|
|
|
2020-11-13 01:34:11 +00:00
|
|
|
_flagTagsWithCategoryAsRedundant(tags, removeCategoriesSet) {
|
|
|
|
for (const tag of tags) {
|
|
|
|
if (removeCategoriesSet.has(tag.category)) {
|
|
|
|
tag.redundant = true;
|
|
|
|
}
|
2020-08-10 01:04:09 +00:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2020-10-07 00:28:49 +00:00
|
|
|
_getUniqueDictionaryNames(definitions) {
|
|
|
|
const uniqueDictionaryNames = new Set();
|
|
|
|
for (const {dictionaryNames} of definitions) {
|
|
|
|
for (const dictionaryName of dictionaryNames) {
|
|
|
|
uniqueDictionaryNames.add(dictionaryName);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
return [...uniqueDictionaryNames];
|
|
|
|
}
|
|
|
|
|
2020-12-19 21:47:32 +00:00
|
|
|
_getUniqueTermTags(definitions) {
|
|
|
|
const newTermTags = [];
|
|
|
|
if (definitions.length <= 1) {
|
|
|
|
for (const {termTags} of definitions) {
|
|
|
|
for (const tag of termTags) {
|
|
|
|
newTermTags.push(this._cloneTag(tag));
|
|
|
|
}
|
|
|
|
}
|
|
|
|
} else {
|
|
|
|
const tagsSet = new Set();
|
|
|
|
let checkTagsMap = false;
|
|
|
|
for (const {termTags} of definitions) {
|
|
|
|
for (const tag of termTags) {
|
|
|
|
const key = this._getTagMapKey(tag);
|
|
|
|
if (checkTagsMap && tagsSet.has(key)) { continue; }
|
|
|
|
tagsSet.add(key);
|
|
|
|
newTermTags.push(this._cloneTag(tag));
|
|
|
|
}
|
|
|
|
checkTagsMap = true;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
return newTermTags;
|
|
|
|
}
|
|
|
|
|
2020-10-04 23:33:22 +00:00
|
|
|
*_getArrayVariants(arrayVariants) {
|
|
|
|
const ii = arrayVariants.length;
|
2020-08-10 01:04:09 +00:00
|
|
|
|
2020-10-04 23:33:22 +00:00
|
|
|
let total = 1;
|
|
|
|
for (let i = 0; i < ii; ++i) {
|
|
|
|
total *= arrayVariants[i].length;
|
2020-08-10 01:04:09 +00:00
|
|
|
}
|
|
|
|
|
2020-10-04 23:33:22 +00:00
|
|
|
for (let a = 0; a < total; ++a) {
|
|
|
|
const variant = [];
|
|
|
|
let index = a;
|
|
|
|
for (let i = 0; i < ii; ++i) {
|
|
|
|
const entryVariants = arrayVariants[i];
|
|
|
|
variant.push(entryVariants[index % entryVariants.length]);
|
|
|
|
index = Math.floor(index / entryVariants.length);
|
|
|
|
}
|
|
|
|
yield variant;
|
2020-08-10 01:04:09 +00:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2021-01-08 02:36:20 +00:00
|
|
|
_areSetsEqual(set1, set2) {
|
|
|
|
if (set1.size !== set2.size) {
|
|
|
|
return false;
|
|
|
|
}
|
|
|
|
|
|
|
|
for (const value of set1) {
|
|
|
|
if (!set2.has(value)) {
|
|
|
|
return false;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
return true;
|
|
|
|
}
|
|
|
|
|
|
|
|
_getSetIntersection(set1, set2) {
|
|
|
|
const result = [];
|
|
|
|
for (const value of set1) {
|
|
|
|
if (set2.has(value)) {
|
|
|
|
result.push(value);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
return result;
|
|
|
|
}
|
|
|
|
|
2021-02-28 03:27:00 +00:00
|
|
|
_getAllDefinitions(definitions) {
|
|
|
|
definitions = [...definitions];
|
|
|
|
for (let i = 0; i < definitions.length; ++i) {
|
|
|
|
const childDefinitions = definitions[i].definitions;
|
|
|
|
if (Array.isArray(childDefinitions)) {
|
|
|
|
definitions.push(...childDefinitions);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
return definitions;
|
|
|
|
}
|
|
|
|
|
2020-10-04 23:33:22 +00:00
|
|
|
// Reduction functions
|
2020-08-10 01:04:09 +00:00
|
|
|
|
2020-10-04 23:33:22 +00:00
|
|
|
_getTermTagsScoreSum(termTags) {
|
|
|
|
let result = 0;
|
|
|
|
for (const {score} of termTags) {
|
|
|
|
result += score;
|
2020-10-04 15:12:15 +00:00
|
|
|
}
|
2020-10-04 23:33:22 +00:00
|
|
|
return result;
|
2020-10-04 15:12:15 +00:00
|
|
|
}
|
2020-08-10 01:04:09 +00:00
|
|
|
|
2020-10-04 23:33:22 +00:00
|
|
|
_getSourceTermMatchCountSum(definitions) {
|
|
|
|
let result = 0;
|
|
|
|
for (const {sourceTermExactMatchCount} of definitions) {
|
|
|
|
result += sourceTermExactMatchCount;
|
2020-10-04 15:12:15 +00:00
|
|
|
}
|
2020-10-04 23:33:22 +00:00
|
|
|
return result;
|
2020-10-04 15:12:15 +00:00
|
|
|
}
|
2020-08-10 01:04:09 +00:00
|
|
|
|
2020-10-04 15:12:15 +00:00
|
|
|
_getMaxDefinitionScore(definitions) {
|
|
|
|
let result = Number.MIN_SAFE_INTEGER;
|
|
|
|
for (const {score} of definitions) {
|
|
|
|
if (score > result) { result = score; }
|
|
|
|
}
|
|
|
|
return result;
|
|
|
|
}
|
2020-08-10 01:04:09 +00:00
|
|
|
|
2021-03-02 03:17:23 +00:00
|
|
|
_getMaxPrimaryDefinitionScore(definitions) {
|
|
|
|
let result = Number.MIN_SAFE_INTEGER;
|
|
|
|
for (const {isPrimary, score} of definitions) {
|
|
|
|
if (isPrimary && score > result) { result = score; }
|
|
|
|
}
|
|
|
|
return result;
|
|
|
|
}
|
|
|
|
|
2021-03-06 18:04:50 +00:00
|
|
|
_getBestDictionaryOrder(definitions) {
|
|
|
|
let index = Number.MAX_SAFE_INTEGER;
|
|
|
|
let priority = Number.MIN_SAFE_INTEGER;
|
|
|
|
for (const {dictionaryOrder: {index: index2, priority: priority2}} of definitions) {
|
|
|
|
if (index2 < index) { index = index2; }
|
|
|
|
if (priority2 > priority) { priority = priority2; }
|
2020-08-10 01:04:09 +00:00
|
|
|
}
|
2021-03-06 18:04:50 +00:00
|
|
|
return {index, priority};
|
2020-10-04 15:12:15 +00:00
|
|
|
}
|
2020-08-10 01:04:09 +00:00
|
|
|
|
2020-10-04 23:33:22 +00:00
|
|
|
// Common data creation and cloning functions
|
|
|
|
|
2020-10-04 15:12:15 +00:00
|
|
|
_cloneTag(tag) {
|
2020-11-13 01:34:11 +00:00
|
|
|
const {name, category, notes, order, score, dictionary, redundant} = tag;
|
|
|
|
return this._createTag(name, category, notes, order, score, dictionary, redundant);
|
2020-10-04 15:12:15 +00:00
|
|
|
}
|
|
|
|
|
2020-12-19 21:47:32 +00:00
|
|
|
_getTagMapKey(tag) {
|
|
|
|
const {name, category, notes} = tag;
|
|
|
|
return this._createMapKey([name, category, notes]);
|
2020-10-04 15:12:15 +00:00
|
|
|
}
|
2020-08-10 01:04:09 +00:00
|
|
|
|
2020-10-04 15:12:15 +00:00
|
|
|
_createMapKey(array) {
|
|
|
|
return JSON.stringify(array);
|
2020-08-10 01:04:09 +00:00
|
|
|
}
|
|
|
|
|
2020-11-13 01:34:11 +00:00
|
|
|
_createTag(name, category, notes, order, score, dictionary, redundant) {
|
2020-10-02 21:59:14 +00:00
|
|
|
return {
|
|
|
|
name,
|
|
|
|
category: (typeof category === 'string' && category.length > 0 ? category : 'default'),
|
|
|
|
notes: (typeof notes === 'string' ? notes : ''),
|
|
|
|
order: (typeof order === 'number' ? order : 0),
|
|
|
|
score: (typeof score === 'number' ? score : 0),
|
2020-11-13 01:34:11 +00:00
|
|
|
dictionary: (typeof dictionary === 'string' ? dictionary : null),
|
|
|
|
redundant
|
2020-10-02 21:59:14 +00:00
|
|
|
};
|
2020-08-10 01:04:09 +00:00
|
|
|
}
|
|
|
|
|
2020-10-02 21:59:14 +00:00
|
|
|
_createKanjiStat(name, category, notes, order, score, dictionary, value) {
|
|
|
|
return {
|
|
|
|
name,
|
|
|
|
category: (typeof category === 'string' && category.length > 0 ? category : 'default'),
|
|
|
|
notes: (typeof notes === 'string' ? notes : ''),
|
|
|
|
order: (typeof order === 'number' ? order : 0),
|
|
|
|
score: (typeof score === 'number' ? score : 0),
|
|
|
|
dictionary: (typeof dictionary === 'string' ? dictionary : null),
|
|
|
|
value
|
|
|
|
};
|
|
|
|
}
|
|
|
|
|
2020-10-04 15:12:15 +00:00
|
|
|
_createKanjiDefinition(character, dictionary, onyomi, kunyomi, glossary, tags, stats) {
|
|
|
|
return {
|
|
|
|
type: 'kanji',
|
|
|
|
character,
|
|
|
|
dictionary,
|
|
|
|
onyomi,
|
|
|
|
kunyomi,
|
|
|
|
glossary,
|
|
|
|
tags,
|
|
|
|
stats,
|
|
|
|
frequencies: []
|
|
|
|
};
|
|
|
|
}
|
|
|
|
|
2021-03-02 00:01:30 +00:00
|
|
|
async _createTermDefinitionFromDatabaseDefinition(databaseDefinition, source, rawSource, sourceTerm, reasons, isPrimary, enabledDictionaryMap) {
|
2020-10-02 21:59:14 +00:00
|
|
|
const {expression, reading, definitionTags, termTags, glossary, score, dictionary, id, sequence} = databaseDefinition;
|
2021-02-28 04:11:41 +00:00
|
|
|
const dictionaryOrder = this._getDictionaryOrder(dictionary, enabledDictionaryMap);
|
2020-10-02 21:59:14 +00:00
|
|
|
const termTagsExpanded = await this._expandTags(termTags, dictionary);
|
|
|
|
const definitionTagsExpanded = await this._expandTags(definitionTags, dictionary);
|
|
|
|
|
|
|
|
this._sortTags(definitionTagsExpanded);
|
|
|
|
this._sortTags(termTagsExpanded);
|
|
|
|
|
2020-11-29 18:09:02 +00:00
|
|
|
const furiganaSegments = this._japaneseUtil.distributeFurigana(expression, reading);
|
2020-11-14 17:09:14 +00:00
|
|
|
const termDetailsList = [this._createTermDetails(sourceTerm, expression, reading, furiganaSegments, termTagsExpanded)];
|
2020-10-04 22:54:03 +00:00
|
|
|
const sourceTermExactMatchCount = (sourceTerm === expression ? 1 : 0);
|
2020-10-02 21:59:14 +00:00
|
|
|
|
|
|
|
return {
|
2020-10-04 15:12:15 +00:00
|
|
|
type: 'term',
|
|
|
|
id,
|
2020-10-02 21:59:14 +00:00
|
|
|
source,
|
|
|
|
rawSource,
|
2020-10-04 22:10:10 +00:00
|
|
|
sourceTerm,
|
2020-10-02 21:59:14 +00:00
|
|
|
reasons,
|
|
|
|
score,
|
2021-03-02 00:01:30 +00:00
|
|
|
isPrimary,
|
2020-10-04 15:12:15 +00:00
|
|
|
sequence,
|
2020-10-02 21:59:14 +00:00
|
|
|
dictionary,
|
2021-02-28 04:11:41 +00:00
|
|
|
dictionaryOrder,
|
2020-10-07 00:28:49 +00:00
|
|
|
dictionaryNames: [dictionary],
|
2020-10-02 21:59:14 +00:00
|
|
|
expression,
|
|
|
|
reading,
|
2020-10-04 22:10:10 +00:00
|
|
|
expressions: termDetailsList,
|
2020-10-02 21:59:14 +00:00
|
|
|
furiganaSegments,
|
|
|
|
glossary,
|
|
|
|
definitionTags: definitionTagsExpanded,
|
|
|
|
termTags: termTagsExpanded,
|
2020-10-04 15:12:15 +00:00
|
|
|
// definitions
|
|
|
|
frequencies: [],
|
2020-10-04 22:54:03 +00:00
|
|
|
pitches: [],
|
2020-10-04 15:12:15 +00:00
|
|
|
// only
|
2020-10-04 22:54:03 +00:00
|
|
|
sourceTermExactMatchCount
|
2020-10-02 21:59:14 +00:00
|
|
|
};
|
|
|
|
}
|
|
|
|
|
2020-10-04 15:12:15 +00:00
|
|
|
_createGroupedTermDefinition(definitions) {
|
2020-12-19 21:47:32 +00:00
|
|
|
const {expression, reading, furiganaSegments, reasons, source, rawSource, sourceTerm} = definitions[0];
|
2020-10-04 15:12:15 +00:00
|
|
|
const score = this._getMaxDefinitionScore(definitions);
|
2021-03-06 18:04:50 +00:00
|
|
|
const dictionaryOrder = this._getBestDictionaryOrder(definitions);
|
2020-10-07 00:28:49 +00:00
|
|
|
const dictionaryNames = this._getUniqueDictionaryNames(definitions);
|
2020-12-19 21:47:32 +00:00
|
|
|
const termTags = this._getUniqueTermTags(definitions);
|
2020-10-04 22:10:10 +00:00
|
|
|
const termDetailsList = [this._createTermDetails(sourceTerm, expression, reading, furiganaSegments, termTags)];
|
2020-10-04 22:54:03 +00:00
|
|
|
const sourceTermExactMatchCount = (sourceTerm === expression ? 1 : 0);
|
2020-10-04 15:12:15 +00:00
|
|
|
return {
|
|
|
|
type: 'termGrouped',
|
|
|
|
// id
|
|
|
|
source,
|
|
|
|
rawSource,
|
2020-10-04 22:10:10 +00:00
|
|
|
sourceTerm,
|
2020-10-04 15:12:15 +00:00
|
|
|
reasons: [...reasons],
|
|
|
|
score,
|
2021-03-02 00:01:30 +00:00
|
|
|
// isPrimary
|
2020-10-04 15:12:15 +00:00
|
|
|
// sequence
|
2020-10-07 00:28:49 +00:00
|
|
|
dictionary: dictionaryNames[0],
|
2021-02-28 04:11:41 +00:00
|
|
|
dictionaryOrder,
|
2020-10-07 00:28:49 +00:00
|
|
|
dictionaryNames,
|
2020-10-04 15:12:15 +00:00
|
|
|
expression,
|
|
|
|
reading,
|
2020-10-04 22:10:10 +00:00
|
|
|
expressions: termDetailsList,
|
2020-10-04 15:12:15 +00:00
|
|
|
furiganaSegments, // Contains duplicate data
|
|
|
|
// glossary
|
|
|
|
// definitionTags
|
2020-12-19 21:47:32 +00:00
|
|
|
termTags,
|
2020-10-04 16:54:55 +00:00
|
|
|
definitions, // type: 'term'
|
2020-10-04 15:12:15 +00:00
|
|
|
frequencies: [],
|
2020-10-04 22:54:03 +00:00
|
|
|
pitches: [],
|
2020-10-04 15:12:15 +00:00
|
|
|
// only
|
2020-10-04 22:54:03 +00:00
|
|
|
sourceTermExactMatchCount
|
2020-10-04 15:12:15 +00:00
|
|
|
};
|
|
|
|
}
|
|
|
|
|
2020-10-07 00:28:49 +00:00
|
|
|
_createMergedTermDefinition(source, rawSource, definitions, expressions, readings, termDetailsList, reasons, score) {
|
2021-03-06 18:04:50 +00:00
|
|
|
const dictionaryOrder = this._getBestDictionaryOrder(definitions);
|
2020-10-04 22:54:03 +00:00
|
|
|
const sourceTermExactMatchCount = this._getSourceTermMatchCountSum(definitions);
|
2020-10-07 00:28:49 +00:00
|
|
|
const dictionaryNames = this._getUniqueDictionaryNames(definitions);
|
2020-10-04 15:12:15 +00:00
|
|
|
return {
|
|
|
|
type: 'termMerged',
|
|
|
|
// id
|
|
|
|
source,
|
|
|
|
rawSource,
|
2020-10-04 22:10:10 +00:00
|
|
|
// sourceTerm
|
2020-10-04 15:12:15 +00:00
|
|
|
reasons,
|
|
|
|
score,
|
2021-03-02 00:01:30 +00:00
|
|
|
// isPrimary
|
2020-10-04 15:12:15 +00:00
|
|
|
// sequence
|
2020-10-07 00:28:49 +00:00
|
|
|
dictionary: dictionaryNames[0],
|
2021-02-28 04:11:41 +00:00
|
|
|
dictionaryOrder,
|
2020-10-07 00:28:49 +00:00
|
|
|
dictionaryNames,
|
2020-10-04 15:12:15 +00:00
|
|
|
expression: expressions,
|
|
|
|
reading: readings,
|
2020-10-04 22:10:10 +00:00
|
|
|
expressions: termDetailsList,
|
2020-10-04 15:12:15 +00:00
|
|
|
// furiganaSegments
|
|
|
|
// glossary
|
|
|
|
// definitionTags
|
|
|
|
// termTags
|
2020-10-04 16:54:55 +00:00
|
|
|
definitions, // type: 'termMergedByGlossary'
|
2020-10-04 15:12:15 +00:00
|
|
|
frequencies: [],
|
2020-10-04 22:54:03 +00:00
|
|
|
pitches: [],
|
2020-10-04 15:12:15 +00:00
|
|
|
// only
|
2020-10-04 22:54:03 +00:00
|
|
|
sourceTermExactMatchCount
|
2020-10-04 15:12:15 +00:00
|
|
|
};
|
|
|
|
}
|
|
|
|
|
|
|
|
_createMergedGlossaryTermDefinition(source, rawSource, definitions, expressions, readings, allExpressions, allReadings) {
|
|
|
|
const only = [];
|
2021-01-08 02:36:20 +00:00
|
|
|
if (!this._areSetsEqual(expressions, allExpressions)) {
|
|
|
|
only.push(...this._getSetIntersection(expressions, allExpressions));
|
2020-10-04 15:12:15 +00:00
|
|
|
}
|
2021-01-08 02:36:20 +00:00
|
|
|
if (!this._areSetsEqual(readings, allReadings)) {
|
|
|
|
only.push(...this._getSetIntersection(readings, allReadings));
|
2020-10-04 15:12:15 +00:00
|
|
|
}
|
|
|
|
|
2020-10-04 22:54:03 +00:00
|
|
|
const sourceTermExactMatchCount = this._getSourceTermMatchCountSum(definitions);
|
2020-10-07 00:28:49 +00:00
|
|
|
const dictionaryNames = this._getUniqueDictionaryNames(definitions);
|
2020-10-04 22:54:03 +00:00
|
|
|
|
2021-03-02 03:17:23 +00:00
|
|
|
const termDetailsList = this._createTermDetailsList(definitions);
|
2020-10-04 22:10:10 +00:00
|
|
|
|
2020-10-04 15:12:15 +00:00
|
|
|
const definitionTags = this._getUniqueDefinitionTags(definitions);
|
|
|
|
this._sortTags(definitionTags);
|
|
|
|
|
2020-10-07 00:28:49 +00:00
|
|
|
const {glossary} = definitions[0];
|
2020-10-04 15:12:15 +00:00
|
|
|
const score = this._getMaxDefinitionScore(definitions);
|
2021-03-06 18:04:50 +00:00
|
|
|
const dictionaryOrder = this._getBestDictionaryOrder(definitions);
|
2020-10-04 15:12:15 +00:00
|
|
|
return {
|
|
|
|
type: 'termMergedByGlossary',
|
|
|
|
// id
|
|
|
|
source,
|
|
|
|
rawSource,
|
2020-10-04 22:10:10 +00:00
|
|
|
// sourceTerm
|
2020-10-04 15:12:15 +00:00
|
|
|
reasons: [],
|
|
|
|
score,
|
2021-03-02 00:01:30 +00:00
|
|
|
// isPrimary
|
2020-10-04 15:12:15 +00:00
|
|
|
// sequence
|
2020-10-07 00:28:49 +00:00
|
|
|
dictionary: dictionaryNames[0],
|
2021-02-28 04:11:41 +00:00
|
|
|
dictionaryOrder,
|
2020-10-07 00:28:49 +00:00
|
|
|
dictionaryNames,
|
2020-10-04 15:12:15 +00:00
|
|
|
expression: [...expressions],
|
|
|
|
reading: [...readings],
|
2020-10-04 22:10:10 +00:00
|
|
|
expressions: termDetailsList,
|
2020-10-04 15:12:15 +00:00
|
|
|
// furiganaSegments
|
|
|
|
glossary: [...glossary],
|
|
|
|
definitionTags,
|
|
|
|
// termTags
|
2020-10-04 16:54:55 +00:00
|
|
|
definitions, // type: 'term'; contains duplicate data
|
2020-10-04 15:12:15 +00:00
|
|
|
frequencies: [],
|
|
|
|
pitches: [],
|
2020-10-04 22:54:03 +00:00
|
|
|
only,
|
|
|
|
sourceTermExactMatchCount
|
2020-10-04 15:12:15 +00:00
|
|
|
};
|
|
|
|
}
|
|
|
|
|
2021-03-02 03:17:23 +00:00
|
|
|
_createTermDetailsList(definitions) {
|
|
|
|
const termInfoMap = new Map();
|
|
|
|
for (const {expression, reading, sourceTerm, furiganaSegments, termTags} of definitions) {
|
|
|
|
let readingMap = termInfoMap.get(expression);
|
|
|
|
if (typeof readingMap === 'undefined') {
|
|
|
|
readingMap = new Map();
|
|
|
|
termInfoMap.set(expression, readingMap);
|
|
|
|
}
|
|
|
|
|
|
|
|
let termInfo = readingMap.get(reading);
|
|
|
|
if (typeof termInfo === 'undefined') {
|
|
|
|
termInfo = {
|
|
|
|
sourceTerm,
|
|
|
|
furiganaSegments,
|
|
|
|
termTagsMap: new Map()
|
|
|
|
};
|
|
|
|
readingMap.set(reading, termInfo);
|
|
|
|
}
|
|
|
|
|
|
|
|
const {termTagsMap} = termInfo;
|
|
|
|
for (const tag of termTags) {
|
|
|
|
const {name} = tag;
|
|
|
|
if (termTagsMap.has(name)) { continue; }
|
|
|
|
termTagsMap.set(name, this._cloneTag(tag));
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2020-10-04 22:10:10 +00:00
|
|
|
const termDetailsList = [];
|
|
|
|
for (const [expression, readingMap] of termInfoMap.entries()) {
|
|
|
|
for (const [reading, {termTagsMap, sourceTerm, furiganaSegments}] of readingMap.entries()) {
|
|
|
|
const termTags = [...termTagsMap.values()];
|
|
|
|
this._sortTags(termTags);
|
|
|
|
termDetailsList.push(this._createTermDetails(sourceTerm, expression, reading, furiganaSegments, termTags));
|
|
|
|
}
|
|
|
|
}
|
|
|
|
return termDetailsList;
|
|
|
|
}
|
|
|
|
|
|
|
|
_createTermDetails(sourceTerm, expression, reading, furiganaSegments, termTags) {
|
2020-10-04 15:12:15 +00:00
|
|
|
const termFrequency = this._scoreToTermFrequency(this._getTermTagsScoreSum(termTags));
|
2020-10-02 21:59:14 +00:00
|
|
|
return {
|
2020-10-04 22:10:10 +00:00
|
|
|
sourceTerm,
|
2020-10-02 21:59:14 +00:00
|
|
|
expression,
|
|
|
|
reading,
|
2020-10-04 22:10:10 +00:00
|
|
|
furiganaSegments, // Contains duplicate data
|
2020-10-02 21:59:14 +00:00
|
|
|
termTags,
|
2020-10-04 15:12:15 +00:00
|
|
|
termFrequency,
|
|
|
|
frequencies: [],
|
|
|
|
pitches: []
|
2020-10-02 21:59:14 +00:00
|
|
|
};
|
2020-08-10 01:04:09 +00:00
|
|
|
}
|
|
|
|
|
2020-10-04 23:33:22 +00:00
|
|
|
// Sorting functions
|
|
|
|
|
2020-08-10 01:04:09 +00:00
|
|
|
_sortTags(tags) {
|
2020-10-02 21:59:14 +00:00
|
|
|
if (tags.length <= 1) { return; }
|
|
|
|
const stringComparer = this._stringComparer;
|
|
|
|
tags.sort((v1, v2) => {
|
|
|
|
const i = v1.order - v2.order;
|
|
|
|
if (i !== 0) { return i; }
|
2020-08-10 01:04:09 +00:00
|
|
|
|
2020-10-02 21:59:14 +00:00
|
|
|
return stringComparer.compare(v1.name, v2.name);
|
|
|
|
});
|
|
|
|
}
|
|
|
|
|
2021-02-27 04:55:32 +00:00
|
|
|
_sortDefinitions(definitions) {
|
2020-10-02 21:59:14 +00:00
|
|
|
if (definitions.length <= 1) { return; }
|
|
|
|
const stringComparer = this._stringComparer;
|
2021-02-27 04:55:32 +00:00
|
|
|
const compareFunction = (v1, v2) => {
|
2021-03-06 18:04:50 +00:00
|
|
|
// Sort by length of source term
|
2021-03-06 18:27:20 +00:00
|
|
|
let i = v2.source.length - v1.source.length;
|
2020-10-02 21:59:14 +00:00
|
|
|
if (i !== 0) { return i; }
|
|
|
|
|
2021-03-06 18:04:50 +00:00
|
|
|
// Sort by the number of inflection reasons
|
2020-10-02 21:59:14 +00:00
|
|
|
i = v1.reasons.length - v2.reasons.length;
|
|
|
|
if (i !== 0) { return i; }
|
|
|
|
|
2021-03-06 18:04:50 +00:00
|
|
|
// Sort by how many terms exactly match the source (e.g. for exact kana prioritization)
|
2020-10-04 22:54:03 +00:00
|
|
|
i = v2.sourceTermExactMatchCount - v1.sourceTermExactMatchCount;
|
|
|
|
if (i !== 0) { return i; }
|
|
|
|
|
2021-03-06 18:27:20 +00:00
|
|
|
// Sort by dictionary priority
|
|
|
|
i = v2.dictionaryOrder.priority - v1.dictionaryOrder.priority;
|
|
|
|
if (i !== 0) { return i; }
|
|
|
|
|
2021-03-06 18:04:50 +00:00
|
|
|
// Sort by term score
|
2020-10-02 21:59:14 +00:00
|
|
|
i = v2.score - v1.score;
|
|
|
|
if (i !== 0) { return i; }
|
|
|
|
|
2021-03-06 18:04:50 +00:00
|
|
|
// Sort by expression string comparison (skip if either expression is not a string, e.g. array)
|
2020-10-02 21:59:14 +00:00
|
|
|
const expression1 = v1.expression;
|
|
|
|
const expression2 = v2.expression;
|
2021-03-06 18:04:50 +00:00
|
|
|
if (typeof expression1 === 'string' && typeof expression2 === 'string') {
|
|
|
|
i = expression2.length - expression1.length;
|
|
|
|
if (i !== 0) { return i; }
|
2020-10-04 23:04:21 +00:00
|
|
|
|
2021-03-06 18:04:50 +00:00
|
|
|
i = stringComparer.compare(expression1, expression2);
|
|
|
|
if (i !== 0) { return i; }
|
|
|
|
}
|
2020-10-02 21:59:14 +00:00
|
|
|
|
2021-03-06 18:04:50 +00:00
|
|
|
// Sort by dictionary order
|
|
|
|
i = v1.dictionaryOrder.index - v2.dictionaryOrder.index;
|
|
|
|
return i;
|
2020-10-04 15:12:15 +00:00
|
|
|
};
|
2021-02-27 04:55:32 +00:00
|
|
|
definitions.sort(compareFunction);
|
2020-10-02 21:59:14 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
_sortDatabaseDefinitionsByIndex(definitions) {
|
|
|
|
if (definitions.length <= 1) { return; }
|
|
|
|
definitions.sort((a, b) => a.index - b.index);
|
|
|
|
}
|
|
|
|
|
2020-10-05 02:04:44 +00:00
|
|
|
_sortDefinitionsById(definitions) {
|
|
|
|
if (definitions.length <= 1) { return; }
|
|
|
|
definitions.sort((a, b) => a.id - b.id);
|
|
|
|
}
|
|
|
|
|
2020-10-02 21:59:14 +00:00
|
|
|
_sortKanjiStats(stats) {
|
|
|
|
if (stats.length <= 1) { return; }
|
|
|
|
const stringComparer = this._stringComparer;
|
|
|
|
stats.sort((v1, v2) => {
|
|
|
|
const i = v1.order - v2.order;
|
|
|
|
if (i !== 0) { return i; }
|
|
|
|
|
|
|
|
return stringComparer.compare(v1.notes, v2.notes);
|
2020-08-10 01:04:09 +00:00
|
|
|
});
|
|
|
|
}
|
2021-01-03 17:12:55 +00:00
|
|
|
|
2021-02-28 03:27:00 +00:00
|
|
|
_sortTermDefinitionMeta(definition) {
|
|
|
|
const compareFunction = (v1, v2) => {
|
2021-03-06 18:04:50 +00:00
|
|
|
// Sort by dictionary priority
|
|
|
|
let i = v2.dictionaryOrder.priority - v1.dictionaryOrder.priority;
|
2021-02-28 03:27:00 +00:00
|
|
|
if (i !== 0) { return i; }
|
|
|
|
|
|
|
|
// Sory by expression order
|
|
|
|
i = v1.expressionIndex - v2.expressionIndex;
|
|
|
|
if (i !== 0) { return i; }
|
|
|
|
|
2021-03-06 18:04:50 +00:00
|
|
|
// Sort by dictionary order
|
|
|
|
i = v1.dictionaryOrder.index - v2.dictionaryOrder.index;
|
|
|
|
if (i !== 0) { return i; }
|
|
|
|
|
2021-02-28 03:27:00 +00:00
|
|
|
// Default order
|
|
|
|
i = v1.index - v2.index;
|
|
|
|
return i;
|
|
|
|
};
|
|
|
|
|
|
|
|
const {expressions, frequencies: frequencies1, pitches: pitches1} = definition;
|
|
|
|
frequencies1.sort(compareFunction);
|
|
|
|
pitches1.sort(compareFunction);
|
|
|
|
for (const {frequencies: frequencies2, pitches: pitches2} of expressions) {
|
|
|
|
frequencies2.sort(compareFunction);
|
|
|
|
pitches2.sort(compareFunction);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
_sortKanjiDefinitionMeta(definition) {
|
|
|
|
const compareFunction = (v1, v2) => {
|
2021-03-06 18:04:50 +00:00
|
|
|
// Sort by dictionary priority
|
|
|
|
let i = v2.dictionaryOrder.priority - v1.dictionaryOrder.priority;
|
|
|
|
if (i !== 0) { return i; }
|
|
|
|
|
|
|
|
// Sort by dictionary order
|
|
|
|
i = v1.dictionaryOrder.index - v2.dictionaryOrder.index;
|
2021-02-28 03:27:00 +00:00
|
|
|
if (i !== 0) { return i; }
|
|
|
|
|
|
|
|
// Default order
|
|
|
|
i = v1.index - v2.index;
|
|
|
|
return i;
|
|
|
|
};
|
|
|
|
|
|
|
|
const {frequencies} = definition;
|
|
|
|
frequencies.sort(compareFunction);
|
|
|
|
}
|
|
|
|
|
2021-01-03 17:12:55 +00:00
|
|
|
// Regex functions
|
|
|
|
|
|
|
|
_applyTextReplacements(text, sourceMap, replacements) {
|
|
|
|
for (const {pattern, replacement} of replacements) {
|
|
|
|
text = this._applyTextReplacement(text, sourceMap, pattern, replacement);
|
|
|
|
}
|
|
|
|
return text;
|
|
|
|
}
|
|
|
|
|
|
|
|
_applyTextReplacement(text, sourceMap, pattern, replacement) {
|
|
|
|
const isGlobal = pattern.global;
|
|
|
|
if (isGlobal) { pattern.lastIndex = 0; }
|
|
|
|
for (let loop = true; loop; loop = isGlobal) {
|
|
|
|
const match = pattern.exec(text);
|
|
|
|
if (match === null) { break; }
|
|
|
|
|
|
|
|
const matchText = match[0];
|
|
|
|
const index = match.index;
|
|
|
|
const actualReplacement = this._applyMatchReplacement(replacement, match);
|
|
|
|
const actualReplacementLength = actualReplacement.length;
|
|
|
|
const delta = actualReplacementLength - (matchText.length > 0 ? matchText.length : -1);
|
|
|
|
|
|
|
|
text = `${text.substring(0, index)}${actualReplacement}${text.substring(index + matchText.length)}`;
|
|
|
|
pattern.lastIndex += delta;
|
|
|
|
|
|
|
|
if (actualReplacementLength > 0) {
|
|
|
|
sourceMap.insert(index, ...(new Array(actualReplacementLength).fill(0)));
|
2021-02-19 23:39:43 +00:00
|
|
|
sourceMap.combine(index - 1 + actualReplacementLength, matchText.length);
|
2021-01-03 17:12:55 +00:00
|
|
|
} else {
|
|
|
|
sourceMap.combine(index, matchText.length);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
return text;
|
|
|
|
}
|
|
|
|
|
|
|
|
_applyMatchReplacement(replacement, match) {
|
|
|
|
const pattern = /\$(?:\$|&|`|'|(\d\d?)|<([^>]*)>)/g;
|
|
|
|
return replacement.replace(pattern, (g0, g1, g2) => {
|
|
|
|
if (typeof g1 !== 'undefined') {
|
|
|
|
const matchIndex = Number.parseInt(g1, 10);
|
|
|
|
if (matchIndex >= 1 && matchIndex <= match.length) {
|
|
|
|
return match[matchIndex];
|
|
|
|
}
|
|
|
|
} else if (typeof g2 !== 'undefined') {
|
|
|
|
const {groups} = match;
|
|
|
|
if (typeof groups === 'object' && groups !== null && Object.prototype.hasOwnProperty.call(groups, g2)) {
|
|
|
|
return groups[g2];
|
|
|
|
}
|
|
|
|
} else {
|
|
|
|
switch (g0) {
|
|
|
|
case '$': return '$';
|
|
|
|
case '&': return match[0];
|
|
|
|
case '`': return replacement.substring(0, match.index);
|
|
|
|
case '\'': return replacement.substring(match.index + g0.length);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
return g0;
|
|
|
|
});
|
|
|
|
}
|
2016-03-20 17:52:27 +00:00
|
|
|
}
|