2016-03-20 17:52:27 +00:00
|
|
|
/*
|
2021-01-01 19:50:41 +00:00
|
|
|
* Copyright (C) 2016-2021 Yomichan Authors
|
2016-03-20 17:52:27 +00:00
|
|
|
*
|
|
|
|
* This program is free software: you can redistribute it and/or modify
|
|
|
|
* it under the terms of the GNU General Public License as published by
|
|
|
|
* the Free Software Foundation, either version 3 of the License, or
|
|
|
|
* (at your option) any later version.
|
|
|
|
*
|
|
|
|
* This program is distributed in the hope that it will be useful,
|
|
|
|
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
|
|
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
|
|
* GNU General Public License for more details.
|
|
|
|
*
|
|
|
|
* You should have received a copy of the GNU General Public License
|
2020-01-01 17:00:31 +00:00
|
|
|
* along with this program. If not, see <https://www.gnu.org/licenses/>.
|
2016-03-20 17:52:27 +00:00
|
|
|
*/
|
|
|
|
|
2020-03-11 02:30:36 +00:00
|
|
|
/* global
|
|
|
|
* Deinflector
|
2021-03-09 02:01:55 +00:00
|
|
|
* RegexUtil
|
2020-03-28 21:51:58 +00:00
|
|
|
* TextSourceMap
|
2020-03-11 02:30:36 +00:00
|
|
|
*/
|
2016-03-24 02:25:32 +00:00
|
|
|
|
2020-10-04 16:54:55 +00:00
|
|
|
/**
|
2021-03-25 23:55:31 +00:00
|
|
|
* Class which finds term and kanji dictionary entries for text.
|
2020-10-04 16:54:55 +00:00
|
|
|
*/
|
2016-03-20 17:52:27 +00:00
|
|
|
class Translator {
|
2020-10-04 16:54:55 +00:00
|
|
|
/**
|
|
|
|
* Creates a new Translator instance.
|
2021-04-04 20:22:35 +00:00
|
|
|
* @param japaneseUtil An instance of JapaneseUtil.
|
2020-10-04 16:54:55 +00:00
|
|
|
* @param database An instance of DictionaryDatabase.
|
|
|
|
*/
|
2020-11-29 18:09:02 +00:00
|
|
|
constructor({japaneseUtil, database}) {
|
|
|
|
this._japaneseUtil = japaneseUtil;
|
2020-08-09 17:21:14 +00:00
|
|
|
this._database = database;
|
|
|
|
this._deinflector = null;
|
|
|
|
this._tagCache = new Map();
|
2020-10-02 21:59:14 +00:00
|
|
|
this._stringComparer = new Intl.Collator('en-US'); // Invariant locale
|
2016-03-26 21:16:21 +00:00
|
|
|
}
|
|
|
|
|
2020-10-04 16:54:55 +00:00
|
|
|
/**
|
|
|
|
* Initializes the instance for use. The public API should not be used until
|
2020-10-04 17:09:04 +00:00
|
|
|
* this function has been called.
|
|
|
|
* @param deinflectionReasons The raw deinflections reasons data that the Deinflector uses.
|
2020-10-04 16:54:55 +00:00
|
|
|
*/
|
2020-10-04 17:09:04 +00:00
|
|
|
prepare(deinflectionReasons) {
|
|
|
|
this._deinflector = new Deinflector(deinflectionReasons);
|
2016-03-20 17:52:27 +00:00
|
|
|
}
|
2016-03-21 01:45:37 +00:00
|
|
|
|
2020-10-04 16:54:55 +00:00
|
|
|
/**
|
|
|
|
* Clears the database tag cache. This should be executed if the database is changed.
|
|
|
|
*/
|
2020-05-06 23:28:26 +00:00
|
|
|
clearDatabaseCaches() {
|
2020-08-09 17:21:14 +00:00
|
|
|
this._tagCache.clear();
|
2019-11-02 20:21:06 +00:00
|
|
|
}
|
|
|
|
|
2020-10-04 16:54:55 +00:00
|
|
|
/**
|
|
|
|
* Finds term definitions for the given text.
|
|
|
|
* @param mode The mode to use for finding terms, which determines the format of the resulting array.
|
2020-12-09 01:27:36 +00:00
|
|
|
* One of: 'group', 'merge', 'split', 'simple'
|
2020-10-04 16:54:55 +00:00
|
|
|
* @param text The text to find terms for.
|
|
|
|
* @param options An object using the following structure:
|
2021-03-25 23:55:31 +00:00
|
|
|
* ```
|
2020-10-04 16:54:55 +00:00
|
|
|
* {
|
2020-12-09 01:27:36 +00:00
|
|
|
* wildcard: (enum: null, 'prefix', 'suffix'),
|
2020-10-04 16:54:55 +00:00
|
|
|
* mainDictionary: (string),
|
|
|
|
* alphanumeric: (boolean),
|
2020-10-04 23:36:21 +00:00
|
|
|
* convertHalfWidthCharacters: (enum: 'false', 'true', 'variant'),
|
|
|
|
* convertNumericCharacters: (enum: 'false', 'true', 'variant'),
|
|
|
|
* convertAlphabeticCharacters: (enum: 'false', 'true', 'variant'),
|
|
|
|
* convertHiraganaToKatakana: (enum: 'false', 'true', 'variant'),
|
|
|
|
* convertKatakanaToHiragana: (enum: 'false', 'true', 'variant'),
|
|
|
|
* collapseEmphaticSequences: (enum: 'false', 'true', 'full'),
|
2021-01-03 17:12:55 +00:00
|
|
|
* textReplacements: [
|
|
|
|
* (null or [
|
|
|
|
* {pattern: (RegExp), replacement: (string)}
|
|
|
|
* ...
|
|
|
|
* ])
|
|
|
|
* ...
|
|
|
|
* ],
|
2020-10-04 16:54:55 +00:00
|
|
|
* enabledDictionaryMap: (Map of [
|
|
|
|
* (string),
|
|
|
|
* {
|
2021-03-06 18:04:50 +00:00
|
|
|
* index: (number),
|
|
|
|
* priority: (number),
|
2020-10-04 16:54:55 +00:00
|
|
|
* allowSecondarySearches: (boolean)
|
|
|
|
* }
|
|
|
|
* ])
|
|
|
|
* }
|
2021-03-25 23:55:31 +00:00
|
|
|
* ```
|
|
|
|
* @returns An object of the structure `{dictionaryEntries, originalTextLength}`.
|
2020-10-04 16:54:55 +00:00
|
|
|
*/
|
|
|
|
async findTerms(mode, text, options) {
|
2021-03-25 23:55:31 +00:00
|
|
|
const {enabledDictionaryMap} = options;
|
|
|
|
let {dictionaryEntries, originalTextLength} = await this._findTermsInternal(text, enabledDictionaryMap, options);
|
|
|
|
|
2020-08-09 17:21:14 +00:00
|
|
|
switch (mode) {
|
|
|
|
case 'group':
|
2021-03-25 23:55:31 +00:00
|
|
|
dictionaryEntries = this._groupDictionaryEntriesByHeadword(dictionaryEntries);
|
|
|
|
break;
|
2020-08-09 17:21:14 +00:00
|
|
|
case 'merge':
|
2021-03-25 23:55:31 +00:00
|
|
|
dictionaryEntries = await this._getRelatedDictionaryEntries(dictionaryEntries, options.mainDictionary, enabledDictionaryMap);
|
|
|
|
break;
|
2020-08-09 17:21:14 +00:00
|
|
|
}
|
2021-03-25 23:55:31 +00:00
|
|
|
|
|
|
|
if (dictionaryEntries.length > 1) {
|
|
|
|
this._sortTermDictionaryEntries(dictionaryEntries);
|
|
|
|
}
|
|
|
|
|
|
|
|
if (mode === 'simple') {
|
|
|
|
this._clearTermTags(dictionaryEntries);
|
|
|
|
} else {
|
|
|
|
await this._addTermMeta(dictionaryEntries, enabledDictionaryMap);
|
|
|
|
await this._expandTermTags(dictionaryEntries);
|
|
|
|
this._sortTermDictionaryEntryData(dictionaryEntries);
|
|
|
|
}
|
|
|
|
|
|
|
|
return {dictionaryEntries, originalTextLength};
|
2020-08-09 17:21:14 +00:00
|
|
|
}
|
|
|
|
|
2020-10-04 16:54:55 +00:00
|
|
|
/**
|
|
|
|
* Finds kanji definitions for the given text.
|
|
|
|
* @param text The text to find kanji definitions for. This string can be of any length,
|
|
|
|
* but is typically just one character, which is a single kanji. If the string is multiple
|
|
|
|
* characters long, each character will be searched in the database.
|
|
|
|
* @param options An object using the following structure:
|
|
|
|
* {
|
|
|
|
* enabledDictionaryMap: (Map of [
|
|
|
|
* (string),
|
|
|
|
* {
|
2021-03-06 18:04:50 +00:00
|
|
|
* index: (number),
|
|
|
|
* priority: (number)
|
2020-10-04 16:54:55 +00:00
|
|
|
* }
|
|
|
|
* ])
|
|
|
|
* }
|
|
|
|
* @returns An array of definitions. See the _createKanjiDefinition() function for structure details.
|
|
|
|
*/
|
2020-08-09 17:21:14 +00:00
|
|
|
async findKanji(text, options) {
|
2020-10-04 16:54:55 +00:00
|
|
|
const {enabledDictionaryMap} = options;
|
2020-08-09 17:21:14 +00:00
|
|
|
const kanjiUnique = new Set();
|
|
|
|
for (const c of text) {
|
|
|
|
kanjiUnique.add(c);
|
|
|
|
}
|
|
|
|
|
2021-03-25 23:55:31 +00:00
|
|
|
const databaseEntries = await this._database.findKanjiBulk([...kanjiUnique], enabledDictionaryMap);
|
|
|
|
if (databaseEntries.length === 0) { return []; }
|
2020-10-04 23:33:22 +00:00
|
|
|
|
2021-03-25 23:55:31 +00:00
|
|
|
this._sortDatabaseEntriesByIndex(databaseEntries);
|
2020-10-04 23:33:22 +00:00
|
|
|
|
2021-03-25 23:55:31 +00:00
|
|
|
const dictionaryEntries = [];
|
2021-04-08 23:59:55 +00:00
|
|
|
for (const {character, onyomi, kunyomi, tags, definitions, stats, dictionary} of databaseEntries) {
|
2021-03-25 23:55:31 +00:00
|
|
|
const expandedStats = await this._expandKanjiStats(stats, dictionary);
|
2020-10-04 23:33:22 +00:00
|
|
|
|
2021-03-25 23:55:31 +00:00
|
|
|
const tagGroups = [];
|
|
|
|
if (tags.length > 0) { tagGroups.push(this._createTagGroup(dictionary, tags)); }
|
2021-03-02 03:17:23 +00:00
|
|
|
|
2021-04-08 23:59:55 +00:00
|
|
|
const dictionaryEntry = this._createKanjiDictionaryEntry(character, dictionary, onyomi, kunyomi, tagGroups, expandedStats, definitions);
|
2021-03-25 23:55:31 +00:00
|
|
|
dictionaryEntries.push(dictionaryEntry);
|
2020-10-04 23:33:22 +00:00
|
|
|
}
|
|
|
|
|
2021-03-25 23:55:31 +00:00
|
|
|
await this._addKanjiMeta(dictionaryEntries, enabledDictionaryMap);
|
|
|
|
await this._expandKanjiTags(dictionaryEntries);
|
2020-10-04 23:33:22 +00:00
|
|
|
|
2021-03-25 23:55:31 +00:00
|
|
|
this._sortKanjiDictionaryEntryData(dictionaryEntries);
|
2020-10-04 23:33:22 +00:00
|
|
|
|
2021-03-25 23:55:31 +00:00
|
|
|
return dictionaryEntries;
|
2020-10-04 23:33:22 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
// Find terms internal implementation
|
|
|
|
|
|
|
|
async _findTermsInternal(text, enabledDictionaryMap, options) {
|
|
|
|
const {alphanumeric, wildcard} = options;
|
|
|
|
text = this._getSearchableText(text, alphanumeric);
|
|
|
|
if (text.length === 0) {
|
|
|
|
return [[], 0];
|
|
|
|
}
|
|
|
|
|
2021-03-25 23:55:31 +00:00
|
|
|
const deinflections = await (
|
2020-10-04 23:33:22 +00:00
|
|
|
wildcard ?
|
2021-03-25 23:55:31 +00:00
|
|
|
this._findTermsWildcard(text, enabledDictionaryMap, wildcard) :
|
|
|
|
this._findTermDeinflections(text, enabledDictionaryMap, options)
|
2020-10-04 23:33:22 +00:00
|
|
|
);
|
|
|
|
|
2021-03-25 23:55:31 +00:00
|
|
|
let originalTextLength = 0;
|
|
|
|
const dictionaryEntries = [];
|
|
|
|
const ids = new Set();
|
|
|
|
for (const {databaseEntries, originalText, transformedText, deinflectedText, reasons} of deinflections) {
|
|
|
|
if (databaseEntries.length === 0) { continue; }
|
|
|
|
originalTextLength = Math.max(originalTextLength, originalText.length);
|
|
|
|
for (const databaseEntry of databaseEntries) {
|
|
|
|
const {id} = databaseEntry;
|
|
|
|
if (ids.has(id)) { continue; }
|
|
|
|
const dictionaryEntry = this._createTermDictionaryEntryFromDatabaseEntry(databaseEntry, originalText, transformedText, deinflectedText, reasons, true, enabledDictionaryMap);
|
|
|
|
dictionaryEntries.push(dictionaryEntry);
|
|
|
|
ids.add(id);
|
2020-10-04 23:33:22 +00:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2021-03-25 23:55:31 +00:00
|
|
|
return {dictionaryEntries, originalTextLength};
|
2020-10-04 23:33:22 +00:00
|
|
|
}
|
|
|
|
|
2021-03-25 23:55:31 +00:00
|
|
|
async _findTermsWildcard(text, enabledDictionaryMap, wildcard) {
|
|
|
|
const databaseEntries = await this._database.findTermsBulk([text], enabledDictionaryMap, wildcard);
|
|
|
|
return databaseEntries.length > 0 ? [this._createDeinflection(text, text, text, 0, [], databaseEntries)] : [];
|
2020-10-04 23:33:22 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
async _findTermDeinflections(text, enabledDictionaryMap, options) {
|
|
|
|
const deinflections = this._getAllDeinflections(text, options);
|
|
|
|
|
|
|
|
if (deinflections.length === 0) {
|
|
|
|
return [];
|
|
|
|
}
|
|
|
|
|
|
|
|
const uniqueDeinflectionTerms = [];
|
|
|
|
const uniqueDeinflectionArrays = [];
|
|
|
|
const uniqueDeinflectionsMap = new Map();
|
|
|
|
for (const deinflection of deinflections) {
|
2021-03-25 23:55:31 +00:00
|
|
|
const term = deinflection.deinflectedText;
|
2020-10-04 23:33:22 +00:00
|
|
|
let deinflectionArray = uniqueDeinflectionsMap.get(term);
|
|
|
|
if (typeof deinflectionArray === 'undefined') {
|
|
|
|
deinflectionArray = [];
|
|
|
|
uniqueDeinflectionTerms.push(term);
|
|
|
|
uniqueDeinflectionArrays.push(deinflectionArray);
|
|
|
|
uniqueDeinflectionsMap.set(term, deinflectionArray);
|
|
|
|
}
|
|
|
|
deinflectionArray.push(deinflection);
|
|
|
|
}
|
|
|
|
|
2021-03-25 23:55:31 +00:00
|
|
|
const databaseEntries = await this._database.findTermsBulk(uniqueDeinflectionTerms, enabledDictionaryMap, null);
|
2020-10-04 23:33:22 +00:00
|
|
|
|
2021-03-25 23:55:31 +00:00
|
|
|
for (const databaseEntry of databaseEntries) {
|
|
|
|
const definitionRules = Deinflector.rulesToRuleFlags(databaseEntry.rules);
|
|
|
|
for (const deinflection of uniqueDeinflectionArrays[databaseEntry.index]) {
|
2020-10-04 23:33:22 +00:00
|
|
|
const deinflectionRules = deinflection.rules;
|
|
|
|
if (deinflectionRules === 0 || (definitionRules & deinflectionRules) !== 0) {
|
2021-03-25 23:55:31 +00:00
|
|
|
deinflection.databaseEntries.push(databaseEntry);
|
2020-10-04 23:33:22 +00:00
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
return deinflections;
|
|
|
|
}
|
|
|
|
|
2021-03-25 23:55:31 +00:00
|
|
|
// Deinflections and text transformations
|
|
|
|
|
2020-10-04 23:33:22 +00:00
|
|
|
_getAllDeinflections(text, options) {
|
|
|
|
const textOptionVariantArray = [
|
2021-01-03 17:12:55 +00:00
|
|
|
this._getTextReplacementsVariants(options),
|
2020-10-04 23:33:22 +00:00
|
|
|
this._getTextOptionEntryVariants(options.convertHalfWidthCharacters),
|
|
|
|
this._getTextOptionEntryVariants(options.convertNumericCharacters),
|
|
|
|
this._getTextOptionEntryVariants(options.convertAlphabeticCharacters),
|
|
|
|
this._getTextOptionEntryVariants(options.convertHiraganaToKatakana),
|
|
|
|
this._getTextOptionEntryVariants(options.convertKatakanaToHiragana),
|
2021-01-02 04:16:44 +00:00
|
|
|
this._getCollapseEmphaticOptions(options)
|
2020-10-04 23:33:22 +00:00
|
|
|
];
|
|
|
|
|
2020-11-29 18:09:02 +00:00
|
|
|
const jp = this._japaneseUtil;
|
2020-10-04 23:33:22 +00:00
|
|
|
const deinflections = [];
|
|
|
|
const used = new Set();
|
2021-01-03 17:12:55 +00:00
|
|
|
for (const [textReplacements, halfWidth, numeric, alphabetic, katakana, hiragana, [collapseEmphatic, collapseEmphaticFull]] of this._getArrayVariants(textOptionVariantArray)) {
|
2020-10-04 23:33:22 +00:00
|
|
|
let text2 = text;
|
|
|
|
const sourceMap = new TextSourceMap(text2);
|
2021-01-03 17:12:55 +00:00
|
|
|
if (textReplacements !== null) {
|
|
|
|
text2 = this._applyTextReplacements(text2, sourceMap, textReplacements);
|
|
|
|
}
|
2020-10-04 23:33:22 +00:00
|
|
|
if (halfWidth) {
|
|
|
|
text2 = jp.convertHalfWidthKanaToFullWidth(text2, sourceMap);
|
|
|
|
}
|
|
|
|
if (numeric) {
|
|
|
|
text2 = jp.convertNumericToFullWidth(text2);
|
|
|
|
}
|
|
|
|
if (alphabetic) {
|
|
|
|
text2 = jp.convertAlphabeticToKana(text2, sourceMap);
|
|
|
|
}
|
|
|
|
if (katakana) {
|
|
|
|
text2 = jp.convertHiraganaToKatakana(text2);
|
|
|
|
}
|
|
|
|
if (hiragana) {
|
|
|
|
text2 = jp.convertKatakanaToHiragana(text2);
|
|
|
|
}
|
|
|
|
if (collapseEmphatic) {
|
|
|
|
text2 = jp.collapseEmphaticSequences(text2, collapseEmphaticFull, sourceMap);
|
|
|
|
}
|
|
|
|
|
|
|
|
for (let i = text2.length; i > 0; --i) {
|
2021-03-07 19:07:26 +00:00
|
|
|
const source = text2.substring(0, i);
|
|
|
|
if (used.has(source)) { break; }
|
|
|
|
used.add(source);
|
2020-10-04 23:33:22 +00:00
|
|
|
const rawSource = sourceMap.source.substring(0, sourceMap.getSourceLength(i));
|
2021-03-07 19:07:26 +00:00
|
|
|
for (const {term, rules, reasons} of this._deinflector.deinflect(source)) {
|
2021-03-25 23:55:31 +00:00
|
|
|
deinflections.push(this._createDeinflection(rawSource, source, term, rules, reasons, []));
|
2020-10-04 23:33:22 +00:00
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
return deinflections;
|
|
|
|
}
|
2020-08-09 17:21:14 +00:00
|
|
|
|
2021-03-25 23:55:31 +00:00
|
|
|
_applyTextReplacements(text, sourceMap, replacements) {
|
|
|
|
for (const {pattern, replacement} of replacements) {
|
|
|
|
text = RegexUtil.applyTextReplacement(text, sourceMap, pattern, replacement);
|
|
|
|
}
|
|
|
|
return text;
|
|
|
|
}
|
|
|
|
|
|
|
|
_getSearchableText(text, allowAlphanumericCharacters) {
|
|
|
|
if (allowAlphanumericCharacters) { return text; }
|
|
|
|
const jp = this._japaneseUtil;
|
|
|
|
let length = 0;
|
|
|
|
for (const c of text) {
|
|
|
|
if (!jp.isCodePointJapanese(c.codePointAt(0))) { break; }
|
|
|
|
length += c.length;
|
|
|
|
}
|
|
|
|
return length >= text.length ? text : text.substring(0, length);
|
2021-03-07 19:07:26 +00:00
|
|
|
}
|
|
|
|
|
2021-03-25 23:55:31 +00:00
|
|
|
_getTextOptionEntryVariants(value) {
|
|
|
|
switch (value) {
|
|
|
|
case 'true': return [true];
|
|
|
|
case 'variant': return [false, true];
|
|
|
|
default: return [false];
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
_getCollapseEmphaticOptions(options) {
|
|
|
|
const collapseEmphaticOptions = [[false, false]];
|
|
|
|
switch (options.collapseEmphaticSequences) {
|
|
|
|
case 'true':
|
|
|
|
collapseEmphaticOptions.push([true, false]);
|
|
|
|
break;
|
|
|
|
case 'full':
|
|
|
|
collapseEmphaticOptions.push([true, false], [true, true]);
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
return collapseEmphaticOptions;
|
|
|
|
}
|
|
|
|
|
|
|
|
_getTextReplacementsVariants(options) {
|
|
|
|
return options.textReplacements;
|
|
|
|
}
|
|
|
|
|
|
|
|
_createDeinflection(originalText, transformedText, deinflectedText, rules, reasons, databaseEntries) {
|
|
|
|
return {originalText, transformedText, deinflectedText, rules, reasons, databaseEntries};
|
|
|
|
}
|
|
|
|
|
|
|
|
// Term dictionary entry grouping
|
|
|
|
|
|
|
|
async _getRelatedDictionaryEntries(dictionaryEntries, mainDictionary, enabledDictionaryMap) {
|
2020-01-28 23:58:14 +00:00
|
|
|
const sequenceList = [];
|
2021-03-25 23:55:31 +00:00
|
|
|
const groupedDictionaryEntries = [];
|
|
|
|
const groupedDictionaryEntriesMap = new Map();
|
|
|
|
const ungroupedDictionaryEntriesMap = new Map();
|
|
|
|
for (const dictionaryEntry of dictionaryEntries) {
|
|
|
|
const {id, sequence, definitions: [{dictionary}]} = dictionaryEntry;
|
2020-10-02 21:59:14 +00:00
|
|
|
if (mainDictionary === dictionary && sequence >= 0) {
|
2021-03-25 23:55:31 +00:00
|
|
|
let group = groupedDictionaryEntriesMap.get(sequence);
|
|
|
|
if (typeof group === 'undefined') {
|
2021-04-03 19:41:44 +00:00
|
|
|
group = {
|
|
|
|
sequence,
|
|
|
|
sequenceDictionary: dictionary,
|
|
|
|
ids: new Set(),
|
|
|
|
dictionaryEntries: []
|
|
|
|
};
|
2021-03-25 23:55:31 +00:00
|
|
|
sequenceList.push({query: sequence, dictionary});
|
|
|
|
groupedDictionaryEntries.push(group);
|
|
|
|
groupedDictionaryEntriesMap.set(sequence, group);
|
2020-10-02 21:59:14 +00:00
|
|
|
}
|
2021-03-25 23:55:31 +00:00
|
|
|
group.dictionaryEntries.push(dictionaryEntry);
|
|
|
|
group.ids.add(id);
|
2020-10-02 21:59:14 +00:00
|
|
|
} else {
|
2021-03-25 23:55:31 +00:00
|
|
|
ungroupedDictionaryEntriesMap.set(id, dictionaryEntry);
|
2020-10-02 21:59:14 +00:00
|
|
|
}
|
2020-01-28 23:58:14 +00:00
|
|
|
}
|
2019-10-19 15:34:12 +00:00
|
|
|
|
2020-10-04 15:12:15 +00:00
|
|
|
if (sequenceList.length > 0) {
|
2021-03-25 23:55:31 +00:00
|
|
|
const secondarySearchDictionaryMap = this._getSecondarySearchDictionaryMap(enabledDictionaryMap);
|
|
|
|
await this._addRelatedDictionaryEntries(groupedDictionaryEntries, ungroupedDictionaryEntriesMap, sequenceList, mainDictionary, enabledDictionaryMap);
|
|
|
|
for (const group of groupedDictionaryEntries) {
|
|
|
|
this._sortTermDictionaryEntriesById(group.dictionaryEntries);
|
|
|
|
}
|
|
|
|
if (ungroupedDictionaryEntriesMap.size !== 0 || secondarySearchDictionaryMap.size !== 0) {
|
|
|
|
await this._addSecondaryRelatedDictionaryEntries(groupedDictionaryEntries, ungroupedDictionaryEntriesMap, enabledDictionaryMap, secondarySearchDictionaryMap);
|
|
|
|
}
|
2019-10-19 15:34:12 +00:00
|
|
|
}
|
|
|
|
|
2021-03-25 23:55:31 +00:00
|
|
|
const newDictionaryEntries = [];
|
|
|
|
for (const group of groupedDictionaryEntries) {
|
2021-04-03 19:41:44 +00:00
|
|
|
newDictionaryEntries.push(this._createGroupedDictionaryEntry(group.dictionaryEntries, group.sequence, group.sequenceDictionary, true));
|
2020-10-05 02:04:44 +00:00
|
|
|
}
|
2021-03-25 23:55:31 +00:00
|
|
|
newDictionaryEntries.push(...this._groupDictionaryEntriesByHeadword(ungroupedDictionaryEntriesMap.values()));
|
|
|
|
return newDictionaryEntries;
|
|
|
|
}
|
|
|
|
|
|
|
|
async _addRelatedDictionaryEntries(groupedDictionaryEntries, ungroupedDictionaryEntriesMap, sequenceList, mainDictionary, enabledDictionaryMap) {
|
|
|
|
const databaseEntries = await this._database.findTermsBySequenceBulk(sequenceList);
|
|
|
|
for (const databaseEntry of databaseEntries) {
|
|
|
|
const {dictionaryEntries, ids} = groupedDictionaryEntries[databaseEntry.index];
|
|
|
|
const {id} = databaseEntry;
|
|
|
|
if (ids.has(id)) { continue; }
|
|
|
|
|
2021-04-04 20:22:35 +00:00
|
|
|
const {term} = databaseEntry;
|
|
|
|
const dictionaryEntry = this._createTermDictionaryEntryFromDatabaseEntry(databaseEntry, term, term, term, [], false, enabledDictionaryMap);
|
2021-03-25 23:55:31 +00:00
|
|
|
dictionaryEntries.push(dictionaryEntry);
|
|
|
|
ids.add(id);
|
|
|
|
ungroupedDictionaryEntriesMap.delete(id);
|
2019-10-19 16:24:38 +00:00
|
|
|
}
|
2021-03-02 03:17:23 +00:00
|
|
|
}
|
2019-10-19 16:24:38 +00:00
|
|
|
|
2021-03-25 23:55:31 +00:00
|
|
|
async _addSecondaryRelatedDictionaryEntries(groupedDictionaryEntries, ungroupedDictionaryEntriesMap, enabledDictionaryMap, secondarySearchDictionaryMap) {
|
2021-03-02 03:17:23 +00:00
|
|
|
// Prepare grouping info
|
2021-03-15 02:51:20 +00:00
|
|
|
const termList = [];
|
2021-03-02 03:17:23 +00:00
|
|
|
const targetList = [];
|
|
|
|
const targetMap = new Map();
|
|
|
|
|
2021-03-25 23:55:31 +00:00
|
|
|
for (const group of groupedDictionaryEntries) {
|
|
|
|
const {dictionaryEntries} = group;
|
|
|
|
for (const dictionaryEntry of dictionaryEntries) {
|
|
|
|
const {term, reading} = dictionaryEntry.headwords[0];
|
|
|
|
const key = this._createMapKey([term, reading]);
|
2021-03-02 03:17:23 +00:00
|
|
|
let target = targetMap.get(key);
|
|
|
|
if (typeof target === 'undefined') {
|
|
|
|
target = {
|
2021-03-25 23:55:31 +00:00
|
|
|
groups: [],
|
2021-03-02 03:17:23 +00:00
|
|
|
searchSecondary: false
|
|
|
|
};
|
|
|
|
targetMap.set(key, target);
|
|
|
|
}
|
2021-03-25 23:55:31 +00:00
|
|
|
target.groups.push(group);
|
|
|
|
if (!dictionaryEntry.isPrimary && !target.searchSecondary) {
|
2021-03-02 03:17:23 +00:00
|
|
|
target.searchSecondary = true;
|
2021-04-04 20:22:35 +00:00
|
|
|
termList.push({term, reading});
|
2021-03-02 03:17:23 +00:00
|
|
|
targetList.push(target);
|
|
|
|
}
|
2019-10-19 16:24:38 +00:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2021-04-04 20:22:35 +00:00
|
|
|
// Group unsequenced dictionary entries with sequenced entries that have a matching [term, reading].
|
2021-03-25 23:55:31 +00:00
|
|
|
for (const [id, dictionaryEntry] of ungroupedDictionaryEntriesMap.entries()) {
|
|
|
|
const {term, reading} = dictionaryEntry.headwords[0];
|
|
|
|
const key = this._createMapKey([term, reading]);
|
2021-03-02 03:17:23 +00:00
|
|
|
const target = targetMap.get(key);
|
|
|
|
if (typeof target === 'undefined') { continue; }
|
|
|
|
|
2021-03-25 23:55:31 +00:00
|
|
|
for (const {ids, dictionaryEntries} of target.groups) {
|
|
|
|
if (ids.has(id)) { continue; }
|
2021-03-02 03:17:23 +00:00
|
|
|
|
2021-03-25 23:55:31 +00:00
|
|
|
dictionaryEntries.push(dictionaryEntry);
|
|
|
|
ids.add(id);
|
|
|
|
ungroupedDictionaryEntriesMap.delete(id);
|
2021-03-02 03:17:23 +00:00
|
|
|
break;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
// Search database for additional secondary terms
|
2021-03-15 02:51:20 +00:00
|
|
|
if (termList.length === 0 || secondarySearchDictionaryMap.size === 0) { return; }
|
2021-03-02 03:17:23 +00:00
|
|
|
|
2021-03-25 23:55:31 +00:00
|
|
|
const databaseEntries = await this._database.findTermsExactBulk(termList, secondarySearchDictionaryMap);
|
|
|
|
this._sortDatabaseEntriesByIndex(databaseEntries);
|
2019-10-19 16:42:26 +00:00
|
|
|
|
2021-03-25 23:55:31 +00:00
|
|
|
for (const databaseEntry of databaseEntries) {
|
|
|
|
const {index, id} = databaseEntry;
|
2021-04-04 20:22:35 +00:00
|
|
|
const sourceText = termList[index].term;
|
2021-03-02 03:17:23 +00:00
|
|
|
const target = targetList[index];
|
2021-03-25 23:55:31 +00:00
|
|
|
for (const {ids, dictionaryEntries} of target.groups) {
|
|
|
|
if (ids.has(id)) { continue; }
|
2021-03-02 03:17:23 +00:00
|
|
|
|
2021-03-25 23:55:31 +00:00
|
|
|
const dictionaryEntry = this._createTermDictionaryEntryFromDatabaseEntry(databaseEntry, sourceText, sourceText, sourceText, [], false, enabledDictionaryMap);
|
|
|
|
dictionaryEntries.push(dictionaryEntry);
|
|
|
|
ids.add(id);
|
|
|
|
ungroupedDictionaryEntriesMap.delete(id);
|
2021-03-02 03:17:23 +00:00
|
|
|
}
|
2019-10-19 16:42:26 +00:00
|
|
|
}
|
2019-10-19 16:24:38 +00:00
|
|
|
}
|
|
|
|
|
2021-03-25 23:55:31 +00:00
|
|
|
_groupDictionaryEntriesByHeadword(dictionaryEntries) {
|
|
|
|
const groups = new Map();
|
|
|
|
for (const dictionaryEntry of dictionaryEntries) {
|
|
|
|
const {inflections, headwords: [{term, reading}]} = dictionaryEntry;
|
|
|
|
const key = this._createMapKey([term, reading, ...inflections]);
|
|
|
|
let dictionaryEntries2 = groups.get(key);
|
|
|
|
if (typeof dictionaryEntries2 === 'undefined') {
|
|
|
|
dictionaryEntries2 = [];
|
|
|
|
groups.set(key, dictionaryEntries2);
|
|
|
|
}
|
|
|
|
dictionaryEntries2.push(dictionaryEntry);
|
|
|
|
}
|
2017-10-03 04:20:02 +00:00
|
|
|
|
2021-03-25 23:55:31 +00:00
|
|
|
const results = [];
|
|
|
|
for (const dictionaryEntries2 of groups.values()) {
|
2021-04-03 19:41:44 +00:00
|
|
|
const dictionaryEntry = this._createGroupedDictionaryEntry(dictionaryEntries2, -1, null, false);
|
2021-03-25 23:55:31 +00:00
|
|
|
results.push(dictionaryEntry);
|
|
|
|
}
|
|
|
|
return results;
|
|
|
|
}
|
2021-03-02 03:17:23 +00:00
|
|
|
|
2021-03-25 23:55:31 +00:00
|
|
|
// Tags
|
|
|
|
|
|
|
|
_getTermTagTargets(dictionaryEntries) {
|
|
|
|
const tagTargets = [];
|
|
|
|
for (const {headwords, definitions, pronunciations} of dictionaryEntries) {
|
|
|
|
this._addTagExpansionTargets(tagTargets, headwords);
|
|
|
|
this._addTagExpansionTargets(tagTargets, definitions);
|
|
|
|
for (const {pitches} of pronunciations) {
|
|
|
|
this._addTagExpansionTargets(tagTargets, pitches);
|
2021-03-02 03:17:23 +00:00
|
|
|
}
|
2021-03-25 23:55:31 +00:00
|
|
|
}
|
|
|
|
return tagTargets;
|
|
|
|
}
|
|
|
|
|
|
|
|
_clearTermTags(dictionaryEntries) {
|
|
|
|
this._getTermTagTargets(dictionaryEntries);
|
|
|
|
}
|
|
|
|
|
|
|
|
async _expandTermTags(dictionaryEntries) {
|
|
|
|
const tagTargets = this._getTermTagTargets(dictionaryEntries);
|
|
|
|
await this._expandTagGroups(tagTargets);
|
|
|
|
this._groupTags(tagTargets);
|
|
|
|
}
|
2021-03-02 03:17:23 +00:00
|
|
|
|
2021-03-25 23:55:31 +00:00
|
|
|
async _expandKanjiTags(dictionaryEntries) {
|
|
|
|
const tagTargets = [];
|
|
|
|
this._addTagExpansionTargets(tagTargets, dictionaryEntries);
|
|
|
|
await this._expandTagGroups(tagTargets);
|
|
|
|
this._groupTags(tagTargets);
|
|
|
|
}
|
|
|
|
|
|
|
|
async _expandTagGroups(tagTargets) {
|
|
|
|
const allItems = [];
|
|
|
|
const targetMap = new Map();
|
|
|
|
for (const {tagGroups, tags} of tagTargets) {
|
|
|
|
for (const {dictionary, tagNames} of tagGroups) {
|
|
|
|
let dictionaryItems = targetMap.get(dictionary);
|
|
|
|
if (typeof dictionaryItems === 'undefined') {
|
|
|
|
dictionaryItems = new Map();
|
|
|
|
targetMap.set(dictionary, dictionaryItems);
|
|
|
|
}
|
|
|
|
for (const tagName of tagNames) {
|
|
|
|
let item = dictionaryItems.get(tagName);
|
|
|
|
if (typeof item === 'undefined') {
|
|
|
|
const query = this._getNameBase(tagName);
|
|
|
|
item = {query, dictionary, tagName, cache: null, databaseTag: null, targets: []};
|
|
|
|
dictionaryItems.set(tagName, item);
|
|
|
|
allItems.push(item);
|
|
|
|
}
|
|
|
|
item.targets.push(tags);
|
|
|
|
}
|
|
|
|
}
|
2019-10-19 16:16:38 +00:00
|
|
|
}
|
2017-10-03 04:20:02 +00:00
|
|
|
|
2021-03-25 23:55:31 +00:00
|
|
|
const nonCachedItems = [];
|
|
|
|
const tagCache = this._tagCache;
|
|
|
|
for (const [dictionary, dictionaryItems] of targetMap.entries()) {
|
|
|
|
let cache = tagCache.get(dictionary);
|
|
|
|
if (typeof cache === 'undefined') {
|
|
|
|
cache = new Map();
|
|
|
|
tagCache.set(dictionary, cache);
|
|
|
|
}
|
|
|
|
for (const item of dictionaryItems.values()) {
|
|
|
|
const databaseTag = cache.get(item.query);
|
|
|
|
if (typeof databaseTag !== 'undefined') {
|
|
|
|
item.databaseTag = databaseTag;
|
|
|
|
} else {
|
|
|
|
item.cache = cache;
|
|
|
|
nonCachedItems.push(item);
|
|
|
|
}
|
|
|
|
}
|
2020-10-04 15:12:15 +00:00
|
|
|
}
|
|
|
|
|
2021-03-25 23:55:31 +00:00
|
|
|
const nonCachedItemCount = nonCachedItems.length;
|
|
|
|
if (nonCachedItemCount > 0) {
|
|
|
|
const databaseTags = await this._database.findTagMetaBulk(nonCachedItems);
|
|
|
|
for (let i = 0; i < nonCachedItemCount; ++i) {
|
|
|
|
const item = nonCachedItems[i];
|
|
|
|
let databaseTag = databaseTags[i];
|
|
|
|
if (typeof databaseTag === 'undefined') { databaseTag = null; }
|
|
|
|
item.databaseTag = databaseTag;
|
|
|
|
item.cache.set(item.query, databaseTag);
|
|
|
|
}
|
|
|
|
}
|
2020-10-04 15:12:15 +00:00
|
|
|
|
2021-03-25 23:55:31 +00:00
|
|
|
for (const {dictionary, tagName, databaseTag, targets} of allItems) {
|
|
|
|
for (const tags of targets) {
|
|
|
|
tags.push(this._createTag(databaseTag, tagName, dictionary));
|
|
|
|
}
|
|
|
|
}
|
2020-10-04 15:12:15 +00:00
|
|
|
}
|
|
|
|
|
2021-03-25 23:55:31 +00:00
|
|
|
_groupTags(tagTargets) {
|
|
|
|
const stringComparer = this._stringComparer;
|
|
|
|
const compare = (v1, v2) => {
|
|
|
|
const i = v1.order - v2.order;
|
|
|
|
return i !== 0 ? i : stringComparer.compare(v1.name, v2.name);
|
|
|
|
};
|
|
|
|
|
|
|
|
for (const {tags} of tagTargets) {
|
|
|
|
if (tags.length <= 1) { continue; }
|
|
|
|
this._mergeSimilarTags(tags);
|
|
|
|
tags.sort(compare);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
_addTagExpansionTargets(tagTargets, objects) {
|
|
|
|
for (const value of objects) {
|
|
|
|
const tagGroups = value.tags;
|
|
|
|
if (tagGroups.length === 0) { continue; }
|
|
|
|
const tags = [];
|
|
|
|
value.tags = tags;
|
|
|
|
tagTargets.push({tagGroups, tags});
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
_mergeSimilarTags(tags) {
|
|
|
|
let tagCount = tags.length;
|
|
|
|
for (let i = 0; i < tagCount; ++i) {
|
|
|
|
const tag1 = tags[i];
|
|
|
|
const {category, name} = tag1;
|
|
|
|
for (let j = i + 1; j < tagCount; ++j) {
|
|
|
|
const tag2 = tags[j];
|
|
|
|
if (tag2.name !== name || tag2.category !== category) { continue; }
|
|
|
|
// Merge tag
|
|
|
|
tag1.order = Math.min(tag1.order, tag2.order);
|
|
|
|
tag1.score = Math.max(tag1.score, tag2.score);
|
|
|
|
tag1.dictionaries.push(...tag2.dictionaries);
|
|
|
|
this._addUniqueStrings(tag1.content, tag2.content);
|
|
|
|
tags.splice(j, 1);
|
|
|
|
--tagCount;
|
|
|
|
--j;
|
2020-10-04 15:12:15 +00:00
|
|
|
}
|
|
|
|
}
|
2021-03-25 23:55:31 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
_getTagNamesWithCategory(tags, category) {
|
|
|
|
const results = [];
|
|
|
|
for (const tag of tags) {
|
|
|
|
if (tag.category !== category) { continue; }
|
|
|
|
results.push(tag.name);
|
|
|
|
}
|
|
|
|
results.sort();
|
|
|
|
return results;
|
2020-10-04 15:12:15 +00:00
|
|
|
}
|
2017-10-01 01:17:02 +00:00
|
|
|
|
2020-11-13 01:34:11 +00:00
|
|
|
_flagRedundantDefinitionTags(definitions) {
|
2021-03-25 23:55:31 +00:00
|
|
|
if (definitions.length === 0) { return; }
|
|
|
|
|
2021-01-29 02:33:30 +00:00
|
|
|
let lastDictionary = null;
|
2020-10-04 23:33:22 +00:00
|
|
|
let lastPartOfSpeech = '';
|
|
|
|
const removeCategoriesSet = new Set();
|
2017-04-22 20:02:06 +00:00
|
|
|
|
2021-03-25 23:55:31 +00:00
|
|
|
for (const {dictionary, tags} of definitions) {
|
|
|
|
const partOfSpeech = this._createMapKey(this._getTagNamesWithCategory(tags, 'partOfSpeech'));
|
2016-03-21 03:34:50 +00:00
|
|
|
|
2021-01-29 02:33:30 +00:00
|
|
|
if (lastDictionary !== dictionary) {
|
2020-10-04 23:33:22 +00:00
|
|
|
lastDictionary = dictionary;
|
|
|
|
lastPartOfSpeech = '';
|
2017-01-08 19:18:55 +00:00
|
|
|
}
|
2017-07-10 21:10:58 +00:00
|
|
|
|
2020-10-04 23:33:22 +00:00
|
|
|
if (lastPartOfSpeech === partOfSpeech) {
|
|
|
|
removeCategoriesSet.add('partOfSpeech');
|
|
|
|
} else {
|
|
|
|
lastPartOfSpeech = partOfSpeech;
|
|
|
|
}
|
2016-03-21 01:45:37 +00:00
|
|
|
|
2020-10-04 23:33:22 +00:00
|
|
|
if (removeCategoriesSet.size > 0) {
|
2021-03-25 23:55:31 +00:00
|
|
|
for (const tag of tags) {
|
|
|
|
if (removeCategoriesSet.has(tag.category)) {
|
|
|
|
tag.redundant = true;
|
|
|
|
}
|
|
|
|
}
|
2020-10-04 23:33:22 +00:00
|
|
|
removeCategoriesSet.clear();
|
|
|
|
}
|
2019-11-05 01:52:08 +00:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2021-03-25 23:55:31 +00:00
|
|
|
// Metadata
|
2019-12-22 19:07:30 +00:00
|
|
|
|
2021-03-25 23:55:31 +00:00
|
|
|
async _addTermMeta(dictionaryEntries, enabledDictionaryMap) {
|
|
|
|
const headwordMap = new Map();
|
|
|
|
const headwordMapKeys = [];
|
|
|
|
const headwordReadingMaps = [];
|
2021-02-28 03:27:00 +00:00
|
|
|
|
2021-03-25 23:55:31 +00:00
|
|
|
for (const {headwords, pronunciations, frequencies} of dictionaryEntries) {
|
|
|
|
for (let i = 0, ii = headwords.length; i < ii; ++i) {
|
|
|
|
const {term, reading} = headwords[i];
|
|
|
|
let readingMap = headwordMap.get(term);
|
2021-02-28 03:27:00 +00:00
|
|
|
if (typeof readingMap === 'undefined') {
|
|
|
|
readingMap = new Map();
|
2021-03-25 23:55:31 +00:00
|
|
|
headwordMap.set(term, readingMap);
|
|
|
|
headwordMapKeys.push(term);
|
|
|
|
headwordReadingMaps.push(readingMap);
|
2020-11-02 02:24:35 +00:00
|
|
|
}
|
2021-02-28 03:27:00 +00:00
|
|
|
let targets = readingMap.get(reading);
|
|
|
|
if (typeof targets === 'undefined') {
|
|
|
|
targets = [];
|
|
|
|
readingMap.set(reading, targets);
|
2020-11-02 02:24:35 +00:00
|
|
|
}
|
2021-03-25 23:55:31 +00:00
|
|
|
targets.push({headwordIndex: i, pronunciations, frequencies});
|
2019-08-31 01:06:21 +00:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2021-03-25 23:55:31 +00:00
|
|
|
const metas = await this._database.findTermMetaBulk(headwordMapKeys, enabledDictionaryMap);
|
|
|
|
for (const {mode, data, dictionary, index} of metas) {
|
|
|
|
const {index: dictionaryIndex, priority: dictionaryPriority} = this._getDictionaryOrder(dictionary, enabledDictionaryMap);
|
|
|
|
const map2 = headwordReadingMaps[index];
|
2021-02-28 03:27:00 +00:00
|
|
|
for (const [reading, targets] of map2.entries()) {
|
2020-11-02 02:24:35 +00:00
|
|
|
switch (mode) {
|
|
|
|
case 'freq':
|
|
|
|
{
|
2021-02-28 03:27:00 +00:00
|
|
|
let frequency = data;
|
|
|
|
const hasReading = (data !== null && typeof data === 'object');
|
|
|
|
if (hasReading) {
|
|
|
|
if (data.reading !== reading) { continue; }
|
|
|
|
frequency = data.frequency;
|
|
|
|
}
|
2021-03-25 23:55:31 +00:00
|
|
|
for (const {frequencies, headwordIndex} of targets) {
|
|
|
|
frequencies.push(this._createTermFrequency(
|
|
|
|
frequencies.length,
|
|
|
|
headwordIndex,
|
|
|
|
dictionary,
|
|
|
|
dictionaryIndex,
|
|
|
|
dictionaryPriority,
|
|
|
|
hasReading,
|
|
|
|
frequency
|
|
|
|
));
|
2021-02-28 03:27:00 +00:00
|
|
|
}
|
2020-11-02 02:24:35 +00:00
|
|
|
}
|
|
|
|
break;
|
|
|
|
case 'pitch':
|
|
|
|
{
|
2021-02-28 03:27:00 +00:00
|
|
|
if (data.reading !== reading) { continue; }
|
2021-03-25 23:55:31 +00:00
|
|
|
const pitches = [];
|
|
|
|
for (const {position, tags} of data.pitches) {
|
|
|
|
const tags2 = [];
|
|
|
|
if (Array.isArray(tags) && tags.length > 0) {
|
|
|
|
tags2.push(this._createTagGroup(dictionary, tags));
|
|
|
|
}
|
|
|
|
pitches.push({position, tags: tags2});
|
2021-02-28 03:27:00 +00:00
|
|
|
}
|
2021-03-25 23:55:31 +00:00
|
|
|
for (const {pronunciations, headwordIndex} of targets) {
|
|
|
|
pronunciations.push(this._createTermPronunciation(
|
|
|
|
pronunciations.length,
|
|
|
|
headwordIndex,
|
|
|
|
dictionary,
|
|
|
|
dictionaryIndex,
|
|
|
|
dictionaryPriority,
|
|
|
|
pitches
|
|
|
|
));
|
2021-02-28 03:27:00 +00:00
|
|
|
}
|
2020-11-02 02:24:35 +00:00
|
|
|
}
|
|
|
|
break;
|
|
|
|
}
|
2017-09-23 15:46:34 +00:00
|
|
|
}
|
2017-09-14 06:22:41 +00:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2021-03-25 23:55:31 +00:00
|
|
|
async _addKanjiMeta(dictionaryEntries, enabledDictionaryMap) {
|
2020-01-25 03:27:25 +00:00
|
|
|
const kanjiList = [];
|
2021-03-25 23:55:31 +00:00
|
|
|
for (const {character} of dictionaryEntries) {
|
2020-10-02 21:59:14 +00:00
|
|
|
kanjiList.push(character);
|
2020-01-25 03:27:25 +00:00
|
|
|
}
|
|
|
|
|
2020-10-04 15:12:15 +00:00
|
|
|
const metas = await this._database.findKanjiMetaBulk(kanjiList, enabledDictionaryMap);
|
2020-01-25 03:27:25 +00:00
|
|
|
for (const {character, mode, data, dictionary, index} of metas) {
|
2021-03-25 23:55:31 +00:00
|
|
|
const {index: dictionaryIndex, priority: dictionaryPriority} = this._getDictionaryOrder(dictionary, enabledDictionaryMap);
|
2020-01-25 03:27:25 +00:00
|
|
|
switch (mode) {
|
|
|
|
case 'freq':
|
2020-12-30 20:12:08 +00:00
|
|
|
{
|
2021-03-25 23:55:31 +00:00
|
|
|
const {frequencies} = dictionaryEntries[index];
|
|
|
|
frequencies.push(this._createKanjiFrequency(
|
|
|
|
frequencies.length,
|
|
|
|
dictionary,
|
|
|
|
dictionaryIndex,
|
|
|
|
dictionaryPriority,
|
|
|
|
character,
|
|
|
|
data
|
|
|
|
));
|
2020-12-30 20:12:08 +00:00
|
|
|
}
|
2020-01-25 03:27:25 +00:00
|
|
|
break;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2021-03-25 23:55:31 +00:00
|
|
|
async _expandKanjiStats(stats, dictionary) {
|
|
|
|
const statsEntries = Object.entries(stats);
|
|
|
|
const items = [];
|
|
|
|
for (const [name] of statsEntries) {
|
|
|
|
const query = this._getNameBase(name);
|
|
|
|
items.push({query, dictionary});
|
2020-10-02 21:59:14 +00:00
|
|
|
}
|
2017-09-18 02:16:08 +00:00
|
|
|
|
2021-03-25 23:55:31 +00:00
|
|
|
const databaseInfos = await this._database.findTagMetaBulk(items);
|
2017-09-18 02:16:08 +00:00
|
|
|
|
2020-02-15 18:12:03 +00:00
|
|
|
const statsGroups = new Map();
|
2021-03-25 23:55:31 +00:00
|
|
|
for (let i = 0, ii = statsEntries.length; i < ii; ++i) {
|
|
|
|
const databaseInfo = databaseInfos[i];
|
|
|
|
if (databaseInfo === null) { continue; }
|
2019-10-19 17:32:05 +00:00
|
|
|
|
2021-03-25 23:55:31 +00:00
|
|
|
const [name, value] = statsEntries[i];
|
|
|
|
const {category} = databaseInfo;
|
2020-02-15 18:12:03 +00:00
|
|
|
let group = statsGroups.get(category);
|
|
|
|
if (typeof group === 'undefined') {
|
|
|
|
group = [];
|
|
|
|
statsGroups.set(category, group);
|
|
|
|
}
|
2019-08-30 23:38:36 +00:00
|
|
|
|
2021-03-25 23:55:31 +00:00
|
|
|
group.push(this._createKanjiStat(name, value, databaseInfo, dictionary));
|
2017-09-18 02:16:08 +00:00
|
|
|
}
|
|
|
|
|
2021-03-25 23:55:31 +00:00
|
|
|
const groupedStats = {};
|
2020-02-15 18:12:03 +00:00
|
|
|
for (const [category, group] of statsGroups.entries()) {
|
2020-10-02 21:59:14 +00:00
|
|
|
this._sortKanjiStats(group);
|
2021-03-25 23:55:31 +00:00
|
|
|
groupedStats[category] = group;
|
2017-09-22 06:20:51 +00:00
|
|
|
}
|
2021-03-25 23:55:31 +00:00
|
|
|
return groupedStats;
|
2017-09-18 02:16:08 +00:00
|
|
|
}
|
2019-08-11 18:12:01 +00:00
|
|
|
|
2021-03-25 23:55:31 +00:00
|
|
|
_sortKanjiStats(stats) {
|
|
|
|
if (stats.length <= 1) { return; }
|
|
|
|
const stringComparer = this._stringComparer;
|
|
|
|
stats.sort((v1, v2) => {
|
|
|
|
const i = v1.order - v2.order;
|
|
|
|
return (i !== 0) ? i : stringComparer.compare(v1.content, v2.content);
|
|
|
|
});
|
2019-10-19 17:32:05 +00:00
|
|
|
}
|
|
|
|
|
2021-03-25 23:55:31 +00:00
|
|
|
// Helpers
|
2020-10-04 23:33:22 +00:00
|
|
|
|
2020-08-09 17:21:14 +00:00
|
|
|
_getNameBase(name) {
|
2019-08-11 18:12:01 +00:00
|
|
|
const pos = name.indexOf(':');
|
2019-11-24 16:02:52 +00:00
|
|
|
return (pos >= 0 ? name.substring(0, pos) : name);
|
2019-08-11 18:12:01 +00:00
|
|
|
}
|
2019-12-22 18:09:35 +00:00
|
|
|
|
2020-10-04 15:12:15 +00:00
|
|
|
_getSecondarySearchDictionaryMap(enabledDictionaryMap) {
|
|
|
|
const secondarySearchDictionaryMap = new Map();
|
2020-10-07 00:28:49 +00:00
|
|
|
for (const [dictionary, details] of enabledDictionaryMap.entries()) {
|
|
|
|
if (!details.allowSecondarySearches) { continue; }
|
|
|
|
secondarySearchDictionaryMap.set(dictionary, details);
|
2020-10-04 15:12:15 +00:00
|
|
|
}
|
|
|
|
return secondarySearchDictionaryMap;
|
|
|
|
}
|
|
|
|
|
2021-02-28 04:11:41 +00:00
|
|
|
_getDictionaryOrder(dictionary, enabledDictionaryMap) {
|
2020-10-04 15:12:15 +00:00
|
|
|
const info = enabledDictionaryMap.get(dictionary);
|
2021-03-06 18:04:50 +00:00
|
|
|
const {index, priority} = typeof info !== 'undefined' ? info : {index: enabledDictionaryMap.size, priority: 0};
|
|
|
|
return {index, priority};
|
2020-10-04 15:12:15 +00:00
|
|
|
}
|
|
|
|
|
2020-10-04 23:33:22 +00:00
|
|
|
*_getArrayVariants(arrayVariants) {
|
|
|
|
const ii = arrayVariants.length;
|
2020-08-10 01:04:09 +00:00
|
|
|
|
2020-10-04 23:33:22 +00:00
|
|
|
let total = 1;
|
|
|
|
for (let i = 0; i < ii; ++i) {
|
|
|
|
total *= arrayVariants[i].length;
|
2020-08-10 01:04:09 +00:00
|
|
|
}
|
|
|
|
|
2020-10-04 23:33:22 +00:00
|
|
|
for (let a = 0; a < total; ++a) {
|
|
|
|
const variant = [];
|
|
|
|
let index = a;
|
|
|
|
for (let i = 0; i < ii; ++i) {
|
|
|
|
const entryVariants = arrayVariants[i];
|
|
|
|
variant.push(entryVariants[index % entryVariants.length]);
|
|
|
|
index = Math.floor(index / entryVariants.length);
|
|
|
|
}
|
|
|
|
yield variant;
|
2020-08-10 01:04:09 +00:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2020-10-04 15:12:15 +00:00
|
|
|
_createMapKey(array) {
|
|
|
|
return JSON.stringify(array);
|
2020-08-10 01:04:09 +00:00
|
|
|
}
|
|
|
|
|
2021-03-25 23:55:31 +00:00
|
|
|
// Kanji data
|
2020-08-10 01:04:09 +00:00
|
|
|
|
2021-03-25 23:55:31 +00:00
|
|
|
_createKanjiStat(name, value, databaseInfo, dictionary) {
|
|
|
|
const {category, notes, order, score} = databaseInfo;
|
2020-10-02 21:59:14 +00:00
|
|
|
return {
|
|
|
|
name,
|
|
|
|
category: (typeof category === 'string' && category.length > 0 ? category : 'default'),
|
2021-03-25 23:55:31 +00:00
|
|
|
content: (typeof notes === 'string' ? notes : ''),
|
2020-10-02 21:59:14 +00:00
|
|
|
order: (typeof order === 'number' ? order : 0),
|
|
|
|
score: (typeof score === 'number' ? score : 0),
|
|
|
|
dictionary: (typeof dictionary === 'string' ? dictionary : null),
|
|
|
|
value
|
|
|
|
};
|
|
|
|
}
|
|
|
|
|
2021-03-25 23:55:31 +00:00
|
|
|
_createKanjiFrequency(index, dictionary, dictionaryIndex, dictionaryPriority, character, frequency) {
|
|
|
|
return {index, dictionary, dictionaryIndex, dictionaryPriority, character, frequency};
|
|
|
|
}
|
|
|
|
|
|
|
|
_createKanjiDictionaryEntry(character, dictionary, onyomi, kunyomi, tags, stats, definitions) {
|
2020-10-04 15:12:15 +00:00
|
|
|
return {
|
|
|
|
type: 'kanji',
|
|
|
|
character,
|
|
|
|
dictionary,
|
|
|
|
onyomi,
|
|
|
|
kunyomi,
|
|
|
|
tags,
|
|
|
|
stats,
|
2021-03-25 23:55:31 +00:00
|
|
|
definitions,
|
2020-10-04 15:12:15 +00:00
|
|
|
frequencies: []
|
|
|
|
};
|
|
|
|
}
|
|
|
|
|
2021-03-25 23:55:31 +00:00
|
|
|
// Term data
|
2020-10-02 21:59:14 +00:00
|
|
|
|
2021-03-25 23:55:31 +00:00
|
|
|
_createTag(databaseTag, name, dictionary) {
|
|
|
|
const {category, notes, order, score} = (databaseTag !== null ? databaseTag : {});
|
|
|
|
return {
|
|
|
|
name,
|
|
|
|
category: (typeof category === 'string' && category.length > 0 ? category : 'default'),
|
|
|
|
order: (typeof order === 'number' ? order : 0),
|
|
|
|
score: (typeof score === 'number' ? score : 0),
|
|
|
|
content: (typeof notes === 'string' && notes.length > 0 ? [notes] : []),
|
|
|
|
dictionaries: [dictionary],
|
|
|
|
redundant: false
|
|
|
|
};
|
|
|
|
}
|
2020-10-02 21:59:14 +00:00
|
|
|
|
2021-03-25 23:55:31 +00:00
|
|
|
_createTagGroup(dictionary, tagNames) {
|
|
|
|
return {dictionary, tagNames};
|
|
|
|
}
|
|
|
|
|
|
|
|
_createSource(originalText, transformedText, deinflectedText, isPrimary) {
|
|
|
|
return {originalText, transformedText, deinflectedText, isPrimary};
|
|
|
|
}
|
|
|
|
|
2021-03-26 23:50:54 +00:00
|
|
|
_createTermHeadword(index, term, reading, sources, tags, wordClasses) {
|
|
|
|
return {index, term, reading, sources, tags, wordClasses};
|
2021-03-25 23:55:31 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
_createTermDefinition(index, headwordIndices, dictionary, tags, entries) {
|
|
|
|
return {index, headwordIndices, dictionary, tags, entries};
|
|
|
|
}
|
2020-10-02 21:59:14 +00:00
|
|
|
|
2021-03-25 23:55:31 +00:00
|
|
|
_createTermPronunciation(index, headwordIndex, dictionary, dictionaryIndex, dictionaryPriority, pitches) {
|
|
|
|
return {index, headwordIndex, dictionary, dictionaryIndex, dictionaryPriority, pitches};
|
|
|
|
}
|
|
|
|
|
|
|
|
_createTermFrequency(index, headwordIndex, dictionary, dictionaryIndex, dictionaryPriority, hasReading, frequency) {
|
|
|
|
return {index, headwordIndex, dictionary, dictionaryIndex, dictionaryPriority, hasReading, frequency};
|
|
|
|
}
|
|
|
|
|
2021-04-03 19:41:44 +00:00
|
|
|
_createTermDictionaryEntry(id, isPrimary, sequence, sequenceDictionary, inflections, score, dictionaryIndex, dictionaryPriority, sourceTermExactMatchCount, maxTransformedTextLength, headwords, definitions) {
|
2020-10-02 21:59:14 +00:00
|
|
|
return {
|
2020-10-04 15:12:15 +00:00
|
|
|
type: 'term',
|
|
|
|
id,
|
2021-03-02 00:01:30 +00:00
|
|
|
isPrimary,
|
2020-10-04 15:12:15 +00:00
|
|
|
sequence,
|
2021-04-03 19:41:44 +00:00
|
|
|
sequenceDictionary,
|
2021-03-25 23:55:31 +00:00
|
|
|
inflections,
|
2020-10-04 15:12:15 +00:00
|
|
|
score,
|
2021-03-25 23:55:31 +00:00
|
|
|
dictionaryIndex,
|
|
|
|
dictionaryPriority,
|
|
|
|
sourceTermExactMatchCount,
|
2021-03-26 01:40:27 +00:00
|
|
|
maxTransformedTextLength,
|
2021-03-25 23:55:31 +00:00
|
|
|
headwords,
|
|
|
|
definitions,
|
|
|
|
pronunciations: [],
|
|
|
|
frequencies: []
|
2020-10-04 15:12:15 +00:00
|
|
|
};
|
|
|
|
}
|
|
|
|
|
2021-03-25 23:55:31 +00:00
|
|
|
_createTermDictionaryEntryFromDatabaseEntry(databaseEntry, originalText, transformedText, deinflectedText, reasons, isPrimary, enabledDictionaryMap) {
|
2021-04-08 23:59:55 +00:00
|
|
|
const {term, reading: rawReading, definitionTags, termTags, definitions, score, dictionary, id, sequence, rules} = databaseEntry;
|
2021-04-04 20:22:35 +00:00
|
|
|
const reading = (rawReading.length > 0 ? rawReading : term);
|
2021-03-25 23:55:31 +00:00
|
|
|
const {index: dictionaryIndex, priority: dictionaryPriority} = this._getDictionaryOrder(dictionary, enabledDictionaryMap);
|
2021-04-04 20:22:35 +00:00
|
|
|
const sourceTermExactMatchCount = (isPrimary && deinflectedText === term ? 1 : 0);
|
2021-03-25 23:55:31 +00:00
|
|
|
const source = this._createSource(originalText, transformedText, deinflectedText, isPrimary);
|
2021-03-26 01:40:27 +00:00
|
|
|
const maxTransformedTextLength = transformedText.length;
|
2021-03-25 23:55:31 +00:00
|
|
|
|
|
|
|
const headwordTagGroups = [];
|
|
|
|
const definitionTagGroups = [];
|
|
|
|
if (termTags.length > 0) { headwordTagGroups.push(this._createTagGroup(dictionary, termTags)); }
|
|
|
|
if (definitionTags.length > 0) { definitionTagGroups.push(this._createTagGroup(dictionary, definitionTags)); }
|
|
|
|
|
|
|
|
return this._createTermDictionaryEntry(
|
|
|
|
id,
|
|
|
|
isPrimary,
|
|
|
|
sequence,
|
2021-04-03 19:41:44 +00:00
|
|
|
sequence >= 0 ? dictionary : null,
|
2020-10-04 15:12:15 +00:00
|
|
|
reasons,
|
|
|
|
score,
|
2021-03-25 23:55:31 +00:00
|
|
|
dictionaryIndex,
|
|
|
|
dictionaryPriority,
|
|
|
|
sourceTermExactMatchCount,
|
2021-03-26 01:40:27 +00:00
|
|
|
maxTransformedTextLength,
|
2021-04-04 20:22:35 +00:00
|
|
|
[this._createTermHeadword(0, term, reading, [source], headwordTagGroups, rules)],
|
2021-04-08 23:59:55 +00:00
|
|
|
[this._createTermDefinition(0, [0], dictionary, definitionTagGroups, definitions)]
|
2021-03-25 23:55:31 +00:00
|
|
|
);
|
2020-10-04 15:12:15 +00:00
|
|
|
}
|
|
|
|
|
2021-04-03 19:41:44 +00:00
|
|
|
_createGroupedDictionaryEntry(dictionaryEntries, sequence, sequenceDictionary, checkDuplicateDefinitions) {
|
2021-03-25 23:55:31 +00:00
|
|
|
// Headwords are generated before sorting, so that the order of dictionaryEntries can be maintained
|
|
|
|
const definitionEntries = [];
|
|
|
|
const headwords = new Map();
|
|
|
|
for (const dictionaryEntry of dictionaryEntries) {
|
|
|
|
const headwordIndexMap = this._addTermHeadwords(headwords, dictionaryEntry.headwords);
|
|
|
|
definitionEntries.push({index: definitionEntries.length, dictionaryEntry, headwordIndexMap});
|
2020-10-04 15:12:15 +00:00
|
|
|
}
|
2021-03-25 23:55:31 +00:00
|
|
|
|
|
|
|
// Sort
|
|
|
|
if (definitionEntries.length > 1) {
|
|
|
|
this._sortTermDefinitionEntries(definitionEntries);
|
|
|
|
} else {
|
|
|
|
checkDuplicateDefinitions = false;
|
2020-10-04 15:12:15 +00:00
|
|
|
}
|
|
|
|
|
2021-03-25 23:55:31 +00:00
|
|
|
// Merge dictionary entry data
|
|
|
|
let score = Number.MIN_SAFE_INTEGER;
|
|
|
|
let dictionaryIndex = Number.MAX_SAFE_INTEGER;
|
|
|
|
let dictionaryPriority = Number.MIN_SAFE_INTEGER;
|
2021-03-26 01:40:27 +00:00
|
|
|
let maxTransformedTextLength = 0;
|
2021-03-25 23:55:31 +00:00
|
|
|
let sourceTermExactMatchCount = 0;
|
|
|
|
let isPrimary = false;
|
|
|
|
const definitions = [];
|
|
|
|
const definitionsMap = checkDuplicateDefinitions ? new Map() : null;
|
|
|
|
let inflections = null;
|
|
|
|
|
|
|
|
for (const {dictionaryEntry, headwordIndexMap} of definitionEntries) {
|
|
|
|
score = Math.max(score, dictionaryEntry.score);
|
|
|
|
dictionaryIndex = Math.min(dictionaryIndex, dictionaryEntry.dictionaryIndex);
|
|
|
|
dictionaryPriority = Math.max(dictionaryPriority, dictionaryEntry.dictionaryPriority);
|
|
|
|
if (dictionaryEntry.isPrimary) {
|
|
|
|
isPrimary = true;
|
2021-03-26 01:40:27 +00:00
|
|
|
maxTransformedTextLength = Math.max(maxTransformedTextLength, dictionaryEntry.maxTransformedTextLength);
|
2021-03-25 23:55:31 +00:00
|
|
|
sourceTermExactMatchCount += dictionaryEntry.sourceTermExactMatchCount;
|
|
|
|
const dictionaryEntryInflections = dictionaryEntry.inflections;
|
|
|
|
if (inflections === null || dictionaryEntryInflections.length < inflections.length) {
|
|
|
|
inflections = dictionaryEntryInflections;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
if (checkDuplicateDefinitions) {
|
|
|
|
this._addTermDefinitions2(definitions, definitionsMap, dictionaryEntry.definitions, headwordIndexMap);
|
|
|
|
} else {
|
|
|
|
this._addTermDefinitions(definitions, dictionaryEntry.definitions, headwordIndexMap);
|
|
|
|
}
|
|
|
|
}
|
2020-10-04 22:54:03 +00:00
|
|
|
|
2021-03-25 23:55:31 +00:00
|
|
|
return this._createTermDictionaryEntry(
|
|
|
|
-1,
|
|
|
|
isPrimary,
|
2021-04-03 19:41:44 +00:00
|
|
|
sequence,
|
|
|
|
sequenceDictionary,
|
2021-03-25 23:55:31 +00:00
|
|
|
inflections !== null ? inflections : [],
|
|
|
|
score,
|
|
|
|
dictionaryIndex,
|
|
|
|
dictionaryPriority,
|
|
|
|
sourceTermExactMatchCount,
|
2021-03-26 01:40:27 +00:00
|
|
|
maxTransformedTextLength,
|
2021-03-25 23:55:31 +00:00
|
|
|
[...headwords.values()],
|
|
|
|
definitions
|
|
|
|
);
|
|
|
|
}
|
2020-10-04 22:10:10 +00:00
|
|
|
|
2021-03-25 23:55:31 +00:00
|
|
|
// Data collection addition functions
|
2020-10-04 15:12:15 +00:00
|
|
|
|
2021-03-25 23:55:31 +00:00
|
|
|
_addUniqueStrings(list, newItems) {
|
|
|
|
for (const item of newItems) {
|
|
|
|
if (!list.includes(item)) {
|
|
|
|
list.push(item);
|
|
|
|
}
|
|
|
|
}
|
2020-10-04 15:12:15 +00:00
|
|
|
}
|
|
|
|
|
2021-03-25 23:55:31 +00:00
|
|
|
_addUniqueSources(sources, newSources) {
|
|
|
|
if (newSources.length === 0) { return; }
|
|
|
|
if (sources.length === 0) {
|
|
|
|
sources.push(...newSources);
|
|
|
|
return;
|
|
|
|
}
|
|
|
|
for (const newSource of newSources) {
|
|
|
|
const {originalText, transformedText, deinflectedText, isPrimary} = newSource;
|
|
|
|
let has = false;
|
|
|
|
for (const source of sources) {
|
|
|
|
if (
|
|
|
|
source.deinflectedText === deinflectedText &&
|
|
|
|
source.transformedText === transformedText &&
|
|
|
|
source.originalText === originalText
|
|
|
|
) {
|
|
|
|
if (isPrimary) { source.isPrimary = true; }
|
|
|
|
has = true;
|
|
|
|
break;
|
|
|
|
}
|
2021-03-02 03:17:23 +00:00
|
|
|
}
|
2021-03-25 23:55:31 +00:00
|
|
|
if (!has) {
|
|
|
|
sources.push(newSource);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
2021-03-02 03:17:23 +00:00
|
|
|
|
2021-03-25 23:55:31 +00:00
|
|
|
_addUniqueTagGroups(tagGroups, newTagGroups) {
|
|
|
|
if (newTagGroups.length === 0) { return; }
|
|
|
|
for (const newTagGroup of newTagGroups) {
|
|
|
|
const {dictionary} = newTagGroup;
|
|
|
|
const ii = tagGroups.length;
|
|
|
|
if (ii > 0) {
|
|
|
|
let i = 0;
|
|
|
|
for (; i < ii; ++i) {
|
|
|
|
const tagGroup = tagGroups[i];
|
|
|
|
if (tagGroup.dictionary === dictionary) {
|
|
|
|
this._addUniqueStrings(tagGroup.tagNames, newTagGroup.tagNames);
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
if (i < ii) { continue; }
|
2021-03-02 03:17:23 +00:00
|
|
|
}
|
2021-03-25 23:55:31 +00:00
|
|
|
tagGroups.push(newTagGroup);
|
|
|
|
}
|
|
|
|
}
|
2021-03-02 03:17:23 +00:00
|
|
|
|
2021-03-25 23:55:31 +00:00
|
|
|
_addTermHeadwords(headwordsMap, headwords) {
|
|
|
|
const headwordIndexMap = [];
|
2021-03-26 23:50:54 +00:00
|
|
|
for (const {term, reading, sources, tags, wordClasses} of headwords) {
|
2021-03-25 23:55:31 +00:00
|
|
|
const key = this._createMapKey([term, reading]);
|
|
|
|
let headword = headwordsMap.get(key);
|
|
|
|
if (typeof headword === 'undefined') {
|
2021-03-26 23:50:54 +00:00
|
|
|
headword = this._createTermHeadword(headwordsMap.size, term, reading, [], [], []);
|
2021-03-25 23:55:31 +00:00
|
|
|
headwordsMap.set(key, headword);
|
2021-03-02 03:17:23 +00:00
|
|
|
}
|
2021-03-25 23:55:31 +00:00
|
|
|
this._addUniqueSources(headword.sources, sources);
|
|
|
|
this._addUniqueTagGroups(headword.tags, tags);
|
2021-03-26 23:50:54 +00:00
|
|
|
this._addUniqueStrings(headword.wordClasses, wordClasses);
|
2021-03-25 23:55:31 +00:00
|
|
|
headwordIndexMap.push(headword.index);
|
|
|
|
}
|
|
|
|
return headwordIndexMap;
|
|
|
|
}
|
|
|
|
|
|
|
|
_addUniqueTermHeadwordIndex(headwordIndices, headwordIndex) {
|
|
|
|
let end = headwordIndices.length;
|
|
|
|
if (end === 0) {
|
|
|
|
headwordIndices.push(headwordIndex);
|
|
|
|
return;
|
2021-03-02 03:17:23 +00:00
|
|
|
}
|
|
|
|
|
2021-03-25 23:55:31 +00:00
|
|
|
let start = 0;
|
|
|
|
while (start < end) {
|
|
|
|
const mid = Math.floor((start + end) / 2);
|
|
|
|
const value = headwordIndices[mid];
|
|
|
|
if (headwordIndex === value) { return; }
|
|
|
|
if (headwordIndex > value) {
|
|
|
|
start = mid + 1;
|
|
|
|
} else {
|
|
|
|
end = mid;
|
2020-10-04 22:10:10 +00:00
|
|
|
}
|
|
|
|
}
|
2021-03-25 23:55:31 +00:00
|
|
|
|
|
|
|
if (headwordIndex === headwordIndices[start]) { return; }
|
|
|
|
headwordIndices.splice(start, 0, headwordIndex);
|
2020-10-04 22:10:10 +00:00
|
|
|
}
|
|
|
|
|
2021-03-25 23:55:31 +00:00
|
|
|
_addTermDefinitions(definitions, newDefinitions, headwordIndexMap) {
|
|
|
|
for (const {headwordIndices, dictionary, tags, entries} of newDefinitions) {
|
|
|
|
const headwordIndicesNew = [];
|
|
|
|
for (const headwordIndex of headwordIndices) {
|
|
|
|
headwordIndicesNew.push(headwordIndexMap[headwordIndex]);
|
|
|
|
}
|
|
|
|
definitions.push(this._createTermDefinition(definitions.length, headwordIndicesNew, dictionary, tags, entries));
|
|
|
|
}
|
2020-08-10 01:04:09 +00:00
|
|
|
}
|
|
|
|
|
2021-03-25 23:55:31 +00:00
|
|
|
_addTermDefinitions2(definitions, definitionsMap, newDefinitions, headwordIndexMap) {
|
|
|
|
for (const {headwordIndices, dictionary, tags, entries} of newDefinitions) {
|
|
|
|
const key = this._createMapKey([dictionary, ...entries]);
|
|
|
|
let definition = definitionsMap.get(key);
|
|
|
|
if (typeof definition === 'undefined') {
|
|
|
|
definition = this._createTermDefinition(definitions.length, [], dictionary, [], [...entries]);
|
|
|
|
definitions.push(definition);
|
|
|
|
definitionsMap.set(key, definition);
|
|
|
|
}
|
2020-10-04 23:33:22 +00:00
|
|
|
|
2021-03-25 23:55:31 +00:00
|
|
|
const newHeadwordIndices = definition.headwordIndices;
|
|
|
|
for (const headwordIndex of headwordIndices) {
|
|
|
|
this._addUniqueTermHeadwordIndex(newHeadwordIndices, headwordIndexMap[headwordIndex]);
|
|
|
|
}
|
|
|
|
this._addUniqueTagGroups(definition.tags, tags);
|
|
|
|
}
|
|
|
|
}
|
2020-08-10 01:04:09 +00:00
|
|
|
|
2021-03-25 23:55:31 +00:00
|
|
|
// Sorting functions
|
|
|
|
|
|
|
|
_sortDatabaseEntriesByIndex(databaseEntries) {
|
|
|
|
if (databaseEntries.length <= 1) { return; }
|
|
|
|
databaseEntries.sort((a, b) => a.index - b.index);
|
2020-10-02 21:59:14 +00:00
|
|
|
}
|
|
|
|
|
2021-03-25 23:55:31 +00:00
|
|
|
_sortTermDictionaryEntries(dictionaryEntries) {
|
2020-10-02 21:59:14 +00:00
|
|
|
const stringComparer = this._stringComparer;
|
2021-02-27 04:55:32 +00:00
|
|
|
const compareFunction = (v1, v2) => {
|
2021-03-25 23:55:31 +00:00
|
|
|
// Sort by length of source term
|
2021-03-26 01:40:27 +00:00
|
|
|
let i = v2.maxTransformedTextLength - v1.maxTransformedTextLength;
|
2021-03-25 23:55:31 +00:00
|
|
|
if (i !== 0) { return i; }
|
2021-03-11 01:27:10 +00:00
|
|
|
|
2021-03-25 23:55:31 +00:00
|
|
|
// Sort by the number of inflection reasons
|
|
|
|
i = v1.inflections.length - v2.inflections.length;
|
|
|
|
if (i !== 0) { return i; }
|
2021-03-11 01:27:10 +00:00
|
|
|
|
2021-03-25 23:55:31 +00:00
|
|
|
// Sort by how many terms exactly match the source (e.g. for exact kana prioritization)
|
|
|
|
i = v2.sourceTermExactMatchCount - v1.sourceTermExactMatchCount;
|
|
|
|
if (i !== 0) { return i; }
|
2020-10-04 22:54:03 +00:00
|
|
|
|
2021-03-06 18:27:20 +00:00
|
|
|
// Sort by dictionary priority
|
2021-03-25 23:55:31 +00:00
|
|
|
i = v2.dictionaryPriority - v1.dictionaryPriority;
|
2021-03-06 18:27:20 +00:00
|
|
|
if (i !== 0) { return i; }
|
|
|
|
|
2021-03-06 18:04:50 +00:00
|
|
|
// Sort by term score
|
2020-10-02 21:59:14 +00:00
|
|
|
i = v2.score - v1.score;
|
|
|
|
if (i !== 0) { return i; }
|
|
|
|
|
2021-04-04 20:22:35 +00:00
|
|
|
// Sort by headword term text
|
2021-03-25 23:55:31 +00:00
|
|
|
const headwords1 = v1.headwords;
|
|
|
|
const headwords2 = v2.headwords;
|
|
|
|
for (let j = 0, jj = Math.min(headwords1.length, headwords2.length); j < jj; ++j) {
|
|
|
|
const term1 = headwords1[j].term;
|
|
|
|
const term2 = headwords2[j].term;
|
|
|
|
|
|
|
|
i = term2.length - term1.length;
|
2021-03-06 18:04:50 +00:00
|
|
|
if (i !== 0) { return i; }
|
2020-10-04 23:04:21 +00:00
|
|
|
|
2021-03-25 23:55:31 +00:00
|
|
|
i = stringComparer.compare(term1, term2);
|
2021-03-06 18:04:50 +00:00
|
|
|
if (i !== 0) { return i; }
|
|
|
|
}
|
2020-10-02 21:59:14 +00:00
|
|
|
|
2021-03-06 18:04:50 +00:00
|
|
|
// Sort by dictionary order
|
2021-03-25 23:55:31 +00:00
|
|
|
i = v1.dictionaryIndex - v2.dictionaryIndex;
|
2021-03-06 18:04:50 +00:00
|
|
|
return i;
|
2020-10-04 15:12:15 +00:00
|
|
|
};
|
2021-03-25 23:55:31 +00:00
|
|
|
dictionaryEntries.sort(compareFunction);
|
2020-10-02 21:59:14 +00:00
|
|
|
}
|
|
|
|
|
2021-03-25 23:55:31 +00:00
|
|
|
_sortTermDefinitionEntries(definitionEntries) {
|
|
|
|
const compareFunction = (e1, e2) => {
|
|
|
|
const v1 = e1.dictionaryEntry;
|
|
|
|
const v2 = e2.dictionaryEntry;
|
2020-10-02 21:59:14 +00:00
|
|
|
|
2021-03-25 23:55:31 +00:00
|
|
|
// Sort by dictionary priority
|
|
|
|
let i = v2.dictionaryPriority - v1.dictionaryPriority;
|
|
|
|
if (i !== 0) { return i; }
|
2020-10-05 02:04:44 +00:00
|
|
|
|
2021-03-25 23:55:31 +00:00
|
|
|
// Sort by term score
|
|
|
|
i = v2.score - v1.score;
|
2020-10-02 21:59:14 +00:00
|
|
|
if (i !== 0) { return i; }
|
|
|
|
|
2021-03-25 23:55:31 +00:00
|
|
|
// Sort by definition headword index
|
|
|
|
const definitions1 = v1.definitions;
|
|
|
|
const definitions2 = v2.definitions;
|
|
|
|
const headwordIndexMap1 = e1.headwordIndexMap;
|
|
|
|
const headwordIndexMap2 = e2.headwordIndexMap;
|
|
|
|
for (let j = 0, jj = Math.min(definitions1.length, definitions2.length); j < jj; ++j) {
|
|
|
|
const headwordIndices1 = definitions1[j].headwordIndices;
|
|
|
|
const headwordIndices2 = definitions2[j].headwordIndices;
|
|
|
|
const kk = headwordIndices1.length;
|
|
|
|
i = headwordIndices2.length - kk;
|
|
|
|
if (i !== 0) { return i; }
|
|
|
|
for (let k = 0; k < kk; ++k) {
|
|
|
|
i = headwordIndexMap1[headwordIndices1[k]] - headwordIndexMap2[headwordIndices2[k]];
|
|
|
|
if (i !== 0) { return i; }
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
// Sort by dictionary order
|
|
|
|
i = v1.dictionaryIndex - v2.dictionaryIndex;
|
|
|
|
if (i !== 0) { return i; }
|
|
|
|
|
|
|
|
// Sort by original order
|
|
|
|
i = e1.index - e2.index;
|
|
|
|
return i;
|
|
|
|
};
|
|
|
|
definitionEntries.sort(compareFunction);
|
2020-08-10 01:04:09 +00:00
|
|
|
}
|
2021-01-03 17:12:55 +00:00
|
|
|
|
2021-03-25 23:55:31 +00:00
|
|
|
_sortTermDictionaryEntriesById(dictionaryEntries) {
|
|
|
|
if (dictionaryEntries.length <= 1) { return; }
|
|
|
|
dictionaryEntries.sort((a, b) => a.id - b.id);
|
|
|
|
}
|
|
|
|
|
|
|
|
_sortTermDictionaryEntryData(dictionaryEntries) {
|
|
|
|
const compare = (v1, v2) => {
|
2021-03-06 18:04:50 +00:00
|
|
|
// Sort by dictionary priority
|
2021-03-25 23:55:31 +00:00
|
|
|
let i = v2.dictionaryPriority - v1.dictionaryPriority;
|
2021-02-28 03:27:00 +00:00
|
|
|
if (i !== 0) { return i; }
|
|
|
|
|
2021-04-04 20:22:35 +00:00
|
|
|
// Sory by headword order
|
2021-03-25 23:55:31 +00:00
|
|
|
i = v1.headwordIndex - v2.headwordIndex;
|
2021-02-28 03:27:00 +00:00
|
|
|
if (i !== 0) { return i; }
|
|
|
|
|
2021-03-06 18:04:50 +00:00
|
|
|
// Sort by dictionary order
|
2021-03-25 23:55:31 +00:00
|
|
|
i = v1.dictionaryIndex - v2.dictionaryIndex;
|
2021-03-06 18:04:50 +00:00
|
|
|
if (i !== 0) { return i; }
|
|
|
|
|
2021-02-28 03:27:00 +00:00
|
|
|
// Default order
|
|
|
|
i = v1.index - v2.index;
|
|
|
|
return i;
|
|
|
|
};
|
|
|
|
|
2021-03-25 23:55:31 +00:00
|
|
|
for (const {definitions, frequencies, pronunciations} of dictionaryEntries) {
|
|
|
|
this._flagRedundantDefinitionTags(definitions);
|
|
|
|
frequencies.sort(compare);
|
|
|
|
pronunciations.sort(compare);
|
2021-02-28 03:27:00 +00:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2021-03-25 23:55:31 +00:00
|
|
|
_sortKanjiDictionaryEntryData(dictionaryEntries) {
|
|
|
|
const compare = (v1, v2) => {
|
2021-03-06 18:04:50 +00:00
|
|
|
// Sort by dictionary priority
|
2021-03-25 23:55:31 +00:00
|
|
|
let i = v2.dictionaryPriority - v1.dictionaryPriority;
|
2021-03-06 18:04:50 +00:00
|
|
|
if (i !== 0) { return i; }
|
|
|
|
|
|
|
|
// Sort by dictionary order
|
2021-03-25 23:55:31 +00:00
|
|
|
i = v1.dictionaryIndex - v2.dictionaryIndex;
|
2021-02-28 03:27:00 +00:00
|
|
|
if (i !== 0) { return i; }
|
|
|
|
|
|
|
|
// Default order
|
|
|
|
i = v1.index - v2.index;
|
|
|
|
return i;
|
|
|
|
};
|
|
|
|
|
2021-03-25 23:55:31 +00:00
|
|
|
for (const {frequencies} of dictionaryEntries) {
|
|
|
|
frequencies.sort(compare);
|
2021-01-03 17:12:55 +00:00
|
|
|
}
|
|
|
|
}
|
2016-03-20 17:52:27 +00:00
|
|
|
}
|