Refactor Translator and dictionary entry format (#1553)

* Update test data

* Move translator.js

* Create new version of Translator

* Update Backend

* Update DictionaryDataUtil

* Update DisplayGenerator

* Create AnkiNoteDataCreator

* Replace AnkiNoteData with AnkiNoteDataCreator

* Update tests

* Remove AnkiNoteData

* Update test data

* Remove translator-old.js

* Add TypeScript interface definitions for the new translator data format
This commit is contained in:
toasted-nutbread 2021-03-25 19:55:31 -04:00 committed by GitHub
parent e7035dcff4
commit 4be5c8fd9f
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
16 changed files with 9951 additions and 29225 deletions

View File

@ -110,7 +110,7 @@
{
"files": [
"ext/js/core.js",
"ext/js/data/anki-note-data.js",
"ext/js/data/anki-note-data-creator.js",
"ext/js/language/dictionary-data-util.js",
"ext/js/templates/template-renderer.js"
],
@ -122,7 +122,7 @@
"files": ["ext/**/*.js"],
"excludedFiles": [
"ext/js/core.js",
"ext/js/data/anki-note-data.js",
"ext/js/data/anki-note-data-creator.js",
"ext/js/language/dictionary-data-util.js",
"ext/js/templates/template-renderer.js"
],
@ -151,7 +151,7 @@
"excludedFiles": [
"ext/js/core.js",
"ext/js/yomichan.js",
"ext/js/data/anki-note-data.js",
"ext/js/data/anki-note-data-creator.js",
"ext/js/language/dictionary-data-util.js",
"ext/js/templates/template-renderer.js"
],

1
.gitattributes vendored
View File

@ -1,6 +1,7 @@
*.sh text eol=lf
*.handlebars text eol=lf
*.js text eol=lf
*.ts text eol=lf
*.json text eol=lf
*.css text eol=lf
*.html text eol=lf

View File

@ -30,7 +30,7 @@ class TranslatorVM extends DatabaseVM {
super();
this._japaneseUtil = null;
this._translator = null;
this._AnkiNoteData = null;
this._ankiNoteDataCreator = null;
this._dictionaryName = null;
}
@ -38,10 +38,14 @@ class TranslatorVM extends DatabaseVM {
return this._translator;
}
get ankiNoteDataCreator() {
return this._ankiNoteDataCreator;
}
async prepare(dictionaryDirectory, dictionaryName) {
this.execute([
'js/core.js',
'js/data/anki-note-data.js',
'js/data/anki-note-data-creator.js',
'js/data/database.js',
'js/data/json-schema.js',
'js/general/cache-map.js',
@ -60,13 +64,13 @@ class TranslatorVM extends DatabaseVM {
DictionaryDatabase,
JapaneseUtil,
Translator,
AnkiNoteData
AnkiNoteDataCreator
] = this.get([
'DictionaryImporter',
'DictionaryDatabase',
'JapaneseUtil',
'Translator',
'AnkiNoteData'
'AnkiNoteDataCreator'
]);
// Dictionary
@ -98,7 +102,7 @@ class TranslatorVM extends DatabaseVM {
this._translator.prepare(deinflectionReasions);
// Assign properties
this._AnkiNoteData = AnkiNoteData;
this._ankiNoteDataCreator = new AnkiNoteDataCreator(this._japaneseUtil);
}
createTestAnkiNoteData(definition, mode) {
@ -116,8 +120,7 @@ class TranslatorVM extends DatabaseVM {
},
injectedMedia: null
};
const AnkiNoteData = this._AnkiNoteData;
return new AnkiNoteData(this._japaneseUtil, marker, data).createPublic();
return this._ankiNoteDataCreator.create(marker, data);
}
buildOptions(optionsPresets, optionsArray) {

View File

@ -0,0 +1,402 @@
/*
* Copyright (C) 2021 Yomichan Authors
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see <https://www.gnu.org/licenses/>.
*/
/**
 * Interface definitions describing the dictionary entry data format.
 * Entries are either kanji entries (`KanjiDictionaryEntry`) or term entries
 * (`TermDictionaryEntry`), distinguished by the `type` field.
 */
namespace Translation {
// Common
/**
* A generic dictionary entry which is used as the base interface.
*/
export interface DictionaryEntry {
/**
* A string corresponding to the type of the entry.
* `'kanji'` corresponds to a KanjiDictionaryEntry.
* `'term'` corresponds to a TermDictionaryEntry.
*/
type: string;
}
/**
* A tag represents some brief information about part of a dictionary entry.
*/
export interface Tag {
/**
* The name of the tag.
*/
name: string;
/**
* The category of the tag.
*/
category: string;
/**
* A number indicating the sorting order of the tag.
*/
order: number;
/**
* A score value for the tag.
*/
score: number;
/**
* An array of descriptions for the tag. If there are multiple entries,
* the values will typically have originated from different dictionaries.
* However, there is no correlation between the length of this array and
* the length of the `dictionaries` field, as duplicates are removed.
*/
content: string[];
/**
* An array of dictionary names that contained a tag with this name and category.
*/
dictionaries: string[];
/**
* Whether or not this tag is redundant with previous tags.
*/
redundant: boolean;
}
// Kanji
/**
* A dictionary entry for a kanji character.
* `DictionaryEntry.type` is always `'kanji'`.
*/
export interface KanjiDictionaryEntry extends DictionaryEntry {
/**
* The kanji character that was looked up.
*/
character: string;
/**
* The name of the dictionary that the information originated from.
*/
dictionary: string;
/**
* Onyomi readings for the kanji character.
*/
onyomi: string[];
/**
* Kunyomi readings for the kanji character.
*/
kunyomi: string[];
/**
* Tags for the kanji character.
*/
tags: Tag[];
/**
* An object containing stats about the kanji character.
*/
stats: KanjiStatGroups;
/**
* Definitions for the kanji character.
*/
definitions: string[];
/**
* Frequency information for the kanji character.
*/
frequencies: KanjiFrequency[];
}
/**
* An object with groups of stats about a kanji character.
*/
export interface KanjiStatGroups {
/**
* A group of stats.
* @param propName The name of the group.
*/
[propName: string]: KanjiStat[];
}
/**
* A stat represents a generic piece of information about a kanji character.
*/
export interface KanjiStat {
/**
* The name of the stat.
*/
name: string;
/**
* The category of the stat.
*/
category: string;
/**
* A description of the stat.
*/
content: string;
/**
* A number indicating the sorting order of the stat.
*/
order: number;
/**
* A score value for the stat.
*/
score: number;
/**
* The name of the dictionary that the stat originated from.
*/
dictionary: string;
/**
* A value for the stat.
*/
value: number | string;
}
/**
* Frequency information corresponds to how frequently a character appears in a corpus,
* which can be a number of occurrences or an overall rank.
*/
export interface KanjiFrequency {
/**
* The original order of the frequency, which is usually used for sorting.
*/
index: number;
/**
* The name of the dictionary that the frequency information originated from.
*/
dictionary: string;
/**
* The index of the dictionary in the original list of dictionaries used for the lookup.
*/
dictionaryIndex: number;
/**
* The priority of the dictionary.
*/
dictionaryPriority: number;
/**
* The kanji character for the frequency.
*/
character: string;
/**
* The frequency for the character, as a number of occurrences or an overall rank.
*/
frequency: number | string;
}
// Terms
/**
* A dictionary entry for a term or group of terms.
* `DictionaryEntry.type` is always `'term'`.
*/
export interface TermDictionaryEntry extends DictionaryEntry {
/**
* Database ID for the term, or `-1` if multiple entries have been merged.
*/
id: number;
/**
* Whether or not any of the sources is a primary source. Primary sources are derived from the
* original search text, while non-primary sources originate from related terms.
*/
isPrimary: boolean;
/**
* Database sequence number for the term, or `-1` if multiple entries have been merged.
*/
sequence: number;
/**
* A list of inflections that was applied to get the term.
*/
inflections: string[];
/**
* A score for the dictionary entry.
*/
score: number;
/**
* The index of the dictionary in the original list of dictionaries used for the lookup.
*/
dictionaryIndex: number;
/**
* The priority of the dictionary.
*/
dictionaryPriority: number;
/**
* The number of primary sources that had an exact text match for the term.
*/
sourceTermExactMatchCount: number;
/**
* The maximum deinflected text length of a primary source.
*/
maxDeinflectedTextLength: number;
/**
* Headwords for the entry.
*/
headwords: TermHeadword[];
/**
* Definitions for the entry.
*/
definitions: TermDefinition[];
/**
* Pronunciations for the entry.
*/
pronunciations: TermPronunciation[];
/**
* Frequencies for the entry.
*/
frequencies: TermFrequency[];
}
/**
* A term headword is a combination of a term, reading, and auxiliary information.
*/
export interface TermHeadword {
/**
* The original order of the headword, which is usually used for sorting.
*/
index: number;
/**
* The text for the term.
*/
term: string;
/**
* The reading of the term.
*/
reading: string;
/**
* The sources of the term.
*/
sources: TermSource[];
/**
* Tags for the headword.
*/
tags: Tag[];
}
/**
* A definition contains a list of entries and information about what terms it corresponds to.
*/
export interface TermDefinition {
/**
* The original order of the definition, which is usually used for sorting.
*/
index: number;
/**
* A list of headwords that this definition corresponds to.
*/
headwordIndices: number[];
/**
* The name of the dictionary that the definition information originated from.
*/
dictionary: string;
/**
* Tags for the definition.
*/
tags: Tag[];
/**
* The definition entries.
*/
entries: string[];
}
/**
* A term pronunciation represents different ways to pronounce one of the headwords.
*/
export interface TermPronunciation {
/**
* The original order of the pronunciation, which is usually used for sorting.
*/
index: number;
/**
* Which headword this pronunciation corresponds to.
*/
headwordIndex: number;
/**
* The name of the dictionary that the pronunciation information originated from.
*/
dictionary: string;
/**
* The index of the dictionary in the original list of dictionaries used for the lookup.
*/
dictionaryIndex: number;
/**
* The priority of the dictionary.
*/
dictionaryPriority: number;
/**
* The pitch accent representations for the term.
*/
pitches: TermPitch[];
}
/**
* Pitch accent information for a term, represented as the position of the downstep.
*/
export interface TermPitch {
/**
* Position of the downstep, as a number of mora.
*/
position: number;
/**
* Tags for the pitch accent.
*/
tags: Tag[];
}
/**
* Frequency information corresponds to how frequently a term appears in a corpus,
* which can be a number of occurrences or an overall rank.
*/
export interface TermFrequency {
/**
* The original order of the frequency, which is usually used for sorting.
*/
index: number;
/**
* Which headword this frequency corresponds to.
*/
headwordIndex: number;
/**
* The name of the dictionary that the frequency information originated from.
*/
dictionary: string;
/**
* The index of the dictionary in the original list of dictionaries used for the lookup.
*/
dictionaryIndex: number;
/**
* The priority of the dictionary.
*/
dictionaryPriority: number;
/**
* Whether or not the frequency had an explicit reading specified.
*/
hasReading: boolean;
/**
* The frequency for the term, as a number of occurrences or an overall rank.
*/
frequency: number | string;
}
/**
* Source information represents how the original text was transformed to get to the final term.
*/
export interface TermSource {
/**
* The original text that was searched.
*/
originalText: string;
/**
* The original text after being transformed, but before applying deinflections.
*/
transformedText: string;
/**
* The final text after applying deinflections.
*/
deinflectedText: string;
/**
* Whether or not this source is a primary source. Primary sources are derived from the
* original search text, while non-primary sources originate from related terms.
*/
isPrimary: boolean;
}
}

View File

@ -414,9 +414,9 @@ class Backend {
const options = this._getProfileOptions(optionsContext);
const {general: {resultOutputMode: mode, maxResults}} = options;
const findTermsOptions = this._getTranslatorFindTermsOptions(details, options);
const [definitions, length] = await this._translator.findTerms(mode, text, findTermsOptions);
definitions.splice(maxResults);
return {length, definitions};
const {dictionaryEntries, originalTextLength} = await this._translator.findTerms(mode, text, findTermsOptions);
dictionaryEntries.splice(maxResults);
return {length: originalTextLength, definitions: dictionaryEntries};
}
async _onApiTextParse({text, optionsContext}) {
@ -1050,7 +1050,7 @@ class Backend {
let i = 0;
const ii = text.length;
while (i < ii) {
const [definitions, sourceLength] = await this._translator.findTerms(
const {dictionaryEntries, originalTextLength} = await this._translator.findTerms(
'simple',
text.substring(i, i + scanningLength),
findTermsOptions
@ -1058,20 +1058,20 @@ class Backend {
const codePoint = text.codePointAt(i);
const character = String.fromCodePoint(codePoint);
if (
definitions.length > 0 &&
sourceLength > 0 &&
(sourceLength !== character.length || this._japaneseUtil.isCodePointJapanese(codePoint))
dictionaryEntries.length > 0 &&
originalTextLength > 0 &&
(originalTextLength !== character.length || this._japaneseUtil.isCodePointJapanese(codePoint))
) {
previousUngroupedSegment = null;
const {expression, reading} = definitions[0];
const source = text.substring(i, i + sourceLength);
const term = [];
for (const {text: text2, furigana} of jp.distributeFuriganaInflected(expression, reading, source)) {
const {headwords: [{term, reading}]} = dictionaryEntries[0];
const source = text.substring(i, i + originalTextLength);
const textSegments = [];
for (const {text: text2, furigana} of jp.distributeFuriganaInflected(term, reading, source)) {
const reading2 = jp.convertReading(text2, furigana, readingMode);
term.push({text: text2, reading: reading2});
textSegments.push({text: text2, reading: reading2});
}
results.push(term);
i += sourceLength;
results.push(textSegments);
i += originalTextLength;
} else {
if (previousUngroupedSegment === null) {
previousUngroupedSegment = {text: character, reading: ''};

View File

@ -0,0 +1,598 @@
/*
* Copyright (C) 2021 Yomichan Authors
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see <https://www.gnu.org/licenses/>.
*/
/* global
* DictionaryDataUtil
*/
/**
 * Converts dictionary entries from the internal format into the object layout
 * that Anki templates consume, keeping that layout backwards compatible.
 * Most derived values are computed lazily, on first property access.
 */
class AnkiNoteDataCreator {
    /**
     * Creates a new instance.
     * @param japaneseUtil An instance of `JapaneseUtil`.
     */
    constructor(japaneseUtil) {
        this._japaneseUtil = japaneseUtil;
    }

    /**
     * Builds the compatibility object handed to the Anki template renderer.
     * @param marker The marker that is being used for template rendering.
     * @param details An object with the fields `definition` (the dictionary entry),
     *   `resultOutputMode`, `mode`, `glossaryLayoutMode`, `compactTags`, `context`
     *   and the optional `injectedMedia` (defaults to `null`).
     * @returns An object used for rendering Anki templates.
     */
    create(marker, {
        definition: dictionaryEntry,
        resultOutputMode,
        mode,
        glossaryLayoutMode,
        compactTags,
        context,
        injectedMedia=null
    }) {
        // Getters on the returned literal have their own `this`, so keep a reference.
        const creator = this;
        const lazyDefinition = this.createCachedValue(() => this._getDefinition(dictionaryEntry, injectedMedia, context, resultOutputMode));
        const lazyUniqueExpressions = this.createCachedValue(() => this._getUniqueExpressions(dictionaryEntry));
        const lazyUniqueReadings = this.createCachedValue(() => this._getUniqueReadings(dictionaryEntry));
        const lazyPublicContext = this.createCachedValue(() => this._getPublicContext(context));
        const lazyPitches = this.createCachedValue(() => this._getPitches(dictionaryEntry));
        const lazyPitchCount = this.createCachedValue(() => this._getPitchCount(lazyPitches));

        const isGroupMode = (resultOutputMode === 'group');
        const isMergeMode = (resultOutputMode === 'merge');
        const isCompactLayout = (glossaryLayoutMode === 'compact');

        return {
            marker,
            get definition() { return creator.getCachedValue(lazyDefinition); },
            glossaryLayoutMode,
            compactTags,
            group: isGroupMode,
            merge: isMergeMode,
            modeTermKanji: (mode === 'term-kanji'),
            modeTermKana: (mode === 'term-kana'),
            modeKanji: (mode === 'kanji'),
            compactGlossaries: isCompactLayout,
            get uniqueExpressions() { return creator.getCachedValue(lazyUniqueExpressions); },
            get uniqueReadings() { return creator.getCachedValue(lazyUniqueReadings); },
            get pitches() { return creator.getCachedValue(lazyPitches); },
            get pitchCount() { return creator.getCachedValue(lazyPitchCount); },
            get context() { return creator.getCachedValue(lazyPublicContext); }
        };
    }

    /**
     * Wraps a getter so that its result can be computed on demand.
     * @param getter The function to invoke to get the return value.
     * @returns An object which can be passed into `getCachedValue`.
     */
    createCachedValue(getter) {
        return {getter, hasValue: false, value: void 0};
    }

    /**
     * Resolves a deferred value, invoking its getter only on the first call.
     * @param item An object that was returned from `createCachedValue`.
     * @returns The result of evaluating the getter, which is cached after the first invocation.
     */
    getCachedValue(item) {
        if (!item.hasValue) {
            const computed = item.getter();
            item.value = computed;
            item.hasValue = true;
            return computed;
        }
        return item.value;
    }

    // Private

    /**
     * Returns `value` when it is a non-null object, otherwise a new empty object.
     */
    _asObject(value) {
        if (value === null || typeof value !== 'object') { return {}; }
        return value;
    }

    /**
     * Finds the first source, scanning headwords in order, flagged as primary.
     * @returns The source, or `null` when no headword has a primary source.
     */
    _getPrimarySource(dictionaryEntry) {
        for (const {sources} of dictionaryEntry.headwords) {
            for (const candidate of sources) {
                if (candidate.isPrimary) { return candidate; }
            }
        }
        return null;
    }

    /**
     * Lists the distinct headword terms of a term entry; empty for other entry types.
     */
    _getUniqueExpressions(dictionaryEntry) {
        if (dictionaryEntry.type !== 'term') { return []; }
        const seen = new Set(Array.from(dictionaryEntry.headwords, ({term}) => term));
        return [...seen];
    }

    /**
     * Lists the distinct headword readings of a term entry; empty for other entry types.
     */
    _getUniqueReadings(dictionaryEntry) {
        if (dictionaryEntry.type !== 'term') { return []; }
        const seen = new Set(Array.from(dictionaryEntry.headwords, ({reading}) => reading));
        return [...seen];
    }

    /**
     * Builds the context object exposed to templates, containing only the document title.
     * A non-string title is replaced with an empty string.
     */
    _getPublicContext(context) {
        const {documentTitle} = this._asObject(context);
        const title = (typeof documentTitle === 'string' ? documentTitle : '');
        return {document: {title}};
    }

    /**
     * Converts the pitch accent infos reported by `DictionaryDataUtil` to the legacy
     * field names (`expressions` / `exclusiveExpressions`). Empty for non-term entries.
     */
    _getPitches(dictionaryEntry) {
        if (dictionaryEntry.type !== 'term') { return []; }
        return Array.from(
            DictionaryDataUtil.getPitchAccentInfos(dictionaryEntry),
            ({dictionary, pitches}) => ({
                dictionary,
                pitches: Array.from(pitches, ({terms, reading, position, tags, exclusiveTerms, exclusiveReadings}) => ({
                    expressions: terms,
                    reading,
                    position,
                    tags,
                    exclusiveExpressions: exclusiveTerms,
                    exclusiveReadings
                }))
            })
        );
    }

    /**
     * Counts the pitches across all groups of a deferred `_getPitches` result.
     * @param cachedPitches A cached value wrapping the pitches array.
     */
    _getPitchCount(cachedPitches) {
        let total = 0;
        for (const group of this.getCachedValue(cachedPitches)) {
            total += group.pitches.length;
        }
        return total;
    }

    /**
     * Dispatches to the term or kanji converter based on the entry type.
     * Unknown types yield an empty object.
     */
    _getDefinition(dictionaryEntry, injectedMedia, context, resultOutputMode) {
        const {type} = dictionaryEntry;
        if (type === 'term') {
            return this._getTermDefinition(dictionaryEntry, injectedMedia, context, resultOutputMode);
        }
        if (type === 'kanji') {
            return this._getKanjiDefinition(dictionaryEntry, injectedMedia, context);
        }
        return {};
    }

    /**
     * Builds the legacy representation of a kanji entry.
     * Tags, stats, frequencies and cloze are evaluated lazily.
     */
    _getKanjiDefinition(dictionaryEntry, injectedMedia, context) {
        const creator = this;
        const {character, dictionary, onyomi, kunyomi, definitions} = dictionaryEntry;
        const {
            screenshotFileName=null,
            clipboardImageFileName=null,
            clipboardText=null,
            audioFileName=null
        } = this._asObject(injectedMedia);
        const {url: rawUrl} = this._asObject(context);
        const url = (typeof rawUrl === 'string' ? rawUrl : '');

        const lazyStats = this.createCachedValue(() => this._getKanjiStats(dictionaryEntry));
        const lazyTags = this.createCachedValue(() => this._convertTags(dictionaryEntry.tags));
        const lazyFrequencies = this.createCachedValue(() => this._getKanjiFrequencies(dictionaryEntry));
        const lazyCloze = this.createCachedValue(() => this._getCloze(dictionaryEntry, context));

        return {
            type: 'kanji',
            character,
            dictionary,
            onyomi,
            kunyomi,
            glossary: definitions,
            get tags() { return creator.getCachedValue(lazyTags); },
            get stats() { return creator.getCachedValue(lazyStats); },
            get frequencies() { return creator.getCachedValue(lazyFrequencies); },
            screenshotFileName,
            clipboardImageFileName,
            clipboardText,
            audioFileName,
            url,
            get cloze() { return creator.getCachedValue(lazyCloze); }
        };
    }

    /**
     * Converts every stat group of a kanji entry, keeping the group names.
     */
    _getKanjiStats(dictionaryEntry) {
        const convertedGroups = {};
        for (const [groupName, groupStats] of Object.entries(dictionaryEntry.stats)) {
            convertedGroups[groupName] = groupStats.map((stat) => this._convertKanjiStat(stat));
        }
        return convertedGroups;
    }

    /**
     * Converts one kanji stat; the `content` field is exposed as `notes`.
     */
    _convertKanjiStat(stat) {
        const {name, category, content, order, score, dictionary, value} = stat;
        return {name, category, notes: content, order, score, dictionary, value};
    }

    /**
     * Converts kanji frequencies, nesting the dictionary index and priority
     * under `dictionaryOrder`.
     */
    _getKanjiFrequencies(dictionaryEntry) {
        return Array.from(
            dictionaryEntry.frequencies,
            ({index, dictionary, dictionaryIndex, dictionaryPriority, character, frequency}) => ({
                index,
                dictionary,
                dictionaryOrder: {
                    index: dictionaryIndex,
                    priority: dictionaryPriority
                },
                character,
                frequency
            })
        );
    }

    /**
     * Builds the legacy representation of a term entry. The legacy `type` is
     * `'termGrouped'` for the `'group'` output mode, `'termMerged'` for `'merge'`,
     * and `'term'` otherwise; several fields are only populated for some types.
     */
    _getTermDefinition(dictionaryEntry, injectedMedia, context, resultOutputMode) {
        const creator = this;

        let type = 'term';
        if (resultOutputMode === 'group') {
            type = 'termGrouped';
        } else if (resultOutputMode === 'merge') {
            type = 'termMerged';
        }
        const isTerm = (type === 'term');
        const isMerged = (type === 'termMerged');
        // Non-merged types expose a single expression/reading rather than arrays.
        const usesFirstHeadword = (isTerm || type === 'termGrouped');

        const {id, inflections, score, dictionaryIndex, dictionaryPriority, sourceTermExactMatchCount} = dictionaryEntry;
        const {
            screenshotFileName=null,
            clipboardImageFileName=null,
            clipboardText=null,
            audioFileName=null
        } = this._asObject(injectedMedia);
        const {url: rawUrl} = this._asObject(context);
        const url = (typeof rawUrl === 'string' ? rawUrl : '');

        const primarySource = this._getPrimarySource(dictionaryEntry);
        const hasPrimarySource = (primarySource !== null);

        const lazyDictionaryNames = this.createCachedValue(() => this._getTermDictionaryNames(dictionaryEntry));
        const lazyCommonInfo = this.createCachedValue(() => this._getTermDictionaryEntryCommonInfo(dictionaryEntry, type));
        const lazyTermTags = this.createCachedValue(() => this._getTermTags(dictionaryEntry, type));
        const lazyExpressions = this.createCachedValue(() => this._getTermExpressions(dictionaryEntry));
        const lazyFrequencies = this.createCachedValue(() => this._getTermFrequencies(dictionaryEntry));
        const lazyPitches = this.createCachedValue(() => this._getTermPitches(dictionaryEntry));
        const lazyGlossary = this.createCachedValue(() => this._getTermGlossaryArray(dictionaryEntry, type));
        const lazyCloze = this.createCachedValue(() => this._getCloze(dictionaryEntry, context));
        const lazyFuriganaSegments = this.createCachedValue(() => this._getTermFuriganaSegments(dictionaryEntry, type));

        let sourceTerm = void 0;
        if (!isMerged) {
            sourceTerm = (hasPrimarySource ? primarySource.deinflectedText : null);
        }

        return {
            type,
            id: (isTerm ? id : void 0),
            source: (hasPrimarySource ? primarySource.transformedText : null),
            rawSource: (hasPrimarySource ? primarySource.originalText : null),
            sourceTerm,
            reasons: inflections,
            score,
            isPrimary: (isTerm ? dictionaryEntry.isPrimary : void 0),
            sequence: (isTerm ? dictionaryEntry.sequence : void 0),
            get dictionary() { return creator.getCachedValue(lazyDictionaryNames)[0]; },
            dictionaryOrder: {
                index: dictionaryIndex,
                priority: dictionaryPriority
            },
            get dictionaryNames() { return creator.getCachedValue(lazyDictionaryNames); },
            get expression() {
                const {uniqueTerms} = creator.getCachedValue(lazyCommonInfo);
                return (usesFirstHeadword ? uniqueTerms[0] : uniqueTerms);
            },
            get reading() {
                const {uniqueReadings} = creator.getCachedValue(lazyCommonInfo);
                return (usesFirstHeadword ? uniqueReadings[0] : uniqueReadings);
            },
            get expressions() { return creator.getCachedValue(lazyExpressions); },
            get glossary() { return creator.getCachedValue(lazyGlossary); },
            get definitionTags() {
                if (!isTerm) { return void 0; }
                return creator.getCachedValue(lazyCommonInfo).definitionTags;
            },
            get termTags() { return creator.getCachedValue(lazyTermTags); },
            get definitions() { return creator.getCachedValue(lazyCommonInfo).definitions; },
            get frequencies() { return creator.getCachedValue(lazyFrequencies); },
            get pitches() { return creator.getCachedValue(lazyPitches); },
            sourceTermExactMatchCount,
            screenshotFileName,
            clipboardImageFileName,
            clipboardText,
            audioFileName,
            url,
            get cloze() { return creator.getCachedValue(lazyCloze); },
            get furiganaSegments() { return creator.getCachedValue(lazyFuriganaSegments); }
        };
    }

    /**
     * Lists the distinct dictionary names of the entry's definitions, in first-seen order.
     */
    _getTermDictionaryNames(dictionaryEntry) {
        const names = new Set(Array.from(dictionaryEntry.definitions, ({dictionary}) => dictionary));
        return [...names];
    }

    /**
     * Computes values shared by several term definition fields: the unique terms and
     * readings, a flat list of converted definition tags, and (for every type except
     * `'term'`) the per-definition list; `only` is filled in for `'termMerged'` only.
     */
    _getTermDictionaryEntryCommonInfo(dictionaryEntry, type) {
        const isMerged = (type === 'termMerged');
        const includeDefinitions = (type !== 'term');

        const termSet = new Set();
        const readingSet = new Set();
        for (const headword of dictionaryEntry.headwords) {
            termSet.add(headword.term);
            readingSet.add(headword.reading);
        }
        const uniqueTerms = [...termSet];
        const uniqueReadings = [...readingSet];

        const definitionTags = [];
        const definitions = [];
        for (const {tags, headwordIndices, entries, dictionary} of dictionaryEntry.definitions) {
            const ownTags = [];
            for (const tag of tags) {
                // Converted twice so the flat list and the per-definition list hold separate objects.
                definitionTags.push(this._convertTag(tag));
                ownTags.push(this._convertTag(tag));
            }
            if (includeDefinitions) {
                let only = void 0;
                if (isMerged) {
                    only = DictionaryDataUtil.getDisambiguations(dictionaryEntry.headwords, headwordIndices, termSet, readingSet);
                }
                definitions.push({
                    dictionary,
                    glossary: entries,
                    definitionTags: ownTags,
                    only
                });
            }
        }

        return {
            uniqueTerms,
            uniqueReadings,
            definitionTags,
            definitions: (includeDefinitions ? definitions : void 0)
        };
    }

    /**
     * Converts all frequencies of a term entry, attaching the term and reading of
     * the headword each one refers to. `index` is the position in the output.
     */
    _getTermFrequencies(dictionaryEntry) {
        const {headwords} = dictionaryEntry;
        return Array.from(
            dictionaryEntry.frequencies,
            ({headwordIndex, dictionary, dictionaryIndex, dictionaryPriority, hasReading, frequency}, position) => {
                const {term, reading} = headwords[headwordIndex];
                return {
                    index: position,
                    expressionIndex: headwordIndex,
                    dictionary,
                    dictionaryOrder: {
                        index: dictionaryIndex,
                        priority: dictionaryPriority
                    },
                    expression: term,
                    reading,
                    hasReading,
                    frequency
                };
            }
        );
    }

    /**
     * Converts all pronunciations of a term entry, attaching the term and reading of
     * the headword each one refers to. The inner pitch list is converted lazily.
     */
    _getTermPitches(dictionaryEntry) {
        const creator = this;
        const {headwords} = dictionaryEntry;
        return Array.from(
            dictionaryEntry.pronunciations,
            ({headwordIndex, dictionary, dictionaryIndex, dictionaryPriority, pitches}, position) => {
                const {term, reading} = headwords[headwordIndex];
                const lazyPitches = this.createCachedValue(() => this._getTermPitchesInner(pitches));
                return {
                    index: position,
                    expressionIndex: headwordIndex,
                    dictionary,
                    dictionaryOrder: {
                        index: dictionaryIndex,
                        priority: dictionaryPriority
                    },
                    expression: term,
                    reading,
                    get pitches() { return creator.getCachedValue(lazyPitches); }
                };
            }
        );
    }

    /**
     * Converts a list of pitches; each item's tags are converted lazily.
     */
    _getTermPitchesInner(pitches) {
        const creator = this;
        return Array.from(pitches, ({position, tags}) => {
            const lazyTags = this.createCachedValue(() => this._convertTags(tags));
            return {
                position,
                get tags() { return creator.getCachedValue(lazyTags); }
            };
        });
    }

    /**
     * Builds one legacy "expression" object per headword. `sourceTerm` is the
     * deinflected text of the headword's first source.
     */
    _getTermExpressions(dictionaryEntry) {
        const creator = this;
        const expressions = [];
        for (const [headwordIndex, headword] of dictionaryEntry.headwords.entries()) {
            const {term, reading, tags, sources: [{deinflectedText}]} = headword;
            const lazyTermTags = this.createCachedValue(() => this._convertTags(tags));
            const lazyFrequencies = this.createCachedValue(() => this._getTermExpressionFrequencies(dictionaryEntry, headwordIndex));
            const lazyPitches = this.createCachedValue(() => this._getTermExpressionPitches(dictionaryEntry, headwordIndex));
            const lazyTermFrequency = this.createCachedValue(() => this._getTermExpressionTermFrequency(lazyTermTags));
            const lazyFuriganaSegments = this.createCachedValue(() => this._getTermHeadwordFuriganaSegments(term, reading));
            expressions.push({
                sourceTerm: deinflectedText,
                expression: term,
                reading,
                get termTags() { return creator.getCachedValue(lazyTermTags); },
                get frequencies() { return creator.getCachedValue(lazyFrequencies); },
                get pitches() { return creator.getCachedValue(lazyPitches); },
                get furiganaSegments() { return creator.getCachedValue(lazyFuriganaSegments); },
                get termFrequency() { return creator.getCachedValue(lazyTermFrequency); }
            });
        }
        return expressions;
    }

    /**
     * Converts only the frequencies that belong to headword `i`.
     * `index` counts within the filtered output.
     */
    _getTermExpressionFrequencies(dictionaryEntry, i) {
        const {headwords, frequencies} = dictionaryEntry;
        const matches = [];
        for (const {headwordIndex, dictionary, dictionaryIndex, dictionaryPriority, hasReading, frequency} of frequencies) {
            if (headwordIndex === i) {
                const {term, reading} = headwords[headwordIndex];
                matches.push({
                    index: matches.length,
                    expressionIndex: headwordIndex,
                    dictionary,
                    dictionaryOrder: {
                        index: dictionaryIndex,
                        priority: dictionaryPriority
                    },
                    expression: term,
                    reading,
                    hasReading,
                    frequency
                });
            }
        }
        return matches;
    }

    /**
     * Converts only the pronunciations that belong to headword `i`.
     * `index` counts within the filtered output.
     */
    _getTermExpressionPitches(dictionaryEntry, i) {
        const creator = this;
        const {headwords, pronunciations} = dictionaryEntry;
        const matches = [];
        for (const {headwordIndex, dictionary, dictionaryIndex, dictionaryPriority, pitches} of pronunciations) {
            if (headwordIndex === i) {
                const {term, reading} = headwords[headwordIndex];
                const lazyPitches = this.createCachedValue(() => this._getTermPitchesInner(pitches));
                matches.push({
                    index: matches.length,
                    expressionIndex: headwordIndex,
                    dictionary,
                    dictionaryOrder: {
                        index: dictionaryIndex,
                        priority: dictionaryPriority
                    },
                    expression: term,
                    reading,
                    get pitches() { return creator.getCachedValue(lazyPitches); }
                });
            }
        }
        return matches;
    }

    /**
     * Resolves the deferred term tags and passes them to `DictionaryDataUtil.getTermFrequency`.
     */
    _getTermExpressionTermFrequency(cachedTermTags) {
        return DictionaryDataUtil.getTermFrequency(this.getCachedValue(cachedTermTags));
    }

    /**
     * Flattens the entries of all definitions into one array.
     * Only available for the `'term'` type; other types yield `undefined`.
     */
    _getTermGlossaryArray(dictionaryEntry, type) {
        if (type !== 'term') { return void 0; }
        const glossary = [];
        for (const {entries} of dictionaryEntry.definitions) {
            for (const entry of entries) {
                glossary.push(entry);
            }
        }
        return glossary;
    }

    /**
     * Converts the tags grouped by `DictionaryDataUtil.groupTermTags`.
     * Yields `undefined` for the `'termMerged'` type.
     */
    _getTermTags(dictionaryEntry, type) {
        if (type === 'termMerged') { return void 0; }
        return Array.from(DictionaryDataUtil.groupTermTags(dictionaryEntry), ({tag}) => this._convertTag(tag));
    }

    /**
     * Converts each tag in the list to the legacy tag format.
     */
    _convertTags(tags) {
        return Array.from(tags, (tag) => this._convertTag(tag));
    }

    /**
     * Converts one tag to the legacy format: only the first content string is kept
     * (as `notes`) and only the first dictionary name (as `dictionary`); both fall
     * back to an empty string when their source array is empty.
     */
    _convertTag(tag) {
        const {name, category, content, order, score, dictionaries, redundant} = tag;
        const notes = (content.length > 0 ? content[0] : '');
        const dictionary = (dictionaries.length > 0 ? dictionaries[0] : '');
        return {name, category, notes, order, score, dictionary, redundant};
    }

    /**
     * Splits the context sentence into prefix / body / suffix around the looked-up text.
     * The body length is taken from the primary source's original text (terms) or the
     * character (kanji); a missing sentence text or offset falls back to `''` and `0`.
     */
    _getCloze(dictionaryEntry, context) {
        let originalText = '';
        const {type} = dictionaryEntry;
        if (type === 'term') {
            const primarySource = this._getPrimarySource(dictionaryEntry);
            if (primarySource !== null) { originalText = primarySource.originalText; }
        } else if (type === 'kanji') {
            originalText = dictionaryEntry.character;
        }

        const {sentence} = this._asObject(context);
        const {text: rawText, offset: rawOffset} = this._asObject(sentence);
        const text = (typeof rawText === 'string' ? rawText : '');
        const offset = (typeof rawOffset === 'number' ? rawOffset : 0);
        const bodyEnd = offset + originalText.length;

        return {
            sentence: text,
            prefix: text.substring(0, offset),
            body: text.substring(offset, bodyEnd),
            suffix: text.substring(bodyEnd)
        };
    }

    /**
     * Returns furigana segments for the first headword of a `'term'` type entry.
     * Yields `undefined` for other types or when there are no headwords.
     */
    _getTermFuriganaSegments(dictionaryEntry, type) {
        if (type !== 'term' || dictionaryEntry.headwords.length === 0) { return void 0; }
        const {term, reading} = dictionaryEntry.headwords[0];
        return this._getTermHeadwordFuriganaSegments(term, reading);
    }

    /**
     * Delegates to `JapaneseUtil.distributeFurigana` for a single term/reading pair.
     */
    _getTermHeadwordFuriganaSegments(term, reading) {
        return this._japaneseUtil.distributeFurigana(term, reading);
    }
}

View File

@ -1,299 +0,0 @@
/*
* Copyright (C) 2021 Yomichan Authors
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see <https://www.gnu.org/licenses/>.
*/
/* global
* DictionaryDataUtil
*/
/**
* This class represents the data that is exposed to the Anki template renderer.
* The public properties and data should be backwards compatible.
*/
class AnkiNoteData {
constructor(japaneseUtil, marker, {
definition,
resultOutputMode,
mode,
glossaryLayoutMode,
compactTags,
context,
injectedMedia=null
}) {
this._japaneseUtil = japaneseUtil;
this._definition = definition;
this._resultOutputMode = resultOutputMode;
this._mode = mode;
this._glossaryLayoutMode = glossaryLayoutMode;
this._compactTags = compactTags;
this._context = context;
this._marker = marker;
this._injectedMedia = injectedMedia;
this._pitches = null;
this._pitchCount = null;
this._uniqueExpressions = null;
this._uniqueReadings = null;
this._publicContext = null;
this._cloze = null;
this._furiganaSegmentsCache = null;
this._prepareDefinition(definition, injectedMedia, context);
}
get marker() {
return this._marker;
}
set marker(value) {
this._marker = value;
}
get definition() {
return this._definition;
}
get uniqueExpressions() {
if (this._uniqueExpressions === null) {
this._uniqueExpressions = this._getUniqueExpressions();
}
return this._uniqueExpressions;
}
get uniqueReadings() {
if (this._uniqueReadings === null) {
this._uniqueReadings = this._getUniqueReadings();
}
return this._uniqueReadings;
}
get pitches() {
if (this._pitches === null) {
this._pitches = DictionaryDataUtil.getPitchAccentInfos(this._definition);
}
return this._pitches;
}
get pitchCount() {
if (this._pitchCount === null) {
this._pitchCount = this.pitches.reduce((i, v) => i + v.pitches.length, 0);
}
return this._pitchCount;
}
get group() {
return this._resultOutputMode === 'group';
}
get merge() {
return this._resultOutputMode === 'merge';
}
get modeTermKanji() {
return this._mode === 'term-kanji';
}
get modeTermKana() {
return this._mode === 'term-kana';
}
get modeKanji() {
return this._mode === 'kanji';
}
get compactGlossaries() {
return this._glossaryLayoutMode === 'compact';
}
get glossaryLayoutMode() {
return this._glossaryLayoutMode;
}
get compactTags() {
return this._compactTags;
}
get context() {
if (this._publicContext === null) {
this._publicContext = this._getPublicContext();
}
return this._publicContext;
}
createPublic() {
const self = this;
return {
get marker() { return self.marker; },
set marker(value) { self.marker = value; },
get definition() { return self.definition; },
get glossaryLayoutMode() { return self.glossaryLayoutMode; },
get compactTags() { return self.compactTags; },
get group() { return self.group; },
get merge() { return self.merge; },
get modeTermKanji() { return self.modeTermKanji; },
get modeTermKana() { return self.modeTermKana; },
get modeKanji() { return self.modeKanji; },
get compactGlossaries() { return self.compactGlossaries; },
get uniqueExpressions() { return self.uniqueExpressions; },
get uniqueReadings() { return self.uniqueReadings; },
get pitches() { return self.pitches; },
get pitchCount() { return self.pitchCount; },
get context() { return self.context; }
};
}
// Private
_asObject(value) {
return (typeof value === 'object' && value !== null ? value : {});
}
_getUniqueExpressions() {
const results = new Set();
const definition = this._definition;
if (definition.type !== 'kanji') {
for (const {expression} of definition.expressions) {
results.add(expression);
}
}
return [...results];
}
_getUniqueReadings() {
const results = new Set();
const definition = this._definition;
if (definition.type !== 'kanji') {
for (const {reading} of definition.expressions) {
results.add(reading);
}
}
return [...results];
}
_getPublicContext() {
let {documentTitle} = this._asObject(this._context);
if (typeof documentTitle !== 'string') { documentTitle = ''; }
return {
document: {
title: documentTitle
}
};
}
_getCloze() {
const {sentence} = this._asObject(this._context);
let {text, offset} = this._asObject(sentence);
if (typeof text !== 'string') { text = ''; }
if (typeof offset !== 'number') { offset = 0; }
const definition = this._definition;
const source = definition.type === 'kanji' ? definition.character : definition.rawSource;
return {
sentence: text,
prefix: text.substring(0, offset),
body: text.substring(offset, offset + source.length),
suffix: text.substring(offset + source.length)
};
}
_getClozeCached() {
if (this._cloze === null) {
this._cloze = this._getCloze();
}
return this._cloze;
}
_prepareDefinition(definition, injectedMedia, context) {
const {
screenshotFileName=null,
clipboardImageFileName=null,
clipboardText=null,
audioFileName=null
} = this._asObject(injectedMedia);
let {url} = this._asObject(context);
if (typeof url !== 'string') { url = ''; }
definition.screenshotFileName = screenshotFileName;
definition.clipboardImageFileName = clipboardImageFileName;
definition.clipboardText = clipboardText;
definition.audioFileName = audioFileName;
definition.url = url;
Object.defineProperty(definition, 'cloze', {
configurable: true,
enumerable: true,
get: this._getClozeCached.bind(this)
});
for (const definition2 of this._getAllDefinitions(definition)) {
if (definition2.type === 'term') {
this._defineFuriganaSegments(definition2);
}
if (definition2.type === 'kanji') { continue; }
for (const expression of definition2.expressions) {
this._defineFuriganaSegments(expression);
this._defineTermFrequency(expression);
}
}
}
_defineFuriganaSegments(object) {
Object.defineProperty(object, 'furiganaSegments', {
configurable: true,
enumerable: true,
get: this._getFuriganaSegments.bind(this, object)
});
}
_defineTermFrequency(object) {
Object.defineProperty(object, 'termFrequency', {
configurable: true,
enumerable: true,
get: this._getTermFrequency.bind(this, object)
});
}
_getFuriganaSegments(object) {
if (this._furiganaSegmentsCache !== null) {
const cachedResult = this._furiganaSegmentsCache.get(object);
if (typeof cachedResult !== 'undefined') { return cachedResult; }
} else {
this._furiganaSegmentsCache = new Map();
}
const {expression, reading} = object;
const result = this._japaneseUtil.distributeFurigana(expression, reading);
this._furiganaSegmentsCache.set(object, result);
return result;
}
_getTermFrequency(object) {
const {termTags} = object;
return DictionaryDataUtil.getTermFrequency(termTags);