1
Fork 0

Refactor Translator and dictionary entry format (#1553)

* Update test data

* Move translator.js

* Create new version of Translator

* Update Backend

* Update DictionaryDataUtil

* Update DisplayGenerator

* Create AnkiNoteDataCreator

* Replace AnkiNoteData with AnkiNoteDataCreator

* Update tests

* Remove AnkiNoteData

* Update test data

* Remove translator-old.js

* Add TypeScript interface definitions for the new translator data format
This commit is contained in:
toasted-nutbread 2021-03-25 19:55:31 -04:00 committed by GitHub
parent e7035dcff4
commit 4be5c8fd9f
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
16 changed files with 9951 additions and 29225 deletions

View File

@ -110,7 +110,7 @@
{
"files": [
"ext/js/core.js",
"ext/js/data/anki-note-data.js",
"ext/js/data/anki-note-data-creator.js",
"ext/js/language/dictionary-data-util.js",
"ext/js/templates/template-renderer.js"
],
@ -122,7 +122,7 @@
"files": ["ext/**/*.js"],
"excludedFiles": [
"ext/js/core.js",
"ext/js/data/anki-note-data.js",
"ext/js/data/anki-note-data-creator.js",
"ext/js/language/dictionary-data-util.js",
"ext/js/templates/template-renderer.js"
],
@ -151,7 +151,7 @@
"excludedFiles": [
"ext/js/core.js",
"ext/js/yomichan.js",
"ext/js/data/anki-note-data.js",
"ext/js/data/anki-note-data-creator.js",
"ext/js/language/dictionary-data-util.js",
"ext/js/templates/template-renderer.js"
],

1
.gitattributes vendored
View File

@ -1,6 +1,7 @@
*.sh text eol=lf
*.handlebars text eol=lf
*.js text eol=lf
*.ts text eol=lf
*.json text eol=lf
*.css text eol=lf
*.html text eol=lf

View File

@ -30,7 +30,7 @@ class TranslatorVM extends DatabaseVM {
super();
this._japaneseUtil = null;
this._translator = null;
this._AnkiNoteData = null;
this._ankiNoteDataCreator = null;
this._dictionaryName = null;
}
@ -38,10 +38,14 @@ class TranslatorVM extends DatabaseVM {
return this._translator;
}
get ankiNoteDataCreator() {
return this._ankiNoteDataCreator;
}
async prepare(dictionaryDirectory, dictionaryName) {
this.execute([
'js/core.js',
'js/data/anki-note-data.js',
'js/data/anki-note-data-creator.js',
'js/data/database.js',
'js/data/json-schema.js',
'js/general/cache-map.js',
@ -60,13 +64,13 @@ class TranslatorVM extends DatabaseVM {
DictionaryDatabase,
JapaneseUtil,
Translator,
AnkiNoteData
AnkiNoteDataCreator
] = this.get([
'DictionaryImporter',
'DictionaryDatabase',
'JapaneseUtil',
'Translator',
'AnkiNoteData'
'AnkiNoteDataCreator'
]);
// Dictionary
@ -98,7 +102,7 @@ class TranslatorVM extends DatabaseVM {
this._translator.prepare(deinflectionReasions);
// Assign properties
this._AnkiNoteData = AnkiNoteData;
this._ankiNoteDataCreator = new AnkiNoteDataCreator(this._japaneseUtil);
}
createTestAnkiNoteData(definition, mode) {
@ -116,8 +120,7 @@ class TranslatorVM extends DatabaseVM {
},
injectedMedia: null
};
const AnkiNoteData = this._AnkiNoteData;
return new AnkiNoteData(this._japaneseUtil, marker, data).createPublic();
return this._ankiNoteDataCreator.create(marker, data);
}
buildOptions(optionsPresets, optionsArray) {

View File

@ -0,0 +1,402 @@
/*
* Copyright (C) 2021 Yomichan Authors
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see <https://www.gnu.org/licenses/>.
*/
/**
 * Interface definitions for the dictionary entry data format.
 * Entries are either kanji entries or term entries, distinguished by `DictionaryEntry.type`.
 */
namespace Translation {
// Common
/**
* A generic dictionary entry which is used as the base interface.
*/
export interface DictionaryEntry {
/**
* A string corresponding to the type of the entry.
* `'kanji'` corresponds to a KanjiDictionaryEntry.
* `'term'` corresponds to a TermDictionaryEntry.
*/
type: string;
}
/**
* A tag represents some brief information about part of a dictionary entry.
*/
export interface Tag {
/**
* The name of the tag.
*/
name: string;
/**
* The category of the tag.
*/
category: string;
/**
* A number indicating the sorting order of the tag.
*/
order: number;
/**
* A score value for the tag.
*/
score: number;
/**
* An array of descriptions for the tag. If there are multiple entries,
* the values will typically have originated from different dictionaries.
* However, there is no correlation between the length of this array and
* the length of the `dictionaries` field, as duplicates are removed.
*/
content: string[];
/**
* An array of dictionary names that contained a tag with this name and category.
*/
dictionaries: string[];
/**
* Whether or not this tag is redundant with previous tags.
*/
redundant: boolean;
}
// Kanji
/**
* A dictionary entry for a kanji character.
* `DictionaryEntry.type` is always `'kanji'`.
*/
export interface KanjiDictionaryEntry extends DictionaryEntry {
/**
* The kanji character that was looked up.
*/
character: string;
/**
* The name of the dictionary that the information originated from.
*/
dictionary: string;
/**
* Onyomi readings for the kanji character.
*/
onyomi: string[];
/**
* Kunyomi readings for the kanji character.
*/
kunyomi: string[];
/**
* Tags for the kanji character.
*/
tags: Tag[];
/**
* An object containing stats about the kanji character.
*/
stats: KanjiStatGroups;
/**
* Definitions for the kanji character.
*/
definitions: string[];
/**
* Frequency information for the kanji character.
*/
frequencies: KanjiFrequency[];
}
/**
* An object with groups of stats about a kanji character.
*/
export interface KanjiStatGroups {
/**
* A group of stats.
* @param propName The name of the group.
*/
[propName: string]: KanjiStat[];
}
/**
* A stat represents a generic piece of information about a kanji character.
*/
export interface KanjiStat {
/**
* The name of the stat.
*/
name: string;
/**
* The category of the stat.
*/
category: string;
/**
* A description of the stat.
*/
content: string;
/**
* A number indicating the sorting order of the stat.
*/
order: number;
/**
* A score value for the stat.
*/
score: number;
/**
* The name of the dictionary that the stat originated from.
*/
dictionary: string;
/**
* A value for the stat.
*/
value: number | string;
}
/**
* Frequency information corresponds to how frequently a character appears in a corpus,
* which can be a number of occurrences or an overall rank.
*/
export interface KanjiFrequency {
/**
* The original order of the frequency, which is usually used for sorting.
*/
index: number;
/**
* The name of the dictionary that the frequency information originated from.
*/
dictionary: string;
/**
* The index of the dictionary in the original list of dictionaries used for the lookup.
*/
dictionaryIndex: number;
/**
* The priority of the dictionary.
*/
dictionaryPriority: number;
/**
* The kanji character for the frequency.
*/
character: string;
/**
* The frequency for the character, as a number of occurrences or an overall rank.
*/
frequency: number | string;
}
// Terms
/**
* A dictionary entry for a term or group of terms.
* `DictionaryEntry.type` is always `'term'`.
*/
export interface TermDictionaryEntry extends DictionaryEntry {
/**
* Database ID for the term, or `-1` if multiple entries have been merged.
*/
id: number;
/**
* Whether or not any of the sources is a primary source. Primary sources are derived from the
* original search text, while non-primary sources originate from related terms.
*/
isPrimary: boolean;
/**
* Database sequence number for the term, or `-1` if multiple entries have been merged.
*/
sequence: number;
/**
* A list of inflections that was applied to get the term.
*/
inflections: string[];
/**
* A score for the dictionary entry.
*/
score: number;
/**
* The index of the dictionary in the original list of dictionaries used for the lookup.
*/
dictionaryIndex: number;
/**
* The priority of the dictionary.
*/
dictionaryPriority: number;
/**
* The number of primary sources that had an exact text match for the term.
*/
sourceTermExactMatchCount: number;
/**
* The maximum deinflected text length of a primary source.
*/
maxDeinflectedTextLength: number;
/**
* Headwords for the entry.
*/
headwords: TermHeadword[];
/**
* Definitions for the entry.
*/
definitions: TermDefinition[];
/**
* Pronunciations for the entry.
*/
pronunciations: TermPronunciation[];
/**
* Frequencies for the entry.
*/
frequencies: TermFrequency[];
}
/**
* A term headword is a combination of a term, reading, and auxiliary information.
*/
export interface TermHeadword {
/**
* The original order of the headword, which is usually used for sorting.
*/
index: number;
/**
* The text for the term.
*/
term: string;
/**
* The reading of the term.
*/
reading: string;
/**
* The sources of the term.
*/
sources: TermSource[];
/**
* Tags for the headword.
*/
tags: Tag[];
}
/**
* A definition contains a list of entries and information about what terms it corresponds to.
*/
export interface TermDefinition {
/**
* The original order of the definition, which is usually used for sorting.
*/
index: number;
/**
* A list of headwords that this definition corresponds to.
*/
headwordIndices: number[];
/**
* The name of the dictionary that the definition information originated from.
*/
dictionary: string;
/**
* Tags for the definition.
*/
tags: Tag[];
/**
* The definition entries.
*/
entries: string[];
}
/**
* A term pronunciation represents different ways to pronounce one of the headwords.
*/
export interface TermPronunciation {
/**
* The original order of the pronunciation, which is usually used for sorting.
*/
index: number;
/**
* Which headword this pronunciation corresponds to.
*/
headwordIndex: number;
/**
* The name of the dictionary that the pronunciation information originated from.
*/
dictionary: string;
/**
* The index of the dictionary in the original list of dictionaries used for the lookup.
*/
dictionaryIndex: number;
/**
* The priority of the dictionary.
*/
dictionaryPriority: number;
/**
* The pitch accent representations for the term.
*/
pitches: TermPitch[];
}
/**
* Pitch accent information for a term, represented as the position of the downstep.
*/
export interface TermPitch {
/**
* Position of the downstep, as a number of mora.
*/
position: number;
/**
* Tags for the pitch accent.
*/
tags: Tag[];
}
/**
* Frequency information corresponds to how frequently a term appears in a corpus,
* which can be a number of occurrences or an overall rank.
*/
export interface TermFrequency {
/**
* The original order of the frequency, which is usually used for sorting.
*/
index: number;
/**
* Which headword this frequency corresponds to.
*/
headwordIndex: number;
/**
* The name of the dictionary that the frequency information originated from.
*/
dictionary: string;
/**
* The index of the dictionary in the original list of dictionaries used for the lookup.
*/
dictionaryIndex: number;
/**
* The priority of the dictionary.
*/
dictionaryPriority: number;
/**
* Whether or not the frequency had an explicit reading specified.
*/
hasReading: boolean;
/**
* The frequency for the term, as a number of occurrences or an overall rank.
*/
frequency: number | string;
}
/**
* Source information represents how the original text was transformed to get to the final term.
*/
export interface TermSource {
/**
* The original text that was searched.
*/
originalText: string;
/**
* The original text after being transformed, but before applying deinflections.
*/
transformedText: string;
/**
* The final text after applying deinflections.
*/
deinflectedText: string;
/**
* Whether or not this source is a primary source. Primary sources are derived from the
* original search text, while non-primary sources originate from related terms.
*/
isPrimary: boolean;
}
}

View File

@ -414,9 +414,9 @@ class Backend {
const options = this._getProfileOptions(optionsContext);
const {general: {resultOutputMode: mode, maxResults}} = options;
const findTermsOptions = this._getTranslatorFindTermsOptions(details, options);
const [definitions, length] = await this._translator.findTerms(mode, text, findTermsOptions);
definitions.splice(maxResults);
return {length, definitions};
const {dictionaryEntries, originalTextLength} = await this._translator.findTerms(mode, text, findTermsOptions);
dictionaryEntries.splice(maxResults);
return {length: originalTextLength, definitions: dictionaryEntries};
}
async _onApiTextParse({text, optionsContext}) {
@ -1050,7 +1050,7 @@ class Backend {
let i = 0;
const ii = text.length;
while (i < ii) {
const [definitions, sourceLength] = await this._translator.findTerms(
const {dictionaryEntries, originalTextLength} = await this._translator.findTerms(
'simple',
text.substring(i, i + scanningLength),
findTermsOptions
@ -1058,20 +1058,20 @@ class Backend {
const codePoint = text.codePointAt(i);
const character = String.fromCodePoint(codePoint);
if (
definitions.length > 0 &&
sourceLength > 0 &&
(sourceLength !== character.length || this._japaneseUtil.isCodePointJapanese(codePoint))
dictionaryEntries.length > 0 &&
originalTextLength > 0 &&
(originalTextLength !== character.length || this._japaneseUtil.isCodePointJapanese(codePoint))
) {
previousUngroupedSegment = null;
const {expression, reading} = definitions[0];
const source = text.substring(i, i + sourceLength);
const term = [];
for (const {text: text2, furigana} of jp.distributeFuriganaInflected(expression, reading, source)) {
const {headwords: [{term, reading}]} = dictionaryEntries[0];
const source = text.substring(i, i + originalTextLength);
const textSegments = [];
for (const {text: text2, furigana} of jp.distributeFuriganaInflected(term, reading, source)) {
const reading2 = jp.convertReading(text2, furigana, readingMode);
term.push({text: text2, reading: reading2});
textSegments.push({text: text2, reading: reading2});
}
results.push(term);
i += sourceLength;
results.push(textSegments);
i += originalTextLength;
} else {
if (previousUngroupedSegment === null) {
previousUngroupedSegment = {text: character, reading: ''};

View File

@ -0,0 +1,598 @@
/*
* Copyright (C) 2021 Yomichan Authors
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see <https://www.gnu.org/licenses/>.
*/
/* global
* DictionaryDataUtil
*/
/**
 * Converts dictionary entries from the internal format into the data layout
 * used by Anki templates, which is kept for backwards compatibility.
 * Derived values are exposed through getters which are evaluated on first
 * access and cached afterwards.
 */
class AnkiNoteDataCreator {
    /**
     * Creates a new instance.
     * @param japaneseUtil An instance of `JapaneseUtil`.
     */
    constructor(japaneseUtil) {
        this._japaneseUtil = japaneseUtil;
    }

    /**
     * Creates a compatibility representation of the specified data.
     * @param marker The marker that is being used for template rendering.
     * @param details An object carrying the dictionary entry (as `definition`) together
     *   with the output mode, layout options, context and optional injected media.
     * @returns An object used for rendering Anki templates.
     */
    create(marker, {
        definition: dictionaryEntry,
        resultOutputMode,
        mode,
        glossaryLayoutMode,
        compactTags,
        context,
        injectedMedia=null
    }) {
        // Getters on the returned literal have their own `this`, so they go through this closure.
        const read = (item) => this.getCachedValue(item);
        const lazyDefinition = this.createCachedValue(() => this._getDefinition(dictionaryEntry, injectedMedia, context, resultOutputMode));
        const lazyUniqueExpressions = this.createCachedValue(() => this._getUniqueExpressions(dictionaryEntry));
        const lazyUniqueReadings = this.createCachedValue(() => this._getUniqueReadings(dictionaryEntry));
        const lazyPublicContext = this.createCachedValue(() => this._getPublicContext(context));
        const lazyPitches = this.createCachedValue(() => this._getPitches(dictionaryEntry));
        const lazyPitchCount = this.createCachedValue(() => this._getPitchCount(lazyPitches));
        return {
            marker,
            get definition() { return read(lazyDefinition); },
            glossaryLayoutMode,
            compactTags,
            group: (resultOutputMode === 'group'),
            merge: (resultOutputMode === 'merge'),
            modeTermKanji: (mode === 'term-kanji'),
            modeTermKana: (mode === 'term-kana'),
            modeKanji: (mode === 'kanji'),
            compactGlossaries: (glossaryLayoutMode === 'compact'),
            get uniqueExpressions() { return read(lazyUniqueExpressions); },
            get uniqueReadings() { return read(lazyUniqueReadings); },
            get pitches() { return read(lazyPitches); },
            get pitchCount() { return read(lazyPitchCount); },
            get context() { return read(lazyPublicContext); }
        };
    }

    /**
     * Creates a deferred-evaluation value.
     * @param getter The function to invoke to get the return value.
     * @returns An object which can be passed into `getCachedValue`.
     */
    createCachedValue(getter) {
        return {getter, hasValue: false, value: void 0};
    }

    /**
     * Gets the value of a cached object.
     * @param item An object that was returned from `createCachedValue`.
     * @returns The result of evaluating the getter, which is cached after the first invocation.
     */
    getCachedValue(item) {
        if (!item.hasValue) {
            // The flag is only set after the getter has returned.
            item.value = item.getter();
            item.hasValue = true;
        }
        return item.value;
    }

    // Private

    /**
     * Returns the value itself when it is a non-null object, otherwise a new empty object.
     */
    _asObject(value) {
        if (typeof value !== 'object' || value === null) { return {}; }
        return value;
    }

    /**
     * Finds the first source flagged as primary, scanning headwords in order.
     * @returns The source, or `null` if no headword has one.
     */
    _getPrimarySource(dictionaryEntry) {
        for (const {sources} of dictionaryEntry.headwords) {
            const primary = sources.find(({isPrimary}) => isPrimary);
            if (typeof primary !== 'undefined') { return primary; }
        }
        return null;
    }

    /**
     * Lists the distinct headword terms of a term entry; other entry types give an empty list.
     */
    _getUniqueExpressions(dictionaryEntry) {
        if (dictionaryEntry.type !== 'term') { return []; }
        return [...new Set(dictionaryEntry.headwords.map(({term}) => term))];
    }

    /**
     * Lists the distinct headword readings of a term entry; other entry types give an empty list.
     */
    _getUniqueReadings(dictionaryEntry) {
        if (dictionaryEntry.type !== 'term') { return []; }
        return [...new Set(dictionaryEntry.headwords.map(({reading}) => reading))];
    }

    /**
     * Builds the context object exposed to templates; a non-string title becomes an empty string.
     */
    _getPublicContext(context) {
        const {documentTitle} = this._asObject(context);
        const title = (typeof documentTitle === 'string' ? documentTitle : '');
        return {document: {title}};
    }

    /**
     * Renames the pitch accent info fields of a term entry to their "expression" equivalents.
     */
    _getPitches(dictionaryEntry) {
        const groups = [];
        if (dictionaryEntry.type !== 'term') { return groups; }
        for (const {dictionary, pitches} of DictionaryDataUtil.getPitchAccentInfos(dictionaryEntry)) {
            const converted = [];
            for (const {terms, reading, position, tags, exclusiveTerms, exclusiveReadings} of pitches) {
                converted.push({
                    expressions: terms,
                    reading,
                    position,
                    tags,
                    exclusiveExpressions: exclusiveTerms,
                    exclusiveReadings
                });
            }
            groups.push({dictionary, pitches: converted});
        }
        return groups;
    }

    /**
     * Counts the pitches across every group of the given cached pitch list.
     */
    _getPitchCount(cachedPitches) {
        let count = 0;
        for (const {pitches} of this.getCachedValue(cachedPitches)) {
            count += pitches.length;
        }
        return count;
    }

    /**
     * Dispatches on the entry type; unrecognized types produce an empty object.
     */
    _getDefinition(dictionaryEntry, injectedMedia, context, resultOutputMode) {
        const {type} = dictionaryEntry;
        if (type === 'term') {
            return this._getTermDefinition(dictionaryEntry, injectedMedia, context, resultOutputMode);
        }
        if (type === 'kanji') {
            return this._getKanjiDefinition(dictionaryEntry, injectedMedia, context);
        }
        return {};
    }

    /**
     * Builds the template data for a kanji entry.
     */
    _getKanjiDefinition(dictionaryEntry, injectedMedia, context) {
        const read = (item) => this.getCachedValue(item);
        const {character, dictionary, onyomi, kunyomi, definitions, tags: entryTags} = dictionaryEntry;
        const {
            screenshotFileName=null,
            clipboardImageFileName=null,
            clipboardText=null,
            audioFileName=null
        } = this._asObject(injectedMedia);
        const {url: contextUrl} = this._asObject(context);
        const url = (typeof contextUrl === 'string' ? contextUrl : '');

        const lazyStats = this.createCachedValue(() => this._getKanjiStats(dictionaryEntry));
        const lazyTags = this.createCachedValue(() => this._convertTags(entryTags));
        const lazyFrequencies = this.createCachedValue(() => this._getKanjiFrequencies(dictionaryEntry));
        const lazyCloze = this.createCachedValue(() => this._getCloze(dictionaryEntry, context));

        return {
            type: 'kanji',
            character,
            dictionary,
            onyomi,
            kunyomi,
            glossary: definitions,
            get tags() { return read(lazyTags); },
            get stats() { return read(lazyStats); },
            get frequencies() { return read(lazyFrequencies); },
            screenshotFileName,
            clipboardImageFileName,
            clipboardText,
            audioFileName,
            url,
            get cloze() { return read(lazyCloze); }
        };
    }

    /**
     * Converts every stat group of a kanji entry, keeping the group names.
     */
    _getKanjiStats(dictionaryEntry) {
        const groups = {};
        for (const [groupName, stats] of Object.entries(dictionaryEntry.stats)) {
            groups[groupName] = stats.map((stat) => this._convertKanjiStat(stat));
        }
        return groups;
    }

    /**
     * Copies a kanji stat, exposing its `content` under the name `notes`.
     */
    _convertKanjiStat({name, category, content, order, score, dictionary, value}) {
        return {name, category, notes: content, order, score, dictionary, value};
    }

    /**
     * Converts kanji frequencies, nesting the dictionary index and priority under `dictionaryOrder`.
     */
    _getKanjiFrequencies(dictionaryEntry) {
        return dictionaryEntry.frequencies.map(({index, dictionary, dictionaryIndex, dictionaryPriority, character, frequency}) => ({
            index,
            dictionary,
            dictionaryOrder: {
                index: dictionaryIndex,
                priority: dictionaryPriority
            },
            character,
            frequency
        }));
    }

    /**
     * Builds the template data for a term entry. The result type is `'termGrouped'` or
     * `'termMerged'` for the `'group'` and `'merge'` output modes, and `'term'` otherwise.
     */
    _getTermDefinition(dictionaryEntry, injectedMedia, context, resultOutputMode) {
        const read = (item) => this.getCachedValue(item);

        let type;
        if (resultOutputMode === 'group') {
            type = 'termGrouped';
        } else if (resultOutputMode === 'merge') {
            type = 'termMerged';
        } else {
            type = 'term';
        }
        const isPlainTerm = (type === 'term');
        const isMerged = (type === 'termMerged');

        const {id, inflections, score, dictionaryIndex, dictionaryPriority, sourceTermExactMatchCount} = dictionaryEntry;
        const {
            screenshotFileName=null,
            clipboardImageFileName=null,
            clipboardText=null,
            audioFileName=null
        } = this._asObject(injectedMedia);
        const {url: contextUrl} = this._asObject(context);
        const url = (typeof contextUrl === 'string' ? contextUrl : '');

        const primarySource = this._getPrimarySource(dictionaryEntry);
        const hasPrimarySource = (primarySource !== null);

        const lazyDictionaryNames = this.createCachedValue(() => this._getTermDictionaryNames(dictionaryEntry));
        const lazyCommonInfo = this.createCachedValue(() => this._getTermDictionaryEntryCommonInfo(dictionaryEntry, type));
        const lazyTermTags = this.createCachedValue(() => this._getTermTags(dictionaryEntry, type));
        const lazyExpressions = this.createCachedValue(() => this._getTermExpressions(dictionaryEntry));
        const lazyFrequencies = this.createCachedValue(() => this._getTermFrequencies(dictionaryEntry));
        const lazyPitches = this.createCachedValue(() => this._getTermPitches(dictionaryEntry));
        const lazyGlossary = this.createCachedValue(() => this._getTermGlossaryArray(dictionaryEntry, type));
        const lazyCloze = this.createCachedValue(() => this._getCloze(dictionaryEntry, context));
        const lazyFuriganaSegments = this.createCachedValue(() => this._getTermFuriganaSegments(dictionaryEntry, type));

        return {
            type,
            id: (isPlainTerm ? id : void 0),
            source: (hasPrimarySource ? primarySource.transformedText : null),
            rawSource: (hasPrimarySource ? primarySource.originalText : null),
            sourceTerm: (isMerged ? void 0 : (hasPrimarySource ? primarySource.deinflectedText : null)),
            reasons: inflections,
            score,
            isPrimary: (isPlainTerm ? dictionaryEntry.isPrimary : void 0),
            sequence: (isPlainTerm ? dictionaryEntry.sequence : void 0),
            get dictionary() { return read(lazyDictionaryNames)[0]; },
            dictionaryOrder: {
                index: dictionaryIndex,
                priority: dictionaryPriority
            },
            get dictionaryNames() { return read(lazyDictionaryNames); },
            get expression() {
                // Merged entries expose the whole list, the other types only the first term.
                const {uniqueTerms} = read(lazyCommonInfo);
                return (isMerged ? uniqueTerms : uniqueTerms[0]);
            },
            get reading() {
                const {uniqueReadings} = read(lazyCommonInfo);
                return (isMerged ? uniqueReadings : uniqueReadings[0]);
            },
            get expressions() { return read(lazyExpressions); },
            get glossary() { return read(lazyGlossary); },
            get definitionTags() { return (isPlainTerm ? read(lazyCommonInfo).definitionTags : void 0); },
            get termTags() { return read(lazyTermTags); },
            get definitions() { return read(lazyCommonInfo).definitions; },
            get frequencies() { return read(lazyFrequencies); },
            get pitches() { return read(lazyPitches); },
            sourceTermExactMatchCount,
            screenshotFileName,
            clipboardImageFileName,
            clipboardText,
            audioFileName,
            url,
            get cloze() { return read(lazyCloze); },
            get furiganaSegments() { return read(lazyFuriganaSegments); }
        };
    }

    /**
     * Lists the distinct dictionary names of the entry's definitions, in first-seen order.
     */
    _getTermDictionaryNames(dictionaryEntry) {
        return [...new Set(dictionaryEntry.definitions.map(({dictionary}) => dictionary))];
    }

    /**
     * Computes the values shared by several getters of a term definition: unique terms,
     * unique readings, the flattened definition tags, and (for non-`'term'` types) the
     * per-definition list.
     */
    _getTermDictionaryEntryCommonInfo(dictionaryEntry, type) {
        const merged = (type === 'termMerged');
        const hasDefinitions = (type !== 'term');
        const {headwords} = dictionaryEntry;

        const allTermsSet = new Set(headwords.map(({term}) => term));
        const allReadingsSet = new Set(headwords.map(({reading}) => reading));

        const definitions = [];
        const definitionTags = [];
        for (const {tags, headwordIndices, entries, dictionary} of dictionaryEntry.definitions) {
            // Each list receives its own converted copy, so no tag object is shared between them.
            const ownTags = [];
            for (const tag of tags) {
                definitionTags.push(this._convertTag(tag));
                ownTags.push(this._convertTag(tag));
            }
            if (hasDefinitions) {
                definitions.push({
                    dictionary,
                    glossary: entries,
                    definitionTags: ownTags,
                    only: (merged ? DictionaryDataUtil.getDisambiguations(headwords, headwordIndices, allTermsSet, allReadingsSet) : void 0)
                });
            }
        }

        return {
            uniqueTerms: [...allTermsSet],
            uniqueReadings: [...allReadingsSet],
            definitionTags,
            definitions: (hasDefinitions ? definitions : void 0)
        };
    }

    /**
     * Converts every frequency of the entry.
     */
    _getTermFrequencies(dictionaryEntry) {
        const {headwords, frequencies} = dictionaryEntry;
        return frequencies.map((frequencyInfo, index) => this._createTermFrequency(headwords, frequencyInfo, index));
    }

    /**
     * Converts every pronunciation of the entry.
     */
    _getTermPitches(dictionaryEntry) {
        const {headwords, pronunciations} = dictionaryEntry;
        return pronunciations.map((pronunciation, index) => this._createTermPronunciation(headwords, pronunciation, index));
    }

    /**
     * Converts a list of pitches; the tags of each pitch are converted on first access.
     */
    _getTermPitchesInner(pitches) {
        const read = (item) => this.getCachedValue(item);
        return pitches.map(({position, tags}) => {
            const lazyTags = this.createCachedValue(() => this._convertTags(tags));
            return {
                position,
                get tags() { return read(lazyTags); }
            };
        });
    }

    /**
     * Builds one item per headword, using the deinflected text of the headword's first source.
     */
    _getTermExpressions(dictionaryEntry) {
        const read = (item) => this.getCachedValue(item);
        return dictionaryEntry.headwords.map(({term, reading, tags, sources: [{deinflectedText}]}, headwordIndex) => {
            const lazyTermTags = this.createCachedValue(() => this._convertTags(tags));
            const lazyFrequencies = this.createCachedValue(() => this._getTermExpressionFrequencies(dictionaryEntry, headwordIndex));
            const lazyPitches = this.createCachedValue(() => this._getTermExpressionPitches(dictionaryEntry, headwordIndex));
            const lazyTermFrequency = this.createCachedValue(() => this._getTermExpressionTermFrequency(lazyTermTags));
            const lazyFuriganaSegments = this.createCachedValue(() => this._getTermHeadwordFuriganaSegments(term, reading));
            return {
                sourceTerm: deinflectedText,
                expression: term,
                reading,
                get termTags() { return read(lazyTermTags); },
                get frequencies() { return read(lazyFrequencies); },
                get pitches() { return read(lazyPitches); },
                get furiganaSegments() { return read(lazyFuriganaSegments); },
                get termFrequency() { return read(lazyTermFrequency); }
            };
        });
    }

    /**
     * Converts the frequencies belonging to headword `i`; `index` counts only the kept items.
     */
    _getTermExpressionFrequencies(dictionaryEntry, i) {
        const {headwords, frequencies} = dictionaryEntry;
        return frequencies
            .filter(({headwordIndex}) => headwordIndex === i)
            .map((frequencyInfo, index) => this._createTermFrequency(headwords, frequencyInfo, index));
    }

    /**
     * Converts the pronunciations belonging to headword `i`; `index` counts only the kept items.
     */
    _getTermExpressionPitches(dictionaryEntry, i) {
        const {headwords, pronunciations} = dictionaryEntry;
        return pronunciations
            .filter(({headwordIndex}) => headwordIndex === i)
            .map((pronunciation, index) => this._createTermPronunciation(headwords, pronunciation, index));
    }

    /**
     * Passes the converted term tags to `DictionaryDataUtil.getTermFrequency`.
     */
    _getTermExpressionTermFrequency(cachedTermTags) {
        return DictionaryDataUtil.getTermFrequency(this.getCachedValue(cachedTermTags));
    }

    /**
     * Concatenates the entries of every definition for `'term'` results; `undefined` otherwise.
     */
    _getTermGlossaryArray(dictionaryEntry, type) {
        if (type !== 'term') { return void 0; }
        const glossary = [];
        for (const {entries} of dictionaryEntry.definitions) {
            glossary.push(...entries);
        }
        return glossary;
    }

    /**
     * Converts the grouped term tags; merged results have none and yield `undefined`.
     */
    _getTermTags(dictionaryEntry, type) {
        if (type === 'termMerged') { return void 0; }
        const converted = [];
        for (const {tag} of DictionaryDataUtil.groupTermTags(dictionaryEntry)) {
            converted.push(this._convertTag(tag));
        }
        return converted;
    }

    /**
     * Converts a list of tags.
     */
    _convertTags(tags) {
        return tags.map((tag) => this._convertTag(tag));
    }

    /**
     * Converts a tag: only the first description and the first dictionary name are kept,
     * with an empty string standing in when the corresponding list is empty.
     */
    _convertTag({name, category, content, order, score, dictionaries, redundant}) {
        let notes = '';
        if (content.length > 0) { notes = content[0]; }
        let dictionary = '';
        if (dictionaries.length > 0) { dictionary = dictionaries[0]; }
        return {name, category, notes, order, score, dictionary, redundant};
    }

    /**
     * Splits the context sentence around the looked-up text. The body starts at the
     * sentence offset and is as long as the original text of the entry.
     */
    _getCloze(dictionaryEntry, context) {
        let originalText = '';
        const {type} = dictionaryEntry;
        if (type === 'term') {
            const primarySource = this._getPrimarySource(dictionaryEntry);
            if (primarySource !== null) { originalText = primarySource.originalText; }
        } else if (type === 'kanji') {
            originalText = dictionaryEntry.character;
        }

        const {sentence} = this._asObject(context);
        const {text: sentenceText, offset: sentenceOffset} = this._asObject(sentence);
        const text = (typeof sentenceText === 'string' ? sentenceText : '');
        const offset = (typeof sentenceOffset === 'number' ? sentenceOffset : 0);
        const end = offset + originalText.length;

        return {
            sentence: text,
            prefix: text.substring(0, offset),
            body: text.substring(offset, end),
            suffix: text.substring(end)
        };
    }

    /**
     * Gets the furigana segments of the first headword for `'term'` results;
     * `undefined` for other types or when there is no headword.
     */
    _getTermFuriganaSegments(dictionaryEntry, type) {
        if (type !== 'term') { return void 0; }
        const {headwords} = dictionaryEntry;
        if (headwords.length === 0) { return void 0; }
        const {term, reading} = headwords[0];
        return this._getTermHeadwordFuriganaSegments(term, reading);
    }

    /**
     * Delegates to `distributeFurigana` of the `JapaneseUtil` instance.
     */
    _getTermHeadwordFuriganaSegments(term, reading) {
        return this._japaneseUtil.distributeFurigana(term, reading);
    }

    /**
     * Converts a single frequency, resolving its headword to the term and reading.
     * @param index The position of the item in the list being built.
     */
    _createTermFrequency(headwords, {headwordIndex, dictionary, dictionaryIndex, dictionaryPriority, hasReading, frequency}, index) {
        const {term, reading} = headwords[headwordIndex];
        return {
            index,
            expressionIndex: headwordIndex,
            dictionary,
            dictionaryOrder: {
                index: dictionaryIndex,
                priority: dictionaryPriority
            },
            expression: term,
            reading,
            hasReading,
            frequency
        };
    }

    /**
     * Converts a single pronunciation, resolving its headword to the term and reading.
     * The pitches are converted on first access.
     * @param index The position of the item in the list being built.
     */
    _createTermPronunciation(headwords, {headwordIndex, dictionary, dictionaryIndex, dictionaryPriority, pitches}, index) {
        const read = (item) => this.getCachedValue(item);
        const {term, reading} = headwords[headwordIndex];
        const lazyPitches = this.createCachedValue(() => this._getTermPitchesInner(pitches));
        return {
            index,
            expressionIndex: headwordIndex,
            dictionary,
            dictionaryOrder: {
                index: dictionaryIndex,
                priority: dictionaryPriority
            },
            expression: term,
            reading,
            get pitches() { return read(lazyPitches); }
        };
    }
}

View File

@ -1,299 +0,0 @@
/*
* Copyright (C) 2021 Yomichan Authors
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see <https://www.gnu.org/licenses/>.
*/
/* global
* DictionaryDataUtil
*/
/**
* This class represents the data that is exposed to the Anki template renderer.
* The public properties and data should be backwards compatible.
*/
class AnkiNoteData {
    /**
     * Stores the rendering inputs and augments `definition` in place with media
     * file names, the URL, and lazily computed properties (see `_prepareDefinition`).
     * @param japaneseUtil Object providing `distributeFurigana(expression, reading)`.
     * @param marker Value exposed (and reassignable) through the `marker` property.
     * @param details Remaining inputs: `definition`, `resultOutputMode`, `mode`,
     *   `glossaryLayoutMode`, `compactTags`, `context` and optional `injectedMedia`.
     */
    constructor(japaneseUtil, marker, {
        definition,
        resultOutputMode,
        mode,
        glossaryLayoutMode,
        compactTags,
        context,
        injectedMedia=null
    }) {
        this._japaneseUtil = japaneseUtil;
        this._definition = definition;
        this._resultOutputMode = resultOutputMode;
        this._mode = mode;
        this._glossaryLayoutMode = glossaryLayoutMode;
        this._compactTags = compactTags;
        this._context = context;
        this._marker = marker;
        this._injectedMedia = injectedMedia;
        // Lazily computed values; null means "not computed yet".
        this._pitches = null;
        this._pitchCount = null;
        this._uniqueExpressions = null;
        this._uniqueReadings = null;
        this._publicContext = null;
        this._cloze = null;
        this._furiganaSegmentsCache = null;
        this._prepareDefinition(definition, injectedMedia, context);
    }

    /** The current marker value. */
    get marker() {
        return this._marker;
    }

    set marker(value) {
        this._marker = value;
    }

    /** The (augmented) definition object passed to the constructor. */
    get definition() {
        return this._definition;
    }

    /** Distinct `expression` values of the definition's expressions; computed once. */
    get uniqueExpressions() {
        if (this._uniqueExpressions === null) {
            this._uniqueExpressions = this._getUniqueExpressions();
        }
        return this._uniqueExpressions;
    }

    /** Distinct `reading` values of the definition's expressions; computed once. */
    get uniqueReadings() {
        if (this._uniqueReadings === null) {
            this._uniqueReadings = this._getUniqueReadings();
        }
        return this._uniqueReadings;
    }

    /** Result of `DictionaryDataUtil.getPitchAccentInfos` for the definition; computed once. */
    get pitches() {
        if (this._pitches === null) {
            this._pitches = DictionaryDataUtil.getPitchAccentInfos(this._definition);
        }
        return this._pitches;
    }

    /** Sum of `pitches.length` over every item of `pitches`; computed once. */
    get pitchCount() {
        if (this._pitchCount === null) {
            this._pitchCount = this.pitches.reduce((i, v) => i + v.pitches.length, 0);
        }
        return this._pitchCount;
    }

    /** True when the result output mode is 'group'. */
    get group() {
        return this._resultOutputMode === 'group';
    }

    /** True when the result output mode is 'merge'. */
    get merge() {
        return this._resultOutputMode === 'merge';
    }

    /** True when the mode is 'term-kanji'. */
    get modeTermKanji() {
        return this._mode === 'term-kanji';
    }

    /** True when the mode is 'term-kana'. */
    get modeTermKana() {
        return this._mode === 'term-kana';
    }

    /** True when the mode is 'kanji'. */
    get modeKanji() {
        return this._mode === 'kanji';
    }

    /** True when the glossary layout mode is 'compact'. */
    get compactGlossaries() {
        return this._glossaryLayoutMode === 'compact';
    }

    /** The glossary layout mode as passed to the constructor. */
    get glossaryLayoutMode() {
        return this._glossaryLayoutMode;
    }

    /** The compactTags value as passed to the constructor. */
    get compactTags() {
        return this._compactTags;
    }

    /** Reduced context object (`{document: {title}}`); computed once. */
    get context() {
        if (this._publicContext === null) {
            this._publicContext = this._getPublicContext();
        }
        return this._publicContext;
    }

    /**
     * Creates a plain object whose accessors forward to this instance, so that
     * only the listed properties are reachable through it.
     * @returns An object with forwarding getters (and a `marker` setter).
     */
    createPublic() {
        const self = this;
        return {
            get marker() { return self.marker; },
            set marker(value) { self.marker = value; },
            get definition() { return self.definition; },
            get glossaryLayoutMode() { return self.glossaryLayoutMode; },
            get compactTags() { return self.compactTags; },
            get group() { return self.group; },
            get merge() { return self.merge; },
            get modeTermKanji() { return self.modeTermKanji; },
            get modeTermKana() { return self.modeTermKana; },
            get modeKanji() { return self.modeKanji; },
            get compactGlossaries() { return self.compactGlossaries; },
            get uniqueExpressions() { return self.uniqueExpressions; },
            get uniqueReadings() { return self.uniqueReadings; },
            get pitches() { return self.pitches; },
            get pitchCount() { return self.pitchCount; },
            get context() { return self.context; }
        };
    }

    // Private

    /** Returns `value` when it is a non-null object, otherwise a fresh empty object. */
    _asObject(value) {
        return (typeof value === 'object' && value !== null ? value : {});
    }

    /** Collects distinct expressions in first-seen order; empty for kanji definitions. */
    _getUniqueExpressions() {
        const results = new Set();
        const definition = this._definition;
        if (definition.type !== 'kanji') {
            for (const {expression} of definition.expressions) {
                results.add(expression);
            }
        }
        return [...results];
    }

    /** Collects distinct readings in first-seen order; empty for kanji definitions. */
    _getUniqueReadings() {
        const results = new Set();
        const definition = this._definition;
        if (definition.type !== 'kanji') {
            for (const {reading} of definition.expressions) {
                results.add(reading);
            }
        }
        return [...results];
    }

    /** Builds the reduced context; a non-string document title becomes ''. */
    _getPublicContext() {
        let {documentTitle} = this._asObject(this._context);
        if (typeof documentTitle !== 'string') { documentTitle = ''; }
        return {
            document: {
                title: documentTitle
            }
        };
    }

    /**
     * Splits the context sentence around the matched source text.
     * @returns `{sentence, prefix, body, suffix}`; all strings.
     */
    _getCloze() {
        const {sentence} = this._asObject(this._context);
        let {text, offset} = this._asObject(sentence);
        if (typeof text !== 'string') { text = ''; }
        if (typeof offset !== 'number') { offset = 0; }
        const definition = this._definition;
        let source = (definition.type === 'kanji' ? definition.character : definition.rawSource);
        // `cloze` is an enumerable getter, so cloning or serializing the definition
        // invokes this method. A missing source must not throw on `.length`;
        // treat it as an empty match instead.
        if (typeof source !== 'string') { source = ''; }
        return {
            sentence: text,
            prefix: text.substring(0, offset),
            body: text.substring(offset, offset + source.length),
            suffix: text.substring(offset + source.length)
        };
    }

    /** Memoizing wrapper around `_getCloze`. */
    _getClozeCached() {
        if (this._cloze === null) {
            this._cloze = this._getCloze();
        }
        return this._cloze;
    }

    /**
     * Mutates `definition`: assigns the injected media file names (null when absent)
     * and the context URL ('' when not a string), defines the lazy `cloze` getter,
     * and defines lazy `furiganaSegments`/`termFrequency` getters on nested objects.
     */
    _prepareDefinition(definition, injectedMedia, context) {
        const {
            screenshotFileName=null,
            clipboardImageFileName=null,
            clipboardText=null,
            audioFileName=null
        } = this._asObject(injectedMedia);

        let {url} = this._asObject(context);
        if (typeof url !== 'string') { url = ''; }

        definition.screenshotFileName = screenshotFileName;
        definition.clipboardImageFileName = clipboardImageFileName;
        definition.clipboardText = clipboardText;
        definition.audioFileName = audioFileName;
        definition.url = url;
        Object.defineProperty(definition, 'cloze', {
            configurable: true,
            enumerable: true,
            get: this._getClozeCached.bind(this)
        });

        for (const definition2 of this._getAllDefinitions(definition)) {
            if (definition2.type === 'term') {
                this._defineFuriganaSegments(definition2);
            }
            // Kanji definitions are not iterated for expressions.
            if (definition2.type === 'kanji') { continue; }
            for (const expression of definition2.expressions) {
                this._defineFuriganaSegments(expression);
                this._defineTermFrequency(expression);
            }
        }
    }

    /** Defines an enumerable, configurable lazy `furiganaSegments` getter on `object`. */
    _defineFuriganaSegments(object) {
        Object.defineProperty(object, 'furiganaSegments', {
            configurable: true,
            enumerable: true,
            get: this._getFuriganaSegments.bind(this, object)
        });
    }

    /** Defines an enumerable, configurable lazy `termFrequency` getter on `object`. */
    _defineTermFrequency(object) {
        Object.defineProperty(object, 'termFrequency', {
            configurable: true,
            enumerable: true,
            get: this._getTermFrequency.bind(this, object)
        });
    }

    /**
     * Returns `japaneseUtil.distributeFurigana(expression, reading)` for `object`,
     * caching the result per object in a Map that is created on first use.
     */
    _getFuriganaSegments(object) {
        if (this._furiganaSegmentsCache !== null) {
            const cachedResult = this._furiganaSegmentsCache.get(object);
            if (typeof cachedResult !== 'undefined') { return cachedResult; }
        } else {
            this._furiganaSegmentsCache = new Map();
        }

        const {expression, reading} = object;
        const result = this._japaneseUtil.distributeFurigana(expression, reading);
        this._furiganaSegmentsCache.set(object, result);
        return result;
    }

    /** Returns `DictionaryDataUtil.getTermFrequency` applied to `object.termTags`. */
    _getTermFrequency(object) {
        const {termTags} = object;
        return DictionaryDataUtil.getTermFrequency(termTags);
    }

    /**
     * Returns `definition` followed by all nested definitions, found by repeatedly
     * following array-valued `definitions` properties (the list grows while it is scanned).
     */
    _getAllDefinitions(definition) {
        const definitions = [definition];
        for (let i = 0; i < definitions.length; ++i) {
            const childDefinitions = definitions[i].definitions;
            if (Array.isArray(childDefinitions)) {
                definitions.push(...childDefinitions);
            }
        }
        return definitions;
    }
}

View File

@ -60,23 +60,20 @@ class DisplayGenerator {
const definitionsContainer = node.querySelector('.definition-list');
const termTagsContainer = node.querySelector('.expression-list-tag-list');
const {expressions, type, reasons, frequencies} = details;
const definitions = (type === 'term' ? [details] : details.definitions);
const merged = (type === 'termMerged' || type === 'termMergedByGlossary');
const {headwords: expressions, type, inflections: reasons, definitions, frequencies, pronunciations} = details;
const pitches = DictionaryDataUtil.getPitchAccentInfos(details);
const pitchCount = pitches.reduce((i, v) => i + v.pitches.length, 0);
const groupedFrequencies = DictionaryDataUtil.groupTermFrequencies(frequencies);
const groupedFrequencies = DictionaryDataUtil.groupTermFrequencies(details);
const termTags = DictionaryDataUtil.groupTermTags(details);
const uniqueExpressions = new Set();
const uniqueReadings = new Set();
for (const {expression, reading} of expressions) {
for (const {term: expression, reading} of expressions) {
uniqueExpressions.add(expression);
uniqueReadings.add(reading);
}
node.dataset.format = type;
node.dataset.expressionMulti = `${merged}`;
node.dataset.expressionCount = `${expressions.length}`;
node.dataset.definitionCount = `${definitions.length}`;
node.dataset.pitchAccentDictionaryCount = `${pitches.length}`;
@ -86,7 +83,13 @@ class DisplayGenerator {
node.dataset.frequencyCount = `${frequencies.length}`;
node.dataset.groupedFrequencyCount = `${groupedFrequencies.length}`;
this._appendMultiple(expressionsContainer, this._createTermExpression.bind(this), expressions);
for (let i = 0, ii = expressions.length; i < ii; ++i) {
const node2 = this._createTermExpression(expressions[i], i, pronunciations);
node2.dataset.index = `${i}`;
expressionsContainer.appendChild(node2);
}
expressionsContainer.dataset.count = `${expressions.length}`;
this._appendMultiple(reasonsContainer, this._createTermReason.bind(this), reasons);
this._appendMultiple(frequencyGroupListContainer, this._createFrequencyGroup.bind(this), groupedFrequencies, false);
this._appendMultiple(pitchesContainer, this._createPitches.bind(this), pitches);
@ -114,7 +117,7 @@ class DisplayGenerator {
dictionaryTag.name = dictionary;
}
const node2 = this._createTermDefinitionItem(definition, dictionaryTag);
const node2 = this._createTermDefinitionItem(definition, dictionaryTag, expressions, uniqueExpressions, uniqueReadings);
node2.dataset.index = `${i}`;
definitionsContainer.appendChild(node2);
}
@ -144,7 +147,7 @@ class DisplayGenerator {
this._appendMultiple(frequencyGroupListContainer, this._createFrequencyGroup.bind(this), groupedFrequencies, true);
this._appendMultiple(tagContainer, this._createTag.bind(this), [...details.tags, dictionaryTag]);
this._appendMultiple(glossaryContainer, this._createKanjiGlossaryItem.bind(this), details.glossary);
this._appendMultiple(glossaryContainer, this._createKanjiGlossaryItem.bind(this), details.definitions);
this._appendMultiple(chineseReadingsContainer, this._createKanjiReading.bind(this), details.onyomi);
this._appendMultiple(japaneseReadingsContainer, this._createKanjiReading.bind(this), details.kunyomi);
@ -229,8 +232,8 @@ class DisplayGenerator {
// Private
_createTermExpression(details) {
const {expression, reading, termTags, pitches} = details;
_createTermExpression(headword, headwordIndex, pronunciations) {
const {term: expression, reading, tags: termTags} = headword;
const searchQueries = [];
if (expression) { searchQueries.push(expression); }
@ -244,7 +247,7 @@ class DisplayGenerator {
node.dataset.readingIsSame = `${reading === expression}`;
node.dataset.frequency = DictionaryDataUtil.getTermFrequency(termTags);
const pitchAccentCategories = this._getPitchAccentCategories(pitches);
const pitchAccentCategories = this._getPitchAccentCategories(reading, pronunciations, headwordIndex);
if (pitchAccentCategories !== null) {
node.dataset.pitchAccentCategories = pitchAccentCategories;
}
@ -266,19 +269,21 @@ class DisplayGenerator {
return fragment;
}
_createTermDefinitionItem(details, dictionaryTag) {
_createTermDefinitionItem(details, dictionaryTag, headwords, uniqueTerms, uniqueReadings) {
const {dictionary, tags, headwordIndices, entries} = details;
const disambiguations = DictionaryDataUtil.getDisambiguations(headwords, headwordIndices, uniqueTerms, uniqueReadings);
const node = this._templates.instantiate('definition-item');
const tagListContainer = node.querySelector('.definition-tag-list');
const onlyListContainer = node.querySelector('.definition-disambiguation-list');
const glossaryContainer = node.querySelector('.glossary-list');
const {dictionary, definitionTags} = details;
node.dataset.dictionary = dictionary;
this._appendMultiple(tagListContainer, this._createTag.bind(this), [...definitionTags, dictionaryTag]);
this._appendMultiple(onlyListContainer, this._createTermDisambiguation.bind(this), details.only);
this._appendMultiple(glossaryContainer, this._createTermGlossaryItem.bind(this), details.glossary, dictionary);
this._appendMultiple(tagListContainer, this._createTag.bind(this), [...tags, dictionaryTag]);
this._appendMultiple(onlyListContainer, this._createTermDisambiguation.bind(this), disambiguations);
this._appendMultiple(glossaryContainer, this._createTermGlossaryItem.bind(this), entries, dictionary);
return node;
}
@ -406,11 +411,12 @@ class DisplayGenerator {
}
_createKanjiInfoTableItem(details) {
const {content, name, value} = details;
const node = this._templates.instantiate('kanji-info-table-item');
const nameNode = node.querySelector('.kanji-info-table-item-header');
const valueNode = node.querySelector('.kanji-info-table-item-value');
this._setTextContent(nameNode, details.notes || details.name);
this._setTextContent(valueNode, details.value);
this._setTextContent(nameNode, content.length > 0 ? content : name);
this._setTextContent(valueNode, value);
return node;
}
@ -419,37 +425,46 @@ class DisplayGenerator {
}
_createTag(details) {
const {notes, name, category, redundant} = details;
const {content, name, category, redundant} = details;
const node = this._templates.instantiate('tag');
const inner = node.querySelector('.tag-label-content');
node.title = notes;
const contentString = content.join('\n');
node.title = contentString;
this._setTextContent(inner, name);
node.dataset.details = notes || name;
node.dataset.details = contentString.length > 0 ? contentString : name;
node.dataset.category = category;
if (redundant) { node.dataset.redundant = 'true'; }
return node;
}
_createTermTag(details, totalExpressionCount) {
const {tag, expressions} = details;
_createTermTag(details, totalHeadwordCount) {
const {tag, headwordIndices} = details;
const node = this._createTag(tag);
node.dataset.disambiguation = `${JSON.stringify(expressions)}`;
node.dataset.totalExpressionCount = `${totalExpressionCount}`;
node.dataset.matchedExpressionCount = `${expressions.length}`;
node.dataset.unmatchedExpressionCount = `${Math.max(0, totalExpressionCount - expressions.length)}`;
node.dataset.headwords = headwordIndices.join(' ');
node.dataset.totalExpressionCount = `${totalHeadwordCount}`;
node.dataset.matchedExpressionCount = `${headwordIndices.length}`;
node.dataset.unmatchedExpressionCount = `${Math.max(0, totalHeadwordCount - headwordIndices.length)}`;
return node;
}
_createSearchTag(text) {
return this._createTag({
notes: '',
name: text,
category: 'search',
_createTagData(name, category) {
return {
name,
category,
order: 0,
score: 0,
content: [],
dictionaries: [],
redundant: false
});
};
}
_createSearchTag(text) {
return this._createTag(this._createTagData(text, 'search'));
}
_createPitches(details) {
@ -462,7 +477,7 @@ class DisplayGenerator {
node.dataset.pitchesMulti = 'true';
node.dataset.pitchesCount = `${pitches.length}`;
const tag = this._createTag({notes: '', name: dictionary, category: 'pitch-accent-dictionary'});
const tag = this._createTag(this._createTagData(dictionary, 'pitch-accent-dictionary'));
node.querySelector('.pitch-accent-group-tag-list').appendChild(tag);
let hasTags = false;
@ -482,7 +497,7 @@ class DisplayGenerator {
_createPitch(details) {
const jp = this._japaneseUtil;
const {reading, position, tags, exclusiveExpressions, exclusiveReadings} = details;
const {reading, position, tags, exclusiveTerms, exclusiveReadings} = details;
const morae = jp.getKanaMorae(reading);
const node = this._templates.instantiate('pitch-accent');
@ -497,7 +512,7 @@ class DisplayGenerator {
this._appendMultiple(n, this._createTag.bind(this), tags);
n = node.querySelector('.pitch-accent-disambiguation-list');
this._createPitchAccentDisambiguations(n, exclusiveExpressions, exclusiveReadings);
this._createPitchAccentDisambiguations(n, exclusiveTerms, exclusiveReadings);
n = node.querySelector('.pitch-accent-characters');
for (let i = 0, ii = morae.length; i < ii; ++i) {
@ -523,9 +538,9 @@ class DisplayGenerator {
return node;
}
_createPitchAccentDisambiguations(container, exclusiveExpressions, exclusiveReadings) {
_createPitchAccentDisambiguations(container, exclusiveTerms, exclusiveReadings) {
const templateName = 'pitch-accent-disambiguation';
for (const exclusiveExpression of exclusiveExpressions) {
for (const exclusiveExpression of exclusiveTerms) {
const node = this._templates.instantiate(templateName);
node.dataset.type = 'expression';
this._setTextContent(node, exclusiveExpression, 'ja');
@ -539,8 +554,8 @@ class DisplayGenerator {
container.appendChild(node);
}
container.dataset.count = `${exclusiveExpressions.length + exclusiveReadings.length}`;
container.dataset.expressionCount = `${exclusiveExpressions.length}`;
container.dataset.count = `${exclusiveTerms.length + exclusiveReadings.length}`;
container.dataset.expressionCount = `${exclusiveTerms.length}`;
container.dataset.readingCount = `${exclusiveReadings.length}`;
}
@ -586,7 +601,7 @@ class DisplayGenerator {
}
_createFrequencyGroup(details, kanji) {
const {dictionary, frequencyData} = details;
const {dictionary, frequencies} = details;
const node = this._templates.instantiate('frequency-group-item');
const body = node.querySelector('.tag-body-content');
@ -594,36 +609,37 @@ class DisplayGenerator {
this._setTextContent(node.querySelector('.tag-label-content'), dictionary);
node.dataset.details = dictionary;
for (let i = 0, ii = frequencyData.length; i < ii; ++i) {
const item = frequencyData[i];
const ii = frequencies.length;
for (let i = 0; i < ii; ++i) {
const item = frequencies[i];
const itemNode = (kanji ? this._createKanjiFrequency(item, dictionary) : this._createTermFrequency(item, dictionary));
itemNode.dataset.index = `${i}`;
body.appendChild(itemNode);
}
body.dataset.count = `${frequencyData.length}`;
node.dataset.count = `${frequencyData.length}`;
body.dataset.count = `${ii}`;
node.dataset.count = `${ii}`;
node.dataset.details = dictionary;
return node;
}
_createTermFrequency(details, dictionary) {
const {expression, reading, frequencies} = details;
const {term, reading, values} = details;
const node = this._templates.instantiate('term-frequency-item');
this._setTextContent(node.querySelector('.tag-label-content'), dictionary);
const frequency = frequencies.join(', ');
const frequency = values.join(', ');
this._setTextContent(node.querySelector('.frequency-disambiguation-expression'), expression, 'ja');
this._setTextContent(node.querySelector('.frequency-disambiguation-expression'), term, 'ja');
this._setTextContent(node.querySelector('.frequency-disambiguation-reading'), (reading !== null ? reading : ''), 'ja');
this._setTextContent(node.querySelector('.frequency-value'), frequency, 'ja');
node.dataset.expression = expression;
node.dataset.expression = term;
node.dataset.reading = reading;
node.dataset.hasReading = `${reading !== null}`;
node.dataset.readingIsSame = `${reading === expression}`;
node.dataset.readingIsSame = `${reading === term}`;
node.dataset.dictionary = dictionary;
node.dataset.frequency = `${frequency}`;
node.dataset.details = dictionary;
@ -632,10 +648,10 @@ class DisplayGenerator {
}
_createKanjiFrequency(details, dictionary) {
const {character, frequencies} = details;
const {character, values} = details;
const node = this._templates.instantiate('kanji-frequency-item');
const frequency = frequencies.join(', ');
const frequency = values.join(', ');
this._setTextContent(node.querySelector('.tag-label-content'), dictionary);
this._setTextContent(node.querySelector('.frequency-value'), frequency, 'ja');
@ -707,15 +723,7 @@ class DisplayGenerator {
}
_createDictionaryTag(dictionary) {
return {
name: dictionary,
category: 'dictionary',
notes: '',
order: 100,
score: 0,
dictionary,
redundant: false
};
return this._createTagData(dictionary, 'dictionary');
}
_setTextContent(node, value, language) {
@ -751,11 +759,12 @@ class DisplayGenerator {
}
}
_getPitchAccentCategories(pitches) {
if (pitches.length === 0) { return null; }
_getPitchAccentCategories(reading, pronunciations, headwordIndex) {
if (pronunciations.length === 0) { return null; }
const categories = new Set();
for (const {reading, pitches: pitches2} of pitches) {
for (const {position} of pitches2) {
for (const pronunciation of pronunciations) {
if (pronunciation.headwordIndex !== headwordIndex) { continue; }
for (const {position} of pronunciation.pitches) {
const category = this._japaneseUtil.getPitchCategory(reading, position, false);
if (category !== null) {
categories.add(category);

View File

@ -16,40 +16,41 @@
*/
class DictionaryDataUtil {
static groupTermTags(definition) {
const {expressions} = definition;
const expressionsLength = expressions.length;
const uniqueCheck = (expressionsLength > 1);
const resultsMap = new Map();
static groupTermTags(dictionaryEntry) {
const {headwords} = dictionaryEntry;
const headwordCount = headwords.length;
const uniqueCheck = (headwordCount > 1);
const resultsIndexMap = new Map();
const results = [];
for (let i = 0; i < expressionsLength; ++i) {
const {termTags, expression, reading} = expressions[i];
for (const tag of termTags) {
for (let i = 0; i < headwordCount; ++i) {
const {tags} = headwords[i];
for (const tag of tags) {
if (uniqueCheck) {
const {name, category, notes, dictionary} = tag;
const key = this._createMapKey([name, category, notes, dictionary]);
const index = resultsMap.get(key);
const index = resultsIndexMap.get(key);
if (typeof index !== 'undefined') {
const existingItem = results[index];
existingItem.expressions.push({index: i, expression, reading});
existingItem.headwordIndices.push(i);
continue;
}
resultsMap.set(key, results.length);
resultsIndexMap.set(key, results.length);
}
const item = {
tag,
expressions: [{index: i, expression, reading}]
};
const item = {tag, headwordIndices: [i]};
results.push(item);
}
}
return results;
}
static groupTermFrequencies(frequencies) {
static groupTermFrequencies(dictionaryEntry) {
const {headwords, frequencies} = dictionaryEntry;
const map1 = new Map();
for (const {dictionary, expression, reading, hasReading, frequency} of frequencies) {
for (const {headwordIndex, dictionary, hasReading, frequency} of frequencies) {
const {term, reading} = headwords[headwordIndex];
let map2 = map1.get(dictionary);
if (typeof map2 === 'undefined') {
map2 = new Map();
@ -57,14 +58,14 @@ class DictionaryDataUtil {
}
const readingKey = hasReading ? reading : null;
const key = this._createMapKey([expression, readingKey]);
const key = this._createMapKey([term, readingKey]);
let frequencyData = map2.get(key);
if (typeof frequencyData === 'undefined') {
frequencyData = {expression, reading: readingKey, frequencies: new Set()};
frequencyData = {term, reading: readingKey, values: new Set()};
map2.set(key, frequencyData);
}
frequencyData.frequencies.add(frequency);
frequencyData.values.add(frequency);
}
return this._createFrequencyGroupsFromMap(map1);
}
@ -80,64 +81,66 @@ class DictionaryDataUtil {
let frequencyData = map2.get(character);
if (typeof frequencyData === 'undefined') {
frequencyData = {character, frequencies: new Set()};
frequencyData = {character, values: new Set()};
map2.set(character, frequencyData);
}
frequencyData.frequencies.add(frequency);
frequencyData.values.add(frequency);
}
return this._createFrequencyGroupsFromMap(map1);
}
static getPitchAccentInfos(definition) {
if (definition.type === 'kanji') { return []; }
static getPitchAccentInfos(dictionaryEntry) {
const {headwords, pronunciations} = dictionaryEntry;
const results = new Map();
const allExpressions = new Set();
const allReadings = new Set();
for (const {expression, reading, pitches: expressionPitches} of definition.expressions) {
allExpressions.add(expression);
for (const {term, reading} of headwords) {
allExpressions.add(term);
allReadings.add(reading);
}
for (const {pitches, dictionary} of expressionPitches) {
let dictionaryResults = results.get(dictionary);
if (typeof dictionaryResults === 'undefined') {
dictionaryResults = [];
results.set(dictionary, dictionaryResults);
}
for (const {position, tags} of pitches) {
let pitchAccentInfo = this._findExistingPitchAccentInfo(reading, position, tags, dictionaryResults);
if (pitchAccentInfo === null) {
pitchAccentInfo = {expressions: new Set(), reading, position, tags};
dictionaryResults.push(pitchAccentInfo);
}
pitchAccentInfo.expressions.add(expression);
const pitchAccentInfoMap = new Map();
for (const {headwordIndex, dictionary, pitches} of pronunciations) {
const {term, reading} = headwords[headwordIndex];
let dictionaryPitchAccentInfoList = pitchAccentInfoMap.get(dictionary);
if (typeof dictionaryPitchAccentInfoList === 'undefined') {
dictionaryPitchAccentInfoList = [];
pitchAccentInfoMap.set(dictionary, dictionaryPitchAccentInfoList);
}
for (const {position, tags} of pitches) {
let pitchAccentInfo = this._findExistingPitchAccentInfo(reading, position, tags, dictionaryPitchAccentInfoList);
if (pitchAccentInfo === null) {
pitchAccentInfo = {
terms: new Set(),
reading,
position,
tags,
exclusiveTerms: [],
exclusiveReadings: []
};
dictionaryPitchAccentInfoList.push(pitchAccentInfo);
}
pitchAccentInfo.terms.add(term);
}
}
const multipleReadings = (allReadings.size > 1);
for (const dictionaryResults of results.values()) {
for (const result of dictionaryResults) {
const exclusiveExpressions = [];
const exclusiveReadings = [];
const resultExpressions = result.expressions;
if (!this._areSetsEqual(resultExpressions, allExpressions)) {
exclusiveExpressions.push(...this._getSetIntersection(resultExpressions, allExpressions));
for (const dictionaryPitchAccentInfoList of pitchAccentInfoMap.values()) {
for (const pitchAccentInfo of dictionaryPitchAccentInfoList) {
const {terms, reading, exclusiveTerms, exclusiveReadings} = pitchAccentInfo;
if (!this._areSetsEqual(terms, allExpressions)) {
exclusiveTerms.push(...this._getSetIntersection(terms, allExpressions));
}
if (multipleReadings) {
exclusiveReadings.push(result.reading);
exclusiveReadings.push(reading);
}
result.expressions = [...resultExpressions];
result.exclusiveExpressions = exclusiveExpressions;
result.exclusiveReadings = exclusiveReadings;
pitchAccentInfo.terms = [...terms];
}
}
const results2 = [];
for (const [dictionary, pitches] of results.entries()) {
for (const [dictionary, pitches] of pitchAccentInfoMap.entries()) {
results2.push({dictionary, pitches});
}
return results2;
@ -157,17 +160,34 @@ class DictionaryDataUtil {
}
}
static getDisambiguations(headwords, headwordIndices, allTermsSet, allReadingsSet) {
if (allTermsSet.size <= 1 && allReadingsSet.size <= 1) { return []; }
const terms = new Set();
const readings = new Set();
for (const headwordIndex of headwordIndices) {
const {term, reading} = headwords[headwordIndex];
terms.add(term);
readings.add(reading);
}
const disambiguations = [];
if (!this._areSetsEqual(terms, allTermsSet)) { disambiguations.push(...this._getSetIntersection(terms, allTermsSet)); }
if (!this._areSetsEqual(readings, allReadingsSet)) { disambiguations.push(...this._getSetIntersection(readings, allReadingsSet)); }
return disambiguations;
}
// Private
static _createFrequencyGroupsFromMap(map) {
const results = [];
for (const [dictionary, map2] of map.entries()) {
const frequencyDataArray = [];
const frequencies = [];
for (const frequencyData of map2.values()) {
frequencyData.frequencies = [...frequencyData.frequencies];
frequencyDataArray.push(frequencyData);
frequencyData.values = [...frequencyData.values];
frequencies.push(frequencyData);
}
results.push({dictionary, frequencyData: frequencyDataArray});
results.push({dictionary, frequencies});
}
return results;
}

File diff suppressed because it is too large Load Diff

View File

@ -16,7 +16,7 @@
*/
/* globals
* AnkiNoteData
* AnkiNoteDataCreator
* JapaneseUtil
* TemplateRenderer
* TemplateRendererFrameApi
@ -25,8 +25,9 @@
(() => {
const japaneseUtil = new JapaneseUtil(null);
const templateRenderer = new TemplateRenderer(japaneseUtil);
const ankiNoteDataCreator = new AnkiNoteDataCreator(japaneseUtil);
templateRenderer.registerDataType('ankiNote', {
modifier: ({data, marker}) => new AnkiNoteData(japaneseUtil, marker, data).createPublic()
modifier: ({data, marker}) => ankiNoteDataCreator.create(marker, data)
});
const templateRendererFrameApi = new TemplateRendererFrameApi(templateRenderer);
templateRendererFrameApi.prepare();

View File

@ -17,7 +17,7 @@
<!-- Scripts -->
<script src="/lib/handlebars.min.js"></script>
<script src="/js/data/anki-note-data.js"></script>
<script src="/js/data/anki-note-data-creator.js"></script>
<script src="/js/language/dictionary-data-util.js"></script>
<script src="/js/language/japanese-util.js"></script>
<script src="/js/templates/template-renderer.js"></script>

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

View File

@ -42,21 +42,20 @@ async function createVM() {
const [
JapaneseUtil,
TemplateRenderer,
AnkiNoteData,
AnkiNoteBuilder
] = vm.get([
'JapaneseUtil',
'TemplateRenderer',
'AnkiNoteData',
'AnkiNoteBuilder'
]);
const ankiNoteDataCreator = vm.ankiNoteDataCreator;
class TemplateRendererProxy {
constructor() {
const japaneseUtil = new JapaneseUtil(null);
this._templateRenderer = new TemplateRenderer(japaneseUtil);
this._templateRenderer.registerDataType('ankiNote', {
modifier: ({data, marker}) => new AnkiNoteData(japaneseUtil, marker, data).createPublic()
modifier: ({data, marker}) => ankiNoteDataCreator.create(marker, data)
});
}
@ -122,7 +121,7 @@ function getFieldMarkers(type) {
}
}
async function getRenderResults(definitions, type, mode, templates, AnkiNoteBuilder, write) {
async function getRenderResults(dictionaryEntries, type, mode, templates, AnkiNoteBuilder, write) {
const markers = getFieldMarkers(type);
const fields = [];
for (const marker of markers) {
@ -132,8 +131,18 @@ async function getRenderResults(definitions, type, mode, templates, AnkiNoteBuil
const clozePrefix = 'cloze-prefix';
const clozeSuffix = 'cloze-suffix';
const results = [];
for (const definition of definitions) {
const source = definition.type === 'kanji' ? definition.character : definition.rawSource;
for (const dictionaryEntry of dictionaryEntries) {
let source = '';
switch (dictionaryEntry.type) {
case 'kanji':
source = dictionaryEntry.character;
break;
case 'term':
if (dictionaryEntry.headwords.length > 0 && dictionaryEntry.headwords[0].sources.length > 0) {
source = dictionaryEntry.headwords[0].sources[0].originalText;
}
break;
}
const ankiNoteBuilder = new AnkiNoteBuilder();
const context = {
url: 'url:',
@ -145,7 +154,7 @@ async function getRenderResults(definitions, type, mode, templates, AnkiNoteBuil
};
const errors = [];
const noteFields = (await ankiNoteBuilder.createNote({
definition,
definition: dictionaryEntry,
mode: null,
context,
templates,
@ -193,8 +202,8 @@ async function main() {
{
const {name, mode, text} = test;
const options = vm.buildOptions(optionsPresets, test.options);
const [definitions] = clone(await vm.translator.findTerms(mode, text, options));
const results = mode !== 'simple' ? clone(await getRenderResults(definitions, 'terms', mode, templates, AnkiNoteBuilder, write)) : null;
const {dictionaryEntries} = clone(await vm.translator.findTerms(mode, text, options));
const results = mode !== 'simple' ? clone(await getRenderResults(dictionaryEntries, 'terms', mode, templates, AnkiNoteBuilder, write)) : null;
actualResults1.push({name, results});
if (!write) {
assert.deepStrictEqual(results, expected1.results);
@ -205,8 +214,8 @@ async function main() {
{
const {name, text} = test;
const options = vm.buildOptions(optionsPresets, test.options);
const definitions = clone(await vm.translator.findKanji(text, options));
const results = clone(await getRenderResults(definitions, 'kanji', null, templates, AnkiNoteBuilder, write));
const dictionaryEntries = clone(await vm.translator.findKanji(text, options));
const results = clone(await getRenderResults(dictionaryEntries, 'kanji', null, templates, AnkiNoteBuilder, write));
actualResults1.push({name, results});
if (!write) {
assert.deepStrictEqual(results, expected1.results);

View File

@ -54,13 +54,13 @@ async function main() {
{
const {name, mode, text} = test;
const options = translatorVM.buildOptions(optionsPresets, test.options);
const [definitions, length] = clone(await translatorVM.translator.findTerms(mode, text, options));
const noteDataList = mode !== 'simple' ? clone(definitions.map((definition) => translatorVM.createTestAnkiNoteData(clone(definition), mode))) : null;
actualResults1.push({name, length, definitions});
const {dictionaryEntries, originalTextLength} = clone(await translatorVM.translator.findTerms(mode, text, options));
const noteDataList = mode !== 'simple' ? clone(dictionaryEntries.map((dictionaryEntry) => translatorVM.createTestAnkiNoteData(clone(dictionaryEntry), mode))) : null;
actualResults1.push({name, originalTextLength, dictionaryEntries});
actualResults2.push({name, noteDataList});
if (!write) {
assert.deepStrictEqual(length, expected1.length);
assert.deepStrictEqual(definitions, expected1.definitions);
assert.deepStrictEqual(originalTextLength, expected1.originalTextLength);
assert.deepStrictEqual(dictionaryEntries, expected1.dictionaryEntries);
assert.deepStrictEqual(noteDataList, expected2.noteDataList);
}
}
@ -69,12 +69,12 @@ async function main() {
{
const {name, text} = test;
const options = translatorVM.buildOptions(optionsPresets, test.options);
const definitions = clone(await translatorVM.translator.findKanji(text, options));
const noteDataList = clone(definitions.map((definition) => translatorVM.createTestAnkiNoteData(clone(definition), null)));
actualResults1.push({name, definitions});
const dictionaryEntries = clone(await translatorVM.translator.findKanji(text, options));
const noteDataList = clone(dictionaryEntries.map((dictionaryEntry) => translatorVM.createTestAnkiNoteData(clone(dictionaryEntry), null)));
actualResults1.push({name, dictionaryEntries});
actualResults2.push({name, noteDataList});
if (!write) {
assert.deepStrictEqual(definitions, expected1.definitions);
assert.deepStrictEqual(dictionaryEntries, expected1.dictionaryEntries);
assert.deepStrictEqual(noteDataList, expected2.noteDataList);
}
}