Translator data format updates (#1754)

* Add {dictionaryIndex, dictionaryPriority} to definitions

* Add score to definitions

* Add id to definition

* Use definition id instead of ids array

* Remove ids array

* Update docs

* Update test data
This commit is contained in:
toasted-nutbread 2021-06-26 11:57:09 -04:00 committed by GitHub
parent 5756885fa9
commit 6e0a367afc
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
4 changed files with 448 additions and 304 deletions

View File

@ -191,10 +191,6 @@ namespace Translation {
* `DictionaryEntry.type` is always `'term'`. * `DictionaryEntry.type` is always `'term'`.
*/ */
export interface TermDictionaryEntry extends DictionaryEntry { export interface TermDictionaryEntry extends DictionaryEntry {
/**
* Database IDs for the term.
*/
ids: number[];
/** /**
* Whether or not any of the sources is a primary source. Primary sources are derived from the * Whether or not any of the sources is a primary source. Primary sources are derived from the
* original search text, while non-primary sources originate from related terms. * original search text, while non-primary sources originate from related terms.
@ -288,6 +284,22 @@ namespace Translation {
* The name of the dictionary that the definition information originated from. * The name of the dictionary that the definition information originated from.
*/ */
dictionary: string; dictionary: string;
/**
* The index of the dictionary in the original list of dictionaries used for the lookup.
*/
dictionaryIndex: number;
/**
* The priority of the dictionary.
*/
dictionaryPriority: number;
/**
* Database ID for the definition. This is a single numeric ID (not a list):
* consumers read it directly from the definition, e.g. `definitions[0].id`.
*/
id: number;
/**
* A score for the definition.
*/
score: number;
/** /**
* A list of database sequence numbers for the term. A value of `-1` corresponds to no sequence. * A list of database sequence numbers for the term. A value of `-1` corresponds to no sequence.
* The list can have multiple values if multiple definitions with different sequences have been merged. * The list can have multiple values if multiple definitions with different sequences have been merged.

View File

@ -274,7 +274,7 @@ class AnkiNoteDataCreator {
case 'merge': type = 'termMerged'; break; case 'merge': type = 'termMerged'; break;
} }
const {ids, inflections, score, dictionaryIndex, dictionaryPriority, sourceTermExactMatchCount} = dictionaryEntry; const {inflections, score, dictionaryIndex, dictionaryPriority, sourceTermExactMatchCount, definitions} = dictionaryEntry;
const { const {
screenshotFileName=null, screenshotFileName=null,
@ -301,7 +301,7 @@ class AnkiNoteDataCreator {
return { return {
type, type,
id: (type === 'term' ? ids[0] : void 0), id: (type === 'term' && definitions.length > 0 ? definitions[0].id : void 0),
source: (primarySource !== null ? primarySource.transformedText : null), source: (primarySource !== null ? primarySource.transformedText : null),
rawSource: (primarySource !== null ? primarySource.originalText : null), rawSource: (primarySource !== null ? primarySource.originalText : null),
sourceTerm: (type !== 'termMerged' ? (primarySource !== null ? primarySource.deinflectedText : null) : void 0), sourceTerm: (type !== 'termMerged' ? (primarySource !== null ? primarySource.deinflectedText : null) : void 0),

View File

@ -358,7 +358,7 @@ class Translator {
const groupedDictionaryEntriesMap = new Map(); const groupedDictionaryEntriesMap = new Map();
const ungroupedDictionaryEntriesMap = new Map(); const ungroupedDictionaryEntriesMap = new Map();
for (const dictionaryEntry of dictionaryEntries) { for (const dictionaryEntry of dictionaryEntries) {
const {ids: [id], definitions: [{dictionary, sequences: [sequence]}]} = dictionaryEntry; const {definitions: [{id, dictionary, sequences: [sequence]}]} = dictionaryEntry;
if (mainDictionary === dictionary && sequence >= 0) { if (mainDictionary === dictionary && sequence >= 0) {
let group = groupedDictionaryEntriesMap.get(sequence); let group = groupedDictionaryEntriesMap.get(sequence);
if (typeof group === 'undefined') { if (typeof group === 'undefined') {
@ -1023,8 +1023,8 @@ class Translator {
return {index, term, reading, sources, tags, wordClasses}; return {index, term, reading, sources, tags, wordClasses};
} }
_createTermDefinition(index, headwordIndices, dictionary, sequences, isPrimary, tags, entries) { _createTermDefinition(index, headwordIndices, dictionary, dictionaryIndex, dictionaryPriority, id, score, sequences, isPrimary, tags, entries) {
return {index, headwordIndices, dictionary, sequences, isPrimary, tags, entries}; return {index, headwordIndices, dictionary, dictionaryIndex, dictionaryPriority, id, score, sequences, isPrimary, tags, entries};
} }
_createTermPronunciation(index, headwordIndex, dictionary, dictionaryIndex, dictionaryPriority, pitches) { _createTermPronunciation(index, headwordIndex, dictionary, dictionaryIndex, dictionaryPriority, pitches) {
@ -1035,10 +1035,9 @@ class Translator {
return {index, headwordIndex, dictionary, dictionaryIndex, dictionaryPriority, hasReading, frequency}; return {index, headwordIndex, dictionary, dictionaryIndex, dictionaryPriority, hasReading, frequency};
} }
_createTermDictionaryEntry(ids, isPrimary, inflections, score, dictionaryIndex, dictionaryPriority, sourceTermExactMatchCount, maxTransformedTextLength, headwords, definitions) { _createTermDictionaryEntry(isPrimary, inflections, score, dictionaryIndex, dictionaryPriority, sourceTermExactMatchCount, maxTransformedTextLength, headwords, definitions) {
return { return {
type: 'term', type: 'term',
ids,
isPrimary, isPrimary,
inflections, inflections,
score, score,
@ -1069,7 +1068,6 @@ class Translator {
if (definitionTags.length > 0) { definitionTagGroups.push(this._createTagGroup(dictionary, definitionTags)); } if (definitionTags.length > 0) { definitionTagGroups.push(this._createTagGroup(dictionary, definitionTags)); }
return this._createTermDictionaryEntry( return this._createTermDictionaryEntry(
[id],
isPrimary, isPrimary,
reasons, reasons,
score, score,
@ -1078,7 +1076,7 @@ class Translator {
sourceTermExactMatchCount, sourceTermExactMatchCount,
maxTransformedTextLength, maxTransformedTextLength,
[this._createTermHeadword(0, term, reading, [source], headwordTagGroups, rules)], [this._createTermHeadword(0, term, reading, [source], headwordTagGroups, rules)],
[this._createTermDefinition(0, [0], dictionary, [sequence], isPrimary, definitionTagGroups, definitions)] [this._createTermDefinition(0, [0], dictionary, dictionaryIndex, dictionaryPriority, id, score, [sequence], isPrimary, definitionTagGroups, definitions)]
); );
} }
@ -1107,7 +1105,6 @@ class Translator {
const definitions = []; const definitions = [];
const definitionsMap = checkDuplicateDefinitions ? new Map() : null; const definitionsMap = checkDuplicateDefinitions ? new Map() : null;
let inflections = null; let inflections = null;
const ids = new Set();
for (const {dictionaryEntry, headwordIndexMap} of definitionEntries) { for (const {dictionaryEntry, headwordIndexMap} of definitionEntries) {
score = Math.max(score, dictionaryEntry.score); score = Math.max(score, dictionaryEntry.score);
@ -1121,7 +1118,6 @@ class Translator {
inflections = dictionaryEntryInflections; inflections = dictionaryEntryInflections;
} }
} }
for (const id of dictionaryEntry.ids) { ids.add(id); }
if (checkDuplicateDefinitions) { if (checkDuplicateDefinitions) {
this._addTermDefinitions(definitions, definitionsMap, dictionaryEntry.definitions, headwordIndexMap); this._addTermDefinitions(definitions, definitionsMap, dictionaryEntry.definitions, headwordIndexMap);
} else { } else {
@ -1142,7 +1138,6 @@ class Translator {
} }
return this._createTermDictionaryEntry( return this._createTermDictionaryEntry(
[...ids],
isPrimary, isPrimary,
inflections !== null ? inflections : [], inflections !== null ? inflections : [],
score, score,
@ -1252,21 +1247,21 @@ class Translator {
} }
_addTermDefinitionsFast(definitions, newDefinitions, headwordIndexMap) { _addTermDefinitionsFast(definitions, newDefinitions, headwordIndexMap) {
for (const {headwordIndices, dictionary, sequences, isPrimary, tags, entries} of newDefinitions) { for (const {headwordIndices, dictionary, dictionaryIndex, dictionaryPriority, sequences, id, score, isPrimary, tags, entries} of newDefinitions) {
const headwordIndicesNew = []; const headwordIndicesNew = [];
for (const headwordIndex of headwordIndices) { for (const headwordIndex of headwordIndices) {
headwordIndicesNew.push(headwordIndexMap[headwordIndex]); headwordIndicesNew.push(headwordIndexMap[headwordIndex]);
} }
definitions.push(this._createTermDefinition(definitions.length, headwordIndicesNew, dictionary, sequences, isPrimary, tags, entries)); definitions.push(this._createTermDefinition(definitions.length, headwordIndicesNew, dictionary, dictionaryIndex, dictionaryPriority, id, score, sequences, isPrimary, tags, entries));
} }
} }
_addTermDefinitions(definitions, definitionsMap, newDefinitions, headwordIndexMap) { _addTermDefinitions(definitions, definitionsMap, newDefinitions, headwordIndexMap) {
for (const {headwordIndices, dictionary, sequences, isPrimary, tags, entries} of newDefinitions) { for (const {headwordIndices, dictionary, dictionaryIndex, dictionaryPriority, sequences, id, score, isPrimary, tags, entries} of newDefinitions) {
const key = this._createMapKey([dictionary, ...entries]); const key = this._createMapKey([dictionary, ...entries]);
let definition = definitionsMap.get(key); let definition = definitionsMap.get(key);
if (typeof definition === 'undefined') { if (typeof definition === 'undefined') {
definition = this._createTermDefinition(definitions.length, [], dictionary, [...sequences], isPrimary, [], [...entries]); definition = this._createTermDefinition(definitions.length, [], dictionary, dictionaryIndex, dictionaryPriority, id, score, [...sequences], isPrimary, [], [...entries]);
definitions.push(definition); definitions.push(definition);
definitionsMap.set(key, definition); definitionsMap.set(key, definition);
} else { } else {
@ -1378,7 +1373,7 @@ class Translator {
_sortTermDictionaryEntriesById(dictionaryEntries) { _sortTermDictionaryEntriesById(dictionaryEntries) {
if (dictionaryEntries.length <= 1) { return; } if (dictionaryEntries.length <= 1) { return; }
dictionaryEntries.sort((a, b) => a.ids[0] - b.ids[0]); dictionaryEntries.sort((a, b) => a.definitions[0].id - b.definitions[0].id);
} }
_sortTermDictionaryEntryData(dictionaryEntries) { _sortTermDictionaryEntryData(dictionaryEntries) {

File diff suppressed because it is too large Load Diff