Translator data format updates (#1754)

* Add {dictionaryIndex, dictionaryPriority} to definitions

* Add score to definitions

* Add id to definition

* Use definition id instead of ids array

* Remove ids array

* Update docs

* Update test data
This commit is contained in:
toasted-nutbread 2021-06-26 11:57:09 -04:00 committed by GitHub
parent 5756885fa9
commit 6e0a367afc
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
4 changed files with 448 additions and 304 deletions

View File

@ -191,10 +191,6 @@ namespace Translation {
* `DictionaryEntry.type` is always `'term'`.
*/
export interface TermDictionaryEntry extends DictionaryEntry {
/**
* Database IDs for the term.
*/
ids: number[];
/**
* Whether or not any of the sources is a primary source. Primary sources are derived from the
* original search text, while non-primary sources originate from related terms.
@ -288,6 +284,22 @@ namespace Translation {
* The name of the dictionary that the definition information originated from.
*/
dictionary: string;
/**
* The index of the dictionary in the original list of dictionaries used for the lookup.
*/
dictionaryIndex: number;
/**
* The priority of the dictionary.
*/
dictionaryPriority: number;
/**
* Database ID for the definition. This is a single numeric ID, not a list.
*/
id: number;
/**
* A score for the definition.
*/
score: number;
/**
* A list of database sequence numbers for the term. A value of `-1` corresponds to no sequence.
* The list can have multiple values if multiple definitions with different sequences have been merged.

View File

@ -274,7 +274,7 @@ class AnkiNoteDataCreator {
case 'merge': type = 'termMerged'; break;
}
const {ids, inflections, score, dictionaryIndex, dictionaryPriority, sourceTermExactMatchCount} = dictionaryEntry;
const {inflections, score, dictionaryIndex, dictionaryPriority, sourceTermExactMatchCount, definitions} = dictionaryEntry;
const {
screenshotFileName=null,
@ -301,7 +301,7 @@ class AnkiNoteDataCreator {
return {
type,
id: (type === 'term' ? ids[0] : void 0),
id: (type === 'term' && definitions.length > 0 ? definitions[0].id : void 0),
source: (primarySource !== null ? primarySource.transformedText : null),
rawSource: (primarySource !== null ? primarySource.originalText : null),
sourceTerm: (type !== 'termMerged' ? (primarySource !== null ? primarySource.deinflectedText : null) : void 0),

View File

@ -358,7 +358,7 @@ class Translator {
const groupedDictionaryEntriesMap = new Map();
const ungroupedDictionaryEntriesMap = new Map();
for (const dictionaryEntry of dictionaryEntries) {
const {ids: [id], definitions: [{dictionary, sequences: [sequence]}]} = dictionaryEntry;
const {definitions: [{id, dictionary, sequences: [sequence]}]} = dictionaryEntry;
if (mainDictionary === dictionary && sequence >= 0) {
let group = groupedDictionaryEntriesMap.get(sequence);
if (typeof group === 'undefined') {
@ -1023,8 +1023,8 @@ class Translator {
return {index, term, reading, sources, tags, wordClasses};
}
_createTermDefinition(index, headwordIndices, dictionary, sequences, isPrimary, tags, entries) {
return {index, headwordIndices, dictionary, sequences, isPrimary, tags, entries};
_createTermDefinition(index, headwordIndices, dictionary, dictionaryIndex, dictionaryPriority, id, score, sequences, isPrimary, tags, entries) {
return {index, headwordIndices, dictionary, dictionaryIndex, dictionaryPriority, id, score, sequences, isPrimary, tags, entries};
}
_createTermPronunciation(index, headwordIndex, dictionary, dictionaryIndex, dictionaryPriority, pitches) {
@ -1035,10 +1035,9 @@ class Translator {
return {index, headwordIndex, dictionary, dictionaryIndex, dictionaryPriority, hasReading, frequency};
}
_createTermDictionaryEntry(ids, isPrimary, inflections, score, dictionaryIndex, dictionaryPriority, sourceTermExactMatchCount, maxTransformedTextLength, headwords, definitions) {
_createTermDictionaryEntry(isPrimary, inflections, score, dictionaryIndex, dictionaryPriority, sourceTermExactMatchCount, maxTransformedTextLength, headwords, definitions) {
return {
type: 'term',
ids,
isPrimary,
inflections,
score,
@ -1069,7 +1068,6 @@ class Translator {
if (definitionTags.length > 0) { definitionTagGroups.push(this._createTagGroup(dictionary, definitionTags)); }
return this._createTermDictionaryEntry(
[id],
isPrimary,
reasons,
score,
@ -1078,7 +1076,7 @@ class Translator {
sourceTermExactMatchCount,
maxTransformedTextLength,
[this._createTermHeadword(0, term, reading, [source], headwordTagGroups, rules)],
[this._createTermDefinition(0, [0], dictionary, [sequence], isPrimary, definitionTagGroups, definitions)]
[this._createTermDefinition(0, [0], dictionary, dictionaryIndex, dictionaryPriority, id, score, [sequence], isPrimary, definitionTagGroups, definitions)]
);
}
@ -1107,7 +1105,6 @@ class Translator {
const definitions = [];
const definitionsMap = checkDuplicateDefinitions ? new Map() : null;
let inflections = null;
const ids = new Set();
for (const {dictionaryEntry, headwordIndexMap} of definitionEntries) {
score = Math.max(score, dictionaryEntry.score);
@ -1121,7 +1118,6 @@ class Translator {
inflections = dictionaryEntryInflections;
}
}
for (const id of dictionaryEntry.ids) { ids.add(id); }
if (checkDuplicateDefinitions) {
this._addTermDefinitions(definitions, definitionsMap, dictionaryEntry.definitions, headwordIndexMap);
} else {
@ -1142,7 +1138,6 @@ class Translator {
}
return this._createTermDictionaryEntry(
[...ids],
isPrimary,
inflections !== null ? inflections : [],
score,
@ -1252,21 +1247,21 @@ class Translator {
}
_addTermDefinitionsFast(definitions, newDefinitions, headwordIndexMap) {
for (const {headwordIndices, dictionary, sequences, isPrimary, tags, entries} of newDefinitions) {
for (const {headwordIndices, dictionary, dictionaryIndex, dictionaryPriority, sequences, id, score, isPrimary, tags, entries} of newDefinitions) {
const headwordIndicesNew = [];
for (const headwordIndex of headwordIndices) {
headwordIndicesNew.push(headwordIndexMap[headwordIndex]);
}
definitions.push(this._createTermDefinition(definitions.length, headwordIndicesNew, dictionary, sequences, isPrimary, tags, entries));
definitions.push(this._createTermDefinition(definitions.length, headwordIndicesNew, dictionary, dictionaryIndex, dictionaryPriority, id, score, sequences, isPrimary, tags, entries));
}
}
_addTermDefinitions(definitions, definitionsMap, newDefinitions, headwordIndexMap) {
for (const {headwordIndices, dictionary, sequences, isPrimary, tags, entries} of newDefinitions) {
for (const {headwordIndices, dictionary, dictionaryIndex, dictionaryPriority, sequences, id, score, isPrimary, tags, entries} of newDefinitions) {
const key = this._createMapKey([dictionary, ...entries]);
let definition = definitionsMap.get(key);
if (typeof definition === 'undefined') {
definition = this._createTermDefinition(definitions.length, [], dictionary, [...sequences], isPrimary, [], [...entries]);
definition = this._createTermDefinition(definitions.length, [], dictionary, dictionaryIndex, dictionaryPriority, id, score, [...sequences], isPrimary, [], [...entries]);
definitions.push(definition);
definitionsMap.set(key, definition);
} else {
@ -1378,7 +1373,7 @@ class Translator {
_sortTermDictionaryEntriesById(dictionaryEntries) {
if (dictionaryEntries.length <= 1) { return; }
dictionaryEntries.sort((a, b) => a.ids[0] - b.ids[0]);
dictionaryEntries.sort((a, b) => a.definitions[0].id - b.definitions[0].id);
}
_sortTermDictionaryEntryData(dictionaryEntries) {

File diff suppressed because it is too large Load Diff