From d87515ec125189ceeae251d37013b511c7bf8baf Mon Sep 17 00:00:00 2001 From: toasted-nutbread Date: Sat, 5 Jun 2021 22:27:58 -0400 Subject: [PATCH] Translator id updates (#1730) * Allow unsequenced definitions to be added to multiple groups * Update translator data to store multiple IDs * Update Anki note data * Update test data * Update docs --- docs/interfaces/dictionary-entry.ts | 4 +- ext/js/data/anki-note-data-creator.js | 4 +- ext/js/language/translator.js | 18 +- test/data/translator-test-results.json | 372 +++++++++++++++++++------ 4 files changed, 296 insertions(+), 102 deletions(-) diff --git a/docs/interfaces/dictionary-entry.ts b/docs/interfaces/dictionary-entry.ts index 2a58f5dc..da2a0151 100644 --- a/docs/interfaces/dictionary-entry.ts +++ b/docs/interfaces/dictionary-entry.ts @@ -192,9 +192,9 @@ namespace Translation { */ export interface TermDictionaryEntry extends DictionaryEntry { /** - * Database ID for the term, or `-1` if multiple entries have been merged. + * Database IDs for the term. */ - id: number; + ids: number[]; /** * Whether or not any of the sources is a primary source. Primary sources are derived from the * original search text, while non-primary sources originate from related terms. diff --git a/ext/js/data/anki-note-data-creator.js b/ext/js/data/anki-note-data-creator.js index 4cf62788..c76369c3 100644 --- a/ext/js/data/anki-note-data-creator.js +++ b/ext/js/data/anki-note-data-creator.js @@ -274,7 +274,7 @@ class AnkiNoteDataCreator { case 'merge': type = 'termMerged'; break; } - const {id, inflections, score, dictionaryIndex, dictionaryPriority, sourceTermExactMatchCount} = dictionaryEntry; + const {ids, inflections, score, dictionaryIndex, dictionaryPriority, sourceTermExactMatchCount} = dictionaryEntry; const { screenshotFileName=null, @@ -301,7 +301,7 @@ class AnkiNoteDataCreator { return { type, - id: (type === 'term' ? id : void 0), + id: (type === 'term' ? ids[0] : void 0), source: (primarySource !== null ? primarySource.transformedText : null), rawSource: (primarySource !== null ? primarySource.originalText : null), sourceTerm: (type !== 'termMerged' ? (primarySource !== null ? primarySource.deinflectedText : null) : void 0), diff --git a/ext/js/language/translator.js b/ext/js/language/translator.js index bf2c7322..a36ef48c 100644 --- a/ext/js/language/translator.js +++ b/ext/js/language/translator.js @@ -358,7 +358,7 @@ class Translator { const groupedDictionaryEntriesMap = new Map(); const ungroupedDictionaryEntriesMap = new Map(); for (const dictionaryEntry of dictionaryEntries) { - const {id, definitions: [{dictionary, sequences: [sequence]}]} = dictionaryEntry; + const {ids: [id], definitions: [{dictionary, sequences: [sequence]}]} = dictionaryEntry; if (mainDictionary === dictionary && sequence >= 0) { let group = groupedDictionaryEntriesMap.get(sequence); if (typeof group === 'undefined') { @@ -448,12 +448,10 @@ class Translator { for (const {ids, dictionaryEntries} of target.groups) { if (ids.has(id)) { continue; } - dictionaryEntries.push(dictionaryEntry); ids.add(id); - ungroupedDictionaryEntriesMap.delete(id); - break; } + ungroupedDictionaryEntriesMap.delete(id); } // Search database for additional secondary terms @@ -1039,10 +1037,10 @@ class Translator { return {index, headwordIndex, dictionary, dictionaryIndex, dictionaryPriority, hasReading, frequency}; } - _createTermDictionaryEntry(id, isPrimary, inflections, score, dictionaryIndex, dictionaryPriority, sourceTermExactMatchCount, maxTransformedTextLength, headwords, definitions) { + _createTermDictionaryEntry(ids, isPrimary, inflections, score, dictionaryIndex, dictionaryPriority, sourceTermExactMatchCount, maxTransformedTextLength, headwords, definitions) { return { type: 'term', - id, + ids, isPrimary, inflections, score, @@ -1073,7 +1071,7 @@ class Translator { if (definitionTags.length > 0) { definitionTagGroups.push(this._createTagGroup(dictionary, definitionTags)); } return this._createTermDictionaryEntry( - id, + [id], isPrimary, reasons, score, @@ -1111,6 +1109,7 @@ class Translator { const definitions = []; const definitionsMap = checkDuplicateDefinitions ? new Map() : null; let inflections = null; + const ids = new Set(); for (const {dictionaryEntry, headwordIndexMap} of definitionEntries) { score = Math.max(score, dictionaryEntry.score); @@ -1124,6 +1123,7 @@ class Translator { inflections = dictionaryEntryInflections; } } + for (const id of dictionaryEntry.ids) { ids.add(id); } if (checkDuplicateDefinitions) { this._addTermDefinitions(definitions, definitionsMap, dictionaryEntry.definitions, headwordIndexMap); } else { @@ -1144,7 +1144,7 @@ class Translator { } return this._createTermDictionaryEntry( - -1, + [...ids], isPrimary, inflections !== null ? inflections : [], score, @@ -1380,7 +1380,7 @@ class Translator { _sortTermDictionaryEntriesById(dictionaryEntries) { if (dictionaryEntries.length <= 1) { return; } - dictionaryEntries.sort((a, b) => a.id - b.id); + dictionaryEntries.sort((a, b) => a.ids[0] - b.ids[0]); } _sortTermDictionaryEntryData(dictionaryEntries) { diff --git a/test/data/translator-test-results.json b/test/data/translator-test-results.json index 93a86fa8..6d5d0f13 100644 --- a/test/data/translator-test-results.json +++ b/test/data/translator-test-results.json @@ -246,7 +246,9 @@ "dictionaryEntries": [ { "type": "term", - "id": 1, + "ids": [ + 1 + ], "isPrimary": true, "inflections": [], "score": 1, @@ -343,7 +345,9 @@ }, { "type": "term", - "id": 2, + "ids": [ + 2 + ], "isPrimary": true, "inflections": [], "score": 1, @@ -459,7 +463,9 @@ "dictionaryEntries": [ { "type": "term", - "id": 3, + "ids": [ + 3 + ], "isPrimary": true, "inflections": [], "score": 10, @@ -569,7 +575,9 @@ }, { "type": "term", - "id": 5, + "ids": [ + 5 + ], "isPrimary": true, "inflections": [], "score": 10, @@ -679,7 +687,9 @@ }, { "type": "term", - "id": 4, + "ids": [ + 4 + ], "isPrimary": true, "inflections": [], "score": 1, @@ -789,7 +799,9 @@ }, { "type": "term", - "id": 6, + "ids": [ + 6 + ], "isPrimary": true, "inflections": [], "score": 1, @@ -899,7 +911,9 @@ }, { "type": "term", - "id": 1, + "ids": [ + 1 + ], "isPrimary": true, "inflections": [], "score": 1, @@ -996,7 +1010,9 @@ }, { "type": "term", - "id": 2, + "ids": [ + 2 + ], "isPrimary": true, "inflections": [], "score": 1, @@ -1112,7 +1128,9 @@ "dictionaryEntries": [ { "type": "term", - "id": 7, + "ids": [ + 7 + ], "isPrimary": true, "inflections": [], "score": 10, @@ -1240,7 +1258,9 @@ }, { "type": "term", - "id": 9, + "ids": [ + 9 + ], "isPrimary": true, "inflections": [], "score": 10, @@ -1368,7 +1388,9 @@ }, { "type": "term", - "id": 8, + "ids": [ + 8 + ], "isPrimary": true, "inflections": [], "score": 1, @@ -1496,7 +1518,9 @@ }, { "type": "term", - "id": 10, + "ids": [ + 10 + ], "isPrimary": true, "inflections": [], "score": 1, @@ -1624,7 +1648,9 @@ }, { "type": "term", - "id": 3, + "ids": [ + 3 + ], "isPrimary": true, "inflections": [ "masu stem" @@ -1736,7 +1762,9 @@ }, { "type": "term", - "id": 5, + "ids": [ + 5 + ], "isPrimary": true, "inflections": [ "masu stem" @@ -1848,7 +1876,9 @@ }, { "type": "term", - "id": 4, + "ids": [ + 4 + ], "isPrimary": true, "inflections": [ "masu stem" @@ -1960,7 +1990,9 @@ }, { "type": "term", - "id": 6, + "ids": [ + 6 + ], "isPrimary": true, "inflections": [ "masu stem" @@ -2072,7 +2104,9 @@ }, { "type": "term", - "id": 1, + "ids": [ + 1 + ], "isPrimary": true, "inflections": [], "score": 1, @@ -2169,7 +2203,9 @@ }, { "type": "term", - "id": 2, + "ids": [ + 2 + ], "isPrimary": true, "inflections": [], "score": 1, @@ -2285,7 +2321,9 @@ "dictionaryEntries": [ { "type": "term", - "id": 11, + "ids": [ + 11 + ], "isPrimary": true, "inflections": [], "score": 1, @@ -2391,7 +2429,9 @@ "dictionaryEntries": [ { "type": "term", - "id": 1, + "ids": [ + 1 + ], "isPrimary": true, "inflections": [], "score": 1, @@ -2494,7 +2534,9 @@ "dictionaryEntries": [ { "type": "term", - "id": 2, + "ids": [ + 2 + ], "isPrimary": true, "inflections": [], "score": 1, @@ -2610,7 +2652,9 @@ "dictionaryEntries": [ { "type": "term", - "id": 3, + "ids": [ + 3 + ], "isPrimary": true, "inflections": [], "score": 10, @@ -2720,7 +2764,9 @@ }, { "type": "term", - "id": 4, + "ids": [ + 4 + ], "isPrimary": true, "inflections": [], "score": 1, @@ -2836,7 +2882,9 @@ "dictionaryEntries": [ { "type": "term", - "id": 5, + "ids": [ + 5 + ], "isPrimary": true, "inflections": [], "score": 10, @@ -2946,7 +2994,9 @@ }, { "type": "term", - "id": 6, + "ids": [ + 6 + ], "isPrimary": true, "inflections": [], "score": 1, @@ -3062,7 +3112,9 @@ "dictionaryEntries": [ { "type": "term", - "id": 7, + "ids": [ + 7 + ], "isPrimary": true, "inflections": [], "score": 10, @@ -3190,7 +3242,9 @@ }, { "type": "term", - "id": 8, + "ids": [ + 8 + ], "isPrimary": true, "inflections": [], "score": 1, @@ -3318,7 +3372,9 @@ }, { "type": "term", - "id": 3, + "ids": [ + 3 + ], "isPrimary": true, "inflections": [ "masu stem" @@ -3430,7 +3486,9 @@ }, { "type": "term", - "id": 4, + "ids": [ + 4 + ], "isPrimary": true, "inflections": [ "masu stem" @@ -3548,7 +3606,9 @@ "dictionaryEntries": [ { "type": "term", - "id": 9, + "ids": [ + 9 + ], "isPrimary": true, "inflections": [], "score": 10, @@ -3676,7 +3736,9 @@ }, { "type": "term", - "id": 10, + "ids": [ + 10 + ], "isPrimary": true, "inflections": [], "score": 1, @@ -3804,7 +3866,9 @@ }, { "type": "term", - "id": 5, + "ids": [ + 5 + ], "isPrimary": true, "inflections": [ "masu stem" @@ -3916,7 +3980,9 @@ }, { "type": "term", - "id": 6, + "ids": [ + 6 + ], "isPrimary": true, "inflections": [ "masu stem" @@ -4034,7 +4100,9 @@ "dictionaryEntries": [ { "type": "term", - "id": 11, + "ids": [ + 11 + ], "isPrimary": true, "inflections": [], "score": 1, @@ -4150,7 +4218,9 @@ "dictionaryEntries": [ { "type": "term", - "id": 7, + "ids": [ + 7 + ], "isPrimary": true, "inflections": [], "score": 10, @@ -4200,7 +4270,9 @@ }, { "type": "term", - "id": 9, + "ids": [ + 9 + ], "isPrimary": true, "inflections": [], "score": 10, @@ -4250,7 +4322,9 @@ }, { "type": "term", - "id": 8, + "ids": [ + 8 + ], "isPrimary": true, "inflections": [], "score": 1, @@ -4300,7 +4374,9 @@ }, { "type": "term", - "id": 10, + "ids": [ + 10 + ], "isPrimary": true, "inflections": [], "score": 1, @@ -4350,7 +4426,9 @@ }, { "type": "term", - "id": 3, + "ids": [ + 3 + ], "isPrimary": true, "inflections": [ "masu stem" @@ -4402,7 +4480,9 @@ }, { "type": "term", - "id": 5, + "ids": [ + 5 + ], "isPrimary": true, "inflections": [ "masu stem" @@ -4454,7 +4534,9 @@ }, { "type": "term", - "id": 4, + "ids": [ + 4 + ], "isPrimary": true, "inflections": [ "masu stem" @@ -4506,7 +4588,9 @@ }, { "type": "term", - "id": 6, + "ids": [ + 6 + ], "isPrimary": true, "inflections": [ "masu stem" @@ -4558,7 +4642,9 @@ }, { "type": "term", - "id": 1, + "ids": [ + 1 + ], "isPrimary": true, "inflections": [], "score": 1, @@ -4608,7 +4694,9 @@ }, { "type": "term", - "id": 2, + "ids": [ + 2 + ], "isPrimary": true, "inflections": [], "score": 1, @@ -4664,7 +4752,10 @@ "dictionaryEntries": [ { "type": "term", - "id": -1, + "ids": [ + 7, + 8 + ], "isPrimary": true, "inflections": [], "score": 10, @@ -4835,7 +4926,10 @@ }, { "type": "term", - "id": -1, + "ids": [ + 9, + 10 + ], "isPrimary": true, "inflections": [], "score": 10, @@ -5006,7 +5100,10 @@ }, { "type": "term", - "id": -1, + "ids": [ + 3, + 4 + ], "isPrimary": true, "inflections": [ "masu stem" @@ -5161,7 +5258,10 @@ }, { "type": "term", - "id": -1, + "ids": [ + 5, + 6 + ], "isPrimary": true, "inflections": [ "masu stem" @@ -5316,7 +5416,9 @@ }, { "type": "term", - "id": -1, + "ids": [ + 1 + ], "isPrimary": true, "inflections": [], "score": 1, @@ -5413,7 +5515,9 @@ }, { "type": "term", - "id": -1, + "ids": [ + 2 + ], "isPrimary": true, "inflections": [], "score": 1, @@ -5529,7 +5633,12 @@ "dictionaryEntries": [ { "type": "term", - "id": -1, + "ids": [ + 7, + 9, + 8, + 10 + ], "isPrimary": true, "inflections": [], "score": 10, @@ -5852,7 +5961,12 @@ }, { "type": "term", - "id": -1, + "ids": [ + 3, + 5, + 4, + 6 + ], "isPrimary": true, "inflections": [ "masu stem" @@ -6142,7 +6256,9 @@ }, { "type": "term", - "id": -1, + "ids": [ + 1 + ], "isPrimary": true, "inflections": [], "score": 1, @@ -6239,7 +6355,9 @@ }, { "type": "term", - "id": -1, + "ids": [ + 2 + ], "isPrimary": true, "inflections": [], "score": 1, @@ -6355,7 +6473,9 @@ "dictionaryEntries": [ { "type": "term", - "id": 7, + "ids": [ + 7 + ], "isPrimary": true, "inflections": [ "-te", @@ -6487,7 +6607,9 @@ }, { "type": "term", - "id": 9, + "ids": [ + 9 + ], "isPrimary": true, "inflections": [ "-te", @@ -6619,7 +6741,9 @@ }, { "type": "term", - "id": 8, + "ids": [ + 8 + ], "isPrimary": true, "inflections": [ "-te", @@ -6751,7 +6875,9 @@ }, { "type": "term", - "id": 10, + "ids": [ + 10 + ], "isPrimary": true, "inflections": [ "-te", @@ -6883,7 +7009,9 @@ }, { "type": "term", - "id": 3, + "ids": [ + 3 + ], "isPrimary": true, "inflections": [ "masu stem" @@ -6995,7 +7123,9 @@ }, { "type": "term", - "id": 5, + "ids": [ + 5 + ], "isPrimary": true, "inflections": [ "masu stem" @@ -7107,7 +7237,9 @@ }, { "type": "term", - "id": 4, + "ids": [ + 4 + ], "isPrimary": true, "inflections": [ "masu stem" @@ -7219,7 +7351,9 @@ }, { "type": "term", - "id": 6, + "ids": [ + 6 + ], "isPrimary": true, "inflections": [ "masu stem" @@ -7331,7 +7465,9 @@ }, { "type": "term", - "id": 1, + "ids": [ + 1 + ], "isPrimary": true, "inflections": [], "score": 1, @@ -7428,7 +7564,9 @@ }, { "type": "term", - "id": 2, + "ids": [ + 2 + ], "isPrimary": true, "inflections": [], "score": 1, @@ -7544,7 +7682,9 @@ "dictionaryEntries": [ { "type": "term", - "id": 7, + "ids": [ + 7 + ], "isPrimary": true, "inflections": [], "score": 10, @@ -7672,7 +7812,9 @@ }, { "type": "term", - "id": 9, + "ids": [ + 9 + ], "isPrimary": true, "inflections": [], "score": 10, @@ -7800,7 +7942,9 @@ }, { "type": "term", - "id": 8, + "ids": [ + 8 + ], "isPrimary": true, "inflections": [], "score": 1, @@ -7928,7 +8072,9 @@ }, { "type": "term", - "id": 10, + "ids": [ + 10 + ], "isPrimary": true, "inflections": [], "score": 1, @@ -8056,7 +8202,9 @@ }, { "type": "term", - "id": 3, + "ids": [ + 3 + ], "isPrimary": true, "inflections": [ "masu stem" @@ -8168,7 +8316,9 @@ }, { "type": "term", - "id": 5, + "ids": [ + 5 + ], "isPrimary": true, "inflections": [ "masu stem" @@ -8280,7 +8430,9 @@ }, { "type": "term", - "id": 4, + "ids": [ + 4 + ], "isPrimary": true, "inflections": [ "masu stem" @@ -8392,7 +8544,9 @@ }, { "type": "term", - "id": 6, + "ids": [ + 6 + ], "isPrimary": true, "inflections": [ "masu stem" @@ -8504,7 +8658,9 @@ }, { "type": "term", - "id": 1, + "ids": [ + 1 + ], "isPrimary": true, "inflections": [], "score": 1, @@ -8601,7 +8757,9 @@ }, { "type": "term", - "id": 2, + "ids": [ + 2 + ], "isPrimary": true, "inflections": [], "score": 1, @@ -8717,7 +8875,9 @@ "dictionaryEntries": [ { "type": "term", - "id": 7, + "ids": [ + 7 + ], "isPrimary": true, "inflections": [], "score": 10, @@ -8845,7 +9005,9 @@ }, { "type": "term", - "id": 9, + "ids": [ + 9 + ], "isPrimary": true, "inflections": [], "score": 10, @@ -8973,7 +9135,9 @@ }, { "type": "term", - "id": 8, + "ids": [ + 8 + ], "isPrimary": true, "inflections": [], "score": 1, @@ -9101,7 +9265,9 @@ }, { "type": "term", - "id": 10, + "ids": [ + 10 + ], "isPrimary": true, "inflections": [], "score": 1, @@ -9229,7 +9395,9 @@ }, { "type": "term", - "id": 3, + "ids": [ + 3 + ], "isPrimary": true, "inflections": [ "masu stem" @@ -9341,7 +9509,9 @@ }, { "type": "term", - "id": 5, + "ids": [ + 5 + ], "isPrimary": true, "inflections": [ "masu stem" @@ -9453,7 +9623,9 @@ }, { "type": "term", - "id": 4, + "ids": [ + 4 + ], "isPrimary": true, "inflections": [ "masu stem" @@ -9565,7 +9737,9 @@ }, { "type": "term", - "id": 6, + "ids": [ + 6 + ], "isPrimary": true, "inflections": [ "masu stem" @@ -9677,7 +9851,9 @@ }, { "type": "term", - "id": 1, + "ids": [ + 1 + ], "isPrimary": true, "inflections": [], "score": 1, @@ -9774,7 +9950,9 @@ }, { "type": "term", - "id": 2, + "ids": [ + 2 + ], "isPrimary": true, "inflections": [], "score": 1, @@ -9890,7 +10068,9 @@ "dictionaryEntries": [ { "type": "term", - "id": 12, + "ids": [ + 12 + ], "isPrimary": true, "inflections": [ "masu stem" @@ -9988,7 +10168,9 @@ "dictionaryEntries": [ { "type": "term", - "id": 13, + "ids": [ + 13 + ], "isPrimary": true, "inflections": [], "score": 90, @@ -10084,7 +10266,9 @@ "dictionaryEntries": [ { "type": "term", - "id": 12, + "ids": [ + 12 + ], "isPrimary": true, "inflections": [ "polite past" @@ -10182,7 +10366,12 @@ "dictionaryEntries": [ { "type": "term", - "id": -1, + "ids": [ + 7, + 9, + 8, + 10 + ], "isPrimary": true, "inflections": [], "score": 10, @@ -10505,7 +10694,12 @@ }, { "type": "term", - "id": -1, + "ids": [ + 3, + 5, + 4, + 6 + ], "isPrimary": true, "inflections": [ "masu stem"