Improve term grouping (#1653)

* Rename _addUniqueStrings to _addUniqueSimple

* Update definition merging to not depend on the sequence number

* Improve naming

* Update AnkiNoteDataCreator

* Update docs

* Remove fields that no longer exist

* Update test data
This commit is contained in:
toasted-nutbread 2021-05-08 13:16:56 -04:00 committed by GitHub
parent 32f5544021
commit 289bdc1622
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
4 changed files with 341 additions and 135 deletions

View File

@ -200,14 +200,6 @@ namespace Translation {
* original search text, while non-primary sources originate from related terms. * original search text, while non-primary sources originate from related terms.
*/ */
isPrimary: boolean; isPrimary: boolean;
/**
* Database sequence number for the term, or `-1` if multiple entries have been merged.
*/
sequence: number;
/**
* The dictionary that the sequence number originated from, or `null` if there is no sequence.
*/
sequenceDictionary: string;
/** /**
* A list of inflections that was applied to get the term. * A list of inflections that was applied to get the term.
*/ */
@ -297,9 +289,11 @@ namespace Translation {
*/ */
dictionary: string; dictionary: string;
/** /**
* Database sequence number for the term. The value will be `-1` if there is no sequence. * A list of database sequence numbers for the term. A value of `-1` corresponds to no sequence.
* The list can have multiple values if multiple definitions with different sequences have been merged.
* The list should always have at least one item.
*/ */
sequence: number; sequences: number;
/** /**
* Tags for the definition. * Tags for the definition.
*/ */

View File

@ -362,7 +362,7 @@ class AnkiNoteDataCreator {
const definitions = []; const definitions = [];
const definitionTags = []; const definitionTags = [];
for (const {tags, headwordIndices, entries, dictionary, sequence} of dictionaryEntry.definitions) { for (const {tags, headwordIndices, entries, dictionary, sequences} of dictionaryEntry.definitions) {
const definitionTags2 = []; const definitionTags2 = [];
for (const tag of tags) { for (const tag of tags) {
definitionTags.push(this._convertTag(tag)); definitionTags.push(this._convertTag(tag));
@ -371,7 +371,7 @@ class AnkiNoteDataCreator {
if (!hasDefinitions) { continue; } if (!hasDefinitions) { continue; }
const only = merged ? DictionaryDataUtil.getDisambiguations(dictionaryEntry.headwords, headwordIndices, allTermsSet, allReadingsSet) : void 0; const only = merged ? DictionaryDataUtil.getDisambiguations(dictionaryEntry.headwords, headwordIndices, allTermsSet, allReadingsSet) : void 0;
definitions.push({ definitions.push({
sequence, sequence: sequences[0],
dictionary, dictionary,
glossary: entries, glossary: entries,
definitionTags: definitionTags2, definitionTags: definitionTags2,
@ -613,8 +613,9 @@ class AnkiNoteDataCreator {
_getTermDictionaryEntrySequence(dictionaryEntry) { _getTermDictionaryEntrySequence(dictionaryEntry) {
let hasSequence = false; let hasSequence = false;
let mainSequence = -1; let mainSequence = -1;
for (const {sequence, isPrimary} of dictionaryEntry.definitions) { for (const {sequences, isPrimary} of dictionaryEntry.definitions) {
if (!isPrimary) { continue; } if (!isPrimary) { continue; }
const sequence = sequences[0];
if (!hasSequence) { if (!hasSequence) {
mainSequence = sequence; mainSequence = sequence;
hasSequence = true; hasSequence = true;

View File

@ -353,7 +353,7 @@ class Translator {
const groupedDictionaryEntriesMap = new Map(); const groupedDictionaryEntriesMap = new Map();
const ungroupedDictionaryEntriesMap = new Map(); const ungroupedDictionaryEntriesMap = new Map();
for (const dictionaryEntry of dictionaryEntries) { for (const dictionaryEntry of dictionaryEntries) {
const {id, definitions: [{dictionary, sequence}]} = dictionaryEntry; const {id, definitions: [{dictionary, sequences: [sequence]}]} = dictionaryEntry;
if (mainDictionary === dictionary && sequence >= 0) { if (mainDictionary === dictionary && sequence >= 0) {
let group = groupedDictionaryEntriesMap.get(sequence); let group = groupedDictionaryEntriesMap.get(sequence);
if (typeof group === 'undefined') { if (typeof group === 'undefined') {
@ -620,7 +620,7 @@ class Translator {
tag1.order = Math.min(tag1.order, tag2.order); tag1.order = Math.min(tag1.order, tag2.order);
tag1.score = Math.max(tag1.score, tag2.score); tag1.score = Math.max(tag1.score, tag2.score);
tag1.dictionaries.push(...tag2.dictionaries); tag1.dictionaries.push(...tag2.dictionaries);
this._addUniqueStrings(tag1.content, tag2.content); this._addUniqueSimple(tag1.content, tag2.content);
tags.splice(j, 1); tags.splice(j, 1);
--tagCount; --tagCount;
--j; --j;
@ -927,8 +927,8 @@ class Translator {
return {index, term, reading, sources, tags, wordClasses}; return {index, term, reading, sources, tags, wordClasses};
} }
_createTermDefinition(index, headwordIndices, dictionary, sequence, isPrimary, tags, entries) { _createTermDefinition(index, headwordIndices, dictionary, sequences, isPrimary, tags, entries) {
return {index, headwordIndices, dictionary, sequence, isPrimary, tags, entries}; return {index, headwordIndices, dictionary, sequences, isPrimary, tags, entries};
} }
_createTermPronunciation(index, headwordIndex, dictionary, dictionaryIndex, dictionaryPriority, pitches) { _createTermPronunciation(index, headwordIndex, dictionary, dictionaryIndex, dictionaryPriority, pitches) {
@ -982,7 +982,7 @@ class Translator {
sourceTermExactMatchCount, sourceTermExactMatchCount,
maxTransformedTextLength, maxTransformedTextLength,
[this._createTermHeadword(0, term, reading, [source], headwordTagGroups, rules)], [this._createTermHeadword(0, term, reading, [source], headwordTagGroups, rules)],
[this._createTermDefinition(0, [0], dictionary, sequence, isPrimary, definitionTagGroups, definitions)] [this._createTermDefinition(0, [0], dictionary, [sequence], isPrimary, definitionTagGroups, definitions)]
); );
} }
@ -1027,9 +1027,9 @@ class Translator {
} }
} }
if (checkDuplicateDefinitions) { if (checkDuplicateDefinitions) {
this._addTermDefinitions2(definitions, definitionsMap, dictionaryEntry.definitions, headwordIndexMap); this._addTermDefinitions(definitions, definitionsMap, dictionaryEntry.definitions, headwordIndexMap);
} else { } else {
this._addTermDefinitions(definitions, dictionaryEntry.definitions, headwordIndexMap); this._addTermDefinitionsFast(definitions, dictionaryEntry.definitions, headwordIndexMap);
} }
} }
@ -1049,7 +1049,7 @@ class Translator {
// Data collection addition functions // Data collection addition functions
_addUniqueStrings(list, newItems) { _addUniqueSimple(list, newItems) {
for (const item of newItems) { for (const item of newItems) {
if (!list.includes(item)) { if (!list.includes(item)) {
list.push(item); list.push(item);
@ -1093,7 +1093,7 @@ class Translator {
for (; i < ii; ++i) { for (; i < ii; ++i) {
const tagGroup = tagGroups[i]; const tagGroup = tagGroups[i];
if (tagGroup.dictionary === dictionary) { if (tagGroup.dictionary === dictionary) {
this._addUniqueStrings(tagGroup.tagNames, newTagGroup.tagNames); this._addUniqueSimple(tagGroup.tagNames, newTagGroup.tagNames);
break; break;
} }
} }
@ -1114,7 +1114,7 @@ class Translator {
} }
this._addUniqueSources(headword.sources, sources); this._addUniqueSources(headword.sources, sources);
this._addUniqueTagGroups(headword.tags, tags); this._addUniqueTagGroups(headword.tags, tags);
this._addUniqueStrings(headword.wordClasses, wordClasses); this._addUniqueSimple(headword.wordClasses, wordClasses);
headwordIndexMap.push(headword.index); headwordIndexMap.push(headword.index);
} }
return headwordIndexMap; return headwordIndexMap;
@ -1143,28 +1143,29 @@ class Translator {
headwordIndices.splice(start, 0, headwordIndex); headwordIndices.splice(start, 0, headwordIndex);
} }
_addTermDefinitions(definitions, newDefinitions, headwordIndexMap) { _addTermDefinitionsFast(definitions, newDefinitions, headwordIndexMap) {
for (const {headwordIndices, dictionary, sequence, isPrimary, tags, entries} of newDefinitions) { for (const {headwordIndices, dictionary, sequences, isPrimary, tags, entries} of newDefinitions) {
const headwordIndicesNew = []; const headwordIndicesNew = [];
for (const headwordIndex of headwordIndices) { for (const headwordIndex of headwordIndices) {
headwordIndicesNew.push(headwordIndexMap[headwordIndex]); headwordIndicesNew.push(headwordIndexMap[headwordIndex]);
} }
definitions.push(this._createTermDefinition(definitions.length, headwordIndicesNew, dictionary, sequence, isPrimary, tags, entries)); definitions.push(this._createTermDefinition(definitions.length, headwordIndicesNew, dictionary, sequences, isPrimary, tags, entries));
} }
} }
_addTermDefinitions2(definitions, definitionsMap, newDefinitions, headwordIndexMap) { _addTermDefinitions(definitions, definitionsMap, newDefinitions, headwordIndexMap) {
for (const {headwordIndices, dictionary, sequence, isPrimary, tags, entries} of newDefinitions) { for (const {headwordIndices, dictionary, sequences, isPrimary, tags, entries} of newDefinitions) {
const key = this._createMapKey([dictionary, sequence, ...entries]); const key = this._createMapKey([dictionary, ...entries]);
let definition = definitionsMap.get(key); let definition = definitionsMap.get(key);
if (typeof definition === 'undefined') { if (typeof definition === 'undefined') {
definition = this._createTermDefinition(definitions.length, [], dictionary, sequence, isPrimary, [], [...entries]); definition = this._createTermDefinition(definitions.length, [], dictionary, [...sequences], isPrimary, [], [...entries]);
definitions.push(definition); definitions.push(definition);
definitionsMap.set(key, definition); definitionsMap.set(key, definition);
} else { } else {
if (isPrimary) { if (isPrimary) {
definition.isPrimary = true; definition.isPrimary = true;
} }
this._addUniqueSimple(definition.sequences, sequences);
} }
const newHeadwordIndices = definition.headwordIndices; const newHeadwordIndices = definition.headwordIndices;

File diff suppressed because it is too large Load Diff