Improve term grouping (#1653)

* Rename _addUniqueStrings to _addUniqueSimple

* Update definition merging to not depend on the sequence number

* Improve naming

* Update AnkiNoteDataCreator

* Update docs

* Remove fields that no longer exist

* Update test data
This commit is contained in:
toasted-nutbread 2021-05-08 13:16:56 -04:00 committed by GitHub
parent 32f5544021
commit 289bdc1622
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
4 changed files with 341 additions and 135 deletions

View File

@ -200,14 +200,6 @@ namespace Translation {
* original search text, while non-primary sources originate from related terms.
*/
isPrimary: boolean;
/**
* Database sequence number for the term, or `-1` if multiple entries have been merged.
*/
sequence: number;
/**
* The dictionary that the sequence number originated from, or `null` if there is no sequence.
*/
sequenceDictionary: string;
/**
* A list of inflections that was applied to get the term.
*/
@ -297,9 +289,11 @@ namespace Translation {
*/
dictionary: string;
/**
* Database sequence number for the term. The value will be `-1` if there is no sequence.
* A list of database sequence numbers for the term. A value of `-1` corresponds to no sequence.
* The list can have multiple values if multiple definitions with different sequences have been merged.
* The list should always have at least one item.
*/
sequence: number;
sequences: number[];
/**
* Tags for the definition.
*/

View File

@ -362,7 +362,7 @@ class AnkiNoteDataCreator {
const definitions = [];
const definitionTags = [];
for (const {tags, headwordIndices, entries, dictionary, sequence} of dictionaryEntry.definitions) {
for (const {tags, headwordIndices, entries, dictionary, sequences} of dictionaryEntry.definitions) {
const definitionTags2 = [];
for (const tag of tags) {
definitionTags.push(this._convertTag(tag));
@ -371,7 +371,7 @@ class AnkiNoteDataCreator {
if (!hasDefinitions) { continue; }
const only = merged ? DictionaryDataUtil.getDisambiguations(dictionaryEntry.headwords, headwordIndices, allTermsSet, allReadingsSet) : void 0;
definitions.push({
sequence,
sequence: sequences[0],
dictionary,
glossary: entries,
definitionTags: definitionTags2,
@ -613,8 +613,9 @@ class AnkiNoteDataCreator {
_getTermDictionaryEntrySequence(dictionaryEntry) {
let hasSequence = false;
let mainSequence = -1;
for (const {sequence, isPrimary} of dictionaryEntry.definitions) {
for (const {sequences, isPrimary} of dictionaryEntry.definitions) {
if (!isPrimary) { continue; }
const sequence = sequences[0];
if (!hasSequence) {
mainSequence = sequence;
hasSequence = true;

View File

@ -353,7 +353,7 @@ class Translator {
const groupedDictionaryEntriesMap = new Map();
const ungroupedDictionaryEntriesMap = new Map();
for (const dictionaryEntry of dictionaryEntries) {
const {id, definitions: [{dictionary, sequence}]} = dictionaryEntry;
const {id, definitions: [{dictionary, sequences: [sequence]}]} = dictionaryEntry;
if (mainDictionary === dictionary && sequence >= 0) {
let group = groupedDictionaryEntriesMap.get(sequence);
if (typeof group === 'undefined') {
@ -620,7 +620,7 @@ class Translator {
tag1.order = Math.min(tag1.order, tag2.order);
tag1.score = Math.max(tag1.score, tag2.score);
tag1.dictionaries.push(...tag2.dictionaries);
this._addUniqueStrings(tag1.content, tag2.content);
this._addUniqueSimple(tag1.content, tag2.content);
tags.splice(j, 1);
--tagCount;
--j;
@ -927,8 +927,8 @@ class Translator {
return {index, term, reading, sources, tags, wordClasses};
}
_createTermDefinition(index, headwordIndices, dictionary, sequence, isPrimary, tags, entries) {
return {index, headwordIndices, dictionary, sequence, isPrimary, tags, entries};
_createTermDefinition(index, headwordIndices, dictionary, sequences, isPrimary, tags, entries) {
return {index, headwordIndices, dictionary, sequences, isPrimary, tags, entries};
}
_createTermPronunciation(index, headwordIndex, dictionary, dictionaryIndex, dictionaryPriority, pitches) {
@ -982,7 +982,7 @@ class Translator {
sourceTermExactMatchCount,
maxTransformedTextLength,
[this._createTermHeadword(0, term, reading, [source], headwordTagGroups, rules)],
[this._createTermDefinition(0, [0], dictionary, sequence, isPrimary, definitionTagGroups, definitions)]
[this._createTermDefinition(0, [0], dictionary, [sequence], isPrimary, definitionTagGroups, definitions)]
);
}
@ -1027,9 +1027,9 @@ class Translator {
}
}
if (checkDuplicateDefinitions) {
this._addTermDefinitions2(definitions, definitionsMap, dictionaryEntry.definitions, headwordIndexMap);
this._addTermDefinitions(definitions, definitionsMap, dictionaryEntry.definitions, headwordIndexMap);
} else {
this._addTermDefinitions(definitions, dictionaryEntry.definitions, headwordIndexMap);
this._addTermDefinitionsFast(definitions, dictionaryEntry.definitions, headwordIndexMap);
}
}
@ -1049,7 +1049,7 @@ class Translator {
// Data collection addition functions
_addUniqueStrings(list, newItems) {
_addUniqueSimple(list, newItems) {
for (const item of newItems) {
if (!list.includes(item)) {
list.push(item);
@ -1093,7 +1093,7 @@ class Translator {
for (; i < ii; ++i) {
const tagGroup = tagGroups[i];
if (tagGroup.dictionary === dictionary) {
this._addUniqueStrings(tagGroup.tagNames, newTagGroup.tagNames);
this._addUniqueSimple(tagGroup.tagNames, newTagGroup.tagNames);
break;
}
}
@ -1114,7 +1114,7 @@ class Translator {
}
this._addUniqueSources(headword.sources, sources);
this._addUniqueTagGroups(headword.tags, tags);
this._addUniqueStrings(headword.wordClasses, wordClasses);
this._addUniqueSimple(headword.wordClasses, wordClasses);
headwordIndexMap.push(headword.index);
}
return headwordIndexMap;
@ -1143,28 +1143,29 @@ class Translator {
headwordIndices.splice(start, 0, headwordIndex);
}
_addTermDefinitions(definitions, newDefinitions, headwordIndexMap) {
for (const {headwordIndices, dictionary, sequence, isPrimary, tags, entries} of newDefinitions) {
_addTermDefinitionsFast(definitions, newDefinitions, headwordIndexMap) {
for (const {headwordIndices, dictionary, sequences, isPrimary, tags, entries} of newDefinitions) {
const headwordIndicesNew = [];
for (const headwordIndex of headwordIndices) {
headwordIndicesNew.push(headwordIndexMap[headwordIndex]);
}
definitions.push(this._createTermDefinition(definitions.length, headwordIndicesNew, dictionary, sequence, isPrimary, tags, entries));
definitions.push(this._createTermDefinition(definitions.length, headwordIndicesNew, dictionary, sequences, isPrimary, tags, entries));
}
}
_addTermDefinitions2(definitions, definitionsMap, newDefinitions, headwordIndexMap) {
for (const {headwordIndices, dictionary, sequence, isPrimary, tags, entries} of newDefinitions) {
const key = this._createMapKey([dictionary, sequence, ...entries]);
_addTermDefinitions(definitions, definitionsMap, newDefinitions, headwordIndexMap) {
for (const {headwordIndices, dictionary, sequences, isPrimary, tags, entries} of newDefinitions) {
const key = this._createMapKey([dictionary, ...entries]);
let definition = definitionsMap.get(key);
if (typeof definition === 'undefined') {
definition = this._createTermDefinition(definitions.length, [], dictionary, sequence, isPrimary, [], [...entries]);
definition = this._createTermDefinition(definitions.length, [], dictionary, [...sequences], isPrimary, [], [...entries]);
definitions.push(definition);
definitionsMap.set(key, definition);
} else {
if (isPrimary) {
definition.isPrimary = true;
}
this._addUniqueSimple(definition.sequences, sequences);
}
const newHeadwordIndices = definition.headwordIndices;

File diff suppressed because it is too large Load Diff