Improve term grouping (#1653)
* Rename _addUniqueStrings to _addUniqueSimple * Update definition merging to not depend on the sequence number * Improve naming * Update AnkiNoteDataCreator * Update docs * Remove fields that no longer exist * Update test data
This commit is contained in:
parent
32f5544021
commit
289bdc1622
@ -200,14 +200,6 @@ namespace Translation {
|
||||
* original search text, while non-primary sources originate from related terms.
|
||||
*/
|
||||
isPrimary: boolean;
|
||||
/**
|
||||
* Database sequence number for the term, or `-1` if multiple entries have been merged.
|
||||
*/
|
||||
sequence: number;
|
||||
/**
|
||||
* The dictionary that the sequence number originated from, or `null` if there is no sequence.
|
||||
*/
|
||||
sequenceDictionary: string;
|
||||
/**
|
||||
* A list of inflections that was applied to get the term.
|
||||
*/
|
||||
@ -297,9 +289,11 @@ namespace Translation {
|
||||
*/
|
||||
dictionary: string;
|
||||
/**
|
||||
* Database sequence number for the term. The value will be `-1` if there is no sequence.
|
||||
* A list of database sequence numbers for the term. A value of `-1` corresponds to no sequence.
|
||||
* The list can have multiple values if multiple definitions with different sequences have been merged.
|
||||
* The list should always have at least one item.
|
||||
*/
|
||||
sequence: number;
|
||||
sequences: number[];
|
||||
/**
|
||||
* Tags for the definition.
|
||||
*/
|
||||
|
@ -362,7 +362,7 @@ class AnkiNoteDataCreator {
|
||||
|
||||
const definitions = [];
|
||||
const definitionTags = [];
|
||||
for (const {tags, headwordIndices, entries, dictionary, sequence} of dictionaryEntry.definitions) {
|
||||
for (const {tags, headwordIndices, entries, dictionary, sequences} of dictionaryEntry.definitions) {
|
||||
const definitionTags2 = [];
|
||||
for (const tag of tags) {
|
||||
definitionTags.push(this._convertTag(tag));
|
||||
@ -371,7 +371,7 @@ class AnkiNoteDataCreator {
|
||||
if (!hasDefinitions) { continue; }
|
||||
const only = merged ? DictionaryDataUtil.getDisambiguations(dictionaryEntry.headwords, headwordIndices, allTermsSet, allReadingsSet) : void 0;
|
||||
definitions.push({
|
||||
sequence,
|
||||
sequence: sequences[0],
|
||||
dictionary,
|
||||
glossary: entries,
|
||||
definitionTags: definitionTags2,
|
||||
@ -613,8 +613,9 @@ class AnkiNoteDataCreator {
|
||||
_getTermDictionaryEntrySequence(dictionaryEntry) {
|
||||
let hasSequence = false;
|
||||
let mainSequence = -1;
|
||||
for (const {sequence, isPrimary} of dictionaryEntry.definitions) {
|
||||
for (const {sequences, isPrimary} of dictionaryEntry.definitions) {
|
||||
if (!isPrimary) { continue; }
|
||||
const sequence = sequences[0];
|
||||
if (!hasSequence) {
|
||||
mainSequence = sequence;
|
||||
hasSequence = true;
|
||||
|
@ -353,7 +353,7 @@ class Translator {
|
||||
const groupedDictionaryEntriesMap = new Map();
|
||||
const ungroupedDictionaryEntriesMap = new Map();
|
||||
for (const dictionaryEntry of dictionaryEntries) {
|
||||
const {id, definitions: [{dictionary, sequence}]} = dictionaryEntry;
|
||||
const {id, definitions: [{dictionary, sequences: [sequence]}]} = dictionaryEntry;
|
||||
if (mainDictionary === dictionary && sequence >= 0) {
|
||||
let group = groupedDictionaryEntriesMap.get(sequence);
|
||||
if (typeof group === 'undefined') {
|
||||
@ -620,7 +620,7 @@ class Translator {
|
||||
tag1.order = Math.min(tag1.order, tag2.order);
|
||||
tag1.score = Math.max(tag1.score, tag2.score);
|
||||
tag1.dictionaries.push(...tag2.dictionaries);
|
||||
this._addUniqueStrings(tag1.content, tag2.content);
|
||||
this._addUniqueSimple(tag1.content, tag2.content);
|
||||
tags.splice(j, 1);
|
||||
--tagCount;
|
||||
--j;
|
||||
@ -927,8 +927,8 @@ class Translator {
|
||||
return {index, term, reading, sources, tags, wordClasses};
|
||||
}
|
||||
|
||||
_createTermDefinition(index, headwordIndices, dictionary, sequence, isPrimary, tags, entries) {
|
||||
return {index, headwordIndices, dictionary, sequence, isPrimary, tags, entries};
|
||||
_createTermDefinition(index, headwordIndices, dictionary, sequences, isPrimary, tags, entries) {
|
||||
return {index, headwordIndices, dictionary, sequences, isPrimary, tags, entries};
|
||||
}
|
||||
|
||||
_createTermPronunciation(index, headwordIndex, dictionary, dictionaryIndex, dictionaryPriority, pitches) {
|
||||
@ -982,7 +982,7 @@ class Translator {
|
||||
sourceTermExactMatchCount,
|
||||
maxTransformedTextLength,
|
||||
[this._createTermHeadword(0, term, reading, [source], headwordTagGroups, rules)],
|
||||
[this._createTermDefinition(0, [0], dictionary, sequence, isPrimary, definitionTagGroups, definitions)]
|
||||
[this._createTermDefinition(0, [0], dictionary, [sequence], isPrimary, definitionTagGroups, definitions)]
|
||||
);
|
||||
}
|
||||
|
||||
@ -1027,9 +1027,9 @@ class Translator {
|
||||
}
|
||||
}
|
||||
if (checkDuplicateDefinitions) {
|
||||
this._addTermDefinitions2(definitions, definitionsMap, dictionaryEntry.definitions, headwordIndexMap);
|
||||
this._addTermDefinitions(definitions, definitionsMap, dictionaryEntry.definitions, headwordIndexMap);
|
||||
} else {
|
||||
this._addTermDefinitions(definitions, dictionaryEntry.definitions, headwordIndexMap);
|
||||
this._addTermDefinitionsFast(definitions, dictionaryEntry.definitions, headwordIndexMap);
|
||||
}
|
||||
}
|
||||
|
||||
@ -1049,7 +1049,7 @@ class Translator {
|
||||
|
||||
// Data collection addition functions
|
||||
|
||||
_addUniqueStrings(list, newItems) {
|
||||
_addUniqueSimple(list, newItems) {
|
||||
for (const item of newItems) {
|
||||
if (!list.includes(item)) {
|
||||
list.push(item);
|
||||
@ -1093,7 +1093,7 @@ class Translator {
|
||||
for (; i < ii; ++i) {
|
||||
const tagGroup = tagGroups[i];
|
||||
if (tagGroup.dictionary === dictionary) {
|
||||
this._addUniqueStrings(tagGroup.tagNames, newTagGroup.tagNames);
|
||||
this._addUniqueSimple(tagGroup.tagNames, newTagGroup.tagNames);
|
||||
break;
|
||||
}
|
||||
}
|
||||
@ -1114,7 +1114,7 @@ class Translator {
|
||||
}
|
||||
this._addUniqueSources(headword.sources, sources);
|
||||
this._addUniqueTagGroups(headword.tags, tags);
|
||||
this._addUniqueStrings(headword.wordClasses, wordClasses);
|
||||
this._addUniqueSimple(headword.wordClasses, wordClasses);
|
||||
headwordIndexMap.push(headword.index);
|
||||
}
|
||||
return headwordIndexMap;
|
||||
@ -1143,28 +1143,29 @@ class Translator {
|
||||
headwordIndices.splice(start, 0, headwordIndex);
|
||||
}
|
||||
|
||||
_addTermDefinitions(definitions, newDefinitions, headwordIndexMap) {
|
||||
for (const {headwordIndices, dictionary, sequence, isPrimary, tags, entries} of newDefinitions) {
|
||||
_addTermDefinitionsFast(definitions, newDefinitions, headwordIndexMap) {
|
||||
for (const {headwordIndices, dictionary, sequences, isPrimary, tags, entries} of newDefinitions) {
|
||||
const headwordIndicesNew = [];
|
||||
for (const headwordIndex of headwordIndices) {
|
||||
headwordIndicesNew.push(headwordIndexMap[headwordIndex]);
|
||||
}
|
||||
definitions.push(this._createTermDefinition(definitions.length, headwordIndicesNew, dictionary, sequence, isPrimary, tags, entries));
|
||||
definitions.push(this._createTermDefinition(definitions.length, headwordIndicesNew, dictionary, sequences, isPrimary, tags, entries));
|
||||
}
|
||||
}
|
||||
|
||||
_addTermDefinitions2(definitions, definitionsMap, newDefinitions, headwordIndexMap) {
|
||||
for (const {headwordIndices, dictionary, sequence, isPrimary, tags, entries} of newDefinitions) {
|
||||
const key = this._createMapKey([dictionary, sequence, ...entries]);
|
||||
_addTermDefinitions(definitions, definitionsMap, newDefinitions, headwordIndexMap) {
|
||||
for (const {headwordIndices, dictionary, sequences, isPrimary, tags, entries} of newDefinitions) {
|
||||
const key = this._createMapKey([dictionary, ...entries]);
|
||||
let definition = definitionsMap.get(key);
|
||||
if (typeof definition === 'undefined') {
|
||||
definition = this._createTermDefinition(definitions.length, [], dictionary, sequence, isPrimary, [], [...entries]);
|
||||
definition = this._createTermDefinition(definitions.length, [], dictionary, [...sequences], isPrimary, [], [...entries]);
|
||||
definitions.push(definition);
|
||||
definitionsMap.set(key, definition);
|
||||
} else {
|
||||
if (isPrimary) {
|
||||
definition.isPrimary = true;
|
||||
}
|
||||
this._addUniqueSimple(definition.sequences, sequences);
|
||||
}
|
||||
|
||||
const newHeadwordIndices = definition.headwordIndices;
|
||||
|
File diff suppressed because it is too large
Load Diff
Loading…
Reference in New Issue
Block a user