Improve term grouping (#1653)
* Rename _addUniqueStrings to _addUniqueSimple * Update definition merging to not depend on the sequence number * Improve naming * Update AnkiNoteDataCreator * Update docs * Remove fields that no longer exist * Update test data
This commit is contained in:
parent
32f5544021
commit
289bdc1622
@ -200,14 +200,6 @@ namespace Translation {
|
|||||||
* original search text, while non-primary sources originate from related terms.
|
* original search text, while non-primary sources originate from related terms.
|
||||||
*/
|
*/
|
||||||
isPrimary: boolean;
|
isPrimary: boolean;
|
||||||
/**
|
|
||||||
* Database sequence number for the term, or `-1` if multiple entries have been merged.
|
|
||||||
*/
|
|
||||||
sequence: number;
|
|
||||||
/**
|
|
||||||
* The dictionary that the sequence number originated from, or `null` if there is no sequence.
|
|
||||||
*/
|
|
||||||
sequenceDictionary: string;
|
|
||||||
/**
|
/**
|
||||||
* A list of inflections that was applied to get the term.
|
* A list of inflections that was applied to get the term.
|
||||||
*/
|
*/
|
||||||
@ -297,9 +289,11 @@ namespace Translation {
|
|||||||
*/
|
*/
|
||||||
dictionary: string;
|
dictionary: string;
|
||||||
/**
|
/**
|
||||||
* Database sequence number for the term. The value will be `-1` if there is no sequence.
|
* A list of database sequence numbers for the term. A value of `-1` corresponds to no sequence.
|
||||||
|
* The list can have multiple values if multiple definitions with different sequences have been merged.
|
||||||
|
* The list should always have at least one item.
|
||||||
*/
|
*/
|
||||||
sequence: number;
|
sequences: number;
|
||||||
/**
|
/**
|
||||||
* Tags for the definition.
|
* Tags for the definition.
|
||||||
*/
|
*/
|
||||||
|
@ -362,7 +362,7 @@ class AnkiNoteDataCreator {
|
|||||||
|
|
||||||
const definitions = [];
|
const definitions = [];
|
||||||
const definitionTags = [];
|
const definitionTags = [];
|
||||||
for (const {tags, headwordIndices, entries, dictionary, sequence} of dictionaryEntry.definitions) {
|
for (const {tags, headwordIndices, entries, dictionary, sequences} of dictionaryEntry.definitions) {
|
||||||
const definitionTags2 = [];
|
const definitionTags2 = [];
|
||||||
for (const tag of tags) {
|
for (const tag of tags) {
|
||||||
definitionTags.push(this._convertTag(tag));
|
definitionTags.push(this._convertTag(tag));
|
||||||
@ -371,7 +371,7 @@ class AnkiNoteDataCreator {
|
|||||||
if (!hasDefinitions) { continue; }
|
if (!hasDefinitions) { continue; }
|
||||||
const only = merged ? DictionaryDataUtil.getDisambiguations(dictionaryEntry.headwords, headwordIndices, allTermsSet, allReadingsSet) : void 0;
|
const only = merged ? DictionaryDataUtil.getDisambiguations(dictionaryEntry.headwords, headwordIndices, allTermsSet, allReadingsSet) : void 0;
|
||||||
definitions.push({
|
definitions.push({
|
||||||
sequence,
|
sequence: sequences[0],
|
||||||
dictionary,
|
dictionary,
|
||||||
glossary: entries,
|
glossary: entries,
|
||||||
definitionTags: definitionTags2,
|
definitionTags: definitionTags2,
|
||||||
@ -613,8 +613,9 @@ class AnkiNoteDataCreator {
|
|||||||
_getTermDictionaryEntrySequence(dictionaryEntry) {
|
_getTermDictionaryEntrySequence(dictionaryEntry) {
|
||||||
let hasSequence = false;
|
let hasSequence = false;
|
||||||
let mainSequence = -1;
|
let mainSequence = -1;
|
||||||
for (const {sequence, isPrimary} of dictionaryEntry.definitions) {
|
for (const {sequences, isPrimary} of dictionaryEntry.definitions) {
|
||||||
if (!isPrimary) { continue; }
|
if (!isPrimary) { continue; }
|
||||||
|
const sequence = sequences[0];
|
||||||
if (!hasSequence) {
|
if (!hasSequence) {
|
||||||
mainSequence = sequence;
|
mainSequence = sequence;
|
||||||
hasSequence = true;
|
hasSequence = true;
|
||||||
|
@ -353,7 +353,7 @@ class Translator {
|
|||||||
const groupedDictionaryEntriesMap = new Map();
|
const groupedDictionaryEntriesMap = new Map();
|
||||||
const ungroupedDictionaryEntriesMap = new Map();
|
const ungroupedDictionaryEntriesMap = new Map();
|
||||||
for (const dictionaryEntry of dictionaryEntries) {
|
for (const dictionaryEntry of dictionaryEntries) {
|
||||||
const {id, definitions: [{dictionary, sequence}]} = dictionaryEntry;
|
const {id, definitions: [{dictionary, sequences: [sequence]}]} = dictionaryEntry;
|
||||||
if (mainDictionary === dictionary && sequence >= 0) {
|
if (mainDictionary === dictionary && sequence >= 0) {
|
||||||
let group = groupedDictionaryEntriesMap.get(sequence);
|
let group = groupedDictionaryEntriesMap.get(sequence);
|
||||||
if (typeof group === 'undefined') {
|
if (typeof group === 'undefined') {
|
||||||
@ -620,7 +620,7 @@ class Translator {
|
|||||||
tag1.order = Math.min(tag1.order, tag2.order);
|
tag1.order = Math.min(tag1.order, tag2.order);
|
||||||
tag1.score = Math.max(tag1.score, tag2.score);
|
tag1.score = Math.max(tag1.score, tag2.score);
|
||||||
tag1.dictionaries.push(...tag2.dictionaries);
|
tag1.dictionaries.push(...tag2.dictionaries);
|
||||||
this._addUniqueStrings(tag1.content, tag2.content);
|
this._addUniqueSimple(tag1.content, tag2.content);
|
||||||
tags.splice(j, 1);
|
tags.splice(j, 1);
|
||||||
--tagCount;
|
--tagCount;
|
||||||
--j;
|
--j;
|
||||||
@ -927,8 +927,8 @@ class Translator {
|
|||||||
return {index, term, reading, sources, tags, wordClasses};
|
return {index, term, reading, sources, tags, wordClasses};
|
||||||
}
|
}
|
||||||
|
|
||||||
_createTermDefinition(index, headwordIndices, dictionary, sequence, isPrimary, tags, entries) {
|
_createTermDefinition(index, headwordIndices, dictionary, sequences, isPrimary, tags, entries) {
|
||||||
return {index, headwordIndices, dictionary, sequence, isPrimary, tags, entries};
|
return {index, headwordIndices, dictionary, sequences, isPrimary, tags, entries};
|
||||||
}
|
}
|
||||||
|
|
||||||
_createTermPronunciation(index, headwordIndex, dictionary, dictionaryIndex, dictionaryPriority, pitches) {
|
_createTermPronunciation(index, headwordIndex, dictionary, dictionaryIndex, dictionaryPriority, pitches) {
|
||||||
@ -982,7 +982,7 @@ class Translator {
|
|||||||
sourceTermExactMatchCount,
|
sourceTermExactMatchCount,
|
||||||
maxTransformedTextLength,
|
maxTransformedTextLength,
|
||||||
[this._createTermHeadword(0, term, reading, [source], headwordTagGroups, rules)],
|
[this._createTermHeadword(0, term, reading, [source], headwordTagGroups, rules)],
|
||||||
[this._createTermDefinition(0, [0], dictionary, sequence, isPrimary, definitionTagGroups, definitions)]
|
[this._createTermDefinition(0, [0], dictionary, [sequence], isPrimary, definitionTagGroups, definitions)]
|
||||||
);
|
);
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -1027,9 +1027,9 @@ class Translator {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
if (checkDuplicateDefinitions) {
|
if (checkDuplicateDefinitions) {
|
||||||
this._addTermDefinitions2(definitions, definitionsMap, dictionaryEntry.definitions, headwordIndexMap);
|
this._addTermDefinitions(definitions, definitionsMap, dictionaryEntry.definitions, headwordIndexMap);
|
||||||
} else {
|
} else {
|
||||||
this._addTermDefinitions(definitions, dictionaryEntry.definitions, headwordIndexMap);
|
this._addTermDefinitionsFast(definitions, dictionaryEntry.definitions, headwordIndexMap);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -1049,7 +1049,7 @@ class Translator {
|
|||||||
|
|
||||||
// Data collection addition functions
|
// Data collection addition functions
|
||||||
|
|
||||||
_addUniqueStrings(list, newItems) {
|
_addUniqueSimple(list, newItems) {
|
||||||
for (const item of newItems) {
|
for (const item of newItems) {
|
||||||
if (!list.includes(item)) {
|
if (!list.includes(item)) {
|
||||||
list.push(item);
|
list.push(item);
|
||||||
@ -1093,7 +1093,7 @@ class Translator {
|
|||||||
for (; i < ii; ++i) {
|
for (; i < ii; ++i) {
|
||||||
const tagGroup = tagGroups[i];
|
const tagGroup = tagGroups[i];
|
||||||
if (tagGroup.dictionary === dictionary) {
|
if (tagGroup.dictionary === dictionary) {
|
||||||
this._addUniqueStrings(tagGroup.tagNames, newTagGroup.tagNames);
|
this._addUniqueSimple(tagGroup.tagNames, newTagGroup.tagNames);
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -1114,7 +1114,7 @@ class Translator {
|
|||||||
}
|
}
|
||||||
this._addUniqueSources(headword.sources, sources);
|
this._addUniqueSources(headword.sources, sources);
|
||||||
this._addUniqueTagGroups(headword.tags, tags);
|
this._addUniqueTagGroups(headword.tags, tags);
|
||||||
this._addUniqueStrings(headword.wordClasses, wordClasses);
|
this._addUniqueSimple(headword.wordClasses, wordClasses);
|
||||||
headwordIndexMap.push(headword.index);
|
headwordIndexMap.push(headword.index);
|
||||||
}
|
}
|
||||||
return headwordIndexMap;
|
return headwordIndexMap;
|
||||||
@ -1143,28 +1143,29 @@ class Translator {
|
|||||||
headwordIndices.splice(start, 0, headwordIndex);
|
headwordIndices.splice(start, 0, headwordIndex);
|
||||||
}
|
}
|
||||||
|
|
||||||
_addTermDefinitions(definitions, newDefinitions, headwordIndexMap) {
|
_addTermDefinitionsFast(definitions, newDefinitions, headwordIndexMap) {
|
||||||
for (const {headwordIndices, dictionary, sequence, isPrimary, tags, entries} of newDefinitions) {
|
for (const {headwordIndices, dictionary, sequences, isPrimary, tags, entries} of newDefinitions) {
|
||||||
const headwordIndicesNew = [];
|
const headwordIndicesNew = [];
|
||||||
for (const headwordIndex of headwordIndices) {
|
for (const headwordIndex of headwordIndices) {
|
||||||
headwordIndicesNew.push(headwordIndexMap[headwordIndex]);
|
headwordIndicesNew.push(headwordIndexMap[headwordIndex]);
|
||||||
}
|
}
|
||||||
definitions.push(this._createTermDefinition(definitions.length, headwordIndicesNew, dictionary, sequence, isPrimary, tags, entries));
|
definitions.push(this._createTermDefinition(definitions.length, headwordIndicesNew, dictionary, sequences, isPrimary, tags, entries));
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
_addTermDefinitions2(definitions, definitionsMap, newDefinitions, headwordIndexMap) {
|
_addTermDefinitions(definitions, definitionsMap, newDefinitions, headwordIndexMap) {
|
||||||
for (const {headwordIndices, dictionary, sequence, isPrimary, tags, entries} of newDefinitions) {
|
for (const {headwordIndices, dictionary, sequences, isPrimary, tags, entries} of newDefinitions) {
|
||||||
const key = this._createMapKey([dictionary, sequence, ...entries]);
|
const key = this._createMapKey([dictionary, ...entries]);
|
||||||
let definition = definitionsMap.get(key);
|
let definition = definitionsMap.get(key);
|
||||||
if (typeof definition === 'undefined') {
|
if (typeof definition === 'undefined') {
|
||||||
definition = this._createTermDefinition(definitions.length, [], dictionary, sequence, isPrimary, [], [...entries]);
|
definition = this._createTermDefinition(definitions.length, [], dictionary, [...sequences], isPrimary, [], [...entries]);
|
||||||
definitions.push(definition);
|
definitions.push(definition);
|
||||||
definitionsMap.set(key, definition);
|
definitionsMap.set(key, definition);
|
||||||
} else {
|
} else {
|
||||||
if (isPrimary) {
|
if (isPrimary) {
|
||||||
definition.isPrimary = true;
|
definition.isPrimary = true;
|
||||||
}
|
}
|
||||||
|
this._addUniqueSimple(definition.sequences, sequences);
|
||||||
}
|
}
|
||||||
|
|
||||||
const newHeadwordIndices = definition.headwordIndices;
|
const newHeadwordIndices = definition.headwordIndices;
|
||||||
|
File diff suppressed because it is too large
Load Diff
Loading…
Reference in New Issue
Block a user