Improve term meta ordering (#1455)

* Update implementation of _buildTermMeta

* Remove old implementation

* Expose dictionaryPriority on frequencies and pitch accents

* Update how meta data is generated; add index

* Update order

* Update names

* Expose expressionIndex as part of pitch/frequency data

* Implement meta sorting

* Update test data
This commit is contained in:
toasted-nutbread 2021-02-27 22:27:00 -05:00 committed by GitHub
parent 7d6915ec3b
commit e4a4e5f85f
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
2 changed files with 1658 additions and 159 deletions

View File

@ -655,70 +655,72 @@ class Translator {
// Metadata building // Metadata building
async _buildTermMeta(definitions, enabledDictionaryMap) { async _buildTermMeta(definitions, enabledDictionaryMap) {
const addMetadataTargetInfo = (targetMap1, target, parents) => { const allDefinitions = this._getAllDefinitions(definitions);
let {expression, reading} = target; const expressionMap = new Map();
if (!reading) { reading = expression; } const expressionValues = [];
const expressionKeys = [];
let targetMap2 = targetMap1.get(expression); for (const {expressions, frequencies: frequencies1, pitches: pitches1} of allDefinitions) {
if (typeof targetMap2 === 'undefined') { for (let i = 0, ii = expressions.length; i < ii; ++i) {
targetMap2 = new Map(); const {expression, reading, frequencies: frequencies2, pitches: pitches2} = expressions[i];
targetMap1.set(expression, targetMap2); let readingMap = expressionMap.get(expression);
} if (typeof readingMap === 'undefined') {
readingMap = new Map();
let targets = targetMap2.get(reading); expressionMap.set(expression, readingMap);
if (typeof targets === 'undefined') { expressionValues.push(readingMap);
targets = new Set([target, ...parents]); expressionKeys.push(expression);
targetMap2.set(reading, targets);
} else {
targets.add(target);
for (const parent of parents) {
targets.add(parent);
} }
} let targets = readingMap.get(reading);
}; if (typeof targets === 'undefined') {
targets = [];
const targetMap = new Map(); readingMap.set(reading, targets);
const definitionsQueue = definitions.map((definition) => ({definition, parents: []}));
while (definitionsQueue.length > 0) {
const {definition, parents} = definitionsQueue.shift();
const childDefinitions = definition.definitions;
if (Array.isArray(childDefinitions)) {
for (const definition2 of childDefinitions) {
definitionsQueue.push({definition: definition2, parents: [...parents, definition]});
} }
} else { targets.push(
addMetadataTargetInfo(targetMap, definition, parents); {frequencies: frequencies1, pitches: pitches1, index: i},
} {frequencies: frequencies2, pitches: pitches2, index: i}
);
for (const target of definition.expressions) {
addMetadataTargetInfo(targetMap, target, []);
} }
} }
const targetMapEntries = [...targetMap.entries()];
const uniqueExpressions = targetMapEntries.map(([expression]) => expression);
const metas = await this._database.findTermMetaBulk(uniqueExpressions, enabledDictionaryMap); const metas = await this._database.findTermMetaBulk(expressionKeys, enabledDictionaryMap);
for (const {expression, mode, data, dictionary, index} of metas) { for (const {expression, mode, data, dictionary, index} of metas) {
const targetMap2 = targetMapEntries[index][1]; const dictionaryPriority = this._getDictionaryPriority(dictionary, enabledDictionaryMap);
for (const [reading, targets] of targetMap2) { const map2 = expressionValues[index];
for (const [reading, targets] of map2.entries()) {
switch (mode) { switch (mode) {
case 'freq': case 'freq':
{ {
const frequencyData = this._getTermFrequencyData(expression, reading, dictionary, data); let frequency = data;
if (frequencyData === null) { continue; } const hasReading = (data !== null && typeof data === 'object');
for (const {frequencies} of targets) { frequencies.push(frequencyData); } if (hasReading) {
if (data.reading !== reading) { continue; }
frequency = data.frequency;
}
for (const {frequencies, index: expressionIndex} of targets) {
frequencies.push({index: frequencies.length, expressionIndex, dictionary, dictionaryPriority, expression, reading, hasReading, frequency});
}
} }
break; break;
case 'pitch': case 'pitch':
{ {
const pitchData = await this._getPitchData(expression, reading, dictionary, data); if (data.reading !== reading) { continue; }
if (pitchData === null) { continue; } const pitches2 = [];
for (const {pitches} of targets) { pitches.push(pitchData); } for (let {position, tags} of data.pitches) {
tags = Array.isArray(tags) ? await this._expandTags(tags, dictionary) : [];
pitches2.push({position, tags});
}
for (const {pitches, index: expressionIndex} of targets) {
pitches.push({index: pitches.length, expressionIndex, dictionary, dictionaryPriority, expression, reading, pitches: pitches2});
}
} }
break; break;
} }
} }
} }
for (const definition of allDefinitions) {
this._sortTermDefinitionMeta(definition);
}
} }
async _buildKanjiMeta(definitions, enabledDictionaryMap) { async _buildKanjiMeta(definitions, enabledDictionaryMap) {
@ -729,15 +731,20 @@ class Translator {
const metas = await this._database.findKanjiMetaBulk(kanjiList, enabledDictionaryMap); const metas = await this._database.findKanjiMetaBulk(kanjiList, enabledDictionaryMap);
for (const {character, mode, data, dictionary, index} of metas) { for (const {character, mode, data, dictionary, index} of metas) {
const dictionaryPriority = this._getDictionaryPriority(dictionary, enabledDictionaryMap);
switch (mode) { switch (mode) {
case 'freq': case 'freq':
{ {
const frequencyData = this._getKanjiFrequencyData(character, dictionary, data); const {frequencies} = definitions[index];
definitions[index].frequencies.push(frequencyData); frequencies.push({index: frequencies.length, dictionary, dictionaryPriority, character, frequency: data});
} }
break; break;
} }
} }
for (const definition of definitions) {
this._sortKanjiDefinitionMeta(definition);
}
} }
async _expandTags(names, dictionary) { async _expandTags(names, dictionary) {
@ -806,32 +813,6 @@ class Translator {
return tagMetaList; return tagMetaList;
} }
_getTermFrequencyData(expression, reading, dictionary, data) {
let frequency = data;
const hasReading = (data !== null && typeof data === 'object');
if (hasReading) {
if (data.reading !== reading) { return null; }
frequency = data.frequency;
}
return {dictionary, expression, reading, hasReading, frequency};
}
_getKanjiFrequencyData(character, dictionary, data) {
return {dictionary, character, frequency: data};
}
async _getPitchData(expression, reading, dictionary, data) {
if (data.reading !== reading) { return null; }
const pitches = [];
for (let {position, tags} of data.pitches) {
tags = Array.isArray(tags) ? await this._expandTags(tags, dictionary) : [];
pitches.push({position, tags});
}
return {expression, reading, dictionary, pitches};
}
// Simple helpers // Simple helpers
_scoreToTermFrequency(score) { _scoreToTermFrequency(score) {
@ -1000,6 +981,17 @@ class Translator {
return result; return result;
} }
_getAllDefinitions(definitions) {
definitions = [...definitions];
for (let i = 0; i < definitions.length; ++i) {
const childDefinitions = definitions[i].definitions;
if (Array.isArray(childDefinitions)) {
definitions.push(...childDefinitions);
}
}
return definitions;
}
// Reduction functions // Reduction functions
_getTermTagsScoreSum(termTags) { _getTermTagsScoreSum(termTags) {
@ -1334,6 +1326,45 @@ class Translator {
}); });
} }
_sortTermDefinitionMeta(definition) {
const compareFunction = (v1, v2) => {
// Sort by dictionary
let i = v2.dictionaryPriority - v1.dictionaryPriority;
if (i !== 0) { return i; }
// Sory by expression order
i = v1.expressionIndex - v2.expressionIndex;
if (i !== 0) { return i; }
// Default order
i = v1.index - v2.index;
return i;
};
const {expressions, frequencies: frequencies1, pitches: pitches1} = definition;
frequencies1.sort(compareFunction);
pitches1.sort(compareFunction);
for (const {frequencies: frequencies2, pitches: pitches2} of expressions) {
frequencies2.sort(compareFunction);
pitches2.sort(compareFunction);
}
}
_sortKanjiDefinitionMeta(definition) {
const compareFunction = (v1, v2) => {
// Sort by dictionary
let i = v2.dictionaryPriority - v1.dictionaryPriority;
if (i !== 0) { return i; }
// Default order
i = v1.index - v2.index;
return i;
};
const {frequencies} = definition;
frequencies.sort(compareFunction);
}
// Regex functions // Regex functions
_applyTextReplacements(text, sourceMap, replacements) { _applyTextReplacements(text, sourceMap, replacements) {

File diff suppressed because it is too large Load Diff