Improve term meta ordering (#1455)

* Update implementation of _buildTermMeta

* Remove old implementation

* Expose dictionaryPriority on frequencies and pitch accents

* Update how meta data is generated; add index

* Update order

* Update names

* Expose expressionIndex as part of pitch/frequency data

* Implement meta sorting

* Update test data
This commit is contained in:
toasted-nutbread 2021-02-27 22:27:00 -05:00 committed by GitHub
parent 7d6915ec3b
commit e4a4e5f85f
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
2 changed files with 1658 additions and 159 deletions

View File

@ -655,70 +655,72 @@ class Translator {
// Metadata building
// Attaches frequency ('freq') and pitch-accent ('pitch') metadata returned by
// this._database.findTermMetaBulk to the frequencies/pitches arrays of term definitions.
// NOTE(review): this span appears to interleave the removed and the added lines of a diff hunk
// (no +/- markers are visible), so it does not read as a single coherent function body —
// confirm against the real file before relying on any comment below.
async _buildTermMeta(definitions, enabledDictionaryMap) {
const addMetadataTargetInfo = (targetMap1, target, parents) => {
let {expression, reading} = target;
// A falsy reading falls back to the expression text.
if (!reading) { reading = expression; }
// Flat list of the given definitions plus their child definitions (see _getAllDefinitions).
const allDefinitions = this._getAllDefinitions(definitions);
// expression -> (reading -> array of target records).
const expressionMap = new Map();
// Pushed in lockstep: expressionValues[k] is the reading map stored under expressionKeys[k].
const expressionValues = [];
const expressionKeys = [];
let targetMap2 = targetMap1.get(expression);
if (typeof targetMap2 === 'undefined') {
targetMap2 = new Map();
targetMap1.set(expression, targetMap2);
}
let targets = targetMap2.get(reading);
if (typeof targets === 'undefined') {
targets = new Set([target, ...parents]);
targetMap2.set(reading, targets);
} else {
targets.add(target);
for (const parent of parents) {
targets.add(parent);
for (const {expressions, frequencies: frequencies1, pitches: pitches1} of allDefinitions) {
for (let i = 0, ii = expressions.length; i < ii; ++i) {
const {expression, reading, frequencies: frequencies2, pitches: pitches2} = expressions[i];
let readingMap = expressionMap.get(expression);
// First time this expression is seen: create its reading map and record the key/value pair.
if (typeof readingMap === 'undefined') {
readingMap = new Map();
expressionMap.set(expression, readingMap);
expressionValues.push(readingMap);
expressionKeys.push(expression);
}
}
};
const targetMap = new Map();
const definitionsQueue = definitions.map((definition) => ({definition, parents: []}));
while (definitionsQueue.length > 0) {
const {definition, parents} = definitionsQueue.shift();
const childDefinitions = definition.definitions;
if (Array.isArray(childDefinitions)) {
for (const definition2 of childDefinitions) {
definitionsQueue.push({definition: definition2, parents: [...parents, definition]});
let targets = readingMap.get(reading);
if (typeof targets === 'undefined') {
targets = [];
readingMap.set(reading, targets);
}
} else {
addMetadataTargetInfo(targetMap, definition, parents);
}
for (const target of definition.expressions) {
addMetadataTargetInfo(targetMap, target, []);
// Two target records per expression entry: the definition-level arrays and the
// expression-level arrays, both tagged with the expression's position i.
targets.push(
{frequencies: frequencies1, pitches: pitches1, index: i},
{frequencies: frequencies2, pitches: pitches2, index: i}
);
}
}
const targetMapEntries = [...targetMap.entries()];
const uniqueExpressions = targetMapEntries.map(([expression]) => expression);
const metas = await this._database.findTermMetaBulk(uniqueExpressions, enabledDictionaryMap);
// expressionKeys holds each distinct expression once, in first-seen order.
const metas = await this._database.findTermMetaBulk(expressionKeys, enabledDictionaryMap);
for (const {expression, mode, data, dictionary, index} of metas) {
const targetMap2 = targetMapEntries[index][1];
for (const [reading, targets] of targetMap2) {
const dictionaryPriority = this._getDictionaryPriority(dictionary, enabledDictionaryMap);
// presumably `index` is the position of the queried expression in the list passed to
// findTermMetaBulk — TODO confirm against the database implementation.
const map2 = expressionValues[index];
for (const [reading, targets] of map2.entries()) {
switch (mode) {
case 'freq':
{
const frequencyData = this._getTermFrequencyData(expression, reading, dictionary, data);
if (frequencyData === null) { continue; }
for (const {frequencies} of targets) { frequencies.push(frequencyData); }
// data is used directly as the frequency unless it is a non-null object,
// in which case it carries its own reading and a frequency field.
let frequency = data;
const hasReading = (data !== null && typeof data === 'object');
if (hasReading) {
// Entry is bound to a different reading: skip this reading.
if (data.reading !== reading) { continue; }
frequency = data.frequency;
}
for (const {frequencies, index: expressionIndex} of targets) {
// index is the entry's insertion position in this array; _sortTermDefinitionMeta uses it as the last tie-breaker.
frequencies.push({index: frequencies.length, expressionIndex, dictionary, dictionaryPriority, expression, reading, hasReading, frequency});
}
}
break;
case 'pitch':
{
const pitchData = await this._getPitchData(expression, reading, dictionary, data);
if (pitchData === null) { continue; }
for (const {pitches} of targets) { pitches.push(pitchData); }
// Pitch entries always carry a reading; skip readings they do not match.
if (data.reading !== reading) { continue; }
const pitches2 = [];
for (let {position, tags} of data.pitches) {
// Tag names are expanded via _expandTags; a non-array tags value becomes an empty list.
tags = Array.isArray(tags) ? await this._expandTags(tags, dictionary) : [];
pitches2.push({position, tags});
}
for (const {pitches, index: expressionIndex} of targets) {
pitches.push({index: pitches.length, expressionIndex, dictionary, dictionaryPriority, expression, reading, pitches: pitches2});
}
}
break;
}
}
}
// Order the collected frequency/pitch entries of every definition (top-level and child).
for (const definition of allDefinitions) {
this._sortTermDefinitionMeta(definition);
}
}
async _buildKanjiMeta(definitions, enabledDictionaryMap) {
@ -729,15 +731,20 @@ class Translator {
const metas = await this._database.findKanjiMetaBulk(kanjiList, enabledDictionaryMap);
for (const {character, mode, data, dictionary, index} of metas) {
const dictionaryPriority = this._getDictionaryPriority(dictionary, enabledDictionaryMap);
switch (mode) {
case 'freq':
{
const frequencyData = this._getKanjiFrequencyData(character, dictionary, data);
definitions[index].frequencies.push(frequencyData);
const {frequencies} = definitions[index];
frequencies.push({index: frequencies.length, dictionary, dictionaryPriority, character, frequency: data});
}
break;
}
}
for (const definition of definitions) {
this._sortKanjiDefinitionMeta(definition);
}
}
async _expandTags(names, dictionary) {
@ -806,32 +813,6 @@ class Translator {
return tagMetaList;
}
_getTermFrequencyData(expression, reading, dictionary, data) {
let frequency = data;
const hasReading = (data !== null && typeof data === 'object');
if (hasReading) {
if (data.reading !== reading) { return null; }
frequency = data.frequency;
}
return {dictionary, expression, reading, hasReading, frequency};
}
_getKanjiFrequencyData(character, dictionary, data) {
return {dictionary, character, frequency: data};
}
async _getPitchData(expression, reading, dictionary, data) {
if (data.reading !== reading) { return null; }
const pitches = [];
for (let {position, tags} of data.pitches) {
tags = Array.isArray(tags) ? await this._expandTags(tags, dictionary) : [];
pitches.push({position, tags});
}
return {expression, reading, dictionary, pitches};
}
// Simple helpers
_scoreToTermFrequency(score) {
@ -1000,6 +981,17 @@ class Translator {
return result;
}
_getAllDefinitions(definitions) {
definitions = [...definitions];
for (let i = 0; i < definitions.length; ++i) {
const childDefinitions = definitions[i].definitions;
if (Array.isArray(childDefinitions)) {
definitions.push(...childDefinitions);
}
}
return definitions;
}
// Reduction functions
_getTermTagsScoreSum(termTags) {
@ -1334,6 +1326,45 @@ class Translator {
});
}
_sortTermDefinitionMeta(definition) {
const compareFunction = (v1, v2) => {
// Sort by dictionary
let i = v2.dictionaryPriority - v1.dictionaryPriority;
if (i !== 0) { return i; }
// Sory by expression order
i = v1.expressionIndex - v2.expressionIndex;
if (i !== 0) { return i; }
// Default order
i = v1.index - v2.index;
return i;
};
const {expressions, frequencies: frequencies1, pitches: pitches1} = definition;
frequencies1.sort(compareFunction);
pitches1.sort(compareFunction);
for (const {frequencies: frequencies2, pitches: pitches2} of expressions) {
frequencies2.sort(compareFunction);
pitches2.sort(compareFunction);
}
}
_sortKanjiDefinitionMeta(definition) {
const compareFunction = (v1, v2) => {
// Sort by dictionary
let i = v2.dictionaryPriority - v1.dictionaryPriority;
if (i !== 0) { return i; }
// Default order
i = v1.index - v2.index;
return i;
};
const {frequencies} = definition;
frequencies.sort(compareFunction);
}
// Regex functions
_applyTextReplacements(text, sourceMap, replacements) {

File diff suppressed because it is too large Load Diff