Improve term meta ordering (#1455)
* Update implementation of _buildTermMeta
* Remove old implementation
* Expose dictionaryPriority on frequencies and pitch accents
* Update how meta data is generated; add index
* Update order
* Update names
* Expose expressionIndex as part of pitch/frequency data
* Implement meta sorting
* Update test data
This commit is contained in:
parent
7d6915ec3b
commit
e4a4e5f85f
@ -655,70 +655,72 @@ class Translator {
|
|||||||
// Metadata building
|
// Metadata building
|
||||||
|
|
||||||
async _buildTermMeta(definitions, enabledDictionaryMap) {
|
async _buildTermMeta(definitions, enabledDictionaryMap) {
|
||||||
const addMetadataTargetInfo = (targetMap1, target, parents) => {
|
const allDefinitions = this._getAllDefinitions(definitions);
|
||||||
let {expression, reading} = target;
|
const expressionMap = new Map();
|
||||||
if (!reading) { reading = expression; }
|
const expressionValues = [];
|
||||||
|
const expressionKeys = [];
|
||||||
|
|
||||||
let targetMap2 = targetMap1.get(expression);
|
for (const {expressions, frequencies: frequencies1, pitches: pitches1} of allDefinitions) {
|
||||||
if (typeof targetMap2 === 'undefined') {
|
for (let i = 0, ii = expressions.length; i < ii; ++i) {
|
||||||
targetMap2 = new Map();
|
const {expression, reading, frequencies: frequencies2, pitches: pitches2} = expressions[i];
|
||||||
targetMap1.set(expression, targetMap2);
|
let readingMap = expressionMap.get(expression);
|
||||||
|
if (typeof readingMap === 'undefined') {
|
||||||
|
readingMap = new Map();
|
||||||
|
expressionMap.set(expression, readingMap);
|
||||||
|
expressionValues.push(readingMap);
|
||||||
|
expressionKeys.push(expression);
|
||||||
}
|
}
|
||||||
|
let targets = readingMap.get(reading);
|
||||||
let targets = targetMap2.get(reading);
|
|
||||||
if (typeof targets === 'undefined') {
|
if (typeof targets === 'undefined') {
|
||||||
targets = new Set([target, ...parents]);
|
targets = [];
|
||||||
targetMap2.set(reading, targets);
|
readingMap.set(reading, targets);
|
||||||
} else {
|
|
||||||
targets.add(target);
|
|
||||||
for (const parent of parents) {
|
|
||||||
targets.add(parent);
|
|
||||||
}
|
}
|
||||||
|
targets.push(
|
||||||
|
{frequencies: frequencies1, pitches: pitches1, index: i},
|
||||||
|
{frequencies: frequencies2, pitches: pitches2, index: i}
|
||||||
|
);
|
||||||
}
|
}
|
||||||
};
|
|
||||||
|
|
||||||
const targetMap = new Map();
|
|
||||||
const definitionsQueue = definitions.map((definition) => ({definition, parents: []}));
|
|
||||||
while (definitionsQueue.length > 0) {
|
|
||||||
const {definition, parents} = definitionsQueue.shift();
|
|
||||||
const childDefinitions = definition.definitions;
|
|
||||||
if (Array.isArray(childDefinitions)) {
|
|
||||||
for (const definition2 of childDefinitions) {
|
|
||||||
definitionsQueue.push({definition: definition2, parents: [...parents, definition]});
|
|
||||||
}
|
|
||||||
} else {
|
|
||||||
addMetadataTargetInfo(targetMap, definition, parents);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
for (const target of definition.expressions) {
|
const metas = await this._database.findTermMetaBulk(expressionKeys, enabledDictionaryMap);
|
||||||
addMetadataTargetInfo(targetMap, target, []);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
const targetMapEntries = [...targetMap.entries()];
|
|
||||||
const uniqueExpressions = targetMapEntries.map(([expression]) => expression);
|
|
||||||
|
|
||||||
const metas = await this._database.findTermMetaBulk(uniqueExpressions, enabledDictionaryMap);
|
|
||||||
for (const {expression, mode, data, dictionary, index} of metas) {
|
for (const {expression, mode, data, dictionary, index} of metas) {
|
||||||
const targetMap2 = targetMapEntries[index][1];
|
const dictionaryPriority = this._getDictionaryPriority(dictionary, enabledDictionaryMap);
|
||||||
for (const [reading, targets] of targetMap2) {
|
const map2 = expressionValues[index];
|
||||||
|
for (const [reading, targets] of map2.entries()) {
|
||||||
switch (mode) {
|
switch (mode) {
|
||||||
case 'freq':
|
case 'freq':
|
||||||
{
|
{
|
||||||
const frequencyData = this._getTermFrequencyData(expression, reading, dictionary, data);
|
let frequency = data;
|
||||||
if (frequencyData === null) { continue; }
|
const hasReading = (data !== null && typeof data === 'object');
|
||||||
for (const {frequencies} of targets) { frequencies.push(frequencyData); }
|
if (hasReading) {
|
||||||
|
if (data.reading !== reading) { continue; }
|
||||||
|
frequency = data.frequency;
|
||||||
|
}
|
||||||
|
for (const {frequencies, index: expressionIndex} of targets) {
|
||||||
|
frequencies.push({index: frequencies.length, expressionIndex, dictionary, dictionaryPriority, expression, reading, hasReading, frequency});
|
||||||
|
}
|
||||||
}
|
}
|
||||||
break;
|
break;
|
||||||
case 'pitch':
|
case 'pitch':
|
||||||
{
|
{
|
||||||
const pitchData = await this._getPitchData(expression, reading, dictionary, data);
|
if (data.reading !== reading) { continue; }
|
||||||
if (pitchData === null) { continue; }
|
const pitches2 = [];
|
||||||
for (const {pitches} of targets) { pitches.push(pitchData); }
|
for (let {position, tags} of data.pitches) {
|
||||||
|
tags = Array.isArray(tags) ? await this._expandTags(tags, dictionary) : [];
|
||||||
|
pitches2.push({position, tags});
|
||||||
|
}
|
||||||
|
for (const {pitches, index: expressionIndex} of targets) {
|
||||||
|
pitches.push({index: pitches.length, expressionIndex, dictionary, dictionaryPriority, expression, reading, pitches: pitches2});
|
||||||
|
}
|
||||||
}
|
}
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
for (const definition of allDefinitions) {
|
||||||
|
this._sortTermDefinitionMeta(definition);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
async _buildKanjiMeta(definitions, enabledDictionaryMap) {
|
async _buildKanjiMeta(definitions, enabledDictionaryMap) {
|
||||||
@ -729,15 +731,20 @@ class Translator {
|
|||||||
|
|
||||||
const metas = await this._database.findKanjiMetaBulk(kanjiList, enabledDictionaryMap);
|
const metas = await this._database.findKanjiMetaBulk(kanjiList, enabledDictionaryMap);
|
||||||
for (const {character, mode, data, dictionary, index} of metas) {
|
for (const {character, mode, data, dictionary, index} of metas) {
|
||||||
|
const dictionaryPriority = this._getDictionaryPriority(dictionary, enabledDictionaryMap);
|
||||||
switch (mode) {
|
switch (mode) {
|
||||||
case 'freq':
|
case 'freq':
|
||||||
{
|
{
|
||||||
const frequencyData = this._getKanjiFrequencyData(character, dictionary, data);
|
const {frequencies} = definitions[index];
|
||||||
definitions[index].frequencies.push(frequencyData);
|
frequencies.push({index: frequencies.length, dictionary, dictionaryPriority, character, frequency: data});
|
||||||
}
|
}
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
for (const definition of definitions) {
|
||||||
|
this._sortKanjiDefinitionMeta(definition);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
async _expandTags(names, dictionary) {
|
async _expandTags(names, dictionary) {
|
||||||
@ -806,32 +813,6 @@ class Translator {
|
|||||||
return tagMetaList;
|
return tagMetaList;
|
||||||
}
|
}
|
||||||
|
|
||||||
_getTermFrequencyData(expression, reading, dictionary, data) {
|
|
||||||
let frequency = data;
|
|
||||||
const hasReading = (data !== null && typeof data === 'object');
|
|
||||||
if (hasReading) {
|
|
||||||
if (data.reading !== reading) { return null; }
|
|
||||||
frequency = data.frequency;
|
|
||||||
}
|
|
||||||
return {dictionary, expression, reading, hasReading, frequency};
|
|
||||||
}
|
|
||||||
|
|
||||||
_getKanjiFrequencyData(character, dictionary, data) {
|
|
||||||
return {dictionary, character, frequency: data};
|
|
||||||
}
|
|
||||||
|
|
||||||
async _getPitchData(expression, reading, dictionary, data) {
|
|
||||||
if (data.reading !== reading) { return null; }
|
|
||||||
|
|
||||||
const pitches = [];
|
|
||||||
for (let {position, tags} of data.pitches) {
|
|
||||||
tags = Array.isArray(tags) ? await this._expandTags(tags, dictionary) : [];
|
|
||||||
pitches.push({position, tags});
|
|
||||||
}
|
|
||||||
|
|
||||||
return {expression, reading, dictionary, pitches};
|
|
||||||
}
|
|
||||||
|
|
||||||
// Simple helpers
|
// Simple helpers
|
||||||
|
|
||||||
_scoreToTermFrequency(score) {
|
_scoreToTermFrequency(score) {
|
||||||
@ -1000,6 +981,17 @@ class Translator {
|
|||||||
return result;
|
return result;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
_getAllDefinitions(definitions) {
|
||||||
|
definitions = [...definitions];
|
||||||
|
for (let i = 0; i < definitions.length; ++i) {
|
||||||
|
const childDefinitions = definitions[i].definitions;
|
||||||
|
if (Array.isArray(childDefinitions)) {
|
||||||
|
definitions.push(...childDefinitions);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return definitions;
|
||||||
|
}
|
||||||
|
|
||||||
// Reduction functions
|
// Reduction functions
|
||||||
|
|
||||||
_getTermTagsScoreSum(termTags) {
|
_getTermTagsScoreSum(termTags) {
|
||||||
@ -1334,6 +1326,45 @@ class Translator {
|
|||||||
});
|
});
|
||||||
}
|
}
|
||||||
|
|
||||||
|
_sortTermDefinitionMeta(definition) {
|
||||||
|
const compareFunction = (v1, v2) => {
|
||||||
|
// Sort by dictionary
|
||||||
|
let i = v2.dictionaryPriority - v1.dictionaryPriority;
|
||||||
|
if (i !== 0) { return i; }
|
||||||
|
|
||||||
|
// Sory by expression order
|
||||||
|
i = v1.expressionIndex - v2.expressionIndex;
|
||||||
|
if (i !== 0) { return i; }
|
||||||
|
|
||||||
|
// Default order
|
||||||
|
i = v1.index - v2.index;
|
||||||
|
return i;
|
||||||
|
};
|
||||||
|
|
||||||
|
const {expressions, frequencies: frequencies1, pitches: pitches1} = definition;
|
||||||
|
frequencies1.sort(compareFunction);
|
||||||
|
pitches1.sort(compareFunction);
|
||||||
|
for (const {frequencies: frequencies2, pitches: pitches2} of expressions) {
|
||||||
|
frequencies2.sort(compareFunction);
|
||||||
|
pitches2.sort(compareFunction);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
_sortKanjiDefinitionMeta(definition) {
|
||||||
|
const compareFunction = (v1, v2) => {
|
||||||
|
// Sort by dictionary
|
||||||
|
let i = v2.dictionaryPriority - v1.dictionaryPriority;
|
||||||
|
if (i !== 0) { return i; }
|
||||||
|
|
||||||
|
// Default order
|
||||||
|
i = v1.index - v2.index;
|
||||||
|
return i;
|
||||||
|
};
|
||||||
|
|
||||||
|
const {frequencies} = definition;
|
||||||
|
frequencies.sort(compareFunction);
|
||||||
|
}
|
||||||
|
|
||||||
// Regex functions
|
// Regex functions
|
||||||
|
|
||||||
_applyTextReplacements(text, sourceMap, replacements) {
|
_applyTextReplacements(text, sourceMap, replacements) {
|
||||||
|
File diff suppressed because it is too large
Load Diff
Loading…
Reference in New Issue
Block a user