Optimize internal data structure used by the Deinflector class

This commit is contained in:
toasted-nutbread 2019-10-05 16:24:42 -04:00
parent 30263c3db8
commit 50a47348a7
2 changed files with 51 additions and 39 deletions

View File

@ -19,51 +19,74 @@
class Deinflector { class Deinflector {
constructor(reasons) { constructor(reasons) {
this.reasons = reasons; this.reasons = Deinflector.normalizeReasons(reasons);
} }
deinflect(source) { deinflect(source) {
const results = [{ const results = [{
source, source,
term: source, term: source,
rules: [], rules: 0,
definitions: [], definitions: [],
reasons: [] reasons: []
}]; }];
for (let i = 0; i < results.length; ++i) { for (let i = 0; i < results.length; ++i) {
const entry = results[i]; const {rules, term, reasons} = results[i];
for (const [reason, variants] of this.reasons) {
for (const reason in this.reasons) { for (const [kanaIn, kanaOut, rulesIn, rulesOut] of variants) {
for (const variant of this.reasons[reason]) { if (
let accept = entry.rules.length === 0; (rules !== 0 && (rules & rulesIn) === 0) ||
if (!accept) { !term.endsWith(kanaIn) ||
for (const rule of entry.rules) { (term.length - kanaIn.length + kanaOut.length) <= 0
if (variant.rulesIn.includes(rule)) { ) {
accept = true;
break;
}
}
}
if (!accept || !entry.term.endsWith(variant.kanaIn)) {
continue;
}
const term = entry.term.slice(0, -variant.kanaIn.length) + variant.kanaOut;
if (term.length === 0) {
continue; continue;
} }
results.push({ results.push({
source, source,
term, term: term.slice(0, -kanaIn.length) + kanaOut,
rules: variant.rulesOut, rules: rulesOut,
definitions: [], definitions: [],
reasons: [reason, ...entry.reasons] reasons: [reason, ...reasons]
}); });
} }
} }
} }
return results; return results;
} }
static normalizeReasons(reasons) {
const normalizedReasons = [];
for (const reason in reasons) {
const variants = [];
for (const {kanaIn, kanaOut, rulesIn, rulesOut} of reasons[reason]) {
variants.push([
kanaIn,
kanaOut,
Deinflector.rulesToRuleFlags(rulesIn),
Deinflector.rulesToRuleFlags(rulesOut)
]);
}
normalizedReasons.push([reason, variants]);
}
return normalizedReasons;
}
static rulesToRuleFlags(rules) {
const ruleTypes = Deinflector.ruleTypes;
let value = 0;
for (const rule of rules) {
value |= ruleTypes[rule];
}
return value;
}
} }
// Maps each rule name to its own bit, so that a set of rules can be stored as
// one integer (see Deinflector.rulesToRuleFlags) and compared with bitwise AND.
Deinflector.ruleTypes = {
    'v1':    1 << 0, // Verb ichidan
    'v5':    1 << 1, // Verb godan
    'vs':    1 << 2, // Verb suru
    'vk':    1 << 3, // Verb kuru
    'adj-i': 1 << 4, // Adjective i
    'iru':   1 << 5, // Intermediate -iru endings for progressive or perfect tense
};

View File

@ -238,8 +238,10 @@ class Translator {
const definitions = await this.database.findTermsBulk(uniqueDeinflectionTerms, titles); const definitions = await this.database.findTermsBulk(uniqueDeinflectionTerms, titles);
for (const definition of definitions) { for (const definition of definitions) {
const definitionRules = Deinflector.rulesToRuleFlags(definition.rules);
for (const deinflection of uniqueDeinflectionArrays[definition.index]) { for (const deinflection of uniqueDeinflectionArrays[definition.index]) {
if (Translator.definitionContainsAnyRule(definition, deinflection.rules)) { const deinflectionRules = deinflection.rules;
if (deinflectionRules === 0 || (definitionRules & deinflectionRules) !== 0) {
deinflection.definitions.push(definition); deinflection.definitions.push(definition);
} }
} }
@ -248,19 +250,6 @@ class Translator {
return deinflections.filter(e => e.definitions.length > 0); return deinflections.filter(e => e.definitions.length > 0);
} }
static definitionContainsAnyRule(definition, rules) {
if (rules.length === 0) {
return true;
}
const definitionRules = definition.rules;
for (const rule of rules) {
if (definitionRules.includes(rule)) {
return true;
}
}
return false;
}
getDeinflections(text) { getDeinflections(text) {
const deinflections = []; const deinflections = [];