fixing various deinflection issues

This commit is contained in:
Alex Yatskov 2016-12-23 15:44:16 -08:00
parent 39fa11f72b
commit 3047f8c4fc
2 changed files with 51 additions and 47 deletions

View File

@ -43,34 +43,40 @@ class Translator {
findTerm(text, dictionaries, enableSoftKatakanaSearch) { findTerm(text, dictionaries, enableSoftKatakanaSearch) {
const cache = {}; const cache = {};
return this.findDeinflectionGroups(text, dictionaries, cache).then(groups => { return this.findTermDeinflections(text, dictionaries, cache).then(deinfHiragana => {
const textHiragana = wanakana._katakanaToHiragana(text); const textHiragana = wanakana._katakanaToHiragana(text);
if (text !== textHiragana && enableSoftKatakanaSearch) { if (text !== textHiragana && enableSoftKatakanaSearch) {
return this.findDeinflectionGroups(textHiragana, dictionaries, cache).then(groupsHiragana => { return this.findTermDeinflections(textHiragana, dictionaries, cache).then(deinfHiragana => deinfHiragana.concat(deinfHiragana));
for (const key in groupsHiragana) { } else {
groups[key] = groups[key] || groupsHiragana[key]; return deinfHiragana;
}
}).then(deinflections => {
let definitions = [];
for (const deinflection of deinflections) {
for (const definition of deinflection.definitions) {
definitions.push({
source: deinflection.source,
reasons: deinflection.reasons,
score: definition.score,
id: definition.id,
dictionary: definition.dictionary,
expression: definition.expression,
reading: definition.reading,
glossary: definition.glossary,
tags: sortTags(definition.tags.map(tag => buildTag(tag, definition.tagMeta)))
});
}
} }
return groups; definitions = undupeTermDefs(definitions);
}); definitions = sortTermDefs(definitions);
} else {
return groups;
}
}).then(groups => {
const definitions = [];
for (const key in groups) {
definitions.push(groups[key]);
}
let length = 0; let length = 0;
for (const result of definitions) { for (const definition of definitions) {
length = Math.max(length, result.source.length); length = Math.max(length, definition.source.length);
} }
return { return {length, definitions};
length,
definitions: sortTermDefs(definitions)
};
}); });
} }
@ -86,7 +92,7 @@ class Translator {
return Promise.all(promises).then(sets => this.processKanji(sets.reduce((a, b) => a.concat(b), []))); return Promise.all(promises).then(sets => this.processKanji(sets.reduce((a, b) => a.concat(b), [])));
} }
findDeinflectionGroups(text, dictionaries, cache) { findTermDeinflections(text, dictionaries, cache) {
const definer = term => { const definer = term => {
if (cache.hasOwnProperty(term)) { if (cache.hasOwnProperty(term)) {
return Promise.resolve(cache[term]); return Promise.resolve(cache[term]);
@ -95,38 +101,19 @@ class Translator {
return this.database.findTerm(term, dictionaries).then(definitions => cache[term] = definitions); return this.database.findTerm(term, dictionaries).then(definitions => cache[term] = definitions);
}; };
const groups = {}, promises = []; const promises = [];
for (let i = text.length; i > 0; --i) { for (let i = text.length; i > 0; --i) {
promises.push( promises.push(this.deinflector.deinflect(text.slice(0, i), definer));
this.deinflector.deinflect(text.slice(0, i), definer).then(deinflections => {
for (const deinflection of deinflections) {
this.processDeinflection(groups, deinflection);
}
})
);
} }
return Promise.all(promises).then(() => groups); return Promise.all(promises).then(results => {
let deinflections = [];
for (const result of results) {
deinflections = deinflections.concat(result);
} }
processDeinflection(groups, {source, rules, reasons, definitions}, dictionaries) { return deinflections;
for (const definition of definitions) { });
if (definition.id in groups) {
continue;
}
const tags = definition.tags.map(tag => buildTag(tag, definition.tagMeta));
groups[definition.id] = {
source,
reasons,
score: definition.score,
dictionary: definition.dictionary,
expression: definition.expression,
reading: definition.reading,
glossary: definition.glossary,
tags: sortTags(tags)
};
}
} }
processKanji(definitions) { processKanji(definitions) {

View File

@ -96,6 +96,23 @@ function sortTermDefs(definitions) {
}); });
} }
/**
 * Reduces a list of term definitions to one entry per `id`.
 *
 * When several definitions share an `id`, the one with the strictly longest
 * `expression` is kept; on a tie the earliest one wins.
 *
 * @param {Iterable<Object>} definitions - Records with `id` and `expression`.
 * @returns {Array<Object>} The kept definitions, in the property enumeration
 *     order of a plain object keyed by `id`.
 */
function undupeTermDefs(definitions) {
    const keptById = {};
    for (const candidate of definitions) {
        const id = candidate.id;
        // Skip the candidate unless it is the first for this id or strictly longer.
        if (keptById.hasOwnProperty(id) && !(candidate.expression.length > keptById[id].expression.length)) {
            continue;
        }
        keptById[id] = candidate;
    }

    const unique = [];
    for (const id in keptById) {
        unique.push(keptById[id]);
    }
    return unique;
}
function buildTag(name, meta) { function buildTag(name, meta) {
const tag = {name}; const tag = {name};
const symbol = name.split(':')[0]; const symbol = name.split(':')[0];