fixing various deinflection issues
This commit is contained in:
parent
39fa11f72b
commit
3047f8c4fc
@ -43,34 +43,40 @@ class Translator {
|
|||||||
|
|
||||||
findTerm(text, dictionaries, enableSoftKatakanaSearch) {
|
findTerm(text, dictionaries, enableSoftKatakanaSearch) {
|
||||||
const cache = {};
|
const cache = {};
|
||||||
return this.findDeinflectionGroups(text, dictionaries, cache).then(groups => {
|
return this.findTermDeinflections(text, dictionaries, cache).then(deinfHiragana => {
|
||||||
const textHiragana = wanakana._katakanaToHiragana(text);
|
const textHiragana = wanakana._katakanaToHiragana(text);
|
||||||
if (text !== textHiragana && enableSoftKatakanaSearch) {
|
if (text !== textHiragana && enableSoftKatakanaSearch) {
|
||||||
return this.findDeinflectionGroups(textHiragana, dictionaries, cache).then(groupsHiragana => {
|
return this.findTermDeinflections(textHiragana, dictionaries, cache).then(deinfHiragana => deinfHiragana.concat(deinfHiragana));
|
||||||
for (const key in groupsHiragana) {
|
|
||||||
groups[key] = groups[key] || groupsHiragana[key];
|
|
||||||
}
|
|
||||||
|
|
||||||
return groups;
|
|
||||||
});
|
|
||||||
} else {
|
} else {
|
||||||
return groups;
|
return deinfHiragana;
|
||||||
}
|
}
|
||||||
}).then(groups => {
|
}).then(deinflections => {
|
||||||
const definitions = [];
|
let definitions = [];
|
||||||
for (const key in groups) {
|
for (const deinflection of deinflections) {
|
||||||
definitions.push(groups[key]);
|
for (const definition of deinflection.definitions) {
|
||||||
|
definitions.push({
|
||||||
|
source: deinflection.source,
|
||||||
|
reasons: deinflection.reasons,
|
||||||
|
score: definition.score,
|
||||||
|
id: definition.id,
|
||||||
|
dictionary: definition.dictionary,
|
||||||
|
expression: definition.expression,
|
||||||
|
reading: definition.reading,
|
||||||
|
glossary: definition.glossary,
|
||||||
|
tags: sortTags(definition.tags.map(tag => buildTag(tag, definition.tagMeta)))
|
||||||
|
});
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
definitions = undupeTermDefs(definitions);
|
||||||
|
definitions = sortTermDefs(definitions);
|
||||||
|
|
||||||
let length = 0;
|
let length = 0;
|
||||||
for (const result of definitions) {
|
for (const definition of definitions) {
|
||||||
length = Math.max(length, result.source.length);
|
length = Math.max(length, definition.source.length);
|
||||||
}
|
}
|
||||||
|
|
||||||
return {
|
return {length, definitions};
|
||||||
length,
|
|
||||||
definitions: sortTermDefs(definitions)
|
|
||||||
};
|
|
||||||
});
|
});
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -86,7 +92,7 @@ class Translator {
|
|||||||
return Promise.all(promises).then(sets => this.processKanji(sets.reduce((a, b) => a.concat(b), [])));
|
return Promise.all(promises).then(sets => this.processKanji(sets.reduce((a, b) => a.concat(b), [])));
|
||||||
}
|
}
|
||||||
|
|
||||||
findDeinflectionGroups(text, dictionaries, cache) {
|
findTermDeinflections(text, dictionaries, cache) {
|
||||||
const definer = term => {
|
const definer = term => {
|
||||||
if (cache.hasOwnProperty(term)) {
|
if (cache.hasOwnProperty(term)) {
|
||||||
return Promise.resolve(cache[term]);
|
return Promise.resolve(cache[term]);
|
||||||
@ -95,38 +101,19 @@ class Translator {
|
|||||||
return this.database.findTerm(term, dictionaries).then(definitions => cache[term] = definitions);
|
return this.database.findTerm(term, dictionaries).then(definitions => cache[term] = definitions);
|
||||||
};
|
};
|
||||||
|
|
||||||
const groups = {}, promises = [];
|
const promises = [];
|
||||||
for (let i = text.length; i > 0; --i) {
|
for (let i = text.length; i > 0; --i) {
|
||||||
promises.push(
|
promises.push(this.deinflector.deinflect(text.slice(0, i), definer));
|
||||||
this.deinflector.deinflect(text.slice(0, i), definer).then(deinflections => {
|
|
||||||
for (const deinflection of deinflections) {
|
|
||||||
this.processDeinflection(groups, deinflection);
|
|
||||||
}
|
|
||||||
})
|
|
||||||
);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
return Promise.all(promises).then(() => groups);
|
return Promise.all(promises).then(results => {
|
||||||
}
|
let deinflections = [];
|
||||||
|
for (const result of results) {
|
||||||
processDeinflection(groups, {source, rules, reasons, definitions}, dictionaries) {
|
deinflections = deinflections.concat(result);
|
||||||
for (const definition of definitions) {
|
|
||||||
if (definition.id in groups) {
|
|
||||||
continue;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
const tags = definition.tags.map(tag => buildTag(tag, definition.tagMeta));
|
return deinflections;
|
||||||
groups[definition.id] = {
|
});
|
||||||
source,
|
|
||||||
reasons,
|
|
||||||
score: definition.score,
|
|
||||||
dictionary: definition.dictionary,
|
|
||||||
expression: definition.expression,
|
|
||||||
reading: definition.reading,
|
|
||||||
glossary: definition.glossary,
|
|
||||||
tags: sortTags(tags)
|
|
||||||
};
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
processKanji(definitions) {
|
processKanji(definitions) {
|
||||||
|
@ -96,6 +96,23 @@ function sortTermDefs(definitions) {
|
|||||||
});
|
});
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
 * Collapses definitions that share the same `id` into a single entry.
 *
 * The first definition seen for an id is kept unless a later one with the
 * same id has a strictly longer `expression`, in which case the later one
 * replaces it.
 *
 * @param {Array<{id: (number|string), expression: string}>} definitions -
 *     definitions to de-duplicate; not modified.
 * @returns {Array<Object>} one definition per distinct id, in the property
 *     enumeration order of the lookup table (integer-like ids ascending,
 *     then other ids in first-seen order).
 */
function undupeTermDefs(definitions) {
    // Null-prototype table: an id such as "hasOwnProperty" or "__proto__"
    // is stored as ordinary data and cannot collide with inherited members.
    const definitionGroups = Object.create(null);
    for (const definition of definitions) {
        const definitionExisting = definitionGroups[definition.id];
        if (!(definition.id in definitionGroups) || definition.expression.length > definitionExisting.expression.length) {
            definitionGroups[definition.id] = definition;
        }
    }

    const definitionsUnique = [];
    for (const key in definitionGroups) {
        definitionsUnique.push(definitionGroups[key]);
    }

    return definitionsUnique;
}
|
||||||
|
|
||||||
function buildTag(name, meta) {
|
function buildTag(name, meta) {
|
||||||
const tag = {name};
|
const tag = {name};
|
||||||
const symbol = name.split(':')[0];
|
const symbol = name.split(':')[0];
|
||||||
|
Loading…
Reference in New Issue
Block a user