Fixes to deinflection logic

This commit is contained in:
Alex Yatskov 2016-04-16 20:11:27 -07:00
parent ccf5459190
commit 9ceef4649f
3 changed files with 52 additions and 44 deletions

View File

@ -32,7 +32,7 @@ class Deinflection {
} }
for (const tag of this.tags) { for (const tag of this.tags) {
if (this.searchTags(tag, tags)) { if (tags.indexOf(tag) !== -1) {
return true; return true;
} }
} }
@ -43,7 +43,7 @@ class Deinflection {
deinflect(validator, rules) { deinflect(validator, rules) {
if (this.validate(validator)) { if (this.validate(validator)) {
const child = new Deinflection(this.term); const child = new Deinflection(this.term, this.tags);
this.children.push(child); this.children.push(child);
} }
@ -52,7 +52,7 @@ class Deinflection {
for (const v of variants) { for (const v of variants) {
let allowed = this.tags.length === 0; let allowed = this.tags.length === 0;
for (const tag of this.tags) { for (const tag of this.tags) {
if (this.searchTags(tag, v.tagsIn)) { if (v.tagsIn.indexOf(tag) !== -1) {
allowed = true; allowed = true;
break; break;
} }
@ -73,20 +73,9 @@ class Deinflection {
return this.children.length > 0; return this.children.length > 0;
} }
searchTags(tag, tags) {
for (const t of tags) {
const re = new RegExp(tag);
if (re.test(t)) {
return true;
}
}
return false;
}
gather() { gather() {
if (this.children.length === 0) { if (this.children.length === 0) {
return [{root: this.term, rules: []}]; return [{root: this.term, tags: this.tags, rules: []}];
} }
const paths = []; const paths = [];

View File

@ -77,11 +77,9 @@ class Translator {
return tags; return tags;
}); });
if (dfs === null) { if (dfs !== null) {
this.processTerm(groups, term);
} else {
for (const df of dfs) { for (const df of dfs) {
this.processTerm(groups, df.source, df.rules, df.root); this.processTerm(groups, df.source, df.tags, df.rules, df.root);
} }
} }
} }
@ -141,20 +139,30 @@ class Translator {
return results; return results;
} }
processTerm(groups, source, rules=[], root='') { processTerm(groups, source, tags, rules=[], root='') {
for (const entry of this.dictionary.findTerm(root || source)) { for (const entry of this.dictionary.findTerm(root)) {
if (entry.id in groups) { if (entry.id in groups) {
continue; continue;
} }
groups[entry.id] = { let matched = tags.length == 0;
expression: entry.expression, for (const tag of tags) {
reading: entry.reading, if (entry.tags.indexOf(tag) !== -1) {
glossary: entry.glossary, matched = true;
tags: entry.tags, break;
source: source, }
rules: rules }
};
if (matched) {
groups[entry.id] = {
expression: entry.expression,
reading: entry.reading,
glossary: entry.glossary,
tags: entry.tags,
source: source,
rules: rules
};
}
} }
} }

View File

@ -112,21 +112,21 @@ PARSED_TAGS = {
'v4h': 'Yodan verb with "hu/fu" ending (archaic)', 'v4h': 'Yodan verb with "hu/fu" ending (archaic)',
'v4r': 'Yodan verb with "ru" ending (archaic)', 'v4r': 'Yodan verb with "ru" ending (archaic)',
'v5': 'Godan verb (not completely classified)', 'v5': 'Godan verb (not completely classified)',
'v5aru': 'Godan verb - -aru special class', # 'v5aru': 'Godan verb - -aru special class',
'v5b': 'Godan verb with "bu" ending', # 'v5b': 'Godan verb with "bu" ending',
'v5g': 'Godan verb with "gu" ending', # 'v5g': 'Godan verb with "gu" ending',
'v5k': 'Godan verb with "ku" ending', # 'v5k': 'Godan verb with "ku" ending',
'v5k-s': 'Godan verb - iku/yuku special class', # 'v5k-s': 'Godan verb - iku/yuku special class',
'v5m': 'Godan verb with "mu" ending', # 'v5m': 'Godan verb with "mu" ending',
'v5n': 'Godan verb with "nu" ending', # 'v5n': 'Godan verb with "nu" ending',
'v5r': 'Godan verb with "ru" ending', # 'v5r': 'Godan verb with "ru" ending',
'v5r-i': 'Godan verb with "ru" ending (irregular verb)', # 'v5r-i': 'Godan verb with "ru" ending (irregular verb)',
'v5s': 'Godan verb with "su" ending', # 'v5s': 'Godan verb with "su" ending',
'v5t': 'Godan verb with "tsu" ending', # 'v5t': 'Godan verb with "tsu" ending',
'v5u': 'Godan verb with "u" ending', # 'v5u': 'Godan verb with "u" ending',
'v5u-s': 'Godan verb with "u" ending (special class)', # 'v5u-s': 'Godan verb with "u" ending (special class)',
'v5uru': 'Godan verb - uru old class verb (old form of Eru)', # 'v5uru': 'Godan verb - uru old class verb (old form of Eru)',
'v5z': 'Godan verb with "zu" ending', # 'v5z': 'Godan verb with "zu" ending',
'vi': 'intransitive verb', 'vi': 'intransitive verb',
'vk': 'kuru verb - special class', 'vk': 'kuru verb - special class',
'vn': 'irregular nu verb', 'vn': 'irregular nu verb',
@ -167,6 +167,16 @@ def parse_kanji_dic(path):
return results return results
def fixup_godan_verbs(tags):
    """Collapse every tag that starts with 'v5' into the single tag 'v5'.

    Tags that do not start with 'v5' are passed through unchanged.

    Args:
        tags: iterable of tag strings.

    Returns:
        A new set containing the normalized tags; the input is not modified.
    """
    return {'v5' if tag.startswith('v5') else tag for tag in tags}
def parse_edict(path): def parse_edict(path):
results = [] results = []
for line in load_definitions(path): for line in load_definitions(path):
@ -188,6 +198,7 @@ def parse_edict(path):
continue continue
tags_raw = set(filter(None, re.split(r'[\s\(\),]', dfn_match.group(1)))) tags_raw = set(filter(None, re.split(r'[\s\(\),]', dfn_match.group(1))))
tags_raw = fixup_godan_verbs(tags_raw)
tags_raw = tags_raw.intersection(set(PARSED_TAGS.keys())) tags_raw = tags_raw.intersection(set(PARSED_TAGS.keys()))
tags = tags.union(tags_raw) tags = tags.union(tags_raw)