From 9ceef4649f07211079017d2cfcbc3756dd9a509b Mon Sep 17 00:00:00 2001 From: Alex Yatskov Date: Sat, 16 Apr 2016 20:11:27 -0700 Subject: [PATCH] Fixes to deinflection logic --- ext/bg/js/deinflector.js | 19 ++++--------------- ext/bg/js/translator.js | 36 +++++++++++++++++++++-------------- util/compile.py | 41 +++++++++++++++++++++++++--------------- 3 files changed, 52 insertions(+), 44 deletions(-) diff --git a/ext/bg/js/deinflector.js b/ext/bg/js/deinflector.js index 03f9d40a..3c24ae3a 100644 --- a/ext/bg/js/deinflector.js +++ b/ext/bg/js/deinflector.js @@ -32,7 +32,7 @@ class Deinflection { } for (const tag of this.tags) { - if (this.searchTags(tag, tags)) { + if (tags.indexOf(tag) !== -1) { return true; } } @@ -43,7 +43,7 @@ class Deinflection { deinflect(validator, rules) { if (this.validate(validator)) { - const child = new Deinflection(this.term); + const child = new Deinflection(this.term, this.tags); this.children.push(child); } @@ -52,7 +52,7 @@ class Deinflection { for (const v of variants) { let allowed = this.tags.length === 0; for (const tag of this.tags) { - if (this.searchTags(tag, v.tagsIn)) { + if (v.tagsIn.indexOf(tag) !== -1) { allowed = true; break; } @@ -73,20 +73,9 @@ class Deinflection { return this.children.length > 0; } - searchTags(tag, tags) { - for (const t of tags) { - const re = new RegExp(tag); - if (re.test(t)) { - return true; - } - } - - return false; - } - gather() { if (this.children.length === 0) { - return [{root: this.term, rules: []}]; + return [{root: this.term, tags: this.tags, rules: []}]; } const paths = []; diff --git a/ext/bg/js/translator.js b/ext/bg/js/translator.js index 101083e8..46d63ed0 100644 --- a/ext/bg/js/translator.js +++ b/ext/bg/js/translator.js @@ -77,11 +77,9 @@ class Translator { return tags; }); - if (dfs === null) { - this.processTerm(groups, term); - } else { + if (dfs !== null) { for (const df of dfs) { - this.processTerm(groups, df.source, df.rules, df.root); + this.processTerm(groups, df.source, df.tags, df.rules, df.root); } } } @@ -141,20 +139,30 @@ class Translator { return results; } - processTerm(groups, source, rules=[], root='') { - for (const entry of this.dictionary.findTerm(root || source)) { + processTerm(groups, source, tags, rules=[], root='') { + for (const entry of this.dictionary.findTerm(root)) { if (entry.id in groups) { continue; } - groups[entry.id] = { - expression: entry.expression, - reading: entry.reading, - glossary: entry.glossary, - tags: entry.tags, - source: source, - rules: rules - }; + let matched = tags.length == 0; + for (const tag of tags) { + if (entry.tags.indexOf(tag) !== -1) { + matched = true; + break; + } + } + + if (matched) { + groups[entry.id] = { + expression: entry.expression, + reading: entry.reading, + glossary: entry.glossary, + tags: entry.tags, + source: source, + rules: rules + }; + } } } diff --git a/util/compile.py b/util/compile.py index 1ed299f7..9c5a6bdd 100755 --- a/util/compile.py +++ b/util/compile.py @@ -112,21 +112,21 @@ PARSED_TAGS = { 'v4h': 'Yodan verb with "hu/fu" ending (archaic)', 'v4r': 'Yodan verb with "ru" ending (archaic)', 'v5': 'Godan verb (not completely classified)', - 'v5aru': 'Godan verb - -aru special class', - 'v5b': 'Godan verb with "bu" ending', - 'v5g': 'Godan verb with "gu" ending', - 'v5k': 'Godan verb with "ku" ending', - 'v5k-s': 'Godan verb - iku/yuku special class', - 'v5m': 'Godan verb with "mu" ending', - 'v5n': 'Godan verb with "nu" ending', - 'v5r': 'Godan verb with "ru" ending', - 'v5r-i': 'Godan verb with "ru" ending (irregular verb)', - 'v5s': 'Godan verb with "su" ending', - 'v5t': 'Godan verb with "tsu" ending', - 'v5u': 'Godan verb with "u" ending', - 'v5u-s': 'Godan verb with "u" ending (special class)', - 'v5uru': 'Godan verb - uru old class verb (old form of Eru)', - 'v5z': 'Godan verb with "zu" ending', + # 'v5aru': 'Godan verb - -aru special class', + # 'v5b': 'Godan verb with "bu" ending', + # 'v5g': 'Godan verb with "gu" ending', + # 'v5k': 'Godan verb with "ku" ending', + # 'v5k-s': 'Godan verb - iku/yuku special class', + # 'v5m': 'Godan verb with "mu" ending', + # 'v5n': 'Godan verb with "nu" ending', + # 'v5r': 'Godan verb with "ru" ending', + # 'v5r-i': 'Godan verb with "ru" ending (irregular verb)', + # 'v5s': 'Godan verb with "su" ending', + # 'v5t': 'Godan verb with "tsu" ending', + # 'v5u': 'Godan verb with "u" ending', + # 'v5u-s': 'Godan verb with "u" ending (special class)', + # 'v5uru': 'Godan verb - uru old class verb (old form of Eru)', + # 'v5z': 'Godan verb with "zu" ending', 'vi': 'intransitive verb', 'vk': 'kuru verb - special class', 'vn': 'irregular nu verb', @@ -167,6 +167,16 @@ def parse_kanji_dic(path): return results +def fixup_godan_verbs(tags): + results = [] + for tag in tags: + if tag.startswith('v5'): + tag = 'v5' + results.append(tag) + + return set(results) + + def parse_edict(path): results = [] for line in load_definitions(path): @@ -188,6 +198,7 @@ def parse_edict(path): continue tags_raw = set(filter(None, re.split(r'[\s\(\),]', dfn_match.group(1)))) + tags_raw = fixup_godan_verbs(tags_raw) tags_raw = tags_raw.intersection(set(PARSED_TAGS.keys())) tags = tags.union(tags_raw)