Fixes to deinflection logic
This commit is contained in:
parent
ccf5459190
commit
9ceef4649f
@ -32,7 +32,7 @@ class Deinflection {
|
|||||||
}
|
}
|
||||||
|
|
||||||
for (const tag of this.tags) {
|
for (const tag of this.tags) {
|
||||||
if (this.searchTags(tag, tags)) {
|
if (tags.indexOf(tag) !== -1) {
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -43,7 +43,7 @@ class Deinflection {
|
|||||||
|
|
||||||
deinflect(validator, rules) {
|
deinflect(validator, rules) {
|
||||||
if (this.validate(validator)) {
|
if (this.validate(validator)) {
|
||||||
const child = new Deinflection(this.term);
|
const child = new Deinflection(this.term, this.tags);
|
||||||
this.children.push(child);
|
this.children.push(child);
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -52,7 +52,7 @@ class Deinflection {
|
|||||||
for (const v of variants) {
|
for (const v of variants) {
|
||||||
let allowed = this.tags.length === 0;
|
let allowed = this.tags.length === 0;
|
||||||
for (const tag of this.tags) {
|
for (const tag of this.tags) {
|
||||||
if (this.searchTags(tag, v.tagsIn)) {
|
if (v.tagsIn.indexOf(tag) !== -1) {
|
||||||
allowed = true;
|
allowed = true;
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
@ -73,20 +73,9 @@ class Deinflection {
|
|||||||
return this.children.length > 0;
|
return this.children.length > 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
searchTags(tag, tags) {
|
|
||||||
for (const t of tags) {
|
|
||||||
const re = new RegExp(tag);
|
|
||||||
if (re.test(t)) {
|
|
||||||
return true;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
return false;
|
|
||||||
}
|
|
||||||
|
|
||||||
gather() {
|
gather() {
|
||||||
if (this.children.length === 0) {
|
if (this.children.length === 0) {
|
||||||
return [{root: this.term, rules: []}];
|
return [{root: this.term, tags: this.tags, rules: []}];
|
||||||
}
|
}
|
||||||
|
|
||||||
const paths = [];
|
const paths = [];
|
||||||
|
@ -77,11 +77,9 @@ class Translator {
|
|||||||
return tags;
|
return tags;
|
||||||
});
|
});
|
||||||
|
|
||||||
if (dfs === null) {
|
if (dfs !== null) {
|
||||||
this.processTerm(groups, term);
|
|
||||||
} else {
|
|
||||||
for (const df of dfs) {
|
for (const df of dfs) {
|
||||||
this.processTerm(groups, df.source, df.rules, df.root);
|
this.processTerm(groups, df.source, df.tags, df.rules, df.root);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -141,20 +139,30 @@ class Translator {
|
|||||||
return results;
|
return results;
|
||||||
}
|
}
|
||||||
|
|
||||||
processTerm(groups, source, rules=[], root='') {
|
processTerm(groups, source, tags, rules=[], root='') {
|
||||||
for (const entry of this.dictionary.findTerm(root || source)) {
|
for (const entry of this.dictionary.findTerm(root)) {
|
||||||
if (entry.id in groups) {
|
if (entry.id in groups) {
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
|
|
||||||
groups[entry.id] = {
|
let matched = tags.length == 0;
|
||||||
expression: entry.expression,
|
for (const tag of tags) {
|
||||||
reading: entry.reading,
|
if (entry.tags.indexOf(tag) !== -1) {
|
||||||
glossary: entry.glossary,
|
matched = true;
|
||||||
tags: entry.tags,
|
break;
|
||||||
source: source,
|
}
|
||||||
rules: rules
|
}
|
||||||
};
|
|
||||||
|
if (matched) {
|
||||||
|
groups[entry.id] = {
|
||||||
|
expression: entry.expression,
|
||||||
|
reading: entry.reading,
|
||||||
|
glossary: entry.glossary,
|
||||||
|
tags: entry.tags,
|
||||||
|
source: source,
|
||||||
|
rules: rules
|
||||||
|
};
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -112,21 +112,21 @@ PARSED_TAGS = {
|
|||||||
'v4h': 'Yodan verb with "hu/fu" ending (archaic)',
|
'v4h': 'Yodan verb with "hu/fu" ending (archaic)',
|
||||||
'v4r': 'Yodan verb with "ru" ending (archaic)',
|
'v4r': 'Yodan verb with "ru" ending (archaic)',
|
||||||
'v5': 'Godan verb (not completely classified)',
|
'v5': 'Godan verb (not completely classified)',
|
||||||
'v5aru': 'Godan verb - -aru special class',
|
# 'v5aru': 'Godan verb - -aru special class',
|
||||||
'v5b': 'Godan verb with "bu" ending',
|
# 'v5b': 'Godan verb with "bu" ending',
|
||||||
'v5g': 'Godan verb with "gu" ending',
|
# 'v5g': 'Godan verb with "gu" ending',
|
||||||
'v5k': 'Godan verb with "ku" ending',
|
# 'v5k': 'Godan verb with "ku" ending',
|
||||||
'v5k-s': 'Godan verb - iku/yuku special class',
|
# 'v5k-s': 'Godan verb - iku/yuku special class',
|
||||||
'v5m': 'Godan verb with "mu" ending',
|
# 'v5m': 'Godan verb with "mu" ending',
|
||||||
'v5n': 'Godan verb with "nu" ending',
|
# 'v5n': 'Godan verb with "nu" ending',
|
||||||
'v5r': 'Godan verb with "ru" ending',
|
# 'v5r': 'Godan verb with "ru" ending',
|
||||||
'v5r-i': 'Godan verb with "ru" ending (irregular verb)',
|
# 'v5r-i': 'Godan verb with "ru" ending (irregular verb)',
|
||||||
'v5s': 'Godan verb with "su" ending',
|
# 'v5s': 'Godan verb with "su" ending',
|
||||||
'v5t': 'Godan verb with "tsu" ending',
|
# 'v5t': 'Godan verb with "tsu" ending',
|
||||||
'v5u': 'Godan verb with "u" ending',
|
# 'v5u': 'Godan verb with "u" ending',
|
||||||
'v5u-s': 'Godan verb with "u" ending (special class)',
|
# 'v5u-s': 'Godan verb with "u" ending (special class)',
|
||||||
'v5uru': 'Godan verb - uru old class verb (old form of Eru)',
|
# 'v5uru': 'Godan verb - uru old class verb (old form of Eru)',
|
||||||
'v5z': 'Godan verb with "zu" ending',
|
# 'v5z': 'Godan verb with "zu" ending',
|
||||||
'vi': 'intransitive verb',
|
'vi': 'intransitive verb',
|
||||||
'vk': 'kuru verb - special class',
|
'vk': 'kuru verb - special class',
|
||||||
'vn': 'irregular nu verb',
|
'vn': 'irregular nu verb',
|
||||||
@ -167,6 +167,16 @@ def parse_kanji_dic(path):
|
|||||||
return results
|
return results
|
||||||
|
|
||||||
|
|
||||||
|
def fixup_godan_verbs(tags):
    """Collapse every Godan verb sub-class tag into the generic 'v5' tag.

    Any tag that starts with 'v5' (for example 'v5k', 'v5r-i', 'v5u-s')
    is replaced by plain 'v5'; every other tag is kept unchanged.

    Args:
        tags: Iterable of tag strings.

    Returns:
        A new set of tags. Duplicates produced by the collapse are merged,
        so several Godan sub-classes yield a single 'v5' entry.
    """
    return {'v5' if tag.startswith('v5') else tag for tag in tags}
|
||||||
|
|
||||||
|
|
||||||
def parse_edict(path):
|
def parse_edict(path):
|
||||||
results = []
|
results = []
|
||||||
for line in load_definitions(path):
|
for line in load_definitions(path):
|
||||||
@ -188,6 +198,7 @@ def parse_edict(path):
|
|||||||
continue
|
continue
|
||||||
|
|
||||||
tags_raw = set(filter(None, re.split(r'[\s\(\),]', dfn_match.group(1))))
|
tags_raw = set(filter(None, re.split(r'[\s\(\),]', dfn_match.group(1))))
|
||||||
|
tags_raw = fixup_godan_verbs(tags_raw)
|
||||||
tags_raw = tags_raw.intersection(set(PARSED_TAGS.keys()))
|
tags_raw = tags_raw.intersection(set(PARSED_TAGS.keys()))
|
||||||
tags = tags.union(tags_raw)
|
tags = tags.union(tags_raw)
|
||||||
|
|
||||||
|
Loading…
Reference in New Issue
Block a user