Deinflector now properly working
Former-commit-id: c77faa975904ad9a6560aa8070f6f05f14c45d23
This commit is contained in:
parent
44f12fcf5c
commit
e7aeb2f9df
@ -19,6 +19,7 @@
|
|||||||
|
|
||||||
import codecs
|
import codecs
|
||||||
import json
|
import json
|
||||||
|
import re
|
||||||
|
|
||||||
|
|
||||||
#
|
#
|
||||||
@ -26,15 +27,14 @@ import json
|
|||||||
#
|
#
|
||||||
|
|
||||||
class Deinflection:
|
class Deinflection:
|
||||||
def __init__(self, term, parent=None, tags=list(), rule=str()):
|
def __init__(self, term, tags=list(), rule=str()):
|
||||||
self.children = list()
|
self.children = list()
|
||||||
self.term = term
|
self.term = term
|
||||||
self.parent = parent
|
|
||||||
self.tags = tags
|
self.tags = tags
|
||||||
self.rule = rule
|
self.rule = rule
|
||||||
|
|
||||||
|
|
||||||
def deinflect(self, validator, rules):
|
def deinflect(self, validator, rules, candidates):
|
||||||
for rule, variants in rules.items():
|
for rule, variants in rules.items():
|
||||||
for variant in variants:
|
for variant in variants:
|
||||||
tagsIn = variant['tagsIn']
|
tagsIn = variant['tagsIn']
|
||||||
@ -42,40 +42,57 @@ class Deinflection:
|
|||||||
kanaIn = variant['kanaIn']
|
kanaIn = variant['kanaIn']
|
||||||
kanaOut = variant['kanaOut']
|
kanaOut = variant['kanaOut']
|
||||||
|
|
||||||
allowed = not self.tags
|
allowed = len(self.tags) == 0
|
||||||
for tag in self.tags:
|
for tag in self.tags:
|
||||||
if tag in tagsIn:
|
if self.searchTags(tag, tagsIn):
|
||||||
allowed = True
|
allowed = True
|
||||||
|
break
|
||||||
|
|
||||||
if not allowed:
|
if not allowed or not self.term.endswith(kanaIn):
|
||||||
continue
|
continue
|
||||||
|
|
||||||
for i in xrange(len(kanaIn), len(self.term) + 1):
|
term = self.term[:-len(kanaIn)] + kanaOut
|
||||||
term = self.term[:i]
|
candidates.update([term])
|
||||||
if not term.endswith(kanaIn):
|
|
||||||
continue
|
|
||||||
|
|
||||||
rebase = term[:-len(kanaIn)] + kanaOut
|
child = Deinflection(term, tagsOut, rule)
|
||||||
if validator(rebase, self.tags):
|
if child.deinflect(validator, rules, candidates):
|
||||||
child = Deinflection(rebase, term, tagsOut, rule)
|
|
||||||
self.children.append(child)
|
self.children.append(child)
|
||||||
child.deinflect(validator, rules)
|
|
||||||
|
if len(self.children) > 0:
|
||||||
|
return True
|
||||||
|
|
||||||
|
for tags in validator(self.term):
|
||||||
|
for tag in self.tags:
|
||||||
|
if self.searchTags(tag, tags):
|
||||||
|
return True
|
||||||
|
|
||||||
|
|
||||||
def dump(self, depth=0):
|
def searchTags(self, tag, tags):
|
||||||
result = u'%s%s' % (u'\t' * depth, self.term)
|
for t in tags:
|
||||||
if self.rule:
|
if re.search(tag, t):
|
||||||
result += u' (%s %s)' % (self.parent, self.rule)
|
return True
|
||||||
result += u'\n'
|
|
||||||
|
|
||||||
|
|
||||||
|
def gather(self):
|
||||||
|
if len(self.children) == 0:
|
||||||
|
endpoint = {
|
||||||
|
'root': self.term,
|
||||||
|
'term': self.term,
|
||||||
|
'rules': [self.rule] if self.rule else list()
|
||||||
|
}
|
||||||
|
|
||||||
|
return [endpoint]
|
||||||
|
|
||||||
|
paths = list()
|
||||||
for child in self.children:
|
for child in self.children:
|
||||||
result += child.dump(depth + 1)
|
for path in child.gather():
|
||||||
|
if self.rule:
|
||||||
|
path['rules'].append(self.rule)
|
||||||
|
else:
|
||||||
|
path['term'] = self.term
|
||||||
|
paths.append(path)
|
||||||
|
|
||||||
return result
|
return paths
|
||||||
|
|
||||||
|
|
||||||
def __str__(self):
|
|
||||||
return self.dump()
|
|
||||||
|
|
||||||
|
|
||||||
#
|
#
|
||||||
@ -89,6 +106,7 @@ class Deinflector:
|
|||||||
|
|
||||||
|
|
||||||
def deinflect(self, term, validator=lambda term, tags: True):
|
def deinflect(self, term, validator=lambda term, tags: True):
|
||||||
|
candidates = set()
|
||||||
node = Deinflection(term)
|
node = Deinflection(term)
|
||||||
node.deinflect(validator, self.rules)
|
node.deinflect(validator, self.rules, candidates)
|
||||||
return node
|
return node.gather(), candidates
|
||||||
|
Loading…
Reference in New Issue
Block a user