2013-11-08 19:33:07 +00:00
|
|
|
# -*- coding: utf-8 -*-
|
|
|
|
|
2013-11-09 23:42:02 +00:00
|
|
|
# Copyright (C) 2013 Alex Yatskov
|
2013-11-08 19:33:07 +00:00
|
|
|
#
|
|
|
|
# This program is free software: you can redistribute it and/or modify
|
|
|
|
# it under the terms of the GNU General Public License as published by
|
|
|
|
# the Free Software Foundation, either version 3 of the License, or
|
|
|
|
# (at your option) any later version.
|
|
|
|
#
|
|
|
|
# This program is distributed in the hope that it will be useful,
|
|
|
|
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
|
|
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
|
|
# GNU General Public License for more details.
|
|
|
|
#
|
|
|
|
# You should have received a copy of the GNU General Public License
|
|
|
|
# along with this program. If not, see <http://www.gnu.org/licenses/>.
|
|
|
|
|
2013-11-09 21:26:21 +00:00
|
|
|
|
2013-11-08 19:33:07 +00:00
|
|
|
import codecs
|
|
|
|
import json
|
2013-11-09 18:17:53 +00:00
|
|
|
import re
|
2013-11-08 19:33:07 +00:00
|
|
|
|
|
|
|
|
|
|
|
#
|
|
|
|
# Deinflection
|
|
|
|
#
|
|
|
|
|
|
|
|
class Deinflection:
    """Node in a tree of candidate deinflections of a term.

    Each node holds one candidate term, the tag patterns attached to it by
    the rule variant that produced it, and the name of that rule. Children
    are the candidates reached by applying one more rule variant, plus a
    rule-less leaf whenever the node's own term passes validation.
    """

    def __init__(self, term, tags=None, rule=str()):
        """Create a node for `term`.

        term -- candidate string held by this node.
        tags -- tag patterns for this candidate (deinflect() passes a rule
                variant's 'tagsOut'); omitted or None means no constraint.
        rule -- name of the rule that produced this candidate; left empty
                for the starting term and for leaf nodes.
        """
        self.children = list()
        self.term = term
        # Build a fresh list per instance: a `tags=list()` default is a
        # single list object shared by every node created without tags.
        self.tags = list() if tags is None else tags
        self.rule = rule
        # Kept for compatibility; nothing in this class reads it.
        self.success = False

    def validate(self, validator):
        """Return True if the validator accepts this node's term.

        validator -- callable taking the term and returning an iterable of
                     tag collections (presumably one per dictionary match;
                     confirm against the caller).

        A node without tags is accepted as soon as the validator yields
        anything. Otherwise at least one of the node's tag patterns must
        match a tag in one of the yielded collections.
        """
        for tags in validator(self.term):
            if not self.tags:
                return True
            if any(self.searchTags(tag, tags) for tag in self.tags):
                return True
        return False

    def deinflect(self, validator, rules):
        """Grow the subtree below this node; return True if it has children.

        validator -- callable handed to validate().
        rules     -- mapping of rule name to a list of variants, each a
                     mapping with the keys 'tagsIn', 'tagsOut', 'kanaIn'
                     and 'kanaOut'.
        """
        if self.validate(validator):
            # The term is acceptable as it stands: record a rule-less leaf.
            self.children.append(Deinflection(self.term))

        for rule, variants in rules.items():
            for variant in variants:
                tagsIn = variant['tagsIn']
                tagsOut = variant['tagsOut']
                kanaIn = variant['kanaIn']
                kanaOut = variant['kanaOut']

                # A node without tags may use any variant; otherwise one of
                # its tag patterns has to match the variant's input tags.
                allowed = not self.tags or any(
                    self.searchTags(tag, tagsIn) for tag in self.tags)
                if not allowed or not self.term.endswith(kanaIn):
                    continue

                # Slice by explicit length: term[:-len(kanaIn)] would be
                # term[:0] (an empty string) when kanaIn is empty.
                stem = self.term[:len(self.term) - len(kanaIn)]

                child = Deinflection(stem + kanaOut, tagsOut, rule)
                # Keep only candidates that lead to at least one leaf.
                if child.deinflect(validator, rules):
                    self.children.append(child)

        return len(self.children) > 0

    def searchTags(self, tag, tags):
        """Return True if the regular expression `tag` matches any of `tags`."""
        return any(re.search(tag, t) for t in tags)

    def gather(self):
        """Flatten the subtree into a list of path dictionaries.

        A node without children yields a single dictionary holding its term
        under 'root' and an empty 'rules' list. Every ancestor on the way
        up appends its rule name (when it has one) to 'rules' and
        overwrites 'source' with its own term.
        """
        if len(self.children) == 0:
            return [{'root': self.term, 'rules': list()}]

        paths = list()
        for child in self.children:
            for path in child.gather():
                if self.rule:
                    path['rules'].append(self.rule)
                path['source'] = self.term
                paths.append(path)

        return paths
|
2013-11-08 19:33:07 +00:00
|
|
|
|
|
|
|
|
|
|
|
#
|
|
|
|
# Deinflector
|
|
|
|
#
|
|
|
|
|
|
|
|
class Deinflector:
    """Loads a deinflection rule table and applies it to terms."""

    def __init__(self, filename):
        """Read the rule table from the UTF-8 encoded JSON file `filename`."""
        with codecs.open(filename, 'rb', 'utf-8') as handle:
            loaded = json.load(handle)
        self.rules = loaded

    def deinflect(self, term, validator):
        """Deinflect `term` with the loaded rules.

        term      -- string to deinflect.
        validator -- callable forwarded to Deinflection.deinflect().

        Returns the list built by Deinflection.gather(), or None when
        Deinflection.deinflect() reports that nothing was found.
        """
        root = Deinflection(term)
        if not root.deinflect(validator, self.rules):
            return None
        return root.gather()
|