Adding stub for new deinflector and translator
Former-commit-id: 9fa5a50ea34281b02423ec4e31bda123f152a96c
This commit is contained in:
parent
2b37db4f9e
commit
e130e48f28
0
yomi_base/japanese2/__init__.py
Normal file
0
yomi_base/japanese2/__init__.py
Normal file
1
yomi_base/japanese2/data/.gitattributes
vendored
Normal file
1
yomi_base/japanese2/data/.gitattributes
vendored
Normal file
@ -0,0 +1 @@
|
|||||||
|
*.db filter=lfs diff=lfs merge=lfs -text
|
2708
yomi_base/japanese2/data/deinflect.json
Normal file
2708
yomi_base/japanese2/data/deinflect.json
Normal file
File diff suppressed because it is too large
Load Diff
3
yomi_base/japanese2/data/dictionary.db
Normal file
3
yomi_base/japanese2/data/dictionary.db
Normal file
@ -0,0 +1,3 @@
|
|||||||
|
version https://git-lfs.github.com/spec/v1
|
||||||
|
oid sha256:50737c19f1e501b771e429e944a8bbf24551c97a9047eb013175c8c20295539d
|
||||||
|
size 20406272
|
89
yomi_base/japanese2/deinflect.py
Normal file
89
yomi_base/japanese2/deinflect.py
Normal file
@ -0,0 +1,89 @@
|
|||||||
|
# -*- coding: utf-8 -*-
|
||||||
|
|
||||||
|
#
|
||||||
|
# Copyright (C) 2011 Alex Yatskov
|
||||||
|
#
|
||||||
|
# This program is free software: you can redistribute it and/or modify
|
||||||
|
# it under the terms of the GNU General Public License as published by
|
||||||
|
# the Free Software Foundation, either version 3 of the License, or
|
||||||
|
# (at your option) any later version.
|
||||||
|
#
|
||||||
|
# This program is distributed in the hope that it will be useful,
|
||||||
|
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||||
|
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||||
|
# GNU General Public License for more details.
|
||||||
|
#
|
||||||
|
# You should have received a copy of the GNU General Public License
|
||||||
|
# along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||||
|
#
|
||||||
|
|
||||||
|
import codecs
|
||||||
|
import json
|
||||||
|
|
||||||
|
|
||||||
|
#
|
||||||
|
# Deinflection
|
||||||
|
#
|
||||||
|
|
||||||
|
class Deinflection:
    """Node in a tree of candidate deinflections of a term.

    Each node stores a candidate ``term``, the ``tags`` attached to it, the
    name of the ``rule`` that produced it (empty for the root node) and the
    ``children`` obtained by applying further rules to it.
    """

    def __init__(self, term, tags=None, rule=str()):
        """Create a node.

        term: text of this candidate.
        tags: tags attached to this candidate; a fresh empty list when
            omitted.
        rule: name of the rule that produced this node (empty for the root).
        """
        self.children = list()
        self.term = term
        # A None sentinel replaces a `tags=list()` default: that default is
        # evaluated once, so every untagged node would share the same list.
        self.tags = list() if tags is None else tags
        self.rule = rule

    def deinflect(self, validator, rules):
        """Recursively populate ``children`` by applying ``rules``.

        validator: callable invoked as ``validator(term, tags)`` with the
            rewritten term and this node's tags; the rewritten term is kept
            only when the call returns a true value.
        rules: mapping of rule name to a list of variants, each a mapping
            with the keys 'tagsIn', 'tagsOut', 'kanaIn' and 'kanaOut'.

        Every prefix of ``self.term`` that ends with a variant's 'kanaIn'
        has that suffix replaced by 'kanaOut'. Accepted results become child
        nodes tagged with 'tagsOut', which are then deinflected in turn.
        """
        for rule, variants in rules.items():
            for variant in variants:
                tagsIn = variant['tagsIn']
                tagsOut = variant['tagsOut']
                kanaIn = variant['kanaIn']
                kanaOut = variant['kanaOut']

                # An untagged node accepts every variant; a tagged node needs
                # at least one of its tags listed in 'tagsIn'. This does not
                # depend on the prefix, so it is decided once per variant.
                if self.tags and not any(tag in tagsIn for tag in self.tags):
                    continue

                # `range` (not `xrange`) keeps this working on Python 2 and 3.
                for i in range(len(kanaIn), len(self.term) + 1):
                    term = self.term[:i]
                    if not term.endswith(kanaIn):
                        continue

                    # Slice by explicit length: `term[:-len(kanaIn)]` would
                    # yield an empty stem when 'kanaIn' is empty ([:-0]).
                    rebase = term[:len(term) - len(kanaIn)] + kanaOut
                    if validator(rebase, self.tags):
                        child = Deinflection(rebase, tagsOut, rule)
                        self.children.append(child)
                        child.deinflect(validator, rules)

    def dump(self, depth=0):
        """Return this subtree as text, one line per node.

        Each line is indented with ``depth`` tab characters, followed by the
        term and, when the node has a rule, the rule name in parentheses.
        Children follow their parent at ``depth + 1``.
        """
        result = u'%s%s' % (u'\t' * depth, self.term)
        if self.rule:
            result += u' (%s)' % self.rule
        result += u'\n'

        for child in self.children:
            result += child.dump(depth + 1)

        return result

    def __str__(self):
        """Return the same text as ``dump()`` starting at depth 0."""
        return self.dump()
|
||||||
|
|
||||||
|
|
||||||
|
#
|
||||||
|
# Deinflector
|
||||||
|
#
|
||||||
|
|
||||||
|
class Deinflector:
    """Loads deinflection rules from a JSON file and applies them to terms."""

    def __init__(self, filename):
        """Read the UTF-8 encoded JSON rule file at ``filename``.

        The parsed document is stored unchanged in ``self.rules``.
        """
        handle = codecs.open(filename, 'rb', 'utf-8')
        try:
            self.rules = json.load(handle)
        finally:
            handle.close()

    def deinflect(self, term, validator=lambda term, tags: True):
        """Build and return the deinflection tree rooted at ``term``.

        validator: callable invoked as ``validator(term, tags)``; the
            default accepts every candidate.
        """
        root = Deinflection(term)
        root.deinflect(validator, self.rules)
        return root
|
Loading…
Reference in New Issue
Block a user