From e15dea9921a50b59e2f2098f8b385449e85dbeac Mon Sep 17 00:00:00 2001 From: Alex Yatskov Date: Sat, 9 Nov 2013 13:04:36 -0800 Subject: [PATCH] Adding translation engine Former-commit-id: 712e8584fe681e3ef397d4caa4094a6ce3eb2b1e --- yomi_base/japanese2/__init__.py | 33 ++++++++++++++ yomi_base/japanese2/translate.py | 74 ++++++++++++++++++++++++++++++++ 2 files changed, 107 insertions(+) create mode 100644 yomi_base/japanese2/translate.py diff --git a/yomi_base/japanese2/__init__.py b/yomi_base/japanese2/__init__.py index e69de29..8ccf44c 100644 --- a/yomi_base/japanese2/__init__.py +++ b/yomi_base/japanese2/__init__.py @@ -0,0 +1,33 @@ +# -*- coding: utf-8 -*- + +# Copyright (C) 2011 Alex Yatskov +# +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program. If not, see http://www.gnu.org/licenses/.
import os.path
from dictionary import Dictionary
from deinflect import Deinflector
from translate import Translator


def buildRelPath(path):
    """Return `path` joined onto the directory that contains this module."""
    return os.path.join(os.path.dirname(__file__), path)


def initLanguage():
    """Create the Translator for this language package.

    The Deinflector is constructed with the path of `data/deinflect.json`
    and the Dictionary with the path of `data/dictionary.db`, both resolved
    relative to this module's directory.
    """
    rulesPath = buildRelPath('data/deinflect.json')
    databasePath = buildRelPath('data/dictionary.db')
    return Translator(Deinflector(rulesPath), Dictionary(databasePath))


# diff --git a/yomi_base/japanese2/translate.py b/yomi_base/japanese2/translate.py
# new file mode 100644
# index 0000000..db382cd
# --- /dev/null
# +++ b/yomi_base/japanese2/translate.py
# @@ -0,0 +1,74 @@
#
# -*- coding: utf-8 -*-
#
# Copyright (C) 2011 Alex Yatskov
# This module is based on Rikaichan code written by Jonathan Zarate
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see http://www.gnu.org/licenses/.
import operator
import re


class Translator:
    """Find dictionary entries for the leading substrings of a selection.

    The deinflector object must provide `deinflect(term, validator)` and the
    dictionary object must provide `findTerm(term)`, which yields
    `(expression, reading, definition, tags)` tuples.
    """

    def __init__(self, deinflector, dictionary):
        """Store the deinflector and dictionary used for every lookup."""
        self.deinflector = deinflector
        self.dictionary = dictionary

    def wordSearch(self, selection):
        """Look up every prefix of `selection`, longest prefix first.

        Returns a `(results, length)` pair. `results` is a list of
        `(expression, reading, definition, rules, source)` tuples sorted by
        descending expression length; `length` is the length of the longest
        `source` text that produced a result (0 when nothing matched).
        """
        groups = dict()

        # `range` (rather than the Python 2 only `xrange`) keeps this loop
        # working on both Python 2 and Python 3.
        for end in range(len(selection), 0, -1):
            term = selection[:end]

            deinflections = self.deinflector.deinflect(term, self.validator)
            if deinflections is None:
                # No deinflection data: look the prefix up verbatim.
                self.processTerm(groups, term)
            else:
                # Each deinflection is a mapping that is expanded into
                # processTerm keyword arguments.
                # NOTE(review): presumably keyed by source/rules/root;
                # confirm against the deinflector implementation.
                for deinflection in deinflections:
                    self.processTerm(groups, **deinflection)

        # Build real lists (not lazy map/filter objects) and drop any falsy
        # formatted result, as the original truth filter did.
        formatted = [self.formatResult(group) for group in groups.items()]
        results = sorted(
            [result for result in formatted if result],
            key=lambda result: len(result[0]),
            reverse=True
        )

        length = 0
        for _expression, _reading, _definition, _rules, source in results:
            length = max(length, len(source))

        return results, length

    def processTerm(self, groups, source, rules=None, root=str()):
        """Record the dictionary entries found for one candidate term.

        `groups` is updated in place, keyed by
        `(expression, reading, definition)`. The first candidate to produce
        a key wins, so an existing key is never overwritten. `root` is the
        text looked up in the dictionary and defaults to `source`; `rules`
        defaults to a fresh empty list.
        """
        # A fresh list per call: the value is stored in `groups` and handed
        # back to callers, so a shared default list could be mutated by one
        # caller and leak into every later lookup.
        if rules is None:
            rules = []
        root = root or source

        for entry in self.dictionary.findTerm(root):
            expression, reading, definition, tags = entry
            key = expression, reading, definition
            if key not in groups:
                groups[key] = entry, source, rules

    def formatResult(self, group):
        """Flatten one `groups` item into a result tuple.

        `group` is a `(key, value)` pair as stored by processTerm; the
        return value is `(expression, reading, definition, rules, source)`.
        """
        (expression, reading, definition), (entry, source, rules) = group
        return expression, reading, definition, rules, source

    def validator(self, term):
        """Return the list of tags of every dictionary entry for `term`."""
        return [
            tags
            for expression, reading, definitions, tags
            in self.dictionary.findTerm(term)
        ]