yomichan-anki/yomi_base/japanese/translate.py

# -*- coding: utf-8 -*-

# Copyright (C) 2013  Alex Yatskov
# This module is based on Rikaichan code written by Jonathan Zarate
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program.  If not, see <http://www.gnu.org/licenses/>.


import util


class Translator:
    """Resolve text into dictionary terms and kanji entries.

    The translator only relies on the following collaborator methods:
    ``deinflector.deinflect(term, validator)``,
    ``dictionary.findTerm(term[, wildcards])`` and
    ``dictionary.findKanji(character)``.
    """

    def __init__(self, deinflector, dictionary):
        """Store the deinflector and dictionary used for all lookups."""
        self.deinflector = deinflector
        self.dictionary  = dictionary


    def findTerm(self, text, wildcards=False):
        """Find dictionary entries for the prefixes of ``text``.

        The text is sanitized first, then every prefix (longest first) is
        handed to the deinflector; each deinflection it returns is resolved
        against the dictionary through ``processTerm``.

        Returns a ``(definitions, length)`` tuple. ``definitions`` is a list
        of result dicts ordered by longest ``source`` first, then entries
        whose tags contain ``'P'``, then entries with the fewest ``rules``.
        ``length`` is the length of the longest ``source`` among the results,
        or 0 when nothing matched.
        """
        text = util.sanitize(text, wildcards=wildcards)

        # NOTE(review): `wildcards` is only forwarded to util.sanitize here;
        # the dictionary lookups below use their defaults unless a
        # deinflection dict itself carries a 'wildcards' key -- confirm that
        # this is intended.
        def lookupTags(term):
            # Validator given to the deinflector: the tags of every
            # dictionary entry found for a candidate term.
            return [d['tags'] for d in self.dictionary.findTerm(term)]

        groups = {}
        # range() behaves the same as xrange() here and exists on both
        # Python 2 and Python 3.
        for end in range(len(text), 0, -1):
            dfs = self.deinflector.deinflect(text[:end], lookupTags)
            if dfs is None:
                continue

            for df in dfs:
                self.processTerm(groups, **df)

        definitions = sorted(
            groups.values(),
            key=lambda d: (len(d['source']), 'P' in d['tags'], -len(d['rules'])),
            reverse=True
        )

        # The primary sort key is the source length in descending order, so
        # the first result (if any) carries the longest matched source.
        length = len(definitions[0]['source']) if definitions else 0

        return definitions, length


    def findCharacters(self, text):
        """Look up each distinct character of ``text`` as a kanji.

        The text is sanitized with ``kana=False``; every distinct character
        is passed to ``dictionary.findKanji`` exactly once. Matches that are
        not ``None`` are returned in order of first appearance.
        """
        text = util.sanitize(text, kana=False)

        seen    = set()
        results = []
        for c in text:
            if c in seen:
                continue
            seen.add(c)

            match = self.dictionary.findKanji(c)
            if match is not None:
                results.append(match)

        return results


    def processTerm(self, groups, source, tags, rules=None, root='', wildcards=False):
        """Add the dictionary entries matching one deinflection to ``groups``.

        ``groups`` maps entry ids to result dicts and is updated in place;
        entries whose id is already present are left untouched. An entry
        returned by ``dictionary.findTerm(root, wildcards)`` is accepted when
        ``tags`` is empty or when at least one of ``tags`` occurs in the
        entry's own tags. Accepted entries are stored together with the
        ``source`` text and the ``rules`` that produced them.

        ``rules`` defaults to a new empty list on every call, so results
        created without explicit rules never share (and cannot corrupt) a
        single list object.
        """
        if rules is None:
            rules = []

        for entry in self.dictionary.findTerm(root, wildcards):
            if entry['id'] in groups:
                continue

            # With tags given, require at least one of them on the entry.
            if tags and not any(tag in entry['tags'] for tag in tags):
                continue

            groups[entry['id']] = {
                'expression': entry['expression'],
                'reading':    entry['reading'],
                'glossary':   entry['glossary'],
                'tags':       entry['tags'],
                'source':     source,
                'rules':      rules
            }
Adding translation engine Former-commit-id: 712e8584fe681e3ef397d4caa4094a6ce3eb2b1e 2013-11-09 21:04:36 +00:00			`# -- coding: utf-8 --`

Switching dictionary to return values in python dictionaries instead of tuples Former-commit-id: 3f0682e88a24faed24f9e12e7c9cee338e817672 2013-11-09 23:42:02 +00:00			`# Copyright (C) 2013 Alex Yatskov`
Adding translation engine Former-commit-id: 712e8584fe681e3ef397d4caa4094a6ce3eb2b1e 2013-11-09 21:04:36 +00:00			`# This module is based on Rikaichan code written by Jonathan Zarate`
			`#`
			`# This program is free software: you can redistribute it and/or modify`
			`# it under the terms of the GNU General Public License as published by`
			`# the Free Software Foundation, either version 3 of the License, or`
			`# (at your option) any later version.`
			`#`
			`# This program is distributed in the hope that it will be useful,`
			`# but WITHOUT ANY WARRANTY; without even the implied warranty of`
			`# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the`
			`# GNU General Public License for more details.`
			`#`
			`# You should have received a copy of the GNU General Public License`
			`# along with this program. If not, see <http://www.gnu.org/licenses/>.`


Adding imporved support for searching using wildcards Former-commit-id: 95d392046cc4d703fee433afabbd4ba3d04fad36 2013-11-16 19:21:35 +00:00			`import util`
Adding translation engine Former-commit-id: 712e8584fe681e3ef397d4caa4094a6ce3eb2b1e 2013-11-09 21:04:36 +00:00

			`class Translator:`
			`def __init__(self, deinflector, dictionary):`
			`self.deinflector = deinflector`
Cleanup, remove maxResults 2016-05-08 03:24:59 +00:00			`self.dictionary = dictionary`
Adding translation engine Former-commit-id: 712e8584fe681e3ef397d4caa4094a6ce3eb2b1e 2013-11-09 21:04:36 +00:00

Adding imporved support for searching using wildcards Former-commit-id: 95d392046cc4d703fee433afabbd4ba3d04fad36 2013-11-16 19:21:35 +00:00			`def findTerm(self, text, wildcards=False):`
Fixing deinflection bugs 2016-05-09 21:33:52 +00:00			`text = util.sanitize(text, wildcards=wildcards)`
Adding translation engine Former-commit-id: 712e8584fe681e3ef397d4caa4094a6ce3eb2b1e 2013-11-09 21:04:36 +00:00
Fixing deinflection bugs 2016-05-09 21:33:52 +00:00			`groups = {}`
Some basic support for displaying information about characters Former-commit-id: 895554df1f912021309091c88cd5b3c2a8b7211f 2013-11-11 04:27:25 +00:00			`for i in xrange(len(text), 0, -1):`
			`term = text[:i]`
Adding translation engine Former-commit-id: 712e8584fe681e3ef397d4caa4094a6ce3eb2b1e 2013-11-09 21:04:36 +00:00
Fixing deinflection bugs 2016-05-09 21:33:52 +00:00			`dfs = self.deinflector.deinflect(term, lambda term: [d['tags'] for d in self.dictionary.findTerm(term)])`
			`if dfs is None:`
			`continue`

			`for df in dfs:`
			`self.processTerm(groups, **df)`

			`definitions = groups.values()`
			`definitions = sorted(definitions, key=lambda d: (len(d['source']), 'P' in d['tags'], -len(d['rules'])), reverse=True)`
Adding translation engine Former-commit-id: 712e8584fe681e3ef397d4caa4094a6ce3eb2b1e 2013-11-09 21:04:36 +00:00
			`length = 0`
Fixing deinflection bugs 2016-05-09 21:33:52 +00:00			`for result in definitions:`
Switching dictionary to return values in python dictionaries instead of tuples Former-commit-id: 3f0682e88a24faed24f9e12e7c9cee338e817672 2013-11-09 23:42:02 +00:00			`length = max(length, len(result['source']))`
Adding translation engine Former-commit-id: 712e8584fe681e3ef397d4caa4094a6ce3eb2b1e 2013-11-09 21:04:36 +00:00
Fixing deinflection bugs 2016-05-09 21:33:52 +00:00			`return definitions, length`
Adding translation engine Former-commit-id: 712e8584fe681e3ef397d4caa4094a6ce3eb2b1e 2013-11-09 21:04:36 +00:00
Some basic support for displaying information about characters Former-commit-id: 895554df1f912021309091c88cd5b3c2a8b7211f 2013-11-11 04:27:25 +00:00
Updating Kanji lookup, temporarily disabling fancy search Former-commit-id: c6e298b51f4d5ae8596244fe97e227bc8fa2dc18 2013-11-14 17:22:20 +00:00			`def findCharacters(self, text):`
Fixing deinflection bugs 2016-05-09 21:33:52 +00:00			`text = util.sanitize(text, kana=False)`

Cleanup, remove maxResults 2016-05-08 03:24:59 +00:00			`processed = {}`
			`results = []`
Updating Kanji lookup, temporarily disabling fancy search Former-commit-id: c6e298b51f4d5ae8596244fe97e227bc8fa2dc18 2013-11-14 17:22:20 +00:00			`for c in text:`
Display Kanji results in order that they are encountered Former-commit-id: 5324fcfae066149171584f484da64c3ea2c63584 2013-11-16 03:53:05 +00:00			`if c not in processed:`
Fixing deinflection bugs 2016-05-09 21:33:52 +00:00			`match = self.dictionary.findKanji(c)`
Display Kanji results in order that they are encountered Former-commit-id: 5324fcfae066149171584f484da64c3ea2c63584 2013-11-16 03:53:05 +00:00			`if match is not None:`
			`results.append(match)`
			`processed[c] = match`
Some basic support for displaying information about characters Former-commit-id: 895554df1f912021309091c88cd5b3c2a8b7211f 2013-11-11 04:27:25 +00:00
Display Kanji results in order that they are encountered Former-commit-id: 5324fcfae066149171584f484da64c3ea2c63584 2013-11-16 03:53:05 +00:00			`return results`
Some basic support for displaying information about characters Former-commit-id: 895554df1f912021309091c88cd5b3c2a8b7211f 2013-11-11 04:27:25 +00:00

Fixing deinflection bugs 2016-05-09 21:33:52 +00:00			`def processTerm(self, groups, source, tags, rules=[], root='', wildcards=False):`
Adding imporved support for searching using wildcards Former-commit-id: 95d392046cc4d703fee433afabbd4ba3d04fad36 2013-11-16 19:21:35 +00:00			`for entry in self.dictionary.findTerm(root, wildcards):`
Fixing deinflection bugs 2016-05-09 21:33:52 +00:00			`if entry['id'] in groups:`
			`continue`

			`matched = len(tags) == 0`
			`for tag in tags:`
			`if tag in entry['tags']:`
			`matched = True`
			`break`

			`if matched:`
			`groups[entry['id']] = {`
			`'expression': entry['expression'],`
			`'reading': entry['reading'],`
			`'glossary': entry['glossary'],`
			`'tags': entry['tags'],`
			`'source': source,`
			`'rules': rules`
			`}`