yomichan-anki/yomi_base/japanese/translate.py

# -*- coding: utf-8 -*-

# Copyright (C) 2013  Alex Yatskov
# This module is based on Rikaichan code written by Jonathan Zarate
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program.  If not, see <http://www.gnu.org/licenses/>.


import re


class Translator:
    """Look up terms and kanji using a deinflector and a dictionary.

    The deinflector must provide ``deinflect(term, validator)`` and the
    dictionary must provide ``findTerm(term[, wildcards])`` and
    ``findKanji(character)``; those are the only members used here.
    """

    def __init__(self, deinflector, dictionary):
        """Store the collaborators used by the lookup methods."""
        self.deinflector = deinflector
        self.dictionary  = dictionary


    def findTerm(self, text, wildcards=False):
        """Find definitions for every leading substring of ``text``.

        Prefixes are tried from the full text down to its first character.
        Each prefix is handed to the deinflector together with a validator
        callback that returns the tags of the dictionary entries found for
        a candidate term. Every deinflection that comes back is resolved
        into definitions by ``processTerm``.

        Args:
            text: the string to scan.
            wildcards: when true, ``*``/``＊`` are replaced by ``%`` and
                ``?``/``？`` by ``_`` before searching.
                NOTE(review): the flag itself is not forwarded to the
                dictionary from here; it only reaches ``processTerm`` if
                the deinflector puts a ``wildcards`` key in its results.

        Returns:
            A ``(definitions, length)`` tuple. ``definitions`` is a list of
            result dicts ordered by descending sort key (source length,
            whether the tags contain ``'P'``, negated rule count,
            expression). ``length`` is the longest ``source`` length among
            them, or 0 when nothing was found.
        """
        if wildcards:
            # Same patterns as before; the backslash is written escaped so
            # the literal no longer depends on an invalid string escape.
            text = re.sub(u'[\\*＊]', u'%', text)
            text = re.sub(u'[\\?？]', u'_', text)

        def lookupTags(term):
            # Validator given to the deinflector: tags of each matching entry.
            return [entry['tags'] for entry in self.dictionary.findTerm(term)]

        groups = {}
        # range() rather than xrange(): equivalent here on Python 2 and
        # also available on Python 3.
        for end in range(len(text), 0, -1):
            deinflections = self.deinflector.deinflect(text[:end], lookupTags)
            if deinflections is None:
                continue

            for deinflection in deinflections:
                self.processTerm(groups, **deinflection)

        definitions = sorted(
            groups.values(),
            reverse=True,
            key=lambda d: (
                len(d['source']),
                'P' in d['tags'],
                -len(d['rules']),
                d['expression']
            )
        )

        # The "or [0]" keeps the result at 0 when there are no definitions.
        length = max([len(d['source']) for d in definitions] or [0])

        return definitions, length


    def findKanji(self, text):
        """Look up each distinct character of ``text`` in the dictionary.

        Every character is queried at most once. Characters for which the
        dictionary returns ``None`` are skipped.

        Returns:
            The non-``None`` lookup results, in order of first occurrence
            of their character in ``text``.
        """
        seen    = set()
        results = []
        for c in text:
            if c in seen:
                continue
            seen.add(c)

            match = self.dictionary.findKanji(c)
            if match is not None:
                results.append(match)

        return results


    def processTerm(self, groups, source, tags, rules=None, root='', wildcards=False):
        """Add the dictionary entries for ``root`` to ``groups``.

        An entry is added when its id is not already a key of ``groups``
        and either ``tags`` is empty or at least one of ``tags`` is found
        in the entry's own tags.

        Args:
            groups: dict keyed by entry id; updated in place.
            source: stored under ``'source'`` in each added result.
            tags: tags of which at least one must match, unless empty.
            rules: stored under ``'rules'`` in each added result. A fresh
                empty list is used when omitted, so results from separate
                calls never share one list object.
            root: the term looked up in the dictionary.
            wildcards: passed through to the dictionary lookup.
        """
        if rules is None:
            rules = []

        for entry in self.dictionary.findTerm(root, wildcards):
            if entry['id'] in groups:
                continue

            matched = len(tags) == 0 or any(tag in entry['tags'] for tag in tags)
            if not matched:
                continue

            groups[entry['id']] = {
                'expression': entry['expression'],
                'reading':    entry['reading'],
                'glossary':   entry['glossary'],
                'tags':       entry['tags'],
                'source':     source,
                'rules':      rules
            }
-												Adding translation engine


Former-commit-id: 712e8584fe681e3ef397d4caa4094a6ce3eb2b1e
											
										
										
											2013-11-09 21:04:36 +00:00
+								# -*- coding: utf-8 -*-
-												Switching dictionary to return values in python dictionaries instead of tuples


Former-commit-id: 3f0682e88a24faed24f9e12e7c9cee338e817672
											
										
										
											2013-11-09 23:42:02 +00:00
+								# Copyright (C) 2013  Alex Yatskov
-												Adding translation engine


Former-commit-id: 712e8584fe681e3ef397d4caa4094a6ce3eb2b1e
											
										
										
											2013-11-09 21:04:36 +00:00
+								# This module is based on Rikaichan code written by Jonathan Zarate
 								#
 								# This program is free software: you can redistribute it and/or modify
 								# it under the terms of the GNU General Public License as published by
 								# the Free Software Foundation, either version 3 of the License, or
 								# (at your option) any later version.
 								#
 								# This program is distributed in the hope that it will be useful,
 								# but WITHOUT ANY WARRANTY; without even the implied warranty of
 								# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 								# GNU General Public License for more details.
 								#
 								# You should have received a copy of the GNU General Public License
 								# along with this program.  If not, see <http://www.gnu.org/licenses/>.
-												Removing useless code

											
										
										
											2016-05-29 23:18:07 +00:00
+								import re
-												Adding translation engine


Former-commit-id: 712e8584fe681e3ef397d4caa4094a6ce3eb2b1e
											
										
										
											2013-11-09 21:04:36 +00:00
 								class Translator:
 								    def __init__(self, deinflector, dictionary):
 								        self.deinflector = deinflector
-												Cleanup, remove maxResults

											
										
										
											2016-05-08 03:24:59 +00:00
+								        self.dictionary  = dictionary
-												Adding translation engine


Former-commit-id: 712e8584fe681e3ef397d4caa4094a6ce3eb2b1e
											
										
										
											2013-11-09 21:04:36 +00:00
-												Adding imporved support for searching using wildcards


Former-commit-id: 95d392046cc4d703fee433afabbd4ba3d04fad36
											
										
										
											2013-11-16 19:21:35 +00:00
+								    def findTerm(self, text, wildcards=False):
-												Removing useless code

											
										
										
											2016-05-29 23:18:07 +00:00
+								        if wildcards:
 								            text = re.sub(u'[\*＊]', u'%', text)
 								            text = re.sub(u'[\?？]', u'_', text)
-												Adding translation engine


Former-commit-id: 712e8584fe681e3ef397d4caa4094a6ce3eb2b1e
											
										
										
											2013-11-09 21:04:36 +00:00
-												Fixing deinflection bugs

											
										
										
											2016-05-09 21:33:52 +00:00
+								        groups = {}
-												Some basic support for displaying information about characters


Former-commit-id: 895554df1f912021309091c88cd5b3c2a8b7211f
											
										
										
											2013-11-11 04:27:25 +00:00
+								        for i in xrange(len(text), 0, -1):
 								            term = text[:i]
-												Adding translation engine


Former-commit-id: 712e8584fe681e3ef397d4caa4094a6ce3eb2b1e
											
										
										
											2013-11-09 21:04:36 +00:00
-												Fixing deinflection bugs

											
										
										
											2016-05-09 21:33:52 +00:00
+								            dfs = self.deinflector.deinflect(term, lambda term: [d['tags'] for d in self.dictionary.findTerm(term)])
 								            if dfs is None:
 								                continue
 								            for df in dfs:
 								                self.processTerm(groups, **df)
 								        definitions = groups.values()
-												Order consistently

											
										
										
											2016-05-18 03:31:40 +00:00
+								        definitions = sorted(
 								            definitions,
 								            reverse=True,
 								            key=lambda d: (
 								                len(d['source']),
 								                'P' in d['tags'],
 								                -len(d['rules']),
 								                d['expression']
 								            )
 								        )
-												Adding translation engine


Former-commit-id: 712e8584fe681e3ef397d4caa4094a6ce3eb2b1e
											
										
										
											2013-11-09 21:04:36 +00:00
 								        length = 0
-												Fixing deinflection bugs

											
										
										
											2016-05-09 21:33:52 +00:00
+								        for result in definitions:
-												Switching dictionary to return values in python dictionaries instead of tuples


Former-commit-id: 3f0682e88a24faed24f9e12e7c9cee338e817672
											
										
										
											2013-11-09 23:42:02 +00:00
+								            length = max(length, len(result['source']))
-												Adding translation engine


Former-commit-id: 712e8584fe681e3ef397d4caa4094a6ce3eb2b1e
											
										
										
											2013-11-09 21:04:36 +00:00
-												Fixing deinflection bugs

											
										
										
											2016-05-09 21:33:52 +00:00
+								        return definitions, length
-												Adding translation engine


Former-commit-id: 712e8584fe681e3ef397d4caa4094a6ce3eb2b1e
											
										
										
											2013-11-09 21:04:36 +00:00
-												Some basic support for displaying information about characters


Former-commit-id: 895554df1f912021309091c88cd5b3c2a8b7211f
											
										
										
											2013-11-11 04:27:25 +00:00
-												Cleanup

											
										
										
											2016-05-18 05:12:05 +00:00
+								    def findKanji(self, text):
-												Cleanup, remove maxResults

											
										
										
											2016-05-08 03:24:59 +00:00
+								        processed = {}
 								        results   = []
-												Updating Kanji lookup, temporarily disabling fancy search


Former-commit-id: c6e298b51f4d5ae8596244fe97e227bc8fa2dc18
											
										
										
											2013-11-14 17:22:20 +00:00
+								        for c in text:
-												Display Kanji results in order that they are encountered


Former-commit-id: 5324fcfae066149171584f484da64c3ea2c63584
											
										
										
											2013-11-16 03:53:05 +00:00
+								            if c not in processed:
-												Fixing deinflection bugs

											
										
										
											2016-05-09 21:33:52 +00:00
+								                match = self.dictionary.findKanji(c)
-												Display Kanji results in order that they are encountered


Former-commit-id: 5324fcfae066149171584f484da64c3ea2c63584
											
										
										
											2013-11-16 03:53:05 +00:00
+								                if match is not None:
 								                    results.append(match)
 								                processed[c] = match
-												Some basic support for displaying information about characters


Former-commit-id: 895554df1f912021309091c88cd5b3c2a8b7211f
											
										
										
											2013-11-11 04:27:25 +00:00
-												Display Kanji results in order that they are encountered


Former-commit-id: 5324fcfae066149171584f484da64c3ea2c63584
											
										
										
											2013-11-16 03:53:05 +00:00
+								        return results
-												Some basic support for displaying information about characters


Former-commit-id: 895554df1f912021309091c88cd5b3c2a8b7211f
											
										
										
											2013-11-11 04:27:25 +00:00
-												Fixing deinflection bugs

											
										
										
											2016-05-09 21:33:52 +00:00
+								    def processTerm(self, groups, source, tags, rules=[], root='', wildcards=False):
-												Adding imporved support for searching using wildcards


Former-commit-id: 95d392046cc4d703fee433afabbd4ba3d04fad36
											
										
										
											2013-11-16 19:21:35 +00:00
+								        for entry in self.dictionary.findTerm(root, wildcards):
-												Fixing deinflection bugs

											
										
										
											2016-05-09 21:33:52 +00:00
+								            if entry['id'] in groups:
 								                continue
 								            matched = len(tags) == 0
 								            for tag in tags:
 								                if tag in entry['tags']:
 								                    matched = True
 								                    break
 								            if matched:
 								                groups[entry['id']] = {
 								                    'expression': entry['expression'],
 								                    'reading':    entry['reading'],
 								                    'glossary':   entry['glossary'],
 								                    'tags':       entry['tags'],
 								                    'source':     source,
 								                    'rules':      rules
 								                }