1

Adding improved support for searching using wildcards

Former-commit-id: 95d392046cc4d703fee433afabbd4ba3d04fad36
This commit is contained in:
Alex Yatskov 2013-11-16 11:21:35 -08:00
parent c40f778955
commit d31bff5700
9 changed files with 84 additions and 18 deletions

View File

@ -159,7 +159,7 @@
<item>
<widget class="QLabel" name="label">
<property name="text">
<string>Search</string>
<string>Expression</string>
</property>
</widget>
</item>
@ -235,7 +235,7 @@
<item>
<widget class="QLabel" name="label_2">
<property name="text">
<string>Search</string>
<string>Character</string>
</property>
</widget>
</item>

View File

@ -2,7 +2,7 @@
# Form implementation generated from reading ui file 'ui/about.ui'
#
# Created: Thu Nov 14 09:02:10 2013
# Created: Fri Nov 15 20:29:12 2013
# by: PyQt4 UI code generator 4.10
#
# WARNING! All changes made in this file will be lost!

View File

@ -2,7 +2,7 @@
# Form implementation generated from reading ui file 'ui/preferences.ui'
#
# Created: Thu Nov 14 09:02:10 2013
# Created: Fri Nov 15 20:29:12 2013
# by: PyQt4 UI code generator 4.10
#
# WARNING! All changes made in this file will be lost!

View File

@ -2,7 +2,7 @@
# Form implementation generated from reading ui file 'ui/reader.ui'
#
# Created: Thu Nov 14 09:02:10 2013
# Created: Fri Nov 15 20:29:13 2013
# by: PyQt4 UI code generator 4.10
#
# WARNING! All changes made in this file will be lost!
@ -266,11 +266,11 @@ class Ui_MainWindowReader(object):
self.menuTextSize.setTitle(_translate("MainWindowReader", "&Zoom", None))
self.toolBar.setWindowTitle(_translate("MainWindowReader", "toolBar", None))
self.dockVocab.setWindowTitle(_translate("MainWindowReader", "Vocabulary", None))
self.label.setText(_translate("MainWindowReader", "Search", None))
self.label.setText(_translate("MainWindowReader", "Expression", None))
self.dockAnki.setWindowTitle(_translate("MainWindowReader", "Anki", None))
self.label_3.setText(_translate("MainWindowReader", "Active tag(s)", None))
self.dockKanji.setWindowTitle(_translate("MainWindowReader", "Kanji", None))
self.label_2.setText(_translate("MainWindowReader", "Search", None))
self.label_2.setText(_translate("MainWindowReader", "Character", None))
self.actionOpen.setText(_translate("MainWindowReader", "&Open...", None))
self.actionOpen.setToolTip(_translate("MainWindowReader", "Open file", None))
self.actionOpen.setShortcut(_translate("MainWindowReader", "Ctrl+O", None))

View File

@ -2,7 +2,7 @@
# Resource object code
#
# Created: Thu Nov 14 09:02:10 2013
# Created: Fri Nov 15 20:29:13 2013
# by: The Resource Compiler for PyQt (Qt v4.8.4)
#
# WARNING! All changes made in this file will be lost!

View File

@ -26,12 +26,12 @@ class Dictionary:
self.indices = set()
def findTerm(self, word, partial=False):
def findTerm(self, word, wildcards=False):
self.requireIndex('Terms', 'expression')
self.requireIndex('Terms', 'reading')
cursor = self.db.cursor()
cursor.execute('SELECT * FROM Terms WHERE expression {0} ? OR reading=? LIMIT 100'.format('LIKE' if partial else '='), (word, word))
cursor.execute('SELECT * FROM Terms WHERE expression {0} ? OR reading=? LIMIT 100'.format('LIKE' if wildcards else '='), (word, word))
results = list()
for expression, reading, glossary, tags in cursor.fetchall():

View File

@ -18,6 +18,7 @@
import operator
import util
class Translator:
@ -26,15 +27,15 @@ class Translator:
self.dictionary = dictionary
def findTerm(self, text, partial=False):
groups = dict()
def findTerm(self, text, wildcards=False):
text = util.sanitize(text, wildcards=wildcards)
groups = dict()
for i in xrange(len(text), 0, -1):
term = text[:i]
deinflections = self.deinflector.deinflect(term, self.validator)
if deinflections is None:
self.processTerm(groups, term, partial=partial)
self.processTerm(groups, term, wildcards=wildcards)
else:
for deinflection in deinflections:
self.processTerm(groups, **deinflection)
@ -51,6 +52,7 @@ class Translator:
def findCharacters(self, text):
text = util.sanitize(text, kana=False)
results = list()
processed = dict()
@ -64,10 +66,10 @@ class Translator:
return results
def processTerm(self, groups, source, rules=list(), root=str(), partial=False):
def processTerm(self, groups, source, rules=list(), root=str(), wildcards=False):
root = root or source
for entry in self.dictionary.findTerm(root, partial):
for entry in self.dictionary.findTerm(root, wildcards):
key = entry['expression'], entry['reading'], entry['glossary']
if key not in groups:
groups[key] = entry['tags'], source, rules

View File

@ -0,0 +1,61 @@
# -*- coding: utf-8 -*-
# Copyright (C) 2013 Alex Yatskov
# This module is based on Rikaichan code written by Jonathan Zarate
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
import re
def isHiragana(c):
    """Return True if the single character *c* has a code point in
    U+3040..U+309F (the Unicode hiragana block), False otherwise."""
    codepoint = ord(c)
    return codepoint >= 0x3040 and codepoint < 0x30a0
def isKatakana(c):
    """Return True if the single character *c* has a code point in
    U+30A0..U+30FF (the Unicode katakana block), False otherwise."""
    codepoint = ord(c)
    return codepoint >= 0x30a0 and codepoint < 0x3100
def isKana(c):
    """Return a truthy value if *c* is accepted by isHiragana() or,
    failing that, by isKatakana()."""
    hiragana = isHiragana(c)
    # Only consult the katakana check when the hiragana one rejected c.
    return hiragana if hiragana else isKatakana(c)
def isKanji(c):
    """Return True if the single character *c* falls in one of the two
    ideograph ranges recognised here: U+3400..U+4DBF or U+4E00..U+9FAF."""
    codepoint = ord(c)
    if 0x4e00 <= codepoint < 0x9fb0:
        return True
    return 0x3400 <= codepoint < 0x4dc0
def isJapanese(c):
    """Return a truthy value if *c* is accepted by isKana() or,
    failing that, by isKanji()."""
    kana = isKana(c)
    # Fall back to the kanji check only when c is not kana.
    return kana if kana else isKanji(c)
def sanitize(text, kana=True, wildcards=False):
    """Strip every character from *text* that is not acceptable in a query.

    Parameters:
        text: the string to filter, iterated character by character.
        kana: when true, characters accepted by isJapanese() are kept;
            when false, only characters accepted by isKanji() are kept.
        wildcards: when true, each '*' is rewritten to '%' and each '?'
            to '_' (the SQL LIKE placeholders), and those two placeholder
            characters survive the filtering. Note that a '%' or '_'
            already present in *text* is therefore kept as well.

    Returns:
        A unicode string made of the surviving characters in their
        original order; empty if nothing survives.
    """
    checker = isJapanese if kana else isKanji

    if wildcards:
        # Inside a character class '*' and '?' are literal, so no backslash
        # is needed; this avoids invalid escape sequences in the literals.
        text = re.sub(u'[*]', u'%', text)
        text = re.sub(u'[?]', u'_', text)
        overrides = (u'%', u'_')
    else:
        overrides = ()

    # A single join replaces repeated string concatenation, and the u''
    # receiver keeps the result a unicode string without relying on the
    # Python-2-only unicode() builtin.
    return u''.join(c for c in text if checker(c) or c in overrides)

View File

@ -188,7 +188,7 @@ def buildVocabDefs(definitions, query):
html += """
<p>No definitions to display.</p>
<p>Mouse over text with the <em>middle mouse button</em> or <em>shift key</em> pressed to search.</p>
<p>You can also also input terms in the search box below.</p>"""
<p>You can also also input terms in the search box below, using the "*" and "?" wildcards where needed.</p>"""
return html + buildDefFooter()
@ -216,6 +216,9 @@ def buildKanjiDefs(definitions, query):
for i, definition in enumerate(definitions):
html += buildKanjiDef(definition, i, query)
else:
html += '<p>No definitions to display.</p>'
html += """
<p>No definitions to display.</p>
<p>Mouse over text with the <em>middle mouse button</em> or <em>shift key</em> pressed to search.</p>
<p>You can also also input terms in the search box below."""
return html + buildDefFooter()