2011-08-28 18:01:32 +00:00
|
|
|
|
# -*- coding: utf-8 -*-
|
2011-10-27 15:22:26 +00:00
|
|
|
|
|
2013-11-09 23:42:02 +00:00
|
|
|
|
# Copyright (C) 2013 Alex Yatskov
|
2011-08-28 18:01:32 +00:00
|
|
|
|
#
|
|
|
|
|
# This program is free software: you can redistribute it and/or modify
|
|
|
|
|
# it under the terms of the GNU General Public License as published by
|
|
|
|
|
# the Free Software Foundation, either version 3 of the License, or
|
|
|
|
|
# (at your option) any later version.
|
|
|
|
|
#
|
|
|
|
|
# This program is distributed in the hope that it will be useful,
|
|
|
|
|
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
|
|
|
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
|
|
|
# GNU General Public License for more details.
|
|
|
|
|
#
|
|
|
|
|
# You should have received a copy of the GNU General Public License
|
|
|
|
|
# along with this program. If not, see <http://www.gnu.org/licenses/>.
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
from PyQt4 import QtGui
|
2013-11-09 23:42:02 +00:00
|
|
|
|
import re
|
2014-12-24 08:26:05 +00:00
|
|
|
|
import codecs
|
2014-12-20 11:19:22 +00:00
|
|
|
|
import sqlite3
|
2011-08-28 18:01:32 +00:00
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def decodeContent(content):
    """Decode a byte string by trying a fixed list of candidate encodings.

    The candidates are tried strictly, in order; the first one that decodes
    the whole input without error wins. If every candidate fails, the one
    whose first decoding error lies furthest into the data is used, with
    undecodable bytes replaced.

    Returns a ``(text, encoding)`` tuple.
    """
    encodings = ['utf-8', 'shift_jis', 'euc-jp', 'utf-16']
    errorOffsets = {}

    for encoding in encodings:
        try:
            return content.decode(encoding), encoding
        except UnicodeDecodeError as e:
            # offset of the first byte this codec could not decode
            errorOffsets[encoding] = e.start

    # Every candidate failed, so each has an entry in errorOffsets. Taking
    # max() over the ordered list makes ties resolve to the earlier candidate
    # instead of depending on dict iteration order.
    encoding = max(encodings, key=errorOffsets.get)
    return content.decode(encoding, 'replace'), encoding
|
|
|
|
|
|
|
|
|
|
|
2013-11-11 01:39:44 +00:00
|
|
|
|
def stripReadings(content):
    """Return ``content`` with every non-empty 《...》 annotation removed."""
    annotation = re.compile(u'《[^》]+》')
    return annotation.sub(u'', content)
|
2011-08-28 18:01:32 +00:00
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def findSentence(content, position):
    """Return the sentence of ``content`` that surrounds index ``position``.

    The text is scanned backwards and forwards from ``position`` until a
    sentence terminator, a newline (backwards only) or an unbalanced quote
    character is met. Terminators inside a quoted span that is opened and
    closed during the scan do not end the sentence. The result is stripped
    of surrounding whitespace.

    ``position`` is clamped to the valid range, so an index at or past the
    end of ``content`` no longer raises ``IndexError``.
    """
    quotesFwd = {u'「': u'」', u'『': u'』', u"'": u"'", u'"': u'"'}
    quotesBwd = {u'」': u'「', u'』': u'『', u"'": u"'", u'"': u'"'}
    # ideographic full stop, then fullwidth and ASCII forms of . ? !
    terminators = u'。\uff0e.\uff1f?\uff01!'

    length = len(content)
    position = max(0, min(position, length))

    # Backward scan. Index 0 is never examined: reaching it simply means the
    # sentence starts at the beginning of the text.
    start = 0
    quoteStack = []  # top of the stack is the LAST element
    i = min(position, length - 1)
    while i > 0:
        c = content[i]

        if not quoteStack and (c in terminators or c in quotesFwd or c == '\n'):
            start = i + 1
            break

        if quoteStack and c == quoteStack[-1]:
            quoteStack.pop()
        elif c in quotesBwd:
            # walking backwards a closing quote opens a span; remember the
            # opening character that will balance it
            quoteStack.append(quotesBwd[c])

        i -= 1

    # Forward scan.
    end = length
    quoteStack = []
    i = position
    while i < length:
        c = content[i]

        if not quoteStack:
            if c in terminators:
                end = i + 1
                break
            elif c in quotesBwd:
                # a closing quote with nothing open ends the sentence
                end = i
                break

        if quoteStack and c == quoteStack[-1]:
            quoteStack.pop()
        elif c in quotesFwd:
            quoteStack.append(quotesFwd[c])

        i += 1

    return content[start:end].strip()
|
2011-08-28 18:01:32 +00:00
|
|
|
|
|
|
|
|
|
|
2013-11-11 01:39:44 +00:00
|
|
|
|
def formatFields(fields, markup):
    """Fill each field template with values from ``markup``.

    ``fields`` maps field names to ``str.format`` templates. A template that
    names a key missing from ``markup`` is left out of the returned dict.
    """
    formatted = {}

    for name in fields:
        template = fields[name]
        try:
            formatted[name] = template.format(**markup)
        except KeyError:
            # the template refers to markup that is not available; skip it
            continue

    return formatted
|
|
|
|
|
|
|
|
|
|
|
2013-11-11 01:39:44 +00:00
|
|
|
|
def splitTags(tags):
    """Split a tag string on semicolons, commas and whitespace.

    Returns a list of the non-blank pieces, in their original order.
    """
    # raw string: '\s' in a plain literal is an invalid escape sequence
    return [tag for tag in re.split(r'[;,\s]', tags) if tag.strip()]
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def markupVocabExp(definition):
    """Build the markup dict for a vocabulary entry keyed by its expression.

    The summary is ``expression [reading]`` when the definition has a
    reading, otherwise just the expression. A falsy reading is returned as
    an empty unicode string; ``sentence`` is ``None`` when absent.
    """
    reading = definition['reading']
    template = u'{expression} [{reading}]' if reading else u'{expression}'
    summary = template.format(**definition)

    return dict(
        expression=definition['expression'],
        reading=reading or u'',
        glossary=definition['glossary'],
        sentence=definition.get('sentence'),
        summary=summary,
    )
|
|
|
|
|
|
|
|
|
|
|
2013-11-11 01:39:44 +00:00
|
|
|
|
def markupVocabReading(definition):
    """Build the markup dict for a vocabulary entry keyed by its reading.

    The reading takes the place of the expression and summary, and the
    ``reading`` field itself is left empty. Returns ``None`` when the
    definition has no reading.
    """
    reading = definition['reading']
    if not reading:
        return None

    return dict(
        expression=reading,
        reading=u'',
        glossary=definition['glossary'],
        sentence=definition.get('sentence'),
        summary=reading,
    )
|
2011-08-28 18:01:32 +00:00
|
|
|
|
|
|
|
|
|
|
2013-11-12 03:59:47 +00:00
|
|
|
|
def copyVocabDef(definition):
    """Copy a vocabulary definition to the clipboard as one tab-separated line.

    The line holds the expression, the reading (only when there is one) and
    the glossary entries joined with ``'; '``, followed by a newline.
    """
    glossary = '; '.join(definition['glossary'])
    reading = definition['reading']

    if reading:
        template = u'{0}\t{1}\t{2}\n'
        columns = (definition['expression'], reading, glossary)
    else:
        template = u'{0}\t{1}\n'
        columns = (definition['expression'], glossary)

    QtGui.QApplication.clipboard().setText(template.format(*columns))
|
2011-08-28 18:01:32 +00:00
|
|
|
|
|
|
|
|
|
|
2013-11-12 03:59:47 +00:00
|
|
|
|
def markupKanji(definition):
    """Build the markup dict for a kanji entry.

    Copies ``character``, ``onyomi``, ``kunyomi`` and ``glossary`` from the
    definition and uses the character itself as the summary.
    """
    copied = ('character', 'onyomi', 'kunyomi', 'glossary')
    markup = dict((key, definition[key]) for key in copied)
    markup['summary'] = definition['character']
    return markup
|
2011-08-28 18:01:32 +00:00
|
|
|
|
|
|
|
|
|
|
2013-11-14 16:36:26 +00:00
|
|
|
|
def copyKanjiDef(definition):
    """Copy a kanji definition to the clipboard as tab-separated text.

    The columns are the character, the kunyomi, the onyomi and the glossary;
    each list is joined with ``', '``. No trailing newline is added.
    """
    character = definition['character']
    joined = [', '.join(definition[key]) for key in ('kunyomi', 'onyomi', 'glossary')]
    text = u'{0}\t{1}\t{2}\t{3}'.format(character, *joined)

    QtGui.QApplication.clipboard().setText(text)
|
2013-11-14 16:36:26 +00:00
|
|
|
|
|
|
|
|
|
|
2013-11-11 04:27:25 +00:00
|
|
|
|
def buildDefHeader():
    """Return the opening HTML for a definition page, up to ``<body>``.

    The page background and text colours are taken from the application
    palette's ``Window`` and ``WindowText`` roles.
    """
    appPalette = QtGui.QApplication.palette()
    background = appPalette.color(QtGui.QPalette.Window).name()
    foreground = appPalette.color(QtGui.QPalette.WindowText).name()

    # doubled braces are literal CSS braces; {0}/{1} are the two colours
    template = u'''
        <html><head><style>
        body {{ background-color: {0}; color: {1}; font-size: 11pt; }}
        span.expression {{ font-size: 15pt; }}
        </style></head><body>'''
    return template.format(background, foreground)
|
2013-11-11 04:27:25 +00:00
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def buildDefFooter():
    """Return the closing HTML tags that end a definition page."""
    closingTags = '</body></html>'
    return closingTags
|
|
|
|
|
|
|
|
|
|
|
2013-11-16 19:53:02 +00:00
|
|
|
|
def buildEmpty():
    """Return the HTML fragment shown when there are no definitions."""
    # Fixes the duplicated word in the last hint ("also also") and closes
    # its paragraph, which was left open.
    return u'''
        <p>No definitions to display.</p>
        <p>Mouse over text with the <em>middle mouse button</em> or <em>shift key</em> pressed to search.</p>
        <p>You can also input terms in the search box below.</p>'''
|
2013-11-16 19:53:02 +00:00
|
|
|
|
|
|
|
|
|
|
2013-11-11 04:27:25 +00:00
|
|
|
|
def buildVocabDef(definition, index, query):
    """Return the HTML fragment for a single vocabulary definition.

    ``definition`` supplies ``expression``, ``reading``, ``glossary`` and,
    optionally, ``rules``. ``index`` is embedded in the action links so a
    click can be mapped back to this definition.

    ``query`` may be ``None``; otherwise it is called as
    ``query('vocab', markup)`` for the expression markup and for the reading
    markup, and an "add" link is emitted for each call that returns a true
    value.
    """
    reading = u''
    if definition['reading']:
        reading = u'<span class="reading">[{0}]<br></span>'.format(definition['reading'])

    # Unicode templates throughout: formatting unicode data into a byte
    # string template fails on Python 2 as soon as the data is not ASCII.
    rules = u''
    if definition.get('rules'):
        rules = u'<span class="rules">({0})<br></span>'.format(u' < '.join(definition['rules']))

    links = [
        u'<a href="copyVocabDef:{0}"><img src="://img/img/icon_copy_definition.png" align="right"></a>'.format(index)
    ]
    if query is not None:
        if query('vocab', markupVocabExp(definition)):
            links.append(
                u'<a href="addVocabExp:{0}"><img src="://img/img/icon_add_expression.png" align="right"></a>'.format(index)
            )
        if query('vocab', markupVocabReading(definition)):
            links.append(
                u'<a href="addVocabReading:{0}"><img src="://img/img/icon_add_reading.png" align="right"></a>'.format(index)
            )

    items = u''.join(u'<li>{0}</li>'.format(g) for g in definition['glossary'])
    glossary = u'<ol>{0}</ol>'.format(items)

    html = u'''
        <span class="links">{0}</span>
        <span class="expression">{1}<br></span>
        {2}
        <span class="glossary">{3}<br></span>
        {4}
        <br clear="all">'''.format(u''.join(links), definition['expression'], reading, glossary, rules)

    return html
|
|
|
|
|
|
|
|
|
|
|
2013-11-11 04:27:25 +00:00
|
|
|
|
def buildVocabDefs(definitions, query):
    """Return a complete HTML page for a list of vocabulary definitions.

    The page is the shared header, then one fragment per definition (or the
    "nothing to display" notice when the list is empty), then the footer.
    ``query`` is passed through to ``buildVocabDef`` unchanged.
    """
    page = buildDefHeader()

    if len(definitions) == 0:
        page += buildEmpty()
    else:
        for position, entry in enumerate(definitions):
            page += buildVocabDef(entry, position, query)

    page += buildDefFooter()
    return page
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def buildKanjiDef(definition, index, query):
    """Return the HTML fragment for a single kanji definition.

    ``index`` is embedded in the action links. When ``query`` is not
    ``None`` it is called as ``query('kanji', markup)``, and an "add" link
    is emitted if it returns a true value. Kunyomi are listed before onyomi
    in the reading line.
    """
    linkParts = [
        '<a href="copyKanjiDef:{0}"><img src="://img/img/icon_copy_definition.png" align="right"></a>'.format(index)
    ]
    canAdd = query is not None and query('kanji', markupKanji(definition))
    if canAdd:
        linkParts.append(
            '<a href="addKanji:{0}"><img src="://img/img/icon_add_expression.png" align="right"></a>'.format(index)
        )
    links = ''.join(linkParts)

    readings = ', '.join(definition['kunyomi'] + definition['onyomi'])
    glossary = ', '.join(definition['glossary'])

    template = u'''
        <span class="links">{0}</span>
        <span class="expression">{1}<br></span>
        <span class="reading">[{2}]<br></span>
        <span class="glossary">{3}<br></span>
        <br clear="all">'''
    return template.format(links, definition['character'], readings, glossary)
|
2013-11-11 04:27:25 +00:00
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def buildKanjiDefs(definitions, query):
    """Return a complete HTML page for a list of kanji definitions.

    The page is the shared header, then one fragment per definition (or the
    "nothing to display" notice when the list is empty), then the footer.
    ``query`` is passed through to ``buildKanjiDef`` unchanged.
    """
    page = buildDefHeader()

    if len(definitions) == 0:
        page += buildEmpty()
    else:
        for position, entry in enumerate(definitions):
            page += buildKanjiDef(entry, position, query)

    page += buildDefFooter()
    return page
|
2014-12-20 11:19:22 +00:00
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def extractKindleDeck(filename):
    """Return the ``word`` column of the ``WORDS`` table in a SQLite file.

    ``filename`` is converted with ``unicode()`` before being handed to
    ``sqlite3.connect``. Any SQLite database error (missing table, file that
    is not a database, ...) is swallowed and whatever was read so far is
    returned, which is an empty list in the usual failure case.
    """
    # local import keeps the block self-contained
    from contextlib import closing

    words = []

    try:
        # A sqlite3 connection used directly as a context manager only
        # commits or rolls back; closing() makes sure the file handle is
        # released as well.
        with closing(sqlite3.connect(unicode(filename))) as db:
            for row in db.execute('select word from WORDS'):
                words.append(row[0])
    except sqlite3.DatabaseError:
        # DatabaseError is the parent of OperationalError and also covers
        # "file is not a database"; treat all of them as "no words".
        pass

    return words
|
|
|
|
|
|
|
|
|
|
|
2014-12-24 08:26:05 +00:00
|
|
|
|
def extractWordList(filename):
    """Read a UTF-8 text file and return the words it contains as a list.

    Words are separated by semicolons, commas or whitespace; empty pieces
    produced by consecutive separators are dropped.
    """
    with codecs.open(unicode(filename), 'rb', 'utf-8') as fp:
        content = fp.read()

    # raw string: '\s' in a plain literal is an invalid escape sequence
    return [word for word in re.split(r'[;,\s]', content) if word]
|