1
yomichan-anki/yomi_base/reader_util.py

287 lines
8.4 KiB
Python
Raw Normal View History

2011-08-28 18:01:32 +00:00
# -*- coding: utf-8 -*-
2011-10-27 15:22:26 +00:00
# Copyright (C) 2013 Alex Yatskov
2011-08-28 18:01:32 +00:00
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
from PyQt4 import QtGui
import re
import codecs
import sqlite3
2011-08-28 18:01:32 +00:00
def decodeContent(content):
    """Decode raw bytes by trying a fixed list of candidate encodings.

    The candidates are tried in order: utf-8, shift_jis, euc-jp, utf-16.
    The first one that decodes without error wins. If every candidate
    fails, the one whose first error lies furthest into the data is used
    with undecodable bytes replaced; ties go to the earlier candidate.

    Returns a (text, encoding) tuple.
    """
    encodings = ['utf-8', 'shift_jis', 'euc-jp', 'utf-16']
    errorOffsets = {}

    for encoding in encodings:
        try:
            return content.decode(encoding), encoding
        except UnicodeDecodeError as e:
            # e.start is the offset of the first undecodable byte; the
            # attribute form works on both Python 2.6+ and Python 3
            errorOffsets[encoding] = e.start

    # Iterate the ordered candidate list (not the dict) so that ties are
    # resolved deterministically in favour of the earlier candidate.
    encoding = max(encodings, key=errorOffsets.get)
    return content.decode(encoding, 'replace'), encoding
def stripReadings(content):
    """Return content with every non-empty 《...》 annotation removed."""
    readingPattern = re.compile(u'《[^》]+》')
    return readingPattern.sub(u'', content)
2011-08-28 18:01:32 +00:00
def findSentence(content, position):
    """Return the sentence in content surrounding the index position.

    Scans backwards from position for the sentence start (just after a
    terminator, an opening quote or a newline) and forwards for the
    sentence end (just after a terminator, or just before a closing
    quote). Terminators that sit inside a quotation encountered during a
    scan are ignored. The result is stripped of surrounding whitespace.

    position is clamped into the valid index range, so an out-of-range
    value no longer raises IndexError.
    """
    # NOTE(review): the CJK corner-bracket keys and the full-width
    # terminators were unreadable in the reviewed copy of this file (they
    # showed up as duplicate empty-string keys); they are restored here
    # from context and written as escapes -- confirm against upstream.
    # opening quote -> closing quote
    quotesFwd = {u'\u300c': u'\u300d', u'\u300e': u'\u300f', u"'": u"'", u'"': u'"'}
    # closing quote -> opening quote
    quotesBwd = {u'\u300d': u'\u300c', u'\u300f': u'\u300e', u"'": u"'", u'"': u'"'}
    # ideographic full stop, full-width '.', '.', full-width '?' and '!', '?', '!'
    terminators = u'\u3002\uff0e.\uff1f\uff01?!'

    length = len(content)
    position = max(0, min(position, length - 1))

    # Backward scan. The stack holds the opening quotes we still expect to
    # meet; the innermost one is at the end of the list so that append()
    # and pop() operate on the same end (the original pushed at the front
    # but popped from the back, which broke nested quotations).
    quoteStack = []
    start = 0
    i = position
    while i > 0:
        c = content[i]
        if not quoteStack and (c in terminators or c in quotesFwd or c == u'\n'):
            start = i + 1
            break

        if quoteStack and c == quoteStack[-1]:
            quoteStack.pop()
        elif c in quotesBwd:
            quoteStack.append(quotesBwd[c])
        i -= 1

    # Forward scan, this time tracking the closing quotes still expected.
    quoteStack = []
    end = length
    i = position
    while i < length:
        c = content[i]
        if not quoteStack:
            if c in terminators:
                end = i + 1
                break
            elif c in quotesBwd:
                # an unmatched closing quote ends the sentence before it
                end = i
                break

        if quoteStack and c == quoteStack[-1]:
            quoteStack.pop()
        elif c in quotesFwd:
            quoteStack.append(quotesFwd[c])
        i += 1

    return content[start:end].strip()
2011-08-28 18:01:32 +00:00
def formatFields(fields, markup):
    """Expand each field template with the values in markup.

    fields maps field names to str.format templates; markup supplies the
    keyword values. Templates that cannot be expanded -- unknown key,
    positional placeholder such as '{0}' or '{}', or malformed syntax --
    are left out of the result rather than raising.

    Returns a dict of field name -> formatted text.
    """
    result = {}

    for field, template in fields.items():
        try:
            result[field] = template.format(**markup)
        except (KeyError, IndexError, ValueError):
            # best effort: a template that cannot be filled is skipped.
            # IndexError covers positional placeholders, which can never
            # be satisfied because only keyword values are supplied.
            continue

    return result
def splitTags(tags):
    """Split a tag string on semicolons, commas and whitespace.

    Returns a list of the non-blank tags in their original order. A list
    (not a lazy filter object) is returned on both Python 2 and 3.
    """
    # raw string: '\s' in a normal literal is an invalid escape sequence
    return [tag for tag in re.split(r'[;,\s]', tags) if tag.strip()]
def markupVocabExp(definition):
    """Build the markup dict for a vocabulary entry keyed by its expression.

    The summary is 'expression [reading]' when the definition has a
    reading and just the expression otherwise. The glossary list is
    joined with '; ' and a missing 'sentence' key yields None.
    """
    hasReading = bool(definition['reading'])
    template = u'{expression} [{reading}]' if hasReading else u'{expression}'

    markup = {}
    markup['expression'] = definition['expression']
    markup['reading'] = definition['reading'] or u''
    markup['glossary'] = '; '.join(definition['glossary'])
    markup['sentence'] = definition.get('sentence')
    markup['summary'] = template.format(**definition)
    return markup
def markupVocabReading(definition):
    """Build the markup dict for a vocabulary entry keyed by its reading.

    The reading takes the place of the expression and the summary, and
    the 'reading' field itself is left empty. Returns None when the
    definition has no reading.
    """
    reading = definition['reading']
    if not reading:
        return None

    markup = {}
    markup['expression'] = reading
    markup['reading'] = u''
    markup['glossary'] = '; '.join(definition['glossary'])
    markup['sentence'] = definition.get('sentence')
    markup['summary'] = reading
    return markup
2011-08-28 18:01:32 +00:00
def copyVocabDef(definition):
    """Copy a vocabulary definition to the clipboard as one tab-separated line.

    The line holds expression, reading (only when present) and the
    '; '-joined glossary, followed by a newline.
    """
    glossary = '; '.join(definition['glossary'])

    if not definition['reading']:
        text = u'{0}\t{1}\n'.format(definition['expression'], glossary)
    else:
        text = u'{0}\t{1}\t{2}\n'.format(definition['expression'], definition['reading'], glossary)

    clipboard = QtGui.QApplication.clipboard()
    clipboard.setText(text)
2011-08-28 18:01:32 +00:00
def markupKanji(definition):
    """Build the markup dict for a kanji entry.

    The onyomi, kunyomi and glossary lists are each joined with ', ';
    the character doubles as the summary.
    """
    character = definition['character']

    markup = {'character': character, 'summary': character}
    for key in ('onyomi', 'kunyomi', 'glossary'):
        markup[key] = ', '.join(definition[key])
    return markup
2011-08-28 18:01:32 +00:00
def copyKanjiDef(definition):
    """Copy a kanji definition to the clipboard as tab-separated text.

    The columns are character, kunyomi, onyomi and glossary, with each
    list joined by ', '. No trailing newline is appended.
    """
    kunyomi = ', '.join(definition['kunyomi'])
    onyomi = ', '.join(definition['onyomi'])
    glossary = ', '.join(definition['glossary'])

    text = u'{0}\t{1}\t{2}\t{3}'.format(definition['character'], kunyomi, onyomi, glossary)

    clipboard = QtGui.QApplication.clipboard()
    clipboard.setText(text)
def buildDefHeader():
    """Return the opening HTML of a definition page, including its stylesheet.

    The body background and text colours are taken from the application
    palette's Window and WindowText roles.
    """
    template = u'''
<html><head><style>
body {{ background-color: {0}; color: {1}; font-size: 11pt; }}
span.expression {{ font-size: 15pt; }}
</style></head><body>'''

    palette = QtGui.QApplication.palette()
    background = palette.color(QtGui.QPalette.Window).name()
    foreground = palette.color(QtGui.QPalette.WindowText).name()
    return template.format(background, foreground)
def buildDefFooter():
    """Return the HTML that closes the body and html elements of a definition page."""
    closingTags = '</body></html>'
    return closingTags
def buildEmpty():
    """Return the HTML shown when there are no definitions to display."""
    # Fixes the duplicated word "also also" and closes the last paragraph.
    return u'''
<p>No definitions to display.</p>
<p>Mouse over text with the <em>middle mouse button</em> or <em>shift key</em> pressed to search.</p>
<p>You can also input terms in the search box below.</p>'''
def buildVocabDef(definition, index, query):
    """Render one vocabulary definition as an HTML fragment.

    definition -- dict with 'expression', 'reading' and 'glossary' (a
                  list of strings) and optionally 'rules' (a list)
    index      -- position of the definition, embedded in the action links
    query      -- optional callable query(kind, markup); when it returns a
                  true value for the expression or reading markup, the
                  matching "add" link is included. Pass None for no
                  "add" links.

    Returns the fragment as a unicode string.
    """
    # All templates are unicode literals: on Python 2, formatting unicode
    # text through a byte-string template forces an ASCII encode and
    # raises UnicodeEncodeError for non-ASCII content.
    reading = u''
    if definition['reading']:
        reading = u'<span class="reading">[{0}]<br></span>'.format(definition['reading'])

    rules = u''
    if definition.get('rules'):
        joinedRules = u' &lt; '.join(definition['rules'])
        rules = u'<span class="rules">({0})<br></span>'.format(joinedRules)

    links = u'<a href="copyVocabDef:{0}"><img src="://img/img/icon_copy_definition.png" align="right"></a>'.format(index)
    if query is not None:
        if query('vocab', markupVocabExp(definition)):
            links += u'<a href="addVocabExp:{0}"><img src="://img/img/icon_add_expression.png" align="right"></a>'.format(index)
        if query('vocab', markupVocabReading(definition)):
            links += u'<a href="addVocabReading:{0}"><img src="://img/img/icon_add_reading.png" align="right"></a>'.format(index)

    # join instead of repeated += to build the list items in one pass
    items = u''.join(u'<li>{0}</li>'.format(g) for g in definition['glossary'])
    glossary = u'<ol>' + items + u'</ol>'

    return u'''
<span class="links">{links}</span>
<span class="expression">{expression}</span>
<span class="reading">{reading}</span>
<span class="rules">{rules}</span>
<span class="glossary">{glossary}<br></span>
<br clear="all">'''.format(
        links=links,
        expression=definition['expression'],
        reading=reading,
        glossary=glossary,
        rules=rules
    )
def buildVocabDefs(definitions, query):
    """Render a complete HTML page for a list of vocabulary definitions.

    Each definition is rendered with buildVocabDef and its list index;
    an empty list produces the buildEmpty placeholder instead. The
    result is wrapped in the shared header and footer.
    """
    pieces = [buildDefHeader()]

    if len(definitions) == 0:
        pieces.append(buildEmpty())
    else:
        for position, entry in enumerate(definitions):
            pieces.append(buildVocabDef(entry, position, query))

    pieces.append(buildDefFooter())

    page = pieces[0]
    for piece in pieces[1:]:
        page = page + piece
    return page
def buildKanjiDef(definition, index, query):
    """Render one kanji definition as an HTML fragment.

    The fragment holds a copy link (plus an "add" link when query is
    given and returns a true value for the kanji markup), the character,
    the kunyomi followed by the onyomi joined with ', ', and the
    ', '-joined glossary. index is embedded in the link targets.
    """
    template = u'''
<span class="links">{links}</span>
<span class="expression">{expression}<br></span>
<span class="reading">[{reading}]<br></span>
<span class="glossary">{glossary}<br></span>
<br clear="all">'''

    actionLinks = '<a href="copyKanjiDef:{0}"><img src="://img/img/icon_copy_definition.png" align="right"></a>'.format(index)
    if query is not None:
        if query('kanji', markupKanji(definition)):
            actionLinks += '<a href="addKanji:{0}"><img src="://img/img/icon_add_expression.png" align="right"></a>'.format(index)

    allReadings = definition['kunyomi'] + definition['onyomi']

    return template.format(
        links=actionLinks,
        expression=definition['character'],
        reading=', '.join(allReadings),
        glossary=', '.join(definition['glossary'])
    )
def buildKanjiDefs(definitions, query):
    """Render a complete HTML page for a list of kanji definitions.

    Each definition is rendered with buildKanjiDef and its list index;
    an empty list produces the buildEmpty placeholder instead. The
    result is wrapped in the shared header and footer.
    """
    pieces = [buildDefHeader()]

    if len(definitions) == 0:
        pieces.append(buildEmpty())
    else:
        for position, entry in enumerate(definitions):
            pieces.append(buildKanjiDef(entry, position, query))

    pieces.append(buildDefFooter())

    page = pieces[0]
    for piece in pieces[1:]:
        page = page + piece
    return page
def extractKindleDeck(filename):
    """Read the 'word' column of the WORDS table from an SQLite file.

    Returns the list of words. Best effort: if the file cannot be opened
    or queried as an SQLite database (including a file that is not a
    database at all), the words collected so far -- usually none -- are
    returned instead of raising.
    """
    # filename may be a non-str object (e.g. a Qt string); coerce it with
    # the text type of the running interpreter. `unicode` is only looked
    # up on Python 2, where str is bytes.
    toText = unicode if str is bytes else str

    words = []
    try:
        db = sqlite3.connect(toText(filename))
        try:
            for row in db.execute('select word from WORDS'):
                words.append(row[0])
        finally:
            # `with connection:` only commits or rolls back; it never
            # closes the connection, so close it explicitly.
            db.close()
    except sqlite3.DatabaseError:
        # DatabaseError is the base class of OperationalError and also
        # covers "file is not a database".
        pass

    return words
def extractWordList(filename):
    """Read a UTF-8 text file and split it into words.

    Words are separated by semicolons, commas or whitespace (Unicode
    whitespace such as the ideographic space included). A leading UTF-8
    byte order mark is discarded. Returns a list of the non-empty words
    in file order.
    """
    # filename may be a non-str object (e.g. a Qt string); coerce it with
    # the text type of the running interpreter. `unicode` is only looked
    # up on Python 2, where str is bytes.
    toText = unicode if str is bytes else str

    # 'utf-8-sig' decodes plain UTF-8 identically but strips a BOM, which
    # would otherwise stay glued to the first word.
    with codecs.open(toText(filename), 'rb', 'utf-8-sig') as fp:
        content = fp.read()

    # re.UNICODE makes \s match Unicode whitespace on Python 2 as well
    # (it is already the default for text patterns on Python 3).
    return [word for word in re.split(r'[;,\s]', content, flags=re.UNICODE) if word]