From 7eadff3457690074c5c0140a6e9ffd6164021176 Mon Sep 17 00:00:00 2001 From: Alex Yatskov Date: Thu, 31 Mar 2016 20:03:39 -0700 Subject: [PATCH] Moving large files to CSV format, deleting unused kradfile --- build_dict.sh | 5 ++- ext/bg/dictionary.js | 81 +++++++++++++++++++++----------------------- ext/client.js | 4 ++- util/compile.py | 51 ++++++---------------------- 4 files changed, 54 insertions(+), 87 deletions(-) diff --git a/build_dict.sh b/build_dict.sh index a13b4ed7..42eed600 100755 --- a/build_dict.sh +++ b/build_dict.sh @@ -3,7 +3,6 @@ KANJIDIC=util/data/kanjidic EDICT=util/data/edict ENAMDICT=util/data/enamdict -KRADFILE=util/data/kradfile -DICT_DIR=ext/jp/data +DICT_DIR=ext/bg/data -util/compile.py --kanjidic $KANJIDIC --kradfile $KRADFILE --edict $EDICT --enamdict $ENAMDICT $DICT_DIR +util/compile.py --kanjidic $KANJIDIC --edict $EDICT --enamdict $ENAMDICT $DICT_DIR diff --git a/ext/bg/dictionary.js b/ext/bg/dictionary.js index eff54890..30c34687 100644 --- a/ext/bg/dictionary.js +++ b/ext/bg/dictionary.js @@ -19,43 +19,33 @@ class Dictionary { constructor() { - this.termDicts = []; - this.kanjiDicts = []; + this.terms = []; + this.termIndices = {}; + + this.kanji = []; + this.kanjiIndices = {}; } - addTermDict(termDict) { - this.termDicts.push(termDict); + addTermDict(terms) { + let index = this.terms.length; + for (const [e, r, g, t] in terms) { + this.storeIndex(this.termIndices, e, index); + this.storeIndex(this.termIndices, r, index++); + this.terms.push([e, r, g, t]); + } } - addKanjiDict(kanjiDict) { - this.kanjiDicts.push(kanjiDict); + addKanjiDict(kanji) { + let index = this.kanji.length; + for (const [c, k, o, g] in kanji) { + this.storeIndex(this.kanjiIndices, c, index++); + this.kanji.push([c, k, o, g]); + } } - findTerm(term) { - let results = []; - for (let dict of this.termDicts) { - results = results.concat(this.findTermInDict(term, dict)); - } - - return results; - } - - findKanji(kanji) { - const results = []; - for (let dict of this.kanjiDicts) { - const result = this.findKanjiInDict(kanji, dict); - if (result !== null) { - results.push(result); - } - } - - return results; - } - - findTermInDict(term, dict) { - return (dict.indices[term] || []).map(index => { - const [e, r, g, t] = dict.defs[index]; + return (this.termIndices[term] || []).map(index => { + const [e, r, g, t] = this.terms[index]; return { id: index, expression: e, @@ -66,19 +56,24 @@ class Dictionary { }); } - findKanjiInDict(kanji, dict) { - const def = dict.defs[kanji]; - if (def === null) { - return null; - } + findKanji(kanji) { + return (this.kanjiIndices[kanji] || []).map(index => { + const [c, k, o, g] = def; + return { + id: kanji.charCodeAt(0), + character: c, + kunyomi: k, + onyomi: o, + glossary: g + }; + }); + } - const [c, k, o, g] = def; - return { - id: kanji.charCodeAt(0), - character: c, - kunyomi: k, - onyomi: o, - glossary: g - }; + storeIndex(indices, term, index) { + if (term.length > 0) { + const indices = this.termIndices[term] || []; + indices.push(term); + this.termIndices[term] = indices; + } } } diff --git a/ext/client.js b/ext/client.js index 2d9a470f..1c8c0a9f 100644 --- a/ext/client.js +++ b/ext/client.js @@ -27,7 +27,9 @@ class Client { this.popup.classList.add('yomichan-popup'); this.popup.addEventListener('mousedown', (e) => e.stopPropagation()); this.popup.addEventListener('scroll', (e) => e.stopPropagation()); - document.body.appendChild(this.popup); + + const base = document.body.appendChild('div'); + base.createShadowRoot().appendChild(this.popup); chrome.runtime.onMessage.addListener(this.onMessage.bind(this)); window.addEventListener('mousedown', this.onMouseDown.bind(this)); diff --git a/util/compile.py b/util/compile.py index 485537dc..790ebfc7 100755 --- a/util/compile.py +++ b/util/compile.py @@ -18,7 +18,6 @@ import codecs -import json import optparse import os.path import re @@ -111,7 +110,7 @@ def load_definitions(path): def parse_kanji_dic(path): - results = {} + results = [] for line in load_definitions(path): segments = line.split() @@ -119,32 +118,20 @@ def parse_kanji_dic(path): kunyomi = ', '.join(filter(lambda x: filter(is_hiragana, x), segments[1:])) onyomi = ', '.join(filter(lambda x: filter(is_katakana, x), segments[1:])) glossary = '; '.join(re.findall('\{([^\}]+)\}', line)) - results[character] = (kunyomi, onyomi, glossary) - - return results - - -def parse_krad_file(path): - results = {} - - for line in load_definitions(path): - segments = line.split(' ') - character = segments[0] - radicals = ' '.join(segments[2:]) - results[character] = radicals; + results.append((character, kunyomi, onyomi, glossary)) return results def parse_edict(path): - defs = [] + results = [] for line in load_definitions(path): segments = line.split('/') expression = segments[0].split(' ') term = expression[0] match = re.search('\[([^\]]+)\]', expression[1]) - reading = None if match is None else match.group(1) + reading = '' if match is None else match.group(1) glossary = '; '.join(filter(lambda x: len(x) > 0, segments[1:])) glossary = re.sub('\(\d+\)\s*', '', glossary) @@ -156,30 +143,21 @@ def parse_edict(path): tags = set(tags).intersection(PARSED_TAGS) tags = ' '.join(tags) - defs.append((term, reading, glossary, tags)) + results.append((term, reading, glossary, tags)) - indices = {} - for i, d in enumerate(defs): - for key in d[:2]: - if key is not None: - values = indices.get(key, []) - values.append(i) - indices[key] = values - - return {'defs': defs, 'indices': indices} + return results[1:] def build_dict(output_dir, input_file, parser): if input_file is not None: base = os.path.splitext(os.path.basename(input_file))[0] - with open(os.path.join(output_dir, base) + '.json', 'w') as fp: - # json.dump(parser(input_file), fp, sort_keys=True, indent=4, separators=(',', ': ')) - json.dump(parser(input_file), fp) + with codecs.open(os.path.join(output_dir, base) + '.csv', 'wb', encoding='utf-8') as fp: + for d in parser(input_file): + fp.write('\t'.join(d) + '\n') -def build(dict_dir, kanjidic, kradfile, edict, enamdict): +def build(dict_dir, kanjidic, edict, enamdict): build_dict(dict_dir, kanjidic, parse_kanji_dic) - build_dict(dict_dir, kradfile, parse_krad_file) build_dict(dict_dir, edict, parse_edict) build_dict(dict_dir, enamdict, parse_edict) @@ -187,7 +165,6 @@ def build(dict_dir, kanjidic, kradfile, edict, enamdict): def main(): parser = optparse.OptionParser() parser.add_option('--kanjidic', dest='kanjidic') - parser.add_option('--kradfile', dest='kradfile') parser.add_option('--edict', dest='edict') parser.add_option('--enamdict', dest='enamdict') @@ -196,13 +173,7 @@ def main(): if len(args) == 0: parser.print_help() else: - build( - args[0], - options.kanjidic, - options.kradfile, - options.edict, - options.enamdict - ) + build(args[0], options.kanjidic, options.edict, options.enamdict) if __name__ == '__main__':