Moving large files to CSV format, deleting unused kradfile

2016-03-31 20:03:39 -07:00 · 2016-03-31 20:03:39 -07:00 · 7eadff3457
commit 7eadff3457
parent b97e75ba32
4 changed files with 54 additions and 87 deletions
--- a/build_dict.sh
+++ b/build_dict.sh
@@ -3,7 +3,6 @@
 KANJIDIC=util/data/kanjidic
 EDICT=util/data/edict
 ENAMDICT=util/data/enamdict
-KRADFILE=util/data/kradfile
+DICT_DIR=ext/bg/data
 DICT_DIR=ext/jp/data
-util/compile.py --kanjidic $KANJIDIC --kradfile $KRADFILE --edict $EDICT --enamdict $ENAMDICT $DICT_DIR
+util/compile.py --kanjidic $KANJIDIC --edict $EDICT --enamdict $ENAMDICT $DICT_DIR
--- a/ext/bg/dictionary.js
+++ b/ext/bg/dictionary.js
@@ -19,43 +19,33 @@
 class Dictionary {
    constructor() {
-        this.termDicts  = [];
+        this.terms       = [];
-        this.kanjiDicts = [];
+        this.termIndices = {};
        this.kanji        = [];
        this.kanjiIndices = {};
    }
-    addTermDict(termDict) {
+    addTermDict(terms) {
-        this.termDicts.push(termDict);
+        let index = this.terms.length;
        for (const [e, r, g, t] in terms) {
            this.storeIndex(this.termIndices, e, index);
            this.storeIndex(this.termIndices, r, index++);
            this.terms.push([e, r, g, t]);
        }
    }
-    addKanjiDict(kanjiDict) {
+    addKanjiDict(kanji) {
-        this.kanjiDicts.push(kanjiDict);
+        let index = this.kanji.length;
        for (const [c, k, o, g] in kanji) {
            this.storeIndex(this.kanjiIndices, c, index++);
            this.kanji.push([c, k, o, g]);
        }
    }
    findTerm(term) {
-        let results = [];
+        return (this.termIndices[term] || []).map(index => {
-        for (let dict of this.termDicts) {
+            const [e, r, g, t] = this.terms[index];
            results = results.concat(this.findTermInDict(term, dict));
        }
        return results;
    }
    findKanji(kanji) {
        const results = [];
        for (let dict of this.kanjiDicts) {
            const result = this.findKanjiInDict(kanji, dict);
            if (result !== null) {
                results.push(result);
            }
        }
        return results;
    }
    findTermInDict(term, dict) {
        return (dict.indices[term] || []).map(index => {
            const [e, r, g, t] = dict.defs[index];
            return {
                id:         index,
                expression: e,
@@ -66,19 +56,24 @@ class Dictionary {
        });
    }
-    findKanjiInDict(kanji, dict) {
+    findKanji(kanji) {
-        const def = dict.defs[kanji];
+        return (this.kanjiIndices[kanji] || []).map(index => {
-        if (def === null) {
+            const [c, k, o, g] = def;
-            return null;
+            return {
-        }
+                id:        kanji.charCodeAt(0),
                character: c,
                kunyomi:   k,
                onyomi:    o,
                glossary:  g
            };
        });
    }
-        const [c, k, o, g] = def;
+    storeIndex(indices, term, index) {
-        return {
+        if (term.length > 0) {
-            id:        kanji.charCodeAt(0),
+            const indices = this.termIndices[term] || [];
-            character: c,
+            indices.push(term);
-            kunyomi:   k,
+            this.termIndices[term] = indices;
-            onyomi:    o,
+        }
            glossary:  g
        };
    }
 }
--- a/ext/client.js
+++ b/ext/client.js
@@ -27,7 +27,9 @@ class Client {
        this.popup.classList.add('yomichan-popup');
        this.popup.addEventListener('mousedown', (e) => e.stopPropagation());
        this.popup.addEventListener('scroll', (e) => e.stopPropagation());
-        document.body.appendChild(this.popup);
+
        const base = document.body.appendChild('div');
        base.createShadowRoot().appendChild(this.popup);
        chrome.runtime.onMessage.addListener(this.onMessage.bind(this));
        window.addEventListener('mousedown', this.onMouseDown.bind(this));
--- a/util/compile.py
+++ b/util/compile.py
@@ -18,7 +18,6 @@
 import codecs
 import json
 import optparse
 import os.path
 import re
@@ -111,7 +110,7 @@ def load_definitions(path):
 def parse_kanji_dic(path):
-    results = {}
+    results = []
    for line in load_definitions(path):
        segments = line.split()
@@ -119,32 +118,20 @@ def parse_kanji_dic(path):
        kunyomi = ', '.join(filter(lambda x: filter(is_hiragana, x), segments[1:]))
        onyomi = ', '.join(filter(lambda x: filter(is_katakana, x), segments[1:]))
        glossary = '; '.join(re.findall('\{([^\}]+)\}', line))
-        results[character] = (kunyomi, onyomi, glossary)
+        results.append((character, kunyomi, onyomi, glossary))
    return results
 def parse_krad_file(path):
    results = {}
    for line in load_definitions(path):
        segments = line.split(' ')
        character = segments[0]
        radicals = ' '.join(segments[2:])
        results[character] = radicals;
    return results
 def parse_edict(path):
-    defs = []
+    results = []
    for line in load_definitions(path):
        segments = line.split('/')
        expression = segments[0].split(' ')
        term = expression[0]
        match = re.search('\[([^\]]+)\]', expression[1])
-        reading = None if match is None else match.group(1)
+        reading = '' if match is None else match.group(1)
        glossary = '; '.join(filter(lambda x: len(x) > 0, segments[1:]))
        glossary = re.sub('\(\d+\)\s*', '', glossary)
@@ -156,30 +143,21 @@ def parse_edict(path):
        tags = set(tags).intersection(PARSED_TAGS)
        tags = ' '.join(tags)
-        defs.append((term, reading, glossary, tags))
+        results.append((term, reading, glossary, tags))
-    indices = {}
+    return results[1:]
    for i, d in enumerate(defs):
        for key in d[:2]:
            if key is not None:
                values = indices.get(key, [])
                values.append(i)
                indices[key] = values
    return {'defs': defs, 'indices': indices}
 def build_dict(output_dir, input_file, parser):
    if input_file is not None:
        base = os.path.splitext(os.path.basename(input_file))[0]
-        with open(os.path.join(output_dir, base) + '.json', 'w') as fp:
+        with codecs.open(os.path.join(output_dir, base) + '.csv', 'wb', encoding='utf-8') as fp:
-            # json.dump(parser(input_file), fp, sort_keys=True, indent=4, separators=(',', ': '))
+            for d in parser(input_file):
-            json.dump(parser(input_file), fp)
+                fp.write('\t'.join(d) + '\n')
-def build(dict_dir, kanjidic, kradfile, edict, enamdict):
+def build(dict_dir, kanjidic, edict, enamdict):
    build_dict(dict_dir, kanjidic, parse_kanji_dic)
    build_dict(dict_dir, kradfile, parse_krad_file)
    build_dict(dict_dir, edict, parse_edict)
    build_dict(dict_dir, enamdict, parse_edict)
@@ -187,7 +165,6 @@ def build(dict_dir, kanjidic, kradfile, edict, enamdict):
 def main():
    parser = optparse.OptionParser()
    parser.add_option('--kanjidic', dest='kanjidic')
    parser.add_option('--kradfile', dest='kradfile')
    parser.add_option('--edict', dest='edict')
    parser.add_option('--enamdict', dest='enamdict')
@@ -196,13 +173,7 @@ def main():
    if len(args) == 0:
        parser.print_help()
    else:
-        build(
+        build(args[0], options.kanjidic, options.edict, options.enamdict)
            args[0],
            options.kanjidic,
            options.kradfile,
            options.edict,
            options.enamdict
        )
 if __name__ == '__main__':