File reorg

2016-05-08 12:01:43 -07:00 · 2016-05-08 12:01:43 -07:00 · 14ecb8f328
commit 14ecb8f328
parent 449ffc260a
13 changed files with 2 additions and 255 deletions
--- a/build_tmpl.sh
+++ b/build_tmpl.sh
@ -1,2 +1,2 @@
 #!/bin/sh
-handlebars util/tmpl/* -f ext/bg/js/templates.js
+handlebars tmpl/* -f ext/bg/js/templates.js
--- a/build_tmpl_auto.sh
+++ b/build_tmpl_auto.sh
@ -1,5 +1,5 @@
 #!/bin/bash
-DIRECTORY_TO_OBSERVE="util/tmpl"
+DIRECTORY_TO_OBSERVE="tmpl"
 BUILD_SCRIPT="build_tmpl.sh"
 function block_for_change {
--- a/util/tmpl/footer.html
+++ b/util/tmpl/footer.html
--- a/util/tmpl/header.html
+++ b/util/tmpl/header.html
--- a/util/tmpl/kanji-link.html
+++ b/util/tmpl/kanji-link.html
--- a/util/tmpl/kanji-list.html
+++ b/util/tmpl/kanji-list.html
--- a/util/tmpl/kanji.html
+++ b/util/tmpl/kanji.html
--- a/util/tmpl/term-list.html
+++ b/util/tmpl/term-list.html
--- a/util/tmpl/term.html
+++ b/util/tmpl/term.html
--- a/util/compile.py
+++ b/util/compile.py
@ -1,244 +0,0 @@
 #!/usr/bin/env python
 # Copyright (C) 2016  Alex Yatskov <alex@foosoft.net>
 # Author: Alex Yatskov <alex@foosoft.net>
 #
 # This program is free software: you can redistribute it and/or modify
 # it under the terms of the GNU General Public License as published by
 # the Free Software Foundation, either version 3 of the License, or
 # (at your option) any later version.
 #
 # This program is distributed in the hope that it will be useful,
 # but WITHOUT ANY WARRANTY; without even the implied warranty of
 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 # GNU General Public License for more details.
 #
 # You should have received a copy of the GNU General Public License
 # along with this program.  If not, see <http://www.gnu.org/licenses/>.
 import codecs
 import json
 import optparse
 import os.path
 import re
 PARSED_TAGS = {
    'Buddh',
    'MA',
    'X',
    'abbr',
    'adj',
    'adj-f',
    'adj-i',
    'adj-na',
    'adj-no',
    'adj-pn',
    'adj-t',
    'adv',
    'adv-n',
    'adv-to',
    'arch',
    'ateji',
    'aux',
    'aux-adj',
    'aux-v',
    'c',
    'chn',
    'col',
    'comp',
    'conj',
    'ctr',
    'derog',
    'eK',
    'ek',
    'exp',
    'f',
    'fam',
    'fem',
    'food',
    'g',
    'geom',
    'gikun',
    'gram',
    'h',
    'hon',
    'hum',
    'iK',
    'id',
    'ik',
    'int',
    'io',
    'iv',
    'ling',
    'm',
    'm-sl',
    'male',
    'male-sl',
    'math',
    'mil',
    'n',
    'n-adv',
    'n-pref',
    'n-suf',
    'n-t',
    'num',
    'oK',
    'obs',
    'obsc',
    'ok',
    'on-mim',
    'P',
    'p',
    'physics',
    'pn',
    'poet',
    'pol',
    'pr',
    'pref',
    'prt',
    'rare',
    's',
    'sens',
    'sl',
    'st',
    'suf',
    'u',
    'uK',
    'uk',
    'v1',
    'v2a-s',
    'v4h',
    'v4r',
    'v5',
    'v5aru',
    'v5b',
    'v5g',
    'v5k',
    'v5k-s',
    'v5m',
    'v5n',
    'v5r',
    'v5r-i',
    'v5s',
    'v5t',
    'v5u',
    'v5u-s',
    'v5uru',
    'v5z',
    'vi',
    'vk',
    'vn',
    'vs',
    'vs-c',
    'vs-i',
    'vs-s',
    'vt',
    'vulg',
    'vz'
 }
 def is_hiragana(c):
    return 0x3040 <= ord(c) < 0x30a0
 def is_katakana(c):
    return 0x30a0 <= ord(c) < 0x3100
 def load_definitions(path):
    print('Parsing "{0}"...'.format(path))
    with codecs.open(path, encoding='euc-jp') as fp:
        return filter(lambda x: x and x[0] != '#', fp.read().splitlines())
 def parse_kanji_dic(path):
    results = {}
    for line in load_definitions(path):
        segments = line.split()
        character = segments[0]
        kunyomi = ' '.join(filter(lambda x: list(filter(is_hiragana, x)), segments[1:]))
        onyomi = ' '.join(filter(lambda x: list(filter(is_katakana, x)), segments[1:]))
        glossary = re.findall('\{([^\}]+)\}', line)
        results[character] = (kunyomi or None, onyomi or None, glossary)
    return results
 def parse_edict(path):
    results = []
    for line in load_definitions(path):
        segments = line.split('/')
        exp_parts = segments[0].split(' ')
        expression = exp_parts[0]
        reading_match = re.search('\[([^\]]+)\]', exp_parts[1])
        reading = None if reading_match is None else reading_match.group(1)
        defs = []
        tags = set()
        for index, dfn in enumerate(filter(None, segments[1:])):
            dfn_match = re.search(r'^((?:\((?:[\w\-\,\:]*)*\)\s*)*)(.*)$', dfn)
            tags_raw = set(filter(None, re.split(r'[\s\(\),]', dfn_match.group(1))))
            tags_raw = tags_raw.intersection(PARSED_TAGS)
            tags = tags.union(tags_raw)
            gloss = dfn_match.group(2).strip()
            if len(gloss) == 0:
                continue
            if index == 0 or len(dfn_match.group(1)) > 0:
                defs.append([gloss])
            else:
                defs[-1].append(gloss)
        result = [expression, reading, ' '.join(tags)]
        result += map(lambda x: '; '.join(x), defs)
        results.append(result)
    indices = {}
    for i, d in enumerate(results):
        for key in d[:2]:
            if key is not None:
                values = indices.get(key, [])
                values.append(i)
                indices[key] = values
    return {'defs': results, 'indices': indices}
 def build_dict(output_dir, input_file, parser):
    if input_file is not None:
        base = os.path.splitext(os.path.basename(input_file))[0]
        with open(os.path.join(output_dir, base) + '.json', 'w') as fp:
             # json.dump(parser(input_file), fp, sort_keys=True, indent=4, separators=(',', ': '))
             json.dump(parser(input_file), fp, separators=(',', ':'))
 def build(dict_dir, kanjidic, edict, enamdict):
    build_dict(dict_dir, kanjidic, parse_kanji_dic)
    build_dict(dict_dir, edict, parse_edict)
    build_dict(dict_dir, enamdict, parse_edict)
 def main():
    parser = optparse.OptionParser()
    parser.add_option('--kanjidic', dest='kanjidic')
    parser.add_option('--edict', dest='edict')
    parser.add_option('--enamdict', dest='enamdict')
    options, args = parser.parse_args()
    if len(args) == 0:
        parser.print_help()
    else:
        build(args[0], options.kanjidic, options.edict, options.enamdict)
 if __name__ == '__main__':
    main()
--- a/util/data/edict
+++ b/util/data/edict
@ -1,3 +0,0 @@
 version https://git-lfs.github.com/spec/v1
 oid sha256:51b9e6f5761f3a9c2d1f6c65df131ad3fa60738ffda06e1e0cfb07fdfe6acd89
 size 15655597
--- a/util/data/enamdict
+++ b/util/data/enamdict
@ -1,3 +0,0 @@
 version https://git-lfs.github.com/spec/v1
 oid sha256:9fd670f8c8a020aeb2b870d186d223702e91e0538cec04a784b1433363aff9f9
 size 26426507
--- a/util/data/kanjidic
+++ b/util/data/kanjidic
@ -1,3 +0,0 @@
 version https://git-lfs.github.com/spec/v1
 oid sha256:494d5a6d750444907995eb3be7bbe7d817a20d3431ae0d8c944715690610b69c
 size 1213904
`@ -1,2 +1,2 @@`
	`#!/bin/sh`	`#!/bin/sh`
	`handlebars util/tmpl/* -f ext/bg/js/templates.js`	`handlebars tmpl/* -f ext/bg/js/templates.js`