Adding in contents of enamdict

Former-commit-id: 2738889da33182d941c2179afbc5f3d776dc886a
2014-12-28 19:13:24 +09:00 · 2014-12-28 19:13:24 +09:00 · 1c5129f82b
commit 1c5129f82b
parent 3ba372c9c7
6 changed files with 739451 additions and 73 deletions
--- a/build_dict.sh
+++ b/build_dict.sh
@@ -2,7 +2,8 @@
 KANJIDIC=util/kanjidic
 EDICT=util/edict
 ENAMDICT=util/enamdict
 DICT=yomi_base/japanese/dictionary.db
 [ -f $DICT ] && rm $DICT
-util/compile.py --kanjidic $KANJIDIC --edict $EDICT $DICT
+util/compile.py --kanjidic $KANJIDIC --edict $EDICT $DICT --enamdict $ENAMDICT
--- a/util/compile.py
+++ b/util/compile.py
@@ -26,64 +26,74 @@ import sys
 PARSED_TAGS = {
-    'P',        # common word
+    'P',       # common word
-    'adj',      # former adjective classification (being removed)
+    'adj',     # former adjective classification (being removed)
-    'adj-f',    # noun or verb acting prenominally (other than the above)
+    'adj-f',   # noun or verb acting prenominally (other than the above)
-    'adj-i',    # adjective (keiyoushi)
+    'adj-i',   # adjective (keiyoushi)
-    'adj-na',   # adjectival nouns or quasi-adjectives (keiyodoshi)
+    'adj-na',  # adjectival nouns or quasi-adjectives (keiyodoshi)
-    'adj-no',   # nouns which may take the genitive case particle `no'
+    'adj-no',  # nouns which may take the genitive case particle `no'
-    'adj-pn',   # pre-noun adjectival (rentaishi)
+    'adj-pn',  # pre-noun adjectival (rentaishi)
-    'adj-t',    # `taru' adjective
+    'adj-t',   # `taru' adjective
-    'adv',      # adverb (fukushi)
+    'adv',     # adverb (fukushi)
-    'adv-n',    # adverbial noun
+    'adv-n',   # adverbial noun
-    'adv-to',   # adverb taking the `to' particle
+    'adv-to',  # adverb taking the `to' particle
-    'aux',      # auxiliary
+    'aux',     # auxiliary
-    'aux-adj',  # auxiliary adjective
+    'aux-adj', # auxiliary adjective
-    'aux-v',    # auxiliary verb
+    'aux-v',   # auxiliary verb
-    'conj',     # conjunction
+    'c',       # company name
-    'ctr',      # counter
+    'conj',    # conjunction
-    'exp',      # Expressions (phrases, clauses, etc.)
+    'ctr',     # counter
-    'int',      # interjection (kandoushi)
+    'exp',     # Expressions (phrases, clauses, etc.)
-    'iv',       # irregular verb
+    'f',       # female given name
-    'n',        # noun (common) (futsuumeishi)
+    'g',       # given name, as-yet not classified by sex
-    'n-adv',    # adverbial noun (fukushitekimeishi)
+    'h',       # full (usually family plus given) name of a particular person
-    'n-pref',   # noun, used as a prefix
+    'int',     # interjection (kandoushi)
-    'n-suf',    # noun, used as a suffix
+    'iv',      # irregular verb
-    'n-t',      # noun (temporal) (jisoumeishi)
+    'm',       # male given name
-    'num',      # numeric
+    'n',       # noun (common) (futsuumeishi)
-    'pn',       # pronoun
+    'n-adv',   # adverbial noun (fukushitekimeishi)
-    'pref' ,    # prefix
+    'n-pref',  # noun, used as a prefix
-    'prt',      # particle
+    'n-suf',   # noun, used as a suffix
-    'suf',      # suffix
+    'n-t',     # noun (temporal) (jisoumeishi)
-    'v1',       # Ichidan verb
+    'num',     # numeric
-    'v2a-s',    # Nidan verb with 'u' ending (archaic)
+    'p',       # place-name
-    'v4h',      # Yodan verb with `hu/fu' ending (archaic)
+    'pn',      # pronoun
-    'v4r',      # Yodan verb with `ru' ending (archaic)
+    'pr',      # product name
-    'v5',       # Godan verb (not completely classified)
+    'pref' ,   # prefix
-    'v5aru',    # Godan verb - -aru special class
+    'prt',     # particle
-    'v5b',      # Godan verb with `bu' ending
+    's',       # surname
-    'v5g',      # Godan verb with `gu' ending
+    'st',      # stations
-    'v5k',      # Godan verb with `ku' ending
+    'suf',     # suffix
-    'v5k-s',    # Godan verb - iku/yuku special class
+    'u',       # person name, either given or surname, as-yet unclassified
-    'v5m',      # Godan verb with `mu' ending
+    'v1',      # Ichidan verb
-    'v5n',      # Godan verb with `nu' ending
+    'v2a-s',   # Nidan verb with 'u' ending (archaic)
-    'v5r',      # Godan verb with `ru' ending
+    'v4h',     # Yodan verb with `hu/fu' ending (archaic)
-    'v5r-i',    # Godan verb with `ru' ending (irregular verb)
+    'v4r',     # Yodan verb with `ru' ending (archaic)
-    'v5s',      # Godan verb with `su' ending
+    'v5',      # Godan verb (not completely classified)
-    'v5t',      # Godan verb with `tsu' ending
+    'v5aru',   # Godan verb - -aru special class
-    'v5u',      # Godan verb with `u' ending
+    'v5b',     # Godan verb with `bu' ending
-    'v5u-s',    # Godan verb with `u' ending (special class)
+    'v5g',     # Godan verb with `gu' ending
-    'v5uru',    # Godan verb - uru old class verb (old form of Eru)
+    'v5k',     # Godan verb with `ku' ending
-    'v5z',      # Godan verb with `zu' ending
+    'v5k-s',   # Godan verb - iku/yuku special class
-    'vi',       # intransitive verb
+    'v5m',     # Godan verb with `mu' ending
-    'vk',       # kuru verb - special class
+    'v5n',     # Godan verb with `nu' ending
-    'vn',       # irregular nu verb
+    'v5r',     # Godan verb with `ru' ending
-    'vs',       # noun or participle which takes the aux. verb suru
+    'v5r-i',   # Godan verb with `ru' ending (irregular verb)
-    'vs-c',     # su verb - precursor to the modern suru
+    'v5s',     # Godan verb with `su' ending
-    'vs-i',     # suru verb - irregular
+    'v5t',     # Godan verb with `tsu' ending
-    'vs-s',     # suru verb - special class
+    'v5u',     # Godan verb with `u' ending
-    'vt',       # transitive verb
+    'v5u-s',   # Godan verb with `u' ending (special class)
-    'vz',       # Ichidan verb - zuru verb - (alternative form of -jiru verbs)
+    'v5uru',   # Godan verb - uru old class verb (old form of Eru)
+    'v5z',     # Godan verb with `zu' ending
+    'vi',      # intransitive verb
+    'vk',      # kuru verb - special class
+    'vn',      # irregular nu verb
+    'vs',      # noun or participle which takes the aux. verb suru
+    'vs-c',    # su verb - precursor to the modern suru
+    'vs-i',    # suru verb - irregular
+    'vs-s',    # suru verb - special class
+    'vt',      # transitive verb
+    'vz',      # Ichidan verb - zuru verb - (alternative form of -jiru verbs)
 }
@@ -172,18 +182,21 @@ def writeEdict(cursor, values):
    cursor.executemany('INSERT INTO Terms VALUES(?, ?, ?, ?)', values)
-def build(path, kanjidic, kradfile, edict):
+def build(path, kanjidic, kradfile, edict, enamdict):
    with sqlite3.connect(path) as db:
        cursor = db.cursor()
        if kanjidic is not None:
-            writeKanjiDic(cursor, parseKanjiDic(kanjidic))
+            writeKanjiDic(db, parseKanjiDic(kanjidic))
        if kradfile is not None:
-            writeKradFile(cursor, parseKradFile(kradfile))
+            writeKradFile(db, parseKradFile(kradfile))
+        terms = []
        if edict is not None:
-            writeEdict(cursor, parseEdict(edict))
+            terms += parseEdict(edict)
+        if enamdict is not None:
+            terms += parseEdict(enamdict)
+        if len(terms) > 0:
+            writeEdict(db, terms)
 def main():
@@ -191,13 +204,20 @@ def main():
    parser.add_option('--kanjidic', dest='kanjidic')
    parser.add_option('--kradfile', dest='kradfile')
    parser.add_option('--edict', dest='edict')
+    parser.add_option('--enamdict', dest='enamdict')
    options, args = parser.parse_args()
    if len(args) == 0:
        parser.print_help()
    else:
-        build(args[0], options.kanjidic, options.kradfile, options.edict)
+        build(
+            args[0],
+            options.kanjidic,
+            options.kradfile,
+            options.edict,
+            options.enamdict
+        )
 if __name__ == '__main__':
--- a/util/enamdict
+++ b/util/enamdict
--- a/yomi_base/constants.py
+++ b/yomi_base/constants.py
@@ -17,5 +17,5 @@
 c = {
-    'appVersion': '0.14b',
+    'appVersion': '0.15b',
 }
--- a/yomi_base/japanese/dictionary.db
+++ b/yomi_base/japanese/dictionary.db
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:76e8f49f623012ca646b235b660c244070f0fff53811b77c429b907878c28585
+oid sha256:7944b0babc6812a36c01265b8df517f15f5358dee415ece53736c5c2136aa5cb
-size 20201472
+size 57806848
--- a/yomi_base/reader_util.py
+++ b/yomi_base/reader_util.py
@@ -240,10 +240,9 @@ def extractKindleDeck(filename):
    words = list()
    try:
-        connection = sqlite3.connect(unicode(filename))
+        with sqlite3.connect(unicode(filename)) as db:
-        for row in connection.execute('select word from WORDS'):
+            for row in db.execute('select word from WORDS'):
-            words.append(row[0])
+                words.append(row[0])
-        connection.close()
    except sqlite3.OperationalError:
        pass