Adding in contents of enamdict
Former-commit-id: 2738889da33182d941c2179afbc5f3d776dc886a
This commit is contained in:
parent
3ba372c9c7
commit
1c5129f82b
@ -2,7 +2,8 @@
|
|||||||
|
|
||||||
KANJIDIC=util/kanjidic
|
KANJIDIC=util/kanjidic
|
||||||
EDICT=util/edict
|
EDICT=util/edict
|
||||||
|
ENAMDICT=util/enamdict
|
||||||
DICT=yomi_base/japanese/dictionary.db
|
DICT=yomi_base/japanese/dictionary.db
|
||||||
|
|
||||||
[ -f $DICT ] && rm $DICT
|
[ -f $DICT ] && rm $DICT
|
||||||
util/compile.py --kanjidic $KANJIDIC --edict $EDICT $DICT
|
util/compile.py --kanjidic $KANJIDIC --edict $EDICT $DICT --enamdict $ENAMDICT
|
||||||
|
150
util/compile.py
150
util/compile.py
@ -26,64 +26,74 @@ import sys
|
|||||||
|
|
||||||
|
|
||||||
PARSED_TAGS = {
|
PARSED_TAGS = {
|
||||||
'P', # common word
|
'P', # common word
|
||||||
'adj', # former adjective classification (being removed)
|
'adj', # former adjective classification (being removed)
|
||||||
'adj-f', # noun or verb acting prenominally (other than the above)
|
'adj-f', # noun or verb acting prenominally (other than the above)
|
||||||
'adj-i', # adjective (keiyoushi)
|
'adj-i', # adjective (keiyoushi)
|
||||||
'adj-na', # adjectival nouns or quasi-adjectives (keiyodoshi)
|
'adj-na', # adjectival nouns or quasi-adjectives (keiyodoshi)
|
||||||
'adj-no', # nouns which may take the genitive case particle `no'
|
'adj-no', # nouns which may take the genitive case particle `no'
|
||||||
'adj-pn', # pre-noun adjectival (rentaishi)
|
'adj-pn', # pre-noun adjectival (rentaishi)
|
||||||
'adj-t', # `taru' adjective
|
'adj-t', # `taru' adjective
|
||||||
'adv', # adverb (fukushi)
|
'adv', # adverb (fukushi)
|
||||||
'adv-n', # adverbial noun
|
'adv-n', # adverbial noun
|
||||||
'adv-to', # adverb taking the `to' particle
|
'adv-to', # adverb taking the `to' particle
|
||||||
'aux', # auxiliary
|
'aux', # auxiliary
|
||||||
'aux-adj', # auxiliary adjective
|
'aux-adj', # auxiliary adjective
|
||||||
'aux-v', # auxiliary verb
|
'aux-v', # auxiliary verb
|
||||||
'conj', # conjunction
|
'c', # company name
|
||||||
'ctr', # counter
|
'conj', # conjunction
|
||||||
'exp', # Expressions (phrases, clauses, etc.)
|
'ctr', # counter
|
||||||
'int', # interjection (kandoushi)
|
'exp', # Expressions (phrases, clauses, etc.)
|
||||||
'iv', # irregular verb
|
'f', # female given name
|
||||||
'n', # noun (common) (futsuumeishi)
|
'g', # given name, as-yet not classified by sex
|
||||||
'n-adv', # adverbial noun (fukushitekimeishi)
|
'h', # full (usually family plus given) name of a particular person
|
||||||
'n-pref', # noun, used as a prefix
|
'int', # interjection (kandoushi)
|
||||||
'n-suf', # noun, used as a suffix
|
'iv', # irregular verb
|
||||||
'n-t', # noun (temporal) (jisoumeishi)
|
'm', # male given name
|
||||||
'num', # numeric
|
'n', # noun (common) (futsuumeishi)
|
||||||
'pn', # pronoun
|
'n-adv', # adverbial noun (fukushitekimeishi)
|
||||||
'pref' , # prefix
|
'n-pref', # noun, used as a prefix
|
||||||
'prt', # particle
|
'n-suf', # noun, used as a suffix
|
||||||
'suf', # suffix
|
'n-t', # noun (temporal) (jisoumeishi)
|
||||||
'v1', # Ichidan verb
|
'num', # numeric
|
||||||
'v2a-s', # Nidan verb with 'u' ending (archaic)
|
'p', # place-name
|
||||||
'v4h', # Yodan verb with `hu/fu' ending (archaic)
|
'pn', # pronoun
|
||||||
'v4r', # Yodan verb with `ru' ending (archaic)
|
'pr', # product name
|
||||||
'v5', # Godan verb (not completely classified)
|
'pref' , # prefix
|
||||||
'v5aru', # Godan verb - -aru special class
|
'prt', # particle
|
||||||
'v5b', # Godan verb with `bu' ending
|
's', # surname
|
||||||
'v5g', # Godan verb with `gu' ending
|
'st', # stations
|
||||||
'v5k', # Godan verb with `ku' ending
|
'suf', # suffix
|
||||||
'v5k-s', # Godan verb - iku/yuku special class
|
'u', # person name, either given or surname, as-yet unclassified
|
||||||
'v5m', # Godan verb with `mu' ending
|
'v1', # Ichidan verb
|
||||||
'v5n', # Godan verb with `nu' ending
|
'v2a-s', # Nidan verb with 'u' ending (archaic)
|
||||||
'v5r', # Godan verb with `ru' ending
|
'v4h', # Yodan verb with `hu/fu' ending (archaic)
|
||||||
'v5r-i', # Godan verb with `ru' ending (irregular verb)
|
'v4r', # Yodan verb with `ru' ending (archaic)
|
||||||
'v5s', # Godan verb with `su' ending
|
'v5', # Godan verb (not completely classified)
|
||||||
'v5t', # Godan verb with `tsu' ending
|
'v5aru', # Godan verb - -aru special class
|
||||||
'v5u', # Godan verb with `u' ending
|
'v5b', # Godan verb with `bu' ending
|
||||||
'v5u-s', # Godan verb with `u' ending (special class)
|
'v5g', # Godan verb with `gu' ending
|
||||||
'v5uru', # Godan verb - uru old class verb (old form of Eru)
|
'v5k', # Godan verb with `ku' ending
|
||||||
'v5z', # Godan verb with `zu' ending
|
'v5k-s', # Godan verb - iku/yuku special class
|
||||||
'vi', # intransitive verb
|
'v5m', # Godan verb with `mu' ending
|
||||||
'vk', # kuru verb - special class
|
'v5n', # Godan verb with `nu' ending
|
||||||
'vn', # irregular nu verb
|
'v5r', # Godan verb with `ru' ending
|
||||||
'vs', # noun or participle which takes the aux. verb suru
|
'v5r-i', # Godan verb with `ru' ending (irregular verb)
|
||||||
'vs-c', # su verb - precursor to the modern suru
|
'v5s', # Godan verb with `su' ending
|
||||||
'vs-i', # suru verb - irregular
|
'v5t', # Godan verb with `tsu' ending
|
||||||
'vs-s', # suru verb - special class
|
'v5u', # Godan verb with `u' ending
|
||||||
'vt', # transitive verb
|
'v5u-s', # Godan verb with `u' ending (special class)
|
||||||
'vz', # Ichidan verb - zuru verb - (alternative form of -jiru verbs)
|
'v5uru', # Godan verb - uru old class verb (old form of Eru)
|
||||||
|
'v5z', # Godan verb with `zu' ending
|
||||||
|
'vi', # intransitive verb
|
||||||
|
'vk', # kuru verb - special class
|
||||||
|
'vn', # irregular nu verb
|
||||||
|
'vs', # noun or participle which takes the aux. verb suru
|
||||||
|
'vs-c', # su verb - precursor to the modern suru
|
||||||
|
'vs-i', # suru verb - irregular
|
||||||
|
'vs-s', # suru verb - special class
|
||||||
|
'vt', # transitive verb
|
||||||
|
'vz', # Ichidan verb - zuru verb - (alternative form of -jiru verbs)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
@ -172,18 +182,21 @@ def writeEdict(cursor, values):
|
|||||||
cursor.executemany('INSERT INTO Terms VALUES(?, ?, ?, ?)', values)
|
cursor.executemany('INSERT INTO Terms VALUES(?, ?, ?, ?)', values)
|
||||||
|
|
||||||
|
|
||||||
def build(path, kanjidic, kradfile, edict):
|
def build(path, kanjidic, kradfile, edict, enamdict):
|
||||||
with sqlite3.connect(path) as db:
|
with sqlite3.connect(path) as db:
|
||||||
cursor = db.cursor()
|
|
||||||
|
|
||||||
if kanjidic is not None:
|
if kanjidic is not None:
|
||||||
writeKanjiDic(cursor, parseKanjiDic(kanjidic))
|
writeKanjiDic(db, parseKanjiDic(kanjidic))
|
||||||
|
|
||||||
if kradfile is not None:
|
if kradfile is not None:
|
||||||
writeKradFile(cursor, parseKradFile(kradfile))
|
writeKradFile(db, parseKradFile(kradfile))
|
||||||
|
|
||||||
|
terms = []
|
||||||
if edict is not None:
|
if edict is not None:
|
||||||
writeEdict(cursor, parseEdict(edict))
|
terms += parseEdict(edict)
|
||||||
|
if enamdict is not None:
|
||||||
|
terms += parseEdict(enamdict)
|
||||||
|
if len(terms) > 0:
|
||||||
|
writeEdict(db, terms)
|
||||||
|
|
||||||
|
|
||||||
def main():
|
def main():
|
||||||
@ -191,13 +204,20 @@ def main():
|
|||||||
parser.add_option('--kanjidic', dest='kanjidic')
|
parser.add_option('--kanjidic', dest='kanjidic')
|
||||||
parser.add_option('--kradfile', dest='kradfile')
|
parser.add_option('--kradfile', dest='kradfile')
|
||||||
parser.add_option('--edict', dest='edict')
|
parser.add_option('--edict', dest='edict')
|
||||||
|
parser.add_option('--enamdict', dest='enamdict')
|
||||||
|
|
||||||
options, args = parser.parse_args()
|
options, args = parser.parse_args()
|
||||||
|
|
||||||
if len(args) == 0:
|
if len(args) == 0:
|
||||||
parser.print_help()
|
parser.print_help()
|
||||||
else:
|
else:
|
||||||
build(args[0], options.kanjidic, options.kradfile, options.edict)
|
build(
|
||||||
|
args[0],
|
||||||
|
options.kanjidic,
|
||||||
|
options.kradfile,
|
||||||
|
options.edict,
|
||||||
|
options.enamdict
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
if __name__ == '__main__':
|
if __name__ == '__main__':
|
||||||
|
739358
util/enamdict
Normal file
739358
util/enamdict
Normal file
File diff suppressed because it is too large
Load Diff
@ -17,5 +17,5 @@
|
|||||||
|
|
||||||
|
|
||||||
c = {
|
c = {
|
||||||
'appVersion': '0.14b',
|
'appVersion': '0.15b',
|
||||||
}
|
}
|
||||||
|
@ -1,3 +1,3 @@
|
|||||||
version https://git-lfs.github.com/spec/v1
|
version https://git-lfs.github.com/spec/v1
|
||||||
oid sha256:76e8f49f623012ca646b235b660c244070f0fff53811b77c429b907878c28585
|
oid sha256:7944b0babc6812a36c01265b8df517f15f5358dee415ece53736c5c2136aa5cb
|
||||||
size 20201472
|
size 57806848
|
||||||
|
@ -240,10 +240,9 @@ def extractKindleDeck(filename):
|
|||||||
words = list()
|
words = list()
|
||||||
|
|
||||||
try:
|
try:
|
||||||
connection = sqlite3.connect(unicode(filename))
|
with sqlite3.connect(unicode(filename)) as db:
|
||||||
for row in connection.execute('select word from WORDS'):
|
for row in db.execute('select word from WORDS'):
|
||||||
words.append(row[0])
|
words.append(row[0])
|
||||||
connection.close()
|
|
||||||
except sqlite3.OperationalError:
|
except sqlite3.OperationalError:
|
||||||
pass
|
pass
|
||||||
|
|
||||||
|
Loading…
Reference in New Issue
Block a user