Renaming columns in database, making sure P tag gets parsed
Former-commit-id: 0794e83218672f47fc467e2d7f24bede994c94d7
This commit is contained in:
parent
4329d6c047
commit
c43d1da885
@ -3,7 +3,7 @@
|
|||||||
KANJIDIC=util/data/kanjidic
|
KANJIDIC=util/data/kanjidic
|
||||||
KRADFILE=util/data/kradfile
|
KRADFILE=util/data/kradfile
|
||||||
EDICT=util/data/edict
|
EDICT=util/data/edict
|
||||||
DICT=yomi_base/japanese2/data/dictionary.db
|
DICT=yomi_base/japanese/data/dictionary.db
|
||||||
|
|
||||||
rm $DICT
|
rm $DICT
|
||||||
util/compile.py --kanjidic $KANJIDIC --kradfile $KRADFILE --edict $EDICT $DICT
|
util/compile.py --kanjidic $KANJIDIC --kradfile $KRADFILE --edict $EDICT $DICT
|
||||||
|
@ -25,7 +25,8 @@ import sqlite3
|
|||||||
import sys
|
import sys
|
||||||
|
|
||||||
|
|
||||||
GRAMMAR_TAGS = {
|
PARSED_TAGS = {
|
||||||
|
'P', # common word
|
||||||
'adj', # former adjective classification (being removed)
|
'adj', # former adjective classification (being removed)
|
||||||
'adj-f', # noun or verb acting prenominally (other than the above)
|
'adj-f', # noun or verb acting prenominally (other than the above)
|
||||||
'adj-i', # adjective (keiyoushi)
|
'adj-i', # adjective (keiyoushi)
|
||||||
@ -106,17 +107,17 @@ def parseKanjiDic(path):
|
|||||||
for line in loadDefinitions(path):
|
for line in loadDefinitions(path):
|
||||||
segments = line.split()
|
segments = line.split()
|
||||||
character = segments[0]
|
character = segments[0]
|
||||||
kunYomi = ', '.join(filter(lambda x: filter(isHiragana, x), segments[1:]))
|
kunyomi = ', '.join(filter(lambda x: filter(isHiragana, x), segments[1:]))
|
||||||
onYomi = ', '.join(filter(lambda x: filter(isKatakana, x), segments[1:]))
|
onyomi = ', '.join(filter(lambda x: filter(isKatakana, x), segments[1:]))
|
||||||
meanings = '; '.join(re.findall('\{([^\}]+)\}', line))
|
glossary = '; '.join(re.findall('\{([^\}]+)\}', line))
|
||||||
results.append((character, onYomi, kunYomi, meanings))
|
results.append((character, onyomi, kunyomi, glossary))
|
||||||
|
|
||||||
return results
|
return results
|
||||||
|
|
||||||
|
|
||||||
def writeKanjiDic(cursor, values):
    """Recreate the Kanji table and populate it with parsed kanji rows.

    Any existing Kanji table is dropped first, so the table afterwards
    contains exactly the rows in `values`.

    cursor -- database cursor used to run the statements (the file imports
              sqlite3; '?' placeholders are used).
    values -- iterable of 4-tuples in the order emitted by parseKanjiDic:
              (character, onyomi, kunyomi, glossary).
    """

    cursor.execute('DROP TABLE IF EXISTS Kanji')

    # The column layout is left as declared (kunyomi before onyomi) so that
    # readers relying on 'SELECT *' position keep seeing the same schema.
    cursor.execute(
        'CREATE TABLE Kanji(character TEXT, kunyomi TEXT, onyomi TEXT, glossary TEXT)'
    )

    # The incoming tuples carry onyomi BEFORE kunyomi, which is the opposite
    # of the table's column order. A positional INSERT would therefore store
    # on-readings in the kunyomi column and vice versa; naming the columns
    # binds every value to the column it actually belongs to.
    cursor.executemany(
        'INSERT INTO Kanji(character, onyomi, kunyomi, glossary) VALUES(?, ?, ?, ?)',
        values
    )
|
||||||
|
|
||||||
|
|
||||||
@ -149,25 +150,25 @@ def parseEdict(path):
|
|||||||
match = re.search('\[([^\]]+)\]', expression[1])
|
match = re.search('\[([^\]]+)\]', expression[1])
|
||||||
reading = None if match is None else match.group(1)
|
reading = None if match is None else match.group(1)
|
||||||
|
|
||||||
definitions = filter(lambda x: len(x) > 0, segments[1:])
|
glossary = filter(lambda x: len(x) > 0, segments[1:])
|
||||||
definitions = '; '.join(definitions)
|
glossary = '; '.join(glossary)
|
||||||
definitions = re.sub('\(\d+\)\s*', str(), definitions)
|
glossary = re.sub('\(\d+\)\s*', str(), glossary)
|
||||||
|
|
||||||
tags = list()
|
tags = list()
|
||||||
for group in re.findall('\(([^\)\]]+)\)', definitions):
|
for group in re.findall('\(([^\)\]]+)\)', glossary):
|
||||||
tags.extend(group.split(','))
|
tags.extend(group.split(','))
|
||||||
|
|
||||||
tags = set(tags).intersection(GRAMMAR_TAGS)
|
tags = set(tags).intersection(PARSED_TAGS)
|
||||||
tags = ' '.join(sorted(tags))
|
tags = ' '.join(sorted(tags))
|
||||||
|
|
||||||
results.append((term, reading, definitions, tags))
|
results.append((term, reading, glossary, tags))
|
||||||
|
|
||||||
return results
|
return results
|
||||||
|
|
||||||
|
|
||||||
def writeEdict(cursor, values):
    """Recreate the Terms table and populate it with parsed term rows.

    Any existing Terms table is dropped first, so the table afterwards
    contains exactly the rows in `values`.

    cursor -- database cursor used to run the statements (the file imports
              sqlite3; '?' placeholders are used).
    values -- iterable of 4-tuples in the order emitted by parseEdict:
              (term, reading, glossary, tags). `reading` may be None when
              the entry has no bracketed reading.
    """

    cursor.execute('DROP TABLE IF EXISTS Terms')
    cursor.execute(
        'CREATE TABLE Terms(expression TEXT, reading TEXT, glossary TEXT, tags TEXT)'
    )

    # Columns are named explicitly so each tuple field is tied to its column
    # by name rather than by the declaration order of the CREATE TABLE above;
    # reordering either side can then no longer silently misfile values.
    cursor.executemany(
        'INSERT INTO Terms(expression, reading, glossary, tags) VALUES(?, ?, ?, ?)',
        values
    )
|
||||||
|
|
||||||
|
|
||||||
|
@ -1,3 +1,3 @@
|
|||||||
version https://git-lfs.github.com/spec/v1
|
version https://git-lfs.github.com/spec/v1
|
||||||
oid sha256:da31b200f6362ba5041bbb848d9c7e3d991a96dfe395d18255333107f21a205c
|
oid sha256:4718fcf7ca6fbb26611ba5246e75faed0a4d8ccb994e811724a5c5ca1b9e182a
|
||||||
size 20322304
|
size 20370432
|
||||||
|
Loading…
Reference in New Issue
Block a user