Simplify compile script
This commit is contained in:
parent
a7e5a23e2d
commit
61993db702
226
util/compile.py
226
util/compile.py
@ -25,118 +25,118 @@ import re
|
||||
|
||||
|
||||
PARSED_TAGS = {
|
||||
'Buddh': 'Buddhist term',
|
||||
'MA': 'martial arts term',
|
||||
'X': 'rude or X-rated term',
|
||||
'abbr': 'abbreviation',
|
||||
'adj': 'former adjective classification (being removed)',
|
||||
'adj-f': 'noun or verb acting prenominally (other than the above)',
|
||||
'adj-i': 'adjective (keiyoushi)',
|
||||
'adj-na': 'adjectival nouns or quasi-adjectives (keiyodoshi)',
|
||||
'adj-no': 'nouns which may take the genitive case particle "no"',
|
||||
'adj-pn': 'pre-noun adjectival (rentaishi)',
|
||||
'adj-t': '"taru" adjective',
|
||||
'adv': 'adverb (fukushi)',
|
||||
'adv-n': 'adverbial noun',
|
||||
'adv-to': 'adverb taking the "to" particle',
|
||||
'arch': 'archaism',
|
||||
'ateji': 'ateji (phonetic) reading',
|
||||
'aux': 'auxiliary',
|
||||
'aux-adj': 'auxiliary adjective',
|
||||
'aux-v': 'auxiliary verb',
|
||||
'c': 'company name',
|
||||
'chn': 'children\'s language',
|
||||
'col': 'colloquialism',
|
||||
'comp': 'computer terminology',
|
||||
'conj': 'conjunction',
|
||||
'ctr': 'counter',
|
||||
'derog': 'derogatory term',
|
||||
'eK': 'exclusively kanji',
|
||||
'ek': 'exclusively kana',
|
||||
'exp': 'Expressions (phrases, clauses, etc.)',
|
||||
'f': 'female given name',
|
||||
'fam': 'familiar language',
|
||||
'fem': 'female term or language',
|
||||
'food': 'food term',
|
||||
'g': 'given name, as-yet not classified by sex',
|
||||
'geom': 'geometry term',
|
||||
'gikun': 'gikun (meaning) reading',
|
||||
'gram': 'grammatical term',
|
||||
'h': 'full (usually family plus given) name of a particular person',
|
||||
'hon': 'honorific or respectful (sonkeigo) language',
|
||||
'hum': 'humble (kenjougo) language',
|
||||
'iK': 'word containing irregular kanji usage',
|
||||
'id': 'idiomatic expression',
|
||||
'ik': 'word containing irregular kana usage',
|
||||
'int': 'interjection (kandoushi)',
|
||||
'io': 'irregular okurigana usage',
|
||||
'iv': 'irregular verb',
|
||||
'ling': 'linguistics terminology',
|
||||
'm': 'male given name',
|
||||
'm-sl': 'manga slang',
|
||||
'male': 'male term or language',
|
||||
'male-sl': 'male slang',
|
||||
'math': 'mathematics',
|
||||
'mil': 'military',
|
||||
'n': 'noun (common) (futsuumeishi)',
|
||||
'n-adv': 'adverbial noun (fukushitekimeishi)',
|
||||
'n-pref': 'noun, used as a prefix',
|
||||
'n-suf': 'noun, used as a suffix',
|
||||
'n-t': 'noun (temporal) (jisoumeishi)',
|
||||
'num': 'numeric',
|
||||
'oK': 'word containing out-dated kanji',
|
||||
'obs': 'obsolete term',
|
||||
'obsc': 'obscure term',
|
||||
'ok': 'out-dated or obsolete kana usage',
|
||||
'on-mim': 'onomatopoeic or mimetic word',
|
||||
'P': 'popular term',
|
||||
'p': 'place-name',
|
||||
'physics': 'physics terminology',
|
||||
'pn': 'pronoun',
|
||||
'poet': 'poetical term',
|
||||
'pol': 'polite (teineigo) language',
|
||||
'pr': 'product name',
|
||||
'pref': 'prefix',
|
||||
'prt': 'particle',
|
||||
'rare': 'rare (now replaced by "obsc")',
|
||||
's': 'surname',
|
||||
'sens': 'sensitive word',
|
||||
'sl': 'slang',
|
||||
'st': 'stations',
|
||||
'suf': 'suffix',
|
||||
'u': 'person name, either given or surname, as-yet unclassified',
|
||||
'uK': 'word usually written using kanji alone',
|
||||
'uk': 'word usually written using kana alone',
|
||||
'v1': 'Ichidan verb',
|
||||
'v2a-s': 'Nidan verb with "u" ending (archaic)',
|
||||
'v4h': 'Yodan verb with "hu/fu" ending (archaic)',
|
||||
'v4r': 'Yodan verb with "ru" ending (archaic)',
|
||||
'v5': 'Godan verb (not completely classified)',
|
||||
'v5aru': 'Godan verb - -aru special class',
|
||||
'v5b': 'Godan verb with "bu" ending',
|
||||
'v5g': 'Godan verb with "gu" ending',
|
||||
'v5k': 'Godan verb with "ku" ending',
|
||||
'v5k-s': 'Godan verb - iku/yuku special class',
|
||||
'v5m': 'Godan verb with "mu" ending',
|
||||
'v5n': 'Godan verb with "nu" ending',
|
||||
'v5r': 'Godan verb with "ru" ending',
|
||||
'v5r-i': 'Godan verb with "ru" ending (irregular verb)',
|
||||
'v5s': 'Godan verb with "su" ending',
|
||||
'v5t': 'Godan verb with "tsu" ending',
|
||||
'v5u': 'Godan verb with "u" ending',
|
||||
'v5u-s': 'Godan verb with "u" ending (special class)',
|
||||
'v5uru': 'Godan verb - uru old class verb (old form of Eru)',
|
||||
'v5z': 'Godan verb with "zu" ending',
|
||||
'vi': 'intransitive verb',
|
||||
'vk': 'kuru verb - special class',
|
||||
'vn': 'irregular nu verb',
|
||||
'vs': 'noun or participle which takes the aux. verb suru',
|
||||
'vs-c': 'su verb - precursor to the modern suru',
|
||||
'vs-i': 'suru verb - irregular',
|
||||
'vs-s': 'suru verb - special class',
|
||||
'vt': 'transitive ver',
|
||||
'vulg': 'vulgar expression or word',
|
||||
'vz': 'Ichidan verb - zuru verb - (alternative form of -jiru verbs)',
|
||||
'Buddh',
|
||||
'MA',
|
||||
'X',
|
||||
'abbr',
|
||||
'adj',
|
||||
'adj-f',
|
||||
'adj-i',
|
||||
'adj-na',
|
||||
'adj-no',
|
||||
'adj-pn',
|
||||
'adj-t',
|
||||
'adv',
|
||||
'adv-n',
|
||||
'adv-to',
|
||||
'arch',
|
||||
'ateji',
|
||||
'aux',
|
||||
'aux-adj',
|
||||
'aux-v',
|
||||
'c',
|
||||
'chn',
|
||||
'col',
|
||||
'comp',
|
||||
'conj',
|
||||
'ctr',
|
||||
'derog',
|
||||
'eK',
|
||||
'ek',
|
||||
'exp',
|
||||
'f',
|
||||
'fam',
|
||||
'fem',
|
||||
'food',
|
||||
'g',
|
||||
'geom',
|
||||
'gikun',
|
||||
'gram',
|
||||
'h',
|
||||
'hon',
|
||||
'hum',
|
||||
'iK',
|
||||
'id',
|
||||
'ik',
|
||||
'int',
|
||||
'io',
|
||||
'iv',
|
||||
'ling',
|
||||
'm',
|
||||
'm-sl',
|
||||
'male',
|
||||
'male-sl',
|
||||
'math',
|
||||
'mil',
|
||||
'n',
|
||||
'n-adv',
|
||||
'n-pref',
|
||||
'n-suf',
|
||||
'n-t',
|
||||
'num',
|
||||
'oK',
|
||||
'obs',
|
||||
'obsc',
|
||||
'ok',
|
||||
'on-mim',
|
||||
'P',
|
||||
'p',
|
||||
'physics',
|
||||
'pn',
|
||||
'poet',
|
||||
'pol',
|
||||
'pr',
|
||||
'pref',
|
||||
'prt',
|
||||
'rare',
|
||||
's',
|
||||
'sens',
|
||||
'sl',
|
||||
'st',
|
||||
'suf',
|
||||
'u',
|
||||
'uK',
|
||||
'uk',
|
||||
'v1',
|
||||
'v2a-s',
|
||||
'v4h',
|
||||
'v4r',
|
||||
'v5',
|
||||
'v5aru',
|
||||
'v5b',
|
||||
'v5g',
|
||||
'v5k',
|
||||
'v5k-s',
|
||||
'v5m',
|
||||
'v5n',
|
||||
'v5r',
|
||||
'v5r-i',
|
||||
'v5s',
|
||||
'v5t',
|
||||
'v5u',
|
||||
'v5u-s',
|
||||
'v5uru',
|
||||
'v5z',
|
||||
'vi',
|
||||
'vk',
|
||||
'vn',
|
||||
'vs',
|
||||
'vs-c',
|
||||
'vs-i',
|
||||
'vs-s',
|
||||
'vt',
|
||||
'vulg',
|
||||
'vz'
|
||||
}
|
||||
|
||||
|
||||
@ -184,7 +184,7 @@ def parse_edict(path):
|
||||
dfn_match = re.search(r'^((?:\((?:[\w\-\,\:]*)*\)\s*)*)(.*)$', dfn)
|
||||
|
||||
tags_raw = set(filter(None, re.split(r'[\s\(\),]', dfn_match.group(1))))
|
||||
tags_raw = tags_raw.intersection(set(PARSED_TAGS.keys()))
|
||||
tags_raw = tags_raw.intersection(PARSED_TAGS)
|
||||
tags = tags.union(tags_raw)
|
||||
|
||||
gloss = dfn_match.group(2).strip()
|
||||
|
Loading…
Reference in New Issue
Block a user