Simplify compile script

2016-04-29 22:25:33 -07:00 · 2016-04-29 22:25:33 -07:00 · 61993db702
commit 61993db702
parent a7e5a23e2d
1 changed files with 113 additions and 113 deletions
--- a/util/compile.py
+++ b/util/compile.py
@@ -25,118 +25,118 @@ import re


 PARSED_TAGS = {
-    'Buddh':   'Buddhist term',
-    'MA':      'martial arts term',
-    'X':       'rude or X-rated term',
-    'abbr':    'abbreviation',
-    'adj':     'former adjective classification (being removed)',
-    'adj-f':   'noun or verb acting prenominally (other than the above)',
-    'adj-i':   'adjective (keiyoushi)',
-    'adj-na':  'adjectival nouns or quasi-adjectives (keiyodoshi)',
-    'adj-no':  'nouns which may take the genitive case particle "no"',
-    'adj-pn':  'pre-noun adjectival (rentaishi)',
-    'adj-t':   '"taru" adjective',
-    'adv':     'adverb (fukushi)',
-    'adv-n':   'adverbial noun',
-    'adv-to':  'adverb taking the "to" particle',
-    'arch':    'archaism',
-    'ateji':   'ateji (phonetic) reading',
-    'aux':     'auxiliary',
-    'aux-adj': 'auxiliary adjective',
-    'aux-v':   'auxiliary verb',
-    'c':       'company name',
-    'chn':     'children\'s language',
-    'col':     'colloquialism',
-    'comp':    'computer terminology',
-    'conj':    'conjunction',
-    'ctr':     'counter',
-    'derog':   'derogatory term',
-    'eK':      'exclusively kanji',
-    'ek':      'exclusively kana',
-    'exp':     'Expressions (phrases, clauses, etc.)',
-    'f':       'female given name',
-    'fam':     'familiar language',
-    'fem':     'female term or language',
-    'food':    'food term',
-    'g':       'given name, as-yet not classified by sex',
-    'geom':    'geometry term',
-    'gikun':   'gikun (meaning) reading',
-    'gram':    'grammatical term',
-    'h':       'full (usually family plus given) name of a particular person',
-    'hon':     'honorific or respectful (sonkeigo) language',
-    'hum':     'humble (kenjougo) language',
-    'iK':      'word containing irregular kanji usage',
-    'id':      'idiomatic expression',
-    'ik':      'word containing irregular kana usage',
-    'int':     'interjection (kandoushi)',
-    'io':      'irregular okurigana usage',
-    'iv':      'irregular verb',
-    'ling':    'linguistics terminology',
-    'm':       'male given name',
-    'm-sl':    'manga slang',
-    'male':    'male term or language',
-    'male-sl': 'male slang',
-    'math':    'mathematics',
-    'mil':     'military',
-    'n':       'noun (common) (futsuumeishi)',
-    'n-adv':   'adverbial noun (fukushitekimeishi)',
-    'n-pref':  'noun, used as a prefix',
-    'n-suf':   'noun, used as a suffix',
-    'n-t':     'noun (temporal) (jisoumeishi)',
-    'num':     'numeric',
-    'oK':      'word containing out-dated kanji',
-    'obs':     'obsolete term',
-    'obsc':    'obscure term',
-    'ok':      'out-dated or obsolete kana usage',
-    'on-mim':  'onomatopoeic or mimetic word',
-    'P':       'popular term',
-    'p':       'place-name',
-    'physics': 'physics terminology',
-    'pn':      'pronoun',
-    'poet':    'poetical term',
-    'pol':     'polite (teineigo) language',
-    'pr':      'product name',
-    'pref':    'prefix',
-    'prt':     'particle',
-    'rare':    'rare (now replaced by "obsc")',
-    's':       'surname',
-    'sens':    'sensitive word',
-    'sl':      'slang',
-    'st':      'stations',
-    'suf':     'suffix',
-    'u':       'person name, either given or surname, as-yet unclassified',
-    'uK':      'word usually written using kanji alone',
-    'uk':      'word usually written using kana alone',
-    'v1':      'Ichidan verb',
-    'v2a-s':   'Nidan verb with "u" ending (archaic)',
-    'v4h':     'Yodan verb with "hu/fu" ending (archaic)',
-    'v4r':     'Yodan verb with "ru" ending (archaic)',
-    'v5':      'Godan verb (not completely classified)',
-    'v5aru':   'Godan verb - -aru special class',
-    'v5b':     'Godan verb with "bu" ending',
-    'v5g':     'Godan verb with "gu" ending',
-    'v5k':     'Godan verb with "ku" ending',
-    'v5k-s':   'Godan verb - iku/yuku special class',
-    'v5m':     'Godan verb with "mu" ending',
-    'v5n':     'Godan verb with "nu" ending',
-    'v5r':     'Godan verb with "ru" ending',
-    'v5r-i':   'Godan verb with "ru" ending (irregular verb)',
-    'v5s':     'Godan verb with "su" ending',
-    'v5t':     'Godan verb with "tsu" ending',
-    'v5u':     'Godan verb with "u" ending',
-    'v5u-s':   'Godan verb with "u" ending (special class)',
-    'v5uru':   'Godan verb - uru old class verb (old form of Eru)',
-    'v5z':     'Godan verb with "zu" ending',
-    'vi':      'intransitive verb',
-    'vk':      'kuru verb - special class',
-    'vn':      'irregular nu verb',
-    'vs':      'noun or participle which takes the aux. verb suru',
-    'vs-c':    'su verb - precursor to the modern suru',
-    'vs-i':    'suru verb - irregular',
-    'vs-s':    'suru verb - special class',
-    'vt':      'transitive ver',
-    'vulg':    'vulgar expression or word',
-    'vz':      'Ichidan verb - zuru verb - (alternative form of -jiru verbs)',
+    'Buddh',
+    'MA',
+    'X',
+    'abbr',
+    'adj',
+    'adj-f',
+    'adj-i',
+    'adj-na',
+    'adj-no',
+    'adj-pn',
+    'adj-t',
+    'adv',
+    'adv-n',
+    'adv-to',
+    'arch',
+    'ateji',
+    'aux',
+    'aux-adj',
+    'aux-v',
+    'c',
+    'chn',
+    'col',
+    'comp',
+    'conj',
+    'ctr',
+    'derog',
+    'eK',
+    'ek',
+    'exp',
+    'f',
+    'fam',
+    'fem',
+    'food',
+    'g',
+    'geom',
+    'gikun',
+    'gram',
+    'h',
+    'hon',
+    'hum',
+    'iK',
+    'id',
+    'ik',
+    'int',
+    'io',
+    'iv',
+    'ling',
+    'm',
+    'm-sl',
+    'male',
+    'male-sl',
+    'math',
+    'mil',
+    'n',
+    'n-adv',
+    'n-pref',
+    'n-suf',
+    'n-t',
+    'num',
+    'oK',
+    'obs',
+    'obsc',
+    'ok',
+    'on-mim',
+    'P',
+    'p',
+    'physics',
+    'pn',
+    'poet',
+    'pol',
+    'pr',
+    'pref',
+    'prt',
+    'rare',
+    's',
+    'sens',
+    'sl',
+    'st',
+    'suf',
+    'u',
+    'uK',
+    'uk',
+    'v1',
+    'v2a-s',
+    'v4h',
+    'v4r',
+    'v5',
+    'v5aru',
+    'v5b',
+    'v5g',
+    'v5k',
+    'v5k-s',
+    'v5m',
+    'v5n',
+    'v5r',
+    'v5r-i',
+    'v5s',
+    'v5t',
+    'v5u',
+    'v5u-s',
+    'v5uru',
+    'v5z',
+    'vi',
+    'vk',
+    'vn',
+    'vs',
+    'vs-c',
+    'vs-i',
+    'vs-s',
+    'vt',
+    'vulg',
+    'vz'
 }


@@ -184,7 +184,7 @@ def parse_edict(path):
            dfn_match = re.search(r'^((?:\((?:[\w\-\,\:]*)*\)\s*)*)(.*)$', dfn)

            tags_raw = set(filter(None, re.split(r'[\s\(\),]', dfn_match.group(1))))
-            tags_raw = tags_raw.intersection(set(PARSED_TAGS.keys()))
+            tags_raw = tags_raw.intersection(PARSED_TAGS)
            tags = tags.union(tags_raw)

            gloss = dfn_match.group(2).strip()