From 61993db7026d2e06666ba6aeea1f7eed388d8592 Mon Sep 17 00:00:00 2001 From: Alex Yatskov Date: Fri, 29 Apr 2016 22:25:33 -0700 Subject: [PATCH] Simplify compile script --- util/compile.py | 226 ++++++++++++++++++++++++------------------------ 1 file changed, 113 insertions(+), 113 deletions(-) diff --git a/util/compile.py b/util/compile.py index 842dfa9a..41c3f432 100755 --- a/util/compile.py +++ b/util/compile.py @@ -25,118 +25,118 @@ import re PARSED_TAGS = { - 'Buddh': 'Buddhist term', - 'MA': 'martial arts term', - 'X': 'rude or X-rated term', - 'abbr': 'abbreviation', - 'adj': 'former adjective classification (being removed)', - 'adj-f': 'noun or verb acting prenominally (other than the above)', - 'adj-i': 'adjective (keiyoushi)', - 'adj-na': 'adjectival nouns or quasi-adjectives (keiyodoshi)', - 'adj-no': 'nouns which may take the genitive case particle "no"', - 'adj-pn': 'pre-noun adjectival (rentaishi)', - 'adj-t': '"taru" adjective', - 'adv': 'adverb (fukushi)', - 'adv-n': 'adverbial noun', - 'adv-to': 'adverb taking the "to" particle', - 'arch': 'archaism', - 'ateji': 'ateji (phonetic) reading', - 'aux': 'auxiliary', - 'aux-adj': 'auxiliary adjective', - 'aux-v': 'auxiliary verb', - 'c': 'company name', - 'chn': 'children\'s language', - 'col': 'colloquialism', - 'comp': 'computer terminology', - 'conj': 'conjunction', - 'ctr': 'counter', - 'derog': 'derogatory term', - 'eK': 'exclusively kanji', - 'ek': 'exclusively kana', - 'exp': 'Expressions (phrases, clauses, etc.)', - 'f': 'female given name', - 'fam': 'familiar language', - 'fem': 'female term or language', - 'food': 'food term', - 'g': 'given name, as-yet not classified by sex', - 'geom': 'geometry term', - 'gikun': 'gikun (meaning) reading', - 'gram': 'grammatical term', - 'h': 'full (usually family plus given) name of a particular person', - 'hon': 'honorific or respectful (sonkeigo) language', - 'hum': 'humble (kenjougo) language', - 'iK': 'word containing irregular kanji usage', - 'id': 'idiomatic expression', - 'ik': 'word containing irregular kana usage', - 'int': 'interjection (kandoushi)', - 'io': 'irregular okurigana usage', - 'iv': 'irregular verb', - 'ling': 'linguistics terminology', - 'm': 'male given name', - 'm-sl': 'manga slang', - 'male': 'male term or language', - 'male-sl': 'male slang', - 'math': 'mathematics', - 'mil': 'military', - 'n': 'noun (common) (futsuumeishi)', - 'n-adv': 'adverbial noun (fukushitekimeishi)', - 'n-pref': 'noun, used as a prefix', - 'n-suf': 'noun, used as a suffix', - 'n-t': 'noun (temporal) (jisoumeishi)', - 'num': 'numeric', - 'oK': 'word containing out-dated kanji', - 'obs': 'obsolete term', - 'obsc': 'obscure term', - 'ok': 'out-dated or obsolete kana usage', - 'on-mim': 'onomatopoeic or mimetic word', - 'P': 'popular term', - 'p': 'place-name', - 'physics': 'physics terminology', - 'pn': 'pronoun', - 'poet': 'poetical term', - 'pol': 'polite (teineigo) language', - 'pr': 'product name', - 'pref': 'prefix', - 'prt': 'particle', - 'rare': 'rare (now replaced by "obsc")', - 's': 'surname', - 'sens': 'sensitive word', - 'sl': 'slang', - 'st': 'stations', - 'suf': 'suffix', - 'u': 'person name, either given or surname, as-yet unclassified', - 'uK': 'word usually written using kanji alone', - 'uk': 'word usually written using kana alone', - 'v1': 'Ichidan verb', - 'v2a-s': 'Nidan verb with "u" ending (archaic)', - 'v4h': 'Yodan verb with "hu/fu" ending (archaic)', - 'v4r': 'Yodan verb with "ru" ending (archaic)', - 'v5': 'Godan verb (not completely classified)', - 'v5aru': 'Godan verb - -aru special class', - 'v5b': 'Godan verb with "bu" ending', - 'v5g': 'Godan verb with "gu" ending', - 'v5k': 'Godan verb with "ku" ending', - 'v5k-s': 'Godan verb - iku/yuku special class', - 'v5m': 'Godan verb with "mu" ending', - 'v5n': 'Godan verb with "nu" ending', - 'v5r': 'Godan verb with "ru" ending', - 'v5r-i': 'Godan verb with "ru" ending (irregular verb)', - 'v5s': 'Godan verb with "su" ending', - 'v5t': 'Godan verb with "tsu" ending', - 'v5u': 'Godan verb with "u" ending', - 'v5u-s': 'Godan verb with "u" ending (special class)', - 'v5uru': 'Godan verb - uru old class verb (old form of Eru)', - 'v5z': 'Godan verb with "zu" ending', - 'vi': 'intransitive verb', - 'vk': 'kuru verb - special class', - 'vn': 'irregular nu verb', - 'vs': 'noun or participle which takes the aux. verb suru', - 'vs-c': 'su verb - precursor to the modern suru', - 'vs-i': 'suru verb - irregular', - 'vs-s': 'suru verb - special class', - 'vt': 'transitive ver', - 'vulg': 'vulgar expression or word', - 'vz': 'Ichidan verb - zuru verb - (alternative form of -jiru verbs)', + 'Buddh', + 'MA', + 'X', + 'abbr', + 'adj', + 'adj-f', + 'adj-i', + 'adj-na', + 'adj-no', + 'adj-pn', + 'adj-t', + 'adv', + 'adv-n', + 'adv-to', + 'arch', + 'ateji', + 'aux', + 'aux-adj', + 'aux-v', + 'c', + 'chn', + 'col', + 'comp', + 'conj', + 'ctr', + 'derog', + 'eK', + 'ek', + 'exp', + 'f', + 'fam', + 'fem', + 'food', + 'g', + 'geom', + 'gikun', + 'gram', + 'h', + 'hon', + 'hum', + 'iK', + 'id', + 'ik', + 'int', + 'io', + 'iv', + 'ling', + 'm', + 'm-sl', + 'male', + 'male-sl', + 'math', + 'mil', + 'n', + 'n-adv', + 'n-pref', + 'n-suf', + 'n-t', + 'num', + 'oK', + 'obs', + 'obsc', + 'ok', + 'on-mim', + 'P', + 'p', + 'physics', + 'pn', + 'poet', + 'pol', + 'pr', + 'pref', + 'prt', + 'rare', + 's', + 'sens', + 'sl', + 'st', + 'suf', + 'u', + 'uK', + 'uk', + 'v1', + 'v2a-s', + 'v4h', + 'v4r', + 'v5', + 'v5aru', + 'v5b', + 'v5g', + 'v5k', + 'v5k-s', + 'v5m', + 'v5n', + 'v5r', + 'v5r-i', + 'v5s', + 'v5t', + 'v5u', + 'v5u-s', + 'v5uru', + 'v5z', + 'vi', + 'vk', + 'vn', + 'vs', + 'vs-c', + 'vs-i', + 'vs-s', + 'vt', + 'vulg', + 'vz' } @@ -184,7 +184,7 @@ def parse_edict(path): dfn_match = re.search(r'^((?:\((?:[\w\-\,\:]*)*\)\s*)*)(.*)$', dfn) tags_raw = set(filter(None, re.split(r'[\s\(\),]', dfn_match.group(1)))) - tags_raw = tags_raw.intersection(set(PARSED_TAGS.keys())) + tags_raw = tags_raw.intersection(PARSED_TAGS) tags = tags.union(tags_raw) gloss = dfn_match.group(2).strip()