From bf28dea2838619c6446367f240e240cdddef3586 Mon Sep 17 00:00:00 2001 From: Alex Yatskov Date: Tue, 12 Apr 2016 20:58:41 -0700 Subject: [PATCH] Updating dictionaries --- ext/bg/js/dictionary.js | 33 +++++++-------------------------- ext/bg/js/translator.js | 15 ++------------- util/compile.py | 10 ++++++++++ 3 files changed, 19 insertions(+), 39 deletions(-) diff --git a/ext/bg/js/dictionary.js b/ext/bg/js/dictionary.js index a68c2daf..feb8c92b 100644 --- a/ext/bg/js/dictionary.js +++ b/ext/bg/js/dictionary.js @@ -19,31 +19,20 @@ class Dictionary { constructor() { - this.terms = []; - this.termIndices = {}; - - this.kanji = []; - this.kanjiIndices = {}; + this.termDicts = {}; + this.kanjiDicts = {}; } - addTermData(terms) { - let index = this.terms.length; - for (const [e, r, g, t] of terms) { - this.storeIndex(this.termIndices, e, index); - this.storeIndex(this.termIndices, r, index++); - this.terms.push([e, r, g, t]); - } + addTermDict(name, dict) { + this.termDicts[name] = dict; } - addKanjiData(kanji) { - let index = this.kanji.length; - for (const [c, k, o, g] of kanji) { - this.storeIndex(this.kanjiIndices, c, index++); - this.kanji.push([c, k, o, g]); - } + addKanjiDict(name, dict) { + this.kanjiDicts[name] = dict; } findTerm(term) { + const results = []; return (this.termIndices[term] || []).map(index => { const [e, r, g, t] = this.terms[index]; return {id: index, expression: e, reading: r, glossary: g, tags: t.split(' ')}; @@ -56,12 +45,4 @@ class Dictionary { return {id: index, character: c, kunyomi: k, onyomi: o, glossary: g}; }); } - - storeIndex(indices, term, index) { - if (term.length > 0) { - const indices = this.termIndices[term] || []; - indices.push(index); - this.termIndices[term] = indices; - } - } } diff --git a/ext/bg/js/translator.js b/ext/bg/js/translator.js index e8224320..5414a553 100644 --- a/ext/bg/js/translator.js +++ b/ext/bg/js/translator.js @@ -46,11 +46,11 @@ class Translator { this.deinflector.setRules(JSON.parse(response)); break; case 'kanjidic': - this.dictionary.addKanjiData(Translator.parseCsv(response)); + this.dictionary.addKanjiDict('kanjidic', JSON.parse(response)); break; case 'edict': case 'enamdict': - this.dictionary.addTermData(Translator.parseCsv(response)); + this.dictionary.addTermDict(key, JSON.parse(response)); break; } @@ -164,15 +164,4 @@ class Translator { xhr.open('GET', chrome.extension.getURL(url), true); xhr.send(); } - - static parseCsv(data) { - const result = []; - for (const row of data.split('\n')) { - if (row.length > 0) { - result.push(row.split('\t')); - } - } - - return result; - } } diff --git a/util/compile.py b/util/compile.py index b41fdf6d..8d8470dd 100755 --- a/util/compile.py +++ b/util/compile.py @@ -44,6 +44,7 @@ PARSED_TAGS = { 'aux': 'auxiliary', 'aux-adj': 'auxiliary adjective', 'aux-v': 'auxiliary verb', + 'c': 'company name', 'chn': 'children\'s language', 'col': 'colloquialism', 'comp': 'computer terminology', @@ -53,12 +54,15 @@ PARSED_TAGS = { 'eK': 'exclusively kanji', 'ek': 'exclusively kana', 'exp': 'Expressions (phrases, clauses, etc.)', + 'f': 'female given name', 'fam': 'familiar language', 'fem': 'female term or language', 'food': 'food term', + 'g': 'given name, as-yet not classified by sex', 'geom': 'geometry term', 'gikun': 'gikun (meaning) reading', 'gram': 'grammatical term', + 'h': 'full (usually family plus given) name of a particular person', 'hon': 'honorific or respectful (sonkeigo) language', 'hum': 'humble (kenjougo) language', 'iK': 'word containing irregular kanji usage', @@ -68,6 +72,7 @@ PARSED_TAGS = { 'io': 'irregular okurigana usage', 'iv': 'irregular verb', 'ling': 'linguistics terminology', + 'm': 'male given name', 'm-sl': 'manga slang', 'male': 'male term or language', 'male-sl': 'male slang', @@ -84,16 +89,21 @@ PARSED_TAGS = { 'obsc': 'obscure term', 'ok': 'out-dated or obsolete kana usage', 'on-mim': 'onomatopoeic or mimetic word', + 'p': 'place-name', 'physics': 'physics terminology', 'pn': 'pronoun', 'poet': 'poetical term', 'pol': 'polite (teineigo) language', + 'pr': 'product name', 'pref': 'prefix', 'prt': 'particle', 'rare': 'rare (now replaced by "obsc")', + 's': 'surname', 'sens': 'sensitive word', 'sl': 'slang', + 'st': 'stations', 'suf': 'suffix', + 'u': 'person name, either given or surname, as-yet unclassified', 'uK': 'word usually written using kanji alone', 'uk': 'word usually written using kana alone', 'v1': 'Ichidan verb',