From 4835f58758f8fe9644bd44568de9a89c3155ae5b Mon Sep 17 00:00:00 2001 From: Alex Yatskov Date: Tue, 27 Dec 2016 17:52:50 -0800 Subject: [PATCH] wip --- daijirin.go | 131 ++++------------------------------------------------ 1 file changed, 8 insertions(+), 123 deletions(-) diff --git a/daijirin.go b/daijirin.go index fa49e35..544ae49 100644 --- a/daijirin.go +++ b/daijirin.go @@ -34,16 +34,18 @@ type daijirinExtractor struct { metaExp *regexp.Regexp v5Exp *regexp.Regexp v1Exp *regexp.Regexp + tagExp *regexp.Regexp } func makeDaijirinExtractor() epwingExtractor { return &daijirinExtractor{ partsExp: regexp.MustCompile(`([^(【〖]+)(?:【(.*)】)?(?:〖(.*)〗)?(?:((.*)))?`), readGroupExp: regexp.MustCompile(`[-・]+`), - expVarExp: regexp.MustCompile(`\((.*)\)`), - metaExp: regexp.MustCompile(`((.*))`), - v5Exp: regexp.MustCompile(`(動.五)|(動..二)`), - v1Exp: regexp.MustCompile(`動..一`), + expVarExp: regexp.MustCompile(`\(([^\)]*)\)`), + metaExp: regexp.MustCompile(`(([^)]*))`), + v5Exp: regexp.MustCompile(`(動.[四五](\[[^\]]+\])?)|(動..二)`), + v1Exp: regexp.MustCompile(`(動..一)`), + tagExp: regexp.MustCompile(`(動.[四五](\[[^\]]+\])?)|(動..二)|(動..一)|(名)|(形動)|(副)|(連語)|(形)|(枕詞)|(代)|(感)|(接尾)|(助動)|(接続)|(接頭)|(連体)|(終助)|(接助)|(副助)|(係助)|(格助)|(間投助)`), } } @@ -138,126 +140,9 @@ func (*daijirinExtractor) getRevision() string { } func (e *daijirinExtractor) exportTags(term *dbTerm, tags []string) { - parsed := []string{ - "並立助", - "代", - "係助", - "副", - "副助", - "助動", - "動", - "動ア上一", - "動ア下一", - "動ア下二", - "動ア五[ハ四]", - "動カ上一", - "動カ上二", - "動カ下一", - "動カ下二", - "動カ五", - "動カ五[四]", - "動カ四", - "動カ変", - "動ガ上一", - "動ガ上二", - "動ガ下一", - "動ガ下二", - "動ガ五", - "動ガ五[四]", - "動ガ四", - "動サ上一", - "動サ下一", - "動サ下二", - "動サ五", - "動サ五[四]", - "動サ四", - "動サ変", - "動サ特活", - "動ザ上一", - "動ザ上二", - "動ザ下一", - "動ザ下二", - "動タ上一", - "動タ上二", - "動タ下一", - "動タ下二", - "動タ五[四]", - "動タ四", - "動ダ上二", - "動ダ下一", - "動ダ下二", - "動ナ上一", - "動ナ下一", - "動ナ下二", - "動ナ五", - "動ナ五[四]", - "動ハ上一", - "動ハ上二", - "動ハ下一", - "動ハ下二", - "動ハ四", - "動ハ特活", - "動バ上一", - "動バ上二", - "動バ下一", - "動バ下二", - "動バ五[四]", - "動バ四", - "動マ上一", - "動マ上二", - "動マ下一", - "動マ下二", - "動マ五", - "動マ五[四]", - "動マ四", - "動マ特活", - "動マ特活", - "動ヤ上一", - "動ヤ上二", - "動ヤ下二", - "動ラ上一", - "動ラ上二", - "動ラ下一", - "動ラ下二", - "動ラ五", - "動ラ五[四]", - "動ラ四", - "動ラ変", - "動ラ特活", - "動ワ上一", - "動ワ上二", - "動ワ下一", - "動ワ下二", - "動ワ五", - "動ワ五[ハ四]", - "動五[四]", - "動特活", - "動詞五[四]段型活用", - "名", - "形", - "形ク", - "形シク", - "形動", - "形動タリ", - "形動ナリ", - "感", - "接助", - "接尾", - "接続", - "接頭", - "枕詞", - "格助", - "終助", - "連体", - "連語", - "間投助", - } - for _, tag := range tags { - for _, p := range parsed { - if tag == p { - term.addTags(tag) - } + if e.tagExp.MatchString(tag) { + term.addTags(tag) } } }