From f222e002c6d7569fe8ee8bce5a7f739121b01c67 Mon Sep 17 00:00:00 2001 From: Alex Yatskov Date: Wed, 21 Dec 2016 22:41:27 -0800 Subject: [PATCH] WIP --- daijirin.go | 52 +++++++++++++++++++++++++++++++++++++++++++++++++--- 1 file changed, 49 insertions(+), 3 deletions(-) diff --git a/daijirin.go b/daijirin.go index 601894d..1d65cca 100644 --- a/daijirin.go +++ b/daijirin.go @@ -68,9 +68,11 @@ func (e *daijirinExtractor) extractTerms(entry epwingEntry) []dbTerm { } var tags []string - if matches := e.annotExp.FindStringSubmatch(strings.Split(entry.Text, "\n")[0]); matches != nil { - for _, tag := range strings.Split(matches[1], "・") { - tags = append(tags, tag) + for _, split := range strings.Split(entry.Text, "\n") { + if matches := e.annotExp.FindStringSubmatch(split); matches != nil { + for _, tag := range strings.Split(matches[1], "・") { + tags = append(tags, tag) + } } } @@ -83,6 +85,8 @@ func (e *daijirinExtractor) extractTerms(entry epwingEntry) []dbTerm { } e.exportTags(&term, tags) + e.exportRules(&term, tags) + terms = append(terms, term) } @@ -96,6 +100,8 @@ func (e *daijirinExtractor) extractTerms(entry epwingEntry) []dbTerm { } e.exportTags(&term, tags) + e.exportRules(&term, tags) + terms = append(terms, term) } } @@ -108,6 +114,46 @@ func (*daijirinExtractor) extractKanji(entry epwingEntry) []dbKanji { return nil } +func (e *daijirinExtractor) exportRules(term *dbTerm, tags []string) { + v5 := []string{ + "動ワ五", + "動カ下二", "動カ五", + "動ガ下二", "動ガ五", + "動サ五", + "動タ五", + "動ナ五", + "動バ五", + "動マ五", + "動ラ五", + } + + v1 := []string{ + "動バ下一", + } + +tagLoop: + for _, tag := range tags { + if tag == "形" { + term.addRules("adj-i") + continue tagLoop + } + + for _, v := range v5 { + if strings.HasPrefix(tag, v) { + term.addRules("v5") + continue tagLoop + } + } + + for _, v := range v1 { + if strings.HasPrefix(tag, v) { + term.addRules("v1") + continue tagLoop + } + } + } +} + func (e *daijirinExtractor) exportTags(term *dbTerm, tags []string) { parsed := []string{ "並立助",