From 8a363c52fd4300c7b5ad8c87b3d4c25ee4a2e5e6 Mon Sep 17 00:00:00 2001 From: Alex Yatskov Date: Tue, 13 Dec 2016 14:12:07 -0800 Subject: [PATCH] WIP --- epwing.go | 18 ++++++++++++------ 1 file changed, 12 insertions(+), 6 deletions(-) diff --git a/epwing.go b/epwing.go index 44eb912..57431b2 100644 --- a/epwing.go +++ b/epwing.go @@ -24,7 +24,6 @@ package main import ( "encoding/json" - "fmt" "io" "io/ioutil" "regexp" @@ -65,13 +64,12 @@ func makeDaijirinExtractor() epwingExtractor { partsExp: regexp.MustCompile(`(?P[^(【〖]+)(?:【(?P.*)】)?(?:〖(?P.*)〗)?(?:((?P.*)))?`), phonExp: regexp.MustCompile(`[-・]+`), variantExp: regexp.MustCompile(`\((.*)\)`), - annotExp: regexp.MustCompile(`(.*)`), + annotExp: regexp.MustCompile(`((.*))`), } } func (e *daijirinExtractor) extractTerms(entry epwingEntry) []dbTerm { - var readings []string - var expressions []string + var expressions, readings, glossary, tags []string matches := e.partsExp.FindStringSubmatch(entry.Heading) for i, name := range e.partsExp.SubexpNames() { @@ -97,8 +95,16 @@ func (e *daijirinExtractor) extractTerms(entry epwingEntry) []dbTerm { } } - fmt.Printf("%q\n", expressions) - fmt.Printf("%q\n", readings) + for i, split := range strings.Split(entry.Text, "\n") { + if i == 0 { + matches := e.annotExp.FindStringSubmatch(split) + if len(matches) >= 1 { + tags = append(tags, strings.Split(matches[1], `・`)...) + } + } + + glossary = append(glossary, split) + } return nil }