1
This commit is contained in:
Alex Yatskov 2016-12-13 14:12:07 -08:00
parent 8faa2c3354
commit 8a363c52fd

View File

@@ -24,7 +24,6 @@ package main
import ( import (
"encoding/json" "encoding/json"
"fmt"
"io" "io"
"io/ioutil" "io/ioutil"
"regexp" "regexp"
@@ -65,13 +64,12 @@ func makeDaijirinExtractor() epwingExtractor {
partsExp: regexp.MustCompile(`(?P<reading>[^(【〖]+)(?:【(?P<expression>.*)】)?(?:〖(?P<native>.*)〗)?(?:(?P<tag>.*))?`), partsExp: regexp.MustCompile(`(?P<reading>[^(【〖]+)(?:【(?P<expression>.*)】)?(?:〖(?P<native>.*)〗)?(?:(?P<tag>.*))?`),
phonExp: regexp.MustCompile(`[-・]+`), phonExp: regexp.MustCompile(`[-・]+`),
variantExp: regexp.MustCompile(`\((.*)\)`), variantExp: regexp.MustCompile(`\((.*)\)`),
annotExp: regexp.MustCompile(`.*`), annotExp: regexp.MustCompile(`(.*)`),
} }
} }
func (e *daijirinExtractor) extractTerms(entry epwingEntry) []dbTerm { func (e *daijirinExtractor) extractTerms(entry epwingEntry) []dbTerm {
var readings []string var expressions, readings, glossary, tags []string
var expressions []string
matches := e.partsExp.FindStringSubmatch(entry.Heading) matches := e.partsExp.FindStringSubmatch(entry.Heading)
for i, name := range e.partsExp.SubexpNames() { for i, name := range e.partsExp.SubexpNames() {
@@ -97,8 +95,16 @@ func (e *daijirinExtractor) extractTerms(entry epwingEntry) []dbTerm {
} }
} }
fmt.Printf("%q\n", expressions) for i, split := range strings.Split(entry.Text, "\n") {
fmt.Printf("%q\n", readings) if i == 0 {
matches := e.annotExp.FindStringSubmatch(split)
if len(matches) >= 1 {
tags = append(tags, strings.Split(matches[1], ``)...)
}
}
glossary = append(glossary, split)
}
return nil return nil
} }