Add daijirin data
This commit is contained in:
parent
8a363c52fd
commit
95ca3cc700
1475
daijirin.go
Normal file
1475
daijirin.go
Normal file
File diff suppressed because it is too large
Load Diff
55
epwing.go
55
epwing.go
@ -27,7 +27,6 @@ import (
|
|||||||
"io"
|
"io"
|
||||||
"io/ioutil"
|
"io/ioutil"
|
||||||
"regexp"
|
"regexp"
|
||||||
"strings"
|
|
||||||
)
|
)
|
||||||
|
|
||||||
type epwingEntry struct {
|
type epwingEntry struct {
|
||||||
@ -59,60 +58,6 @@ type daijirinExtractor struct {
|
|||||||
annotExp *regexp.Regexp
|
annotExp *regexp.Regexp
|
||||||
}
|
}
|
||||||
|
|
||||||
func makeDaijirinExtractor() epwingExtractor {
|
|
||||||
return &daijirinExtractor{
|
|
||||||
partsExp: regexp.MustCompile(`(?P<reading>[^(【〖]+)(?:【(?P<expression>.*)】)?(?:〖(?P<native>.*)〗)?(?:((?P<tag>.*)))?`),
|
|
||||||
phonExp: regexp.MustCompile(`[-・]+`),
|
|
||||||
variantExp: regexp.MustCompile(`\((.*)\)`),
|
|
||||||
annotExp: regexp.MustCompile(`((.*))`),
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
func (e *daijirinExtractor) extractTerms(entry epwingEntry) []dbTerm {
|
|
||||||
var expressions, readings, glossary, tags []string
|
|
||||||
|
|
||||||
matches := e.partsExp.FindStringSubmatch(entry.Heading)
|
|
||||||
for i, name := range e.partsExp.SubexpNames() {
|
|
||||||
value := matches[i]
|
|
||||||
if i == 0 || len(value) == 0 {
|
|
||||||
continue
|
|
||||||
}
|
|
||||||
|
|
||||||
switch name {
|
|
||||||
case "expression":
|
|
||||||
expression := e.annotExp.ReplaceAllLiteralString(value, "")
|
|
||||||
for _, split := range strings.Split(expression, `・`) {
|
|
||||||
splitInc := e.variantExp.ReplaceAllString(split, "$1")
|
|
||||||
expressions = append(expressions, splitInc)
|
|
||||||
if split != splitInc {
|
|
||||||
splitExc := e.variantExp.ReplaceAllLiteralString(split, "")
|
|
||||||
expressions = append(expressions, splitExc)
|
|
||||||
}
|
|
||||||
}
|
|
||||||
case "reading":
|
|
||||||
reading := e.phonExp.ReplaceAllLiteralString(value, "")
|
|
||||||
readings = append(readings, reading)
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
for i, split := range strings.Split(entry.Text, "\n") {
|
|
||||||
if i == 0 {
|
|
||||||
matches := e.annotExp.FindStringSubmatch(split)
|
|
||||||
if len(matches) >= 1 {
|
|
||||||
tags = append(tags, strings.Split(matches[1], `・`)...)
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
glossary = append(glossary, split)
|
|
||||||
}
|
|
||||||
|
|
||||||
return nil
|
|
||||||
}
|
|
||||||
|
|
||||||
func (e *daijirinExtractor) extractKanji(entry epwingEntry) []dbKanji {
|
|
||||||
return nil
|
|
||||||
}
|
|
||||||
|
|
||||||
func exportEpwingDb(outputDir, title string, reader io.Reader, flags int) error {
|
func exportEpwingDb(outputDir, title string, reader io.Reader, flags int) error {
|
||||||
data, err := ioutil.ReadAll(reader)
|
data, err := ioutil.ReadAll(reader)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
|
Loading…
Reference in New Issue
Block a user