wadai support
This commit is contained in:
parent
5b06b26441
commit
4f60286763
66
wadai.go
66
wadai.go
@ -22,15 +22,79 @@
|
||||
|
||||
package main
|
||||
|
||||
import (
|
||||
"regexp"
|
||||
"strings"
|
||||
)
|
||||
|
||||
// wadaiExtractor turns entry headings into dictionary terms. It holds the
// compiled regular expressions used by extractTerms so they are built once,
// in makeWadaiExtractor, rather than on every call.
type wadaiExtractor struct {
	// partsExp splits a heading into the literal text, the reading inside
	// "<...>", and the optional expression list inside "【...】".
	partsExp *regexp.Regexp
	// literalPartsExp separates an optional leading "¶" from the literal.
	literalPartsExp *regexp.Regexp
	// readPartsExp isolates the leading run of non-digit characters of a
	// reading.
	readPartsExp *regexp.Regexp
	// quotedExp drops an optional opening "「" and everything from the
	// first "」" onwards.
	quotedExp *regexp.Regexp
	// alphaExp finds a run of lowercase ASCII letters.
	alphaExp *regexp.Regexp
}
|
||||
|
||||
func makeWadaiExtractor() epwingExtractor {
|
||||
return &wadaiExtractor{}
|
||||
return &wadaiExtractor{
|
||||
partsExp: regexp.MustCompile(`([^<]+)(?:<([^>【]+)(?:【([^】]+)】)?>)?`),
|
||||
literalPartsExp: regexp.MustCompile(`(¶)?(.*)`),
|
||||
readPartsExp: regexp.MustCompile(`([^1234567890]+)(.*)`),
|
||||
quotedExp: regexp.MustCompile(`「?([^」]+)`),
|
||||
alphaExp: regexp.MustCompile(`[a-z]+`),
|
||||
}
|
||||
}
|
||||
|
||||
func (e *wadaiExtractor) extractTerms(entry epwingEntry) []dbTerm {
|
||||
matches := e.partsExp.FindStringSubmatch(entry.Heading)
|
||||
if matches == nil {
|
||||
return nil
|
||||
}
|
||||
|
||||
preset := false
|
||||
literal := matches[1]
|
||||
if literalMatches := e.literalPartsExp.FindStringSubmatch(literal); literalMatches != nil {
|
||||
preset = len(literalMatches[1]) > 0
|
||||
literal = literalMatches[2]
|
||||
}
|
||||
|
||||
reading := matches[2]
|
||||
if readMatches := e.readPartsExp.FindStringSubmatch(reading); readMatches != nil {
|
||||
reading = readMatches[1]
|
||||
}
|
||||
|
||||
expressions := strings.Split(matches[3], "・")
|
||||
if len(expressions) == 0 {
|
||||
expressions = append(expressions, "")
|
||||
}
|
||||
|
||||
var terms []dbTerm
|
||||
for _, expression := range expressions {
|
||||
if preset {
|
||||
expression = literal
|
||||
reading = ""
|
||||
} else if len(expression) == 0 {
|
||||
expression = literal
|
||||
}
|
||||
|
||||
if quotedMatches := e.quotedExp.FindStringSubmatch(reading); quotedMatches != nil {
|
||||
reading = quotedMatches[1]
|
||||
}
|
||||
|
||||
if alphaMatches := e.alphaExp.FindStringSubmatch(expression); alphaMatches != nil && len(reading) > 0 {
|
||||
expression = reading
|
||||
reading = ""
|
||||
}
|
||||
|
||||
term := dbTerm{
|
||||
Expression: expression,
|
||||
Reading: reading,
|
||||
Glossary: []string{entry.Text},
|
||||
}
|
||||
|
||||
terms = append(terms, term)
|
||||
}
|
||||
|
||||
return terms
|
||||
}
|
||||
|
||||
|
Loading…
Reference in New Issue
Block a user