Update the daijisen regexps
This also fixes #5. The method used is a bit hacky, but it works.
This commit is contained in:
parent
8bc7ffdb36
commit
94326126d3
@ -10,6 +10,7 @@ import (
|
|||||||
type daijisenExtractor struct {
|
type daijisenExtractor struct {
|
||||||
partsExp *regexp.Regexp
|
partsExp *regexp.Regexp
|
||||||
expShapesExp *regexp.Regexp
|
expShapesExp *regexp.Regexp
|
||||||
|
expMultiExp *regexp.Regexp
|
||||||
expVarExp *regexp.Regexp
|
expVarExp *regexp.Regexp
|
||||||
readGroupExp *regexp.Regexp
|
readGroupExp *regexp.Regexp
|
||||||
metaExp *regexp.Regexp
|
metaExp *regexp.Regexp
|
||||||
@ -20,7 +21,8 @@ type daijisenExtractor struct {
|
|||||||
func makeDaijisenExtractor() epwingExtractor {
|
func makeDaijisenExtractor() epwingExtractor {
|
||||||
return &daijisenExtractor{
|
return &daijisenExtractor{
|
||||||
partsExp: regexp.MustCompile(`([^【]+)(?:【(.*)】)?`),
|
partsExp: regexp.MustCompile(`([^【]+)(?:【(.*)】)?`),
|
||||||
expShapesExp: regexp.MustCompile(`[×△]+`),
|
expShapesExp: regexp.MustCompile(`[×△=‐]+`),
|
||||||
|
expMultiExp: regexp.MustCompile(`】[^【】]*【`),
|
||||||
expVarExp: regexp.MustCompile(`（([^）]*)）`),
|
expVarExp: regexp.MustCompile(`（([^）]*)）`),
|
||||||
readGroupExp: regexp.MustCompile(`[‐・]+`),
|
readGroupExp: regexp.MustCompile(`[‐・]+`),
|
||||||
metaExp: regexp.MustCompile(`［([^］]*)］`),
|
metaExp: regexp.MustCompile(`［([^］]*)］`),
|
||||||
@ -37,6 +39,7 @@ func (e *daijisenExtractor) extractTerms(entry zig.BookEntry, sequence int) []db
|
|||||||
|
|
||||||
var expressions []string
|
var expressions []string
|
||||||
if expression := matches[2]; len(expression) > 0 {
|
if expression := matches[2]; len(expression) > 0 {
|
||||||
|
expression = e.expMultiExp.ReplaceAllString(expression, "・")
|
||||||
expression = e.expShapesExp.ReplaceAllString(expression, "")
|
expression = e.expShapesExp.ReplaceAllString(expression, "")
|
||||||
for _, split := range strings.Split(expression, "・") {
|
for _, split := range strings.Split(expression, "・") {
|
||||||
splitInc := e.expVarExp.ReplaceAllString(split, "$1")
|
splitInc := e.expVarExp.ReplaceAllString(split, "$1")
|
||||||
@ -112,7 +115,7 @@ func (e *daijisenExtractor) exportRules(term *dbTerm, tags []string) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
func (*daijisenExtractor) getRevision() string {
|
func (*daijisenExtractor) getRevision() string {
|
||||||
return "daijisen1"
|
return "daijisen2"
|
||||||
}
|
}
|
||||||
|
|
||||||
func (*daijisenExtractor) getFontNarrow() map[int]string {
|
func (*daijisenExtractor) getFontNarrow() map[int]string {
|
||||||
|
Loading…
Reference in New Issue
Block a user