wip
This commit is contained in:
parent
d41e2aa3c8
commit
c932dc4f0f
56
daijisen.go
56
daijisen.go
@ -29,8 +29,9 @@ import (
|
|||||||
|
|
||||||
type daijisenExtractor struct {
|
type daijisenExtractor struct {
|
||||||
partsExp *regexp.Regexp
|
partsExp *regexp.Regexp
|
||||||
|
expShapesExp *regexp.Regexp
|
||||||
|
readVarExp *regexp.Regexp
|
||||||
readGroupExp *regexp.Regexp
|
readGroupExp *regexp.Regexp
|
||||||
expVarExp *regexp.Regexp
|
|
||||||
metaExp *regexp.Regexp
|
metaExp *regexp.Regexp
|
||||||
v5Exp *regexp.Regexp
|
v5Exp *regexp.Regexp
|
||||||
v1Exp *regexp.Regexp
|
v1Exp *regexp.Regexp
|
||||||
@ -38,10 +39,11 @@ type daijisenExtractor struct {
|
|||||||
|
|
||||||
func makeDaijisenExtractor() epwingExtractor {
|
func makeDaijisenExtractor() epwingExtractor {
|
||||||
return &daijisenExtractor{
|
return &daijisenExtractor{
|
||||||
partsExp: regexp.MustCompile(`([^(【〖]+)(?:【(.*)】)?(?:〖(.*)〗)?(?:((.*)))?`),
|
partsExp: regexp.MustCompile(`([^【]+)(?:【(.*)】)?`),
|
||||||
|
expShapesExp: regexp.MustCompile(`[×△]+`),
|
||||||
|
readVarExp: regexp.MustCompile(`\(([^\)]*)\)`),
|
||||||
readGroupExp: regexp.MustCompile(`[-・]+`),
|
readGroupExp: regexp.MustCompile(`[-・]+`),
|
||||||
expVarExp: regexp.MustCompile(`\(([^\)]*)\)`),
|
metaExp: regexp.MustCompile(`[([^]]*)]`),
|
||||||
metaExp: regexp.MustCompile(`(([^)]*))`),
|
|
||||||
v5Exp: regexp.MustCompile(`(動.[四五]([[^]]+])?)|(動..二)`),
|
v5Exp: regexp.MustCompile(`(動.[四五]([[^]]+])?)|(動..二)`),
|
||||||
v1Exp: regexp.MustCompile(`(動..一)`),
|
v1Exp: regexp.MustCompile(`(動..一)`),
|
||||||
}
|
}
|
||||||
@ -53,22 +55,16 @@ func (e *daijisenExtractor) extractTerms(entry epwingEntry) []dbTerm {
|
|||||||
return nil
|
return nil
|
||||||
}
|
}
|
||||||
|
|
||||||
var expressions, readings []string
|
var expressions []string
|
||||||
if expression := matches[2]; len(expression) > 0 {
|
if expression := matches[2]; len(expression) > 0 {
|
||||||
expression = e.metaExp.ReplaceAllLiteralString(expression, "")
|
expression = e.expShapesExp.ReplaceAllString(expression, "")
|
||||||
for _, split := range strings.Split(expression, "・") {
|
expressions = strings.Split(expression, "・")
|
||||||
splitInc := e.expVarExp.ReplaceAllString(split, "$1")
|
|
||||||
expressions = append(expressions, splitInc)
|
|
||||||
if split != splitInc {
|
|
||||||
splitExc := e.expVarExp.ReplaceAllLiteralString(split, "")
|
|
||||||
expressions = append(expressions, splitExc)
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
if reading := matches[1]; len(reading) > 0 {
|
var reading string
|
||||||
|
if reading = matches[1]; len(reading) > 0 {
|
||||||
reading = e.readGroupExp.ReplaceAllLiteralString(reading, "")
|
reading = e.readGroupExp.ReplaceAllLiteralString(reading, "")
|
||||||
readings = append(readings, reading)
|
reading = e.readVarExp.ReplaceAllLiteralString(reading, "")
|
||||||
}
|
}
|
||||||
|
|
||||||
var tags []string
|
var tags []string
|
||||||
@ -82,29 +78,25 @@ func (e *daijisenExtractor) extractTerms(entry epwingEntry) []dbTerm {
|
|||||||
|
|
||||||
var terms []dbTerm
|
var terms []dbTerm
|
||||||
if len(expressions) == 0 {
|
if len(expressions) == 0 {
|
||||||
for _, reading := range readings {
|
term := dbTerm{
|
||||||
|
Expression: reading,
|
||||||
|
Glossary: []string{entry.Text},
|
||||||
|
}
|
||||||
|
|
||||||
|
e.exportRules(&term, tags)
|
||||||
|
terms = append(terms, term)
|
||||||
|
|
||||||
|
} else {
|
||||||
|
for _, expression := range expressions {
|
||||||
term := dbTerm{
|
term := dbTerm{
|
||||||
Expression: reading,
|
Expression: expression,
|
||||||
|
Reading: reading,
|
||||||
Glossary: []string{entry.Text},
|
Glossary: []string{entry.Text},
|
||||||
}
|
}
|
||||||
|
|
||||||
e.exportRules(&term, tags)
|
e.exportRules(&term, tags)
|
||||||
terms = append(terms, term)
|
terms = append(terms, term)
|
||||||
}
|
}
|
||||||
|
|
||||||
} else {
|
|
||||||
for _, expression := range expressions {
|
|
||||||
for _, reading := range readings {
|
|
||||||
term := dbTerm{
|
|
||||||
Expression: expression,
|
|
||||||
Reading: reading,
|
|
||||||
Glossary: []string{entry.Text},
|
|
||||||
}
|
|
||||||
|
|
||||||
e.exportRules(&term, tags)
|
|
||||||
terms = append(terms, term)
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
return terms
|
return terms
|
||||||
|
@ -137,6 +137,7 @@ func epwingExportDb(inputPath, outputDir, title string, stride int, pretty bool)
|
|||||||
translateExp := regexp.MustCompile(`{{([nw])_(\d+)}}`)
|
translateExp := regexp.MustCompile(`{{([nw])_(\d+)}}`)
|
||||||
epwingExtractors := map[string]epwingExtractor{
|
epwingExtractors := map[string]epwingExtractor{
|
||||||
"三省堂 スーパー大辞林": makeDaijirinExtractor(),
|
"三省堂 スーパー大辞林": makeDaijirinExtractor(),
|
||||||
|
"大辞泉": makeDaijisenExtractor(),
|
||||||
}
|
}
|
||||||
|
|
||||||
var (
|
var (
|
||||||
|
Loading…
Reference in New Issue
Block a user