2021-01-01 22:31:58 +00:00
|
|
|
|
package yomichan
|
2018-02-17 19:29:06 +00:00
|
|
|
|
|
|
|
|
|
import (
|
|
|
|
|
"regexp"
|
|
|
|
|
"strings"
|
2021-01-01 05:53:10 +00:00
|
|
|
|
|
2022-07-04 03:59:33 +00:00
|
|
|
|
zig "foosoft.net/projects/zero-epwing-go"
|
2018-02-17 19:29:06 +00:00
|
|
|
|
)
|
|
|
|
|
|
|
|
|
|
// koujienExtractor holds the precompiled patterns used to turn dictionary
// entries into terms. The constructors in this file return it as an
// epwingExtractor.
type koujienExtractor struct {
	// Heading patterns. partsExp is matched against entry.Heading; its first
	// group is taken as the reading and its second as the expression list.
	// Every readGroupExp match is deleted from the reading. expVarExp marks
	// the part of an expression that is either kept (replaced by its first
	// group) or dropped to produce spelling variants.
	partsExp, readGroupExp, expVarExp *regexp.Regexp

	// metaExp matches are stripped from the expression list, and its first
	// group on each line of entry.Text is split on "・" to obtain tags.
	metaExp *regexp.Regexp

	// Tag patterns: a tag matching v5Exp yields the "v5" rule and one
	// matching v1Exp yields the "v1" rule (see exportRules).
	v5Exp, v1Exp *regexp.Regexp
}
|
|
|
|
|
|
|
|
|
|
func makeKoujienExtractor() epwingExtractor {
|
|
|
|
|
return &koujienExtractor{
|
|
|
|
|
partsExp: regexp.MustCompile(`([^(【〖]+)(?:【(.*)】)?(?:〖(.*)〗)?(?:((.*)))?`),
|
|
|
|
|
readGroupExp: regexp.MustCompile(`[‐・]+`),
|
|
|
|
|
expVarExp: regexp.MustCompile(`\(([^\)]*)\)`),
|
|
|
|
|
metaExp: regexp.MustCompile(`(([^)]*))`),
|
|
|
|
|
v5Exp: regexp.MustCompile(`(動.[四五]([[^]]+])?)|(動..二)`),
|
|
|
|
|
v1Exp: regexp.MustCompile(`(動..一)`),
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
func makeFuzokuExtractor() epwingExtractor {
|
|
|
|
|
return &koujienExtractor{
|
|
|
|
|
partsExp: regexp.MustCompile(`([^(【〖]+)(?:【(.*)】)?(?:〖(.*)〗)?(?:((.*)))?`),
|
|
|
|
|
readGroupExp: regexp.MustCompile(`[-・]+`),
|
|
|
|
|
expVarExp: regexp.MustCompile(`\(([^\)]*)\)`),
|
|
|
|
|
metaExp: regexp.MustCompile(`(([^)]*))`),
|
|
|
|
|
v5Exp: regexp.MustCompile(`(動.[四五]([[^]]+])?)|(動..二)`),
|
|
|
|
|
v1Exp: regexp.MustCompile(`(動..一)`),
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
2021-01-01 05:53:10 +00:00
|
|
|
|
func (e *koujienExtractor) extractTerms(entry zig.BookEntry, sequence int) []dbTerm {
|
2018-02-17 19:29:06 +00:00
|
|
|
|
matches := e.partsExp.FindStringSubmatch(entry.Heading)
|
|
|
|
|
if matches == nil {
|
|
|
|
|
return nil
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
var expressions, readings []string
|
|
|
|
|
if expression := matches[2]; len(expression) > 0 {
|
|
|
|
|
expression = e.metaExp.ReplaceAllLiteralString(expression, "")
|
|
|
|
|
for _, split := range strings.Split(expression, "・") {
|
|
|
|
|
splitInc := e.expVarExp.ReplaceAllString(split, "$1")
|
|
|
|
|
expressions = append(expressions, splitInc)
|
|
|
|
|
if split != splitInc {
|
|
|
|
|
splitExc := e.expVarExp.ReplaceAllLiteralString(split, "")
|
|
|
|
|
expressions = append(expressions, splitExc)
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
if reading := matches[1]; len(reading) > 0 {
|
|
|
|
|
reading = e.readGroupExp.ReplaceAllLiteralString(reading, "")
|
|
|
|
|
readings = append(readings, reading)
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
var tags []string
|
|
|
|
|
for _, split := range strings.Split(entry.Text, "\n") {
|
|
|
|
|
if matches := e.metaExp.FindStringSubmatch(split); matches != nil {
|
|
|
|
|
for _, tag := range strings.Split(matches[1], "・") {
|
|
|
|
|
tags = append(tags, tag)
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
var terms []dbTerm
|
|
|
|
|
if len(expressions) == 0 {
|
|
|
|
|
for _, reading := range readings {
|
|
|
|
|
term := dbTerm{
|
|
|
|
|
Expression: reading,
|
2023-01-22 20:14:33 +00:00
|
|
|
|
Glossary: []any{entry.Text},
|
2018-02-17 19:29:06 +00:00
|
|
|
|
Sequence: sequence,
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
e.exportRules(&term, tags)
|
|
|
|
|
terms = append(terms, term)
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
} else {
|
|
|
|
|
for _, expression := range expressions {
|
|
|
|
|
for _, reading := range readings {
|
|
|
|
|
term := dbTerm{
|
|
|
|
|
Expression: expression,
|
|
|
|
|
Reading: reading,
|
2023-01-22 20:14:33 +00:00
|
|
|
|
Glossary: []any{entry.Text},
|
2018-02-17 19:29:06 +00:00
|
|
|
|
Sequence: sequence,
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
e.exportRules(&term, tags)
|
|
|
|
|
terms = append(terms, term)
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
return terms
|
|
|
|
|
}
|
|
|
|
|
|
2021-01-01 05:53:10 +00:00
|
|
|
|
func (*koujienExtractor) extractKanji(entry zig.BookEntry) []dbKanji {
|
2018-02-17 19:29:06 +00:00
|
|
|
|
return nil
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
func (e *koujienExtractor) exportRules(term *dbTerm, tags []string) {
|
|
|
|
|
for _, tag := range tags {
|
|
|
|
|
if tag == "形" {
|
|
|
|
|
term.addRules("adj-i")
|
|
|
|
|
} else if tag == "動サ変" && (strings.HasSuffix(term.Expression, "する") || strings.HasSuffix(term.Expression, "為る")) {
|
|
|
|
|
term.addRules("vs")
|
|
|
|
|
} else if term.Expression == "来る" {
|
|
|
|
|
term.addRules("vk")
|
|
|
|
|
} else if e.v5Exp.MatchString(tag) {
|
|
|
|
|
term.addRules("v5")
|
|
|
|
|
} else if e.v1Exp.MatchString(tag) {
|
|
|
|
|
term.addRules("v1")
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
func (*koujienExtractor) getRevision() string {
|
|
|
|
|
return "koujien"
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
func (*koujienExtractor) getFontNarrow() map[int]string {
|
|
|
|
|
return map[int]string{}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
func (*koujienExtractor) getFontWide() map[int]string {
|
|
|
|
|
return map[int]string{
|
|
|
|
|
41531: "⟨",
|
|
|
|
|
41532: "⟩",
|
|
|
|
|
42017: "⇿",
|
|
|
|
|
42018: "🈑",
|
|
|
|
|
42023: "🈩",
|
|
|
|
|
42024: "🈔",
|
|
|
|
|
42025: "㊇",
|
|
|
|
|
42026: "3",
|
|
|
|
|
42027: "❷",
|
|
|
|
|
42028: "❶",
|
|
|
|
|
42031: "❸",
|
|
|
|
|
42037: "❹",
|
|
|
|
|
42043: "❺",
|
|
|
|
|
42045: "❻",
|
|
|
|
|
42057: "❼",
|
|
|
|
|
42083: "❽",
|
|
|
|
|
42284: "❾",
|
|
|
|
|
42544: "❿",
|
|
|
|
|
42561: "鉏",
|
|
|
|
|
43611: "⓫",
|
|
|
|
|
43612: "⓬",
|
|
|
|
|
44142: "𑖀",
|
|
|
|
|
44856: "㉑",
|
|
|
|
|
44857: "㉒",
|
|
|
|
|
46374: "〔",
|
|
|
|
|
46375: "〕",
|
|
|
|
|
46390: "①",
|
|
|
|
|
46391: "②",
|
|
|
|
|
46392: "③",
|
|
|
|
|
46393: "④",
|
|
|
|
|
46394: "⑤",
|
|
|
|
|
46395: "⑥",
|
|
|
|
|
46396: "⑦",
|
|
|
|
|
46397: "⑧",
|
|
|
|
|
46398: "⑨",
|
|
|
|
|
46399: "⑩",
|
|
|
|
|
46400: "⑪",
|
|
|
|
|
46401: "⑫",
|
|
|
|
|
46402: "⑬",
|
|
|
|
|
46403: "⑭",
|
|
|
|
|
46404: "⑮",
|
|
|
|
|
46405: "⑯",
|
|
|
|
|
46406: "⑰",
|
|
|
|
|
46407: "⑱",
|
|
|
|
|
46408: "⑲",
|
|
|
|
|
46409: "⑳",
|
|
|
|
|
46677: "⇀",
|
|
|
|
|
46420: "⇨",
|
|
|
|
|
47175: "(季)",
|
|
|
|
|
56383: "㋐",
|
|
|
|
|
56384: "㋑",
|
|
|
|
|
56385: "㋒",
|
|
|
|
|
56386: "㋓",
|
|
|
|
|
56387: "㋔",
|
|
|
|
|
56388: "㋕",
|
|
|
|
|
56389: "㋖",
|
|
|
|
|
56390: "㋗",
|
|
|
|
|
56391: "㋘",
|
|
|
|
|
56392: "㋙",
|
|
|
|
|
56393: "㋚",
|
|
|
|
|
56394: "㋛",
|
|
|
|
|
56395: "㋜",
|
|
|
|
|
56396: "㋝",
|
|
|
|
|
56397: "㋞",
|
|
|
|
|
56398: "▷",
|
|
|
|
|
}
|
|
|
|
|
}
|