2021-01-01 22:31:58 +00:00
|
|
|
|
package yomichan
|
2017-03-21 06:08:12 +00:00
|
|
|
|
|
|
|
|
|
import (
|
|
|
|
|
"regexp"
|
|
|
|
|
"strings"
|
2021-01-01 05:53:10 +00:00
|
|
|
|
|
2023-12-31 04:43:50 +00:00
|
|
|
|
zig "git.foosoft.net/alex/zero-epwing-go"
|
2017-03-21 06:08:12 +00:00
|
|
|
|
)
|
|
|
|
|
|
|
|
|
|
type kotowazaExtractor struct {
|
|
|
|
|
readGroupExp *regexp.Regexp
|
|
|
|
|
readGroupAltsExp *regexp.Regexp
|
|
|
|
|
readGroupNoAltsExp *regexp.Regexp
|
|
|
|
|
wordGroupExp *regexp.Regexp
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
func makeKotowazaExtractor() epwingExtractor {
|
|
|
|
|
return &kotowazaExtractor{
|
|
|
|
|
readGroupExp: regexp.MustCompile(`([^ぁ-ゖァ-ヺ]*)(\([^)]*\))`),
|
|
|
|
|
readGroupAltsExp: regexp.MustCompile(`\(([^)]*)\)`),
|
|
|
|
|
readGroupNoAltsExp: regexp.MustCompile(`\(([^・)]*)\)`),
|
|
|
|
|
wordGroupExp: regexp.MustCompile(`=([^〔=]*)〔=([^〕]*)〕`),
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
2021-01-01 05:53:10 +00:00
|
|
|
|
func (e *kotowazaExtractor) extractTerms(entry zig.BookEntry, sequence int) []dbTerm {
|
2017-03-21 06:08:12 +00:00
|
|
|
|
heading := entry.Heading
|
|
|
|
|
|
|
|
|
|
queue := []string{heading}
|
|
|
|
|
reducedExpressions := []string{}
|
|
|
|
|
|
|
|
|
|
for len(queue) > 0 {
|
|
|
|
|
expression := queue[0]
|
|
|
|
|
queue = queue[1:]
|
|
|
|
|
|
|
|
|
|
matches := e.wordGroupExp.FindStringSubmatch(expression)
|
|
|
|
|
if matches == nil {
|
|
|
|
|
reducedExpressions = append(reducedExpressions, expression)
|
|
|
|
|
} else {
|
|
|
|
|
replacements := []string{matches[1]}
|
|
|
|
|
replacements = append(replacements, strings.Split(matches[2], "・")...)
|
|
|
|
|
for _, replacement := range replacements {
|
|
|
|
|
queue = append(queue, strings.Replace(expression, matches[0], replacement, -1))
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
var terms []dbTerm
|
|
|
|
|
for _, reducedExpression := range reducedExpressions {
|
|
|
|
|
expression := e.readGroupExp.ReplaceAllString(reducedExpression, "$1")
|
|
|
|
|
readAltsExpression := e.readGroupExp.ReplaceAllString(reducedExpression, "$2")
|
|
|
|
|
readAltsExpression = e.readGroupNoAltsExp.ReplaceAllString(readAltsExpression, "$1")
|
|
|
|
|
|
|
|
|
|
var readings []string
|
|
|
|
|
queue = []string{readAltsExpression}
|
|
|
|
|
for len(queue) > 0 {
|
|
|
|
|
readExpression := queue[0]
|
|
|
|
|
queue = queue[1:]
|
|
|
|
|
|
|
|
|
|
matches := e.readGroupAltsExp.FindStringSubmatch(readExpression)
|
|
|
|
|
if matches == nil {
|
|
|
|
|
readings = append(readings, readExpression)
|
|
|
|
|
} else {
|
|
|
|
|
replacements := strings.Split(matches[1], "・")
|
|
|
|
|
for _, replacement := range replacements {
|
|
|
|
|
queue = append(queue, strings.Replace(readExpression, matches[0], replacement, -1))
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
for _, reading := range readings {
|
|
|
|
|
term := dbTerm{
|
|
|
|
|
Expression: expression,
|
|
|
|
|
Reading: reading,
|
2023-01-22 20:14:33 +00:00
|
|
|
|
Glossary: []any{entry.Text},
|
2017-10-12 23:48:58 +00:00
|
|
|
|
Sequence: sequence,
|
2017-03-21 06:08:12 +00:00
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
terms = append(terms, term)
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
return terms
|
|
|
|
|
}
|
|
|
|
|
|
2021-01-01 05:53:10 +00:00
|
|
|
|
func (e *kotowazaExtractor) extractKanji(entry zig.BookEntry) []dbKanji {
|
2017-03-21 06:08:12 +00:00
|
|
|
|
return nil
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
func (e *kotowazaExtractor) exportRules(term *dbTerm, tags []string) {
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
func (*kotowazaExtractor) getRevision() string {
|
|
|
|
|
return "kotowaza1"
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
func (*kotowazaExtractor) getFontNarrow() map[int]string {
|
|
|
|
|
return map[int]string{}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
func (*kotowazaExtractor) getFontWide() map[int]string {
|
|
|
|
|
return map[int]string{}
|
|
|
|
|
}
|