add epwing support for kotowaza
This commit is contained in:
parent
7f474a07f6
commit
cd85c4935e
@ -134,6 +134,7 @@ func epwingExportDb(inputPath, outputDir, title string, stride int, pretty bool)
|
||||
"三省堂 スーパー大辞林": makeDaijirinExtractor(),
|
||||
"大辞泉": makeDaijisenExtractor(),
|
||||
"明鏡国語辞典": makeMeikyouExtractor(),
|
||||
"故事ことわざの辞典": makeKotowazaExtractor(),
|
||||
"研究社 新和英大辞典 第5版": makeWadaiExtractor(),
|
||||
}
|
||||
|
||||
|
123
kotowaza.go
Normal file
123
kotowaza.go
Normal file
@ -0,0 +1,123 @@
|
||||
/*
|
||||
* Copyright (c) 2017 Alex Yatskov <alex@foosoft.net>, ajyliew
|
||||
* Author: ajyliew
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a copy of
|
||||
* this software and associated documentation files (the "Software"), to deal in
|
||||
* the Software without restriction, including without limitation the rights to
|
||||
* use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of
|
||||
* the Software, and to permit persons to whom the Software is furnished to do so,
|
||||
* subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be included in all
|
||||
* copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS
|
||||
* FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR
|
||||
* COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER
|
||||
* IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
|
||||
* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
package main
|
||||
|
||||
import (
|
||||
"regexp"
|
||||
"strings"
|
||||
)
|
||||
|
||||
// kotowazaExtractor turns entry headings of the 故事ことわざの辞典 EPWING
// dictionary into terms. It holds the compiled patterns used by extractTerms
// so they are built once per extractor rather than once per entry.
type kotowazaExtractor struct {
	// readGroupExp matches a run of non-kana characters immediately followed
	// by a parenthesized group. Group 1 is the run, group 2 is the
	// parenthesized group including its parentheses.
	readGroupExp *regexp.Regexp

	// readGroupAltsExp matches any parenthesized group and captures its
	// content; extractTerms splits that content on "・".
	readGroupAltsExp *regexp.Regexp

	// readGroupNoAltsExp matches a parenthesized group whose content holds no
	// "・" separator and captures that content.
	readGroupNoAltsExp *regexp.Regexp

	// wordGroupExp matches "=word〔=alt・alt…〕". Group 1 is the word and
	// group 2 is the "・"-separated list between the brackets.
	wordGroupExp *regexp.Regexp
}
|
||||
|
||||
func makeKotowazaExtractor() epwingExtractor {
|
||||
return &kotowazaExtractor{
|
||||
readGroupExp: regexp.MustCompile(`([^ぁ-ゖァ-ヺ]*)(\([^)]*\))`),
|
||||
readGroupAltsExp: regexp.MustCompile(`\(([^)]*)\)`),
|
||||
readGroupNoAltsExp: regexp.MustCompile(`\(([^・)]*)\)`),
|
||||
wordGroupExp: regexp.MustCompile(`=([^〔=]*)〔=([^〕]*)〕`),
|
||||
}
|
||||
}
|
||||
|
||||
func (e *kotowazaExtractor) extractTerms(entry epwingEntry) []dbTerm {
|
||||
heading := entry.Heading
|
||||
|
||||
queue := []string{heading}
|
||||
reducedExpressions := []string{}
|
||||
|
||||
for len(queue) > 0 {
|
||||
expression := queue[0]
|
||||
queue = queue[1:]
|
||||
|
||||
matches := e.wordGroupExp.FindStringSubmatch(expression)
|
||||
if matches == nil {
|
||||
reducedExpressions = append(reducedExpressions, expression)
|
||||
} else {
|
||||
replacements := []string{matches[1]}
|
||||
replacements = append(replacements, strings.Split(matches[2], "・")...)
|
||||
for _, replacement := range replacements {
|
||||
queue = append(queue, strings.Replace(expression, matches[0], replacement, -1))
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
var terms []dbTerm
|
||||
for _, reducedExpression := range reducedExpressions {
|
||||
expression := e.readGroupExp.ReplaceAllString(reducedExpression, "$1")
|
||||
readAltsExpression := e.readGroupExp.ReplaceAllString(reducedExpression, "$2")
|
||||
readAltsExpression = e.readGroupNoAltsExp.ReplaceAllString(readAltsExpression, "$1")
|
||||
|
||||
var readings []string
|
||||
queue = []string{readAltsExpression}
|
||||
for len(queue) > 0 {
|
||||
readExpression := queue[0]
|
||||
queue = queue[1:]
|
||||
|
||||
matches := e.readGroupAltsExp.FindStringSubmatch(readExpression)
|
||||
if matches == nil {
|
||||
readings = append(readings, readExpression)
|
||||
} else {
|
||||
replacements := strings.Split(matches[1], "・")
|
||||
for _, replacement := range replacements {
|
||||
queue = append(queue, strings.Replace(readExpression, matches[0], replacement, -1))
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
for _, reading := range readings {
|
||||
term := dbTerm{
|
||||
Expression: expression,
|
||||
Reading: reading,
|
||||
Glossary: []string{entry.Text},
|
||||
}
|
||||
|
||||
terms = append(terms, term)
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
return terms
|
||||
}
|
||||
|
||||
func (e *kotowazaExtractor) extractKanji(entry epwingEntry) []dbKanji {
|
||||
return nil
|
||||
}
|
||||
|
||||
func (e *kotowazaExtractor) exportRules(term *dbTerm, tags []string) {
|
||||
}
|
||||
|
||||
func (*kotowazaExtractor) getRevision() string {
|
||||
return "kotowaza1"
|
||||
}
|
||||
|
||||
func (*kotowazaExtractor) getFontNarrow() map[int]string {
|
||||
return map[int]string{}
|
||||
}
|
||||
|
||||
func (*kotowazaExtractor) getFontWide() map[int]string {
|
||||
return map[int]string{}
|
||||
}
|
Loading…
Reference in New Issue
Block a user