From cd85c4935ebcba040db551a4af5d50d6ba4222d8 Mon Sep 17 00:00:00 2001 From: Austin Liew Date: Tue, 21 Mar 2017 02:08:12 -0400 Subject: [PATCH] add epwing support for kotowaza --- epwing.go | 1 + kotowaza.go | 123 ++++++++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 124 insertions(+) create mode 100644 kotowaza.go diff --git a/epwing.go b/epwing.go index efc4d24..b9266cc 100644 --- a/epwing.go +++ b/epwing.go @@ -134,6 +134,7 @@ func epwingExportDb(inputPath, outputDir, title string, stride int, pretty bool) "三省堂 スーパー大辞林": makeDaijirinExtractor(), "大辞泉": makeDaijisenExtractor(), "明鏡国語辞典": makeMeikyouExtractor(), + "故事ことわざの辞典": makeKotowazaExtractor(), "研究社 新和英大辞典 第5版": makeWadaiExtractor(), } diff --git a/kotowaza.go b/kotowaza.go new file mode 100644 index 0000000..0c85a2f --- /dev/null +++ b/kotowaza.go @@ -0,0 +1,123 @@ +/* + * Copyright (c) 2017 Alex Yatskov , ajyliew + * Author: ajyliew + * + * Permission is hereby granted, free of charge, to any person obtaining a copy of + * this software and associated documentation files (the "Software"), to deal in + * the Software without restriction, including without limitation the rights to + * use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of + * the Software, and to permit persons to whom the Software is furnished to do so, + * subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS + * FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR + * COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER + * IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + */ + +package main + +import ( + "regexp" + "strings" +) + +type kotowazaExtractor struct { + readGroupExp *regexp.Regexp + readGroupAltsExp *regexp.Regexp + readGroupNoAltsExp *regexp.Regexp + wordGroupExp *regexp.Regexp +} + +func makeKotowazaExtractor() epwingExtractor { + return &kotowazaExtractor{ + readGroupExp: regexp.MustCompile(`([^ぁ-ゖァ-ヺ]*)(\([^)]*\))`), + readGroupAltsExp: regexp.MustCompile(`\(([^)]*)\)`), + readGroupNoAltsExp: regexp.MustCompile(`\(([^・)]*)\)`), + wordGroupExp: regexp.MustCompile(`=([^〔=]*)〔=([^〕]*)〕`), + } +} + +func (e *kotowazaExtractor) extractTerms(entry epwingEntry) []dbTerm { + heading := entry.Heading + + queue := []string{heading} + reducedExpressions := []string{} + + for len(queue) > 0 { + expression := queue[0] + queue = queue[1:] + + matches := e.wordGroupExp.FindStringSubmatch(expression) + if matches == nil { + reducedExpressions = append(reducedExpressions, expression) + } else { + replacements := []string{matches[1]} + replacements = append(replacements, strings.Split(matches[2], "・")...) + for _, replacement := range replacements { + queue = append(queue, strings.Replace(expression, matches[0], replacement, -1)) + } + } + } + + var terms []dbTerm + for _, reducedExpression := range reducedExpressions { + expression := e.readGroupExp.ReplaceAllString(reducedExpression, "$1") + readAltsExpression := e.readGroupExp.ReplaceAllString(reducedExpression, "$2") + readAltsExpression = e.readGroupNoAltsExp.ReplaceAllString(readAltsExpression, "$1") + + var readings []string + queue = []string{readAltsExpression} + for len(queue) > 0 { + readExpression := queue[0] + queue = queue[1:] + + matches := e.readGroupAltsExp.FindStringSubmatch(readExpression) + if matches == nil { + readings = append(readings, readExpression) + } else { + replacements := strings.Split(matches[1], "・") + for _, replacement := range replacements { + queue = append(queue, strings.Replace(readExpression, matches[0], replacement, -1)) + } + } + } + + for _, reading := range readings { + term := dbTerm{ + Expression: expression, + Reading: reading, + Glossary: []string{entry.Text}, + } + + terms = append(terms, term) + } + + } + + return terms +} + +func (e *kotowazaExtractor) extractKanji(entry epwingEntry) []dbKanji { + return nil +} + +func (e *kotowazaExtractor) exportRules(term *dbTerm, tags []string) { +} + +func (*kotowazaExtractor) getRevision() string { + return "kotowaza1" +} + +func (*kotowazaExtractor) getFontNarrow() map[int]string { + return map[int]string{} +} + +func (*kotowazaExtractor) getFontWide() map[int]string { + return map[int]string{} +}