1
yomichan-import/wadai.go

371 lines
7.2 KiB
Go
Raw Normal View History

2017-02-19 23:31:37 +00:00
/*
2017-03-22 03:44:02 +00:00
* Copyright (c) 2017 Alex Yatskov <alex@foosoft.net>, ajyliew
* Author: Alex Yatskov <alex@foosoft.net>, ajyliew
2017-02-19 23:31:37 +00:00
*
* Permission is hereby granted, free of charge, to any person obtaining a copy of
* this software and associated documentation files (the "Software"), to deal in
* the Software without restriction, including without limitation the rights to
* use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of
* the Software, and to permit persons to whom the Software is furnished to do so,
* subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in all
* copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS
* FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR
* COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER
* IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*/
2021-01-01 22:31:58 +00:00
package yomichan
2017-02-19 23:31:37 +00:00
2017-02-20 23:06:35 +00:00
import (
"regexp"
"strings"
2021-01-01 05:53:10 +00:00
2021-01-01 19:46:11 +00:00
zig "github.com/FooSoft/zero-epwing-go"
2017-02-20 23:06:35 +00:00
)
2017-02-19 23:31:37 +00:00
type wadaiExtractor struct {
2017-02-20 23:06:35 +00:00
partsExp *regexp.Regexp
literalPartsExp *regexp.Regexp
readPartsExp *regexp.Regexp
quotedExp *regexp.Regexp
alphaExp *regexp.Regexp
2017-02-19 23:31:37 +00:00
}
func makeWadaiExtractor() epwingExtractor {
2017-02-20 23:06:35 +00:00
return &wadaiExtractor{
partsExp: regexp.MustCompile(`([^]+)(?:([^>【]+)(?:【([^】]+)】)?)?`),
literalPartsExp: regexp.MustCompile(`(¶)?(.*)`),
readPartsExp: regexp.MustCompile(`([^]+)(.*)`),
quotedExp: regexp.MustCompile(`「?([^」]+)`),
alphaExp: regexp.MustCompile(`[a-z]+`),
}
2017-02-19 23:31:37 +00:00
}
2021-01-01 05:53:10 +00:00
func (e *wadaiExtractor) extractTerms(entry zig.BookEntry, sequence int) []dbTerm {
2017-02-20 23:06:35 +00:00
matches := e.partsExp.FindStringSubmatch(entry.Heading)
if matches == nil {
return nil
}
preset := false
literal := matches[1]
if literalMatches := e.literalPartsExp.FindStringSubmatch(literal); literalMatches != nil {
preset = len(literalMatches[1]) > 0
literal = literalMatches[2]
}
reading := matches[2]
if readMatches := e.readPartsExp.FindStringSubmatch(reading); readMatches != nil {
reading = readMatches[1]
}
expressions := strings.Split(matches[3], "・")
if len(expressions) == 0 {
expressions = append(expressions, "")
}
2017-02-19 23:31:37 +00:00
var terms []dbTerm
2017-02-20 23:06:35 +00:00
for _, expression := range expressions {
if preset {
expression = literal
reading = ""
} else if len(expression) == 0 {
expression = literal
}
if quotedMatches := e.quotedExp.FindStringSubmatch(reading); quotedMatches != nil {
reading = quotedMatches[1]
}
if alphaMatches := e.alphaExp.FindStringSubmatch(expression); alphaMatches != nil && len(reading) > 0 {
expression = reading
reading = ""
}
expression = strings.TrimSpace(expression)
if len(expression) == 0 {
continue
}
2017-02-20 23:06:35 +00:00
term := dbTerm{
Expression: expression,
Reading: reading,
Glossary: []string{entry.Text},
Sequence: sequence,
2017-02-20 23:06:35 +00:00
}
terms = append(terms, term)
}
2017-02-19 23:31:37 +00:00
return terms
}
2021-01-01 05:53:10 +00:00
func (e *wadaiExtractor) extractKanji(entry zig.BookEntry) []dbKanji {
2017-02-19 23:31:37 +00:00
return nil
}
func (*wadaiExtractor) getRevision() string {
return "wadai1"
}
func (*wadaiExtractor) getFontNarrow() map[int]string {
return map[int]string{
41267: "﹢",
41269: "*",
41270: "ᐦ",
41284: "Á",
41285: "É",
41287: "Ó",
41288: "Ú",
41290: "á",
41291: "é",
41292: "í",
41293: "ó",
41294: "ú",
41295: "ý",
41313: "À",
41314: "È",
41319: "à",
41320: "è",
41321: "ì",
41322: "ò",
41323: "ù",
41505: "Ö",
41506: "Ü",
41508: "ä",
41509: "ë",
41510: "ï",
41511: "ö",
41512: "ü",
41513: "ÿ",
41515: "Â",
41516: "Ê",
41517: "Î",
41520: "â",
41521: "ê",
41522: "î",
41523: "ô",
41524: "û",
41525: "ā",
41526: "ē",
41527: "ī",
41528: "ō",
41529: "ū",
41530: "ȳ",
41532: "Ç",
41533: "ç",
41534: "ɘ́",
41538: "ɔ́",
41561: "˜",
41566: "ã",
41567: "ñ",
41581: "ʌ",
41582: "ø",
41583: "ə",
41585: "ε",
41587: "ɔ",
41588: "℧",
41590: "ð",
41593: "ŋ",
41594: "ː",
41596: "Ø",
41762: "\\",
41768: "˘",
41773: "Ŭ",
41775: "ă",
41776: "ĕ",
41777: "ğ",
41778: "ĭ",
41779: "ŏ",
41780: "ŭ",
41784: "Č",
41788: "Š",
41791: "č",
41792: "ě",
41794: "ň",
41795: "ř",
41796: "š",
41797: "ž",
41804: "ą",
41805: "ę",
41811: "ș",
41812: "ț",
41822: "Ś",
41823: "ć",
41824: "ń",
41825: "ś",
41826: "ź",
42061: "",
42063: "Ł",
42068: "ł",
42071: "õ",
42075: "Å",
42076: "å",
42077: "ů",
42081: "Ḥ",
42089: "ḍ",
42090: "ḥ",
42092: "ṃ",
42093: "ṇ",
42095: "ṣ",
42102: "İ",
42104: "Ż",
42109: "ṅ",
42287: "‴",
42316: "Ō",
42322: "b̄",
42324: "d̅",
42325: "h̄",
42327: "s̅",
42330: "z̅",
42344: "〚",
42345: "〛",
42356: "ǔ",
42357: "ż",
42358: "Ž",
42359: "ž",
}
}
func (*wadaiExtractor) getFontWide() map[int]string {
return map[int]string{
45380: "☞",
45397: "æ",
45402: "œ",
45406: "Æ",
45429: "©",
45613: "<",
45614: ">",
45629: "┏",
45653: "⛤",
45662: "嗉",
45665: "圳",
45666: "拼",
45667: "攩",
45671: "烤",
45673: "玢",
45674: "癤",
45675: "皶",
45676: "磠",
45677: "稃",
45681: "蔲",
45684: "顬",
45685: "骶",
45689: "榍",
45857: "倻",
45870: "噯",
45876: "垜",
45898: "愷",
45900: "擤",
45906: "晷",
45909: "枘",
45910: "不",
45913: "楣",
45916: "梲",
45919: "桛",
45921: "楤",
45922: "橅",
45923: "檉",
45933: "淄",
46125: "煆",
46135: "珅",
46137: "琛",
46141: "痤",
46142: "癭",
46143: "瘭",
46152: "窠",
46154: "笯",
46155: "筠",
46156: "簎",
46157: "糝",
46161: "翟",
46163: "翮",
46166: "腊",
46168: "舢",
46169: "芷",
46177: "蒴",
46181: "蕙",
46190: "蚉",
46191: "蝲",
46197: "豇",
46198: "跑",
46200: "跗",
46201: "跆",
46202: "蒁",
46372: "鄱",
46374: "鄧",
46388: "卍",
46390: "𨫤",
46391: "鈹",
46398: "顥",
46404: "駃",
46405: "騠",
46406: "髁",
46409: "魳",
46410: "鱏",
46411: "鱓",
46414: "鱮",
46415: "鰶",
46416: "魬",
46417: "𩸽",
46418: "鯥",
46419: "鰙",
46422: "鮄",
46423: "鱵",
46424: "鷴",
46425: "鶍",
46426: "鵟",
46428: "鼯",
46449: "▶",
46459: "㧍",
46460: "嘈",
46461: "愈",
46462: "淝",
46634: "灤",
46635: "焮",
46636: "獮",
46637: "瓚",
46638: "絓",
46639: "芎",
46650: "薏",
46651: "辶",
46652: "醞",
46653: "挵",
46654: "飥",
46655: "鬐",
46656: "俏",
46657: "啐",
46658: "塼",
46659: "濰",
46660: "磲",
46661: "篊",
46662: "菀",
46663: "芩",
46664: "𧿹",
46665: "鈸",
46666: "驎",
46667: "硨",
46668: "蘞",
46669: "梣",
46670: "槵",
46671: "橉",
46672: "莧",
46682: "彔",
46683: "噦",
46684: "袘",
46685: "餺",
46686: "►",
46688: "棈",
46689: "▷",
46695: "[ローマ字]",
46699: "◧",
46700: "◨",
}
}