1

Font translation

This commit is contained in:
Alex Yatskov 2016-12-13 18:00:36 -08:00
parent 95ca3cc700
commit 26c55ee91d
2 changed files with 1425 additions and 1389 deletions

View File

@ -81,7 +81,8 @@ func (e *daijirinExtractor) extractKanji(entry epwingEntry) []dbKanji {
return nil
}
var daijirinFontNarrow = map[int]string{
func (*daijirinExtractor) getFontNarrow() map[int]string {
return map[int]string{
49441: "á",
49442: "à",
49443: "â",
@ -236,8 +237,10 @@ var daijirinFontNarrow = map[int]string{
50209: "ɲ",
50210: "ː",
}
}
var daijirinFontWide = map[int]string{
func (*daijirinExtractor) getFontWide() map[int]string {
return map[int]string{
41249: "仿",
41250: "佉",
41251: "侗",
@ -1473,3 +1476,4 @@ var daijirinFontWide = map[int]string{
45163: "ヰ",
45175: "㏋",
}
}

View File

@ -27,6 +27,8 @@ import (
"io"
"io/ioutil"
"regexp"
"strconv"
"strings"
)
type epwingEntry struct {
@ -49,6 +51,8 @@ type epwingBook struct {
// epwingExtractor is the per-dictionary strategy consulted by exportEpwingDb.
// An implementation is looked up by subbook title and supplies both the
// entry-to-record conversion and the font tables used to translate
// {{n_CODE}} / {{w_CODE}} markers before extraction.
type epwingExtractor interface {
// extractTerms returns the dbTerm records derived from a single entry.
extractTerms(entry epwingEntry) []dbTerm
// extractKanji returns the dbKanji records derived from a single entry.
extractKanji(entry epwingEntry) []dbKanji
// getFontNarrow returns the table consulted for {{n_CODE}} markers,
// keyed by the numeric code and yielding the replacement text.
getFontNarrow() map[int]string
// getFontWide returns the table consulted for {{w_CODE}} markers,
// keyed by the numeric code and yielding the replacement text.
getFontWide() map[int]string
}
type daijirinExtractor struct {
@ -69,6 +73,7 @@ func exportEpwingDb(outputDir, title string, reader io.Reader, flags int) error
return err
}
translateExp := regexp.MustCompile(`{{([nw])_(\d+)}}`)
epwingExtractors := map[string]epwingExtractor{
"三省堂 スーパー大辞林": makeDaijirinExtractor(),
}
@ -78,7 +83,34 @@ func exportEpwingDb(outputDir, title string, reader io.Reader, flags int) error
for _, subbook := range book.Subbooks {
if extractor, ok := epwingExtractors[subbook.Title]; ok {
fontNarrow := extractor.getFontNarrow()
fontWide := extractor.getFontWide()
translate := func(str string) string {
for _, matches := range translateExp.FindAllStringSubmatch(str, -1) {
var font map[int]string
if matches[1] == "n" {
font = fontNarrow
} else {
font = fontWide
}
code, _ := strconv.Atoi(matches[2])
replacement, ok := font[code]
if !ok {
replacement = "<22>"
}
str = strings.Replace(str, matches[0], replacement, -1)
}
return str
}
for _, entry := range subbook.Entries {
entry.Heading = translate(entry.Heading)
entry.Text = translate(entry.Text)
terms = append(terms, extractor.extractTerms(entry)...)
kanji = append(kanji, extractor.extractKanji(entry)...)
}