Switch to zig for EPWING parsing
This commit is contained in:
parent
50901f7155
commit
b66d908b23
11
common.go
11
common.go
@ -266,8 +266,8 @@ func hasString(needle string, haystack []string) bool {
|
||||
return false
|
||||
}
|
||||
|
||||
func detectFormat(path string) (string, error) {
|
||||
switch filepath.Ext(path) {
|
||||
func detectFormat(path *string) (string, error) {
|
||||
switch filepath.Ext(*path) {
|
||||
case ".sqlite":
|
||||
return "rikai", nil
|
||||
case ".kanjifreq":
|
||||
@ -276,7 +276,7 @@ func detectFormat(path string) (string, error) {
|
||||
return "termfreq", nil
|
||||
}
|
||||
|
||||
switch filepath.Base(path) {
|
||||
switch filepath.Base(*path) {
|
||||
case "JMdict", "JMdict.xml", "JMdict_e", "JMdict_e.xml":
|
||||
return "edict", nil
|
||||
case "JMnedict", "JMnedict.xml":
|
||||
@ -284,16 +284,17 @@ func detectFormat(path string) (string, error) {
|
||||
case "kanjidic2", "kanjidic2.xml":
|
||||
return "kanjidic", nil
|
||||
case "CATALOGS":
|
||||
*path = filepath.Dir(*path)
|
||||
return "epwing", nil
|
||||
}
|
||||
|
||||
info, err := os.Stat(path)
|
||||
info, err := os.Stat(*path)
|
||||
if err != nil {
|
||||
return "", err
|
||||
}
|
||||
|
||||
if info.IsDir() {
|
||||
_, err := os.Stat(filepath.Join(path, "CATALOGS"))
|
||||
_, err := os.Stat(filepath.Join(*path, "CATALOGS"))
|
||||
if err == nil {
|
||||
return "epwing", nil
|
||||
}
|
||||
|
@ -25,6 +25,8 @@ package main
|
||||
import (
|
||||
"regexp"
|
||||
"strings"
|
||||
|
||||
"github.com/FooSoft/zero-epwing-go/zig"
|
||||
)
|
||||
|
||||
type daijirinExtractor struct {
|
||||
@ -47,7 +49,7 @@ func makeDaijirinExtractor() epwingExtractor {
|
||||
}
|
||||
}
|
||||
|
||||
func (e *daijirinExtractor) extractTerms(entry epwingEntry, sequence int) []dbTerm {
|
||||
func (e *daijirinExtractor) extractTerms(entry zig.BookEntry, sequence int) []dbTerm {
|
||||
matches := e.partsExp.FindStringSubmatch(entry.Heading)
|
||||
if matches == nil {
|
||||
return nil
|
||||
@ -112,7 +114,7 @@ func (e *daijirinExtractor) extractTerms(entry epwingEntry, sequence int) []dbTe
|
||||
return terms
|
||||
}
|
||||
|
||||
func (*daijirinExtractor) extractKanji(entry epwingEntry) []dbKanji {
|
||||
func (*daijirinExtractor) extractKanji(entry zig.BookEntry) []dbKanji {
|
||||
return nil
|
||||
}
|
||||
|
||||
|
@ -25,6 +25,8 @@ package main
|
||||
import (
|
||||
"regexp"
|
||||
"strings"
|
||||
|
||||
"github.com/FooSoft/zero-epwing-go/zig"
|
||||
)
|
||||
|
||||
type daijisenExtractor struct {
|
||||
@ -49,7 +51,7 @@ func makeDaijisenExtractor() epwingExtractor {
|
||||
}
|
||||
}
|
||||
|
||||
func (e *daijisenExtractor) extractTerms(entry epwingEntry, sequence int) []dbTerm {
|
||||
func (e *daijisenExtractor) extractTerms(entry zig.BookEntry, sequence int) []dbTerm {
|
||||
matches := e.partsExp.FindStringSubmatch(entry.Heading)
|
||||
if matches == nil {
|
||||
return nil
|
||||
@ -111,7 +113,7 @@ func (e *daijisenExtractor) extractTerms(entry epwingEntry, sequence int) []dbTe
|
||||
return terms
|
||||
}
|
||||
|
||||
func (*daijisenExtractor) extractKanji(entry epwingEntry) []dbKanji {
|
||||
func (*daijisenExtractor) extractKanji(entry zig.BookEntry) []dbKanji {
|
||||
return nil
|
||||
}
|
||||
|
||||
|
107
epwing.go
107
epwing.go
@ -23,123 +23,29 @@
|
||||
package main
|
||||
|
||||
import (
|
||||
"bufio"
|
||||
"encoding/json"
|
||||
"fmt"
|
||||
"io/ioutil"
|
||||
"log"
|
||||
"os"
|
||||
"os/exec"
|
||||
"path/filepath"
|
||||
"regexp"
|
||||
"runtime"
|
||||
"strconv"
|
||||
"strings"
|
||||
|
||||
"github.com/FooSoft/zero-epwing-go/zig"
|
||||
)
|
||||
|
||||
type epwingEntry struct {
|
||||
Heading string `json:"heading"`
|
||||
Text string `json:"text"`
|
||||
}
|
||||
|
||||
type epwingSubbook struct {
|
||||
Title string `json:"title"`
|
||||
Copyright string `json:"copyright"`
|
||||
Entries []epwingEntry `json:"entries"`
|
||||
}
|
||||
|
||||
type epwingBook struct {
|
||||
CharCode string `json:"charCode"`
|
||||
DiscCode string `json:"discCode"`
|
||||
Subbooks []epwingSubbook `json:"subbooks"`
|
||||
}
|
||||
|
||||
type epwingExtractor interface {
|
||||
extractTerms(entry epwingEntry, sequence int) []dbTerm
|
||||
extractKanji(entry epwingEntry) []dbKanji
|
||||
extractTerms(entry zig.BookEntry, sequence int) []dbTerm
|
||||
extractKanji(entry zig.BookEntry) []dbKanji
|
||||
getFontNarrow() map[int]string
|
||||
getFontWide() map[int]string
|
||||
getRevision() string
|
||||
}
|
||||
|
||||
func epwingExportDb(inputPath, outputPath, language, title string, stride int, pretty bool) error {
|
||||
stat, err := os.Stat(inputPath)
|
||||
book, err := zig.Load(inputPath)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
var toolExec bool
|
||||
if stat.IsDir() {
|
||||
toolExec = true
|
||||
} else if filepath.Base(inputPath) == "CATALOGS" {
|
||||
inputPath = filepath.Dir(inputPath)
|
||||
toolExec = true
|
||||
}
|
||||
|
||||
var data []byte
|
||||
if toolExec {
|
||||
exePath, err := os.Executable()
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
toolPath := filepath.Join("bin", runtime.GOOS, "zero-epwing")
|
||||
if runtime.GOOS == "windows" {
|
||||
toolPath += ".exe"
|
||||
}
|
||||
|
||||
toolPath = filepath.Join(filepath.Dir(exePath), toolPath)
|
||||
|
||||
if _, err = os.Stat(toolPath); err != nil {
|
||||
return fmt.Errorf("failed to find zero-epwing in '%s'", toolPath)
|
||||
}
|
||||
|
||||
cmd := exec.Command(toolPath, "--entries", inputPath)
|
||||
|
||||
stdout, err := cmd.StdoutPipe()
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
stderr, err := cmd.StderrPipe()
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
log.Printf("invoking zero-epwing from '%s'...\n", toolPath)
|
||||
if err := cmd.Start(); err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
go func() {
|
||||
scanner := bufio.NewScanner(stderr)
|
||||
for scanner.Scan() {
|
||||
log.Printf("\t > %s\n", scanner.Text())
|
||||
}
|
||||
}()
|
||||
|
||||
if data, err = ioutil.ReadAll(stdout); err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
if err := cmd.Wait(); err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
log.Println("completed zero-epwing processing")
|
||||
} else {
|
||||
data, err = ioutil.ReadFile(inputPath)
|
||||
}
|
||||
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
var book epwingBook
|
||||
if err := json.Unmarshal(data, &book); err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
translateExp := regexp.MustCompile(`{{([nw])_(\d+)}}`)
|
||||
epwingExtractors := map[string]epwingExtractor{
|
||||
"三省堂 スーパー大辞林": makeDaijirinExtractor(),
|
||||
@ -160,12 +66,11 @@ func epwingExportDb(inputPath, outputPath, language, title string, stride int, p
|
||||
kanji dbKanjiList
|
||||
revisions []string
|
||||
titles []string
|
||||
sequence int
|
||||
)
|
||||
|
||||
log.Println("formatting dictionary data...")
|
||||
|
||||
var sequence int
|
||||
|
||||
for _, subbook := range book.Subbooks {
|
||||
if extractor, ok := epwingExtractors[subbook.Title]; ok {
|
||||
fontNarrow := extractor.getFontNarrow()
|
||||
|
@ -25,6 +25,8 @@ package main
|
||||
import (
|
||||
"regexp"
|
||||
"strings"
|
||||
|
||||
"github.com/FooSoft/zero-epwing-go/zig"
|
||||
)
|
||||
|
||||
type gakkenExtractor struct {
|
||||
@ -69,7 +71,7 @@ var cosmetics = strings.NewReplacer("(1)", "①", "(2)", "②", "(3)", "③", "(
|
||||
"セ゛", "ゼ",
|
||||
"ソ゛", "ゾ")
|
||||
|
||||
func (e *gakkenExtractor) extractTerms(entry epwingEntry, sequence int) []dbTerm {
|
||||
func (e *gakkenExtractor) extractTerms(entry zig.BookEntry, sequence int) []dbTerm {
|
||||
matches := e.partsExp.FindStringSubmatch(entry.Heading)
|
||||
if matches == nil {
|
||||
return nil
|
||||
@ -140,7 +142,7 @@ func (e *gakkenExtractor) extractTerms(entry epwingEntry, sequence int) []dbTerm
|
||||
return terms
|
||||
}
|
||||
|
||||
func (*gakkenExtractor) extractKanji(entry epwingEntry) []dbKanji {
|
||||
func (*gakkenExtractor) extractKanji(entry zig.BookEntry) []dbKanji {
|
||||
return nil
|
||||
}
|
||||
|
||||
|
9
go.mod
9
go.mod
@ -1,9 +0,0 @@
|
||||
module github.com/FooSoft/yomichan-import
|
||||
|
||||
go 1.13
|
||||
|
||||
require (
|
||||
github.com/FooSoft/jmdict v0.0.0-20190926045629-808d66c7b050
|
||||
github.com/andlabs/ui v0.0.0-20180902183112-867a9e5a498d
|
||||
github.com/mattn/go-sqlite3 v2.0.2+incompatible
|
||||
)
|
6
go.sum
6
go.sum
@ -1,6 +0,0 @@
|
||||
github.com/FooSoft/jmdict v0.0.0-20190926045629-808d66c7b050 h1:3VWk8B61jEgfcV+pEgzWO1j7TVXC9g5QaS0J06994Zc=
|
||||
github.com/FooSoft/jmdict v0.0.0-20190926045629-808d66c7b050/go.mod h1:zo92ezZlNld5cN1iuS0QRAmSsHcpvcqGZLVNKPM4Hlg=
|
||||
github.com/andlabs/ui v0.0.0-20180902183112-867a9e5a498d h1:4ianvxb8s3oyizgjuWWxGuTAUU+6JStcvj6BuHS4PVY=
|
||||
github.com/andlabs/ui v0.0.0-20180902183112-867a9e5a498d/go.mod h1:5G2EjwzgZUPnnReoKvPWVneT8APYbyKkihDVAHUi0II=
|
||||
github.com/mattn/go-sqlite3 v2.0.2+incompatible h1:qzw9c2GNT8UFrgWNDhCTqRqYUSmu/Dav/9Z58LGpk7U=
|
||||
github.com/mattn/go-sqlite3 v2.0.2+incompatible/go.mod h1:FPy6KqzDD04eiIsT53CuJW3U88zkxoIYsOqkbpncsNc=
|
2
gui.go
2
gui.go
@ -116,7 +116,7 @@ func gui() error {
|
||||
return
|
||||
}
|
||||
|
||||
format, err := detectFormat(inputPath)
|
||||
format, err := detectFormat(&inputPath)
|
||||
if err != nil {
|
||||
ui.MsgBoxError(window, "Error", "Unable to detect dictionary format")
|
||||
importButton.Enable()
|
||||
|
@ -25,6 +25,8 @@ package main
|
||||
import (
|
||||
"regexp"
|
||||
"strings"
|
||||
|
||||
"github.com/FooSoft/zero-epwing-go/zig"
|
||||
)
|
||||
|
||||
type kotowazaExtractor struct {
|
||||
@ -43,7 +45,7 @@ func makeKotowazaExtractor() epwingExtractor {
|
||||
}
|
||||
}
|
||||
|
||||
func (e *kotowazaExtractor) extractTerms(entry epwingEntry, sequence int) []dbTerm {
|
||||
func (e *kotowazaExtractor) extractTerms(entry zig.BookEntry, sequence int) []dbTerm {
|
||||
heading := entry.Heading
|
||||
|
||||
queue := []string{heading}
|
||||
@ -104,7 +106,7 @@ func (e *kotowazaExtractor) extractTerms(entry epwingEntry, sequence int) []dbTe
|
||||
return terms
|
||||
}
|
||||
|
||||
func (e *kotowazaExtractor) extractKanji(entry epwingEntry) []dbKanji {
|
||||
func (e *kotowazaExtractor) extractKanji(entry zig.BookEntry) []dbKanji {
|
||||
return nil
|
||||
}
|
||||
|
||||
|
@ -25,6 +25,8 @@ package main
|
||||
import (
|
||||
"regexp"
|
||||
"strings"
|
||||
|
||||
"github.com/FooSoft/zero-epwing-go/zig"
|
||||
)
|
||||
|
||||
type koujienExtractor struct {
|
||||
@ -57,7 +59,7 @@ func makeFuzokuExtractor() epwingExtractor {
|
||||
}
|
||||
}
|
||||
|
||||
func (e *koujienExtractor) extractTerms(entry epwingEntry, sequence int) []dbTerm {
|
||||
func (e *koujienExtractor) extractTerms(entry zig.BookEntry, sequence int) []dbTerm {
|
||||
matches := e.partsExp.FindStringSubmatch(entry.Heading)
|
||||
if matches == nil {
|
||||
return nil
|
||||
@ -122,7 +124,7 @@ func (e *koujienExtractor) extractTerms(entry epwingEntry, sequence int) []dbTer
|
||||
return terms
|
||||
}
|
||||
|
||||
func (*koujienExtractor) extractKanji(entry epwingEntry) []dbKanji {
|
||||
func (*koujienExtractor) extractKanji(entry zig.BookEntry) []dbKanji {
|
||||
return nil
|
||||
}
|
||||
|
||||
|
2
main.go
2
main.go
@ -107,7 +107,7 @@ func main() {
|
||||
|
||||
if *format == "" {
|
||||
var err error
|
||||
if *format, err = detectFormat(inputPath); err != nil {
|
||||
if *format, err = detectFormat(&inputPath); err != nil {
|
||||
log.Fatal(err)
|
||||
}
|
||||
}
|
||||
|
@ -25,6 +25,8 @@ package main
|
||||
import (
|
||||
"regexp"
|
||||
"strings"
|
||||
|
||||
"github.com/FooSoft/zero-epwing-go/zig"
|
||||
)
|
||||
|
||||
type meikyouExtractor struct {
|
||||
@ -77,7 +79,7 @@ func makeMeikyouExtractor() epwingExtractor {
|
||||
}
|
||||
}
|
||||
|
||||
func (e *meikyouExtractor) extractTerms(entry epwingEntry, sequence int) []dbTerm {
|
||||
func (e *meikyouExtractor) extractTerms(entry zig.BookEntry, sequence int) []dbTerm {
|
||||
matches := e.partsExp.FindStringSubmatch(entry.Heading)
|
||||
if matches == nil {
|
||||
return nil
|
||||
@ -153,7 +155,7 @@ func (e *meikyouExtractor) extractTerms(entry epwingEntry, sequence int) []dbTer
|
||||
return terms
|
||||
}
|
||||
|
||||
func (e *meikyouExtractor) extractKanji(entry epwingEntry) []dbKanji {
|
||||
func (e *meikyouExtractor) extractKanji(entry zig.BookEntry) []dbKanji {
|
||||
return nil
|
||||
}
|
||||
|
||||
|
6
wadai.go
6
wadai.go
@ -25,6 +25,8 @@ package main
|
||||
import (
|
||||
"regexp"
|
||||
"strings"
|
||||
|
||||
"github.com/FooSoft/zero-epwing-go/zig"
|
||||
)
|
||||
|
||||
type wadaiExtractor struct {
|
||||
@ -45,7 +47,7 @@ func makeWadaiExtractor() epwingExtractor {
|
||||
}
|
||||
}
|
||||
|
||||
func (e *wadaiExtractor) extractTerms(entry epwingEntry, sequence int) []dbTerm {
|
||||
func (e *wadaiExtractor) extractTerms(entry zig.BookEntry, sequence int) []dbTerm {
|
||||
matches := e.partsExp.FindStringSubmatch(entry.Heading)
|
||||
if matches == nil {
|
||||
return nil
|
||||
@ -104,7 +106,7 @@ func (e *wadaiExtractor) extractTerms(entry epwingEntry, sequence int) []dbTerm
|
||||
return terms
|
||||
}
|
||||
|
||||
func (e *wadaiExtractor) extractKanji(entry epwingEntry) []dbKanji {
|
||||
func (e *wadaiExtractor) extractKanji(entry zig.BookEntry) []dbKanji {
|
||||
return nil
|
||||
}
|
||||
|
||||
|
Loading…
Reference in New Issue
Block a user