Switch to zig for EPWING parsing
This commit is contained in:
parent
50901f7155
commit
b66d908b23
11
common.go
11
common.go
@ -266,8 +266,8 @@ func hasString(needle string, haystack []string) bool {
|
|||||||
return false
|
return false
|
||||||
}
|
}
|
||||||
|
|
||||||
func detectFormat(path string) (string, error) {
|
func detectFormat(path *string) (string, error) {
|
||||||
switch filepath.Ext(path) {
|
switch filepath.Ext(*path) {
|
||||||
case ".sqlite":
|
case ".sqlite":
|
||||||
return "rikai", nil
|
return "rikai", nil
|
||||||
case ".kanjifreq":
|
case ".kanjifreq":
|
||||||
@ -276,7 +276,7 @@ func detectFormat(path string) (string, error) {
|
|||||||
return "termfreq", nil
|
return "termfreq", nil
|
||||||
}
|
}
|
||||||
|
|
||||||
switch filepath.Base(path) {
|
switch filepath.Base(*path) {
|
||||||
case "JMdict", "JMdict.xml", "JMdict_e", "JMdict_e.xml":
|
case "JMdict", "JMdict.xml", "JMdict_e", "JMdict_e.xml":
|
||||||
return "edict", nil
|
return "edict", nil
|
||||||
case "JMnedict", "JMnedict.xml":
|
case "JMnedict", "JMnedict.xml":
|
||||||
@ -284,16 +284,17 @@ func detectFormat(path string) (string, error) {
|
|||||||
case "kanjidic2", "kanjidic2.xml":
|
case "kanjidic2", "kanjidic2.xml":
|
||||||
return "kanjidic", nil
|
return "kanjidic", nil
|
||||||
case "CATALOGS":
|
case "CATALOGS":
|
||||||
|
*path = filepath.Dir(*path)
|
||||||
return "epwing", nil
|
return "epwing", nil
|
||||||
}
|
}
|
||||||
|
|
||||||
info, err := os.Stat(path)
|
info, err := os.Stat(*path)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return "", err
|
return "", err
|
||||||
}
|
}
|
||||||
|
|
||||||
if info.IsDir() {
|
if info.IsDir() {
|
||||||
_, err := os.Stat(filepath.Join(path, "CATALOGS"))
|
_, err := os.Stat(filepath.Join(*path, "CATALOGS"))
|
||||||
if err == nil {
|
if err == nil {
|
||||||
return "epwing", nil
|
return "epwing", nil
|
||||||
}
|
}
|
||||||
|
@ -25,6 +25,8 @@ package main
|
|||||||
import (
|
import (
|
||||||
"regexp"
|
"regexp"
|
||||||
"strings"
|
"strings"
|
||||||
|
|
||||||
|
"github.com/FooSoft/zero-epwing-go/zig"
|
||||||
)
|
)
|
||||||
|
|
||||||
type daijirinExtractor struct {
|
type daijirinExtractor struct {
|
||||||
@ -47,7 +49,7 @@ func makeDaijirinExtractor() epwingExtractor {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
func (e *daijirinExtractor) extractTerms(entry epwingEntry, sequence int) []dbTerm {
|
func (e *daijirinExtractor) extractTerms(entry zig.BookEntry, sequence int) []dbTerm {
|
||||||
matches := e.partsExp.FindStringSubmatch(entry.Heading)
|
matches := e.partsExp.FindStringSubmatch(entry.Heading)
|
||||||
if matches == nil {
|
if matches == nil {
|
||||||
return nil
|
return nil
|
||||||
@ -112,7 +114,7 @@ func (e *daijirinExtractor) extractTerms(entry epwingEntry, sequence int) []dbTe
|
|||||||
return terms
|
return terms
|
||||||
}
|
}
|
||||||
|
|
||||||
func (*daijirinExtractor) extractKanji(entry epwingEntry) []dbKanji {
|
func (*daijirinExtractor) extractKanji(entry zig.BookEntry) []dbKanji {
|
||||||
return nil
|
return nil
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -25,6 +25,8 @@ package main
|
|||||||
import (
|
import (
|
||||||
"regexp"
|
"regexp"
|
||||||
"strings"
|
"strings"
|
||||||
|
|
||||||
|
"github.com/FooSoft/zero-epwing-go/zig"
|
||||||
)
|
)
|
||||||
|
|
||||||
type daijisenExtractor struct {
|
type daijisenExtractor struct {
|
||||||
@ -49,7 +51,7 @@ func makeDaijisenExtractor() epwingExtractor {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
func (e *daijisenExtractor) extractTerms(entry epwingEntry, sequence int) []dbTerm {
|
func (e *daijisenExtractor) extractTerms(entry zig.BookEntry, sequence int) []dbTerm {
|
||||||
matches := e.partsExp.FindStringSubmatch(entry.Heading)
|
matches := e.partsExp.FindStringSubmatch(entry.Heading)
|
||||||
if matches == nil {
|
if matches == nil {
|
||||||
return nil
|
return nil
|
||||||
@ -111,7 +113,7 @@ func (e *daijisenExtractor) extractTerms(entry epwingEntry, sequence int) []dbTe
|
|||||||
return terms
|
return terms
|
||||||
}
|
}
|
||||||
|
|
||||||
func (*daijisenExtractor) extractKanji(entry epwingEntry) []dbKanji {
|
func (*daijisenExtractor) extractKanji(entry zig.BookEntry) []dbKanji {
|
||||||
return nil
|
return nil
|
||||||
}
|
}
|
||||||
|
|
||||||
|
107
epwing.go
107
epwing.go
@ -23,123 +23,29 @@
|
|||||||
package main
|
package main
|
||||||
|
|
||||||
import (
|
import (
|
||||||
"bufio"
|
|
||||||
"encoding/json"
|
|
||||||
"fmt"
|
"fmt"
|
||||||
"io/ioutil"
|
|
||||||
"log"
|
"log"
|
||||||
"os"
|
|
||||||
"os/exec"
|
|
||||||
"path/filepath"
|
|
||||||
"regexp"
|
"regexp"
|
||||||
"runtime"
|
|
||||||
"strconv"
|
"strconv"
|
||||||
"strings"
|
"strings"
|
||||||
|
|
||||||
|
"github.com/FooSoft/zero-epwing-go/zig"
|
||||||
)
|
)
|
||||||
|
|
||||||
type epwingEntry struct {
|
|
||||||
Heading string `json:"heading"`
|
|
||||||
Text string `json:"text"`
|
|
||||||
}
|
|
||||||
|
|
||||||
type epwingSubbook struct {
|
|
||||||
Title string `json:"title"`
|
|
||||||
Copyright string `json:"copyright"`
|
|
||||||
Entries []epwingEntry `json:"entries"`
|
|
||||||
}
|
|
||||||
|
|
||||||
type epwingBook struct {
|
|
||||||
CharCode string `json:"charCode"`
|
|
||||||
DiscCode string `json:"discCode"`
|
|
||||||
Subbooks []epwingSubbook `json:"subbooks"`
|
|
||||||
}
|
|
||||||
|
|
||||||
type epwingExtractor interface {
|
type epwingExtractor interface {
|
||||||
extractTerms(entry epwingEntry, sequence int) []dbTerm
|
extractTerms(entry zig.BookEntry, sequence int) []dbTerm
|
||||||
extractKanji(entry epwingEntry) []dbKanji
|
extractKanji(entry zig.BookEntry) []dbKanji
|
||||||
getFontNarrow() map[int]string
|
getFontNarrow() map[int]string
|
||||||
getFontWide() map[int]string
|
getFontWide() map[int]string
|
||||||
getRevision() string
|
getRevision() string
|
||||||
}
|
}
|
||||||
|
|
||||||
func epwingExportDb(inputPath, outputPath, language, title string, stride int, pretty bool) error {
|
func epwingExportDb(inputPath, outputPath, language, title string, stride int, pretty bool) error {
|
||||||
stat, err := os.Stat(inputPath)
|
book, err := zig.Load(inputPath)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return err
|
return err
|
||||||
}
|
}
|
||||||
|
|
||||||
var toolExec bool
|
|
||||||
if stat.IsDir() {
|
|
||||||
toolExec = true
|
|
||||||
} else if filepath.Base(inputPath) == "CATALOGS" {
|
|
||||||
inputPath = filepath.Dir(inputPath)
|
|
||||||
toolExec = true
|
|
||||||
}
|
|
||||||
|
|
||||||
var data []byte
|
|
||||||
if toolExec {
|
|
||||||
exePath, err := os.Executable()
|
|
||||||
if err != nil {
|
|
||||||
return err
|
|
||||||
}
|
|
||||||
|
|
||||||
toolPath := filepath.Join("bin", runtime.GOOS, "zero-epwing")
|
|
||||||
if runtime.GOOS == "windows" {
|
|
||||||
toolPath += ".exe"
|
|
||||||
}
|
|
||||||
|
|
||||||
toolPath = filepath.Join(filepath.Dir(exePath), toolPath)
|
|
||||||
|
|
||||||
if _, err = os.Stat(toolPath); err != nil {
|
|
||||||
return fmt.Errorf("failed to find zero-epwing in '%s'", toolPath)
|
|
||||||
}
|
|
||||||
|
|
||||||
cmd := exec.Command(toolPath, "--entries", inputPath)
|
|
||||||
|
|
||||||
stdout, err := cmd.StdoutPipe()
|
|
||||||
if err != nil {
|
|
||||||
return err
|
|
||||||
}
|
|
||||||
|
|
||||||
stderr, err := cmd.StderrPipe()
|
|
||||||
if err != nil {
|
|
||||||
return err
|
|
||||||
}
|
|
||||||
|
|
||||||
log.Printf("invoking zero-epwing from '%s'...\n", toolPath)
|
|
||||||
if err := cmd.Start(); err != nil {
|
|
||||||
return err
|
|
||||||
}
|
|
||||||
|
|
||||||
go func() {
|
|
||||||
scanner := bufio.NewScanner(stderr)
|
|
||||||
for scanner.Scan() {
|
|
||||||
log.Printf("\t > %s\n", scanner.Text())
|
|
||||||
}
|
|
||||||
}()
|
|
||||||
|
|
||||||
if data, err = ioutil.ReadAll(stdout); err != nil {
|
|
||||||
return err
|
|
||||||
}
|
|
||||||
|
|
||||||
if err := cmd.Wait(); err != nil {
|
|
||||||
return err
|
|
||||||
}
|
|
||||||
|
|
||||||
log.Println("completed zero-epwing processing")
|
|
||||||
} else {
|
|
||||||
data, err = ioutil.ReadFile(inputPath)
|
|
||||||
}
|
|
||||||
|
|
||||||
if err != nil {
|
|
||||||
return err
|
|
||||||
}
|
|
||||||
|
|
||||||
var book epwingBook
|
|
||||||
if err := json.Unmarshal(data, &book); err != nil {
|
|
||||||
return err
|
|
||||||
}
|
|
||||||
|
|
||||||
translateExp := regexp.MustCompile(`{{([nw])_(\d+)}}`)
|
translateExp := regexp.MustCompile(`{{([nw])_(\d+)}}`)
|
||||||
epwingExtractors := map[string]epwingExtractor{
|
epwingExtractors := map[string]epwingExtractor{
|
||||||
"三省堂 スーパー大辞林": makeDaijirinExtractor(),
|
"三省堂 スーパー大辞林": makeDaijirinExtractor(),
|
||||||
@ -160,12 +66,11 @@ func epwingExportDb(inputPath, outputPath, language, title string, stride int, p
|
|||||||
kanji dbKanjiList
|
kanji dbKanjiList
|
||||||
revisions []string
|
revisions []string
|
||||||
titles []string
|
titles []string
|
||||||
|
sequence int
|
||||||
)
|
)
|
||||||
|
|
||||||
log.Println("formatting dictionary data...")
|
log.Println("formatting dictionary data...")
|
||||||
|
|
||||||
var sequence int
|
|
||||||
|
|
||||||
for _, subbook := range book.Subbooks {
|
for _, subbook := range book.Subbooks {
|
||||||
if extractor, ok := epwingExtractors[subbook.Title]; ok {
|
if extractor, ok := epwingExtractors[subbook.Title]; ok {
|
||||||
fontNarrow := extractor.getFontNarrow()
|
fontNarrow := extractor.getFontNarrow()
|
||||||
|
@ -25,6 +25,8 @@ package main
|
|||||||
import (
|
import (
|
||||||
"regexp"
|
"regexp"
|
||||||
"strings"
|
"strings"
|
||||||
|
|
||||||
|
"github.com/FooSoft/zero-epwing-go/zig"
|
||||||
)
|
)
|
||||||
|
|
||||||
type gakkenExtractor struct {
|
type gakkenExtractor struct {
|
||||||
@ -69,7 +71,7 @@ var cosmetics = strings.NewReplacer("(1)", "①", "(2)", "②", "(3)", "③", "(
|
|||||||
"セ゛", "ゼ",
|
"セ゛", "ゼ",
|
||||||
"ソ゛", "ゾ")
|
"ソ゛", "ゾ")
|
||||||
|
|
||||||
func (e *gakkenExtractor) extractTerms(entry epwingEntry, sequence int) []dbTerm {
|
func (e *gakkenExtractor) extractTerms(entry zig.BookEntry, sequence int) []dbTerm {
|
||||||
matches := e.partsExp.FindStringSubmatch(entry.Heading)
|
matches := e.partsExp.FindStringSubmatch(entry.Heading)
|
||||||
if matches == nil {
|
if matches == nil {
|
||||||
return nil
|
return nil
|
||||||
@ -140,7 +142,7 @@ func (e *gakkenExtractor) extractTerms(entry epwingEntry, sequence int) []dbTerm
|
|||||||
return terms
|
return terms
|
||||||
}
|
}
|
||||||
|
|
||||||
func (*gakkenExtractor) extractKanji(entry epwingEntry) []dbKanji {
|
func (*gakkenExtractor) extractKanji(entry zig.BookEntry) []dbKanji {
|
||||||
return nil
|
return nil
|
||||||
}
|
}
|
||||||
|
|
||||||
|
9
go.mod
9
go.mod
@ -1,9 +0,0 @@
|
|||||||
module github.com/FooSoft/yomichan-import
|
|
||||||
|
|
||||||
go 1.13
|
|
||||||
|
|
||||||
require (
|
|
||||||
github.com/FooSoft/jmdict v0.0.0-20190926045629-808d66c7b050
|
|
||||||
github.com/andlabs/ui v0.0.0-20180902183112-867a9e5a498d
|
|
||||||
github.com/mattn/go-sqlite3 v2.0.2+incompatible
|
|
||||||
)
|
|
6
go.sum
6
go.sum
@ -1,6 +0,0 @@
|
|||||||
github.com/FooSoft/jmdict v0.0.0-20190926045629-808d66c7b050 h1:3VWk8B61jEgfcV+pEgzWO1j7TVXC9g5QaS0J06994Zc=
|
|
||||||
github.com/FooSoft/jmdict v0.0.0-20190926045629-808d66c7b050/go.mod h1:zo92ezZlNld5cN1iuS0QRAmSsHcpvcqGZLVNKPM4Hlg=
|
|
||||||
github.com/andlabs/ui v0.0.0-20180902183112-867a9e5a498d h1:4ianvxb8s3oyizgjuWWxGuTAUU+6JStcvj6BuHS4PVY=
|
|
||||||
github.com/andlabs/ui v0.0.0-20180902183112-867a9e5a498d/go.mod h1:5G2EjwzgZUPnnReoKvPWVneT8APYbyKkihDVAHUi0II=
|
|
||||||
github.com/mattn/go-sqlite3 v2.0.2+incompatible h1:qzw9c2GNT8UFrgWNDhCTqRqYUSmu/Dav/9Z58LGpk7U=
|
|
||||||
github.com/mattn/go-sqlite3 v2.0.2+incompatible/go.mod h1:FPy6KqzDD04eiIsT53CuJW3U88zkxoIYsOqkbpncsNc=
|
|
2
gui.go
2
gui.go
@ -116,7 +116,7 @@ func gui() error {
|
|||||||
return
|
return
|
||||||
}
|
}
|
||||||
|
|
||||||
format, err := detectFormat(inputPath)
|
format, err := detectFormat(&inputPath)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
ui.MsgBoxError(window, "Error", "Unable to detect dictionary format")
|
ui.MsgBoxError(window, "Error", "Unable to detect dictionary format")
|
||||||
importButton.Enable()
|
importButton.Enable()
|
||||||
|
@ -25,6 +25,8 @@ package main
|
|||||||
import (
|
import (
|
||||||
"regexp"
|
"regexp"
|
||||||
"strings"
|
"strings"
|
||||||
|
|
||||||
|
"github.com/FooSoft/zero-epwing-go/zig"
|
||||||
)
|
)
|
||||||
|
|
||||||
type kotowazaExtractor struct {
|
type kotowazaExtractor struct {
|
||||||
@ -43,7 +45,7 @@ func makeKotowazaExtractor() epwingExtractor {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
func (e *kotowazaExtractor) extractTerms(entry epwingEntry, sequence int) []dbTerm {
|
func (e *kotowazaExtractor) extractTerms(entry zig.BookEntry, sequence int) []dbTerm {
|
||||||
heading := entry.Heading
|
heading := entry.Heading
|
||||||
|
|
||||||
queue := []string{heading}
|
queue := []string{heading}
|
||||||
@ -104,7 +106,7 @@ func (e *kotowazaExtractor) extractTerms(entry epwingEntry, sequence int) []dbTe
|
|||||||
return terms
|
return terms
|
||||||
}
|
}
|
||||||
|
|
||||||
func (e *kotowazaExtractor) extractKanji(entry epwingEntry) []dbKanji {
|
func (e *kotowazaExtractor) extractKanji(entry zig.BookEntry) []dbKanji {
|
||||||
return nil
|
return nil
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -25,6 +25,8 @@ package main
|
|||||||
import (
|
import (
|
||||||
"regexp"
|
"regexp"
|
||||||
"strings"
|
"strings"
|
||||||
|
|
||||||
|
"github.com/FooSoft/zero-epwing-go/zig"
|
||||||
)
|
)
|
||||||
|
|
||||||
type koujienExtractor struct {
|
type koujienExtractor struct {
|
||||||
@ -57,7 +59,7 @@ func makeFuzokuExtractor() epwingExtractor {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
func (e *koujienExtractor) extractTerms(entry epwingEntry, sequence int) []dbTerm {
|
func (e *koujienExtractor) extractTerms(entry zig.BookEntry, sequence int) []dbTerm {
|
||||||
matches := e.partsExp.FindStringSubmatch(entry.Heading)
|
matches := e.partsExp.FindStringSubmatch(entry.Heading)
|
||||||
if matches == nil {
|
if matches == nil {
|
||||||
return nil
|
return nil
|
||||||
@ -122,7 +124,7 @@ func (e *koujienExtractor) extractTerms(entry epwingEntry, sequence int) []dbTer
|
|||||||
return terms
|
return terms
|
||||||
}
|
}
|
||||||
|
|
||||||
func (*koujienExtractor) extractKanji(entry epwingEntry) []dbKanji {
|
func (*koujienExtractor) extractKanji(entry zig.BookEntry) []dbKanji {
|
||||||
return nil
|
return nil
|
||||||
}
|
}
|
||||||
|
|
||||||
|
2
main.go
2
main.go
@ -107,7 +107,7 @@ func main() {
|
|||||||
|
|
||||||
if *format == "" {
|
if *format == "" {
|
||||||
var err error
|
var err error
|
||||||
if *format, err = detectFormat(inputPath); err != nil {
|
if *format, err = detectFormat(&inputPath); err != nil {
|
||||||
log.Fatal(err)
|
log.Fatal(err)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -25,6 +25,8 @@ package main
|
|||||||
import (
|
import (
|
||||||
"regexp"
|
"regexp"
|
||||||
"strings"
|
"strings"
|
||||||
|
|
||||||
|
"github.com/FooSoft/zero-epwing-go/zig"
|
||||||
)
|
)
|
||||||
|
|
||||||
type meikyouExtractor struct {
|
type meikyouExtractor struct {
|
||||||
@ -77,7 +79,7 @@ func makeMeikyouExtractor() epwingExtractor {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
func (e *meikyouExtractor) extractTerms(entry epwingEntry, sequence int) []dbTerm {
|
func (e *meikyouExtractor) extractTerms(entry zig.BookEntry, sequence int) []dbTerm {
|
||||||
matches := e.partsExp.FindStringSubmatch(entry.Heading)
|
matches := e.partsExp.FindStringSubmatch(entry.Heading)
|
||||||
if matches == nil {
|
if matches == nil {
|
||||||
return nil
|
return nil
|
||||||
@ -153,7 +155,7 @@ func (e *meikyouExtractor) extractTerms(entry epwingEntry, sequence int) []dbTer
|
|||||||
return terms
|
return terms
|
||||||
}
|
}
|
||||||
|
|
||||||
func (e *meikyouExtractor) extractKanji(entry epwingEntry) []dbKanji {
|
func (e *meikyouExtractor) extractKanji(entry zig.BookEntry) []dbKanji {
|
||||||
return nil
|
return nil
|
||||||
}
|
}
|
||||||
|
|
||||||
|
6
wadai.go
6
wadai.go
@ -25,6 +25,8 @@ package main
|
|||||||
import (
|
import (
|
||||||
"regexp"
|
"regexp"
|
||||||
"strings"
|
"strings"
|
||||||
|
|
||||||
|
"github.com/FooSoft/zero-epwing-go/zig"
|
||||||
)
|
)
|
||||||
|
|
||||||
type wadaiExtractor struct {
|
type wadaiExtractor struct {
|
||||||
@ -45,7 +47,7 @@ func makeWadaiExtractor() epwingExtractor {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
func (e *wadaiExtractor) extractTerms(entry epwingEntry, sequence int) []dbTerm {
|
func (e *wadaiExtractor) extractTerms(entry zig.BookEntry, sequence int) []dbTerm {
|
||||||
matches := e.partsExp.FindStringSubmatch(entry.Heading)
|
matches := e.partsExp.FindStringSubmatch(entry.Heading)
|
||||||
if matches == nil {
|
if matches == nil {
|
||||||
return nil
|
return nil
|
||||||
@ -104,7 +106,7 @@ func (e *wadaiExtractor) extractTerms(entry epwingEntry, sequence int) []dbTerm
|
|||||||
return terms
|
return terms
|
||||||
}
|
}
|
||||||
|
|
||||||
func (e *wadaiExtractor) extractKanji(entry epwingEntry) []dbKanji {
|
func (e *wadaiExtractor) extractKanji(entry zig.BookEntry) []dbKanji {
|
||||||
return nil
|
return nil
|
||||||
}
|
}
|
||||||
|
|
||||||
|
Loading…
Reference in New Issue
Block a user