Auto-detect dictionary format
This commit is contained in:
parent
da60bede76
commit
a78a5a4a4a
36
common.go
36
common.go
@ -27,6 +27,7 @@ import (
|
||||
"fmt"
|
||||
"os"
|
||||
"path"
|
||||
"path/filepath"
|
||||
"strings"
|
||||
)
|
||||
|
||||
@ -114,9 +115,8 @@ func (kanji dbKanjiList) crush() [][]interface{} {
|
||||
return results
|
||||
}
|
||||
|
||||
func writeDb(outputDir, title string, revision string, termRecords [][]interface{}, kanjiRecords [][]interface{}, tagMeta map[string]dbTagMeta, pretty bool) error {
|
||||
func writeDb(outputDir, title string, revision string, termRecords [][]interface{}, kanjiRecords [][]interface{}, tagMeta map[string]dbTagMeta, stride int, pretty bool) error {
|
||||
const DB_VERSION = 1
|
||||
const BANK_STRIDE = 10000
|
||||
|
||||
marshalJson := func(obj interface{}, pretty bool) ([]byte, error) {
|
||||
if pretty {
|
||||
@ -130,9 +130,9 @@ func writeDb(outputDir, title string, revision string, termRecords [][]interface
|
||||
recordCount := len(records)
|
||||
bankCount := 0
|
||||
|
||||
for i := 0; i < recordCount; i += BANK_STRIDE {
|
||||
for i := 0; i < recordCount; i += stride {
|
||||
indexSrc := i
|
||||
indexDst := i + BANK_STRIDE
|
||||
indexDst := i + stride
|
||||
if indexDst > recordCount {
|
||||
indexDst = recordCount
|
||||
}
|
||||
@ -142,7 +142,7 @@ func writeDb(outputDir, title string, revision string, termRecords [][]interface
|
||||
return 0, err
|
||||
}
|
||||
|
||||
fp, err := os.Create(path.Join(outputDir, fmt.Sprintf("%s_bank_%d.json", prefix, i/BANK_STRIDE+1)))
|
||||
fp, err := os.Create(path.Join(outputDir, fmt.Sprintf("%s_bank_%d.json", prefix, i/stride+1)))
|
||||
if err != nil {
|
||||
return 0, err
|
||||
}
|
||||
@ -222,3 +222,29 @@ func hasString(needle string, haystack []string) bool {
|
||||
|
||||
return false
|
||||
}
|
||||
|
||||
// detectFormat guesses the dictionary format of the input located at path.
//
// A directory is reported as "epwing" when it contains an entry named
// "CATALOGS". Anything that is not a directory is identified purely by its
// exact base name:
//
//	JMdict_e.xml  -> "edict"
//	JMnedict.xml  -> "enamdict"
//	kanjidic2.xml -> "kanjidic"
//
// The empty string is returned when path cannot be stat'ed or when none of
// the rules above match.
func detectFormat(path string) string {
	stat, statErr := os.Stat(path)
	if statErr != nil {
		return ""
	}

	if !stat.IsDir() {
		formatByName := map[string]string{
			"JMdict_e.xml":  "edict",
			"JMnedict.xml":  "enamdict",
			"kanjidic2.xml": "kanjidic",
		}
		// Indexing with an unknown name yields the zero value "", which is
		// exactly the "not detected" result.
		return formatByName[filepath.Base(path)]
	}

	// Directories are only recognised by the presence of a CATALOGS entry.
	if _, catalogErr := os.Stat(filepath.Join(path, "CATALOGS")); catalogErr != nil {
		return ""
	}
	return "epwing"
}
|
||||
|
7
edict.go
7
edict.go
@ -165,7 +165,7 @@ func jmdictExtractTerms(edictEntry jmdict.JmdictEntry) []dbTerm {
|
||||
return terms
|
||||
}
|
||||
|
||||
func jmdictExportDb(inputPath, outputDir, title string, pretty bool) error {
|
||||
func jmdictExportDb(inputPath, outputDir, title string, stride int, pretty bool) error {
|
||||
reader, err := os.Open(inputPath)
|
||||
if err != nil {
|
||||
return err
|
||||
@ -182,6 +182,10 @@ func jmdictExportDb(inputPath, outputDir, title string, pretty bool) error {
|
||||
terms = append(terms, jmdictExtractTerms(entry)...)
|
||||
}
|
||||
|
||||
if title == "" {
|
||||
title = "JMdict"
|
||||
}
|
||||
|
||||
return writeDb(
|
||||
outputDir,
|
||||
title,
|
||||
@ -189,6 +193,7 @@ func jmdictExportDb(inputPath, outputDir, title string, pretty bool) error {
|
||||
terms.crush(),
|
||||
nil,
|
||||
jmdictBuildTagMeta(entities),
|
||||
stride,
|
||||
pretty,
|
||||
)
|
||||
}
|
||||
|
@ -97,7 +97,7 @@ func jmnedictExtractTerms(enamdictEntry jmdict.JmnedictEntry) []dbTerm {
|
||||
return terms
|
||||
}
|
||||
|
||||
func jmnedictExportDb(inputPath, outputDir, title string, pretty bool) error {
|
||||
func jmnedictExportDb(inputPath, outputDir, title string, stride int, pretty bool) error {
|
||||
reader, err := os.Open(inputPath)
|
||||
if err != nil {
|
||||
return err
|
||||
@ -114,6 +114,10 @@ func jmnedictExportDb(inputPath, outputDir, title string, pretty bool) error {
|
||||
terms = append(terms, jmnedictExtractTerms(e)...)
|
||||
}
|
||||
|
||||
if title == "" {
|
||||
title = "JMnedict"
|
||||
}
|
||||
|
||||
return writeDb(
|
||||
outputDir,
|
||||
title,
|
||||
@ -121,6 +125,7 @@ func jmnedictExportDb(inputPath, outputDir, title string, pretty bool) error {
|
||||
terms.crush(),
|
||||
nil,
|
||||
jmnedictBuildTagMeta(entities),
|
||||
stride,
|
||||
pretty,
|
||||
)
|
||||
}
|
||||
|
17
epwing.go
17
epwing.go
@ -60,7 +60,7 @@ type epwingExtractor interface {
|
||||
getRevision() string
|
||||
}
|
||||
|
||||
func epwingExportDb(inputPath, outputDir, title string, pretty bool) error {
|
||||
func epwingExportDb(inputPath, outputDir, title string, stride int, pretty bool) error {
|
||||
stat, err := os.Stat(inputPath)
|
||||
if err != nil {
|
||||
return err
|
||||
@ -99,9 +99,12 @@ func epwingExportDb(inputPath, outputDir, title string, pretty bool) error {
|
||||
"三省堂 スーパー大辞林": makeDaijirinExtractor(),
|
||||
}
|
||||
|
||||
var terms dbTermList
|
||||
var kanji dbKanjiList
|
||||
var revisions []string
|
||||
var (
|
||||
terms dbTermList
|
||||
kanji dbKanjiList
|
||||
revisions []string
|
||||
titles []string
|
||||
)
|
||||
|
||||
for _, subbook := range book.Subbooks {
|
||||
if extractor, ok := epwingExtractors[subbook.Title]; ok {
|
||||
@ -138,11 +141,16 @@ func epwingExportDb(inputPath, outputDir, title string, pretty bool) error {
|
||||
}
|
||||
|
||||
revisions = append(revisions, extractor.getRevision())
|
||||
titles = append(titles, subbook.Title)
|
||||
} else {
|
||||
return fmt.Errorf("failed to find compatible extractor for '%s'", subbook.Title)
|
||||
}
|
||||
}
|
||||
|
||||
if title == "" {
|
||||
title = strings.Join(titles, ", ")
|
||||
}
|
||||
|
||||
return writeDb(
|
||||
outputDir,
|
||||
title,
|
||||
@ -150,6 +158,7 @@ func epwingExportDb(inputPath, outputDir, title string, pretty bool) error {
|
||||
terms.crush(),
|
||||
kanji.crush(),
|
||||
nil,
|
||||
stride,
|
||||
pretty,
|
||||
)
|
||||
}
|
||||
|
@ -80,7 +80,7 @@ func kanjidicExtractKanji(entry jmdict.KanjidicCharacter) dbKanji {
|
||||
return kanji
|
||||
}
|
||||
|
||||
func kanjidicExportDb(inputPath, outputDir, title string, pretty bool) error {
|
||||
func kanjidicExportDb(inputPath, outputDir, title string, stride int, pretty bool) error {
|
||||
reader, err := os.Open(inputPath)
|
||||
if err != nil {
|
||||
return err
|
||||
@ -106,6 +106,10 @@ func kanjidicExportDb(inputPath, outputDir, title string, pretty bool) error {
|
||||
"heisig": {Notes: "frame number in Remembering the Kanji"},
|
||||
}
|
||||
|
||||
if title == "" {
|
||||
title = "KANJIDIC2"
|
||||
}
|
||||
|
||||
return writeDb(
|
||||
outputDir,
|
||||
title,
|
||||
@ -113,6 +117,7 @@ func kanjidicExportDb(inputPath, outputDir, title string, pretty bool) error {
|
||||
nil,
|
||||
kanji.crush(),
|
||||
tagMeta,
|
||||
stride,
|
||||
pretty,
|
||||
)
|
||||
}
|
||||
|
42
main.go
42
main.go
@ -31,17 +31,16 @@ import (
|
||||
"net/http"
|
||||
"os"
|
||||
"path"
|
||||
"time"
|
||||
)
|
||||
|
||||
func usage() {
|
||||
fmt.Fprintf(os.Stderr, "Usage:\n %s [options] [edict|enamdict|kanjidic|epwing] input-path [output-dir]\n\n", path.Base(os.Args[0]))
|
||||
fmt.Fprintf(os.Stderr, "Usage:\n %s [options] input-path [output-dir]\n\n", path.Base(os.Args[0]))
|
||||
fmt.Fprintf(os.Stderr, "Parameters:\n")
|
||||
flag.PrintDefaults()
|
||||
}
|
||||
|
||||
func exportDb(inputPath, outputDir, format, title string, pretty bool) error {
|
||||
handlers := map[string]func(string, string, string, bool) error{
|
||||
func exportDb(inputPath, outputDir, format, title string, stride int, pretty bool) error {
|
||||
handlers := map[string]func(string, string, string, int, bool) error{
|
||||
"edict": jmdictExportDb,
|
||||
"enamdict": jmnedictExportDb,
|
||||
"kanjidic": kanjidicExportDb,
|
||||
@ -53,39 +52,43 @@ func exportDb(inputPath, outputDir, format, title string, pretty bool) error {
|
||||
return errors.New("unrecognized dictionray format")
|
||||
}
|
||||
|
||||
log.Printf("converting '%s' to '%s'...", inputPath, outputDir)
|
||||
return handler(inputPath, outputDir, title, pretty)
|
||||
log.Printf("converting '%s' to '%s' in '%s' format...", inputPath, outputDir, format)
|
||||
return handler(inputPath, outputDir, title, stride, pretty)
|
||||
}
|
||||
|
||||
func serveDb(serveDir string, port int) error {
|
||||
log.Printf("starting HTTP server on port %d...\n", port)
|
||||
log.Printf("starting dictionary server on port %d...\n", port)
|
||||
return http.ListenAndServe(fmt.Sprintf(":%d", port), http.FileServer(http.Dir(serveDir)))
|
||||
}
|
||||
|
||||
func main() {
|
||||
var (
|
||||
serve = flag.Bool("serve", false, "serve JSON over HTTP")
|
||||
format = flag.String("format", "", "dictionary format [edict|enamdict|kanjidic|epwing]")
|
||||
port = flag.Int("port", 9876, "port to serve JSON on")
|
||||
pretty = flag.Bool("pretty", false, "output prettified JSON")
|
||||
serve = flag.Bool("serve", false, "serve JSON over HTTP")
|
||||
stride = flag.Int("stride", 10000, "dictionary bank stride")
|
||||
title = flag.String("title", "", "dictionary title")
|
||||
)
|
||||
|
||||
flag.Usage = usage
|
||||
flag.Parse()
|
||||
|
||||
if flag.NArg() != 2 && flag.NArg() != 3 {
|
||||
if flag.NArg() != 1 && flag.NArg() != 2 {
|
||||
usage()
|
||||
os.Exit(2)
|
||||
}
|
||||
|
||||
var (
|
||||
format = flag.Arg(0)
|
||||
inputPath = flag.Arg(1)
|
||||
outputDir string
|
||||
)
|
||||
inputPath := flag.Arg(0)
|
||||
if *format == "" {
|
||||
if *format = detectFormat(inputPath); *format == "" {
|
||||
log.Fatal("failed to detect dictionary format")
|
||||
}
|
||||
}
|
||||
|
||||
if flag.NArg() == 3 {
|
||||
outputDir = flag.Arg(2)
|
||||
var outputDir string
|
||||
if flag.NArg() == 2 {
|
||||
outputDir = flag.Arg(1)
|
||||
} else {
|
||||
var err error
|
||||
outputDir, err = ioutil.TempDir("", "yomichan_tmp_")
|
||||
@ -94,12 +97,7 @@ func main() {
|
||||
}
|
||||
}
|
||||
|
||||
if *title == "" {
|
||||
t := time.Now()
|
||||
*title = fmt.Sprintf("%s-%s", format, t.Format("20060102150405"))
|
||||
}
|
||||
|
||||
if err := exportDb(inputPath, outputDir, format, *title, *pretty); err != nil {
|
||||
if err := exportDb(inputPath, outputDir, *format, *title, *stride, *pretty); err != nil {
|
||||
log.Fatal(err)
|
||||
}
|
||||
|
||||
|
Loading…
Reference in New Issue
Block a user