From e44d029f8927d91bcba83a2e87b58ddbc18fe4b7 Mon Sep 17 00:00:00 2001 From: Alex Yatskov Date: Mon, 22 Aug 2016 20:51:30 -0700 Subject: [PATCH] WIP --- common.go | 104 ++++++++++++++++++++++++++++++++-------------------- edict.go | 4 +- enamdict.go | 4 +- kanjidic.go | 22 ++++++----- main.go | 14 +++---- 5 files changed, 85 insertions(+), 63 deletions(-) diff --git a/common.go b/common.go index d9baa42..8f737b2 100644 --- a/common.go +++ b/common.go @@ -24,25 +24,20 @@ package main import ( "encoding/json" - "io" + "fmt" + "os" + "path" "strings" ) -type termDefJson struct { - Expression string `json:"e"` - Reading string `json:"r"` - Tags string `json:"t"` - Glossary []string `json:"g"` -} - -type termEntJson struct { - Name string `json:"n"` - Value string `json:"v"` -} +const ( + REF_STEP_COUNT = 1000 +) type termJson struct { - Entities []termEntJson `json:"e"` - Defs []termDefJson `json:"d"` + Refs int `json:"refs"` + Entities [][]string `json:"ents"` + defs [][]string } type termSource struct { @@ -77,48 +72,77 @@ func buildTermJson(entries []termSource, entities map[string]string) termJson { var dict termJson for name, value := range entities { - ent := termEntJson{ - Name: name, - Value: value, - } - + ent := []string{name, value} dict.Entities = append(dict.Entities, ent) } for _, e := range entries { - def := termDefJson{ - e.Expression, - e.Reading, - strings.Join(e.Tags, " "), - e.Glossary, - } - - dict.Defs = append(dict.Defs, def) + def := []string{e.Expression, e.Reading, strings.Join(e.Tags, " ")} + def = append(def, e.Glossary...) + dict.defs = append(dict.defs, def) } + dict.Refs = len(dict.defs) / REF_STEP_COUNT + return dict } -func outputTermJson(writer io.Writer, entries []termSource, entities map[string]string, pretty bool) error { - dict := buildTermJson(entries, entities) - - var ( - bytes []byte - err error - ) - +func marshalJson(obj interface{}, pretty bool) ([]byte, error) { if pretty { - bytes, err = json.MarshalIndent(dict, "", " ") - } else { - bytes, err = json.Marshal(dict) + return json.MarshalIndent(obj, "", " ") } + return json.Marshal(obj) +} + +func outputTermJson(outputDir string, entries []termSource, entities map[string]string, pretty bool) error { + if err := os.MkdirAll(outputDir, 0755); err != nil { + return err + } + + outputIndex, err := os.Create(path.Join(outputDir, "index.json")) + if err != nil { + return err + } + defer outputIndex.Close() + + dict := buildTermJson(entries, entities) + + indexBytes, err := marshalJson(dict, pretty) if err != nil { return err } - _, err = writer.Write(bytes) - return err + if _, err = outputIndex.Write(indexBytes); err != nil { + return err + } + + defCnt := len(dict.defs) + + for i := 0; i < defCnt; i += REF_STEP_COUNT { + outputRef, err := os.Create(path.Join(outputDir, fmt.Sprintf("ref_%0.3d.json", i/REF_STEP_COUNT))) + if err != nil { + return err + } + defer outputRef.Close() + + indexSrc := i + indexDst := i + REF_STEP_COUNT + if indexDst > defCnt { + indexDst = defCnt + } + + refBytes, err := marshalJson(dict.defs[indexSrc:indexDst], pretty) + if err != nil { + return err + } + + if _, err = outputRef.Write(refBytes); err != nil { + return err + } + } + + return nil } func hasString(needle string, haystack []string) bool { diff --git a/edict.go b/edict.go index dabda16..5786621 100644 --- a/edict.go +++ b/edict.go @@ -87,7 +87,7 @@ func convertEdictEntry(edictEntry jmdict.JmdictEntry) []termSource { return entries } -func outputEdictJson(writer io.Writer, reader io.Reader, flags int) error { +func outputEdictJson(outputDir string, reader io.Reader, flags int) error { dict, entities, err := jmdict.LoadJmdictNoTransform(reader) if err != nil { return err @@ -98,5 +98,5 @@ func outputEdictJson(writer io.Writer, reader io.Reader, flags int) error { entries = append(entries, convertEdictEntry(e)...) } - return outputTermJson(writer, entries, entities, flags&flagPrettyJson == flagPrettyJson) + return outputTermJson(outputDir, entries, entities, flags&flagPrettyJson == flagPrettyJson) } diff --git a/enamdict.go b/enamdict.go index 0912452..107426f 100644 --- a/enamdict.go +++ b/enamdict.go @@ -73,7 +73,7 @@ func convertJmnedictEntry(enamdictEntry jmdict.JmnedictEntry) []termSource { return entries } -func outputJmnedictJson(writer io.Writer, reader io.Reader, flags int) error { +func outputJmnedictJson(outputDir string, reader io.Reader, flags int) error { dict, entities, err := jmdict.LoadJmnedictNoTransform(reader) if err != nil { return err @@ -84,5 +84,5 @@ func outputJmnedictJson(writer io.Writer, reader io.Reader, flags int) error { entries = append(entries, convertJmnedictEntry(e)...) } - return outputTermJson(writer, entries, entities, flags&flagPrettyJson == flagPrettyJson) + return outputTermJson(outputDir, entries, entities, flags&flagPrettyJson == flagPrettyJson) } diff --git a/kanjidic.go b/kanjidic.go index e3e8109..4372596 100644 --- a/kanjidic.go +++ b/kanjidic.go @@ -150,16 +150,18 @@ func convertKanjidicCharacter(kanjidicCharacter jmdict.KanjidicCharacter) kanjiS return character } -func outputKanjidicJson(writer io.Writer, reader io.Reader, flags int) error { - dict, err := jmdict.LoadKanjidic(reader) - if err != nil { - return err - } +func outputKanjidicJson(outputDir string, reader io.Reader, flags int) error { + // dict, err := jmdict.LoadKanjidic(reader) + // if err != nil { + // return err + // } - var kanji []kanjiSource - for _, kanjidicCharacter := range dict.Characters { - kanji = append(kanji, convertKanjidicCharacter(kanjidicCharacter)) - } + // var kanji []kanjiSource + // for _, kanjidicCharacter := range dict.Characters { + // kanji = append(kanji, convertKanjidicCharacter(kanjidicCharacter)) + // } - return outputKanjiJson(writer, kanji, flags&flagPrettyJson == flagPrettyJson) + // return outputKanjiJson(writer, kanji, flags&flagPrettyJson == flagPrettyJson) + + return nil } diff --git a/main.go b/main.go index 4788f8c..b88e299 100644 --- a/main.go +++ b/main.go @@ -42,8 +42,8 @@ func usage() { flag.PrintDefaults() } -func outputJson(fileFormat, inputFile, outputFile string, flags int) error { - handlers := map[string]func(io.Writer, io.Reader, int) error{ +func outputJson(fileFormat, inputPath, outputDir string, flags int) error { + handlers := map[string]func(string, io.Reader, int) error{ "edict": outputEdictJson, "enamdict": outputJmnedictJson, "kanjidic": outputKanjidicJson, @@ -54,17 +54,13 @@ func outputJson(fileFormat, inputFile, outputFile string, flags int) error { return errors.New("unrecognized file format") } - input, err := os.Open(inputFile) + input, err := os.Open(inputPath) if err != nil { return err } + defer input.Close() - output, err := os.Create(outputFile) - if err != nil { - return err - } - - return handler(output, input, flags) + return handler(outputDir, input, flags) } func main() {