diff --git a/common.go b/common.go index bfe2318..be71078 100644 --- a/common.go +++ b/common.go @@ -31,11 +31,13 @@ import ( ) const ( - REF_STEP_COUNT = 50000 + BANK_STRIDE = 50000 + DB_VERSION = 0 ) -type termJson struct { - Refs int `json:"refs"` +type termIndex struct { + Version int `json:"version"` + Banks int `json:"banks"` Entities [][]string `json:"ents"` defs [][]string } @@ -68,12 +70,10 @@ func (s *termSource) addTagsPri(tags ...string) { } } -func buildTermJson(entries []termSource, entities map[string]string) termJson { - var dict termJson - - for name, value := range entities { - ent := []string{name, value} - dict.Entities = append(dict.Entities, ent) +func buildTermIndex(entries []termSource, entities map[string]string) termIndex { + dict := termIndex{ + Version: DB_VERSION, + Banks: bankCount(len(entries)), } for _, e := range entries { @@ -82,20 +82,15 @@ func buildTermJson(entries []termSource, entities map[string]string) termJson { dict.defs = append(dict.defs, def) } - dict.Refs = len(dict.defs) / REF_STEP_COUNT + for name, value := range entities { + ent := []string{name, value} + dict.Entities = append(dict.Entities, ent) + } return dict } -func marshalJson(obj interface{}, pretty bool) ([]byte, error) { - if pretty { - return json.MarshalIndent(obj, "", " ") - } - - return json.Marshal(obj) -} - -func outputTermJson(outputDir string, entries []termSource, entities map[string]string, pretty bool) error { +func outputTermIndex(outputDir string, entries []termSource, entities map[string]string, pretty bool) error { if err := os.MkdirAll(outputDir, 0755); err != nil { return err } @@ -106,8 +101,7 @@ func outputTermJson(outputDir string, entries []termSource, entities map[string] } defer outputIndex.Close() - dict := buildTermJson(entries, entities) - + dict := buildTermIndex(entries, entities) indexBytes, err := marshalJson(dict, pretty) if err != nil { return err @@ -118,16 +112,15 @@ func outputTermJson(outputDir string, entries []termSource, entities map[string] } defCnt := len(dict.defs) - - for i := 0; i < defCnt; i += REF_STEP_COUNT { - outputRef, err := os.Create(path.Join(outputDir, fmt.Sprintf("ref_%d.json", i/REF_STEP_COUNT))) + for i := 0; i < defCnt; i += BANK_STRIDE { + outputRef, err := os.Create(path.Join(outputDir, fmt.Sprintf("bank_%d.json", i/BANK_STRIDE+1))) if err != nil { return err } defer outputRef.Close() indexSrc := i - indexDst := i + REF_STEP_COUNT + indexDst := i + BANK_STRIDE if indexDst > defCnt { indexDst = defCnt } @@ -145,6 +138,23 @@ func outputTermJson(outputDir string, entries []termSource, entities map[string] return nil } +func marshalJson(obj interface{}, pretty bool) ([]byte, error) { + if pretty { + return json.MarshalIndent(obj, "", " ") + } + + return json.Marshal(obj) +} + +func bankCount(defCount int) int { + count := defCount / BANK_STRIDE + if defCount%BANK_STRIDE > 0 { + count += 1 + } + + return count +} + func hasString(needle string, haystack []string) bool { for _, value := range haystack { if needle == value { diff --git a/edict.go b/edict.go index 5786621..e59581e 100644 --- a/edict.go +++ b/edict.go @@ -98,5 +98,5 @@ func outputEdictJson(outputDir string, reader io.Reader, flags int) error { entries = append(entries, convertEdictEntry(e)...) } - return outputTermJson(outputDir, entries, entities, flags&flagPrettyJson == flagPrettyJson) + return outputTermIndex(outputDir, entries, entities, flags&flagPrettyJson == flagPrettyJson) } diff --git a/enamdict.go b/enamdict.go index 107426f..1c7a937 100644 --- a/enamdict.go +++ b/enamdict.go @@ -84,5 +84,5 @@ func outputJmnedictJson(outputDir string, reader io.Reader, flags int) error { entries = append(entries, convertJmnedictEntry(e)...) } - return outputTermJson(outputDir, entries, entities, flags&flagPrettyJson == flagPrettyJson) + return outputTermIndex(outputDir, entries, entities, flags&flagPrettyJson == flagPrettyJson) } diff --git a/kanjidic.go b/kanjidic.go index 4372596..3ff4d50 100644 --- a/kanjidic.go +++ b/kanjidic.go @@ -23,25 +23,20 @@ package main import ( - "encoding/json" "fmt" "io" + "os" + "path" "strconv" "strings" "github.com/FooSoft/jmdict" ) -type kanjiDefJson struct { - Character string `json:"c"` - Onyomi string `json:"o"` - Kunyomi string `json:"k"` - Tags string `json:"t"` - Meanings []string `json:"m"` -} - -type kanjiJson struct { - Defs []kanjiDefJson `json:"d"` +type kanjiIndex struct { + Version int `json:"version"` + Banks int `json:"banks"` + defs [][]string } type kanjiSource struct { @@ -60,44 +55,67 @@ func (s *kanjiSource) addTags(tags ...string) { } } -func buildKanjiJson(kanji []kanjiSource) kanjiJson { - var dict kanjiJson +func buildKanjiIndex(entries []kanjiSource) kanjiIndex { + dict := kanjiIndex{ + Version: DB_VERSION, + Banks: bankCount(len(entries)), + } - for _, k := range kanji { - def := kanjiDefJson{ - Character: k.Character, - Onyomi: strings.Join(k.Onyomi, " "), - Kunyomi: strings.Join(k.Kunyomi, " "), - Tags: strings.Join(k.Tags, " "), - Meanings: k.Meanings, - } - - dict.Defs = append(dict.Defs, def) + for _, e := range entries { + def := []string{e.Character, strings.Join(e.Onyomi, " "), strings.Join(e.Kunyomi, " "), strings.Join(e.Tags, " ")} + def = append(def, e.Meanings...) + dict.defs = append(dict.defs, def) } return dict } -func outputKanjiJson(writer io.Writer, kanji []kanjiSource, pretty bool) error { - dict := buildKanjiJson(kanji) - - var ( - bytes []byte - err error - ) - - if pretty { - bytes, err = json.MarshalIndent(dict, "", " ") - } else { - bytes, err = json.Marshal(dict) +func outputKanjiIndex(outputDir string, entries []kanjiSource, pretty bool) error { + if err := os.MkdirAll(outputDir, 0755); err != nil { + return err } + outputIndex, err := os.Create(path.Join(outputDir, "index.json")) + if err != nil { + return err + } + defer outputIndex.Close() + + dict := buildKanjiIndex(entries) + indexBytes, err := marshalJson(dict, pretty) if err != nil { return err } - _, err = writer.Write(bytes) - return err + if _, err = outputIndex.Write(indexBytes); err != nil { + return err + } + + defCnt := len(dict.defs) + for i := 0; i < defCnt; i += BANK_STRIDE { + outputRef, err := os.Create(path.Join(outputDir, fmt.Sprintf("bank_%d.json", i/BANK_STRIDE+1))) + if err != nil { + return err + } + defer outputRef.Close() + + indexSrc := i + indexDst := i + BANK_STRIDE + if indexDst > defCnt { + indexDst = defCnt + } + + refBytes, err := marshalJson(dict.defs[indexSrc:indexDst], pretty) + if err != nil { + return err + } + + if _, err = outputRef.Write(refBytes); err != nil { + return err + } + } + + return nil } func convertKanjidicCharacter(kanjidicCharacter jmdict.KanjidicCharacter) kanjiSource { @@ -151,17 +169,15 @@ func convertKanjidicCharacter(kanjidicCharacter jmdict.KanjidicCharacter) kanjiS } func outputKanjidicJson(outputDir string, reader io.Reader, flags int) error { - // dict, err := jmdict.LoadKanjidic(reader) - // if err != nil { - // return err - // } + dict, err := jmdict.LoadKanjidic(reader) + if err != nil { + return err + } - // var kanji []kanjiSource - // for _, kanjidicCharacter := range dict.Characters { - // kanji = append(kanji, convertKanjidicCharacter(kanjidicCharacter)) - // } + var kanji []kanjiSource + for _, kanjidicCharacter := range dict.Characters { + kanji = append(kanji, convertKanjidicCharacter(kanjidicCharacter)) + } - // return outputKanjiJson(writer, kanji, flags&flagPrettyJson == flagPrettyJson) - - return nil + return outputKanjiIndex(outputDir, kanji, flags&flagPrettyJson == flagPrettyJson) }