1
This commit is contained in:
Alex Yatskov 2016-08-24 09:02:26 -07:00
parent d81d94fbcd
commit 1253ab2912
4 changed files with 101 additions and 75 deletions

View File

@ -31,11 +31,13 @@ import (
) )
const ( const (
REF_STEP_COUNT = 50000 BANK_STRIDE = 50000
DB_VERSION = 0
) )
type termJson struct { type termIndex struct {
Refs int `json:"refs"` Version int `json:"version"`
Banks int `json:"banks"`
Entities [][]string `json:"ents"` Entities [][]string `json:"ents"`
defs [][]string defs [][]string
} }
@ -68,12 +70,10 @@ func (s *termSource) addTagsPri(tags ...string) {
} }
} }
func buildTermJson(entries []termSource, entities map[string]string) termJson { func buildTermIndex(entries []termSource, entities map[string]string) termIndex {
var dict termJson dict := termIndex{
Version: DB_VERSION,
for name, value := range entities { Banks: bankCount(len(entries)),
ent := []string{name, value}
dict.Entities = append(dict.Entities, ent)
} }
for _, e := range entries { for _, e := range entries {
@ -82,20 +82,15 @@ func buildTermJson(entries []termSource, entities map[string]string) termJson {
dict.defs = append(dict.defs, def) dict.defs = append(dict.defs, def)
} }
dict.Refs = len(dict.defs) / REF_STEP_COUNT for name, value := range entities {
ent := []string{name, value}
dict.Entities = append(dict.Entities, ent)
}
return dict return dict
} }
func marshalJson(obj interface{}, pretty bool) ([]byte, error) { func outputTermIndex(outputDir string, entries []termSource, entities map[string]string, pretty bool) error {
if pretty {
return json.MarshalIndent(obj, "", " ")
}
return json.Marshal(obj)
}
func outputTermJson(outputDir string, entries []termSource, entities map[string]string, pretty bool) error {
if err := os.MkdirAll(outputDir, 0755); err != nil { if err := os.MkdirAll(outputDir, 0755); err != nil {
return err return err
} }
@ -106,8 +101,7 @@ func outputTermJson(outputDir string, entries []termSource, entities map[string]
} }
defer outputIndex.Close() defer outputIndex.Close()
dict := buildTermJson(entries, entities) dict := buildTermIndex(entries, entities)
indexBytes, err := marshalJson(dict, pretty) indexBytes, err := marshalJson(dict, pretty)
if err != nil { if err != nil {
return err return err
@ -118,16 +112,15 @@ func outputTermJson(outputDir string, entries []termSource, entities map[string]
} }
defCnt := len(dict.defs) defCnt := len(dict.defs)
for i := 0; i < defCnt; i += BANK_STRIDE {
for i := 0; i < defCnt; i += REF_STEP_COUNT { outputRef, err := os.Create(path.Join(outputDir, fmt.Sprintf("bank_%d.json", i/BANK_STRIDE+1)))
outputRef, err := os.Create(path.Join(outputDir, fmt.Sprintf("ref_%d.json", i/REF_STEP_COUNT)))
if err != nil { if err != nil {
return err return err
} }
defer outputRef.Close() defer outputRef.Close()
indexSrc := i indexSrc := i
indexDst := i + REF_STEP_COUNT indexDst := i + BANK_STRIDE
if indexDst > defCnt { if indexDst > defCnt {
indexDst = defCnt indexDst = defCnt
} }
@ -145,6 +138,23 @@ func outputTermJson(outputDir string, entries []termSource, entities map[string]
return nil return nil
} }
// marshalJson encodes obj as JSON. When pretty is false the compact
// encoding is returned; otherwise the output is indented with an empty
// prefix and a one-space indent string.
func marshalJson(obj interface{}, pretty bool) ([]byte, error) {
	if !pretty {
		return json.Marshal(obj)
	}

	return json.MarshalIndent(obj, "", " ")
}
// bankCount reports how many banks of BANK_STRIDE definitions are needed to
// hold defCount definitions: one per full stride, plus one more when a
// positive remainder is left over.
func bankCount(defCount int) int {
	full, rest := defCount/BANK_STRIDE, defCount%BANK_STRIDE
	if rest > 0 {
		return full + 1
	}

	return full
}
func hasString(needle string, haystack []string) bool { func hasString(needle string, haystack []string) bool {
for _, value := range haystack { for _, value := range haystack {
if needle == value { if needle == value {

View File

@ -98,5 +98,5 @@ func outputEdictJson(outputDir string, reader io.Reader, flags int) error {
entries = append(entries, convertEdictEntry(e)...) entries = append(entries, convertEdictEntry(e)...)
} }
return outputTermJson(outputDir, entries, entities, flags&flagPrettyJson == flagPrettyJson) return outputTermIndex(outputDir, entries, entities, flags&flagPrettyJson == flagPrettyJson)
} }

View File

@ -84,5 +84,5 @@ func outputJmnedictJson(outputDir string, reader io.Reader, flags int) error {
entries = append(entries, convertJmnedictEntry(e)...) entries = append(entries, convertJmnedictEntry(e)...)
} }
return outputTermJson(outputDir, entries, entities, flags&flagPrettyJson == flagPrettyJson) return outputTermIndex(outputDir, entries, entities, flags&flagPrettyJson == flagPrettyJson)
} }

View File

@ -23,25 +23,20 @@
package main package main
import ( import (
"encoding/json"
"fmt" "fmt"
"io" "io"
"os"
"path"
"strconv" "strconv"
"strings" "strings"
"github.com/FooSoft/jmdict" "github.com/FooSoft/jmdict"
) )
type kanjiDefJson struct { type kanjiIndex struct {
Character string `json:"c"` Version int `json:"version"`
Onyomi string `json:"o"` Banks int `json:"banks"`
Kunyomi string `json:"k"` defs [][]string
Tags string `json:"t"`
Meanings []string `json:"m"`
}
type kanjiJson struct {
Defs []kanjiDefJson `json:"d"`
} }
type kanjiSource struct { type kanjiSource struct {
@ -60,44 +55,67 @@ func (s *kanjiSource) addTags(tags ...string) {
} }
} }
func buildKanjiJson(kanji []kanjiSource) kanjiJson { func buildKanjiIndex(entries []kanjiSource) kanjiIndex {
var dict kanjiJson dict := kanjiIndex{
Version: DB_VERSION,
Banks: bankCount(len(entries)),
}
for _, k := range kanji { for _, e := range entries {
def := kanjiDefJson{ def := []string{e.Character, strings.Join(e.Onyomi, " "), strings.Join(e.Kunyomi, " "), strings.Join(e.Tags, " ")}
Character: k.Character, def = append(def, e.Meanings...)
Onyomi: strings.Join(k.Onyomi, " "), dict.defs = append(dict.defs, def)
Kunyomi: strings.Join(k.Kunyomi, " "),
Tags: strings.Join(k.Tags, " "),
Meanings: k.Meanings,
}
dict.Defs = append(dict.Defs, def)
} }
return dict return dict
} }
func outputKanjiJson(writer io.Writer, kanji []kanjiSource, pretty bool) error { func outputKanjiIndex(outputDir string, entries []kanjiSource, pretty bool) error {
dict := buildKanjiJson(kanji) if err := os.MkdirAll(outputDir, 0755); err != nil {
return err
var (
bytes []byte
err error
)
if pretty {
bytes, err = json.MarshalIndent(dict, "", " ")
} else {
bytes, err = json.Marshal(dict)
} }
outputIndex, err := os.Create(path.Join(outputDir, "index.json"))
if err != nil {
return err
}
defer outputIndex.Close()
dict := buildKanjiIndex(entries)
indexBytes, err := marshalJson(dict, pretty)
if err != nil { if err != nil {
return err return err
} }
_, err = writer.Write(bytes) if _, err = outputIndex.Write(indexBytes); err != nil {
return err return err
}
defCnt := len(dict.defs)
for i := 0; i < defCnt; i += BANK_STRIDE {
outputRef, err := os.Create(path.Join(outputDir, fmt.Sprintf("bank_%d.json", i/BANK_STRIDE+1)))
if err != nil {
return err
}
defer outputRef.Close()
indexSrc := i
indexDst := i + BANK_STRIDE
if indexDst > defCnt {
indexDst = defCnt
}
refBytes, err := marshalJson(dict.defs[indexSrc:indexDst], pretty)
if err != nil {
return err
}
if _, err = outputRef.Write(refBytes); err != nil {
return err
}
}
return nil
} }
func convertKanjidicCharacter(kanjidicCharacter jmdict.KanjidicCharacter) kanjiSource { func convertKanjidicCharacter(kanjidicCharacter jmdict.KanjidicCharacter) kanjiSource {
@ -151,17 +169,15 @@ func convertKanjidicCharacter(kanjidicCharacter jmdict.KanjidicCharacter) kanjiS
} }
func outputKanjidicJson(outputDir string, reader io.Reader, flags int) error { func outputKanjidicJson(outputDir string, reader io.Reader, flags int) error {
// dict, err := jmdict.LoadKanjidic(reader) dict, err := jmdict.LoadKanjidic(reader)
// if err != nil { if err != nil {
// return err return err
// } }
// var kanji []kanjiSource var kanji []kanjiSource
// for _, kanjidicCharacter := range dict.Characters { for _, kanjidicCharacter := range dict.Characters {
// kanji = append(kanji, convertKanjidicCharacter(kanjidicCharacter)) kanji = append(kanji, convertKanjidicCharacter(kanjidicCharacter))
// } }
// return outputKanjiJson(writer, kanji, flags&flagPrettyJson == flagPrettyJson) return outputKanjiIndex(outputDir, kanji, flags&flagPrettyJson == flagPrettyJson)
return nil
} }