1
This commit is contained in:
Alex Yatskov 2016-08-22 20:51:30 -07:00
parent c170cfd321
commit e44d029f89
5 changed files with 85 additions and 63 deletions

104
common.go
View File

@ -24,25 +24,20 @@ package main
import ( import (
"encoding/json" "encoding/json"
"io" "fmt"
"os"
"path"
"strings" "strings"
) )
type termDefJson struct { const (
Expression string `json:"e"` REF_STEP_COUNT = 1000
Reading string `json:"r"` )
Tags string `json:"t"`
Glossary []string `json:"g"`
}
type termEntJson struct {
Name string `json:"n"`
Value string `json:"v"`
}
type termJson struct { type termJson struct {
Entities []termEntJson `json:"e"` Refs int `json:"refs"`
Defs []termDefJson `json:"d"` Entities [][]string `json:"ents"`
defs [][]string
} }
type termSource struct { type termSource struct {
@ -77,48 +72,77 @@ func buildTermJson(entries []termSource, entities map[string]string) termJson {
var dict termJson var dict termJson
for name, value := range entities { for name, value := range entities {
ent := termEntJson{ ent := []string{name, value}
Name: name,
Value: value,
}
dict.Entities = append(dict.Entities, ent) dict.Entities = append(dict.Entities, ent)
} }
for _, e := range entries { for _, e := range entries {
def := termDefJson{ def := []string{e.Expression, e.Reading, strings.Join(e.Tags, " ")}
e.Expression, def = append(def, e.Glossary...)
e.Reading, dict.defs = append(dict.defs, def)
strings.Join(e.Tags, " "),
e.Glossary,
}
dict.Defs = append(dict.Defs, def)
} }
dict.Refs = len(dict.defs) / REF_STEP_COUNT
return dict return dict
} }
func outputTermJson(writer io.Writer, entries []termSource, entities map[string]string, pretty bool) error { func marshalJson(obj interface{}, pretty bool) ([]byte, error) {
dict := buildTermJson(entries, entities)
var (
bytes []byte
err error
)
if pretty { if pretty {
bytes, err = json.MarshalIndent(dict, "", " ") return json.MarshalIndent(obj, "", " ")
} else {
bytes, err = json.Marshal(dict)
} }
return json.Marshal(obj)
}
func outputTermJson(outputDir string, entries []termSource, entities map[string]string, pretty bool) error {
if err := os.MkdirAll(outputDir, 0755); err != nil {
return err
}
outputIndex, err := os.Create(path.Join(outputDir, "index.json"))
if err != nil {
return err
}
defer outputIndex.Close()
dict := buildTermJson(entries, entities)
indexBytes, err := marshalJson(dict, pretty)
if err != nil { if err != nil {
return err return err
} }
_, err = writer.Write(bytes) if _, err = outputIndex.Write(indexBytes); err != nil {
return err return err
}
defCnt := len(dict.defs)
for i := 0; i < defCnt; i += REF_STEP_COUNT {
outputRef, err := os.Create(path.Join(outputDir, fmt.Sprintf("ref_%0.3d.json", i/REF_STEP_COUNT)))
if err != nil {
return err
}
defer outputRef.Close()
indexSrc := i
indexDst := i + REF_STEP_COUNT
if indexDst > defCnt {
indexDst = defCnt
}
refBytes, err := marshalJson(dict.defs[indexSrc:indexDst], pretty)
if err != nil {
return err
}
if _, err = outputRef.Write(refBytes); err != nil {
return err
}
}
return nil
} }
func hasString(needle string, haystack []string) bool { func hasString(needle string, haystack []string) bool {

View File

@ -87,7 +87,7 @@ func convertEdictEntry(edictEntry jmdict.JmdictEntry) []termSource {
return entries return entries
} }
func outputEdictJson(writer io.Writer, reader io.Reader, flags int) error { func outputEdictJson(outputDir string, reader io.Reader, flags int) error {
dict, entities, err := jmdict.LoadJmdictNoTransform(reader) dict, entities, err := jmdict.LoadJmdictNoTransform(reader)
if err != nil { if err != nil {
return err return err
@ -98,5 +98,5 @@ func outputEdictJson(writer io.Writer, reader io.Reader, flags int) error {
entries = append(entries, convertEdictEntry(e)...) entries = append(entries, convertEdictEntry(e)...)
} }
return outputTermJson(writer, entries, entities, flags&flagPrettyJson == flagPrettyJson) return outputTermJson(outputDir, entries, entities, flags&flagPrettyJson == flagPrettyJson)
} }

View File

@ -73,7 +73,7 @@ func convertJmnedictEntry(enamdictEntry jmdict.JmnedictEntry) []termSource {
return entries return entries
} }
func outputJmnedictJson(writer io.Writer, reader io.Reader, flags int) error { func outputJmnedictJson(outputDir string, reader io.Reader, flags int) error {
dict, entities, err := jmdict.LoadJmnedictNoTransform(reader) dict, entities, err := jmdict.LoadJmnedictNoTransform(reader)
if err != nil { if err != nil {
return err return err
@ -84,5 +84,5 @@ func outputJmnedictJson(writer io.Writer, reader io.Reader, flags int) error {
entries = append(entries, convertJmnedictEntry(e)...) entries = append(entries, convertJmnedictEntry(e)...)
} }
return outputTermJson(writer, entries, entities, flags&flagPrettyJson == flagPrettyJson) return outputTermJson(outputDir, entries, entities, flags&flagPrettyJson == flagPrettyJson)
} }

View File

@ -150,16 +150,18 @@ func convertKanjidicCharacter(kanjidicCharacter jmdict.KanjidicCharacter) kanjiS
return character return character
} }
func outputKanjidicJson(writer io.Writer, reader io.Reader, flags int) error { func outputKanjidicJson(outputDir string, reader io.Reader, flags int) error {
dict, err := jmdict.LoadKanjidic(reader) // dict, err := jmdict.LoadKanjidic(reader)
if err != nil { // if err != nil {
return err // return err
} // }
var kanji []kanjiSource // var kanji []kanjiSource
for _, kanjidicCharacter := range dict.Characters { // for _, kanjidicCharacter := range dict.Characters {
kanji = append(kanji, convertKanjidicCharacter(kanjidicCharacter)) // kanji = append(kanji, convertKanjidicCharacter(kanjidicCharacter))
} // }
return outputKanjiJson(writer, kanji, flags&flagPrettyJson == flagPrettyJson) // return outputKanjiJson(writer, kanji, flags&flagPrettyJson == flagPrettyJson)
return nil
} }

14
main.go
View File

@ -42,8 +42,8 @@ func usage() {
flag.PrintDefaults() flag.PrintDefaults()
} }
func outputJson(fileFormat, inputFile, outputFile string, flags int) error { func outputJson(fileFormat, inputPath, outputDir string, flags int) error {
handlers := map[string]func(io.Writer, io.Reader, int) error{ handlers := map[string]func(string, io.Reader, int) error{
"edict": outputEdictJson, "edict": outputEdictJson,
"enamdict": outputJmnedictJson, "enamdict": outputJmnedictJson,
"kanjidic": outputKanjidicJson, "kanjidic": outputKanjidicJson,
@ -54,17 +54,13 @@ func outputJson(fileFormat, inputFile, outputFile string, flags int) error {
return errors.New("unrecognized file format") return errors.New("unrecognized file format")
} }
input, err := os.Open(inputFile) input, err := os.Open(inputPath)
if err != nil { if err != nil {
return err return err
} }
defer input.Close()
output, err := os.Create(outputFile) return handler(outputDir, input, flags)
if err != nil {
return err
}
return handler(output, input, flags)
} }
func main() { func main() {