WIP
This commit is contained in:
parent
d81d94fbcd
commit
1253ab2912
60
common.go
60
common.go
@ -31,11 +31,13 @@ import (
|
|||||||
)
|
)
|
||||||
|
|
||||||
const (
|
const (
|
||||||
REF_STEP_COUNT = 50000
|
BANK_STRIDE = 50000
|
||||||
|
DB_VERSION = 0
|
||||||
)
|
)
|
||||||
|
|
||||||
type termJson struct {
|
type termIndex struct {
|
||||||
Refs int `json:"refs"`
|
Version int `json:"version"`
|
||||||
|
Banks int `json:"banks"`
|
||||||
Entities [][]string `json:"ents"`
|
Entities [][]string `json:"ents"`
|
||||||
defs [][]string
|
defs [][]string
|
||||||
}
|
}
|
||||||
@ -68,12 +70,10 @@ func (s *termSource) addTagsPri(tags ...string) {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
func buildTermJson(entries []termSource, entities map[string]string) termJson {
|
func buildTermIndex(entries []termSource, entities map[string]string) termIndex {
|
||||||
var dict termJson
|
dict := termIndex{
|
||||||
|
Version: DB_VERSION,
|
||||||
for name, value := range entities {
|
Banks: bankCount(len(entries)),
|
||||||
ent := []string{name, value}
|
|
||||||
dict.Entities = append(dict.Entities, ent)
|
|
||||||
}
|
}
|
||||||
|
|
||||||
for _, e := range entries {
|
for _, e := range entries {
|
||||||
@ -82,20 +82,15 @@ func buildTermJson(entries []termSource, entities map[string]string) termJson {
|
|||||||
dict.defs = append(dict.defs, def)
|
dict.defs = append(dict.defs, def)
|
||||||
}
|
}
|
||||||
|
|
||||||
dict.Refs = len(dict.defs) / REF_STEP_COUNT
|
for name, value := range entities {
|
||||||
|
ent := []string{name, value}
|
||||||
|
dict.Entities = append(dict.Entities, ent)
|
||||||
|
}
|
||||||
|
|
||||||
return dict
|
return dict
|
||||||
}
|
}
|
||||||
|
|
||||||
func marshalJson(obj interface{}, pretty bool) ([]byte, error) {
|
func outputTermIndex(outputDir string, entries []termSource, entities map[string]string, pretty bool) error {
|
||||||
if pretty {
|
|
||||||
return json.MarshalIndent(obj, "", " ")
|
|
||||||
}
|
|
||||||
|
|
||||||
return json.Marshal(obj)
|
|
||||||
}
|
|
||||||
|
|
||||||
func outputTermJson(outputDir string, entries []termSource, entities map[string]string, pretty bool) error {
|
|
||||||
if err := os.MkdirAll(outputDir, 0755); err != nil {
|
if err := os.MkdirAll(outputDir, 0755); err != nil {
|
||||||
return err
|
return err
|
||||||
}
|
}
|
||||||
@ -106,8 +101,7 @@ func outputTermJson(outputDir string, entries []termSource, entities map[string]
|
|||||||
}
|
}
|
||||||
defer outputIndex.Close()
|
defer outputIndex.Close()
|
||||||
|
|
||||||
dict := buildTermJson(entries, entities)
|
dict := buildTermIndex(entries, entities)
|
||||||
|
|
||||||
indexBytes, err := marshalJson(dict, pretty)
|
indexBytes, err := marshalJson(dict, pretty)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return err
|
return err
|
||||||
@ -118,16 +112,15 @@ func outputTermJson(outputDir string, entries []termSource, entities map[string]
|
|||||||
}
|
}
|
||||||
|
|
||||||
defCnt := len(dict.defs)
|
defCnt := len(dict.defs)
|
||||||
|
for i := 0; i < defCnt; i += BANK_STRIDE {
|
||||||
for i := 0; i < defCnt; i += REF_STEP_COUNT {
|
outputRef, err := os.Create(path.Join(outputDir, fmt.Sprintf("bank_%d.json", i/BANK_STRIDE+1)))
|
||||||
outputRef, err := os.Create(path.Join(outputDir, fmt.Sprintf("ref_%d.json", i/REF_STEP_COUNT)))
|
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return err
|
return err
|
||||||
}
|
}
|
||||||
defer outputRef.Close()
|
defer outputRef.Close()
|
||||||
|
|
||||||
indexSrc := i
|
indexSrc := i
|
||||||
indexDst := i + REF_STEP_COUNT
|
indexDst := i + BANK_STRIDE
|
||||||
if indexDst > defCnt {
|
if indexDst > defCnt {
|
||||||
indexDst = defCnt
|
indexDst = defCnt
|
||||||
}
|
}
|
||||||
@ -145,6 +138,23 @@ func outputTermJson(outputDir string, entries []termSource, entities map[string]
|
|||||||
return nil
|
return nil
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func marshalJson(obj interface{}, pretty bool) ([]byte, error) {
|
||||||
|
if pretty {
|
||||||
|
return json.MarshalIndent(obj, "", " ")
|
||||||
|
}
|
||||||
|
|
||||||
|
return json.Marshal(obj)
|
||||||
|
}
|
||||||
|
|
||||||
|
func bankCount(defCount int) int {
|
||||||
|
count := defCount / BANK_STRIDE
|
||||||
|
if defCount%BANK_STRIDE > 0 {
|
||||||
|
count += 1
|
||||||
|
}
|
||||||
|
|
||||||
|
return count
|
||||||
|
}
|
||||||
|
|
||||||
func hasString(needle string, haystack []string) bool {
|
func hasString(needle string, haystack []string) bool {
|
||||||
for _, value := range haystack {
|
for _, value := range haystack {
|
||||||
if needle == value {
|
if needle == value {
|
||||||
|
2
edict.go
2
edict.go
@ -98,5 +98,5 @@ func outputEdictJson(outputDir string, reader io.Reader, flags int) error {
|
|||||||
entries = append(entries, convertEdictEntry(e)...)
|
entries = append(entries, convertEdictEntry(e)...)
|
||||||
}
|
}
|
||||||
|
|
||||||
return outputTermJson(outputDir, entries, entities, flags&flagPrettyJson == flagPrettyJson)
|
return outputTermIndex(outputDir, entries, entities, flags&flagPrettyJson == flagPrettyJson)
|
||||||
}
|
}
|
||||||
|
@ -84,5 +84,5 @@ func outputJmnedictJson(outputDir string, reader io.Reader, flags int) error {
|
|||||||
entries = append(entries, convertJmnedictEntry(e)...)
|
entries = append(entries, convertJmnedictEntry(e)...)
|
||||||
}
|
}
|
||||||
|
|
||||||
return outputTermJson(outputDir, entries, entities, flags&flagPrettyJson == flagPrettyJson)
|
return outputTermIndex(outputDir, entries, entities, flags&flagPrettyJson == flagPrettyJson)
|
||||||
}
|
}
|
||||||
|
112
kanjidic.go
112
kanjidic.go
@ -23,25 +23,20 @@
|
|||||||
package main
|
package main
|
||||||
|
|
||||||
import (
|
import (
|
||||||
"encoding/json"
|
|
||||||
"fmt"
|
"fmt"
|
||||||
"io"
|
"io"
|
||||||
|
"os"
|
||||||
|
"path"
|
||||||
"strconv"
|
"strconv"
|
||||||
"strings"
|
"strings"
|
||||||
|
|
||||||
"github.com/FooSoft/jmdict"
|
"github.com/FooSoft/jmdict"
|
||||||
)
|
)
|
||||||
|
|
||||||
type kanjiDefJson struct {
|
type kanjiIndex struct {
|
||||||
Character string `json:"c"`
|
Version int `json:"version"`
|
||||||
Onyomi string `json:"o"`
|
Banks int `json:"banks"`
|
||||||
Kunyomi string `json:"k"`
|
defs [][]string
|
||||||
Tags string `json:"t"`
|
|
||||||
Meanings []string `json:"m"`
|
|
||||||
}
|
|
||||||
|
|
||||||
type kanjiJson struct {
|
|
||||||
Defs []kanjiDefJson `json:"d"`
|
|
||||||
}
|
}
|
||||||
|
|
||||||
type kanjiSource struct {
|
type kanjiSource struct {
|
||||||
@ -60,44 +55,67 @@ func (s *kanjiSource) addTags(tags ...string) {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
func buildKanjiJson(kanji []kanjiSource) kanjiJson {
|
func buildKanjiIndex(entries []kanjiSource) kanjiIndex {
|
||||||
var dict kanjiJson
|
dict := kanjiIndex{
|
||||||
|
Version: DB_VERSION,
|
||||||
|
Banks: bankCount(len(entries)),
|
||||||
|
}
|
||||||
|
|
||||||
for _, k := range kanji {
|
for _, e := range entries {
|
||||||
def := kanjiDefJson{
|
def := []string{e.Character, strings.Join(e.Onyomi, " "), strings.Join(e.Kunyomi, " "), strings.Join(e.Tags, " ")}
|
||||||
Character: k.Character,
|
def = append(def, e.Meanings...)
|
||||||
Onyomi: strings.Join(k.Onyomi, " "),
|
dict.defs = append(dict.defs, def)
|
||||||
Kunyomi: strings.Join(k.Kunyomi, " "),
|
|
||||||
Tags: strings.Join(k.Tags, " "),
|
|
||||||
Meanings: k.Meanings,
|
|
||||||
}
|
|
||||||
|
|
||||||
dict.Defs = append(dict.Defs, def)
|
|
||||||
}
|
}
|
||||||
|
|
||||||
return dict
|
return dict
|
||||||
}
|
}
|
||||||
|
|
||||||
func outputKanjiJson(writer io.Writer, kanji []kanjiSource, pretty bool) error {
|
func outputKanjiIndex(outputDir string, entries []kanjiSource, pretty bool) error {
|
||||||
dict := buildKanjiJson(kanji)
|
if err := os.MkdirAll(outputDir, 0755); err != nil {
|
||||||
|
return err
|
||||||
var (
|
|
||||||
bytes []byte
|
|
||||||
err error
|
|
||||||
)
|
|
||||||
|
|
||||||
if pretty {
|
|
||||||
bytes, err = json.MarshalIndent(dict, "", " ")
|
|
||||||
} else {
|
|
||||||
bytes, err = json.Marshal(dict)
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
outputIndex, err := os.Create(path.Join(outputDir, "index.json"))
|
||||||
|
if err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
defer outputIndex.Close()
|
||||||
|
|
||||||
|
dict := buildKanjiIndex(entries)
|
||||||
|
indexBytes, err := marshalJson(dict, pretty)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return err
|
return err
|
||||||
}
|
}
|
||||||
|
|
||||||
_, err = writer.Write(bytes)
|
if _, err = outputIndex.Write(indexBytes); err != nil {
|
||||||
return err
|
return err
|
||||||
|
}
|
||||||
|
|
||||||
|
defCnt := len(dict.defs)
|
||||||
|
for i := 0; i < defCnt; i += BANK_STRIDE {
|
||||||
|
outputRef, err := os.Create(path.Join(outputDir, fmt.Sprintf("bank_%d.json", i/BANK_STRIDE+1)))
|
||||||
|
if err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
defer outputRef.Close()
|
||||||
|
|
||||||
|
indexSrc := i
|
||||||
|
indexDst := i + BANK_STRIDE
|
||||||
|
if indexDst > defCnt {
|
||||||
|
indexDst = defCnt
|
||||||
|
}
|
||||||
|
|
||||||
|
refBytes, err := marshalJson(dict.defs[indexSrc:indexDst], pretty)
|
||||||
|
if err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
|
||||||
|
if _, err = outputRef.Write(refBytes); err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
return nil
|
||||||
}
|
}
|
||||||
|
|
||||||
func convertKanjidicCharacter(kanjidicCharacter jmdict.KanjidicCharacter) kanjiSource {
|
func convertKanjidicCharacter(kanjidicCharacter jmdict.KanjidicCharacter) kanjiSource {
|
||||||
@ -151,17 +169,15 @@ func convertKanjidicCharacter(kanjidicCharacter jmdict.KanjidicCharacter) kanjiS
|
|||||||
}
|
}
|
||||||
|
|
||||||
func outputKanjidicJson(outputDir string, reader io.Reader, flags int) error {
|
func outputKanjidicJson(outputDir string, reader io.Reader, flags int) error {
|
||||||
// dict, err := jmdict.LoadKanjidic(reader)
|
dict, err := jmdict.LoadKanjidic(reader)
|
||||||
// if err != nil {
|
if err != nil {
|
||||||
// return err
|
return err
|
||||||
// }
|
}
|
||||||
|
|
||||||
// var kanji []kanjiSource
|
var kanji []kanjiSource
|
||||||
// for _, kanjidicCharacter := range dict.Characters {
|
for _, kanjidicCharacter := range dict.Characters {
|
||||||
// kanji = append(kanji, convertKanjidicCharacter(kanjidicCharacter))
|
kanji = append(kanji, convertKanjidicCharacter(kanjidicCharacter))
|
||||||
// }
|
}
|
||||||
|
|
||||||
// return outputKanjiJson(writer, kanji, flags&flagPrettyJson == flagPrettyJson)
|
return outputKanjiIndex(outputDir, kanji, flags&flagPrettyJson == flagPrettyJson)
|
||||||
|
|
||||||
return nil
|
|
||||||
}
|
}
|
||||||
|
Loading…
Reference in New Issue
Block a user