2016-08-07 01:17:02 +00:00
|
|
|
/*
 * Copyright (c) 2016 Alex Yatskov <alex@foosoft.net>
 * Author: Alex Yatskov <alex@foosoft.net>
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy of
 * this software and associated documentation files (the "Software"), to deal in
 * the Software without restriction, including without limitation the rights to
 * use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of
 * the Software, and to permit persons to whom the Software is furnished to do so,
 * subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in all
 * copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS
 * FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR
 * COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER
 * IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
 */
|
|
|
|
|
|
|
|
package main
|
|
|
|
|
|
|
|
import (
|
2017-06-26 00:22:17 +00:00
|
|
|
"archive/zip"
|
|
|
|
"bytes"
|
2016-08-07 01:17:02 +00:00
|
|
|
"encoding/json"
|
2017-06-26 01:06:41 +00:00
|
|
|
"errors"
|
2016-08-23 03:51:30 +00:00
|
|
|
"fmt"
|
|
|
|
"os"
|
2016-12-29 01:45:33 +00:00
|
|
|
"path/filepath"
|
2016-08-07 01:17:02 +00:00
|
|
|
"strings"
|
|
|
|
)
|
|
|
|
|
2016-12-17 23:48:13 +00:00
|
|
|
// dbTagMeta is the per-tag metadata record that writeDb stores under the
// "tagMeta" key of index.json. Every field is dropped from the JSON output
// when it holds its zero value.
type dbTagMeta struct {
	Category string `json:"category,omitempty"` // written as "category"
	Notes    string `json:"notes,omitempty"`    // written as "notes"
	Order    int    `json:"order,omitempty"`    // written as "order"
}
|
|
|
|
|
2016-11-05 20:13:13 +00:00
|
|
|
// dbTerm is a single vocabulary entry gathered for export. dbTermList.crush
// serializes the fields positionally, in the order they are declared here.
type dbTerm struct {
	Expression string   // first column of the exported row
	Reading    string   // second column of the exported row
	Tags       []string // exported as one space-separated string; extended by addTags
	Rules      []string // exported as one space-separated string; extended by addRules
	Score      int      // exported as a number
	Glossary   []string // each entry becomes its own trailing column
}
|
|
|
|
|
2016-11-05 20:13:13 +00:00
|
|
|
// dbTermList is a slice of dbTerm entries; its crush method converts the
// entries into positional rows for serialization.
type dbTermList []dbTerm
|
|
|
|
|
|
|
|
func (term *dbTerm) addTags(tags ...string) {
|
2016-12-17 23:48:13 +00:00
|
|
|
term.Tags = appendStringUnique(term.Tags, tags...)
|
2016-08-07 01:17:02 +00:00
|
|
|
}
|
|
|
|
|
2016-12-17 23:48:13 +00:00
|
|
|
func (term *dbTerm) addRules(rules ...string) {
|
|
|
|
term.Rules = appendStringUnique(term.Rules, rules...)
|
2016-08-07 20:24:56 +00:00
|
|
|
}
|
|
|
|
|
2016-12-18 03:24:08 +00:00
|
|
|
func (terms dbTermList) crush() [][]interface{} {
|
|
|
|
var results [][]interface{}
|
2016-11-05 20:13:13 +00:00
|
|
|
for _, t := range terms {
|
2016-12-18 03:24:08 +00:00
|
|
|
result := []interface{}{
|
2016-11-05 20:13:13 +00:00
|
|
|
t.Expression,
|
|
|
|
t.Reading,
|
|
|
|
strings.Join(t.Tags, " "),
|
2016-12-17 23:48:13 +00:00
|
|
|
strings.Join(t.Rules, " "),
|
2016-12-18 03:24:08 +00:00
|
|
|
t.Score,
|
|
|
|
}
|
|
|
|
|
|
|
|
for _, gloss := range t.Glossary {
|
|
|
|
result = append(result, gloss)
|
2016-11-05 20:13:13 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
results = append(results, result)
|
2016-08-22 02:51:43 +00:00
|
|
|
}
|
2016-08-07 01:17:02 +00:00
|
|
|
|
2016-11-05 20:13:13 +00:00
|
|
|
return results
|
|
|
|
}
|
|
|
|
|
|
|
|
// dbKanji is a single kanji entry gathered for export. dbKanjiList.crush
// serializes the fields positionally, in the order they are declared here.
type dbKanji struct {
	Character string   // first column of the exported row
	Onyomi    []string // exported as one space-separated string
	Kunyomi   []string // exported as one space-separated string
	Tags      []string // exported as one space-separated string; extended by addTags
	Meanings  []string // each entry becomes its own trailing column
}
|
|
|
|
|
|
|
|
// dbKanjiList is a slice of dbKanji entries; its crush method converts the
// entries into positional rows for serialization.
type dbKanjiList []dbKanji
|
|
|
|
|
|
|
|
func (kanji *dbKanji) addTags(tags ...string) {
|
|
|
|
for _, tag := range tags {
|
|
|
|
if !hasString(tag, kanji.Tags) {
|
|
|
|
kanji.Tags = append(kanji.Tags, tag)
|
|
|
|
}
|
2016-08-07 01:17:02 +00:00
|
|
|
}
|
2016-11-05 20:13:13 +00:00
|
|
|
}
|
|
|
|
|
2016-12-18 03:24:08 +00:00
|
|
|
func (kanji dbKanjiList) crush() [][]interface{} {
|
|
|
|
var results [][]interface{}
|
2016-11-05 20:13:13 +00:00
|
|
|
for _, k := range kanji {
|
2016-12-18 03:24:08 +00:00
|
|
|
result := []interface{}{
|
2016-11-05 20:13:13 +00:00
|
|
|
k.Character,
|
|
|
|
strings.Join(k.Onyomi, " "),
|
|
|
|
strings.Join(k.Kunyomi, " "),
|
|
|
|
strings.Join(k.Tags, " "),
|
|
|
|
}
|
2016-08-07 01:17:02 +00:00
|
|
|
|
2016-12-18 03:24:08 +00:00
|
|
|
for _, meaning := range k.Meanings {
|
|
|
|
result = append(result, meaning)
|
|
|
|
}
|
|
|
|
|
2016-11-05 20:13:13 +00:00
|
|
|
results = append(results, result)
|
2016-08-23 03:51:30 +00:00
|
|
|
}
|
|
|
|
|
2016-11-05 20:13:13 +00:00
|
|
|
return results
|
2016-08-23 03:51:30 +00:00
|
|
|
}
|
2016-08-07 01:17:02 +00:00
|
|
|
|
2017-06-26 00:22:17 +00:00
|
|
|
func writeDb(outputPath, title, revision string, termRecords [][]interface{}, kanjiRecords [][]interface{}, tagMeta map[string]dbTagMeta, stride int, pretty bool) error {
|
2016-11-05 20:13:13 +00:00
|
|
|
const DB_VERSION = 1
|
|
|
|
|
2017-06-26 00:22:17 +00:00
|
|
|
var zbuff bytes.Buffer
|
|
|
|
zip := zip.NewWriter(&zbuff)
|
|
|
|
|
2016-11-05 20:13:13 +00:00
|
|
|
marshalJson := func(obj interface{}, pretty bool) ([]byte, error) {
|
|
|
|
if pretty {
|
|
|
|
return json.MarshalIndent(obj, "", " ")
|
|
|
|
}
|
|
|
|
|
|
|
|
return json.Marshal(obj)
|
|
|
|
}
|
|
|
|
|
2016-12-18 03:24:08 +00:00
|
|
|
writeDbRecords := func(prefix string, records [][]interface{}) (int, error) {
|
2016-11-06 06:24:57 +00:00
|
|
|
recordCount := len(records)
|
|
|
|
bankCount := 0
|
|
|
|
|
2016-12-29 01:45:33 +00:00
|
|
|
for i := 0; i < recordCount; i += stride {
|
2016-11-06 06:24:57 +00:00
|
|
|
indexSrc := i
|
2016-12-29 01:45:33 +00:00
|
|
|
indexDst := i + stride
|
2016-11-06 06:24:57 +00:00
|
|
|
if indexDst > recordCount {
|
|
|
|
indexDst = recordCount
|
|
|
|
}
|
|
|
|
|
|
|
|
bytes, err := marshalJson(records[indexSrc:indexDst], pretty)
|
|
|
|
if err != nil {
|
|
|
|
return 0, err
|
|
|
|
}
|
|
|
|
|
2017-06-26 00:22:17 +00:00
|
|
|
zw, err := zip.Create(fmt.Sprintf("%s_bank_%d.json", prefix, i/stride+1))
|
2016-11-06 06:24:57 +00:00
|
|
|
if err != nil {
|
|
|
|
return 0, err
|
|
|
|
}
|
|
|
|
|
2017-06-26 00:22:17 +00:00
|
|
|
if _, err := zw.Write(bytes); err != nil {
|
2016-11-06 06:24:57 +00:00
|
|
|
return 0, err
|
|
|
|
}
|
|
|
|
|
|
|
|
bankCount += 1
|
|
|
|
}
|
|
|
|
|
|
|
|
return bankCount, nil
|
2016-11-05 20:13:13 +00:00
|
|
|
}
|
|
|
|
|
2016-11-06 06:24:57 +00:00
|
|
|
var err error
|
|
|
|
var db struct {
|
2016-12-17 23:48:13 +00:00
|
|
|
Title string `json:"title"`
|
|
|
|
Version int `json:"version"`
|
2016-12-24 05:52:49 +00:00
|
|
|
Revision string `json:"revision"`
|
2016-12-17 23:48:13 +00:00
|
|
|
TagMeta map[string]dbTagMeta `json:"tagMeta"`
|
|
|
|
TermBanks int `json:"termBanks"`
|
|
|
|
KanjiBanks int `json:"kanjiBanks"`
|
2016-11-06 06:24:57 +00:00
|
|
|
}
|
2016-11-05 20:13:13 +00:00
|
|
|
|
2016-11-06 00:09:23 +00:00
|
|
|
db.Title = title
|
2016-11-06 06:24:57 +00:00
|
|
|
db.Version = DB_VERSION
|
2016-12-24 05:52:49 +00:00
|
|
|
db.Revision = revision
|
2016-12-17 23:48:13 +00:00
|
|
|
db.TagMeta = tagMeta
|
2016-11-06 06:24:57 +00:00
|
|
|
|
|
|
|
if db.TermBanks, err = writeDbRecords("term", termRecords); err != nil {
|
|
|
|
return err
|
2016-11-05 20:13:13 +00:00
|
|
|
}
|
|
|
|
|
2016-11-06 06:24:57 +00:00
|
|
|
if db.KanjiBanks, err = writeDbRecords("kanji", kanjiRecords); err != nil {
|
2016-08-23 03:51:30 +00:00
|
|
|
return err
|
|
|
|
}
|
2016-08-07 01:17:02 +00:00
|
|
|
|
2016-11-05 20:13:13 +00:00
|
|
|
bytes, err := marshalJson(db, pretty)
|
2016-08-23 03:51:30 +00:00
|
|
|
if err != nil {
|
|
|
|
return err
|
2016-08-07 01:17:02 +00:00
|
|
|
}
|
|
|
|
|
2017-06-26 00:22:17 +00:00
|
|
|
zw, err := zip.Create("index.json")
|
|
|
|
if err != nil {
|
|
|
|
return err
|
|
|
|
}
|
|
|
|
|
|
|
|
if _, err := zw.Write(bytes); err != nil {
|
|
|
|
return err
|
|
|
|
}
|
|
|
|
|
|
|
|
zip.Close()
|
|
|
|
|
|
|
|
fp, err := os.Create(outputPath)
|
2016-08-07 01:17:02 +00:00
|
|
|
if err != nil {
|
|
|
|
return err
|
|
|
|
}
|
|
|
|
|
2017-06-26 00:22:17 +00:00
|
|
|
if _, err := fp.Write(zbuff.Bytes()); err != nil {
|
2016-08-23 03:51:30 +00:00
|
|
|
return err
|
|
|
|
}
|
|
|
|
|
2017-06-26 00:22:17 +00:00
|
|
|
return fp.Close()
|
2016-08-24 16:02:26 +00:00
|
|
|
}
|
|
|
|
|
2016-12-17 23:48:13 +00:00
|
|
|
func appendStringUnique(target []string, source ...string) []string {
|
|
|
|
for _, str := range source {
|
|
|
|
if !hasString(str, target) {
|
|
|
|
target = append(target, str)
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
return target
|
|
|
|
}
|
|
|
|
|
2016-08-07 01:17:02 +00:00
|
|
|
// hasString reports whether needle is equal to any element of haystack. A nil
// or empty haystack never matches.
func hasString(needle string, haystack []string) bool {
	for _, value := range haystack {
		if value == needle {
			return true
		}
	}

	return false
}
|
2016-12-29 01:45:33 +00:00
|
|
|
|
2017-06-26 01:06:41 +00:00
|
|
|
// detectFormat guesses the dictionary format identifier for path.
//
// The final path element is first compared against a fixed set of known file
// names; a match returns the corresponding identifier ("edict", "enamdict",
// "kanjidic" or "epwing") without touching the file system. Otherwise path is
// inspected on disk: a directory that contains an entry named "CATALOGS" is
// reported as "epwing". If path cannot be stat'ed that error is returned, and
// anything else yields an "unrecognized dictionary format" error.
func detectFormat(path string) (string, error) {
	switch filepath.Base(path) {
	case "JMdict", "JMdict.xml", "JMdict_e", "JMdict_e.xml":
		return "edict", nil
	case "JMnedict", "JMnedict.xml":
		return "enamdict", nil
	case "kanjidic2", "kanjidic2.xml":
		return "kanjidic", nil
	case "CATALOGS":
		return "epwing", nil
	}

	info, err := os.Stat(path)
	if err != nil {
		return "", err
	}

	if info.IsDir() {
		// Any failure to stat the CATALOGS entry is treated as "not there".
		if _, err := os.Stat(filepath.Join(path, "CATALOGS")); err == nil {
			return "epwing", nil
		}
	}

	return "", errors.New("unrecognized dictionary format")
}
|