356 lines
6.8 KiB
Go
356 lines
6.8 KiB
Go
package yomichan
|
|
|
|
import (
|
|
"archive/zip"
|
|
"bytes"
|
|
"encoding/json"
|
|
"errors"
|
|
"fmt"
|
|
"os"
|
|
"path/filepath"
|
|
"strings"
|
|
|
|
"golang.org/x/exp/slices"
|
|
)
|
|
|
|
// Default option values exported by the package.
//
// DefaultFormat is the empty string; ExportDb treats a format equal to it as
// a request to detect the format from the input path.
//
// NOTE(review): the remaining constants presumably are the defaults for the
// ExportDb parameters of the same name (language, pretty, stride, title);
// they are not referenced in this part of the file - confirm against callers.
const (
	DefaultFormat   = ""
	DefaultLanguage = ""
	DefaultPretty   = false
	DefaultStride   = 10000
	DefaultTitle    = ""
)
|
|
|
|
// dbRecord is one positional record: a heterogeneous list of values that
// encoding/json serializes as a JSON array.
type dbRecord []any

// dbRecordList is an ordered list of records. writeDb splits such a list into
// chunks and stores each chunk as one bank file.
type dbRecordList []dbRecord
|
|
|
|
// dbTag describes a single tag. dbTagList.crush flattens it into the record
// [Name, Category, Order, Notes, Score].
type dbTag struct {
	Name     string
	Category string
	Order    int
	Notes    string
	Score    int
}

// dbTagList is a list of tags that can be flattened into records with crush.
type dbTagList []dbTag
|
|
|
|
func (meta dbTagList) crush() dbRecordList {
|
|
var results dbRecordList
|
|
for _, m := range meta {
|
|
results = append(results, dbRecord{m.Name, m.Category, m.Order, m.Notes, m.Score})
|
|
}
|
|
|
|
return results
|
|
}
|
|
|
|
// dbMeta attaches a piece of metadata to an expression. dbMetaList.crush
// flattens it into the record [Expression, Mode, Data]; Data is emitted as-is.
//
// NOTE(review): the crush receiver is named freqs, so this presumably carries
// frequency metadata - confirm against the code that builds these values.
type dbMeta struct {
	Expression string
	Mode       string
	Data       any
}

// dbMetaList is a list of metadata entries that can be flattened into records
// with crush.
type dbMetaList []dbMeta
|
|
|
|
func (freqs dbMetaList) crush() dbRecordList {
|
|
var results dbRecordList
|
|
for _, f := range freqs {
|
|
results = append(results, dbRecord{f.Expression, f.Mode, f.Data})
|
|
}
|
|
|
|
return results
|
|
}
|
|
|
|
// dbTerm is a single term entry. dbTermList.crush flattens it into the record
// [Expression, Reading, DefinitionTags, Rules, Score, Glossary, Sequence,
// TermTags], where the three string-slice fields are joined with single
// spaces and Glossary is emitted as-is.
type dbTerm struct {
	Expression     string
	Reading        string
	DefinitionTags []string
	Rules          []string
	Score          int
	Glossary       []any
	Sequence       int
	TermTags       []string
}

// dbTermList is a list of term entries that can be flattened into records
// with crush.
type dbTermList []dbTerm
|
|
|
|
func (term *dbTerm) addDefinitionTags(tags ...string) {
|
|
term.DefinitionTags = appendStringUnique(term.DefinitionTags, tags...)
|
|
}
|
|
|
|
func (term *dbTerm) addTermTags(tags ...string) {
|
|
term.TermTags = appendStringUnique(term.TermTags, tags...)
|
|
}
|
|
|
|
func (term *dbTerm) addRules(rules ...string) {
|
|
term.Rules = appendStringUnique(term.Rules, rules...)
|
|
}
|
|
|
|
func (terms dbTermList) crush() dbRecordList {
|
|
var results dbRecordList
|
|
for _, t := range terms {
|
|
result := dbRecord{
|
|
t.Expression,
|
|
t.Reading,
|
|
strings.Join(t.DefinitionTags, " "),
|
|
strings.Join(t.Rules, " "),
|
|
t.Score,
|
|
t.Glossary,
|
|
t.Sequence,
|
|
strings.Join(t.TermTags, " "),
|
|
}
|
|
|
|
results = append(results, result)
|
|
}
|
|
|
|
return results
|
|
}
|
|
|
|
// dbKanji is a single kanji entry. dbKanjiList.crush flattens it into the
// record [Character, Onyomi, Kunyomi, Tags, Meanings, Stats], where Onyomi,
// Kunyomi and Tags are joined with single spaces and Meanings and Stats are
// emitted as-is.
type dbKanji struct {
	Character string
	Onyomi    []string
	Kunyomi   []string
	Tags      []string
	Meanings  []string
	Stats     map[string]string
}

// dbKanjiList is a list of kanji entries that can be flattened into records
// with crush.
type dbKanjiList []dbKanji
|
|
|
|
func (kanji *dbKanji) addTags(tags ...string) {
|
|
for _, tag := range tags {
|
|
if !slices.Contains(kanji.Tags, tag) {
|
|
kanji.Tags = append(kanji.Tags, tag)
|
|
}
|
|
}
|
|
}
|
|
|
|
func (kanji dbKanjiList) crush() dbRecordList {
|
|
var results dbRecordList
|
|
for _, k := range kanji {
|
|
result := dbRecord{
|
|
k.Character,
|
|
strings.Join(k.Onyomi, " "),
|
|
strings.Join(k.Kunyomi, " "),
|
|
strings.Join(k.Tags, " "),
|
|
k.Meanings,
|
|
k.Stats,
|
|
}
|
|
|
|
results = append(results, result)
|
|
}
|
|
|
|
return results
|
|
}
|
|
|
|
// dbIndex is the dictionary index that writeDb serializes to "index.json".
// The struct tags give the JSON key of every field. setDefaults fills in
// Format, Author and Url when they hold their zero value.
type dbIndex struct {
	Title       string `json:"title"`
	Format      int    `json:"format"`
	Revision    string `json:"revision"`
	Sequenced   bool   `json:"sequenced"`
	Author      string `json:"author"`
	Url         string `json:"url"`
	Description string `json:"description"`
	Attribution string `json:"attribution"`
}
|
|
|
|
func (index *dbIndex) setDefaults() {
|
|
if index.Format == 0 {
|
|
index.Format = 3
|
|
}
|
|
if index.Author == "" {
|
|
index.Author = "yomichan-import"
|
|
}
|
|
if index.Url == "" {
|
|
index.Url = "https://github.com/FooSoft/yomichan-import"
|
|
}
|
|
}
|
|
|
|
func writeDb(outputPath string, index dbIndex, recordData map[string]dbRecordList, stride int, pretty bool) error {
|
|
var zbuff bytes.Buffer
|
|
zip := zip.NewWriter(&zbuff)
|
|
|
|
marshalJSON := func(obj any, pretty bool) ([]byte, error) {
|
|
if pretty {
|
|
return json.MarshalIndent(obj, "", " ")
|
|
}
|
|
|
|
return json.Marshal(obj)
|
|
}
|
|
|
|
writeDbRecords := func(prefix string, records dbRecordList) (int, error) {
|
|
recordCount := len(records)
|
|
bankCount := 0
|
|
|
|
for i := 0; i < recordCount; i += stride {
|
|
indexSrc := i
|
|
indexDst := i + stride
|
|
if indexDst > recordCount {
|
|
indexDst = recordCount
|
|
}
|
|
|
|
bytes, err := marshalJSON(records[indexSrc:indexDst], pretty)
|
|
if err != nil {
|
|
return 0, err
|
|
}
|
|
|
|
zw, err := zip.Create(fmt.Sprintf("%s_bank_%d.json", prefix, i/stride+1))
|
|
if err != nil {
|
|
return 0, err
|
|
}
|
|
|
|
if _, err := zw.Write(bytes); err != nil {
|
|
return 0, err
|
|
}
|
|
|
|
bankCount++
|
|
}
|
|
|
|
return bankCount, nil
|
|
}
|
|
|
|
var err error
|
|
|
|
for recordType, recordEntries := range recordData {
|
|
if _, err := writeDbRecords(recordType, recordEntries); err != nil {
|
|
return err
|
|
}
|
|
}
|
|
|
|
index.setDefaults()
|
|
bytes, err := marshalJSON(index, pretty)
|
|
if err != nil {
|
|
return err
|
|
}
|
|
|
|
zw, err := zip.Create("index.json")
|
|
if err != nil {
|
|
return err
|
|
}
|
|
|
|
if _, err := zw.Write(bytes); err != nil {
|
|
return err
|
|
}
|
|
|
|
zip.Close()
|
|
|
|
fp, err := os.Create(outputPath)
|
|
if err != nil {
|
|
return err
|
|
}
|
|
|
|
if _, err := fp.Write(zbuff.Bytes()); err != nil {
|
|
return err
|
|
}
|
|
|
|
return fp.Close()
|
|
}
|
|
|
|
// appendStringUnique appends to target every string of source that target
// does not contain yet, in the order given, and returns the resulting slice.
// Strings repeated within source are added only once; entries already in
// target are left as they are.
func appendStringUnique(target []string, source ...string) []string {
	for _, candidate := range source {
		if slices.Contains(target, candidate) {
			continue
		}
		target = append(target, candidate)
	}

	return target
}
|
|
|
|
// intersection returns the strings of s2 that also occur in s1. Each common
// string is reported once, in the order of its first appearance in s2.
// The result is never nil; it is an empty slice when nothing is shared.
func intersection(s1, s2 []string) []string {
	// pending holds the members of s1 that have not been reported yet.
	pending := make(map[string]struct{}, len(s1))
	for _, item := range s1 {
		pending[item] = struct{}{}
	}

	common := []string{}
	for _, item := range s2 {
		if _, ok := pending[item]; !ok {
			continue
		}
		common = append(common, item)
		// Drop the entry so later repeats in s2 are not reported again.
		delete(pending, item)
	}

	return common
}
|
|
|
|
// union returns the distinct strings of s1 followed by the distinct strings
// of s2 that s1 did not contain, each in order of first appearance.
// The result is never nil; it is an empty slice when both inputs are empty.
func union(s1, s2 []string) []string {
	seen := make(map[string]struct{}, len(s1)+len(s2))
	merged := []string{}

	for _, src := range [][]string{s1, s2} {
		for _, item := range src {
			if _, dup := seen[item]; dup {
				continue
			}
			seen[item] = struct{}{}
			merged = append(merged, item)
		}
	}

	return merged
}
|
|
|
|
// detectFormat guesses the dictionary format of path and returns its name.
//
// The checks run in this order:
//  1. the file extension (".sqlite", ".kanjifreq", ".termfreq");
//  2. the base name (the JMdict, JMnedict and kanjidic2 file names);
//  3. a directory that contains a "CATALOGS" or "catalogs" entry is "epwing".
//
// The first two checks do not touch the file system. If neither matches and
// path cannot be stat'ed, that error is returned; if nothing matches at all
// the error is "unrecognized dictionary format".
func detectFormat(path string) (string, error) {
	ext := filepath.Ext(path)
	switch ext {
	case ".sqlite":
		return "rikai", nil
	case ".kanjifreq":
		return "kanjifreq", nil
	case ".termfreq":
		return "termfreq", nil
	}

	base := filepath.Base(path)
	switch base {
	case "JMdict", "JMdict.xml", "JMdict_e", "JMdict_e.xml", "JMdict_e_examp":
		return "edict", nil
	case "JMnedict", "JMnedict.xml":
		return "enamdict", nil
	case "kanjidic2", "kanjidic2.xml":
		return "kanjidic", nil
	}

	info, err := os.Stat(path)
	if err != nil {
		return "", err
	}

	if info.IsDir() {
		// Both spellings are probed, upper case first.
		for _, catalog := range []string{"CATALOGS", "catalogs"} {
			if _, statErr := os.Stat(filepath.Join(path, catalog)); statErr == nil {
				return "epwing", nil
			}
		}
	}

	return "", errors.New("unrecognized dictionary format")
}
|
|
|
|
func ExportDb(inputPath, outputPath, format, language, title string, stride int, pretty bool) error {
|
|
handlers := map[string]func(string, string, string, string, int, bool) error{
|
|
"edict": jmdictExportDb,
|
|
"forms": formsExportDb,
|
|
"enamdict": jmnedictExportDb,
|
|
"epwing": epwingExportDb,
|
|
"kanjidic": kanjidicExportDb,
|
|
"rikai": rikaiExportDb,
|
|
"kanjifreq": frequencyKanjiExportDb,
|
|
"termfreq": frequencyTermsExportDb,
|
|
}
|
|
|
|
var err error
|
|
if format == DefaultFormat {
|
|
if format, err = detectFormat(inputPath); err != nil {
|
|
return err
|
|
}
|
|
}
|
|
|
|
handler, ok := handlers[strings.ToLower(format)]
|
|
if !ok {
|
|
return errors.New("unrecognized dictionary format")
|
|
}
|
|
|
|
return handler(inputPath, outputPath, strings.ToLower(language), title, stride, pretty)
|
|
}
|