initial work on db redesign
This commit is contained in:
parent
f0d72fefaa
commit
532838764b
96
common.go
96
common.go
@ -33,10 +33,43 @@ import (
|
||||
"strings"
|
||||
)
|
||||
|
||||
type dbTagMeta struct {
|
||||
Category string `json:"category,omitempty"`
|
||||
Notes string `json:"notes,omitempty"`
|
||||
Order int `json:"order,omitempty"`
|
||||
const databaseVersion = 2
|
||||
|
||||
// dbRecord is one flattened row: a positional list of loosely typed values.
type dbRecord []interface{}

// dbRecordList is an ordered sequence of flattened rows.
type dbRecordList []dbRecord

// dbTag holds the metadata attached to a single named tag.
type dbTag struct {
	Name     string
	Category string
	Order    int
	Notes    string
}

// dbTagList is an ordered collection of tags.
type dbTagList []dbTag

// crush flattens each tag into a positional record laid out as
// [Name, Category, Order, Notes], preserving the order of the list.
// A list with no elements produces a nil record list.
func (meta dbTagList) crush() dbRecordList {
	var records dbRecordList

	for i := range meta {
		tag := &meta[i]
		row := dbRecord{tag.Name, tag.Category, tag.Order, tag.Notes}
		records = append(records, row)
	}

	return records
}
|
||||
|
||||
type dbFrequency struct {
|
||||
Expression string
|
||||
Count int
|
||||
}
|
||||
|
||||
type dbFrequencyList []dbFrequency
|
||||
|
||||
func (freqs dbFrequencyList) crush() dbRecordList {
|
||||
var results dbRecordList
|
||||
for _, f := range freqs {
|
||||
results = append(results, dbRecord{f.Expression, f.Count})
|
||||
}
|
||||
|
||||
return results
|
||||
}
|
||||
|
||||
type dbTerm struct {
|
||||
@ -58,10 +91,10 @@ func (term *dbTerm) addRules(rules ...string) {
|
||||
term.Rules = appendStringUnique(term.Rules, rules...)
|
||||
}
|
||||
|
||||
func (terms dbTermList) crush() [][]interface{} {
|
||||
var results [][]interface{}
|
||||
func (terms dbTermList) crush() dbRecordList {
|
||||
var results dbRecordList
|
||||
for _, t := range terms {
|
||||
result := []interface{}{
|
||||
result := dbRecord{
|
||||
t.Expression,
|
||||
t.Reading,
|
||||
strings.Join(t.Tags, " "),
|
||||
@ -97,10 +130,10 @@ func (kanji *dbKanji) addTags(tags ...string) {
|
||||
}
|
||||
}
|
||||
|
||||
func (kanji dbKanjiList) crush() [][]interface{} {
|
||||
var results [][]interface{}
|
||||
func (kanji dbKanjiList) crush() dbRecordList {
|
||||
var results dbRecordList
|
||||
for _, k := range kanji {
|
||||
result := []interface{}{
|
||||
result := dbRecord{
|
||||
k.Character,
|
||||
strings.Join(k.Onyomi, " "),
|
||||
strings.Join(k.Kunyomi, " "),
|
||||
@ -117,13 +150,11 @@ func (kanji dbKanjiList) crush() [][]interface{} {
|
||||
return results
|
||||
}
|
||||
|
||||
func writeDb(outputPath, title, revision string, termRecords [][]interface{}, kanjiRecords [][]interface{}, tagMeta map[string]dbTagMeta, stride int, pretty bool) error {
|
||||
const DB_VERSION = 1
|
||||
|
||||
func writeDb(outputPath, title, revision string, recordData map[string]dbRecordList, stride int, pretty bool) error {
|
||||
var zbuff bytes.Buffer
|
||||
zip := zip.NewWriter(&zbuff)
|
||||
|
||||
marshalJson := func(obj interface{}, pretty bool) ([]byte, error) {
|
||||
marshalJSON := func(obj interface{}, pretty bool) ([]byte, error) {
|
||||
if pretty {
|
||||
return json.MarshalIndent(obj, "", " ")
|
||||
}
|
||||
@ -131,7 +162,7 @@ func writeDb(outputPath, title, revision string, termRecords [][]interface{}, ka
|
||||
return json.Marshal(obj)
|
||||
}
|
||||
|
||||
writeDbRecords := func(prefix string, records [][]interface{}) (int, error) {
|
||||
writeDbRecords := func(prefix string, records dbRecordList) (int, error) {
|
||||
recordCount := len(records)
|
||||
bankCount := 0
|
||||
|
||||
@ -142,7 +173,7 @@ func writeDb(outputPath, title, revision string, termRecords [][]interface{}, ka
|
||||
indexDst = recordCount
|
||||
}
|
||||
|
||||
bytes, err := marshalJson(records[indexSrc:indexDst], pretty)
|
||||
bytes, err := marshalJSON(records[indexSrc:indexDst], pretty)
|
||||
if err != nil {
|
||||
return 0, err
|
||||
}
|
||||
@ -156,7 +187,7 @@ func writeDb(outputPath, title, revision string, termRecords [][]interface{}, ka
|
||||
return 0, err
|
||||
}
|
||||
|
||||
bankCount += 1
|
||||
bankCount++
|
||||
}
|
||||
|
||||
return bankCount, nil
|
||||
@ -164,28 +195,22 @@ func writeDb(outputPath, title, revision string, termRecords [][]interface{}, ka
|
||||
|
||||
var err error
|
||||
var db struct {
|
||||
Title string `json:"title"`
|
||||
Version int `json:"version"`
|
||||
Revision string `json:"revision"`
|
||||
TagMeta map[string]dbTagMeta `json:"tagMeta"`
|
||||
TermBanks int `json:"termBanks"`
|
||||
KanjiBanks int `json:"kanjiBanks"`
|
||||
Title string `json:"title"`
|
||||
Version int `json:"version"`
|
||||
Revision string `json:"revision"`
|
||||
}
|
||||
|
||||
db.Title = title
|
||||
db.Version = DB_VERSION
|
||||
db.Version = databaseVersion
|
||||
db.Revision = revision
|
||||
db.TagMeta = tagMeta
|
||||
|
||||
if db.TermBanks, err = writeDbRecords("term", termRecords); err != nil {
|
||||
return err
|
||||
for recordType, recordEntries := range recordData {
|
||||
if _, err := writeDbRecords(recordType, recordEntries); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
|
||||
if db.KanjiBanks, err = writeDbRecords("kanji", kanjiRecords); err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
bytes, err := marshalJson(db, pretty)
|
||||
bytes, err := marshalJSON(db, pretty)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
@ -234,8 +259,13 @@ func hasString(needle string, haystack []string) bool {
|
||||
}
|
||||
|
||||
func detectFormat(path string) (string, error) {
|
||||
if filepath.Ext(path) == ".sqlite" {
|
||||
switch filepath.Ext(path) {
|
||||
case ".sqlite":
|
||||
return "rikai", nil
|
||||
case ".kanji_freq":
|
||||
return "kanji_freq", nil
|
||||
case ".term_freq":
|
||||
return "term_freq", nil
|
||||
}
|
||||
|
||||
switch filepath.Base(path) {
|
||||
|
31
edict.go
31
edict.go
@ -29,7 +29,7 @@ import (
|
||||
"github.com/FooSoft/jmdict"
|
||||
)
|
||||
|
||||
const JMDICT_REVISION = "jmdict3"
|
||||
const jmdictRevision = "jmdict3"
|
||||
|
||||
func jmdictBuildRules(term *dbTerm) {
|
||||
for _, tag := range term.Tags {
|
||||
@ -71,17 +71,17 @@ func jmdictAddPriorities(term *dbTerm, priorities ...string) {
|
||||
}
|
||||
}
|
||||
|
||||
func jmdictBuildTagMeta(entities map[string]string) map[string]dbTagMeta {
|
||||
tags := map[string]dbTagMeta{
|
||||
"news": {Notes: "appears frequently in Mainichi Shimbun", Category: "frequent", Order: -2},
|
||||
"ichi": {Notes: "listed as common in Ichimango Goi Bunruishuu", Category: "frequent", Order: -2},
|
||||
"spec": {Notes: "common words not included in frequency lists", Category: "frequent", Order: -2},
|
||||
"gai": {Notes: "common loanword", Category: "frequent", Order: -2},
|
||||
"P": {Notes: "popular term", Category: "popular", Order: -10},
|
||||
func jmdictBuildTagMeta(entities map[string]string) dbTagList {
|
||||
tags := dbTagList{
|
||||
dbTag{Name: "news", Notes: "appears frequently in Mainichi Shimbun", Category: "frequent", Order: -2},
|
||||
dbTag{Name: "ichi", Notes: "listed as common in Ichimango Goi Bunruishuu", Category: "frequent", Order: -2},
|
||||
dbTag{Name: "spec", Notes: "common words not included in frequency lists", Category: "frequent", Order: -2},
|
||||
dbTag{Name: "gai", Notes: "common loanword", Category: "frequent", Order: -2},
|
||||
dbTag{Name: "P", Notes: "popular term", Category: "popular", Order: -10},
|
||||
}
|
||||
|
||||
for name, value := range entities {
|
||||
tag := dbTagMeta{Notes: value}
|
||||
tag := dbTag{Name: name, Notes: value}
|
||||
|
||||
switch name {
|
||||
case "exp", "id":
|
||||
@ -92,7 +92,7 @@ func jmdictBuildTagMeta(entities map[string]string) map[string]dbTagMeta {
|
||||
tag.Order = -4
|
||||
}
|
||||
|
||||
tags[name] = tag
|
||||
tags = append(tags, tag)
|
||||
}
|
||||
|
||||
return tags
|
||||
@ -227,13 +227,16 @@ func jmdictExportDb(inputPath, outputPath, language, title string, stride int, p
|
||||
title = "JMdict"
|
||||
}
|
||||
|
||||
recordData := map[string]dbRecordList{
|
||||
"terms": terms.crush(),
|
||||
"tags": jmdictBuildTagMeta(entities).crush(),
|
||||
}
|
||||
|
||||
return writeDb(
|
||||
outputPath,
|
||||
title,
|
||||
JMDICT_REVISION,
|
||||
terms.crush(),
|
||||
nil,
|
||||
jmdictBuildTagMeta(entities),
|
||||
jmdictRevision,
|
||||
recordData,
|
||||
stride,
|
||||
pretty,
|
||||
)
|
||||
|
21
enamdict.go
21
enamdict.go
@ -28,13 +28,13 @@ import (
|
||||
"github.com/FooSoft/jmdict"
|
||||
)
|
||||
|
||||
const JMNEDICT_REVISION = "jmnedict1"
|
||||
const jmnedictRevision = "jmnedict1"
|
||||
|
||||
func jmnedictBuildTagMeta(entities map[string]string) map[string]dbTagMeta {
|
||||
tags := make(map[string]dbTagMeta)
|
||||
func jmnedictBuildTagMeta(entities map[string]string) dbTagList {
|
||||
var tags dbTagList
|
||||
|
||||
for name, value := range entities {
|
||||
tag := dbTagMeta{Notes: value}
|
||||
tag := dbTag{Name: name, Notes: value}
|
||||
|
||||
switch name {
|
||||
case "company", "fem", "given", "masc", "organization", "person", "place", "product", "station", "surname", "unclass", "work":
|
||||
@ -42,7 +42,7 @@ func jmnedictBuildTagMeta(entities map[string]string) map[string]dbTagMeta {
|
||||
tag.Order = 4
|
||||
}
|
||||
|
||||
tags[name] = tag
|
||||
tags = append(tags, tag)
|
||||
}
|
||||
|
||||
return tags
|
||||
@ -118,13 +118,16 @@ func jmnedictExportDb(inputPath, outputPath, language, title string, stride int,
|
||||
title = "JMnedict"
|
||||
}
|
||||
|
||||
recordData := map[string]dbRecordList{
|
||||
"terms": terms.crush(),
|
||||
"tags": jmnedictBuildTagMeta(entities).crush(),
|
||||
}
|
||||
|
||||
return writeDb(
|
||||
outputPath,
|
||||
title,
|
||||
JMNEDICT_REVISION,
|
||||
terms.crush(),
|
||||
nil,
|
||||
jmnedictBuildTagMeta(entities),
|
||||
jmnedictRevision,
|
||||
recordData,
|
||||
stride,
|
||||
pretty,
|
||||
)
|
||||
|
@ -200,13 +200,16 @@ func epwingExportDb(inputPath, outputPath, language, title string, stride int, p
|
||||
title = strings.Join(titles, ", ")
|
||||
}
|
||||
|
||||
recordData := map[string]dbRecordList{
|
||||
"kanji": kanji.crush(),
|
||||
"terms": terms.crush(),
|
||||
}
|
||||
|
||||
return writeDb(
|
||||
outputPath,
|
||||
title,
|
||||
strings.Join(revisions, ";"),
|
||||
terms.crush(),
|
||||
kanji.crush(),
|
||||
nil,
|
||||
recordData,
|
||||
stride,
|
||||
pretty,
|
||||
)
|
||||
|
74
frequency.go
Normal file
74
frequency.go
Normal file
@ -0,0 +1,74 @@
|
||||
/*
|
||||
* Copyright (c) 2017 Alex Yatskov <alex@foosoft.net>
|
||||
* Author: Alex Yatskov <alex@foosoft.net>
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a copy of
|
||||
* this software and associated documentation files (the "Software"), to deal in
|
||||
* the Software without restriction, including without limitation the rights to
|
||||
* use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of
|
||||
* the Software, and to permit persons to whom the Software is furnished to do so,
|
||||
* subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be included in all
|
||||
* copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS
|
||||
* FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR
|
||||
* COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER
|
||||
* IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
|
||||
* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
package main
|
||||
|
||||
import (
|
||||
"bufio"
|
||||
"os"
|
||||
"strconv"
|
||||
"strings"
|
||||
)
|
||||
|
||||
const frequencyRevision = "frequency1"
|
||||
|
||||
func frequncyExportDb(inputPath, outputPath, language, title string, stride int, pretty bool) error {
|
||||
reader, err := os.Open(inputPath)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
defer reader.Close()
|
||||
|
||||
var frequencies dbFrequencyList
|
||||
for scanner := bufio.NewScanner(reader); scanner.Scan(); {
|
||||
line := scanner.Text()
|
||||
if strings.HasPrefix(line, "#") {
|
||||
continue
|
||||
}
|
||||
|
||||
parts := strings.Split(line, "\t")
|
||||
if len(parts) != 2 {
|
||||
continue
|
||||
}
|
||||
|
||||
expression := parts[0]
|
||||
count, err := strconv.Atoi(parts[1])
|
||||
if err != nil {
|
||||
continue
|
||||
}
|
||||
|
||||
frequencies = append(frequencies, dbFrequency{expression, count})
|
||||
}
|
||||
|
||||
recordData := map[string]dbRecordList{
|
||||
"frequencies": frequencies.crush(),
|
||||
}
|
||||
|
||||
return writeDb(
|
||||
outputPath,
|
||||
title,
|
||||
frequencyRevision,
|
||||
recordData,
|
||||
stride,
|
||||
pretty,
|
||||
)
|
||||
}
|
27
kanjidic.go
27
kanjidic.go
@ -30,7 +30,7 @@ import (
|
||||
"github.com/FooSoft/jmdict"
|
||||
)
|
||||
|
||||
const KANJIDIC_REVISION = "kanjidic1"
|
||||
const kanjidicRevision = "kanjidic1"
|
||||
|
||||
func kanjidicExtractKanji(entry jmdict.KanjidicCharacter, language string) *dbKanji {
|
||||
if entry.ReadingMeaning == nil {
|
||||
@ -116,26 +116,29 @@ func kanjidicExportDb(inputPath, outputPath, language, title string, stride int,
|
||||
}
|
||||
}
|
||||
|
||||
tagMeta := map[string]dbTagMeta{
|
||||
"jouyou": {Notes: "included in list of regular-use characters", Category: "frequent", Order: -5},
|
||||
"jinmeiyou": {Notes: "included in list of characters for use in personal names", Category: "frequent", Order: -5},
|
||||
"jlpt": {Notes: "corresponding Japanese Language Proficiency Test level"},
|
||||
"grade": {Notes: "school grade level at which the character is taught"},
|
||||
"strokes": {Notes: "number of strokes needed to write the character"},
|
||||
"heisig": {Notes: "frame number in Remembering the Kanji"},
|
||||
tags := dbTagList{
|
||||
dbTag{Name: "jouyou", Notes: "included in list of regular-use characters", Category: "frequent", Order: -5},
|
||||
dbTag{Name: "jinmeiyou", Notes: "included in list of characters for use in personal names", Category: "frequent", Order: -5},
|
||||
dbTag{Name: "jlpt", Notes: "corresponding Japanese Language Proficiency Test level"},
|
||||
dbTag{Name: "grade", Notes: "school grade level at which the character is taught"},
|
||||
dbTag{Name: "strokes", Notes: "number of strokes needed to write the character"},
|
||||
dbTag{Name: "heisig", Notes: "frame number in Remembering the Kanji"},
|
||||
}
|
||||
|
||||
if title == "" {
|
||||
title = "KANJIDIC2"
|
||||
}
|
||||
|
||||
recordData := map[string]dbRecordList{
|
||||
"kanji": kanji.crush(),
|
||||
"tags": tags.crush(),
|
||||
}
|
||||
|
||||
return writeDb(
|
||||
outputPath,
|
||||
title,
|
||||
KANJIDIC_REVISION,
|
||||
nil,
|
||||
kanji.crush(),
|
||||
tagMeta,
|
||||
kanjidicRevision,
|
||||
recordData,
|
||||
stride,
|
||||
pretty,
|
||||
)
|
||||
|
25
rikai.go
25
rikai.go
@ -30,7 +30,7 @@ import (
|
||||
_ "github.com/mattn/go-sqlite3"
|
||||
)
|
||||
|
||||
const RIKAI_REVISION = "rikai2"
|
||||
const rikaiRevision = "rikai2"
|
||||
|
||||
type rikaiEntry struct {
|
||||
kanji string
|
||||
@ -158,21 +158,24 @@ func rikaiExportDb(inputPath, outputPath, language, title string, stride int, pr
|
||||
title = "Rikai"
|
||||
}
|
||||
|
||||
entities := map[string]dbTagMeta{
|
||||
"P": {Category: "popular", Order: -10},
|
||||
"exp": {Category: "expression", Order: -5},
|
||||
"id": {Category: "expression", Order: -5},
|
||||
"arch": {Category: "archaism", Order: -4},
|
||||
"iK": {Category: "archaism", Order: -4},
|
||||
tags := dbTagList{
|
||||
dbTag{Name: "P", Category: "popular", Order: -10},
|
||||
dbTag{Name: "exp", Category: "expression", Order: -5},
|
||||
dbTag{Name: "id", Category: "expression", Order: -5},
|
||||
dbTag{Name: "arch", Category: "archaism", Order: -4},
|
||||
dbTag{Name: "iK", Category: "archaism", Order: -4},
|
||||
}
|
||||
|
||||
recordData := map[string]dbRecordList{
|
||||
"terms": terms.crush(),
|
||||
"tags": tags.crush(),
|
||||
}
|
||||
|
||||
return writeDb(
|
||||
outputPath,
|
||||
title,
|
||||
RIKAI_REVISION,
|
||||
terms.crush(),
|
||||
nil,
|
||||
entities,
|
||||
rikaiRevision,
|
||||
recordData,
|
||||
stride,
|
||||
pretty,
|
||||
)
|
||||
|
Loading…
Reference in New Issue
Block a user