2021-01-01 22:31:58 +00:00
|
|
|
package yomichan
|
2016-08-07 01:17:02 +00:00
|
|
|
|
|
|
|
import (
|
2016-12-19 01:46:40 +00:00
|
|
|
"os"
|
2016-08-08 02:55:46 +00:00
|
|
|
"strconv"
|
2016-08-07 01:17:02 +00:00
|
|
|
|
2022-07-04 03:59:33 +00:00
|
|
|
"foosoft.net/projects/jmdict"
|
2016-08-07 01:17:02 +00:00
|
|
|
)
|
|
|
|
|
2017-06-11 00:35:58 +00:00
|
|
|
func kanjidicExtractKanji(entry jmdict.KanjidicCharacter, language string) *dbKanji {
|
|
|
|
if entry.ReadingMeaning == nil {
|
|
|
|
return nil
|
|
|
|
}
|
|
|
|
|
2017-09-16 20:54:28 +00:00
|
|
|
kanji := dbKanji{
|
|
|
|
Character: entry.Literal,
|
|
|
|
Stats: make(map[string]string),
|
|
|
|
}
|
2016-08-07 01:17:02 +00:00
|
|
|
|
2017-06-11 00:35:58 +00:00
|
|
|
for _, m := range entry.ReadingMeaning.Meanings {
|
|
|
|
if m.Language == nil && language == "" || m.Language != nil && language == *m.Language {
|
|
|
|
kanji.Meanings = append(kanji.Meanings, m.Meaning)
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
if len(kanji.Meanings) == 0 {
|
|
|
|
return nil
|
|
|
|
}
|
|
|
|
|
2017-09-16 20:54:28 +00:00
|
|
|
for _, number := range entry.DictionaryNumbers {
|
2017-09-18 02:53:56 +00:00
|
|
|
kanji.Stats[number.Type] = number.Value
|
2017-09-16 20:54:28 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
if frequency := entry.Misc.Frequency; frequency != nil {
|
2017-09-17 21:11:15 +00:00
|
|
|
kanji.Stats["freq"] = *frequency
|
2017-09-16 20:54:28 +00:00
|
|
|
}
|
|
|
|
|
2016-11-05 20:13:13 +00:00
|
|
|
if level := entry.Misc.JlptLevel; level != nil {
|
2017-09-17 21:11:15 +00:00
|
|
|
kanji.Stats["jlpt"] = *level
|
2017-09-16 20:54:28 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
if counts := entry.Misc.StrokeCounts; len(counts) > 0 {
|
2017-09-17 21:11:15 +00:00
|
|
|
kanji.Stats["strokes"] = counts[0]
|
2016-08-08 02:55:46 +00:00
|
|
|
}
|
|
|
|
|
2017-09-18 02:53:56 +00:00
|
|
|
for _, code := range entry.Codepoint {
|
|
|
|
kanji.Stats[code.Type] = code.Value
|
|
|
|
}
|
|
|
|
|
|
|
|
for _, code := range entry.QueryCode {
|
|
|
|
kanji.Stats[code.Type] = code.Value
|
|
|
|
}
|
|
|
|
|
2016-11-05 20:13:13 +00:00
|
|
|
if grade := entry.Misc.Grade; grade != nil {
|
2017-09-17 21:11:15 +00:00
|
|
|
kanji.Stats["grade"] = *grade
|
2016-08-08 02:55:46 +00:00
|
|
|
if gradeInt, err := strconv.Atoi(*grade); err == nil {
|
|
|
|
if gradeInt >= 1 && gradeInt <= 8 {
|
2016-11-05 20:13:13 +00:00
|
|
|
kanji.addTags("jouyou")
|
2016-08-08 02:55:46 +00:00
|
|
|
} else if gradeInt >= 9 && gradeInt <= 10 {
|
2016-11-05 20:13:13 +00:00
|
|
|
kanji.addTags("jinmeiyou")
|
2016-08-08 02:55:46 +00:00
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2017-06-11 00:35:58 +00:00
|
|
|
for _, r := range entry.ReadingMeaning.Readings {
|
|
|
|
switch r.Type {
|
|
|
|
case "ja_on":
|
|
|
|
kanji.Onyomi = append(kanji.Onyomi, r.Value)
|
|
|
|
case "ja_kun":
|
|
|
|
kanji.Kunyomi = append(kanji.Kunyomi, r.Value)
|
2016-08-07 01:17:02 +00:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2017-06-11 00:35:58 +00:00
|
|
|
return &kanji
|
2016-08-07 01:17:02 +00:00
|
|
|
}
|
|
|
|
|
2017-06-26 00:22:17 +00:00
|
|
|
func kanjidicExportDb(inputPath, outputPath, language, title string, stride int, pretty bool) error {
|
2016-12-19 01:46:40 +00:00
|
|
|
reader, err := os.Open(inputPath)
|
|
|
|
if err != nil {
|
|
|
|
return err
|
|
|
|
}
|
|
|
|
defer reader.Close()
|
|
|
|
|
2016-08-24 16:02:26 +00:00
|
|
|
dict, err := jmdict.LoadKanjidic(reader)
|
|
|
|
if err != nil {
|
|
|
|
return err
|
|
|
|
}
|
2016-08-23 03:51:30 +00:00
|
|
|
|
2017-06-11 00:35:58 +00:00
|
|
|
var langTag string
|
|
|
|
switch language {
|
|
|
|
case "french":
|
|
|
|
langTag = "fr"
|
|
|
|
case "spanish":
|
|
|
|
langTag = "es"
|
|
|
|
case "portuguese":
|
|
|
|
langTag = "pt"
|
|
|
|
}
|
|
|
|
|
2016-11-05 20:13:13 +00:00
|
|
|
var kanji dbKanjiList
|
|
|
|
for _, entry := range dict.Characters {
|
2017-06-11 00:35:58 +00:00
|
|
|
kanjiCurr := kanjidicExtractKanji(entry, langTag)
|
|
|
|
if kanjiCurr != nil {
|
|
|
|
kanji = append(kanji, *kanjiCurr)
|
|
|
|
}
|
2016-08-24 16:02:26 +00:00
|
|
|
}
|
2016-08-07 01:17:02 +00:00
|
|
|
|
2017-09-10 20:37:40 +00:00
|
|
|
if title == "" {
|
|
|
|
title = "KANJIDIC2"
|
|
|
|
}
|
|
|
|
|
2017-09-10 20:25:11 +00:00
|
|
|
tags := dbTagList{
|
|
|
|
dbTag{Name: "jouyou", Notes: "included in list of regular-use characters", Category: "frequent", Order: -5},
|
|
|
|
dbTag{Name: "jinmeiyou", Notes: "included in list of characters for use in personal names", Category: "frequent", Order: -5},
|
2017-09-17 21:11:15 +00:00
|
|
|
|
2017-09-18 02:53:56 +00:00
|
|
|
dbTag{Name: "freq", Notes: "Frequency", Category: "misc"},
|
2017-09-23 02:17:00 +00:00
|
|
|
dbTag{Name: "grade", Notes: "Grade level", Category: "misc"},
|
|
|
|
dbTag{Name: "jlpt", Notes: "JLPT level", Category: "misc"},
|
|
|
|
dbTag{Name: "strokes", Notes: "Stroke count", Category: "misc"},
|
2017-09-18 02:53:56 +00:00
|
|
|
|
2017-09-23 02:17:00 +00:00
|
|
|
dbTag{Name: "jis208", Notes: "JIS X 0208-1997 kuten code", Category: "code"},
|
|
|
|
dbTag{Name: "jis212", Notes: "JIS X 0212-1990 kuten code", Category: "code"},
|
|
|
|
dbTag{Name: "jis213", Notes: "JIS X 0213-2000 kuten code", Category: "code"},
|
|
|
|
dbTag{Name: "ucs", Notes: "Unicode hex code", Category: "code"},
|
2017-09-18 02:53:56 +00:00
|
|
|
|
2017-09-23 02:17:00 +00:00
|
|
|
dbTag{Name: "deroo", Notes: "2001 Kanji", Category: "class"},
|
|
|
|
dbTag{Name: "four_corner", Notes: "Four corner code", Category: "class"},
|
2017-09-22 06:23:28 +00:00
|
|
|
dbTag{Name: "misclass", Notes: "Misclassification", Category: "class"},
|
2017-09-23 02:17:00 +00:00
|
|
|
dbTag{Name: "sh_desc", Notes: "The Kanji Dictionary", Category: "class"},
|
|
|
|
dbTag{Name: "skip", Notes: "SKIP code", Category: "class"},
|
2017-09-18 02:53:56 +00:00
|
|
|
|
|
|
|
dbTag{Name: "busy_people", Notes: "Japanese For Busy People", Category: "index"},
|
|
|
|
dbTag{Name: "crowley", Notes: "The Kanji Way to Japanese Language Power", Category: "index"},
|
|
|
|
dbTag{Name: "gakken", Notes: "A New Dictionary of Kanji Usage", Category: "index"},
|
|
|
|
dbTag{Name: "halpern_kkd", Notes: "Kodansha Kanji Dictionary", Category: "index"},
|
|
|
|
dbTag{Name: "halpern_kkld", Notes: "Kanji Learners Dictionary", Category: "index"},
|
|
|
|
dbTag{Name: "halpern_kkld_2ed", Notes: "Kanji Learners Dictionary", Category: "index"},
|
|
|
|
dbTag{Name: "halpern_njecd", Notes: "New Japanese-English Character Dictionary", Category: "index"},
|
|
|
|
dbTag{Name: "heisig", Notes: "Remembering The Kanji", Category: "index"},
|
|
|
|
dbTag{Name: "heisig6", Notes: "Remembering The Kanji, Sixth Ed.", Category: "index"},
|
|
|
|
dbTag{Name: "henshall", Notes: "A Guide To Remembering Japanese Characters", Category: "index"},
|
|
|
|
dbTag{Name: "henshall3", Notes: "A Guide To Reading and Writing Japanese", Category: "index"},
|
|
|
|
dbTag{Name: "jf_cards", Notes: "Japanese Kanji Flashcards", Category: "index"},
|
|
|
|
dbTag{Name: "kanji_in_context", Notes: "Kanji in Context", Category: "index"},
|
|
|
|
dbTag{Name: "kodansha_compact", Notes: "Kodansha Compact Kanji Guide", Category: "index"},
|
|
|
|
dbTag{Name: "maniette", Notes: "Les Kanjis dans la tete", Category: "index"},
|
|
|
|
dbTag{Name: "moro", Notes: "Daikanwajiten", Category: "index"},
|
|
|
|
dbTag{Name: "nelson_c", Notes: "Modern Reader's Japanese-English Character Dictionary", Category: "index"},
|
|
|
|
dbTag{Name: "nelson_n", Notes: "The New Nelson Japanese-English Character Dictionary", Category: "index"},
|
|
|
|
dbTag{Name: "oneill_kk", Notes: "Essential Kanji", Category: "index"},
|
|
|
|
dbTag{Name: "oneill_names", Notes: "Japanese Names", Category: "index"},
|
|
|
|
dbTag{Name: "sakade", Notes: "A Guide To Reading and Writing Japanese", Category: "index"},
|
|
|
|
dbTag{Name: "sh_kk", Notes: "Kanji and Kana", Category: "index"},
|
|
|
|
dbTag{Name: "sh_kk2", Notes: "Kanji and Kana", Category: "index"},
|
|
|
|
dbTag{Name: "tutt_cards", Notes: "Tuttle Kanji Cards", Category: "index"},
|
2016-12-17 23:48:13 +00:00
|
|
|
}
|
|
|
|
|
2017-09-10 20:25:11 +00:00
|
|
|
recordData := map[string]dbRecordList{
|
|
|
|
"kanji": kanji.crush(),
|
2017-09-10 20:45:06 +00:00
|
|
|
"tag": tags.crush(),
|
2017-09-10 20:25:11 +00:00
|
|
|
}
|
|
|
|
|
2023-01-22 20:27:02 +00:00
|
|
|
index := dbIndex{
|
|
|
|
Title: title,
|
|
|
|
Revision: "kanjidic2",
|
|
|
|
Sequenced: false,
|
|
|
|
Attribution: edrdgAttribution,
|
|
|
|
}
|
|
|
|
|
2016-11-05 20:13:13 +00:00
|
|
|
return writeDb(
|
2017-06-26 00:22:17 +00:00
|
|
|
outputPath,
|
2023-01-22 20:27:02 +00:00
|
|
|
index,
|
2017-09-10 20:25:11 +00:00
|
|
|
recordData,
|
2016-12-29 01:45:33 +00:00
|
|
|
stride,
|
2016-12-19 01:31:27 +00:00
|
|
|
pretty,
|
2016-11-05 20:13:13 +00:00
|
|
|
)
|
2016-08-07 01:17:02 +00:00
|
|
|
}
|