1
yomichan-import/kanjidic.go

119 lines
3.5 KiB
Go
Raw Normal View History

2016-08-07 01:17:02 +00:00
/*
* Copyright (c) 2016 Alex Yatskov <alex@foosoft.net>
* Author: Alex Yatskov <alex@foosoft.net>
*
* Permission is hereby granted, free of charge, to any person obtaining a copy of
* this software and associated documentation files (the "Software"), to deal in
* the Software without restriction, including without limitation the rights to
* use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of
* the Software, and to permit persons to whom the Software is furnished to do so,
* subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in all
* copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS
* FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR
* COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER
* IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*/
package main
import (
2016-08-08 02:55:46 +00:00
"fmt"
2016-12-19 01:46:40 +00:00
"os"
2016-08-08 02:55:46 +00:00
"strconv"
2016-08-07 01:17:02 +00:00
"github.com/FooSoft/jmdict"
)
2016-12-24 05:52:49 +00:00
// KANJIDIC_REVISION is the value kanjidicExportDb passes to writeDb in the
// position following the dictionary title.
// NOTE(review): presumably a format/version marker for the generated
// database; confirm against writeDb. The ALL_CAPS name is kept as-is because
// other files may reference it.
const KANJIDIC_REVISION = "kanjidic:1"
2016-12-18 19:46:47 +00:00
func kanjidicExtractKanji(entry jmdict.KanjidicCharacter) dbKanji {
2016-11-05 20:13:13 +00:00
kanji := dbKanji{Character: entry.Literal}
2016-08-07 01:17:02 +00:00
2016-11-05 20:13:13 +00:00
if level := entry.Misc.JlptLevel; level != nil {
kanji.addTags(fmt.Sprintf("jlpt:%s", *level))
2016-08-08 02:55:46 +00:00
}
2016-11-05 20:13:13 +00:00
if grade := entry.Misc.Grade; grade != nil {
kanji.addTags(fmt.Sprintf("grade:%s", *grade))
2016-08-08 02:55:46 +00:00
if gradeInt, err := strconv.Atoi(*grade); err == nil {
if gradeInt >= 1 && gradeInt <= 8 {
2016-11-05 20:13:13 +00:00
kanji.addTags("jouyou")
2016-08-08 02:55:46 +00:00
} else if gradeInt >= 9 && gradeInt <= 10 {
2016-11-05 20:13:13 +00:00
kanji.addTags("jinmeiyou")
2016-08-08 02:55:46 +00:00
}
}
}
2016-11-05 20:13:13 +00:00
for _, number := range entry.DictionaryNumbers {
2016-08-08 02:55:46 +00:00
if number.Type == "heisig" {
2016-11-05 20:13:13 +00:00
kanji.addTags(fmt.Sprintf("heisig:%s", number.Value))
2016-08-08 02:55:46 +00:00
}
}
2016-11-05 20:13:13 +00:00
if counts := entry.Misc.StrokeCounts; len(counts) > 0 {
kanji.addTags(fmt.Sprintf("strokes:%s", counts[0]))
2016-08-08 02:55:46 +00:00
}
2016-11-05 20:13:13 +00:00
if entry.ReadingMeaning != nil {
for _, m := range entry.ReadingMeaning.Meanings {
2016-08-07 01:17:02 +00:00
if m.Language == nil || *m.Language == "en" {
2016-11-05 20:13:13 +00:00
kanji.Meanings = append(kanji.Meanings, m.Meaning)
2016-08-07 01:17:02 +00:00
}
}
2016-11-05 20:13:13 +00:00
for _, r := range entry.ReadingMeaning.Readings {
2016-08-07 01:17:02 +00:00
switch r.Type {
case "ja_on":
2016-11-05 20:13:13 +00:00
kanji.Onyomi = append(kanji.Onyomi, r.Value)
2016-08-07 01:17:02 +00:00
case "ja_kun":
2016-11-05 20:13:13 +00:00
kanji.Kunyomi = append(kanji.Kunyomi, r.Value)
2016-08-07 01:17:02 +00:00
}
}
}
2016-11-05 20:13:13 +00:00
return kanji
2016-08-07 01:17:02 +00:00
}
2016-12-19 01:46:40 +00:00
func kanjidicExportDb(inputPath, outputDir, title string, pretty bool) error {
reader, err := os.Open(inputPath)
if err != nil {
return err
}
defer reader.Close()
2016-08-24 16:02:26 +00:00
dict, err := jmdict.LoadKanjidic(reader)
if err != nil {
return err
}
2016-08-23 03:51:30 +00:00
2016-11-05 20:13:13 +00:00
var kanji dbKanjiList
for _, entry := range dict.Characters {
2016-12-18 19:46:47 +00:00
kanji = append(kanji, kanjidicExtractKanji(entry))
2016-08-24 16:02:26 +00:00
}
2016-08-07 01:17:02 +00:00
2016-12-17 23:48:13 +00:00
tagMeta := map[string]dbTagMeta{
2016-12-18 19:46:47 +00:00
"jouyou": {Notes: "included in list of regular-use characters", Category: "frequent", Order: -5},
"jinmeiyou": {Notes: "included in list of characters for use in personal names", Category: "frequent", Order: -5},
2016-12-17 23:48:13 +00:00
"jlpt": {Notes: "corresponding Japanese Language Proficiency Test level"},
"grade": {Notes: "school grade level at which the character is taught"},
"strokes": {Notes: "number of strokes needed to write the character"},
"heisig": {Notes: "frame number in Remembering the Kanji"},
}
2016-11-05 20:13:13 +00:00
return writeDb(
outputDir,
2016-11-06 00:09:23 +00:00
title,
2016-12-24 05:52:49 +00:00
KANJIDIC_REVISION,
2016-11-06 06:24:57 +00:00
nil,
2016-11-05 20:13:13 +00:00
kanji.crush(),
2016-12-17 23:48:13 +00:00
tagMeta,
2016-12-19 01:31:27 +00:00
pretty,
2016-11-05 20:13:13 +00:00
)
2016-08-07 01:17:02 +00:00
}