WIP
This commit is contained in:
parent
60212ff4c1
commit
63c8268fcc
62
epwing.go
62
epwing.go
@ -26,6 +26,7 @@ import (
|
|||||||
"encoding/json"
|
"encoding/json"
|
||||||
"io"
|
"io"
|
||||||
"io/ioutil"
|
"io/ioutil"
|
||||||
|
"regexp"
|
||||||
)
|
)
|
||||||
|
|
||||||
type epwingEntry struct {
|
type epwingEntry struct {
|
||||||
@ -33,23 +34,48 @@ type epwingEntry struct {
|
|||||||
Text string `json:"text"`
|
Text string `json:"text"`
|
||||||
}
|
}
|
||||||
|
|
||||||
type epwingBook struct {
|
type epwingSubbook struct {
|
||||||
Title string `json:"title"`
|
Title string `json:"title"`
|
||||||
Copyright string `json:"copyright"`
|
Copyright string `json:"copyright"`
|
||||||
Entries []epwingEntry `json:"entries"`
|
Entries []epwingEntry `json:"entries"`
|
||||||
}
|
}
|
||||||
|
|
||||||
type epwingDict struct {
|
type epwingBook struct {
|
||||||
CharacterCode string `json:"characterCode"`
|
CharCode string `json:"charCode"`
|
||||||
DiscCode string `json:"discCode"`
|
DiscCode string `json:"discCode"`
|
||||||
SubBooks []epwingBook `json:"subBooks"`
|
Subbooks []epwingSubbook `json:"subbooks"`
|
||||||
}
|
}
|
||||||
|
|
||||||
func extractDaijisenTerms(entry epwingEntry) []dbTerm {
|
// 3934
|
||||||
|
// (?P<kana>[^(【<]+)(?:【(?P<kanji>.*)】)?(?:<\?>(?P<native>.*)<\?>)?(?:((?P<tag>.*)))?
|
||||||
|
// (?P<kana>[^(【〖]+)(?:【(?P<expression>.*)】)?(?:〖(?P<native>.*)〗)?(?:((?P<tag>.*)))?
|
||||||
|
// "heading": "きれ‐あが・る【切れ上がる】",
|
||||||
|
// "text": "きれ‐あが・る【切れ上がる】\n[動ラ五(四)]上の方まで切れる。また、目尻や額の生え際などが上の方へ上がっている。「―・った目元」\n"
|
||||||
|
// },
|
||||||
|
// {
|
||||||
|
// "heading": "きれ‐あじ【切れ味】‐あぢ",
|
||||||
|
// "text": "きれ‐あじ【切れ味】‐あぢ\n<?>刃物の切れぐあい。「―のいいナイフ」<?>才能・技などの鋭さ。「鋭い―の批評」「―のいいショット」\n"
|
||||||
|
// },
|
||||||
|
// {
|
||||||
|
// "heading": "き‐れい【×綺麗・奇麗】",
|
||||||
|
// "text": "き‐れい【×綺麗・奇麗】\n[形動]<?>[ナリ]<?>色・形などが華やかな美しさをもっているさま。「―な花」「―に着飾る」<?>姿・顔かたちが整っていて美しいさま。「―な脚」「―な女性」<?>声などが快く聞こえるさま。「―な発音」<?>よごれがなく清潔なさま。「手を―に洗う」「―な空気」「―な選挙」<?>男女間に肉体的な交渉がないさま。清純。「―な関係」<?>乱れたところがないさま。整然としているさま。「机の上を―に片づける」<?>(「きれいに」の形で)残りなく物事が行われるさま。すっかり。「―に忘れる」「―にたいらげる」→美しい[用法]\n[派生]きれいさ[名]\n[類語](<?>)美しい・美美(びび)しい・煌(きら)やか・鮮やか・美麗・華麗・華美・鮮麗・流麗・優美・美的/(<?>)麗(うるわ)しい・見目よい・端整・端麗・秀麗・佳麗(かれい)・艶美(えんび)・艶麗(えんれい)・あでやか/(<?>)清い・清らか・清潔・清浄(せいじよう・しようじよう)・清澄・清冽(せいれつ)・無垢(むく)・純潔・潔白(けつぱく)\n"
|
||||||
|
// },
|
||||||
|
|
||||||
|
func extractDaijirinTerms(entry epwingEntry) []dbTerm {
|
||||||
|
exp := regexp.MustCompile(`(?P<kana>[^(【〖]+)(?:【(?P<expression>.*)】)?(?:〖(?P<native>.*)〗)?(?:((?P<tag>.*)))?`)
|
||||||
|
matches := exp.FindStringSubmatch(entry.Heading)
|
||||||
|
|
||||||
|
results := make(map[string]string)
|
||||||
|
for i, name := range exp.SubexpNames() {
|
||||||
|
if i > 0 {
|
||||||
|
results[name] = matches[i]
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
return nil
|
return nil
|
||||||
}
|
}
|
||||||
|
|
||||||
func extractDaijisenKanji(entry epwingEntry) []dbKanji {
|
func extractDaijirinKanji(entry epwingEntry) []dbKanji {
|
||||||
return nil
|
return nil
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -59,32 +85,30 @@ func exportEpwingDb(outputDir, title string, reader io.Reader, flags int) error
|
|||||||
return err
|
return err
|
||||||
}
|
}
|
||||||
|
|
||||||
var dict epwingDict
|
var book epwingBook
|
||||||
if err := json.Unmarshal(data, &dict); err != nil {
|
if err := json.Unmarshal(data, &book); err != nil {
|
||||||
return err
|
return err
|
||||||
}
|
}
|
||||||
|
|
||||||
termExtractors := map[string]func(epwingEntry) []dbTerm{
|
termExtractors := map[string]func(epwingEntry) []dbTerm{
|
||||||
"大辞泉": extractDaijisenTerms,
|
"三省堂 スーパー大辞林": extractDaijirinTerms,
|
||||||
}
|
}
|
||||||
|
|
||||||
var terms dbTermList
|
var terms dbTermList
|
||||||
for _, subBook := range dict.SubBooks {
|
for _, subbook := range book.Subbooks {
|
||||||
if extractor, ok := termExtractors[subBook.Title]; ok {
|
if extractor, ok := termExtractors[subbook.Title]; ok {
|
||||||
for _, entry := range subBook.Entries {
|
for _, entry := range subbook.Entries {
|
||||||
terms = append(terms, extractor(entry)...)
|
terms = append(terms, extractor(entry)...)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
kanjiExtractors := map[string]func(epwingEntry) []dbKanji{
|
kanjiExtractors := map[string]func(epwingEntry) []dbKanji{}
|
||||||
"大辞泉": extractDaijisenKanji,
|
|
||||||
}
|
|
||||||
|
|
||||||
var kanji dbKanjiList
|
var kanji dbKanjiList
|
||||||
for _, subBook := range dict.SubBooks {
|
for _, subbook := range book.Subbooks {
|
||||||
if extractor, ok := kanjiExtractors[subBook.Title]; ok {
|
if extractor, ok := kanjiExtractors[subbook.Title]; ok {
|
||||||
for _, entry := range subBook.Entries {
|
for _, entry := range subbook.Entries {
|
||||||
kanji = append(kanji, extractor(entry)...)
|
kanji = append(kanji, extractor(entry)...)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
Loading…
Reference in New Issue
Block a user