2021-01-01 22:31:58 +00:00
|
|
|
|
package yomichan
|
2017-08-19 19:17:18 +00:00
|
|
|
|
|
|
|
|
|
import (
|
2017-08-19 19:17:31 +00:00
|
|
|
|
"database/sql"
|
2017-08-19 21:13:51 +00:00
|
|
|
|
"regexp"
|
2017-08-19 19:17:18 +00:00
|
|
|
|
"strings"
|
2017-08-19 19:17:31 +00:00
|
|
|
|
|
|
|
|
|
_ "github.com/mattn/go-sqlite3"
|
2017-08-19 19:17:18 +00:00
|
|
|
|
)
|
|
|
|
|
|
2017-08-19 21:13:51 +00:00
|
|
|
|
// rikaiEntry groups three string fields whose names match the columns
// selected from the "dict" table in rikaiExportDb (kanji, kana, entry).
// NOTE(review): no function in the visible part of this file references the
// type; confirm whether it is still needed.
type rikaiEntry struct {
	kanji, kana, entry string
}
|
|
|
|
|
|
2017-08-19 19:17:18 +00:00
|
|
|
|
func rikaiBuildRules(term *dbTerm) {
|
2017-10-12 23:48:58 +00:00
|
|
|
|
for _, tag := range term.DefinitionTags {
|
2017-08-19 19:17:18 +00:00
|
|
|
|
switch tag {
|
2020-12-06 04:48:34 +00:00
|
|
|
|
case "adj-i", "v1", "vk", "vz":
|
2017-08-19 19:17:18 +00:00
|
|
|
|
term.addRules(tag)
|
|
|
|
|
default:
|
|
|
|
|
if strings.HasPrefix(tag, "v5") {
|
|
|
|
|
term.addRules("v5")
|
|
|
|
|
} else if strings.HasPrefix(tag, "vs") {
|
|
|
|
|
term.addRules("vs")
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
func rikaiBuildScore(term *dbTerm) {
|
2017-10-12 23:48:58 +00:00
|
|
|
|
for _, tag := range term.DefinitionTags {
|
2017-08-19 19:17:18 +00:00
|
|
|
|
switch tag {
|
2017-08-24 03:03:06 +00:00
|
|
|
|
case "news", "ichi", "spec", "gai":
|
2017-09-10 20:37:40 +00:00
|
|
|
|
term.Score++
|
|
|
|
|
case "arch", "iK":
|
|
|
|
|
term.Score--
|
2017-08-19 19:17:18 +00:00
|
|
|
|
case "P":
|
|
|
|
|
term.Score += 5
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
2017-08-19 21:13:51 +00:00
|
|
|
|
func rikaiExtractTerms(rows *sql.Rows) (dbTermList, error) {
|
|
|
|
|
var terms dbTermList
|
|
|
|
|
|
|
|
|
|
dfnExp := regexp.MustCompile(`^(?:*\(KC\) )?((?:\((?:[\w\-\,\:]*)*\)\s*)*)(.*)$`)
|
|
|
|
|
readExp := regexp.MustCompile(`\[([^\]]+)\]`)
|
|
|
|
|
tagExp := regexp.MustCompile(`[\s\(\),]`)
|
|
|
|
|
|
2017-10-12 23:48:58 +00:00
|
|
|
|
var sequence int
|
|
|
|
|
|
2017-08-19 21:13:51 +00:00
|
|
|
|
for rows.Next() {
|
|
|
|
|
var (
|
|
|
|
|
kanji, kana, entry *string
|
|
|
|
|
)
|
|
|
|
|
|
|
|
|
|
if err := rows.Scan(&kanji, &kana, &entry); err != nil {
|
|
|
|
|
return nil, err
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
if entry == nil {
|
|
|
|
|
continue
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
segments := strings.Split(*entry, "/")
|
|
|
|
|
indexFirst := 0
|
|
|
|
|
|
|
|
|
|
if !strings.HasPrefix(segments[0], "*") && !strings.HasPrefix(segments[0], "(") {
|
|
|
|
|
expParts := strings.Split(segments[0], " ")
|
|
|
|
|
if len(expParts) > 1 {
|
|
|
|
|
if readMatch := readExp.FindStringSubmatch(expParts[1]); readMatch != nil {
|
|
|
|
|
kana = &readMatch[1]
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
indexFirst = 1
|
|
|
|
|
if indexFirst == len(segments) {
|
|
|
|
|
continue
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
var term dbTerm
|
2017-10-12 23:48:58 +00:00
|
|
|
|
term.Sequence = sequence
|
2017-08-19 21:13:51 +00:00
|
|
|
|
if kana != nil {
|
|
|
|
|
term.Expression = *kana
|
|
|
|
|
term.Reading = *kana
|
|
|
|
|
}
|
|
|
|
|
if kanji != nil {
|
|
|
|
|
term.Expression = *kanji
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
for i := indexFirst; i < len(segments); i++ {
|
|
|
|
|
segment := segments[i]
|
|
|
|
|
|
|
|
|
|
if dfnMatch := dfnExp.FindStringSubmatch(segment); dfnMatch != nil {
|
|
|
|
|
for _, tag := range tagExp.Split(dfnMatch[1], -1) {
|
|
|
|
|
if rikaiTagParsed(tag) {
|
2017-10-12 23:48:58 +00:00
|
|
|
|
term.addDefinitionTags(tag)
|
2017-08-19 21:13:51 +00:00
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
if len(dfnMatch[2]) > 0 {
|
|
|
|
|
term.Glossary = append(term.Glossary, dfnMatch[2])
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
rikaiBuildRules(&term)
|
|
|
|
|
rikaiBuildScore(&term)
|
|
|
|
|
|
|
|
|
|
terms = append(terms, term)
|
2017-10-12 23:48:58 +00:00
|
|
|
|
|
|
|
|
|
sequence++
|
2017-08-19 21:13:51 +00:00
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
return terms, nil
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
func rikaiExportDb(inputPath, outputPath, language, title string, stride int, pretty bool) error {
|
2017-08-19 19:17:31 +00:00
|
|
|
|
db, err := sql.Open("sqlite3", inputPath)
|
2017-08-19 19:17:18 +00:00
|
|
|
|
if err != nil {
|
|
|
|
|
return err
|
|
|
|
|
}
|
2017-08-19 19:17:31 +00:00
|
|
|
|
defer db.Close()
|
2017-08-19 19:17:18 +00:00
|
|
|
|
|
2017-08-19 19:17:31 +00:00
|
|
|
|
dictRows, err := db.Query("SELECT kanji, kana, entry FROM dict")
|
2017-08-19 19:17:18 +00:00
|
|
|
|
if err != nil {
|
|
|
|
|
return err
|
|
|
|
|
}
|
|
|
|
|
|
2017-08-19 21:13:51 +00:00
|
|
|
|
terms, err := rikaiExtractTerms(dictRows)
|
|
|
|
|
if err != nil {
|
|
|
|
|
return err
|
2017-08-19 19:17:18 +00:00
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
if title == "" {
|
2017-08-19 19:17:31 +00:00
|
|
|
|
title = "Rikai"
|
2017-08-19 19:17:18 +00:00
|
|
|
|
}
|
|
|
|
|
|
2017-09-10 20:25:11 +00:00
|
|
|
|
tags := dbTagList{
|
|
|
|
|
dbTag{Name: "P", Category: "popular", Order: -10},
|
|
|
|
|
dbTag{Name: "exp", Category: "expression", Order: -5},
|
|
|
|
|
dbTag{Name: "id", Category: "expression", Order: -5},
|
|
|
|
|
dbTag{Name: "arch", Category: "archaism", Order: -4},
|
|
|
|
|
dbTag{Name: "iK", Category: "archaism", Order: -4},
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
recordData := map[string]dbRecordList{
|
2017-09-10 20:45:06 +00:00
|
|
|
|
"term": terms.crush(),
|
|
|
|
|
"tag": tags.crush(),
|
2017-08-19 21:13:51 +00:00
|
|
|
|
}
|
|
|
|
|
|
2023-01-22 20:27:02 +00:00
|
|
|
|
index := dbIndex{
|
2023-01-29 00:39:08 +00:00
|
|
|
|
Title: title,
|
|
|
|
|
Revision: "rikai2",
|
|
|
|
|
Sequenced: true,
|
2023-01-22 20:27:02 +00:00
|
|
|
|
}
|
|
|
|
|
|
2017-08-19 21:13:51 +00:00
|
|
|
|
return writeDb(
|
|
|
|
|
outputPath,
|
2023-01-22 20:27:02 +00:00
|
|
|
|
index,
|
2017-09-10 20:25:11 +00:00
|
|
|
|
recordData,
|
2017-08-19 21:13:51 +00:00
|
|
|
|
stride,
|
|
|
|
|
pretty,
|
|
|
|
|
)
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
// rikaiTagParsed reports whether tag is one of the tag names this importer
// recognises. The comparison is exact and case-sensitive (for example both
// "P" and "p" are listed, as are "iK" and "ik"); the empty string and any
// unlisted token yield false.
func rikaiTagParsed(tag string) bool {
	switch tag {
	case "Buddh", "MA", "X",
		"abbr",
		"adj", "adj-f", "adj-i", "adj-na", "adj-no", "adj-pn", "adj-t",
		"adv", "adv-n", "adv-to",
		"arch", "ateji",
		"aux", "aux-adj", "aux-v",
		"c", "chn", "col", "comp", "conj", "ctr",
		"derog",
		"eK", "ek", "exp",
		"f", "fam", "fem", "food",
		"g", "geom", "gikun", "gram",
		"h", "hon", "hum",
		"iK", "id", "ik", "int", "io", "iv",
		"ling",
		"m", "m-sl", "male", "male-sl", "math", "mil",
		"n", "n-adv", "n-pref", "n-suf", "n-t", "num",
		"oK", "obs", "obsc", "ok", "on-mim",
		"P", "p", "physics", "pn", "poet", "pol", "pr", "pref", "prt",
		"rare",
		"s", "sens", "sl", "st", "suf",
		"u", "uK", "uk",
		"v1", "v2a-s", "v4h", "v4r",
		"v5", "v5aru", "v5b", "v5g", "v5k", "v5k-s", "v5m", "v5n",
		"v5r", "v5r-i", "v5s", "v5t", "v5u", "v5u-s", "v5uru", "v5z",
		"vi", "vk", "vn",
		"vs", "vs-c", "vs-i", "vs-s",
		"vt", "vulg", "vz":
		return true
	}

	return false
}
|