rtk-merge/main.go

226 lines
4.6 KiB
Go
Raw Normal View History

2024-03-31 03:44:24 +00:00
package main
import (
"encoding/csv"
2024-03-31 04:47:56 +00:00
"encoding/json"
2024-03-31 04:14:00 +00:00
"errors"
2024-03-31 03:44:24 +00:00
"flag"
"fmt"
2024-03-31 05:00:58 +00:00
"html"
2024-03-31 03:44:24 +00:00
"log"
"os"
"path/filepath"
2024-03-31 04:14:00 +00:00
"strings"
"github.com/themoeway/jmdict-go"
2024-03-31 03:44:24 +00:00
)
// StoryEntry is a single story record nested inside a KanjiEntry in the
// stories JSON file. Field names map to the JSON keys given in the tags.
type StoryEntry struct {
	Author        string `json:"author"`
	Content       string `json:"content"`
	ModifiedDate  string `json:"modifiedDate"`
	StarredCount  int    `json:"starredCount"`
	ReportedCount int    `json:"reportedCount"`
}

// KanjiEntry is one element of the top-level array in the stories JSON file.
// Character is the key rows are matched against; Stories holds the records
// that get rendered into the output table.
type KanjiEntry struct {
	Character   string       `json:"character"`
	Reading     string       `json:"reading"`
	FrameNumber int          `json:"frameNumber"`
	StrokeCount int          `json:"strokeCount"`
	Story       string       `json:"story"`
	Stories     []StoryEntry `json:"stories"`
}

// TableRow is one record (list of cells) of the tab-separated notes file.
type TableRow = []string

// Table is the full notes file held in memory as a list of rows.
type Table = []TableRow
2024-03-31 05:00:58 +00:00
// wrapSpan returns content enclosed in a <span> element carrying the given
// class. The content is HTML-escaped; the class value is inserted verbatim
// inside single quotes.
func wrapSpan(content, class string) string {
	escaped := html.EscapeString(content)
	// Every operand is a string, so Sprint joins them without separators.
	return fmt.Sprint("<span class='", class, "'>", escaped, "</span>")
}
2024-03-31 04:47:56 +00:00
func injectStories(table Table, path string, heisigIndex int) error {
fp, err := os.Open(path)
if err != nil {
return err
}
defer fp.Close()
decoder := json.NewDecoder(fp)
var characters []KanjiEntry
if err := decoder.Decode(&characters); err != nil {
return err
}
2024-03-31 16:22:32 +00:00
seenStories := make(map[string]bool)
2024-03-31 04:47:56 +00:00
for i, row := range table {
for _, character := range characters {
if character.Character != row[heisigIndex] {
continue
}
var stories strings.Builder
for _, story := range character.Stories {
2024-03-31 16:22:32 +00:00
storyNoBreaks := strings.ReplaceAll(story.Content, "\n", " ")
if seen, _ := seenStories[storyNoBreaks]; seen {
continue
} else {
stories.WriteString(wrapSpan(storyNoBreaks, "rtk-story"))
seenStories[storyNoBreaks] = true
}
2024-03-31 04:47:56 +00:00
}
row = append(row, stories.String())
table[i] = row
}
}
2024-03-31 03:44:24 +00:00
return nil
}
2024-03-31 04:47:56 +00:00
func injectKanjidic(table Table, path string, heisigIndex int) error {
2024-03-31 04:14:00 +00:00
fp, err := os.Open(path)
if err != nil {
return err
}
defer fp.Close()
kd, err := jmdict.LoadKanjidic(fp)
if err != nil {
return err
}
for i, row := range table {
var found bool
for _, character := range kd.Characters {
2024-03-31 04:47:56 +00:00
if character.Literal != row[heisigIndex] {
2024-03-31 04:14:00 +00:00
continue
}
var (
2024-03-31 05:00:58 +00:00
meanings strings.Builder
kunyomi strings.Builder
onyomi strings.Builder
2024-03-31 04:14:00 +00:00
)
for _, reading := range character.ReadingMeaning.Readings {
switch reading.Type {
case "ja_on":
2024-03-31 05:00:58 +00:00
onyomi.WriteString(wrapSpan(reading.Value, "rtk-onyomi"))
2024-03-31 04:14:00 +00:00
case "ja_kun":
2024-03-31 05:00:58 +00:00
kunyomi.WriteString(wrapSpan(reading.Value, "rtk-kunyomi"))
2024-03-31 04:14:00 +00:00
}
}
for _, meaning := range character.ReadingMeaning.Meanings {
if meaning.Language == nil {
2024-03-31 05:00:58 +00:00
meanings.WriteString(wrapSpan(meaning.Meaning, "rtk-meaning"))
2024-03-31 04:14:00 +00:00
}
}
2024-03-31 05:00:58 +00:00
row = append(row, meanings.String())
row = append(row, kunyomi.String())
row = append(row, onyomi.String())
2024-03-31 04:14:00 +00:00
table[i] = row
found = true
}
if !found {
return errors.New("character not found")
}
}
2024-03-31 03:44:24 +00:00
return nil
}
2024-03-31 04:47:56 +00:00
func loadTable(path string, heisigIndex, columnLimit int) (Table, error) {
2024-03-31 03:44:24 +00:00
fp, err := os.Open(path)
if err != nil {
return nil, err
}
defer fp.Close()
reader := csv.NewReader(fp)
reader.Comment = '#'
reader.Comma = '\t'
table, err := reader.ReadAll()
if err != nil {
return nil, err
}
for i := range table {
2024-03-31 04:47:56 +00:00
table[i] = table[i][:columnLimit]
if len(table[i]) < heisigIndex {
return nil, errors.New("unexpected heisig index")
2024-03-31 04:14:00 +00:00
}
2024-03-31 03:44:24 +00:00
}
return table, nil
}
func saveTable(path string, table Table) error {
fp, err := os.Create(path)
if err != nil {
return err
}
defer fp.Close()
writer := csv.NewWriter(fp)
2024-03-31 05:00:58 +00:00
writer.Comma = '\t'
2024-03-31 03:44:24 +00:00
if err := writer.WriteAll(table); err != nil {
return err
}
return nil
}
func main() {
var (
storiesPath = flag.String("stories", "", "path for stories JSON")
kanjidicPath = flag.String("kanjidic", "", "path for KANJIDIC")
2024-03-31 04:47:56 +00:00
heisigIndex = flag.Int("heisig", 0, "heisig index column index")
columnLimit = flag.Int("columns", 3, "column trim value")
2024-03-31 03:44:24 +00:00
)
flag.Usage = func() {
fmt.Fprintf(os.Stderr, "Usage: %s [options] <notes_in.txt> <notes_out.txt>\n", filepath.Base(os.Args[0]))
fmt.Fprintln(os.Stderr, "Options:")
flag.PrintDefaults()
}
flag.Parse()
args := flag.Args()
if len(args) != 2 {
flag.Usage()
os.Exit(2)
}
2024-03-31 04:47:56 +00:00
table, err := loadTable(args[0], *heisigIndex, *columnLimit)
2024-03-31 03:44:24 +00:00
if err != nil {
log.Fatal(err)
}
2024-03-31 05:00:58 +00:00
if len(*kanjidicPath) > 0 {
if err := injectKanjidic(table, *kanjidicPath, *heisigIndex); err != nil {
2024-03-31 03:44:24 +00:00
log.Fatal(err)
}
}
2024-03-31 05:00:58 +00:00
if len(*storiesPath) > 0 {
if err := injectStories(table, *storiesPath, *heisigIndex); err != nil {
2024-03-31 03:44:24 +00:00
log.Fatal(err)
}
}
2024-03-31 04:14:00 +00:00
if err := saveTable(args[1], table); err != nil {
2024-03-31 03:44:24 +00:00
log.Fatal(err)
}
}