1
yomichan-import/frequency.go

75 lines
1.5 KiB
Go
Raw Normal View History

2021-01-01 22:31:58 +00:00
package yomichan
2017-09-10 20:25:11 +00:00
import (
"bufio"
"os"
"strconv"
"strings"
)
2017-09-10 20:45:06 +00:00
func frequencyTermsExportDb(inputPath, outputPath, language, title string, stride int, pretty bool) error {
2017-09-23 06:03:05 +00:00
return frequncyExportDb(inputPath, outputPath, language, title, stride, pretty, "term_meta")
2017-09-10 20:45:06 +00:00
}
func frequencyKanjiExportDb(inputPath, outputPath, language, title string, stride int, pretty bool) error {
2017-09-23 06:03:05 +00:00
return frequncyExportDb(inputPath, outputPath, language, title, stride, pretty, "kanji_meta")
2017-09-10 20:45:06 +00:00
}
func frequncyExportDb(inputPath, outputPath, language, title string, stride int, pretty bool, key string) error {
2017-09-10 20:25:11 +00:00
reader, err := os.Open(inputPath)
if err != nil {
return err
}
defer reader.Close()
2017-09-23 06:03:05 +00:00
var frequencies dbMetaList
2017-09-10 20:25:11 +00:00
for scanner := bufio.NewScanner(reader); scanner.Scan(); {
line := scanner.Text()
if strings.HasPrefix(line, "#") {
continue
}
parts := strings.Split(line, "\t")
2017-09-23 19:50:54 +00:00
if len(parts) < 2 {
2017-09-10 20:25:11 +00:00
continue
}
expression := parts[0]
count, err := strconv.Atoi(parts[1])
if err != nil {
2017-09-23 19:50:54 +00:00
expression = parts[1]
count, err = strconv.Atoi(parts[0])
if err != nil {
continue
}
2017-09-10 20:25:11 +00:00
}
2017-09-23 06:03:05 +00:00
frequencies = append(frequencies, dbMeta{expression, "freq", count})
2017-09-10 20:25:11 +00:00
}
2017-09-12 20:51:49 +00:00
if title == "" {
title = "Frequency"
}
2017-09-10 20:25:11 +00:00
recordData := map[string]dbRecordList{
2017-09-10 20:45:06 +00:00
key: frequencies.crush(),
2017-09-10 20:25:11 +00:00
}
index := dbIndex{
Title: title,
Revision: "frequency1",
Sequenced: false,
Description: "",
Attribution: "",
}
index.setDefaults()
2017-09-10 20:25:11 +00:00
return writeDb(
outputPath,
index,
2017-09-10 20:25:11 +00:00
recordData,
stride,
pretty,
)
}