257 lines
5.9 KiB
Go
257 lines
5.9 KiB
Go
package yomichan
|
||
|
||
import (
|
||
"os"
|
||
"strings"
|
||
|
||
"foosoft.net/projects/jmdict"
|
||
"golang.org/x/exp/slices"
|
||
)
|
||
|
||
func (h *headword) InfoSymbols() string {
|
||
infoSymbols := []string{}
|
||
if h.IsPriority {
|
||
infoSymbols = append(infoSymbols, prioritySymbol)
|
||
}
|
||
if h.IsRareKanji {
|
||
infoSymbols = append(infoSymbols, rareKanjiSymbol)
|
||
}
|
||
if h.IsIrregular {
|
||
infoSymbols = append(infoSymbols, irregularSymbol)
|
||
}
|
||
if h.IsOutdated {
|
||
infoSymbols = append(infoSymbols, outdatedSymbol)
|
||
}
|
||
return strings.Join(infoSymbols[:], " | ")
|
||
}
|
||
|
||
func (h *headword) GlossText() string {
|
||
gloss := h.Expression
|
||
if h.IsAteji {
|
||
gloss = "〈" + gloss + "〉"
|
||
}
|
||
symbolText := h.InfoSymbols()
|
||
if symbolText != "" {
|
||
gloss += "(" + symbolText + ")"
|
||
}
|
||
return gloss
|
||
}
|
||
|
||
func (h *headword) TableColHeaderText() string {
|
||
text := h.KanjiForm()
|
||
if h.IsAteji {
|
||
text = "〈" + text + "〉"
|
||
}
|
||
return text
|
||
}
|
||
|
||
func (h *headword) TableRowHeaderText() string {
|
||
text := h.Reading
|
||
if h.IsGikun {
|
||
text = "〈" + text + "〉"
|
||
}
|
||
return text
|
||
}
|
||
|
||
func (h *headword) TableCellText() string {
|
||
text := h.InfoSymbols()
|
||
if text == "" {
|
||
return defaultSymbol
|
||
} else {
|
||
return text
|
||
}
|
||
}
|
||
|
||
func (h *headword) KanjiForm() string {
|
||
if h.IsKanaOnly() {
|
||
return "∅"
|
||
} else {
|
||
return h.Expression
|
||
}
|
||
}
|
||
|
||
func needsFormTable(headwords []headword) bool {
|
||
// Does the entry contain more than 1 distinct reading?
|
||
// E.g. バカがい and ばかがい are not distinct.
|
||
uniqueReading := ""
|
||
for _, h := range headwords {
|
||
if h.IsGikun {
|
||
return true
|
||
} else if h.IsSearchOnly {
|
||
continue
|
||
} else if h.IsKanaOnly() {
|
||
continue
|
||
} else if uniqueReading == "" {
|
||
uniqueReading = katakanaToHiragana(h.Reading)
|
||
} else if uniqueReading != katakanaToHiragana(h.Reading) {
|
||
return true
|
||
}
|
||
}
|
||
return false
|
||
}
|
||
|
||
// formTableData holds everything needed to render the forms table:
// the ordered column and row keys, the header text for each key, and
// the text of every cell.
type formTableData struct {
	// Column keys (kanji forms) and row keys (readings), in table order.
	kanjiForms, readings []string

	// Header text, looked up by kanji form and by reading respectively.
	colHeaderText, rowHeaderText map[string]string

	// Cell text, indexed as cellText[reading][kanjiForm].
	cellText map[string]map[string]string
}
|
||
|
||
func tableData(headwords []headword) formTableData {
|
||
d := formTableData{
|
||
kanjiForms: []string{},
|
||
readings: []string{},
|
||
colHeaderText: make(map[string]string),
|
||
rowHeaderText: make(map[string]string),
|
||
cellText: make(map[string]map[string]string),
|
||
}
|
||
for _, h := range headwords {
|
||
if h.IsSearchOnly {
|
||
continue
|
||
}
|
||
kanjiForm := h.KanjiForm()
|
||
if !slices.Contains(d.kanjiForms, kanjiForm) {
|
||
d.kanjiForms = append(d.kanjiForms, kanjiForm)
|
||
d.colHeaderText[kanjiForm] = h.TableColHeaderText()
|
||
}
|
||
reading := h.Reading
|
||
if !slices.Contains(d.readings, reading) {
|
||
d.readings = append(d.readings, reading)
|
||
d.rowHeaderText[reading] = h.TableRowHeaderText()
|
||
d.cellText[reading] = make(map[string]string)
|
||
}
|
||
d.cellText[reading][kanjiForm] = h.TableCellText()
|
||
}
|
||
return d
|
||
}
|
||
|
||
func formsTableGlossary(headwords []headword) []any {
|
||
d := tableData(headwords)
|
||
|
||
attr := contentAttr{}
|
||
centeredAttr := contentAttr{textAlign: "center"}
|
||
leftAttr := contentAttr{textAlign: "left"}
|
||
|
||
cornerCell := contentTableHeadCell(attr, "") // empty cell in upper left corner
|
||
headRowCells := []any{cornerCell}
|
||
for _, kanjiForm := range d.kanjiForms {
|
||
content := d.colHeaderText[kanjiForm]
|
||
cell := contentTableHeadCell(centeredAttr, content)
|
||
headRowCells = append(headRowCells, cell)
|
||
}
|
||
headRow := contentTableRow(attr, headRowCells...)
|
||
tableRows := []any{headRow}
|
||
for _, reading := range d.readings {
|
||
rowHeadCellText := d.rowHeaderText[reading]
|
||
rowHeadCell := contentTableHeadCell(leftAttr, rowHeadCellText)
|
||
rowCells := []any{rowHeadCell}
|
||
for _, kanjiForm := range d.kanjiForms {
|
||
text := d.cellText[reading][kanjiForm]
|
||
rowCell := contentTableCell(centeredAttr, text)
|
||
rowCells = append(rowCells, rowCell)
|
||
}
|
||
tableRow := contentTableRow(attr, rowCells...)
|
||
tableRows = append(tableRows, tableRow)
|
||
}
|
||
tableAttr := contentAttr{data: map[string]string{"content": "formsTable"}}
|
||
contentTable := contentTable(tableAttr, tableRows...)
|
||
content := contentStructure(contentTable)
|
||
return []any{content}
|
||
}
|
||
|
||
func formsGlossary(headwords []headword) []any {
|
||
glossary := []any{}
|
||
for _, h := range headwords {
|
||
if h.IsSearchOnly {
|
||
continue
|
||
}
|
||
text := h.GlossText()
|
||
glossary = append(glossary, text)
|
||
}
|
||
return glossary
|
||
}
|
||
|
||
func baseFormsTerm(entry jmdict.JmdictEntry, meta jmdictMetadata) dbTerm {
|
||
term := dbTerm{Sequence: entry.Sequence}
|
||
headwords := extractHeadwords(entry)
|
||
|
||
if needsFormTable(headwords) {
|
||
term.Glossary = formsTableGlossary(headwords)
|
||
} else {
|
||
term.Glossary = formsGlossary(headwords)
|
||
}
|
||
|
||
partsOfSpeech := meta.seqToPartsOfSpeech[entry.Sequence]
|
||
rules := grammarRules(partsOfSpeech)
|
||
term.addRules(rules...)
|
||
|
||
return term
|
||
}
|
||
|
||
func formsExportDb(inputPath, outputPath, languageName, title string, stride int, pretty bool) error {
|
||
reader, err := os.Open(inputPath)
|
||
if err != nil {
|
||
return err
|
||
}
|
||
defer reader.Close()
|
||
|
||
dictionary, entities, err := jmdict.LoadJmdictNoTransform(reader)
|
||
if err != nil {
|
||
return err
|
||
}
|
||
|
||
meta := newJmdictMetadata(dictionary, "")
|
||
|
||
terms := dbTermList{}
|
||
for _, entry := range dictionary.Entries {
|
||
baseTerm := baseFormsTerm(entry, meta)
|
||
headwords := extractHeadwords(entry)
|
||
for _, h := range headwords {
|
||
if h.IsSearchOnly {
|
||
if term, ok := jmdictSearchTerm(h, entry, meta); ok {
|
||
terms = append(terms, term)
|
||
}
|
||
continue
|
||
}
|
||
term := baseTerm
|
||
term.Expression = h.Expression
|
||
term.Reading = h.Reading
|
||
term.addTermTags(h.TermTags...)
|
||
term.Score = calculateTermScore(1, 0, h)
|
||
terms = append(terms, term)
|
||
}
|
||
}
|
||
|
||
tags := dbTagList{}
|
||
tags = append(tags, entityTags(entities)...)
|
||
tags = append(tags, newsFrequencyTags()...)
|
||
tags = append(tags, customDbTags()...)
|
||
|
||
if title == "" {
|
||
title = "JMdict Forms"
|
||
}
|
||
|
||
recordData := map[string]dbRecordList{
|
||
"term": terms.crush(),
|
||
"tag": tags.crush(),
|
||
}
|
||
|
||
jmdictDate := jmdictPublicationDate(dictionary)
|
||
|
||
index := dbIndex{
|
||
Title: title,
|
||
Revision: "JMdict." + jmdictDate,
|
||
Sequenced: true,
|
||
Attribution: edrdgAttribution,
|
||
}
|
||
|
||
return writeDb(
|
||
outputPath,
|
||
index,
|
||
recordData,
|
||
stride,
|
||
pretty,
|
||
)
|
||
}
|