1
yomichan-import/jmdict_forms.go

267 lines
6.2 KiB
Go
Raw Normal View History

2023-01-22 20:37:18 +00:00
package yomichan
import (
"os"
"strings"
"foosoft.net/projects/jmdict"
"golang.org/x/exp/slices"
)
// kata2hira returns a copy of word in which katakana code points in the
// ranges ァ–ヶ and ヽ–ヾ are replaced by the code point 0x60 below them
// (their hiragana counterparts). Every other character, including the long
// vowel mark ー, is passed through unchanged.
func kata2hira(word string) string {
	// Distance between a katakana code point and its hiragana equivalent.
	const kanaOffset = 0x60

	toHiragana := func(r rune) rune {
		switch {
		case 'ァ' <= r && r <= 'ヶ', 'ヽ' <= r && r <= 'ヾ':
			return r - kanaOffset
		default:
			return r
		}
	}
	return strings.Map(toHiragana, word)
}
// InfoSymbols returns the symbols for the flags set on h, joined by " | ".
// The order is fixed: priority, rare kanji, irregular, outdated. The result
// is the empty string when none of those flags is set.
func (h *headword) InfoSymbols() string {
	flagged := []struct {
		set    bool
		symbol string
	}{
		{h.IsPriority, prioritySymbol},
		{h.IsRareKanji, rareKanjiSymbol},
		{h.IsIrregular, irregularSymbol},
		{h.IsOutdated, outdatedSymbol},
	}

	var symbols []string
	for _, f := range flagged {
		if f.set {
			symbols = append(symbols, f.symbol)
		}
	}
	return strings.Join(symbols, " | ")
}
// GlossText returns the plain-text glossary line for h: its expression
// (wrapped in 〈〉 when h is ateji) followed by its info symbols, if any.
func (h *headword) GlossText() string {
	text := h.Expression
	if h.IsAteji {
		text = "〈" + text + "〉"
	}
	if symbols := h.InfoSymbols(); symbols != "" {
		// NOTE(review): both delimiters around the symbols are empty strings,
		// so the symbols are appended directly to the expression. This looks
		// like delimiter characters may have been lost — confirm intent.
		text += "" + symbols + ""
	}
	return text
}
// TableColHeaderText returns the column header for h in the forms table:
// the value of KanjiForm, wrapped in 〈〉 when h is ateji.
func (h *headword) TableColHeaderText() string {
	form := h.KanjiForm()
	if !h.IsAteji {
		return form
	}
	return "〈" + form + "〉"
}
// TableRowHeaderText returns the row header for h in the forms table:
// its reading, wrapped in 〈〉 when h is gikun.
func (h *headword) TableRowHeaderText() string {
	if !h.IsGikun {
		return h.Reading
	}
	return "〈" + h.Reading + "〉"
}
// TableCellText returns the forms-table cell content for h: its info
// symbols, or defaultSymbol when h has none.
func (h *headword) TableCellText() string {
	if symbols := h.InfoSymbols(); symbols != "" {
		return symbols
	}
	return defaultSymbol
}
// KanjiForm returns the expression of h, or the placeholder "∅" when
// h.IsKanaOnly() reports true.
func (h *headword) KanjiForm() string {
	if !h.IsKanaOnly() {
		return h.Expression
	}
	return "∅"
}
func needsFormTable(headwords []headword) bool {
2023-01-22 20:37:18 +00:00
// Does the entry contain more than 1 distinct reading?
// E.g. バカがい and ばかがい are not distinct.
uniqueReading := ""
for _, h := range headwords {
if h.IsGikun {
return true
} else if h.IsSearchOnly {
continue
} else if h.IsKanaOnly() {
continue
} else if uniqueReading == "" {
uniqueReading = kata2hira(h.Reading)
} else if uniqueReading != kata2hira(h.Reading) {
return true
}
}
return false
}
// formTableData holds the pieces of a kanji-form × reading table as filled
// in by tableData.
type formTableData struct {
	// Column keys (kanji forms) and row keys (readings), each kept as an
	// ordered list.
	kanjiForms, readings []string

	// Header text to display, keyed by kanji form and by reading
	// respectively.
	colHeaderText, rowHeaderText map[string]string

	// Cell text, keyed first by reading and then by kanji form.
	cellText map[string]map[string]string
}
// tableData collects the rows, columns and cells of the forms table from
// headwords. Search-only headwords are skipped. Kanji forms and readings
// are recorded once each, in the order they are first seen, with the header
// text taken from the first headword that introduces them. When several
// headwords share a (reading, kanji form) pair, the last one determines the
// cell text.
func tableData(headwords []headword) formTableData {
	table := formTableData{
		kanjiForms:    []string{},
		readings:      []string{},
		colHeaderText: map[string]string{},
		rowHeaderText: map[string]string{},
		cellText:      map[string]map[string]string{},
	}

	for _, hw := range headwords {
		if hw.IsSearchOnly {
			continue
		}

		column := hw.KanjiForm()
		if seen := slices.Contains(table.kanjiForms, column); !seen {
			table.kanjiForms = append(table.kanjiForms, column)
			table.colHeaderText[column] = hw.TableColHeaderText()
		}

		row := hw.Reading
		if seen := slices.Contains(table.readings, row); !seen {
			table.readings = append(table.readings, row)
			table.rowHeaderText[row] = hw.TableRowHeaderText()
			// Allocate the inner map the first time a reading appears.
			table.cellText[row] = map[string]string{}
		}

		table.cellText[row][column] = hw.TableCellText()
	}
	return table
}
// formsTableGlossary renders headwords as a structured-content table and
// returns it as a single-element glossary. The table has one column per
// kanji form and one row per reading, as computed by tableData. A
// combination with no headword gets an empty-string cell.
func formsTableGlossary(headwords []headword) []any {
	data := tableData(headwords)

	plain := contentAttr{}
	centered := contentAttr{textAlign: "center"}
	leftAligned := contentAttr{textAlign: "left"}

	// Header row: an empty corner cell, then one head cell per kanji form.
	headerCells := []any{contentTableHeadCell(plain, "")}
	for _, form := range data.kanjiForms {
		headerCells = append(headerCells, contentTableHeadCell(centered, data.colHeaderText[form]))
	}
	rows := []any{contentTableRow(plain, headerCells...)}

	// Body rows: the reading as a left-aligned head cell, then one cell per
	// kanji form.
	for _, reading := range data.readings {
		cells := []any{contentTableHeadCell(leftAligned, data.rowHeaderText[reading])}
		for _, form := range data.kanjiForms {
			cells = append(cells, contentTableCell(centered, data.cellText[reading][form]))
		}
		rows = append(rows, contentTableRow(plain, cells...))
	}

	tableAttr := contentAttr{data: map[string]string{"content": "formsTable"}}
	table := contentTable(tableAttr, rows...)
	return []any{contentStructure(table)}
}
// formsGlossary returns the GlossText of every headword that is not
// search-only, in input order. The result is an empty (non-nil) slice when
// no headword qualifies.
func formsGlossary(headwords []headword) []any {
	lines := []any{}
	for _, hw := range headwords {
		if !hw.IsSearchOnly {
			lines = append(lines, hw.GlossText())
		}
	}
	return lines
}
func baseFormsTerm(entry jmdict.JmdictEntry) dbTerm {
term := dbTerm{Sequence: entry.Sequence}
headwords := extractHeadwords(entry)
if needsFormTable(headwords) {
2023-01-22 20:37:18 +00:00
term.Glossary = formsTableGlossary(headwords)
} else {
term.Glossary = formsGlossary(headwords)
}
for _, sense := range entry.Sense {
rules := grammarRules(sense.PartsOfSpeech)
term.addRules(rules...)
}
return term
}
func formsExportDb(inputPath, outputPath, languageName, title string, stride int, pretty bool) error {
reader, err := os.Open(inputPath)
if err != nil {
return err
}
defer reader.Close()
dictionary, entities, err := jmdict.LoadJmdictNoTransform(reader)
2023-01-22 20:37:18 +00:00
if err != nil {
return err
}
meta := newJmdictMetadata(dictionary, "english")
2023-01-22 20:37:18 +00:00
terms := dbTermList{}
for _, entry := range dictionary.Entries {
baseTerm := baseFormsTerm(entry)
headwords := extractHeadwords(entry)
for _, h := range headwords {
if h.IsSearchOnly {
if term, ok := createSearchTerm(h, entry, meta); ok {
terms = append(terms, term)
}
continue
2023-01-22 20:37:18 +00:00
}
term := baseTerm
term.Expression = h.Expression
term.Reading = h.Reading
term.addTermTags(h.TermTags...)
term.Score = calculateTermScore(1, 0, h)
2023-01-22 20:37:18 +00:00
terms = append(terms, term)
}
}
tags := dbTagList{}
tags = append(tags, entityTags(entities)...)
tags = append(tags, newsFrequencyTags()...)
tags = append(tags, customDbTags()...)
2023-01-22 20:37:18 +00:00
if title == "" {
title = "JMdict Forms"
}
recordData := map[string]dbRecordList{
"term": terms.crush(),
"tag": tags.crush(),
2023-01-22 20:37:18 +00:00
}
jmdictDate := jmdictPublicationDate(dictionary)
index := dbIndex{
Title: title,
Revision: "JMdict." + jmdictDate,
Sequenced: true,
Attribution: edrdgAttribution,
}
index.setDefaults()
return writeDb(
outputPath,
index,
recordData,
stride,
pretty,
)
}