This allows a user to install the English version and another version without cluttering their setup with duplicated information. If a user doesn't want to use the English version, they can get the "search" and "forms" terms by installing the separate jmdict_forms file.
259 lines
5.9 KiB
259 lines
5.9 KiB
package yomichan
import (
// kata2hira returns word with its katakana converted to hiragana.
//
// Runes in the ranges 'ァ'–'ヶ' and 'ヽ'–'ヾ' are shifted down by 0x60, the
// offset applied to reach the matching hiragana code point. Every other rune
// is returned unchanged.
func kata2hira(word string) string {
	toHiragana := func(r rune) rune {
		isKatakana := (r >= 'ァ' && r <= 'ヶ') || (r >= 'ヽ' && r <= 'ヾ')
		if isKatakana {
			return r - 0x60
		}
		return r
	}
	return strings.Map(toHiragana, word)
}
func (h *headword) InfoSymbols() string {
infoSymbols := []string{}
if h.IsPriority {
infoSymbols = append(infoSymbols, prioritySymbol)
if h.IsRareKanji {
infoSymbols = append(infoSymbols, rareKanjiSymbol)
if h.IsIrregular {
infoSymbols = append(infoSymbols, irregularSymbol)
if h.IsOutdated {
infoSymbols = append(infoSymbols, outdatedSymbol)
return strings.Join(infoSymbols[:], " | ")
func (h *headword) GlossText() string {
gloss := h.Expression
if h.IsAteji {
gloss = "〈" + gloss + "〉"
symbolText := h.InfoSymbols()
if symbolText != "" {
gloss += "(" + symbolText + ")"
return gloss
func (h *headword) TableColHeaderText() string {
text := h.KanjiForm()
if h.IsAteji {
text = "〈" + text + "〉"
return text
func (h *headword) TableRowHeaderText() string {
text := h.Reading
if h.IsGikun {
text = "〈" + text + "〉"
return text
func (h *headword) TableCellText() string {
text := h.InfoSymbols()
if text == "" {
return defaultSymbol
} else {
return text
func (h *headword) KanjiForm() string {
if h.IsKanaOnly() {
return "∅"
} else {
return h.Expression
func jmdNeedsFormTable(headwords []headword) bool {
// Does the entry contain more than 1 distinct reading?
// E.g. バカがい and ばかがい are not distinct.
uniqueReading := ""
for _, h := range headwords {
if h.IsGikun {
return true
} else if h.IsSearchOnly {
} else if h.IsKanaOnly() {
} else if uniqueReading == "" {
uniqueReading = kata2hira(h.Reading)
} else if uniqueReading != kata2hira(h.Reading) {
return true
return false
// formTableData holds the contents of a forms table, in which kanji forms
// head the columns and readings head the rows.
type formTableData struct {
	// kanjiForms lists the table's columns, one entry per kanji form.
	kanjiForms []string

	// readings lists the table's rows, one entry per reading.
	readings []string

	// colHeaderText maps a kanji form to its column header text.
	colHeaderText map[string]string

	// rowHeaderText maps a reading to its row header text.
	rowHeaderText map[string]string

	// cellText maps a reading, then a kanji form, to that cell's text.
	cellText map[string]map[string]string
}
// tableData collects, from headwords, the columns (kanji forms), rows
// (readings), header texts, and cell texts of a forms table.
//
// NOTE(review): this block has lost lines in this view (closing braces and the
// body of the IsSearchOnly guard below), so the exact nesting cannot be read
// off the code — confirm against the complete file.
func tableData(headwords []headword) formTableData {
d := formTableData{
kanjiForms: []string{},
readings: []string{},
colHeaderText: make(map[string]string),
rowHeaderText: make(map[string]string),
cellText: make(map[string]map[string]string),
for _, h := range headwords {
// NOTE(review): the guard's body is not visible here; presumably search-only
// headwords are skipped, as in jmdNeedsFormTable — TODO confirm.
if h.IsSearchOnly {
kanjiForm := h.KanjiForm()
// Record a kanji form, and its column header text, only the first time it is seen.
if !slices.Contains(d.kanjiForms, kanjiForm) {
d.kanjiForms = append(d.kanjiForms, kanjiForm)
d.colHeaderText[kanjiForm] = h.TableColHeaderText()
reading := h.Reading
// Likewise for a reading; its inner cell map is created here so the cell
// assignment below has a non-nil map to write into.
if !slices.Contains(d.readings, reading) {
d.readings = append(d.readings, reading)
d.rowHeaderText[reading] = h.TableRowHeaderText()
d.cellText[reading] = make(map[string]string)
d.cellText[reading][kanjiForm] = h.TableCellText()
return d
func formsTableGlossary(headwords []headword) []any {
d := tableData(headwords)
attr := contentAttr{}
centeredAttr := contentAttr{textAlign: "center"}
leftAttr := contentAttr{textAlign: "left"}
cornerCell := contentTableHeadCell(attr, "") // empty cell in upper left corner
headRowCells := []any{cornerCell}
for _, kanjiForm := range d.kanjiForms {
content := d.colHeaderText[kanjiForm]
cell := contentTableHeadCell(centeredAttr, content)
headRowCells = append(headRowCells, cell)
headRow := contentTableRow(attr, headRowCells...)
tableRows := []any{headRow}
for _, reading := range d.readings {
rowHeadCellText := d.rowHeaderText[reading]
rowHeadCell := contentTableHeadCell(leftAttr, rowHeadCellText)
rowCells := []any{rowHeadCell}
for _, kanjiForm := range d.kanjiForms {
text := d.cellText[reading][kanjiForm]
rowCell := contentTableCell(centeredAttr, text)
rowCells = append(rowCells, rowCell)
tableRow := contentTableRow(attr, rowCells...)
tableRows = append(tableRows, tableRow)
tableAttr := contentAttr{data: map[string]string{"content": "formsTable"}}
contentTable := contentTable(tableAttr, tableRows...)
content := contentStructure(contentTable)
return []any{content}
// formsGlossary builds a flat glossary whose items are the GlossText strings
// of headwords.
//
// NOTE(review): this block has lost lines in this view (closing braces and the
// body of the IsSearchOnly guard below) — confirm against the complete file.
func formsGlossary(headwords []headword) []any {
glossary := []any{}
for _, h := range headwords {
// NOTE(review): the guard's body is not visible here; presumably search-only
// headwords are left out of the glossary — TODO confirm.
if h.IsSearchOnly {
text := h.GlossText()
glossary = append(glossary, text)
return glossary
// baseFormsTerm builds a dbTerm for entry, carrying the entry's Sequence and a
// forms glossary.
//
// The glossary is a table when jmdNeedsFormTable reports true for the entry's
// headwords, and a flat list otherwise.
func baseFormsTerm(entry jmdict.JmdictEntry) dbTerm {
term := dbTerm{Sequence: entry.Sequence}
headwords := extractHeadwords(entry)
if jmdNeedsFormTable(headwords) {
term.Glossary = formsTableGlossary(headwords)
} else {
term.Glossary = formsGlossary(headwords)
for _, sense := range entry.Sense {
// NOTE(review): rules is not consumed in the visible lines; the statement
// that applies it to term appears to be missing from this view — confirm
// against the complete file.
rules := grammarRules(sense.PartsOfSpeech)
return term
func formsExportDb(inputPath, outputPath, languageName, title string, stride int, pretty bool) error {
reader, err := os.Open(inputPath)
if err != nil {
return err
defer reader.Close()
dictionary, _, err := jmdict.LoadJmdictNoTransform(reader)
if err != nil {
return err
meta := newJmdictMetadata(dictionary, languageName)
terms := dbTermList{}
for _, entry := range dictionary.Entries {
baseTerm := baseFormsTerm(entry)
headwords := extractHeadwords(entry)
for _, h := range headwords {
var term dbTerm
if h.IsSearchOnly {
term = createSearchTerm(h, entry, meta)
} else {
term = baseTerm
term.Expression = h.Expression
term.Reading = h.Reading
terms = append(terms, term)
if title == "" {
title = "JMdict Forms"
recordData := map[string]dbRecordList{
"term": terms.crush(),
"tag": dbRecordList{},
jmdictDate := jmdictPublicationDate(dictionary)
index := dbIndex{
Title: title,
Revision: "JMdict." + jmdictDate,
Sequenced: true,
Attribution: edrdgAttribution,
return writeDb(