Add new JMdict version

2023-01-22 14:37:18 -06:00 · 2023-01-22 14:37:18 -06:00 · abc28bb19d
commit abc28bb19d
parent 73fb992865
13 changed files with 2127 additions and 254 deletions
--- a/common.go
+++ b/common.go
@ -306,7 +306,7 @@ func detectFormat(path string) (string, error) {
 	}

 	switch filepath.Base(path) {
-	case "JMdict", "JMdict.xml", "JMdict_e", "JMdict_e.xml":
+	case "JMdict", "JMdict.xml", "JMdict_e", "JMdict_e.xml", "JMdict_e_examp":
 		return "edict", nil
 	case "JMnedict", "JMnedict.xml":
 		return "enamdict", nil
@ -336,7 +336,8 @@ func detectFormat(path string) (string, error) {

 func ExportDb(inputPath, outputPath, format, language, title string, stride int, pretty bool) error {
 	handlers := map[string]func(string, string, string, string, int, bool) error{
-		"edict":     jmdictExportDb,
+		"edict":     jmdExportDb,
+		"forms":     formsExportDb,
 		"enamdict":  jmnedictExportDb,
 		"epwing":    epwingExportDb,
 		"kanjidic":  kanjidicExportDb,
--- a/edict.go
+++ b/edict.go
@ -1,252 +0,0 @@
-package yomichan
-
-import (
-	"os"
-	"strings"
-
-	"foosoft.net/projects/jmdict"
-)
-
-const edrdgAttribution = "This publication has included material from the JMdict (EDICT, etc.) dictionary files in accordance with the licence provisions of the Electronic Dictionaries Research Group. See http://www.edrdg.org/"
-
-func jmdictBuildRules(term *dbTerm) {
-	for _, tag := range term.DefinitionTags {
-		switch tag {
-		case "adj-i", "v1", "vk", "vz":
-			term.addRules(tag)
-		default:
-			if strings.HasPrefix(tag, "v5") {
-				term.addRules("v5")
-			} else if strings.HasPrefix(tag, "vs-") {
-				term.addRules("vs")
-			}
-		}
-	}
-}
-
-func jmdictBuildScore(term *dbTerm) {
-	for _, tag := range term.DefinitionTags {
-		switch tag {
-		case "arch":
-			term.Score -= 100
-		}
-	}
-	for _, tag := range term.TermTags {
-		switch tag {
-		case "news", "ichi", "spec", "gai1":
-			term.Score += 100
-		case "P":
-			term.Score += 500
-		case "iK", "ik", "ok", "oK", "io", "oik":
-			term.Score -= 100
-		}
-	}
-}
-
-func jmdictAddPriorities(term *dbTerm, priorities ...string) {
-	for _, priority := range priorities {
-		switch priority {
-		case "news1", "ichi1", "spec1", "gai1":
-			term.addTermTags("P")
-			fallthrough
-		case "news2", "ichi2", "spec2", "gai2":
-			term.addTermTags(priority[:len(priority)-1])
-		}
-	}
-}
-
-func jmdictBuildTagMeta(entities map[string]string) dbTagList {
-	tags := dbTagList{
-		dbTag{Name: "news", Notes: "appears frequently in Mainichi Shimbun", Category: "frequent", Order: -2},
-		dbTag{Name: "ichi", Notes: "listed as common in Ichimango Goi Bunruishuu", Category: "frequent", Order: -2},
-		dbTag{Name: "spec", Notes: "common words not included in frequency lists", Category: "frequent", Order: -2},
-		dbTag{Name: "gai", Notes: "common loanword", Category: "frequent", Order: -2},
-		dbTag{Name: "P", Notes: "popular term", Category: "popular", Order: -10, Score: 10},
-	}
-
-	for name, value := range entities {
-		tag := dbTag{Name: name, Notes: value}
-
-		switch name {
-		case "exp", "id":
-			tag.Category = "expression"
-			tag.Order = -5
-		case "arch":
-			tag.Category = "archaism"
-			tag.Order = -4
-		case "iK", "ik", "ok", "oK", "io", "oik":
-			tag.Score = -5
-		case "adj-f", "adj-i", "adj-ix", "adj-ku", "adj-na", "adj-nari", "adj-no", "adj-pn", "adj-shiku", "adj-t", "adv", "adv-to", "aux-adj",
-			"aux", "aux-v", "conj", "cop-da", "ctr", "int", "n-adv", "n", "n-pref", "n-pr", "n-suf", "n-t", "num", "pn", "pref", "prt", "suf",
-			"unc", "v1", "v1-s", "v2a-s", "v2b-k", "v2d-s", "v2g-k", "v2g-s", "v2h-k", "v2h-s", "v2k-k", "v2k-s", "v2m-s", "v2n-s", "v2r-k",
-			"v2r-s", "v2s-s", "v2t-k", "v2t-s", "v2w-s", "v2y-k", "v2y-s", "v2z-s", "v4b", "v4h", "v4k", "v4m", "v4r", "v4s", "v4t", "v5aru",
-			"v5b", "v5g", "v5k", "v5k-s", "v5m", "v5n", "v5r-i", "v5r", "v5s", "v5t", "v5u", "v5u-s", "vi", "vk", "vn", "vr", "vs-c", "vs-i",
-			"vs", "vs-s", "vt", "vz":
-			tag.Category = "partOfSpeech"
-			tag.Order = -3
-		}
-
-		tags = append(tags, tag)
-	}
-
-	return tags
-}
-
-func jmdictExtractTerms(edictEntry jmdict.JmdictEntry, language string) []dbTerm {
-	var terms []dbTerm
-
-	convert := func(reading jmdict.JmdictReading, kanji *jmdict.JmdictKanji) {
-		if kanji != nil && reading.Restrictions != nil && !hasString(kanji.Expression, reading.Restrictions) {
-			return
-		}
-
-		var termBase dbTerm
-		termBase.addTermTags(reading.Information...)
-
-		if kanji == nil {
-			termBase.Expression = reading.Reading
-			jmdictAddPriorities(&termBase, reading.Priorities...)
-		} else {
-			termBase.Expression = kanji.Expression
-			termBase.Reading = reading.Reading
-			termBase.addTermTags(kanji.Information...)
-
-			for _, priority := range kanji.Priorities {
-				if hasString(priority, reading.Priorities) {
-					jmdictAddPriorities(&termBase, priority)
-				}
-			}
-		}
-
-		var partsOfSpeech []string
-		for index, sense := range edictEntry.Sense {
-
-			if len(sense.PartsOfSpeech) != 0 {
-				partsOfSpeech = sense.PartsOfSpeech
-			}
-
-			if sense.RestrictedReadings != nil && !hasString(reading.Reading, sense.RestrictedReadings) {
-				continue
-			}
-
-			if kanji != nil && sense.RestrictedKanji != nil && !hasString(kanji.Expression, sense.RestrictedKanji) {
-				continue
-			}
-
-			term := dbTerm{
-				Reading:    termBase.Reading,
-				Expression: termBase.Expression,
-				Score:      len(edictEntry.Sense) - index,
-				Sequence:   edictEntry.Sequence,
-			}
-
-			for _, glossary := range sense.Glossary {
-				if glossary.Language == nil && language == "" || glossary.Language != nil && language == *glossary.Language {
-					term.Glossary = append(term.Glossary, glossary.Content)
-				}
-			}
-
-			if len(term.Glossary) == 0 {
-				continue
-			}
-
-			term.addDefinitionTags(termBase.DefinitionTags...)
-			term.addTermTags(termBase.TermTags...)
-			term.addDefinitionTags(partsOfSpeech...)
-			term.addDefinitionTags(sense.Fields...)
-			term.addDefinitionTags(sense.Misc...)
-			term.addDefinitionTags(sense.Dialects...)
-
-			jmdictBuildRules(&term)
-			jmdictBuildScore(&term)
-
-			terms = append(terms, term)
-		}
-	}
-
-	if len(edictEntry.Kanji) > 0 {
-		for _, kanji := range edictEntry.Kanji {
-			for _, reading := range edictEntry.Readings {
-				if reading.NoKanji == nil {
-					convert(reading, &kanji)
-				}
-			}
-		}
-		for _, reading := range edictEntry.Readings {
-			if reading.NoKanji != nil {
-				convert(reading, nil)
-			}
-		}
-	} else {
-		for _, reading := range edictEntry.Readings {
-			convert(reading, nil)
-		}
-	}
-
-	return terms
-}
-
-func jmdictExportDb(inputPath, outputPath, language, title string, stride int, pretty bool) error {
-	reader, err := os.Open(inputPath)
-	if err != nil {
-		return err
-	}
-	defer reader.Close()
-
-	dict, entities, err := jmdict.LoadJmdictNoTransform(reader)
-	if err != nil {
-		return err
-	}
-
-	var langTag string
-	switch language {
-	case "dutch":
-		langTag = "dut"
-	case "french":
-		langTag = "fre"
-	case "german":
-		langTag = "ger"
-	case "hungarian":
-		langTag = "hun"
-	case "italian":
-		langTag = "ita"
-	case "russian":
-		langTag = "rus"
-	case "slovenian":
-		langTag = "slv"
-	case "spanish":
-		langTag = "spa"
-	case "swedish":
-		langTag = "swe"
-	}
-
-	var terms dbTermList
-	for _, entry := range dict.Entries {
-		terms = append(terms, jmdictExtractTerms(entry, langTag)...)
-	}
-
-	if title == "" {
-		title = "JMdict"
-	}
-
-	recordData := map[string]dbRecordList{
-		"term": terms.crush(),
-		"tag":  jmdictBuildTagMeta(entities).crush(),
-	}
-
-	index := dbIndex{
-		Title:       title,
-		Revision:    "jmdict4",
-		Sequenced:   true,
-		Attribution: edrdgAttribution,
-	}
-	index.setDefaults()
-
-	return writeDb(
-		outputPath,
-		index,
-		recordData,
-		stride,
-		pretty,
-	)
-}
--- a/go.mod
+++ b/go.mod
@ -7,6 +7,7 @@ require (
 	foosoft.net/projects/zero-epwing-go v0.0.0-20220704035039-bc008453615d
 	github.com/andlabs/ui v0.0.0-20200610043537-70a69d6ae31e
 	github.com/mattn/go-sqlite3 v1.14.14
+	golang.org/x/exp v0.0.0-20221207211629-99ab8fa1c11f
 )

 require golang.org/x/text v0.3.7 // indirect
--- a/go.sum
+++ b/go.sum
@ -6,5 +6,7 @@ github.com/andlabs/ui v0.0.0-20200610043537-70a69d6ae31e h1:wSQCJiig/QkoUnpvelSP
 github.com/andlabs/ui v0.0.0-20200610043537-70a69d6ae31e/go.mod h1:5G2EjwzgZUPnnReoKvPWVneT8APYbyKkihDVAHUi0II=
 github.com/mattn/go-sqlite3 v1.14.14 h1:qZgc/Rwetq+MtyE18WhzjokPD93dNqLGNT3QJuLvBGw=
 github.com/mattn/go-sqlite3 v1.14.14/go.mod h1:NyWgC/yNuGj7Q9rpYnZvas74GogHl5/Z4A/KQRfk6bU=
+golang.org/x/exp v0.0.0-20221207211629-99ab8fa1c11f h1:90Jq/vvGVDsqj8QqCynjFw9MCerDguSMODLYII416Y8=
+golang.org/x/exp v0.0.0-20221207211629-99ab8fa1c11f/go.mod h1:CxIveKay+FTh1D0yPZemJVgC/95VzuuOLq5Qi4xnoYc=
 golang.org/x/text v0.3.7 h1:olpwvP2KacW1ZWvsR7uQhoyTYvKAupfQrRGBFM352Gk=
 golang.org/x/text v0.3.7/go.mod h1:u+2+/6zg+i71rQMx5EYifcz6MCKuco9NR6JIITiCfzQ=
--- a/jmdict.go
+++ b/jmdict.go
@ -0,0 +1,221 @@
+package yomichan
+
+import (
+	"os"
+	"regexp"
+	"strconv"
+	"strings"
+
+	"foosoft.net/projects/jmdict"
+	"golang.org/x/exp/slices"
+)
+
+// grammarRules translates JMdict part-of-speech codes into grammar rule names.
+// "adj-i", "vk" and "vz" are passed through unchanged; codes beginning with
+// "v5", "v1" or "vs-" collapse to "v5", "v1" and "vs" respectively; any other
+// code contributes nothing. The result follows input order, is never nil, and
+// may contain duplicates.
+func grammarRules(partsOfSpeech []string) []string {
+	rules := []string{}
+	for _, pos := range partsOfSpeech {
+		switch {
+		case pos == "adj-i", pos == "vk", pos == "vz":
+			rules = append(rules, pos)
+		case strings.HasPrefix(pos, "v5"):
+			rules = append(rules, "v5")
+		case strings.HasPrefix(pos, "v1"):
+			rules = append(rules, "v1")
+		case strings.HasPrefix(pos, "vs-"):
+			rules = append(rules, "vs")
+		}
+	}
+	return rules
+}
+
+// calculateTermScore computes a term's score from three components of
+// decreasing weight: the headword's own Score() (x10000, added), the
+// headword's Index (x100, subtracted) and the number of senses preceding
+// senseNumber (x1, subtracted).
+func calculateTermScore(senseNumber int, headword headword) int {
+	const (
+		senseWeight         int = 1
+		entryPositionWeight int = 100
+		priorityWeight      int = 10000
+	)
+
+	sensePenalty := (senseNumber - 1) * senseWeight
+	positionPenalty := headword.Index * entryPositionWeight
+	priorityBonus := headword.Score() * priorityWeight
+
+	return priorityBonus - positionPenalty - sensePenalty
+}
+
+// doDisplaySenseNumberTag reports whether sense-number tags should be shown
+// for the headword: true when the entry's recorded sense count exceeds one,
+// or when the headword's hash is associated with more than one entry
+// sequence number.
+func doDisplaySenseNumberTag(headword headword, entry jmdict.JmdictEntry, meta jmdictMetadata) bool {
+	hash := headword.Hash()
+	entryHasSeveralSenses := meta.seqToSenseCount[entry.Sequence] > 1
+	headwordInSeveralEntries := len(meta.headwordHashToSeqs[hash]) > 1
+	return entryHasSeveralSenses || headwordInSeveralEntries
+}
+
+// jmdictDatePattern matches a YYYY-MM-DD date such as "2023-01-22".
+var jmdictDatePattern = regexp.MustCompile(`\d{4}-\d{2}-\d{2}`)
+
+// jmdictPublicationDate returns the first YYYY-MM-DD date found in the first
+// gloss of the first sense of the dictionary's last entry. It returns "" when
+// the dictionary has no entries, when that entry has no sense or gloss, or
+// when the gloss contains no date, instead of panicking on an out-of-range
+// index.
+func jmdictPublicationDate(dictionary jmdict.Jmdict) string {
+	if len(dictionary.Entries) == 0 {
+		return ""
+	}
+	dateEntry := dictionary.Entries[len(dictionary.Entries)-1]
+	if len(dateEntry.Sense) == 0 || len(dateEntry.Sense[0].Glossary) == 0 {
+		return ""
+	}
+	return jmdictDatePattern.FindString(dateEntry.Sense[0].Glossary[0].Content)
+}
+
+// createFormsTerm builds the "forms" term for a headword: it starts from
+// baseFormsTerm(entry), fills in the headword's expression, reading and term
+// tags, and adds the "forms" definition tag.
+func createFormsTerm(headword headword, entry jmdict.JmdictEntry, meta jmdictMetadata) dbTerm {
+	formsTerm := baseFormsTerm(entry)
+	formsTerm.Expression, formsTerm.Reading = headword.Expression, headword.Reading
+
+	formsTerm.addTermTags(headword.TermTags...)
+	formsTerm.addDefinitionTags("forms")
+
+	// Score the forms term as if it were one sense past the entry's
+	// recorded sense count.
+	formsTerm.Score = calculateTermScore(meta.seqToSenseCount[entry.Sequence]+1, headword)
+	return formsTerm
+}
+
+// createSearchTerm builds the term emitted for a search-only headword. The
+// term has no reading, carries the negated entry sequence number, collects the
+// grammar rules of every sense, and its glossary is a single structured
+// content item: an arrow followed by an internal link to the entry's main
+// headword. The link shows the reading when the main headword's expression
+// hash maps to more than one reading.
+func createSearchTerm(headword headword, entry jmdict.JmdictEntry, meta jmdictMetadata) dbTerm {
+	searchTerm := dbTerm{
+		Expression: headword.Expression,
+		Sequence:   -entry.Sequence,
+	}
+	for i := range entry.Sense {
+		searchTerm.addRules(grammarRules(entry.Sense[i].PartsOfSpeech)...)
+	}
+	searchTerm.addTermTags(headword.TermTags...)
+	searchTerm.Score = calculateTermScore(0, headword)
+
+	target := meta.seqToMainHeadword[entry.Sequence]
+	showReading := len(meta.expHashToReadings[target.ExpHash()]) > 1
+
+	arrowLink := contentSpan(
+		contentAttr{fontSize: "130%"},
+		"⟶",
+		target.ToInternalLink(showReading),
+	)
+
+	searchTerm.Glossary = []any{contentStructure(arrowLink)}
+	return searchTerm
+}
+
+// createSenseTerm builds the term for one sense of an entry as seen from one
+// headword. Definition tags are added in this order: the sense number (only
+// when doDisplaySenseNumberTag says so), parts of speech, fields, misc and
+// dialects. Grammar rules are derived from the sense's parts of speech.
+func createSenseTerm(sense jmdict.JmdictSense, senseNumber int, headword headword, entry jmdict.JmdictEntry, meta jmdictMetadata) dbTerm {
+	senseTerm := dbTerm{
+		Expression: headword.Expression,
+		Reading:    headword.Reading,
+		Sequence:   entry.Sequence,
+	}
+	senseTerm.Glossary = createGlossary(sense, meta)
+	senseTerm.addTermTags(headword.TermTags...)
+
+	if doDisplaySenseNumberTag(headword, entry, meta) {
+		senseTerm.addDefinitionTags(strconv.Itoa(senseNumber))
+	}
+	tagGroups := [][]string{sense.PartsOfSpeech, sense.Fields, sense.Misc, sense.Dialects}
+	for _, group := range tagGroups {
+		senseTerm.addDefinitionTags(group...)
+	}
+
+	senseTerm.addRules(grammarRules(sense.PartsOfSpeech)...)
+	senseTerm.Score = calculateTermScore(senseNumber, headword)
+
+	return senseTerm
+}
+
+// extractTerms produces the terms for one headword of an entry.
+//
+// It returns (nil, false) when the entry's recorded sense count is zero. A
+// search-only headword yields exactly one search term. Otherwise one term is
+// produced per sense that has a gloss in meta.language and is not restricted
+// to other readings or kanji, followed by a forms term when the entry is
+// flagged in meta.hasMultipleForms.
+func extractTerms(headword headword, entry jmdict.JmdictEntry, meta jmdictMetadata) ([]dbTerm, bool) {
+	if meta.seqToSenseCount[entry.Sequence] == 0 {
+		return nil, false
+	}
+	if headword.IsSearchOnly {
+		return []dbTerm{createSearchTerm(headword, entry, meta)}, true
+	}
+
+	terms := []dbTerm{}
+	nextSenseNumber := 1
+	for _, sense := range entry.Sense {
+		// Senses without a gloss in the target language do not consume a number.
+		if !glossaryContainsLanguage(sense.Glossary, meta.language) {
+			continue
+		}
+		// Restricted senses still consume a number so numbering stays the
+		// same across all headwords of the entry.
+		senseNumber := nextSenseNumber
+		nextSenseNumber++
+
+		readingExcluded := sense.RestrictedReadings != nil && !slices.Contains(sense.RestrictedReadings, headword.Reading)
+		kanjiExcluded := sense.RestrictedKanji != nil && !slices.Contains(sense.RestrictedKanji, headword.Expression)
+		if readingExcluded || kanjiExcluded {
+			continue
+		}
+		terms = append(terms, createSenseTerm(sense, senseNumber, headword, entry, meta))
+	}
+
+	if meta.hasMultipleForms[entry.Sequence] {
+		terms = append(terms, createFormsTerm(headword, entry, meta))
+	}
+	return terms, true
+}
+
+// jmdExportDb reads the JMdict file at inputPath and writes it to outputPath
+// via writeDb. Terms are produced for every headword of every entry; tags are
+// the entity tags followed by sense-number, news-frequency and custom tags.
+// An empty title defaults to "JMdict"; the revision is "JMdict." followed by
+// the date reported by jmdictPublicationDate. Errors from opening or parsing
+// the input are returned unchanged.
+func jmdExportDb(inputPath string, outputPath string, languageName string, title string, stride int, pretty bool) error {
+	file, err := os.Open(inputPath)
+	if err != nil {
+		return err
+	}
+	defer file.Close()
+
+	dictionary, entities, err := jmdict.LoadJmdictNoTransform(file)
+	if err != nil {
+		return err
+	}
+
+	meta := newJmdictMetadata(dictionary, languageName)
+
+	terms := dbTermList{}
+	for _, entry := range dictionary.Entries {
+		for _, hw := range extractHeadwords(entry) {
+			entryTerms, ok := extractTerms(hw, entry, meta)
+			if !ok {
+				continue
+			}
+			terms = append(terms, entryTerms...)
+		}
+	}
+
+	tags := dbTagList{}
+	tags = append(tags, entityTags(entities)...)
+	tags = append(tags, senseNumberTags(meta.maxSenseCount)...)
+	tags = append(tags, newsFrequencyTags()...)
+	tags = append(tags, customDbTags()...)
+
+	recordData := map[string]dbRecordList{
+		"term": terms.crush(),
+		"tag":  tags.crush(),
+	}
+
+	if title == "" {
+		title = "JMdict"
+	}
+
+	index := dbIndex{
+		Title:       title,
+		Revision:    "JMdict." + jmdictPublicationDate(dictionary),
+		Sequenced:   true,
+		Attribution: edrdgAttribution,
+	}
+	index.setDefaults()
+
+	return writeDb(outputPath, index, recordData, stride, pretty)
+}
--- a/jmdictConstants.go
+++ b/jmdictConstants.go
@ -0,0 +1,215 @@
+package yomichan
+
+// LangCode is a composite map key: language is the build language of the
+// dictionary being generated and code is the JMdict code to be rendered in
+// that language.
+type LangCode struct {
+	language string
+	code     string
+}
+
+// edrdgAttribution is the attribution text stored in the dictionary index.
+const edrdgAttribution = "This publication has included material from the JMdict (EDICT, etc.) dictionary files in accordance with the licence provisions of the Electronic Dictionaries Research Group. See http://www.edrdg.org/"
+
+// Symbols describing a headword's properties; defaultSymbol is used when a
+// headword has none of the others.
+const (
+	prioritySymbol  = "★"
+	rareKanjiSymbol = "🅁"
+	irregularSymbol = "⚠"
+	outdatedSymbol  = "⛬"
+	defaultSymbol   = "㊒"
+)
+
+// Names of tags attached to headwords.
+const (
+	priorityTagName  = "⭐"
+	rareKanjiTagName = "R"
+	irregularTagName = "⚠️"
+	outdatedTagName  = "⛬"
+	atejiTagName     = "ateji"
+	gikunTagName     = "gikun"
+)
+
+// Marker strings. NOTE(review): the embedded single quotes suggest these are
+// emitted verbatim as quoted style values — confirm against the call sites.
+const (
+	langMarker    = "'🌐 '"
+	noteMarker    = "'📝 '"
+	infoMarker    = "'ℹ️ '"
+	refMarker     = "'➡️ '"
+	antonymMarker = "'🔄 '"
+)
+
+var (
+	// ISOtoFlag maps a three-letter language code (or "" for the default)
+	// to a quoted flag-emoji marker string.
+	ISOtoFlag = map[string]string{
+		"":    "'🇬🇧 '",
+		"eng": "'🇬🇧 '",
+		"dut": "'🇳🇱 '",
+		"fre": "'🇫🇷 '",
+		"ger": "'🇩🇪 '",
+		"hun": "'🇭🇺 '",
+		"ita": "'🇮🇹 '",
+		"jpn": "'🇯🇵 '",
+		"rus": "'🇷🇺 '",
+		"slv": "'🇸🇮 '",
+		"spa": "'🇪🇸 '",
+		"swe": "'🇸🇪 '",
+	}
+
+	// langNameToCode maps a lowercase English language name to its
+	// three-letter code; the empty name maps to "eng".
+	langNameToCode = map[string]string{
+		"":          "eng",
+		"english":   "eng",
+		"dutch":     "dut",
+		"french":    "fre",
+		"german":    "ger",
+		"hungarian": "hun",
+		"italian":   "ita",
+		"russian":   "rus",
+		"slovenian": "slv",
+		"spanish":   "spa",
+		"swedish":   "swe",
+	}
+
+	// glossTypeCodeToName maps (build language, gloss type code) to the
+	// label shown before a gloss; an empty label means nothing is shown.
+	glossTypeCodeToName = map[LangCode]string{
+		{"eng", "lit"}:  "literally",
+		{"eng", "fig"}:  "figuratively",
+		{"eng", "expl"}: "", // don't need to tell the user that an explanation is an explanation
+		{"eng", "tm"}:   "trademark",
+	}
+
+	// refNoteHint maps (build language, reference kind) to a hint word.
+	refNoteHint = map[LangCode]string{
+		{"eng", "xref"}: "see",
+		{"eng", "ant"}:  "antonym",
+	}
+
+	// sourceLangTypeCodeToType maps (build language, source-language type
+	// code) to its display name.
+	sourceLangTypeCodeToType = map[LangCode]string{
+		{"eng", "part"}: "partial",
+		{"eng", ""}:     "", // implied "full"
+	}
+)
+
+// langCodeToName maps (build language, three-letter language code) to the
+// language's display name in the build language.
+var langCodeToName = map[LangCode]string{
+	{"eng", "afr"}: "Afrikaans",
+	{"eng", "ain"}: "Ainu",
+	{"eng", "alg"}: "Algonquian",
+	{"eng", "amh"}: "Amharic",
+	{"eng", "ara"}: "Arabic",
+	{"eng", "arn"}: "Mapudungun",
+	{"eng", "bnt"}: "Bantu",
+	{"eng", "bre"}: "Breton",
+	{"eng", "bul"}: "Bulgarian",
+	{"eng", "bur"}: "Burmese",
+	{"eng", "chi"}: "Chinese",
+	{"eng", "chn"}: "Chinook Jargon",
+	{"eng", "cze"}: "Czech",
+	{"eng", "dan"}: "Danish",
+	{"eng", "dut"}: "Dutch",
+	{"eng", "eng"}: "English",
+	{"eng", "epo"}: "Esperanto",
+	{"eng", "est"}: "Estonian",
+	{"eng", "fil"}: "Filipino",
+	{"eng", "fin"}: "Finnish",
+	{"eng", "fre"}: "French",
+	{"eng", "geo"}: "Georgian",
+	{"eng", "ger"}: "German",
+	{"eng", "glg"}: "Galician",
+	{"eng", "grc"}: "Ancient Greek",
+	{"eng", "gre"}: "Modern Greek",
+	{"eng", "haw"}: "Hawaiian",
+	{"eng", "heb"}: "Hebrew",
+	{"eng", "hin"}: "Hindi",
+	{"eng", "hun"}: "Hungarian",
+	{"eng", "ice"}: "Icelandic",
+	{"eng", "ind"}: "Indonesian",
+	{"eng", "ita"}: "Italian",
+	{"eng", "khm"}: "Khmer",
+	{"eng", "kor"}: "Korean",
+	{"eng", "kur"}: "Kurdish",
+	{"eng", "lat"}: "Latin",
+	{"eng", "mal"}: "Malayalam",
+	{"eng", "mao"}: "Maori",
+	{"eng", "may"}: "Malay",
+	{"eng", "mnc"}: "Manchu",
+	{"eng", "mol"}: "Moldavian", // ISO 639 deprecated (https://iso639-3.sil.org/code/mol)
+	{"eng", "mon"}: "Mongolian",
+	{"eng", "nor"}: "Norwegian",
+	{"eng", "per"}: "Persian",
+	{"eng", "pol"}: "Polish",
+	{"eng", "por"}: "Portuguese",
+	{"eng", "rum"}: "Romanian",
+	{"eng", "rus"}: "Russian",
+	{"eng", "san"}: "Sanskrit",
+	{"eng", "scr"}: "Croatian", // Code doesn't seem to exist in ISO 639. Should be "hrv" instead? (https://iso639-3.sil.org/code/hrv)
+	{"eng", "slo"}: "Slovak",
+	{"eng", "slv"}: "Slovenian",
+	{"eng", "som"}: "Somali",
+	{"eng", "spa"}: "Spanish",
+	{"eng", "swa"}: "Swahili",
+	{"eng", "swe"}: "Swedish",
+	{"eng", "tah"}: "Tahitian",
+	{"eng", "tam"}: "Tamil",
+	{"eng", "tgl"}: "Tagalog",
+	{"eng", "tha"}: "Thai",
+	{"eng", "tib"}: "Tibetan",
+	{"eng", "tur"}: "Turkish",
+	{"eng", "ukr"}: "Ukrainian",
+	{"eng", "urd"}: "Urdu",
+	{"eng", "vie"}: "Vietnamese",
+	{"eng", "yid"}: "Yiddish",
+}
+
+// ISOtoHTML maps three-letter language codes to IANA language subtags; see https://www.iana.org/assignments/language-subtag-registry/language-subtag-registry
+var ISOtoHTML = map[string]string{
+	"afr": "af",  // Afrikaans
+	"ain": "ain", // Ainu
+	"alg": "alg", // Algonquian
+	"amh": "am",  // Amharic
+	"ara": "ar",  // Arabic
+	"arn": "arn", // Mapudungun
+	"bnt": "bnt", // Bantu
+	"bre": "br",  // Breton
+	"bul": "bg",  // Bulgarian
+	"bur": "my",  // Burmese
+	"chi": "zh",  // Chinese
+	"chn": "chn", // Chinook Jargon
+	"cze": "cs",  // Czech
+	"dan": "da",  // Danish
+	"dut": "nl",  // Dutch
+	"eng": "en",  // English
+	"epo": "eo",  // Esperanto
+	"est": "et",  // Estonian
+	"fil": "fil", // Filipino
+	"fin": "fi",  // Finnish
+	"fre": "fr",  // French
+	"geo": "ka",  // Georgian
+	"ger": "de",  // German
+	"glg": "gl",  // Galician
+	"grc": "grc", // Ancient Greek
+	"gre": "el",  // Modern Greek
+	"haw": "haw", // Hawaiian
+	"heb": "he",  // Hebrew
+	"hin": "hi",  // Hindi
+	"hun": "hu",  // Hungarian
+	"ice": "is",  // Icelandic
+	"ind": "id",  // Indonesian
+	"ita": "it",  // Italian
+	"jpn": "ja",  // Japanese
+	"khm": "km",  // Khmer
+	"kor": "ko",  // Korean
+	"kur": "ku",  // Kurdish
+	"lat": "la",  // Latin
+	"mal": "ml",  // Malayalam
+	"mao": "mi",  // Maori
+	"may": "ms",  // Malay
+	"mnc": "mnc", // Manchu
+	"mol": "ro",  // Moldavian
+	"mon": "mn",  // Mongolian
+	"nor": "no",  // Norwegian
+	"per": "fa",  // Persian
+	"pol": "pl",  // Polish
+	"por": "pt",  // Portuguese
+	"rum": "ro",  // Romanian
+	"rus": "ru",  // Russian
+	"san": "sa",  // Sanskrit
+	"scr": "hr",  // Croatian
+	"slo": "sk",  // Slovak
+	"slv": "sl",  // Slovenian
+	"som": "so",  // Somali
+	"spa": "es",  // Spanish
+	"swa": "sw",  // Swahili
+	"swe": "sv",  // Swedish
+	"tah": "ty",  // Tahitian
+	"tam": "ta",  // Tamil
+	"tgl": "tl",  // Tagalog
+	"tha": "th",  // Thai
+	"tib": "bo",  // Tibetan
+	"tur": "tr",  // Turkish
+	"ukr": "uk",  // Ukrainian
+	"urd": "ur",  // Urdu
+	"vie": "vi",  // Vietnamese
+	"yid": "yi",  // Yiddish
+}
--- a/jmdictForms.go
+++ b/jmdictForms.go
@ -0,0 +1,254 @@
+package yomichan
+
+import (
+	"os"
+	"strings"
+
+	"foosoft.net/projects/jmdict"
+	"golang.org/x/exp/slices"
+)
+
+// kata2hira converts katakana in word to hiragana by shifting the runes
+// 'ァ'..'ヶ' and the iteration marks 'ヽ'..'ヾ' down by 0x60. All other runes,
+// including the long-vowel mark 'ー', are left unchanged.
+func kata2hira(word string) string {
+	toHiragana := func(r rune) rune {
+		switch {
+		case r >= 'ァ' && r <= 'ヶ', r >= 'ヽ' && r <= 'ヾ':
+			return r - 0x60
+		default:
+			return r
+		}
+	}
+	return strings.Map(toHiragana, word)
+}
+
+// InfoSymbols returns the symbols for the headword's priority, rare-kanji,
+// irregular and outdated flags, in that order, joined by " | ". It returns ""
+// when none of the flags is set.
+func (h *headword) InfoSymbols() string {
+	flags := []struct {
+		set    bool
+		symbol string
+	}{
+		{h.IsPriority, prioritySymbol},
+		{h.IsRareKanji, rareKanjiSymbol},
+		{h.IsIrregular, irregularSymbol},
+		{h.IsOutdated, outdatedSymbol},
+	}
+
+	symbols := []string{}
+	for _, flag := range flags {
+		if flag.set {
+			symbols = append(symbols, flag.symbol)
+		}
+	}
+	return strings.Join(symbols, " | ")
+}
+
+// GlossText returns the headword's expression as plain glossary text. Ateji
+// expressions are wrapped in 〈〉, and any info symbols are appended in
+// full-width parentheses.
+func (h *headword) GlossText() string {
+	text := h.Expression
+	if h.IsAteji {
+		text = "〈" + text + "〉"
+	}
+	if symbols := h.InfoSymbols(); symbols != "" {
+		text += "（" + symbols + "）"
+	}
+	return text
+}
+
+// TableColHeaderText returns the column header for the headword in the forms
+// table: its KanjiForm, wrapped in 〈〉 when the headword is ateji.
+func (h *headword) TableColHeaderText() string {
+	header := h.KanjiForm()
+	if !h.IsAteji {
+		return header
+	}
+	return "〈" + header + "〉"
+}
+
+// TableRowHeaderText returns the row header for the headword in the forms
+// table: its reading, wrapped in 〈〉 when the headword is gikun.
+func (h *headword) TableRowHeaderText() string {
+	if !h.IsGikun {
+		return h.Reading
+	}
+	return "〈" + h.Reading + "〉"
+}
+
+// TableCellText returns the forms-table cell content for the headword: its
+// info symbols, or defaultSymbol when it has none.
+func (h *headword) TableCellText() string {
+	if symbols := h.InfoSymbols(); symbols != "" {
+		return symbols
+	}
+	return defaultSymbol
+}
+
+// KanjiForm returns the headword's expression, or the placeholder "∅" when
+// the headword is kana-only.
+func (h *headword) KanjiForm() string {
+	if !h.IsKanaOnly() {
+		return h.Expression
+	}
+	return "∅"
+}
+
+// jmdNeedsFormTable reports whether an entry's forms should be rendered as a
+// table. That is the case when a gikun headword is encountered, or when the
+// headwords that are neither search-only nor kana-only have more than one
+// distinct reading. Readings are compared after katakana-to-hiragana
+// conversion, so e.g. バカがい and ばかがい are not distinct.
+func jmdNeedsFormTable(headwords []headword) bool {
+	firstReading := ""
+	for _, h := range headwords {
+		if h.IsGikun {
+			return true
+		}
+		if h.IsSearchOnly || h.IsKanaOnly() {
+			continue
+		}
+		reading := kata2hira(h.Reading)
+		switch {
+		case firstReading == "":
+			firstReading = reading
+		case firstReading != reading:
+			return true
+		}
+	}
+	return false
+}
+
+// formTableData holds everything needed to lay out an entry's forms table.
+type formTableData struct {
+	kanjiForms    []string                     // column keys, in first-seen order
+	readings      []string                     // row keys, in first-seen order
+	colHeaderText map[string]string            // kanji form -> column header
+	rowHeaderText map[string]string            // reading -> row header
+	cellText      map[string]map[string]string // reading -> kanji form -> cell text
+}
+
+// tableData collects the forms-table layout for the given headwords.
+// Search-only headwords are ignored. Header text is taken from the first
+// headword seen for each kanji form or reading; a later headword with the
+// same reading and kanji form overwrites the cell text.
+func tableData(headwords []headword) formTableData {
+	data := formTableData{
+		kanjiForms:    []string{},
+		readings:      []string{},
+		colHeaderText: map[string]string{},
+		rowHeaderText: map[string]string{},
+		cellText:      map[string]map[string]string{},
+	}
+	for _, h := range headwords {
+		if h.IsSearchOnly {
+			continue
+		}
+
+		// colHeaderText gains a key exactly when kanjiForms gains an
+		// element, so map membership doubles as the "seen" check.
+		kanjiForm := h.KanjiForm()
+		if _, seen := data.colHeaderText[kanjiForm]; !seen {
+			data.kanjiForms = append(data.kanjiForms, kanjiForm)
+			data.colHeaderText[kanjiForm] = h.TableColHeaderText()
+		}
+
+		// Likewise, cellText gains a key exactly when readings does.
+		reading := h.Reading
+		if _, seen := data.cellText[reading]; !seen {
+			data.readings = append(data.readings, reading)
+			data.rowHeaderText[reading] = h.TableRowHeaderText()
+			data.cellText[reading] = map[string]string{}
+		}
+
+		data.cellText[reading][kanjiForm] = h.TableCellText()
+	}
+	return data
+}
+
+// formsTableGlossary renders the headwords as a structured-content table with
+// one column per kanji form and one row per reading, and returns it as a
+// single-item glossary. The table carries the data attribute
+// content="formsTable".
+func formsTableGlossary(headwords []headword) []any {
+	data := tableData(headwords)
+
+	plain := contentAttr{}
+	centered := contentAttr{textAlign: "center"}
+	leftAligned := contentAttr{textAlign: "left"}
+
+	// Header row: an empty corner cell, then one header per kanji form.
+	headerCells := []any{contentTableHeadCell(plain, "")}
+	for _, kanjiForm := range data.kanjiForms {
+		headerCells = append(headerCells, contentTableHeadCell(centered, data.colHeaderText[kanjiForm]))
+	}
+	rows := []any{contentTableRow(plain, headerCells...)}
+
+	// Body rows: the reading as row header, then one cell per kanji form.
+	for _, reading := range data.readings {
+		cells := []any{contentTableHeadCell(leftAligned, data.rowHeaderText[reading])}
+		for _, kanjiForm := range data.kanjiForms {
+			cells = append(cells, contentTableCell(centered, data.cellText[reading][kanjiForm]))
+		}
+		rows = append(rows, contentTableRow(plain, cells...))
+	}
+
+	tableAttr := contentAttr{data: map[string]string{"content": "formsTable"}}
+	table := contentTable(tableAttr, rows...)
+	return []any{contentStructure(table)}
+}
+
+// formsGlossary returns the GlossText of every headword that is not
+// search-only, in order, as a plain-text glossary. The result is never nil.
+func formsGlossary(headwords []headword) []any {
+	glossary := []any{}
+	for _, h := range headwords {
+		if !h.IsSearchOnly {
+			glossary = append(glossary, h.GlossText())
+		}
+	}
+	return glossary
+}
+
+// baseFormsTerm builds the headword-independent part of an entry's forms
+// term: the entry's sequence number, a glossary listing its forms (as a table
+// when jmdNeedsFormTable says so, otherwise as plain text), and the grammar
+// rules of every sense. Expression and reading are left for the caller to
+// fill in.
+func baseFormsTerm(entry jmdict.JmdictEntry) dbTerm {
+	base := dbTerm{Sequence: entry.Sequence}
+
+	headwords := extractHeadwords(entry)
+	buildGlossary := formsGlossary
+	if jmdNeedsFormTable(headwords) {
+		buildGlossary = formsTableGlossary
+	}
+	base.Glossary = buildGlossary(headwords)
+
+	for i := range entry.Sense {
+		base.addRules(grammarRules(entry.Sense[i].PartsOfSpeech)...)
+	}
+	return base
+}
+
+// formsExportDb reads the JMdict file at inputPath and writes a forms-only
+// dictionary to outputPath via writeDb. Every headword of every entry gets a
+// copy of the entry's base forms term; search-only headwords get a negated
+// sequence number. An empty title defaults to "JMdict Forms". languageName is
+// accepted to match the other exporters' signature but is not used.
+func formsExportDb(inputPath, outputPath, languageName, title string, stride int, pretty bool) error {
+	file, err := os.Open(inputPath)
+	if err != nil {
+		return err
+	}
+	defer file.Close()
+
+	dictionary, _, err := jmdict.LoadJmdictNoTransform(file)
+	if err != nil {
+		return err
+	}
+
+	terms := dbTermList{}
+	for _, entry := range dictionary.Entries {
+		baseTerm := baseFormsTerm(entry)
+		for _, h := range extractHeadwords(entry) {
+			term := baseTerm
+			term.Expression = h.Expression
+			term.Reading = h.Reading
+			if h.IsSearchOnly {
+				term.Sequence = -term.Sequence
+			}
+			terms = append(terms, term)
+		}
+	}
+
+	if title == "" {
+		title = "JMdict Forms"
+	}
+
+	recordData := map[string]dbRecordList{
+		"term": terms.crush(),
+		"tag":  dbRecordList{},
+	}
+
+	index := dbIndex{
+		Title:       title,
+		Revision:    "JMdict." + jmdictPublicationDate(dictionary),
+		Sequenced:   true,
+		Attribution: edrdgAttribution,
+	}
+	index.setDefaults()
+
+	return writeDb(outputPath, index, recordData, stride, pretty)
+}
--- a/jmdictGlossary.go
+++ b/jmdictGlossary.go
@ -0,0 +1,300 @@
+package yomichan
+
+import (
+	"fmt"
+	"strconv"
+
+	"foosoft.net/projects/jmdict"
+)
+
+// glossaryContainsLanguage reports whether at least one gloss in
+// glossary belongs to the given language (see glossContainsLanguage).
+func glossaryContainsLanguage(glossary []jmdict.JmdictGlossary, language string) bool {
+	for i := range glossary {
+		if glossContainsLanguage(glossary[i], language) {
+			return true
+		}
+	}
+	return false
+}
+
+// glossContainsLanguage reports whether gloss is written in language.
+// A gloss without an explicit language attribute counts as "eng".
+func glossContainsLanguage(gloss jmdict.JmdictGlossary, language string) bool {
+	if gloss.Language == nil {
+		return language == "eng"
+	}
+	return *gloss.Language == language
+}
+
+// makeGlossListItem wraps the text of a plain gloss in a list item
+// with default attributes. The language argument is not consulted.
+func makeGlossListItem(gloss jmdict.JmdictGlossary, language string) any {
+	return contentListItem(contentAttr{}, gloss.Content)
+}
+
+func makeInfoGlossListItem(gloss jmdict.JmdictGlossary, language string) any {
+	// Prepend gloss with "type" (literal, figurative, trademark, etc.)
+	glossTypeCode := *gloss.Type
+	contents := []any{}
+	if name, ok := glossTypeCodeToName[LangCode{language, glossTypeCode}]; ok {
+		if name != "" {
+			italicStyle := contentAttr{fontStyle: "italic"}
+			contents = append(contents, contentSpan(italicStyle, "("+name+")"), " ")
+		}
+	} else {
+		fmt.Println("Unknown glossary type code " + *gloss.Type + " for build language " + language)
+		contents = append(contents, "["+glossTypeCode+"] ")
+	}
+	contents = append(contents, gloss.Content)
+	listItem := contentListItem(contentAttr{}, contents...)
+	return listItem
+}
+
+func makeSourceLangListItem(sourceLanguage jmdict.JmdictSource, language string) any {
+	contents := []any{}
+
+	var srcLangCode string
+	if sourceLanguage.Language == nil {
+		srcLangCode = "eng"
+	} else {
+		srcLangCode = *sourceLanguage.Language
+	}
+
+	// Format: [Language] ([Partial?], [Wasei?]): [Original word?]
+	// [Language]
+	if langName, ok := langCodeToName[LangCode{language, srcLangCode}]; ok {
+		contents = append(contents, langName)
+	} else {
+		contents = append(contents, srcLangCode)
+		fmt.Println("Unable to convert ISO 639 code " + srcLangCode + " to its full name in language " + language)
+	}
+
+	// ([Partial?], [Wasei?])
+	var sourceLangTypeCode string
+	if sourceLanguage.Type == nil {
+		sourceLangTypeCode = ""
+	} else {
+		sourceLangTypeCode = *sourceLanguage.Type
+	}
+	var sourceLangType string
+	if val, ok := sourceLangTypeCodeToType[LangCode{language, sourceLangTypeCode}]; ok {
+		sourceLangType = val
+	} else {
+		sourceLangType = sourceLangTypeCode
+		fmt.Println("Unknown source language type code " + sourceLangTypeCode + " for build language " + language)
+	}
+	if sourceLangType != "" && sourceLanguage.Wasei == "y" {
+		contents = append(contents, " ("+sourceLangType+", wasei)")
+	} else if sourceLangType != "" {
+		contents = append(contents, " ("+sourceLangType+")")
+	} else if sourceLanguage.Wasei == "y" {
+		contents = append(contents, " (wasei)")
+	}
+
+	// : [Original word?]
+	if sourceLanguage.Content != "" {
+		contents = append(contents, ": ")
+		attr := contentAttr{lang: ISOtoHTML[srcLangCode]}
+		contents = append(contents, contentSpan(attr, sourceLanguage.Content))
+	}
+
+	listItem := contentListItem(contentAttr{}, contents...)
+	return listItem
+}
+
+// makeReferenceListItem renders a cross-reference or antonym note as a
+// list item. The item starts with the hint text registered for refType
+// in refNoteHint, followed by ": ".
+//
+// When the reference string cannot be parsed, or has no entry in
+// meta.referenceToSeq, the raw reference is shown inside 【】 brackets.
+// Otherwise the item holds an internal link to the referenced headword
+// (with its reading when the expression is known with more than one
+// reading) and a small span with the condensed glosses of the target
+// sense, prefixed with the sense number when the target entry has more
+// than one sense.
+func makeReferenceListItem(reference string, refType string, meta jmdictMetadata) any {
+	parts := []any{refNoteHint[LangCode{meta.language, refType}] + ": "}
+
+	refHeadword, senseNumber, parsed := parseReference(reference)
+	sequence, resolved := meta.referenceToSeq[reference]
+	if !parsed || !resolved {
+		parts = append(parts, "【"+reference+"】")
+		return contentListItem(contentAttr{}, parts...)
+	}
+
+	target := senseID{sequence: sequence, number: senseNumber}
+
+	glossText := " " + meta.condensedGlosses[target]
+	if meta.seqToSenseCount[target.sequence] > 1 {
+		glossText = " " + strconv.Itoa(target.number) + ". " + meta.condensedGlosses[target]
+	}
+	glossAttr := contentAttr{
+		fontSize:      "65%",
+		verticalAlign: "middle",
+		data:          map[string]string{"content": "refGlosses"},
+	}
+
+	showReading := len(meta.expHashToReadings[refHeadword.ExpHash()]) > 1
+	parts = append(parts,
+		refHeadword.ToInternalLink(showReading),
+		contentSpan(glossAttr, glossText),
+	)
+	return contentListItem(contentAttr{}, parts...)
+}
+
+// makeExampleListItem renders one example sentence as a list item.
+// Japanese sentences use default attributes; any other language gets
+// its HTML language code and a flag list marker looked up from
+// ISOtoHTML and ISOtoFlag.
+func makeExampleListItem(sentence jmdict.JmdictExampleSentence) any {
+	attr := contentAttr{}
+	if sentence.Lang != "jpn" {
+		attr.lang = ISOtoHTML[sentence.Lang]
+		attr.listStyleType = ISOtoFlag[sentence.Lang]
+	}
+	return contentListItem(attr, sentence.Text)
+}
+
+// listAttr builds the attributes for a list: its language, its list
+// marker style, and a data map whose "content" key is dataContent.
+func listAttr(lang string, listStyleType string, dataContent string) contentAttr {
+	attr := contentAttr{lang: lang, listStyleType: listStyleType}
+	attr.data = map[string]string{"content": dataContent}
+	return attr
+}
+
+// needsStructuredContent reports whether a sense carries anything
+// beyond plain glosses: a typed gloss in the given language, source
+// languages, notes, antonyms, cross-references, or example sentences.
+func needsStructuredContent(sense jmdict.JmdictSense, language string) bool {
+	for _, gloss := range sense.Glossary {
+		if gloss.Type != nil && glossContainsLanguage(gloss, language) {
+			return true
+		}
+	}
+	return len(sense.SourceLanguages) > 0 ||
+		len(sense.Information) > 0 ||
+		len(sense.Antonyms) > 0 ||
+		len(sense.References) > 0 ||
+		len(sense.Examples) > 0
+}
+
+// createGlossaryContent builds the structured-content glossary for one
+// sense. It emits up to seven unordered lists, always in this order and
+// each only when non-empty: plain glosses, typed ("info") glosses,
+// source languages, sense notes, antonyms, cross-references, and
+// example sentences. Glosses in languages other than meta.language are
+// ignored.
+func createGlossaryContent(sense jmdict.JmdictSense, meta jmdictMetadata) any {
+	sections := []any{}
+
+	// addList appends one unordered list unless it would be empty.
+	addList := func(lang, marker, dataContent string, items []any) {
+		if len(items) == 0 {
+			return
+		}
+		list := contentUnorderedList(listAttr(lang, marker, dataContent), items...)
+		sections = append(sections, list)
+	}
+
+	buildLang := ISOtoHTML[meta.language]
+	japanese := ISOtoHTML["jpn"]
+
+	// Normal glosses and information glosses, split by whether the
+	// gloss has a type.
+	plainItems, infoItems := []any{}, []any{}
+	for _, gloss := range sense.Glossary {
+		if !glossContainsLanguage(gloss, meta.language) {
+			continue
+		}
+		if gloss.Type == nil {
+			plainItems = append(plainItems, makeGlossListItem(gloss, meta.language))
+		} else {
+			infoItems = append(infoItems, makeInfoGlossListItem(gloss, meta.language))
+		}
+	}
+	addList(buildLang, "circle", "glossary", plainItems)
+	addList(buildLang, infoMarker, "infoGlossary", infoItems)
+
+	// Language-of-origin / loanword information
+	sourceItems := []any{}
+	for _, source := range sense.SourceLanguages {
+		sourceItems = append(sourceItems, makeSourceLangListItem(source, meta.language))
+	}
+	addList(buildLang, langMarker, "sourceLanguages", sourceItems)
+
+	// Sense notes; tagged as Japanese because notes often contain
+	// japanese text.
+	noteItems := []any{}
+	for _, note := range sense.Information {
+		noteItems = append(noteItems, contentListItem(contentAttr{}, note))
+	}
+	addList(japanese, noteMarker, "notes", noteItems)
+
+	// Antonyms
+	antonymItems := []any{}
+	for _, antonym := range sense.Antonyms {
+		antonymItems = append(antonymItems, makeReferenceListItem(antonym, "ant", meta))
+	}
+	addList(buildLang, antonymMarker, "antonyms", antonymItems)
+
+	// Cross-references
+	xrefItems := []any{}
+	for _, xref := range sense.References {
+		xrefItems = append(xrefItems, makeReferenceListItem(xref, "xref", meta))
+	}
+	addList(buildLang, refMarker, "references", xrefItems)
+
+	// Example sentences
+	exampleItems := []any{}
+	for _, example := range sense.Examples {
+		for _, sentence := range example.Sentences {
+			exampleItems = append(exampleItems, makeExampleListItem(sentence))
+		}
+	}
+	addList(japanese, ISOtoFlag["jpn"], "examples", exampleItems)
+
+	return contentStructure(sections...)
+}
+
+// createGlossary returns the glossary for one sense. Senses that need
+// structured content yield a single structured element; all others
+// yield the plain text of each gloss in meta.language. The result is
+// never nil.
+func createGlossary(sense jmdict.JmdictSense, meta jmdictMetadata) []any {
+	if needsStructuredContent(sense, meta.language) {
+		return []any{createGlossaryContent(sense, meta)}
+	}
+
+	glosses := []any{}
+	for _, gloss := range sense.Glossary {
+		if glossContainsLanguage(gloss, meta.language) {
+			glosses = append(glosses, gloss.Content)
+		}
+	}
+	return glosses
+}
--- a/jmdictHeadword.go
+++ b/jmdictHeadword.go
@ -0,0 +1,267 @@
+package yomichan
+
+import (
+	"fmt"
+	"hash/fnv"
+	"regexp"
+	"strconv"
+
+	"foosoft.net/projects/jmdict"
+	"golang.org/x/exp/slices"
+)
+
+// headword is one expression/reading pair extracted from a JMdict
+// entry, together with flags derived from its info and priority tags.
+type headword struct {
+	// Expression is the kanji form or, for headwords built without a
+	// kanji element, the reading itself.
+	Expression   string
+	// Reading is the kana reading; newHeadword leaves it empty for
+	// headwords built from a kanji element alone.
+	Reading      string
+	// TermTags is filled in by SetTermTags.
+	TermTags     []string
+	// Index is the position of this headword within the slice returned
+	// by extractHeadwords.
+	Index        int
+	// IsPriority is set when any of the tags ichi1, news1, gai1, spec1
+	// or spec2 is present.
+	IsPriority   bool
+	// IsIrregular is set by the info tags iK, ik and io.
+	IsIrregular  bool
+	// IsOutdated is set by the info tags oK and ok.
+	IsOutdated   bool
+	// IsRareKanji is set by the info tag rK, unless IsOutdated is also
+	// set (see SetFlags).
+	IsRareKanji  bool
+	// IsSearchOnly is set by the info tags sK and sk.
+	IsSearchOnly bool
+	// IsAteji is set by the info tag ateji.
+	IsAteji      bool
+	// IsGikun is set by the info tag gikun.
+	IsGikun      bool
+}
+
+// hash is a 64-bit FNV-1a digest as produced by hashText.
+type hash uint64
+
+// Hash returns the digest of the expression and reading joined by the
+// "␞" separator.
+func (h *headword) Hash() hash {
+	key := h.Expression + "␞" + h.Reading
+	return hashText(key)
+}
+
+// ExpHash returns the digest of the expression paired with itself,
+// i.e. the value Hash would give for a headword whose reading equals
+// its expression.
+func (h *headword) ExpHash() hash {
+	key := h.Expression + "␞" + h.Expression
+	return hashText(key)
+}
+
+// ReadingHash returns the digest of the reading paired with itself,
+// i.e. the value Hash would give for a headword whose expression
+// equals its reading.
+func (h *headword) ReadingHash() hash {
+	key := h.Reading + "␞" + h.Reading
+	return hashText(key)
+}
+
+// hashText returns the 64-bit FNV-1a digest of s.
+func hashText(s string) hash {
+	hasher := fnv.New64a()
+	hasher.Write([]byte(s))
+	sum := hasher.Sum64()
+	return hash(sum)
+}
+
+// IsKanaOnly reports whether the expression equals the reading and
+// consists solely of hiragana/katakana, halfwidth katakana, or the
+// wave dash '〜'.
+func (h *headword) IsKanaOnly() bool {
+	if h.Expression != h.Reading {
+		return false
+	}
+	for _, r := range h.Expression {
+		switch {
+		case r >= 'ぁ' && r <= 'ヿ':
+			// hiragana and katakana range
+		case r >= '･' && r <= 'ﾟ':
+			// halfwidth katakana range
+		case r == '〜':
+		default:
+			return false
+		}
+	}
+	return true
+}
+
+// Score rates the headword from its flags: +1 for a priority
+// headword, and -5 for each of the irregular, outdated, rare-kanji and
+// search-only flags that is set.
+func (h *headword) Score() int {
+	score := 0
+	if h.IsPriority {
+		score++
+	}
+	penalised := []bool{h.IsIrregular, h.IsOutdated, h.IsRareKanji, h.IsSearchOnly}
+	for _, flagged := range penalised {
+		if flagged {
+			score -= 5
+		}
+	}
+	return score
+}
+
+// ToInternalLink renders the headword as an internal link marked as
+// Japanese text. When includeReading is true and the reading differs
+// from the expression, the result is a span holding a link to the
+// expression followed by a link to the reading in full-width
+// parentheses; otherwise it is a single link to the expression.
+func (h *headword) ToInternalLink(includeReading bool) any {
+	japanese := contentAttr{lang: ISOtoHTML["jpn"]}
+	if includeReading && h.Expression != h.Reading {
+		return contentSpan(
+			japanese,
+			contentInternalLink(contentAttr{}, h.Expression),
+			"（",
+			contentInternalLink(contentAttr{}, h.Reading),
+			"）",
+		)
+	}
+	return contentInternalLink(japanese, h.Expression)
+}
+
+// SetFlags turns on the headword's boolean flags according to its
+// priority (freqTags) and information (infoTags) tags. Flags are only
+// ever switched on here, with one exception: a headword that is both
+// outdated and rare-kanji keeps only the outdated flag.
+func (h *headword) SetFlags(infoTags, freqTags []string) {
+	for _, tag := range freqTags {
+		switch tag {
+		case "ichi1", "news1", "gai1", "spec1", "spec2":
+			h.IsPriority = true
+		}
+	}
+
+	for _, tag := range infoTags {
+		switch tag {
+		case "iK", "ik", "io":
+			h.IsIrregular = true
+		case "oK", "ok":
+			h.IsOutdated = true
+		case "sK", "sk":
+			h.IsSearchOnly = true
+		case "rK":
+			h.IsRareKanji = true
+		case "ateji":
+			h.IsAteji = true
+		case "gikun":
+			h.IsGikun = true
+		}
+	}
+
+	if h.IsOutdated {
+		h.IsRareKanji = false
+	}
+}
+
+// newsFreqTagPattern recognises JMdict news-frequency tags ("nf"
+// followed by two digits). It is compiled once here because
+// SetTermTags consults it for every frequency tag of every headword.
+var newsFreqTagPattern = regexp.MustCompile(`nf\d\d`)
+
+// SetTermTags rebuilds h.TermTags from the headword's flags and its
+// frequency tags. SetFlags must have been called first, because the
+// flag-derived tags depend on the Is* fields.
+//
+// Tags are appended in this order: the priority tag, one tag per
+// entry of freqTags, then the irregular, outdated, rare-kanji, ateji
+// and gikun tags. Frequency tags are translated as follows:
+//   - "nfNN" becomes "news<k>k", where k is NN halved and rounded up;
+//   - "news1" and "news2" are dropped;
+//   - empty tags are dropped;
+//   - any other tag loses its last byte ("ichi1" -> "ichi").
+func (h *headword) SetTermTags(freqTags []string) {
+	h.TermTags = []string{}
+	if h.IsPriority {
+		h.TermTags = append(h.TermTags, priorityTagName)
+	}
+	for _, tag := range freqTags {
+		switch {
+		case newsFreqTagPattern.MatchString(tag):
+			// nf tags are divided into ranks of 500
+			// (nf01 to nf48), but it will be easier
+			// for the user to read 1k, 2k, etc.
+			var rank int
+			if _, err := fmt.Sscanf(tag, "nf%2d", &rank); err == nil {
+				rank = (rank + (rank % 2)) / 2
+				h.TermTags = append(h.TermTags, "news"+strconv.Itoa(rank)+"k")
+			}
+		case tag == "news1" || tag == "news2":
+			// superseded by the finer-grained nf tags
+		case tag == "":
+			// nothing to strip; slicing an empty tag would panic
+		default:
+			// drop the trailing digit: "ichi", "gai", or "spec"
+			h.TermTags = append(h.TermTags, tag[:len(tag)-1])
+		}
+	}
+	if h.IsIrregular {
+		h.TermTags = append(h.TermTags, irregularTagName)
+	}
+	if h.IsOutdated {
+		h.TermTags = append(h.TermTags, outdatedTagName)
+	}
+	if h.IsRareKanji {
+		h.TermTags = append(h.TermTags, rareKanjiTagName)
+	}
+	if h.IsAteji {
+		h.TermTags = append(h.TermTags, atejiTagName)
+	}
+	if h.IsGikun {
+		h.TermTags = append(h.TermTags, gikunTagName)
+	}
+}
+
+func newHeadword(kanji *jmdict.JmdictKanji, reading *jmdict.JmdictReading) headword {
+	h := headword{}
+	infoTags := []string{}
+	freqTags := []string{}
+	if kanji == nil {
+		h.Expression = reading.Reading
+		h.Reading = reading.Reading
+		infoTags = reading.Information
+		freqTags = reading.Priorities
+	} else if reading == nil {
+		// should only apply to search-only kanji terms
+		h.Expression = kanji.Expression
+		h.Reading = ""
+		infoTags = kanji.Information
+		freqTags = kanji.Priorities
+	} else {
+		h.Expression = kanji.Expression
+		h.Reading = reading.Reading
+		infoTags = union(kanji.Information, reading.Information)
+		freqTags = intersection(kanji.Priorities, reading.Priorities)
+	}
+	h.SetFlags(infoTags, freqTags)
+	h.SetTermTags(freqTags)
+	return h
+}
+
+// areAllKanjiIrregular reports whether the entry has kanji forms and
+// every one of them is rare, irregular, outdated or search-only. In
+// that case kana-only headwords are made for each kana form.
+func areAllKanjiIrregular(allKanji []jmdict.JmdictKanji) bool {
+	for i := range allKanji {
+		h := newHeadword(&allKanji[i], nil)
+		if !(h.IsRareKanji || h.IsIrregular || h.IsOutdated || h.IsSearchOnly) {
+			return false
+		}
+	}
+	return len(allKanji) > 0
+}
+
+// extractHeadwords lists every headword of an entry, assigning each
+// its position in the result as Index.
+//
+// Order of the result:
+//  1. if every kanji form is irregular (see areAllKanjiIrregular),
+//     one kana-only headword per reading;
+//  2. for each kanji form, either a kanji-only headword (search-only
+//     "sK" forms) or one headword per compatible reading;
+//  3. otherwise, kana-only headwords for readings that are marked
+//     no-kanji, are search-only ("sk"), or belong to an entry with no
+//     kanji at all.
+//
+// The result is never nil.
+func extractHeadwords(entry jmdict.JmdictEntry) []headword {
+	headwords := []headword{}
+	add := func(kanji *jmdict.JmdictKanji, reading *jmdict.JmdictReading) {
+		h := newHeadword(kanji, reading)
+		h.Index = len(headwords)
+		headwords = append(headwords, h)
+	}
+
+	kanaFirst := areAllKanjiIrregular(entry.Kanji)
+	if kanaFirst {
+		// Reading-only terms go before kanji+reading terms for the
+		// sake of the Index property, which affects the yomichan
+		// term ranking.
+		for i := range entry.Readings {
+			add(nil, &entry.Readings[i])
+		}
+	}
+
+	for i := range entry.Kanji {
+		kanji := &entry.Kanji[i]
+		if slices.Contains(kanji.Information, "sK") {
+			// Search-only kanji forms do not have associated readings.
+			add(kanji, nil)
+			continue
+		}
+		for j := range entry.Readings {
+			reading := &entry.Readings[j]
+			switch {
+			case reading.NoKanji != nil:
+				// reading is not paired with any kanji form
+			case slices.Contains(reading.Information, "sk"):
+				// Search-only kana forms do not have associated kanji forms.
+			case reading.Restrictions != nil && !slices.Contains(reading.Restrictions, kanji.Expression):
+				// reading is restricted to other kanji forms
+			default:
+				add(kanji, reading)
+			}
+		}
+	}
+
+	if !kanaFirst {
+		noKanjiInEntry := len(entry.Kanji) == 0
+		for i := range entry.Readings {
+			reading := &entry.Readings[i]
+			if reading.NoKanji != nil || noKanjiInEntry || slices.Contains(reading.Information, "sk") {
+				add(nil, reading)
+			}
+		}
+	}
+
+	return headwords
+}
--- a/jmdictMetadata.go
+++ b/jmdictMetadata.go
@ -0,0 +1,158 @@
+package yomichan
+
+import (
+	"strings"
+
+	"foosoft.net/projects/jmdict"
+	"golang.org/x/exp/slices"
+)
+
+// sequence is an alias for int, used wherever a value is a JMdict
+// entry sequence number (entry.Sequence).
+type sequence = int
+
+// jmdictMetadata holds lookup tables computed over a whole dictionary
+// by newJmdictMetadata, before any term is generated.
+type jmdictMetadata struct {
+	// language is the build language code, looked up from the language
+	// name via langNameToCode.
+	language           string
+	// condensedGlosses maps a sense to its untyped glosses in the
+	// build language, joined with "; ".
+	condensedGlosses   map[senseID]string
+	// seqToSenseCount is the number of senses per entry that contain
+	// at least one gloss in the build language.
+	seqToSenseCount    map[sequence]int
+	// seqToMainHeadword is the first headword added for each entry.
+	seqToMainHeadword  map[sequence]headword
+	// expHashToReadings lists the distinct readings seen for each
+	// expression hash.
+	expHashToReadings  map[hash][]string
+	// headwordHashToSeqs lists the distinct entries in which each
+	// expression/reading pair occurs.
+	headwordHashToSeqs map[hash][]sequence
+	// references accumulates every cross-reference and antonym string
+	// encountered by AddHeadword (duplicates included).
+	references         []string
+	// referenceToSeq maps a reference string to the entry chosen for
+	// it; built by MakeReferenceToSeqMap.
+	referenceToSeq     map[string]sequence
+	// hashToSearchValues inverts seqToSearchHashes; built by
+	// MakeHashToSearchValuesMap.
+	hashToSearchValues map[hash][]searchValue
+	// seqToSearchHashes lists, per entry and in insertion order, the
+	// distinct hashes under which the entry can be found.
+	seqToSearchHashes  map[sequence][]searchHash
+	// hasMultipleForms is true for entries with more than one headword
+	// that is not search-only.
+	hasMultipleForms   map[sequence]bool
+	// maxSenseCount is the largest value in seqToSenseCount.
+	maxSenseCount      int
+}
+
+// senseID identifies one sense of one entry. Values are also built
+// with positional literals, so the field order matters.
+type senseID struct {
+	// sequence is the sequence number of the entry.
+	sequence sequence
+	// number is the sense number within the entry; AddHeadword starts
+	// counting at 1 and only counts senses that have glosses in the
+	// build language.
+	number   int
+}
+
+// AddHeadword folds one headword of entry into the metadata tables.
+//
+// On the first call for an entry it counts the entry's senses that have
+// glosses in meta.language; entries with no such sense are otherwise
+// ignored. For the remaining entries it records the main headword
+// (first one seen), the headword and expression hashes, the search
+// hashes used to resolve references later, every cross-reference and
+// antonym of the senses that apply to this headword, and the condensed
+// gloss text of each such sense.
+func (meta *jmdictMetadata) AddHeadword(headword headword, entry jmdict.JmdictEntry) {
+
+	// Determine how many senses are in this entry for this language
+	if _, ok := meta.seqToSenseCount[entry.Sequence]; !ok {
+		senseCount := 0
+		for _, entrySense := range entry.Sense {
+			if glossaryContainsLanguage(entrySense.Glossary, meta.language) {
+				senseCount += 1
+			}
+		}
+		meta.seqToSenseCount[entry.Sequence] = senseCount
+	}
+
+	if meta.seqToSenseCount[entry.Sequence] == 0 {
+		return
+	}
+
+	// main headwords (first ones that are found in entries).
+	if _, ok := meta.seqToMainHeadword[entry.Sequence]; !ok {
+		meta.seqToMainHeadword[entry.Sequence] = headword
+	}
+
+	// hash the term pair so we can determine if it's used
+	// in more than one JMdict entry later.
+	headwordHash := headword.Hash()
+	if !slices.Contains(meta.headwordHashToSeqs[headwordHash], entry.Sequence) {
+		meta.headwordHashToSeqs[headwordHash] = append(meta.headwordHashToSeqs[headwordHash], entry.Sequence)
+	}
+
+	// hash the expression so that we can determine if we
+	// need to disambiguate it by displaying its reading
+	// in reference notes later.
+	expHash := headword.ExpHash()
+	if !slices.Contains(meta.expHashToReadings[expHash], headword.Reading) {
+		meta.expHashToReadings[expHash] = append(meta.expHashToReadings[expHash], headword.Reading)
+	}
+
+	// e.g. for JMdict (English) we expect to end up with
+	// seqToSearchHashes[1260670] == 【元・もと】、【元・元】、【もと・もと】、【本・もと】、【本・本】、【素・もと】、【素・素】、【基・もと】、【基・基】
+	// used for correlating references to sequence numbers later.
+	searchHashes := []searchHash{
+		searchHash{headwordHash, headword.IsPriority},
+		searchHash{expHash, headword.IsPriority},
+		searchHash{headword.ReadingHash(), headword.IsPriority},
+	}
+	for _, x := range searchHashes {
+		if !slices.Contains(meta.seqToSearchHashes[entry.Sequence], x) {
+			meta.seqToSearchHashes[entry.Sequence] = append(meta.seqToSearchHashes[entry.Sequence], x)
+		}
+	}
+
+	currentSenseNumber := 1
+	for _, entrySense := range entry.Sense {
+		// Senses without glosses in this language are not numbered.
+		if !glossaryContainsLanguage(entrySense.Glossary, meta.language) {
+			continue
+		}
+		// Senses restricted to other readings or kanji forms keep their
+		// number but contribute nothing for this headword.
+		if entrySense.RestrictedReadings != nil && !slices.Contains(entrySense.RestrictedReadings, headword.Reading) {
+			currentSenseNumber += 1
+			continue
+		}
+		if entrySense.RestrictedKanji != nil && !slices.Contains(entrySense.RestrictedKanji, headword.Expression) {
+			currentSenseNumber += 1
+			continue
+		}
+
+		// Append straight onto meta.references; appending the antonyms
+		// onto entrySense.References first could write into the
+		// entry's own backing array.
+		meta.references = append(meta.references, entrySense.References...)
+		meta.references = append(meta.references, entrySense.Antonyms...)
+
+		currentSense := senseID{entry.Sequence, currentSenseNumber}
+		if meta.condensedGlosses[currentSense] == "" {
+			glosses := []string{}
+			for _, gloss := range entrySense.Glossary {
+				if glossContainsLanguage(gloss, meta.language) && gloss.Type == nil {
+					glosses = append(glosses, gloss.Content)
+				}
+			}
+			meta.condensedGlosses[currentSense] = strings.Join(glosses, "; ")
+		}
+		currentSenseNumber += 1
+	}
+}
+
+// newJmdictMetadata scans every entry of dictionary and returns the
+// metadata tables for the build language named by languageName. It
+// registers each headword via AddHeadword, notes which entries have
+// more than one non-search-only form, resolves references to sequence
+// numbers, and records the largest per-entry sense count.
+func newJmdictMetadata(dictionary jmdict.Jmdict, languageName string) jmdictMetadata {
+	meta := jmdictMetadata{
+		language:           langNameToCode[languageName],
+		condensedGlosses:   make(map[senseID]string),
+		seqToSenseCount:    make(map[sequence]int),
+		seqToMainHeadword:  make(map[sequence]headword),
+		expHashToReadings:  make(map[hash][]string),
+		headwordHashToSeqs: make(map[hash][]sequence),
+		seqToSearchHashes:  make(map[sequence][]searchHash),
+		hasMultipleForms:   make(map[sequence]bool),
+		references:         []string{},
+	}
+
+	for _, entry := range dictionary.Entries {
+		visibleForms := 0
+		for _, hw := range extractHeadwords(entry) {
+			meta.AddHeadword(hw, entry)
+			if !hw.IsSearchOnly {
+				visibleForms++
+			}
+		}
+		meta.hasMultipleForms[entry.Sequence] = visibleForms > 1
+	}
+
+	// this correlation process will be unnecessary once JMdict
+	// includes sequence numbers in its cross-reference data
+	meta.MakeReferenceToSeqMap()
+
+	for _, count := range meta.seqToSenseCount {
+		if count > meta.maxSenseCount {
+			meta.maxSenseCount = count
+		}
+	}
+
+	return meta
+}
--- a/jmdictReferences.go
+++ b/jmdictReferences.go
@ -0,0 +1,166 @@
+package yomichan
+
+import (
+	"fmt"
+	"strconv"
+	"strings"
+)
+
+/*
+ * In the future, JMdict will be updated to include sequence numbers
+ * with each cross reference. At that time, most of the functions and
+ * types defined in this file will become unnecessary.  see:
+ * https://www.edrdg.org/jmdict_edict_list/2022/msg00008.html
+ */
+
+// searchValue is one candidate entry for a search hash, as stored in
+// jmdictMetadata.hashToSearchValues.
+type searchValue struct {
+	// sequence is the sequence number of the candidate entry.
+	sequence   sequence
+	// index is the position of the hash within the entry's list of
+	// search hashes; FindBestSequence prefers lower values.
+	index      int
+	// isPriority is copied from the searchHash the value was built from.
+	isPriority bool
+}
+
+// searchHash pairs a hash under which an entry can be found with the
+// priority flag of the headword that produced it. Values are built
+// with positional literals, so the field order matters.
+type searchHash struct {
+	// hash is a headword, expression, or reading hash.
+	hash       hash
+	// isPriority is the IsPriority flag of the originating headword.
+	isPriority bool
+}
+
+// parseReference splits a JMdict cross-reference string into the
+// headword it names and the sense number it points at.
+//
+// Reference strings in JMdict currently consist of 3 parts at most,
+// separated by ・ characters. The latter two parts are optional:
+//
+//   - 1 part: (Kanji) or (Reading);
+//   - 2 parts: [Kanji + (Reading or Sense)] or (Reading + Sense);
+//   - 3 parts: Expression + Reading + Sense.
+//
+// When the sense number is not specified, it is implied to be the
+// first sense. When only one of kanji/reading is given, the returned
+// headword carries it as both Expression and Reading. Whitespace
+// around a sense number is ignored in both the 2-part and 3-part
+// forms.
+//
+// The boolean result is false (and a message is printed) when the
+// string has more than 3 parts or a 3-part string does not end in an
+// integer.
+func parseReference(reference string) (headword, int, bool) {
+	var h headword
+	var senseNumber int
+	ok := true
+
+	refParts := strings.Split(reference, "・")
+	switch len(refParts) {
+	case 1:
+		h = headword{Expression: refParts[0], Reading: refParts[0]}
+		senseNumber = 1
+	case 2:
+		// The second part is a sense number if it parses as an
+		// integer, and a reading otherwise.
+		if val, err := strconv.Atoi(strings.TrimSpace(refParts[1])); err == nil {
+			h = headword{Expression: refParts[0], Reading: refParts[0]}
+			senseNumber = val
+		} else {
+			h = headword{Expression: refParts[0], Reading: refParts[1]}
+			senseNumber = 1
+		}
+	case 3:
+		h = headword{Expression: refParts[0], Reading: refParts[1]}
+		if val, err := strconv.Atoi(strings.TrimSpace(refParts[2])); err == nil {
+			senseNumber = val
+		} else {
+			fmt.Println("Unexpected format (3rd part not integer) for x-ref \"" + reference + "\"")
+			ok = false
+		}
+	default:
+		fmt.Println("Unexpected format for x-ref \"" + reference + "\"")
+		ok = false
+	}
+	return h, senseNumber, ok
+}
+
+// MakeReferenceToSeqMap rebuilds meta.referenceToSeq by resolving each
+// collected reference string to a sequence number with
+// FindBestSequence. It first rebuilds the hash-to-search-values table
+// that FindBestSequence reads. References that cannot be resolved are
+// reported on standard output and left out of the map.
+func (meta *jmdictMetadata) MakeReferenceToSeqMap() {
+	meta.referenceToSeq = make(map[string]sequence)
+	meta.MakeHashToSearchValuesMap()
+
+	for _, reference := range meta.references {
+		// Only non-zero sequences are ever stored, so presence in
+		// the map means the reference is already resolved.
+		if _, resolved := meta.referenceToSeq[reference]; resolved {
+			continue
+		}
+		seq := meta.FindBestSequence(reference)
+		if seq == 0 {
+			fmt.Println("Unable to convert reference to sequence number: `" + reference + "`")
+			continue
+		}
+		meta.referenceToSeq[reference] = seq
+	}
+}
+
+// MakeHashToSearchValuesMap rebuilds meta.hashToSearchValues by
+// inverting meta.seqToSearchHashes: each hash maps to the entries that
+// contain it, together with the hash's position in that entry's list
+// and its priority flag. The order of values under one hash follows
+// map iteration and is therefore unspecified.
+func (meta *jmdictMetadata) MakeHashToSearchValuesMap() {
+	byHash := make(map[hash][]searchValue)
+	for seq, hashes := range meta.seqToSearchHashes {
+		for position, sh := range hashes {
+			byHash[sh.hash] = append(byHash[sh.hash], searchValue{
+				sequence:   seq,
+				index:      position,
+				isPriority: sh.isPriority,
+			})
+		}
+	}
+	meta.hashToSearchValues = byHash
+}
+
+/*
+ * Generally, correspondence is determined by the order in which term
+ * pairs are extracted from each JMdict entry. Take for example the
+ * JMdict entry for ご本, which contains a reference to 本 (without a
+ * reading specified). To correlate this reference with a sequence
+ * number, our program searches each entry for the hash of【本・本】.
+ * There are two entries in which it is found in JMdict (English):
+ *
+ * sequence 1260670: 【元・もと】、【元・元】、【もと・もと】、【本・もと】、【本・本】、【素・もと】、【素・素】、【基・もと】、【基・基】
+ * sequence 1522150: 【本・ほん】、【本・本】、【ほん・ほん】
+ *
+ * Because 【本・本】 is closer to the beginning of the array in the
+ * latter (i.e., has the lowest index), sequence number 1522150 is
+ * returned.
+ *
+ * In situations in which multiple sequences are found with the same
+ * score, the entry with a priority tag ("news1", "ichi1", "spec1",
+ * "spec2", "gai1") is given preference. This mostly affects
+ * katakana-only loanwords like ラグ.
+ *
+ * To improve accuracy, this method also checks to see if the
+ * reference's specified sense number really exists in the
+ * corresponding entry. For example, sequence 1582850 【如何で・いかんで】
+ * has a reference to sense #2 of いかん (no kanji specified), which
+ * could belong to 13 different sequences. However, sequences 1582850
+ * and 2829697 are the only 2 of those 13 which contain more than one
+ * sense. Incidentally, sequence 1582850 is the correct match.
+ *
+ * All else being equal, the entry with the smallest sequence number
+ * is chosen. References in the JMdict file are currently ambiguous,
+ * and getting this perfect won't be possible until sequence numbers
+ * are explicitly identified in these references.  See:
+ * https://github.com/JMdictProject/JMdictIssues/issues/61
+ */
+func (meta *jmdictMetadata) FindBestSequence(reference string) sequence {
+	target, senseNumber, ok := parseReference(reference)
+	if !ok {
+		return 0
+	}
+
+	// Sentinel "no match yet": sequence 0 with an index larger than
+	// any real one.
+	best := searchValue{sequence: 0, index: 100000, isPriority: false}
+	for _, candidate := range meta.hashToSearchValues[target.Hash()] {
+		sameIndex := best.index == candidate.index
+		switch {
+		case meta.seqToSenseCount[candidate.sequence] < senseNumber:
+			// entry must contain the specified sense
+		case best.index < candidate.index:
+			// lower indices are better
+		case sameIndex && best.isPriority && !candidate.isPriority:
+			// if indices match, a priority entry beats a non-priority one
+		case sameIndex && best.isPriority == candidate.isPriority && best.sequence < candidate.sequence:
+			// if indices and priority match, lower sequence
+			// numbers are better
+		default:
+			best = candidate
+		}
+	}
+	return best.sequence
+}
--- a/jmdictTags.go
+++ b/jmdictTags.go
@ -0,0 +1,348 @@
+package yomichan
+
+import (
+	"fmt"
+	"strconv"
+
+	"golang.org/x/exp/slices"
+)
+
+// senseNumberTags builds one tag for every sense index from 1 through
+// maxSenseCount. Each tag is named after its decimal index. When
+// maxSenseCount is below 1 the result is an empty, non-nil slice.
+func senseNumberTags(maxSenseCount int) []dbTag {
+	tags := []dbTag{}
+	for n := 1; n <= maxSenseCount; n++ {
+		label := strconv.Itoa(n)
+		tags = append(tags, dbTag{
+			Name:  label,
+			Order: -10, // these tags will appear on the left side
+			Notes: "JMdict Sense #" + label,
+		})
+	}
+	return tags
+}
+
+func newsFrequencyTags() []dbTag {
+	// 24,000 ranks divided into 24 tags, news1k ... news24k
+	tags := []dbTag{}
+	for i := 1; i <= 24; i++ {
+		tagName := "news" + strconv.Itoa(i) + "k"
+		var startRank string
+		if i == 1 {
+			startRank = "1"
+		} else {
+			// technically should be ",001", but that looks odd
+			startRank = strconv.Itoa(i-1) + ",000"
+		}
+		endRank := strconv.Itoa(i) + ",000"
+		tag := dbTag{
+			Name:     tagName,
+			Order:    -2,
+			Score:    0,
+			Category: "frequent",
+			Notes:    "ranked between the top " + startRank + " and " + endRank + " words in a frequency analysis of the Mainichi Shimbun (1990s)",
+		}
+		tags = append(tags, tag)
+	}
+	return tags
+}
+
+// entityTags returns the known entity tags with their Notes replaced by
+// the descriptions in entities, a map from entity name to its notes.
+//
+// A name that is not among the known tags is reported on standard
+// output and appended as a new tag carrying only Name and Notes. Names
+// are visited in sorted order so that both the report and the appended
+// tags come out the same on every run.
+func entityTags(entities map[string]string) []dbTag {
+	tags := knownEntityTags()
+
+	// Index the known tags by name once instead of scanning the slice
+	// for every entity. The first occurrence wins, which matches a
+	// front-to-back search.
+	indexByName := make(map[string]int, len(tags))
+	for i, tag := range tags {
+		if _, seen := indexByName[tag.Name]; !seen {
+			indexByName[tag.Name] = i
+		}
+	}
+
+	// Map iteration order is random; sort the names for stable output.
+	names := make([]string, 0, len(entities))
+	for name := range entities {
+		names = append(names, name)
+	}
+	slices.Sort(names)
+
+	for _, name := range names {
+		notes := entities[name]
+		if idx, known := indexByName[name]; known {
+			tags[idx].Notes = notes
+			continue
+		}
+		fmt.Println("Unknown tag type \"" + name + "\": " + notes)
+		tags = append(tags, dbTag{Name: name, Notes: notes})
+	}
+	return tags
+}
+
+// customDbTags lists the tags that are declared here by hand: the
+// priority and form-quality markers, the "ichi", "spec" and "gai"
+// frequency markers, and the "forms" tag.
+func customDbTags() []dbTag {
+	custom := []dbTag{
+		{Name: priorityTagName, Order: -10, Score: 10, Category: "popular", Notes: "high priority term"},
+		{Name: rareKanjiTagName, Order: 0, Score: -5, Category: "archaism", Notes: "rarely-used kanji form of this expression"},
+		{Name: irregularTagName, Order: 0, Score: -5, Category: "archaism", Notes: "irregular form of this expression"},
+		{Name: outdatedTagName, Order: 0, Score: -5, Category: "archaism", Notes: "outdated form of this expression"},
+		{Name: "ichi", Order: -2, Score: 0, Category: "frequent", Notes: "included in Ichimango Goi Bunruishuu (１万語語彙分類集)"},
+		{Name: "spec", Order: -2, Score: 0, Category: "frequent", Notes: "specified as common by JMdict editors"},
+		{Name: "gai", Order: -2, Score: 0, Category: "frequent", Notes: "common loanword (gairaigo・外来語)"},
+		{Name: "forms", Order: 0, Score: 0, Category: "", Notes: "other surface forms and readings"},
+	}
+	return custom
+}
+
+// knownEntityTags returns the table of recognized JMdict entity tags,
+// grouped by the element they annotate (reading info, kanji info, misc,
+// part of speech, field, dialect). Notes are left empty here; each
+// trailing comment records the entity's meaning.
+func knownEntityTags() []dbTag {
+	known := []dbTag{
+		// see: https://www.edrdg.org/jmdictdb/cgi-bin/edhelp.py?svc=jmdict&sid=#kwabbr
+		// additional descriptions at the beginning of the JMdict file
+
+		// <re_inf> reading info
+		{Name: "gikun", Order: 0, Score: 0, Category: ""}, // gikun (meaning as reading) or jukujikun (special kanji reading)
+		{Name: "ik", Order: 0, Score: -5, Category: ""},   // word containing irregular kana usage
+		{Name: "ok", Order: 0, Score: -5, Category: ""},   // out-dated or obsolete kana usage
+		{Name: "sk", Order: 0, Score: -5, Category: ""},   // search-only kana form
+
+		// <ke_inf> kanji info
+		/* kanji info also has a "ik" entity that would go here if not already for the re_inf tag */
+		{Name: "ateji", Order: 0, Score: 0, Category: ""}, // ateji (phonetic) reading
+		{Name: "iK", Order: 0, Score: -5, Category: ""},   // word containing irregular kanji usage
+		{Name: "io", Order: 0, Score: -5, Category: ""},   // irregular okurigana usage
+		{Name: "oK", Order: 0, Score: -5, Category: ""},   // word containing out-dated kanji or kanji usage
+		{Name: "rK", Order: 0, Score: -5, Category: ""},   // rarely-used kanji form
+		{Name: "sK", Order: 0, Score: -5, Category: ""},   // search-only kanji form
+
+		// <misc> miscellaneous sense info
+		{Name: "abbr", Order: 0, Score: 0, Category: ""},              // abbreviation
+		{Name: "arch", Order: -4, Score: 0, Category: "archaism"},     // archaism
+		{Name: "char", Order: 0, Score: 0, Category: ""},              // character
+		{Name: "chn", Order: 0, Score: 0, Category: ""},               // children's language
+		{Name: "col", Order: 0, Score: 0, Category: ""},               // colloquialism
+		{Name: "company", Order: 0, Score: 0, Category: ""},           // company name
+		{Name: "creat", Order: 0, Score: 0, Category: ""},             // creature
+		{Name: "dated", Order: -4, Score: 0, Category: "archaism"},    // dated term
+		{Name: "dei", Order: 0, Score: 0, Category: ""},               // deity
+		{Name: "derog", Order: 0, Score: 0, Category: ""},             // derogatory
+		{Name: "doc", Order: 0, Score: 0, Category: ""},               // document
+		{Name: "euph", Order: 0, Score: 0, Category: ""},              // euphemistic
+		{Name: "ev", Order: 0, Score: 0, Category: ""},                // event
+		{Name: "fam", Order: 0, Score: 0, Category: ""},               // familiar language
+		{Name: "fem", Order: 0, Score: 0, Category: ""},               // female term or language
+		{Name: "fict", Order: 0, Score: 0, Category: ""},              // fiction
+		{Name: "form", Order: 0, Score: 0, Category: ""},              // formal or literary term
+		{Name: "given", Order: 0, Score: 0, Category: ""},             // given name or forename, gender not specified
+		{Name: "group", Order: 0, Score: 0, Category: ""},             // group
+		{Name: "hist", Order: 0, Score: 0, Category: ""},              // historical term
+		{Name: "hon", Order: 0, Score: 0, Category: ""},               // honorific or respectful (sonkeigo) language
+		{Name: "hum", Order: 0, Score: 0, Category: ""},               // humble (kenjougo) language
+		{Name: "id", Order: -5, Score: 0, Category: "expression"},     // idiomatic expression
+		{Name: "joc", Order: 0, Score: 0, Category: ""},               // jocular, humorous term
+		{Name: "leg", Order: 0, Score: 0, Category: ""},               // legend
+		{Name: "m-sl", Order: 0, Score: 0, Category: ""},              // manga slang
+		{Name: "male", Order: 0, Score: 0, Category: ""},              // male term or language
+		{Name: "myth", Order: 0, Score: 0, Category: ""},              // mythology
+		{Name: "net-sl", Order: 0, Score: 0, Category: ""},            // Internet slang
+		{Name: "obj", Order: 0, Score: 0, Category: ""},               // object
+		{Name: "obs", Order: -4, Score: 0, Category: "archaism"},      // obsolete term
+		{Name: "on-mim", Order: 0, Score: 0, Category: ""},            // onomatopoeic or mimetic word
+		{Name: "organization", Order: 0, Score: 0, Category: ""},      // organization name
+		{Name: "oth", Order: 0, Score: 0, Category: ""},               // other
+		{Name: "person", Order: 0, Score: 0, Category: ""},            // full name of a particular person
+		{Name: "place", Order: 0, Score: 0, Category: ""},             // place name
+		{Name: "poet", Order: 0, Score: 0, Category: ""},              // poetical term
+		{Name: "pol", Order: 0, Score: 0, Category: ""},               // polite (teineigo) language
+		{Name: "product", Order: 0, Score: 0, Category: ""},           // product name
+		{Name: "proverb", Order: 0, Score: 0, Category: "expression"}, // proverb
+		{Name: "quote", Order: 0, Score: 0, Category: "expression"},   // quotation
+		{Name: "rare", Order: -4, Score: 0, Category: "archaism"},     // rare
+		{Name: "relig", Order: 0, Score: 0, Category: ""},             // religion
+		{Name: "sens", Order: 0, Score: 0, Category: ""},              // sensitive
+		{Name: "serv", Order: 0, Score: 0, Category: ""},              // service
+		{Name: "ship", Order: 0, Score: 0, Category: ""},              // ship name
+		{Name: "sl", Order: 0, Score: 0, Category: ""},                // slang
+		{Name: "station", Order: 0, Score: 0, Category: ""},           // railway station
+		{Name: "surname", Order: 0, Score: 0, Category: ""},           // family or surname
+		{Name: "uk", Order: 0, Score: 0, Category: ""},                // word usually written using kana alone
+		{Name: "unclass", Order: 0, Score: 0, Category: ""},           // unclassified name
+		{Name: "vulg", Order: 0, Score: 0, Category: ""},              // vulgar expression or word
+		{Name: "work", Order: 0, Score: 0, Category: ""},              // work of art, literature, music, etc. name
+		{Name: "X", Order: 0, Score: 0, Category: ""},                 // rude or X-rated term (not displayed in educational software)
+		{Name: "yoji", Order: 0, Score: 0, Category: ""},              // yojijukugo
+
+		// <pos> part-of-speech info
+		{Name: "adj-f", Order: -3, Score: 0, Category: "partOfSpeech"},     // noun or verb acting prenominally
+		{Name: "adj-i", Order: -3, Score: 0, Category: "partOfSpeech"},     // adjective (keiyoushi)
+		{Name: "adj-ix", Order: -3, Score: 0, Category: "partOfSpeech"},    // adjective (keiyoushi) - yoi/ii class
+		{Name: "adj-kari", Order: -3, Score: 0, Category: "partOfSpeech"},  // 'kari' adjective (archaic)
+		{Name: "adj-ku", Order: -3, Score: 0, Category: "partOfSpeech"},    // 'ku' adjective (archaic)
+		{Name: "adj-na", Order: -3, Score: 0, Category: "partOfSpeech"},    // adjectival nouns or quasi-adjectives (keiyodoshi)
+		{Name: "adj-nari", Order: -3, Score: 0, Category: "partOfSpeech"},  // archaic/formal form of na-adjective
+		{Name: "adj-no", Order: -3, Score: 0, Category: "partOfSpeech"},    // nouns which may take the genitive case particle 'no'
+		{Name: "adj-pn", Order: -3, Score: 0, Category: "partOfSpeech"},    // pre-noun adjectival (rentaishi)
+		{Name: "adj-shiku", Order: -3, Score: 0, Category: "partOfSpeech"}, // 'shiku' adjective (archaic)
+		{Name: "adj-t", Order: -3, Score: 0, Category: "partOfSpeech"},     // 'taru' adjective
+		{Name: "adv", Order: -3, Score: 0, Category: "partOfSpeech"},       // adverb (fukushi)
+		{Name: "adv-to", Order: -3, Score: 0, Category: "partOfSpeech"},    // adverb taking the 'to' particle
+		{Name: "aux", Order: -3, Score: 0, Category: "partOfSpeech"},       // auxiliary
+		{Name: "aux-adj", Order: -3, Score: 0, Category: "partOfSpeech"},   // auxiliary adjective
+		{Name: "aux-v", Order: -3, Score: 0, Category: "partOfSpeech"},     // auxiliary verb
+		{Name: "conj", Order: -3, Score: 0, Category: "partOfSpeech"},      // conjunction
+		{Name: "cop", Order: -3, Score: 0, Category: "partOfSpeech"},       // copula
+		{Name: "ctr", Order: -3, Score: 0, Category: "partOfSpeech"},       // counter
+		{Name: "exp", Order: -5, Score: 0, Category: "expression"},         // expressions (phrases, clauses, etc.)
+		{Name: "int", Order: -3, Score: 0, Category: "partOfSpeech"},       // interjection (kandoushi)
+		{Name: "n", Order: -3, Score: 0, Category: "partOfSpeech"},         // noun (common) (futsuumeishi)
+		{Name: "n-adv", Order: -3, Score: 0, Category: "partOfSpeech"},     // adverbial noun (fukushitekimeishi)
+		{Name: "n-pr", Order: -3, Score: 0, Category: "partOfSpeech"},      // proper noun
+		{Name: "n-pref", Order: -3, Score: 0, Category: "partOfSpeech"},    // noun, used as a prefix
+		{Name: "n-suf", Order: -3, Score: 0, Category: "partOfSpeech"},     // noun, used as a suffix
+		{Name: "n-t", Order: -3, Score: 0, Category: "partOfSpeech"},       // noun (temporal) (jisoumeishi)
+		{Name: "num", Order: -3, Score: 0, Category: "partOfSpeech"},       // numeric
+		{Name: "pn", Order: -3, Score: 0, Category: "partOfSpeech"},        // pronoun
+		{Name: "pref", Order: -3, Score: 0, Category: "partOfSpeech"},      // prefix
+		{Name: "prt", Order: -3, Score: 0, Category: "partOfSpeech"},       // particle
+		{Name: "suf", Order: -3, Score: 0, Category: "partOfSpeech"},       // suffix
+		{Name: "unc", Order: -3, Score: 0, Category: "partOfSpeech"},       // unclassified
+		{Name: "v-unspec", Order: -3, Score: 0, Category: "partOfSpeech"},  // verb unspecified
+		{Name: "v1", Order: -3, Score: 0, Category: "partOfSpeech"},        // Ichidan verb
+		{Name: "v1-s", Order: -3, Score: 0, Category: "partOfSpeech"},      // Ichidan verb - kureru special class
+		{Name: "v2a-s", Order: -3, Score: 0, Category: "partOfSpeech"},     // Nidan verb with 'u' ending (archaic)
+		{Name: "v2b-k", Order: -3, Score: 0, Category: "partOfSpeech"},     // Nidan verb (upper class) with 'bu' ending (archaic)
+		{Name: "v2b-s", Order: -3, Score: 0, Category: "partOfSpeech"},     // Nidan verb (lower class) with 'bu' ending (archaic)
+		{Name: "v2d-k", Order: -3, Score: 0, Category: "partOfSpeech"},     // Nidan verb (upper class) with 'dzu' ending (archaic)
+		{Name: "v2d-s", Order: -3, Score: 0, Category: "partOfSpeech"},     // Nidan verb (lower class) with 'dzu' ending (archaic)
+		{Name: "v2g-k", Order: -3, Score: 0, Category: "partOfSpeech"},     // Nidan verb (upper class) with 'gu' ending (archaic)
+		{Name: "v2g-s", Order: -3, Score: 0, Category: "partOfSpeech"},     // Nidan verb (lower class) with 'gu' ending (archaic)
+		{Name: "v2h-k", Order: -3, Score: 0, Category: "partOfSpeech"},     // Nidan verb (upper class) with 'hu/fu' ending (archaic)
+		{Name: "v2h-s", Order: -3, Score: 0, Category: "partOfSpeech"},     // Nidan verb (lower class) with 'hu/fu' ending (archaic)
+		{Name: "v2k-k", Order: -3, Score: 0, Category: "partOfSpeech"},     // Nidan verb (upper class) with 'ku' ending (archaic)
+		{Name: "v2k-s", Order: -3, Score: 0, Category: "partOfSpeech"},     // Nidan verb (lower class) with 'ku' ending (archaic)
+		{Name: "v2m-k", Order: -3, Score: 0, Category: "partOfSpeech"},     // Nidan verb (upper class) with 'mu' ending (archaic)
+		{Name: "v2m-s", Order: -3, Score: 0, Category: "partOfSpeech"},     // Nidan verb (lower class) with 'mu' ending (archaic)
+		{Name: "v2n-s", Order: -3, Score: 0, Category: "partOfSpeech"},     // Nidan verb (lower class) with 'nu' ending (archaic)
+		{Name: "v2r-k", Order: -3, Score: 0, Category: "partOfSpeech"},     // Nidan verb (upper class) with 'ru' ending (archaic)
+		{Name: "v2r-s", Order: -3, Score: 0, Category: "partOfSpeech"},     // Nidan verb (lower class) with 'ru' ending (archaic)
+		{Name: "v2s-s", Order: -3, Score: 0, Category: "partOfSpeech"},     // Nidan verb (lower class) with 'su' ending (archaic)
+		{Name: "v2t-k", Order: -3, Score: 0, Category: "partOfSpeech"},     // Nidan verb (upper class) with 'tsu' ending (archaic)
+		{Name: "v2t-s", Order: -3, Score: 0, Category: "partOfSpeech"},     // Nidan verb (lower class) with 'tsu' ending (archaic)
+		{Name: "v2w-s", Order: -3, Score: 0, Category: "partOfSpeech"},     // Nidan verb (lower class) with 'u' ending and 'we' conjugation (archaic)
+		{Name: "v2y-k", Order: -3, Score: 0, Category: "partOfSpeech"},     // Nidan verb (upper class) with 'yu' ending (archaic)
+		{Name: "v2y-s", Order: -3, Score: 0, Category: "partOfSpeech"},     // Nidan verb (lower class) with 'yu' ending (archaic)
+		{Name: "v2z-s", Order: -3, Score: 0, Category: "partOfSpeech"},     // Nidan verb (lower class) with 'zu' ending (archaic)
+		{Name: "v4b", Order: -3, Score: 0, Category: "partOfSpeech"},       // Yodan verb with 'bu' ending (archaic)
+		{Name: "v4g", Order: -3, Score: 0, Category: "partOfSpeech"},       // Yodan verb with 'gu' ending (archaic)
+		{Name: "v4h", Order: -3, Score: 0, Category: "partOfSpeech"},       // Yodan verb with 'hu/fu' ending (archaic)
+		{Name: "v4k", Order: -3, Score: 0, Category: "partOfSpeech"},       // Yodan verb with 'ku' ending (archaic)
+		{Name: "v4m", Order: -3, Score: 0, Category: "partOfSpeech"},       // Yodan verb with 'mu' ending (archaic)
+		{Name: "v4n", Order: -3, Score: 0, Category: "partOfSpeech"},       // Yodan verb with 'nu' ending (archaic)
+		{Name: "v4r", Order: -3, Score: 0, Category: "partOfSpeech"},       // Yodan verb with 'ru' ending (archaic)
+		{Name: "v4s", Order: -3, Score: 0, Category: "partOfSpeech"},       // Yodan verb with 'su' ending (archaic)
+		{Name: "v4t", Order: -3, Score: 0, Category: "partOfSpeech"},       // Yodan verb with 'tsu' ending (archaic)
+		{Name: "v5aru", Order: -3, Score: 0, Category: "partOfSpeech"},     // Godan verb - -aru special class
+		{Name: "v5b", Order: -3, Score: 0, Category: "partOfSpeech"},       // Godan verb with 'bu' ending
+		{Name: "v5g", Order: -3, Score: 0, Category: "partOfSpeech"},       // Godan verb with 'gu' ending
+		{Name: "v5k", Order: -3, Score: 0, Category: "partOfSpeech"},       // Godan verb with 'ku' ending
+		{Name: "v5k-s", Order: -3, Score: 0, Category: "partOfSpeech"},     // Godan verb - Iku/Yuku special class
+		{Name: "v5m", Order: -3, Score: 0, Category: "partOfSpeech"},       // Godan verb with 'mu' ending
+		{Name: "v5n", Order: -3, Score: 0, Category: "partOfSpeech"},       // Godan verb with 'nu' ending
+		{Name: "v5r", Order: -3, Score: 0, Category: "partOfSpeech"},       // Godan verb with 'ru' ending
+		{Name: "v5r-i", Order: -3, Score: 0, Category: "partOfSpeech"},     // Godan verb with 'ru' ending (irregular verb)
+		{Name: "v5s", Order: -3, Score: 0, Category: "partOfSpeech"},       // Godan verb with 'su' ending
+		{Name: "v5t", Order: -3, Score: 0, Category: "partOfSpeech"},       // Godan verb with 'tsu' ending
+		{Name: "v5u", Order: -3, Score: 0, Category: "partOfSpeech"},       // Godan verb with 'u' ending
+		{Name: "v5u-s", Order: -3, Score: 0, Category: "partOfSpeech"},     // Godan verb with 'u' ending (special class)
+		{Name: "v5uru", Order: -3, Score: 0, Category: "partOfSpeech"},     // Godan verb - Uru old class verb (old form of Eru)
+		{Name: "vi", Order: -3, Score: 0, Category: "partOfSpeech"},        // intransitive verb
+		{Name: "vk", Order: -3, Score: 0, Category: "partOfSpeech"},        // Kuru verb - special class
+		{Name: "vn", Order: -3, Score: 0, Category: "partOfSpeech"},        // irregular nu verb
+		{Name: "vr", Order: -3, Score: 0, Category: "partOfSpeech"},        // irregular ru verb, plain form ends with -ri
+		{Name: "vs", Order: -3, Score: 0, Category: "partOfSpeech"},        // noun or participle which takes the aux. verb suru
+		{Name: "vs-c", Order: -3, Score: 0, Category: "partOfSpeech"},      // su verb - precursor to the modern suru
+		{Name: "vs-i", Order: -3, Score: 0, Category: "partOfSpeech"},      // suru verb - included
+		{Name: "vs-s", Order: -3, Score: 0, Category: "partOfSpeech"},      // suru verb - special class
+		{Name: "vt", Order: -3, Score: 0, Category: "partOfSpeech"},        // transitive verb
+		{Name: "vz", Order: -3, Score: 0, Category: "partOfSpeech"},        // Ichidan verb - zuru verb (alternative form of -jiru verbs)
+
+		// <field> usage domain
+		{Name: "agric", Order: 0, Score: 0, Category: ""},    // agriculture
+		{Name: "anat", Order: 0, Score: 0, Category: ""},     // anatomy
+		{Name: "archeol", Order: 0, Score: 0, Category: ""},  // archeology
+		{Name: "archit", Order: 0, Score: 0, Category: ""},   // architecture
+		{Name: "art", Order: 0, Score: 0, Category: ""},      // art, aesthetics
+		{Name: "astron", Order: 0, Score: 0, Category: ""},   // astronomy
+		{Name: "audvid", Order: 0, Score: 0, Category: ""},   // audiovisual
+		{Name: "aviat", Order: 0, Score: 0, Category: ""},    // aviation
+		{Name: "baseb", Order: 0, Score: 0, Category: ""},    // baseball
+		{Name: "biochem", Order: 0, Score: 0, Category: ""},  // biochemistry
+		{Name: "biol", Order: 0, Score: 0, Category: ""},     // biology
+		{Name: "bot", Order: 0, Score: 0, Category: ""},      // botany
+		{Name: "Buddh", Order: 0, Score: 0, Category: ""},    // Buddhism
+		{Name: "bus", Order: 0, Score: 0, Category: ""},      // business
+		{Name: "cards", Order: 0, Score: 0, Category: ""},    // card games
+		{Name: "chem", Order: 0, Score: 0, Category: ""},     // chemistry
+		{Name: "Christn", Order: 0, Score: 0, Category: ""},  // Christianity
+		{Name: "cloth", Order: 0, Score: 0, Category: ""},    // clothing
+		{Name: "comp", Order: 0, Score: 0, Category: ""},     // computing
+		{Name: "cryst", Order: 0, Score: 0, Category: ""},    // crystallography
+		{Name: "dent", Order: 0, Score: 0, Category: ""},     // dentistry
+		{Name: "ecol", Order: 0, Score: 0, Category: ""},     // ecology
+		{Name: "econ", Order: 0, Score: 0, Category: ""},     // economics
+		{Name: "elec", Order: 0, Score: 0, Category: ""},     // electricity, elec. eng.
+		{Name: "electr", Order: 0, Score: 0, Category: ""},   // electronics
+		{Name: "embryo", Order: 0, Score: 0, Category: ""},   // embryology
+		{Name: "engr", Order: 0, Score: 0, Category: ""},     // engineering
+		{Name: "ent", Order: 0, Score: 0, Category: ""},      // entomology
+		{Name: "film", Order: 0, Score: 0, Category: ""},     // film
+		{Name: "finc", Order: 0, Score: 0, Category: ""},     // finance
+		{Name: "fish", Order: 0, Score: 0, Category: ""},     // fishing
+		{Name: "food", Order: 0, Score: 0, Category: ""},     // food, cooking
+		{Name: "gardn", Order: 0, Score: 0, Category: ""},    // gardening, horticulture
+		{Name: "genet", Order: 0, Score: 0, Category: ""},    // genetics
+		{Name: "geogr", Order: 0, Score: 0, Category: ""},    // geography
+		{Name: "geol", Order: 0, Score: 0, Category: ""},     // geology
+		{Name: "geom", Order: 0, Score: 0, Category: ""},     // geometry
+		{Name: "go", Order: 0, Score: 0, Category: ""},       // go (game)
+		{Name: "golf", Order: 0, Score: 0, Category: ""},     // golf
+		{Name: "gramm", Order: 0, Score: 0, Category: ""},    // grammar
+		{Name: "grmyth", Order: 0, Score: 0, Category: ""},   // Greek mythology
+		{Name: "hanaf", Order: 0, Score: 0, Category: ""},    // hanafuda
+		{Name: "horse", Order: 0, Score: 0, Category: ""},    // horse racing
+		{Name: "kabuki", Order: 0, Score: 0, Category: ""},   // kabuki
+		{Name: "law", Order: 0, Score: 0, Category: ""},      // law
+		{Name: "ling", Order: 0, Score: 0, Category: ""},     // linguistics
+		{Name: "logic", Order: 0, Score: 0, Category: ""},    // logic
+		{Name: "MA", Order: 0, Score: 0, Category: ""},       // martial arts
+		{Name: "mahj", Order: 0, Score: 0, Category: ""},     // mahjong
+		{Name: "manga", Order: 0, Score: 0, Category: ""},    // manga
+		{Name: "math", Order: 0, Score: 0, Category: ""},     // mathematics
+		{Name: "mech", Order: 0, Score: 0, Category: ""},     // mechanical engineering
+		{Name: "med", Order: 0, Score: 0, Category: ""},      // medicine
+		{Name: "met", Order: 0, Score: 0, Category: ""},      // meteorology
+		{Name: "mil", Order: 0, Score: 0, Category: ""},      // military
+		{Name: "mining", Order: 0, Score: 0, Category: ""},   // mining
+		{Name: "music", Order: 0, Score: 0, Category: ""},    // music
+		{Name: "noh", Order: 0, Score: 0, Category: ""},      // noh
+		{Name: "ornith", Order: 0, Score: 0, Category: ""},   // ornithology
+		{Name: "paleo", Order: 0, Score: 0, Category: ""},    // paleontology
+		{Name: "pathol", Order: 0, Score: 0, Category: ""},   // pathology
+		{Name: "pharm", Order: 0, Score: 0, Category: ""},    // pharmacy
+		{Name: "phil", Order: 0, Score: 0, Category: ""},     // philosophy
+		{Name: "photo", Order: 0, Score: 0, Category: ""},    // photography
+		{Name: "physics", Order: 0, Score: 0, Category: ""},  // physics
+		{Name: "physiol", Order: 0, Score: 0, Category: ""},  // physiology
+		{Name: "politics", Order: 0, Score: 0, Category: ""}, // politics
+		{Name: "print", Order: 0, Score: 0, Category: ""},    // printing
+		{Name: "psy", Order: 0, Score: 0, Category: ""},      // psychiatry
+		{Name: "psyanal", Order: 0, Score: 0, Category: ""},  // psychoanalysis
+		{Name: "psych", Order: 0, Score: 0, Category: ""},    // psychology
+		{Name: "rail", Order: 0, Score: 0, Category: ""},     // railway
+		{Name: "rommyth", Order: 0, Score: 0, Category: ""},  // Roman mythology
+		{Name: "Shinto", Order: 0, Score: 0, Category: ""},   // Shinto
+		{Name: "shogi", Order: 0, Score: 0, Category: ""},    // shogi
+		{Name: "ski", Order: 0, Score: 0, Category: ""},      // skiing
+		{Name: "sports", Order: 0, Score: 0, Category: ""},   // sports
+		{Name: "stat", Order: 0, Score: 0, Category: ""},     // statistics
+		{Name: "stockm", Order: 0, Score: 0, Category: ""},   // stock market
+		{Name: "sumo", Order: 0, Score: 0, Category: ""},     // sumo
+		{Name: "telec", Order: 0, Score: 0, Category: ""},    // telecommunications
+		{Name: "tradem", Order: 0, Score: 0, Category: ""},   // trademark
+		{Name: "tv", Order: 0, Score: 0, Category: ""},       // television
+		{Name: "vidg", Order: 0, Score: 0, Category: ""},     // video games
+		{Name: "zool", Order: 0, Score: 0, Category: ""},     // zoology
+
+		// <dial> dialect
+		{Name: "bra", Order: 0, Score: 0, Category: ""},  // Brazilian
+		{Name: "hob", Order: 0, Score: 0, Category: ""},  // Hokkaido-ben
+		{Name: "ksb", Order: 0, Score: 0, Category: ""},  // Kansai-ben
+		{Name: "ktb", Order: 0, Score: 0, Category: ""},  // Kantou-ben
+		{Name: "kyb", Order: 0, Score: 0, Category: ""},  // Kyoto-ben
+		{Name: "kyu", Order: 0, Score: 0, Category: ""},  // Kyuushuu-ben
+		{Name: "nab", Order: 0, Score: 0, Category: ""},  // Nagano-ben
+		{Name: "osb", Order: 0, Score: 0, Category: ""},  // Osaka-ben
+		{Name: "rkb", Order: 0, Score: 0, Category: ""},  // Ryuukyuu-ben
+		{Name: "thb", Order: 0, Score: 0, Category: ""},  // Touhoku-ben
+		{Name: "tsb", Order: 0, Score: 0, Category: ""},  // Tosa-ben
+		{Name: "tsug", Order: 0, Score: 0, Category: ""}, // Tsugaru-ben
+	}
+	return known
+}
--- a/structuredContent.go
+++ b/structuredContent.go
@ -0,0 +1,192 @@
+package yomichan
+
+// contentAttr collects the optional attributes that the content* builders in
+// this file copy onto a generated element. Zero-valued fields (empty string,
+// 0, empty slice or map) are left out of the output. Only the builders that go
+// through contentStyledContainer use the style fields; contentRuby and
+// contentInternalLink read lang and data only.
+type contentAttr struct {
+	// lang is stored under the element's "lang" key when non-empty.
+	lang               string
+	fontStyle          string   // normal, italic
+	fontWeight         string   // normal, bold
+	fontSize           string   // small, medium, large, smaller, 80%, 125%, etc.
+	textDecorationLine []string // underline, overline, line-through
+	verticalAlign      string   // baseline, sub, super, text-top, text-bottom, middle, top, bottom
+	textAlign          string   // start, end, left, right, center, justify, justify-all, match-parent
+	// Margins are copied into the style map as plain integers when non-zero.
+	// NOTE(review): the unit is not visible in this file — confirm with the consumer.
+	marginTop          int
+	marginLeft         int
+	marginRight        int
+	marginBottom       int
+	// listStyleType is stored under the style map's "listStyleType" key when non-empty.
+	listStyleType      string
+	// data is stored under the element's "data" key when it has at least one entry.
+	data               map[string]string
+}
+
+// contentReduce collapses a list of content items into its most compact form.
+//
+// Adjacent strings are concatenated:
+//
+//	["one", "two", node, "four"] -> ["onetwo", node, "four"]
+//
+// A list that reduces to a single item returns that item on its own, so a list
+// made only of strings returns the concatenated string:
+//
+//	["one", "two"] -> "onetwo"
+//
+// Empty strings contribute nothing. A non-empty list consisting solely of empty
+// strings returns "" (consistent with the one-element case [""] -> ""), and an
+// empty list returns an empty, non-nil []any.
+func contentReduce(contents []any) any {
+	if len(contents) == 1 {
+		return contents[0]
+	}
+	// Kept non-nil on purpose: an empty result stays an empty list rather than
+	// a nil slice (presumably this ends up JSON-encoded — verify against caller).
+	reduced := make([]any, 0, len(contents))
+	var pending string
+	for _, content := range contents {
+		if text, ok := content.(string); ok {
+			pending += text
+			continue
+		}
+		// A non-string item ends the current run of strings.
+		if pending != "" {
+			reduced = append(reduced, pending)
+			pending = ""
+		}
+		reduced = append(reduced, content)
+	}
+	if pending != "" {
+		reduced = append(reduced, pending)
+	}
+	switch {
+	case len(reduced) == 1:
+		return reduced[0]
+	case len(reduced) == 0 && len(contents) > 0:
+		// Every item was an empty string: honor the "only strings -> string"
+		// contract instead of returning an empty list.
+		return ""
+	}
+	return reduced
+}
+
+// contentStructure builds the top-level wrapper map: "type" is fixed to
+// "structured-content" and "content" holds the reduced form of contents.
+func contentStructure(contents ...any) map[string]any {
+	root := make(map[string]any, 2)
+	root["type"] = "structured-content"
+	root["content"] = contentReduce(contents)
+	return root
+}
+
+// contentRuby builds a "ruby" element. Its content list holds the reduced
+// contents followed by an "rp" element with "(", an "rt" element with the
+// ruby text, and an "rp" element with ")". Only attr.lang and attr.data are
+// applied, each when non-empty; the style fields of attr are not used here.
+func contentRuby(attr contentAttr, ruby string, contents ...any) map[string]any {
+	base := contentReduce(contents)
+	openParen := map[string]string{"tag": "rp", "content": "("}
+	reading := map[string]string{"tag": "rt", "content": ruby}
+	closeParen := map[string]string{"tag": "rp", "content": ")"}
+
+	node := map[string]any{"tag": "ruby"}
+	node["content"] = []any{base, openParen, reading, closeParen}
+	if lang := attr.lang; lang != "" {
+		node["lang"] = lang
+	}
+	if data := attr.data; len(data) > 0 {
+		node["data"] = data
+	}
+	return node
+}
+
+// contentInternalLink builds an "a" element whose href is
+// "?query=<query>&wildcards=off". When no contents are given, the query itself
+// is used as the link content; otherwise the reduced contents are used. Only
+// attr.lang and attr.data are applied, each when non-empty.
+//
+// NOTE(review): query is placed into the href verbatim, without escaping —
+// confirm callers never pass characters such as '&' or '#'.
+func contentInternalLink(attr contentAttr, query string, contents ...any) map[string]any {
+	var body any = query
+	if len(contents) > 0 {
+		body = contentReduce(contents)
+	}
+	link := map[string]any{
+		"tag":     "a",
+		"href":    "?query=" + query + "&wildcards=off",
+		"content": body,
+	}
+	if lang := attr.lang; lang != "" {
+		link["lang"] = lang
+	}
+	if data := attr.data; len(data) > 0 {
+		link["data"] = data
+	}
+	return link
+}
+
+// contentSpan builds a "span" element from attr and contents by delegating to
+// contentStyledContainer.
+func contentSpan(attr contentAttr, contents ...any) map[string]any {
+	const tag = "span"
+	return contentStyledContainer(attr, tag, contents...)
+}
+
+// contentDiv builds a "div" element from attr and contents by delegating to
+// contentStyledContainer.
+func contentDiv(attr contentAttr, contents ...any) map[string]any {
+	const tag = "div"
+	return contentStyledContainer(attr, tag, contents...)
+}
+
+// contentListItem builds an "li" element from attr and contents by delegating
+// to contentStyledContainer.
+func contentListItem(attr contentAttr, contents ...any) map[string]any {
+	const tag = "li"
+	return contentStyledContainer(attr, tag, contents...)
+}
+
+// contentOrderedList builds an "ol" element from attr and contents by
+// delegating to contentStyledContainer.
+func contentOrderedList(attr contentAttr, contents ...any) map[string]any {
+	const tag = "ol"
+	return contentStyledContainer(attr, tag, contents...)
+}
+
+// contentUnorderedList builds a "ul" element from attr and contents by
+// delegating to contentStyledContainer.
+func contentUnorderedList(attr contentAttr, contents ...any) map[string]any {
+	const tag = "ul"
+	return contentStyledContainer(attr, tag, contents...)
+}
+
+// contentTable builds a "table" element from attr and contents by delegating
+// to contentStyledContainer.
+func contentTable(attr contentAttr, contents ...any) map[string]any {
+	const tag = "table"
+	return contentStyledContainer(attr, tag, contents...)
+}
+
+// contentTableHead builds a "thead" element from attr and contents by
+// delegating to contentStyledContainer.
+func contentTableHead(attr contentAttr, contents ...any) map[string]any {
+	const tag = "thead"
+	return contentStyledContainer(attr, tag, contents...)
+}
+
+// contentTableBody builds a "tbody" element from attr and contents by
+// delegating to contentStyledContainer.
+func contentTableBody(attr contentAttr, contents ...any) map[string]any {
+	const tag = "tbody"
+	return contentStyledContainer(attr, tag, contents...)
+}
+
+// contentTableRow builds a "tr" element from attr and contents by delegating
+// to contentStyledContainer.
+func contentTableRow(attr contentAttr, contents ...any) map[string]any {
+	const tag = "tr"
+	return contentStyledContainer(attr, tag, contents...)
+}
+
+// contentTableHeadCell builds a "th" element from attr and contents by
+// delegating to contentStyledContainer.
+func contentTableHeadCell(attr contentAttr, contents ...any) map[string]any {
+	const tag = "th"
+	return contentStyledContainer(attr, tag, contents...)
+}
+
+// contentTableCell builds a "td" element from attr and contents by delegating
+// to contentStyledContainer.
+func contentTableCell(attr contentAttr, contents ...any) map[string]any {
+	const tag = "td"
+	return contentStyledContainer(attr, tag, contents...)
+}
+
+// contentStyledContainer builds an element map for the given tag. "content"
+// always holds the reduced contents; "lang", "data" and "style" are added only
+// when attr supplies a non-empty value for them (the style map comes from
+// contentStyle).
+func contentStyledContainer(attr contentAttr, tag string, contents ...any) map[string]any {
+	node := map[string]any{
+		"tag":     tag,
+		"content": contentReduce(contents),
+	}
+	if lang := attr.lang; lang != "" {
+		node["lang"] = lang
+	}
+	if data := attr.data; len(data) > 0 {
+		node["data"] = data
+	}
+	if css := contentStyle(attr); len(css) > 0 {
+		node["style"] = css
+	}
+	return node
+}
+
+// contentStyle converts the style-related fields of attr into a map keyed by
+// the field's name ("fontStyle", "marginTop", ...). Fields left at their zero
+// value are skipped, so the returned map is empty (never nil) when attr sets
+// no style.
+func contentStyle(attr contentAttr) map[string]any {
+	css := map[string]any{}
+
+	// putText records a string property unless it is empty.
+	putText := func(key, value string) {
+		if value != "" {
+			css[key] = value
+		}
+	}
+	// putMargin records an integer property unless it is zero.
+	putMargin := func(key string, value int) {
+		if value != 0 {
+			css[key] = value
+		}
+	}
+
+	putText("fontStyle", attr.fontStyle)
+	putText("fontWeight", attr.fontWeight)
+	putText("fontSize", attr.fontSize)
+	if lines := attr.textDecorationLine; len(lines) > 0 {
+		css["textDecorationLine"] = lines
+	}
+	putText("verticalAlign", attr.verticalAlign)
+	putText("textAlign", attr.textAlign)
+	putMargin("marginTop", attr.marginTop)
+	putMargin("marginLeft", attr.marginLeft)
+	putMargin("marginRight", attr.marginRight)
+	putMargin("marginBottom", attr.marginBottom)
+	putText("listStyleType", attr.listStyleType)
+	return css
+}