From 853d0b33dc30dde465a8d62798b17467df0088a5 Mon Sep 17 00:00:00 2001
From: stephenmk <stephenmk@users.noreply.github.com>
Date: Sun, 22 Jan 2023 14:14:33 -0600
Subject: [PATCH 01/19] Use empty interface type for dictionary glossaries

Necessary for structured content support
---
 common.go       | 6 +++---
 daijirin.go     | 4 ++--
 daijisen.go     | 4 ++--
 enamdict.go     | 4 +++-
 gakken.go       | 4 ++--
 kotowaza.go     | 2 +-
 koujien.go      | 4 ++--
 meikyou.go      | 4 ++--
 shougakukan2.go | 2 +-
 wadai.go        | 2 +-
 10 files changed, 19 insertions(+), 17 deletions(-)

diff --git a/common.go b/common.go
index aa566c0..f3ff84c 100644
--- a/common.go
+++ b/common.go
@@ -21,7 +21,7 @@ const (
 
 const databaseFormat = 3
 
-type dbRecord []interface{}
+type dbRecord []any
 type dbRecordList []dbRecord
 
 type dbTag struct {
@@ -46,7 +46,7 @@ func (meta dbTagList) crush() dbRecordList {
 type dbMeta struct {
 	Expression string
 	Mode       string
-	Data       interface{}
+	Data       any
 }
 
 type dbMetaList []dbMeta
@@ -66,7 +66,7 @@ type dbTerm struct {
 	DefinitionTags []string
 	Rules          []string
 	Score          int
-	Glossary       []string
+	Glossary       []any
 	Sequence       int
 	TermTags       []string
 }
diff --git a/daijirin.go b/daijirin.go
index 2c2b190..abc30e6 100644
--- a/daijirin.go
+++ b/daijirin.go
@@ -65,7 +65,7 @@ func (e *daijirinExtractor) extractTerms(entry zig.BookEntry, sequence int) []db
 		for _, reading := range readings {
 			term := dbTerm{
 				Expression: reading,
-				Glossary:   []string{entry.Text},
+				Glossary:   []any{entry.Text},
 				Sequence:   sequence,
 			}
 
@@ -79,7 +79,7 @@ func (e *daijirinExtractor) extractTerms(entry zig.BookEntry, sequence int) []db
 				term := dbTerm{
 					Expression: expression,
 					Reading:    reading,
-					Glossary:   []string{entry.Text},
+					Glossary:   []any{entry.Text},
 					Sequence:   sequence,
 				}
 
diff --git a/daijisen.go b/daijisen.go
index 5d663df..332bc46 100644
--- a/daijisen.go
+++ b/daijisen.go
@@ -70,7 +70,7 @@ func (e *daijisenExtractor) extractTerms(entry zig.BookEntry, sequence int) []db
 	if len(expressions) == 0 {
 		term := dbTerm{
 			Expression: reading,
-			Glossary:   []string{entry.Text},
+			Glossary:   []any{entry.Text},
 			Sequence:   sequence,
 		}
 
@@ -82,7 +82,7 @@ func (e *daijisenExtractor) extractTerms(entry zig.BookEntry, sequence int) []db
 			term := dbTerm{
 				Expression: expression,
 				Reading:    reading,
-				Glossary:   []string{entry.Text},
+				Glossary:   []any{entry.Text},
 				Sequence:   sequence,
 			}
 
diff --git a/enamdict.go b/enamdict.go
index f3df513..be12d5b 100644
--- a/enamdict.go
+++ b/enamdict.go
@@ -53,7 +53,9 @@ func jmnedictExtractTerms(enamdictEntry jmdict.JmnedictEntry) []dbTerm {
 		}
 
 		for _, trans := range enamdictEntry.Translations {
-			term.Glossary = append(term.Glossary, trans.Translations...)
+			for _, translation := range trans.Translations {
+				term.Glossary = append(term.Glossary, translation)
+			}
 			term.addDefinitionTags(trans.NameTypes...)
 		}
 
diff --git a/gakken.go b/gakken.go
index b25f989..58e96b4 100644
--- a/gakken.go
+++ b/gakken.go
@@ -90,7 +90,7 @@ func (e *gakkenExtractor) extractTerms(entry zig.BookEntry, sequence int) []dbTe
 		for _, reading := range readings {
 			term := dbTerm{
 				Expression: reading,
-				Glossary:   []string{entryText},
+				Glossary:   []any{entryText},
 				Sequence:   sequence,
 			}
 
@@ -107,7 +107,7 @@ func (e *gakkenExtractor) extractTerms(entry zig.BookEntry, sequence int) []dbTe
 				term := dbTerm{
 					Expression: expression,
 					Reading:    reading,
-					Glossary:   []string{entryText},
+					Glossary:   []any{entryText},
 					Sequence:   sequence,
 				}
 
diff --git a/kotowaza.go b/kotowaza.go
index 7f713ae..fca8f7d 100644
--- a/kotowaza.go
+++ b/kotowaza.go
@@ -72,7 +72,7 @@ func (e *kotowazaExtractor) extractTerms(entry zig.BookEntry, sequence int) []db
 			term := dbTerm{
 				Expression: expression,
 				Reading:    reading,
-				Glossary:   []string{entry.Text},
+				Glossary:   []any{entry.Text},
 				Sequence:   sequence,
 			}
 
diff --git a/koujien.go b/koujien.go
index 89b7379..049d5a0 100644
--- a/koujien.go
+++ b/koujien.go
@@ -75,7 +75,7 @@ func (e *koujienExtractor) extractTerms(entry zig.BookEntry, sequence int) []dbT
 		for _, reading := range readings {
 			term := dbTerm{
 				Expression: reading,
-				Glossary:   []string{entry.Text},
+				Glossary:   []any{entry.Text},
 				Sequence:   sequence,
 			}
 
@@ -89,7 +89,7 @@ func (e *koujienExtractor) extractTerms(entry zig.BookEntry, sequence int) []dbT
 				term := dbTerm{
 					Expression: expression,
 					Reading:    reading,
-					Glossary:   []string{entry.Text},
+					Glossary:   []any{entry.Text},
 					Sequence:   sequence,
 				}
 
diff --git a/meikyou.go b/meikyou.go
index 78a3081..2ea33fe 100644
--- a/meikyou.go
+++ b/meikyou.go
@@ -106,7 +106,7 @@ func (e *meikyouExtractor) extractTerms(entry zig.BookEntry, sequence int) []dbT
 		for _, reading := range readings {
 			term := dbTerm{
 				Expression: reading,
-				Glossary:   []string{entry.Text},
+				Glossary:   []any{entry.Text},
 				Sequence:   sequence,
 			}
 
@@ -120,7 +120,7 @@ func (e *meikyouExtractor) extractTerms(entry zig.BookEntry, sequence int) []dbT
 				term := dbTerm{
 					Expression: expression,
 					Reading:    reading,
-					Glossary:   []string{entry.Text},
+					Glossary:   []any{entry.Text},
 					Sequence:   sequence,
 				}
 
diff --git a/shougakukan2.go b/shougakukan2.go
index 5b06ea6..7b16549 100644
--- a/shougakukan2.go
+++ b/shougakukan2.go
@@ -93,7 +93,7 @@ func (e *shougakukan2Extractor) extractTerms(entry zig.BookEntry, sequence int)
 			terms = append(terms, dbTerm{
 				Expression: expression,
 				Reading:    reading,
-				Glossary:   []string{entry.Text},
+				Glossary:   []any{entry.Text},
 				Sequence:   sequence,
 			})
 		}
diff --git a/wadai.go b/wadai.go
index 2507b92..0226f64 100644
--- a/wadai.go
+++ b/wadai.go
@@ -74,7 +74,7 @@ func (e *wadaiExtractor) extractTerms(entry zig.BookEntry, sequence int) []dbTer
 		term := dbTerm{
 			Expression: expression,
 			Reading:    reading,
-			Glossary:   []string{entry.Text},
+			Glossary:   []any{entry.Text},
 			Sequence:   sequence,
 		}
 

From 56f98959677969e6e9f77a56bfd99fdd555a1e42 Mon Sep 17 00:00:00 2001
From: stephenmk <stephenmk@users.noreply.github.com>
Date: Sun, 22 Jan 2023 14:27:02 -0600
Subject: [PATCH 02/19] Add struct for handling index.json data

---
 common.go    | 42 ++++++++++++++++++++++++++----------------
 edict.go     | 14 ++++++++++----
 enamdict.go  | 15 ++++++++++-----
 epwing.go    | 13 ++++++++++---
 frequency.go | 15 ++++++++++-----
 kanjidic.go  | 15 ++++++++++-----
 rikai.go     | 15 ++++++++++-----
 7 files changed, 86 insertions(+), 43 deletions(-)

diff --git a/common.go b/common.go
index f3ff84c..ec331d6 100644
--- a/common.go
+++ b/common.go
@@ -19,8 +19,6 @@ const (
 	DefaultTitle    = ""
 )
 
-const databaseFormat = 3
-
 type dbRecord []any
 type dbRecordList []dbRecord
 
@@ -142,11 +140,34 @@ func (kanji dbKanjiList) crush() dbRecordList {
 	return results
 }
 
-func writeDb(outputPath, title, revision string, sequenced bool, recordData map[string]dbRecordList, stride int, pretty bool) error {
+type dbIndex struct {
+	Title       string `json:"title"`
+	Format      int    `json:"format"`
+	Revision    string `json:"revision"`
+	Sequenced   bool   `json:"sequenced"`
+	Author      string `json:"author"`
+	Url         string `json:"url"`
+	Description string `json:"description"`
+	Attribution string `json:"attribution"`
+}
+
+func (index *dbIndex) setDefaults() {
+	if index.Format == 0 {
+		index.Format = 3
+	}
+	if index.Author == "" {
+		index.Author = "yomichan-import"
+	}
+	if index.Url == "" {
+		index.Url = "https://github.com/FooSoft/yomichan-import"
+	}
+}
+
+func writeDb(outputPath string, index dbIndex, recordData map[string]dbRecordList, stride int, pretty bool) error {
 	var zbuff bytes.Buffer
 	zip := zip.NewWriter(&zbuff)
 
-	marshalJSON := func(obj interface{}, pretty bool) ([]byte, error) {
+	marshalJSON := func(obj any, pretty bool) ([]byte, error) {
 		if pretty {
 			return json.MarshalIndent(obj, "", "    ")
 		}
@@ -186,17 +207,6 @@ func writeDb(outputPath, title, revision string, sequenced bool, recordData map[
 	}
 
 	var err error
-	var db struct {
-		Title     string `json:"title"`
-		Format    int    `json:"format"`
-		Revision  string `json:"revision"`
-		Sequenced bool   `json:"sequenced"`
-	}
-
-	db.Title = title
-	db.Format = databaseFormat
-	db.Revision = revision
-	db.Sequenced = sequenced
 
 	for recordType, recordEntries := range recordData {
 		if _, err := writeDbRecords(recordType, recordEntries); err != nil {
@@ -204,7 +214,7 @@ func writeDb(outputPath, title, revision string, sequenced bool, recordData map[
 		}
 	}
 
-	bytes, err := marshalJSON(db, pretty)
+	bytes, err := marshalJSON(index, pretty)
 	if err != nil {
 		return err
 	}
diff --git a/edict.go b/edict.go
index f30dfdb..b6326f0 100644
--- a/edict.go
+++ b/edict.go
@@ -7,7 +7,7 @@ import (
 	"foosoft.net/projects/jmdict"
 )
 
-const jmdictRevision = "jmdict4"
+const edrdgAttribution = "This publication has included material from the JMdict (EDICT, etc.) dictionary files in accordance with the licence provisions of the Electronic Dictionaries Research Group. See http://www.edrdg.org/"
 
 func jmdictBuildRules(term *dbTerm) {
 	for _, tag := range term.DefinitionTags {
@@ -234,11 +234,17 @@ func jmdictExportDb(inputPath, outputPath, language, title string, stride int, p
 		"tag":  jmdictBuildTagMeta(entities).crush(),
 	}
 
+	index := dbIndex{
+		Title:       title,
+		Revision:    "jmdict4",
+		Sequenced:   true,
+		Attribution: edrdgAttribution,
+	}
+	index.setDefaults()
+
 	return writeDb(
 		outputPath,
-		title,
-		jmdictRevision,
-		true,
+		index,
 		recordData,
 		stride,
 		pretty,
diff --git a/enamdict.go b/enamdict.go
index be12d5b..e0c1cb0 100644
--- a/enamdict.go
+++ b/enamdict.go
@@ -6,8 +6,6 @@ import (
 	"foosoft.net/projects/jmdict"
 )
 
-const jmnedictRevision = "jmnedict1"
-
 func jmnedictBuildTagMeta(entities map[string]string) dbTagList {
 	var tags dbTagList
 
@@ -103,11 +101,18 @@ func jmnedictExportDb(inputPath, outputPath, language, title string, stride int,
 		"tag":  jmnedictBuildTagMeta(entities).crush(),
 	}
 
+	index := dbIndex{
+		Title:       title,
+		Revision:    "jmnedict1",
+		Sequenced:   true,
+		Description: "",
+		Attribution: edrdgAttribution,
+	}
+	index.setDefaults()
+
 	return writeDb(
 		outputPath,
-		title,
-		jmnedictRevision,
-		true,
+		index,
 		recordData,
 		stride,
 		pretty,
diff --git a/epwing.go b/epwing.go
index 37516c6..83b54b8 100644
--- a/epwing.go
+++ b/epwing.go
@@ -101,11 +101,18 @@ func epwingExportDb(inputPath, outputPath, language, title string, stride int, p
 		"term":  terms.crush(),
 	}
 
+	index := dbIndex{
+		Title:       title,
+		Revision:    strings.Join(revisions, ";"),
+		Sequenced:   true,
+		Description: "",
+		Attribution: "",
+	}
+	index.setDefaults()
+
 	return writeDb(
 		outputPath,
-		title,
-		strings.Join(revisions, ";"),
-		true,
+		index,
 		recordData,
 		stride,
 		pretty,
diff --git a/frequency.go b/frequency.go
index 694ed67..5d9f06a 100644
--- a/frequency.go
+++ b/frequency.go
@@ -7,8 +7,6 @@ import (
 	"strings"
 )
 
-const frequencyRevision = "frequency1"
-
 func frequencyTermsExportDb(inputPath, outputPath, language, title string, stride int, pretty bool) error {
 	return frequncyExportDb(inputPath, outputPath, language, title, stride, pretty, "term_meta")
 }
@@ -57,11 +55,18 @@ func frequncyExportDb(inputPath, outputPath, language, title string, stride int,
 		key: frequencies.crush(),
 	}
 
+	index := dbIndex{
+		Title:       title,
+		Revision:    "frequency1",
+		Sequenced:   false,
+		Description: "",
+		Attribution: "",
+	}
+	index.setDefaults()
+
 	return writeDb(
 		outputPath,
-		title,
-		frequencyRevision,
-		false,
+		index,
 		recordData,
 		stride,
 		pretty,
diff --git a/kanjidic.go b/kanjidic.go
index 37bebdd..e1c42d9 100644
--- a/kanjidic.go
+++ b/kanjidic.go
@@ -7,8 +7,6 @@ import (
 	"foosoft.net/projects/jmdict"
 )
 
-const kanjidicRevision = "kanjidic2"
-
 func kanjidicExtractKanji(entry jmdict.KanjidicCharacter, language string) *dbKanji {
 	if entry.ReadingMeaning == nil {
 		return nil
@@ -161,11 +159,18 @@ func kanjidicExportDb(inputPath, outputPath, language, title string, stride int,
 		"tag":   tags.crush(),
 	}
 
+	index := dbIndex{
+		Title:       title,
+		Revision:    "kanjidic2",
+		Sequenced:   false,
+		Description: "",
+		Attribution: edrdgAttribution,
+	}
+	index.setDefaults()
+
 	return writeDb(
 		outputPath,
-		title,
-		kanjidicRevision,
-		false,
+		index,
 		recordData,
 		stride,
 		pretty,
diff --git a/rikai.go b/rikai.go
index 651bc44..f3b6b12 100644
--- a/rikai.go
+++ b/rikai.go
@@ -8,8 +8,6 @@ import (
 	_ "github.com/mattn/go-sqlite3"
 )
 
-const rikaiRevision = "rikai2"
-
 type rikaiEntry struct {
 	kanji string
 	kana  string
@@ -154,11 +152,18 @@ func rikaiExportDb(inputPath, outputPath, language, title string, stride int, pr
 		"tag":  tags.crush(),
 	}
 
+	index := dbIndex{
+		Title:       title,
+		Revision:    "rikai2",
+		Sequenced:   true,
+		Description: "",
+		Attribution: "",
+	}
+	index.setDefaults()
+
 	return writeDb(
 		outputPath,
-		title,
-		rikaiRevision,
-		true,
+		index,
 		recordData,
 		stride,
 		pretty,

From 73fb99286583a0ac1f82efabf9172e06ec796968 Mon Sep 17 00:00:00 2001
From: stephenmk <stephenmk@users.noreply.github.com>
Date: Sun, 22 Jan 2023 14:32:45 -0600
Subject: [PATCH 03/19] Add intersection and union functions for string slices

---
 common.go | 33 +++++++++++++++++++++++++++++++++
 1 file changed, 33 insertions(+)

diff --git a/common.go b/common.go
index ec331d6..5f2dab3 100644
--- a/common.go
+++ b/common.go
@@ -262,6 +262,39 @@ func hasString(needle string, haystack []string) bool {
 	return false
 }
 
+func intersection(s1, s2 []string) []string {
+	s := []string{}
+	m := make(map[string]bool)
+	for _, e := range s1 {
+		m[e] = true
+	}
+	for _, e := range s2 {
+		if m[e] {
+			s = append(s, e)
+			m[e] = false
+		}
+	}
+	return s
+}
+
+func union(s1, s2 []string) []string {
+	s := []string{}
+	m := make(map[string]bool)
+	for _, e := range s1 {
+		if !m[e] {
+			s = append(s, e)
+			m[e] = true
+		}
+	}
+	for _, e := range s2 {
+		if !m[e] {
+			s = append(s, e)
+			m[e] = true
+		}
+	}
+	return s
+}
+
 func detectFormat(path string) (string, error) {
 	switch filepath.Ext(path) {
 	case ".sqlite":

From abc28bb19dc7bc6f7efba09c0720b21fbbc19b11 Mon Sep 17 00:00:00 2001
From: stephenmk <stephenmk@users.noreply.github.com>
Date: Sun, 22 Jan 2023 14:37:18 -0600
Subject: [PATCH 04/19] Add new JMdict version

---
 common.go            |   5 +-
 edict.go             | 252 -------------------------------
 go.mod               |   1 +
 go.sum               |   2 +
 jmdict.go            | 221 +++++++++++++++++++++++++++
 jmdictConstants.go   | 215 ++++++++++++++++++++++++++
 jmdictForms.go       | 254 +++++++++++++++++++++++++++++++
 jmdictGlossary.go    | 300 +++++++++++++++++++++++++++++++++++++
 jmdictHeadword.go    | 267 +++++++++++++++++++++++++++++++++
 jmdictMetadata.go    | 158 ++++++++++++++++++++
 jmdictReferences.go  | 166 +++++++++++++++++++++
 jmdictTags.go        | 348 +++++++++++++++++++++++++++++++++++++++++++
 structuredContent.go | 192 ++++++++++++++++++++++++
 13 files changed, 2127 insertions(+), 254 deletions(-)
 delete mode 100644 edict.go
 create mode 100644 jmdict.go
 create mode 100644 jmdictConstants.go
 create mode 100644 jmdictForms.go
 create mode 100644 jmdictGlossary.go
 create mode 100644 jmdictHeadword.go
 create mode 100644 jmdictMetadata.go
 create mode 100644 jmdictReferences.go
 create mode 100644 jmdictTags.go
 create mode 100644 structuredContent.go

diff --git a/common.go b/common.go
index 5f2dab3..9d6b2aa 100644
--- a/common.go
+++ b/common.go
@@ -306,7 +306,7 @@ func detectFormat(path string) (string, error) {
 	}
 
 	switch filepath.Base(path) {
-	case "JMdict", "JMdict.xml", "JMdict_e", "JMdict_e.xml":
+	case "JMdict", "JMdict.xml", "JMdict_e", "JMdict_e.xml", "JMdict_e_examp":
 		return "edict", nil
 	case "JMnedict", "JMnedict.xml":
 		return "enamdict", nil
@@ -336,7 +336,8 @@ func detectFormat(path string) (string, error) {
 
 func ExportDb(inputPath, outputPath, format, language, title string, stride int, pretty bool) error {
 	handlers := map[string]func(string, string, string, string, int, bool) error{
-		"edict":     jmdictExportDb,
+		"edict":     jmdExportDb,
+		"forms":     formsExportDb,
 		"enamdict":  jmnedictExportDb,
 		"epwing":    epwingExportDb,
 		"kanjidic":  kanjidicExportDb,
diff --git a/edict.go b/edict.go
deleted file mode 100644
index b6326f0..0000000
--- a/edict.go
+++ /dev/null
@@ -1,252 +0,0 @@
-package yomichan
-
-import (
-	"os"
-	"strings"
-
-	"foosoft.net/projects/jmdict"
-)
-
-const edrdgAttribution = "This publication has included material from the JMdict (EDICT, etc.) dictionary files in accordance with the licence provisions of the Electronic Dictionaries Research Group. See http://www.edrdg.org/"
-
-func jmdictBuildRules(term *dbTerm) {
-	for _, tag := range term.DefinitionTags {
-		switch tag {
-		case "adj-i", "v1", "vk", "vz":
-			term.addRules(tag)
-		default:
-			if strings.HasPrefix(tag, "v5") {
-				term.addRules("v5")
-			} else if strings.HasPrefix(tag, "vs-") {
-				term.addRules("vs")
-			}
-		}
-	}
-}
-
-func jmdictBuildScore(term *dbTerm) {
-	for _, tag := range term.DefinitionTags {
-		switch tag {
-		case "arch":
-			term.Score -= 100
-		}
-	}
-	for _, tag := range term.TermTags {
-		switch tag {
-		case "news", "ichi", "spec", "gai1":
-			term.Score += 100
-		case "P":
-			term.Score += 500
-		case "iK", "ik", "ok", "oK", "io", "oik":
-			term.Score -= 100
-		}
-	}
-}
-
-func jmdictAddPriorities(term *dbTerm, priorities ...string) {
-	for _, priority := range priorities {
-		switch priority {
-		case "news1", "ichi1", "spec1", "gai1":
-			term.addTermTags("P")
-			fallthrough
-		case "news2", "ichi2", "spec2", "gai2":
-			term.addTermTags(priority[:len(priority)-1])
-		}
-	}
-}
-
-func jmdictBuildTagMeta(entities map[string]string) dbTagList {
-	tags := dbTagList{
-		dbTag{Name: "news", Notes: "appears frequently in Mainichi Shimbun", Category: "frequent", Order: -2},
-		dbTag{Name: "ichi", Notes: "listed as common in Ichimango Goi Bunruishuu", Category: "frequent", Order: -2},
-		dbTag{Name: "spec", Notes: "common words not included in frequency lists", Category: "frequent", Order: -2},
-		dbTag{Name: "gai", Notes: "common loanword", Category: "frequent", Order: -2},
-		dbTag{Name: "P", Notes: "popular term", Category: "popular", Order: -10, Score: 10},
-	}
-
-	for name, value := range entities {
-		tag := dbTag{Name: name, Notes: value}
-
-		switch name {
-		case "exp", "id":
-			tag.Category = "expression"
-			tag.Order = -5
-		case "arch":
-			tag.Category = "archaism"
-			tag.Order = -4
-		case "iK", "ik", "ok", "oK", "io", "oik":
-			tag.Score = -5
-		case "adj-f", "adj-i", "adj-ix", "adj-ku", "adj-na", "adj-nari", "adj-no", "adj-pn", "adj-shiku", "adj-t", "adv", "adv-to", "aux-adj",
-			"aux", "aux-v", "conj", "cop-da", "ctr", "int", "n-adv", "n", "n-pref", "n-pr", "n-suf", "n-t", "num", "pn", "pref", "prt", "suf",
-			"unc", "v1", "v1-s", "v2a-s", "v2b-k", "v2d-s", "v2g-k", "v2g-s", "v2h-k", "v2h-s", "v2k-k", "v2k-s", "v2m-s", "v2n-s", "v2r-k",
-			"v2r-s", "v2s-s", "v2t-k", "v2t-s", "v2w-s", "v2y-k", "v2y-s", "v2z-s", "v4b", "v4h", "v4k", "v4m", "v4r", "v4s", "v4t", "v5aru",
-			"v5b", "v5g", "v5k", "v5k-s", "v5m", "v5n", "v5r-i", "v5r", "v5s", "v5t", "v5u", "v5u-s", "vi", "vk", "vn", "vr", "vs-c", "vs-i",
-			"vs", "vs-s", "vt", "vz":
-			tag.Category = "partOfSpeech"
-			tag.Order = -3
-		}
-
-		tags = append(tags, tag)
-	}
-
-	return tags
-}
-
-func jmdictExtractTerms(edictEntry jmdict.JmdictEntry, language string) []dbTerm {
-	var terms []dbTerm
-
-	convert := func(reading jmdict.JmdictReading, kanji *jmdict.JmdictKanji) {
-		if kanji != nil && reading.Restrictions != nil && !hasString(kanji.Expression, reading.Restrictions) {
-			return
-		}
-
-		var termBase dbTerm
-		termBase.addTermTags(reading.Information...)
-
-		if kanji == nil {
-			termBase.Expression = reading.Reading
-			jmdictAddPriorities(&termBase, reading.Priorities...)
-		} else {
-			termBase.Expression = kanji.Expression
-			termBase.Reading = reading.Reading
-			termBase.addTermTags(kanji.Information...)
-
-			for _, priority := range kanji.Priorities {
-				if hasString(priority, reading.Priorities) {
-					jmdictAddPriorities(&termBase, priority)
-				}
-			}
-		}
-
-		var partsOfSpeech []string
-		for index, sense := range edictEntry.Sense {
-
-			if len(sense.PartsOfSpeech) != 0 {
-				partsOfSpeech = sense.PartsOfSpeech
-			}
-
-			if sense.RestrictedReadings != nil && !hasString(reading.Reading, sense.RestrictedReadings) {
-				continue
-			}
-
-			if kanji != nil && sense.RestrictedKanji != nil && !hasString(kanji.Expression, sense.RestrictedKanji) {
-				continue
-			}
-
-			term := dbTerm{
-				Reading:    termBase.Reading,
-				Expression: termBase.Expression,
-				Score:      len(edictEntry.Sense) - index,
-				Sequence:   edictEntry.Sequence,
-			}
-
-			for _, glossary := range sense.Glossary {
-				if glossary.Language == nil && language == "" || glossary.Language != nil && language == *glossary.Language {
-					term.Glossary = append(term.Glossary, glossary.Content)
-				}
-			}
-
-			if len(term.Glossary) == 0 {
-				continue
-			}
-
-			term.addDefinitionTags(termBase.DefinitionTags...)
-			term.addTermTags(termBase.TermTags...)
-			term.addDefinitionTags(partsOfSpeech...)
-			term.addDefinitionTags(sense.Fields...)
-			term.addDefinitionTags(sense.Misc...)
-			term.addDefinitionTags(sense.Dialects...)
-
-			jmdictBuildRules(&term)
-			jmdictBuildScore(&term)
-
-			terms = append(terms, term)
-		}
-	}
-
-	if len(edictEntry.Kanji) > 0 {
-		for _, kanji := range edictEntry.Kanji {
-			for _, reading := range edictEntry.Readings {
-				if reading.NoKanji == nil {
-					convert(reading, &kanji)
-				}
-			}
-		}
-		for _, reading := range edictEntry.Readings {
-			if reading.NoKanji != nil {
-				convert(reading, nil)
-			}
-		}
-	} else {
-		for _, reading := range edictEntry.Readings {
-			convert(reading, nil)
-		}
-	}
-
-	return terms
-}
-
-func jmdictExportDb(inputPath, outputPath, language, title string, stride int, pretty bool) error {
-	reader, err := os.Open(inputPath)
-	if err != nil {
-		return err
-	}
-	defer reader.Close()
-
-	dict, entities, err := jmdict.LoadJmdictNoTransform(reader)
-	if err != nil {
-		return err
-	}
-
-	var langTag string
-	switch language {
-	case "dutch":
-		langTag = "dut"
-	case "french":
-		langTag = "fre"
-	case "german":
-		langTag = "ger"
-	case "hungarian":
-		langTag = "hun"
-	case "italian":
-		langTag = "ita"
-	case "russian":
-		langTag = "rus"
-	case "slovenian":
-		langTag = "slv"
-	case "spanish":
-		langTag = "spa"
-	case "swedish":
-		langTag = "swe"
-	}
-
-	var terms dbTermList
-	for _, entry := range dict.Entries {
-		terms = append(terms, jmdictExtractTerms(entry, langTag)...)
-	}
-
-	if title == "" {
-		title = "JMdict"
-	}
-
-	recordData := map[string]dbRecordList{
-		"term": terms.crush(),
-		"tag":  jmdictBuildTagMeta(entities).crush(),
-	}
-
-	index := dbIndex{
-		Title:       title,
-		Revision:    "jmdict4",
-		Sequenced:   true,
-		Attribution: edrdgAttribution,
-	}
-	index.setDefaults()
-
-	return writeDb(
-		outputPath,
-		index,
-		recordData,
-		stride,
-		pretty,
-	)
-}
diff --git a/go.mod b/go.mod
index 0bca3dd..4f31a22 100644
--- a/go.mod
+++ b/go.mod
@@ -7,6 +7,7 @@ require (
 	foosoft.net/projects/zero-epwing-go v0.0.0-20220704035039-bc008453615d
 	github.com/andlabs/ui v0.0.0-20200610043537-70a69d6ae31e
 	github.com/mattn/go-sqlite3 v1.14.14
+	golang.org/x/exp v0.0.0-20221207211629-99ab8fa1c11f
 )
 
 require golang.org/x/text v0.3.7 // indirect
diff --git a/go.sum b/go.sum
index ca51ada..4dd5f91 100644
--- a/go.sum
+++ b/go.sum
@@ -6,5 +6,7 @@ github.com/andlabs/ui v0.0.0-20200610043537-70a69d6ae31e h1:wSQCJiig/QkoUnpvelSP
 github.com/andlabs/ui v0.0.0-20200610043537-70a69d6ae31e/go.mod h1:5G2EjwzgZUPnnReoKvPWVneT8APYbyKkihDVAHUi0II=
 github.com/mattn/go-sqlite3 v1.14.14 h1:qZgc/Rwetq+MtyE18WhzjokPD93dNqLGNT3QJuLvBGw=
 github.com/mattn/go-sqlite3 v1.14.14/go.mod h1:NyWgC/yNuGj7Q9rpYnZvas74GogHl5/Z4A/KQRfk6bU=
+golang.org/x/exp v0.0.0-20221207211629-99ab8fa1c11f h1:90Jq/vvGVDsqj8QqCynjFw9MCerDguSMODLYII416Y8=
+golang.org/x/exp v0.0.0-20221207211629-99ab8fa1c11f/go.mod h1:CxIveKay+FTh1D0yPZemJVgC/95VzuuOLq5Qi4xnoYc=
 golang.org/x/text v0.3.7 h1:olpwvP2KacW1ZWvsR7uQhoyTYvKAupfQrRGBFM352Gk=
 golang.org/x/text v0.3.7/go.mod h1:u+2+/6zg+i71rQMx5EYifcz6MCKuco9NR6JIITiCfzQ=
diff --git a/jmdict.go b/jmdict.go
new file mode 100644
index 0000000..74809e7
--- /dev/null
+++ b/jmdict.go
@@ -0,0 +1,221 @@
+package yomichan
+
+import (
+	"os"
+	"regexp"
+	"strconv"
+	"strings"
+
+	"foosoft.net/projects/jmdict"
+	"golang.org/x/exp/slices"
+)
+
+func grammarRules(partsOfSpeech []string) []string {
+	rules := []string{}
+	for _, partOfSpeech := range partsOfSpeech {
+		switch partOfSpeech {
+		case "adj-i", "vk", "vz":
+			rules = append(rules, partOfSpeech)
+		default:
+			if strings.HasPrefix(partOfSpeech, "v5") {
+				rules = append(rules, "v5")
+			} else if strings.HasPrefix(partOfSpeech, "v1") {
+				rules = append(rules, "v1")
+			} else if strings.HasPrefix(partOfSpeech, "vs-") {
+				rules = append(rules, "vs")
+			}
+		}
+	}
+	return rules
+}
+
+func calculateTermScore(senseNumber int, headword headword) int {
+	const senseWeight int = 1
+	const entryPositionWeight int = 100
+	const priorityWeight int = 10000
+
+	score := 0
+	score -= (senseNumber - 1) * senseWeight
+	score -= headword.Index * entryPositionWeight
+	score += headword.Score() * priorityWeight
+
+	return score
+}
+
+func doDisplaySenseNumberTag(headword headword, entry jmdict.JmdictEntry, meta jmdictMetadata) bool {
+	// Display sense numbers if the entry has more than one sense
+	// or if the headword is found in multiple entries.
+	hash := headword.Hash()
+	if meta.seqToSenseCount[entry.Sequence] > 1 {
+		return true
+	} else if len(meta.headwordHashToSeqs[hash]) > 1 {
+		return true
+	} else {
+		return false
+	}
+}
+
+func jmdictPublicationDate(dictionary jmdict.Jmdict) string {
+	dateEntry := dictionary.Entries[len(dictionary.Entries)-1]
+	r := regexp.MustCompile(`\d{4}-\d{2}-\d{2}`)
+	jmdictDate := r.FindString(dateEntry.Sense[0].Glossary[0].Content)
+	return jmdictDate
+}
+
+func createFormsTerm(headword headword, entry jmdict.JmdictEntry, meta jmdictMetadata) dbTerm {
+	term := baseFormsTerm(entry)
+	term.Expression = headword.Expression
+	term.Reading = headword.Reading
+
+	term.addTermTags(headword.TermTags...)
+
+	term.addDefinitionTags("forms")
+	senseNumber := meta.seqToSenseCount[entry.Sequence] + 1
+	term.Score = calculateTermScore(senseNumber, headword)
+	return term
+}
+
+func createSearchTerm(headword headword, entry jmdict.JmdictEntry, meta jmdictMetadata) dbTerm {
+	term := dbTerm{
+		Expression: headword.Expression,
+		Sequence:   -entry.Sequence,
+	}
+	for _, sense := range entry.Sense {
+		rules := grammarRules(sense.PartsOfSpeech)
+		term.addRules(rules...)
+	}
+	term.addTermTags(headword.TermTags...)
+	term.Score = calculateTermScore(0, headword)
+
+	redirectHeadword := meta.seqToMainHeadword[entry.Sequence]
+	expHash := redirectHeadword.ExpHash()
+	doDisplayReading := (len(meta.expHashToReadings[expHash]) > 1)
+
+	content := contentSpan(
+		contentAttr{fontSize: "130%"},
+		"⟶",
+		redirectHeadword.ToInternalLink(doDisplayReading),
+	)
+
+	term.Glossary = []any{contentStructure(content)}
+	return term
+}
+
+func createSenseTerm(sense jmdict.JmdictSense, senseNumber int, headword headword, entry jmdict.JmdictEntry, meta jmdictMetadata) dbTerm {
+	term := dbTerm{
+		Expression: headword.Expression,
+		Reading:    headword.Reading,
+		Sequence:   entry.Sequence,
+	}
+
+	term.Glossary = createGlossary(sense, meta)
+
+	term.addTermTags(headword.TermTags...)
+
+	if doDisplaySenseNumberTag(headword, entry, meta) {
+		senseNumberTag := strconv.Itoa(senseNumber)
+		term.addDefinitionTags(senseNumberTag)
+	}
+	term.addDefinitionTags(sense.PartsOfSpeech...)
+	term.addDefinitionTags(sense.Fields...)
+	term.addDefinitionTags(sense.Misc...)
+	term.addDefinitionTags(sense.Dialects...)
+
+	rules := grammarRules(sense.PartsOfSpeech)
+	term.addRules(rules...)
+
+	term.Score = calculateTermScore(senseNumber, headword)
+
+	return term
+}
+
+func extractTerms(headword headword, entry jmdict.JmdictEntry, meta jmdictMetadata) ([]dbTerm, bool) {
+	if meta.seqToSenseCount[entry.Sequence] == 0 {
+		return nil, false
+	}
+	if headword.IsSearchOnly {
+		searchTerm := createSearchTerm(headword, entry, meta)
+		return []dbTerm{searchTerm}, true
+	}
+	terms := []dbTerm{}
+	senseNumber := 1
+	for _, sense := range entry.Sense {
+		if !glossaryContainsLanguage(sense.Glossary, meta.language) {
+			continue
+		}
+		if sense.RestrictedReadings != nil && !slices.Contains(sense.RestrictedReadings, headword.Reading) {
+			senseNumber += 1
+			continue
+		}
+		if sense.RestrictedKanji != nil && !slices.Contains(sense.RestrictedKanji, headword.Expression) {
+			senseNumber += 1
+			continue
+		}
+		senseTerm := createSenseTerm(sense, senseNumber, headword, entry, meta)
+		senseNumber += 1
+		terms = append(terms, senseTerm)
+	}
+
+	if meta.hasMultipleForms[entry.Sequence] {
+		formsTerm := createFormsTerm(headword, entry, meta)
+		terms = append(terms, formsTerm)
+	}
+	return terms, true
+}
+
+func jmdExportDb(inputPath string, outputPath string, languageName string, title string, stride int, pretty bool) error {
+	reader, err := os.Open(inputPath)
+	if err != nil {
+		return err
+	}
+	defer reader.Close()
+
+	dictionary, entities, err := jmdict.LoadJmdictNoTransform(reader)
+	if err != nil {
+		return err
+	}
+
+	meta := newJmdictMetadata(dictionary, languageName)
+
+	terms := dbTermList{}
+	for _, entry := range dictionary.Entries {
+		headwords := extractHeadwords(entry)
+		for _, headword := range headwords {
+			if newTerms, ok := extractTerms(headword, entry, meta); ok {
+				terms = append(terms, newTerms...)
+			}
+		}
+	}
+
+	tags := dbTagList{}
+	tags = append(tags, entityTags(entities)...)
+	tags = append(tags, senseNumberTags(meta.maxSenseCount)...)
+	tags = append(tags, newsFrequencyTags()...)
+	tags = append(tags, customDbTags()...)
+
+	recordData := map[string]dbRecordList{
+		"term": terms.crush(),
+		"tag":  tags.crush(),
+	}
+
+	if title == "" {
+		title = "JMdict"
+	}
+	jmdictDate := jmdictPublicationDate(dictionary)
+
+	index := dbIndex{
+		Title:       title,
+		Revision:    "JMdict." + jmdictDate,
+		Sequenced:   true,
+		Attribution: edrdgAttribution,
+	}
+	index.setDefaults()
+
+	return writeDb(
+		outputPath,
+		index,
+		recordData,
+		stride,
+		pretty,
+	)
+}
diff --git a/jmdictConstants.go b/jmdictConstants.go
new file mode 100644
index 0000000..1d49194
--- /dev/null
+++ b/jmdictConstants.go
@@ -0,0 +1,215 @@
+package yomichan
+
+type LangCode struct { // map key pairing a build language with a code to look up
+	language string // language the output is built for, e.g. "eng"
+	code     string // code being translated (gloss type, reference type, source type or language code)
+}
+
+const edrdgAttribution = "This publication has included material from the JMdict (EDICT, etc.) dictionary files in accordance with the licence provisions of the Electronic Dictionaries Research Group. See http://www.edrdg.org/" // stored as Attribution in the dictionary index
+
+const prioritySymbol = "★" // InfoSymbols: priority form
+const rareKanjiSymbol = "🅁" // InfoSymbols: rare kanji form
+const irregularSymbol = "⚠" // InfoSymbols: irregular form
+const outdatedSymbol = "⛬" // InfoSymbols: outdated form
+const defaultSymbol = "㊒" // TableCellText: the form exists but has no info symbols
+
+const priorityTagName = "⭐" // term tag added by SetTermTags for priority forms
+const rareKanjiTagName = "R" // term tag for rare kanji forms
+const irregularTagName = "⚠️" // term tag for irregular forms
+const outdatedTagName = "⛬" // term tag for outdated forms
+const atejiTagName = "ateji" // term tag for ateji forms
+const gikunTagName = "gikun" // term tag for gikun forms
+
+const langMarker = "'🌐 '" // list marker (listStyleType) for source languages; the inner quotes are part of the value
+const noteMarker = "'📝 '" // list marker for sense notes
+const infoMarker = "'ℹ️ '" // list marker for typed (information) glosses
+const refMarker = "'➡️ '" // list marker for cross-references
+const antonymMarker = "'🔄 '" // list marker for antonyms
+
+var ISOtoFlag = map[string]string{ // listStyleType value (quoted flag emoji) per three-letter language code; "" maps to the same flag as "eng"
+	"":    "'🇬🇧 '",
+	"eng": "'🇬🇧 '",
+	"dut": "'🇳🇱 '",
+	"fre": "'🇫🇷 '",
+	"ger": "'🇩🇪 '",
+	"hun": "'🇭🇺 '",
+	"ita": "'🇮🇹 '",
+	"jpn": "'🇯🇵 '",
+	"rus": "'🇷🇺 '",
+	"slv": "'🇸🇮 '",
+	"spa": "'🇪🇸 '",
+	"swe": "'🇸🇪 '",
+}
+
+var langNameToCode = map[string]string{ // lower-case language name -> three-letter code; the empty name maps to "eng"
+	"":          "eng",
+	"english":   "eng",
+	"dutch":     "dut",
+	"french":    "fre",
+	"german":    "ger",
+	"hungarian": "hun",
+	"italian":   "ita",
+	"russian":   "rus",
+	"slovenian": "slv",
+	"spanish":   "spa",
+	"swedish":   "swe",
+}
+
+var glossTypeCodeToName = map[LangCode]string{ // display name of a gloss type code, per build language
+	{"eng", "lit"}:  "literally",
+	{"eng", "fig"}:  "figuratively",
+	{"eng", "expl"}: "", // don't need to tell the user that an explanation is an explanation
+	{"eng", "tm"}:   "trademark",
+}
+
+var refNoteHint = map[LangCode]string{ // hint printed before a reference, keyed by build language and reference type
+	{"eng", "xref"}: "see",
+	{"eng", "ant"}:  "antonym",
+}
+
+var sourceLangTypeCodeToType = map[LangCode]string{ // display text of a source-language type code, per build language
+	{"eng", "part"}: "partial",
+	{"eng", ""}:     "", // implied "full"
+}
+
+var langCodeToName = map[LangCode]string{ // display name of a three-letter language code, per build language
+	{"eng", "afr"}: "Afrikaans",
+	{"eng", "ain"}: "Ainu",
+	{"eng", "alg"}: "Algonquian",
+	{"eng", "amh"}: "Amharic",
+	{"eng", "ara"}: "Arabic",
+	{"eng", "arn"}: "Mapudungun",
+	{"eng", "bnt"}: "Bantu",
+	{"eng", "bre"}: "Breton",
+	{"eng", "bul"}: "Bulgarian",
+	{"eng", "bur"}: "Burmese",
+	{"eng", "chi"}: "Chinese",
+	{"eng", "chn"}: "Chinook Jargon",
+	{"eng", "cze"}: "Czech",
+	{"eng", "dan"}: "Danish",
+	{"eng", "dut"}: "Dutch",
+	{"eng", "eng"}: "English",
+	{"eng", "epo"}: "Esperanto",
+	{"eng", "est"}: "Estonian",
+	{"eng", "fil"}: "Filipino",
+	{"eng", "fin"}: "Finnish",
+	{"eng", "fre"}: "French",
+	{"eng", "geo"}: "Georgian",
+	{"eng", "ger"}: "German",
+	{"eng", "glg"}: "Galician",
+	{"eng", "grc"}: "Ancient Greek",
+	{"eng", "gre"}: "Modern Greek",
+	{"eng", "haw"}: "Hawaiian",
+	{"eng", "heb"}: "Hebrew",
+	{"eng", "hin"}: "Hindi",
+	{"eng", "hun"}: "Hungarian",
+	{"eng", "ice"}: "Icelandic",
+	{"eng", "ind"}: "Indonesian",
+	{"eng", "ita"}: "Italian",
+	{"eng", "khm"}: "Khmer",
+	{"eng", "kor"}: "Korean",
+	{"eng", "kur"}: "Kurdish",
+	{"eng", "lat"}: "Latin",
+	{"eng", "mal"}: "Malayalam",
+	{"eng", "mao"}: "Maori",
+	{"eng", "may"}: "Malay",
+	{"eng", "mnc"}: "Manchu",
+	{"eng", "mol"}: "Moldavian", // ISO 639 deprecated (https://iso639-3.sil.org/code/mol)
+	{"eng", "mon"}: "Mongolian",
+	{"eng", "nor"}: "Norwegian",
+	{"eng", "per"}: "Persian",
+	{"eng", "pol"}: "Polish",
+	{"eng", "por"}: "Portuguese",
+	{"eng", "rum"}: "Romanian",
+	{"eng", "rus"}: "Russian",
+	{"eng", "san"}: "Sanskrit",
+	{"eng", "scr"}: "Croatian", // Code doesn't seem to exist in ISO 639. Should be "hrv" instead? (https://iso639-3.sil.org/code/hrv)
+	{"eng", "slo"}: "Slovak",
+	{"eng", "slv"}: "Slovenian",
+	{"eng", "som"}: "Somali",
+	{"eng", "spa"}: "Spanish",
+	{"eng", "swa"}: "Swahili",
+	{"eng", "swe"}: "Swedish",
+	{"eng", "tah"}: "Tahitian",
+	{"eng", "tam"}: "Tamil",
+	{"eng", "tgl"}: "Tagalog",
+	{"eng", "tha"}: "Thai",
+	{"eng", "tib"}: "Tibetan",
+	{"eng", "tur"}: "Turkish",
+	{"eng", "ukr"}: "Ukrainian",
+	{"eng", "urd"}: "Urdu",
+	{"eng", "vie"}: "Vietnamese",
+	{"eng", "yid"}: "Yiddish",
+}
+
+// ISOtoHTML maps three-letter language codes to the subtags used for HTML lang attributes; see https://www.iana.org/assignments/language-subtag-registry/language-subtag-registry
+var ISOtoHTML = map[string]string{
+	"afr": "af",  // Afrikaans
+	"ain": "ain", // Ainu
+	"alg": "alg", // Algonquian
+	"amh": "am",  // Amharic
+	"ara": "ar",  // Arabic
+	"arn": "arn", // Mapudungun
+	"bnt": "bnt", // Bantu
+	"bre": "br",  // Breton
+	"bul": "bg",  // Bulgarian
+	"bur": "my",  // Burmese
+	"chi": "zh",  // Chinese
+	"chn": "chn", // Chinook Jargon
+	"cze": "cs",  // Czech
+	"dan": "da",  // Danish
+	"dut": "nl",  // Dutch
+	"eng": "en",  // English
+	"epo": "eo",  // Esperanto
+	"est": "et",  // Estonian
+	"fil": "fil", // Filipino
+	"fin": "fi",  // Finnish
+	"fre": "fr",  // French
+	"geo": "ka",  // Georgian
+	"ger": "de",  // German
+	"glg": "gl",  // Galician
+	"grc": "grc", // Ancient Greek
+	"gre": "el",  // Modern Greek
+	"haw": "haw", // Hawaiian
+	"heb": "he",  // Hebrew
+	"hin": "hi",  // Hindi
+	"hun": "hu",  // Hungarian
+	"ice": "is",  // Icelandic
+	"ind": "id",  // Indonesian
+	"ita": "it",  // Italian
+	"jpn": "ja",  // Japanese
+	"khm": "km",  // Khmer
+	"kor": "ko",  // Korean
+	"kur": "ku",  // Kurdish
+	"lat": "la",  // Latin
+	"mal": "ml",  // Malayalam
+	"mao": "mi",  // Maori
+	"may": "ms",  // Malay
+	"mnc": "mnc", // Manchu
+	"mol": "ro",  // Moldavian
+	"mon": "mn",  // Mongolian
+	"nor": "no",  // Norwegian
+	"per": "fa",  // Persian
+	"pol": "pl",  // Polish
+	"por": "pt",  // Portuguese
+	"rum": "ro",  // Romanian
+	"rus": "ru",  // Russian
+	"san": "sa",  // Sanskrit
+	"scr": "hr",  // Croatian
+	"slo": "sk",  // Slovak
+	"slv": "sl",  // Slovenian
+	"som": "so",  // Somali
+	"spa": "es",  // Spanish
+	"swa": "sw",  // Swahili
+	"swe": "sv",  // Swedish
+	"tah": "ty",  // Tahitian
+	"tam": "ta",  // Tamil
+	"tgl": "tl",  // Tagalog
+	"tha": "th",  // Thai
+	"tib": "bo",  // Tibetan
+	"tur": "tr",  // Turkish
+	"ukr": "uk",  // Ukrainian
+	"urd": "ur",  // Urdu
+	"vie": "vi",  // Vietnamese
+	"yid": "yi",  // Yiddish
+}
diff --git a/jmdictForms.go b/jmdictForms.go
new file mode 100644
index 0000000..76eba34
--- /dev/null
+++ b/jmdictForms.go
@@ -0,0 +1,254 @@
+package yomichan
+
+import (
+	"os"
+	"strings"
+
+	"foosoft.net/projects/jmdict"
+	"golang.org/x/exp/slices"
+)
+
+// kata2hira returns word with katakana (ァ-ヶ and the marks ヽ, ヾ) shifted to hiragana.
+func kata2hira(word string) string {
+	toHiragana := func(r rune) rune {
+		if ('ァ' <= r && r <= 'ヶ') || ('ヽ' <= r && r <= 'ヾ') {
+			return r - 0x60
+		}
+		return r
+	}
+	return strings.Map(toHiragana, word)
+}
+
+// InfoSymbols returns the symbols for the flags set on h, joined with
+// " | ". The order is fixed: priority, rare kanji, irregular, outdated.
+// It returns "" when none of those flags is set.
+func (h *headword) InfoSymbols() string {
+	symbols := []string{}
+	add := func(flag bool, symbol string) {
+		if flag {
+			symbols = append(symbols, symbol)
+		}
+	}
+	add(h.IsPriority, prioritySymbol)
+	add(h.IsRareKanji, rareKanjiSymbol)
+	add(h.IsIrregular, irregularSymbol)
+	add(h.IsOutdated, outdatedSymbol)
+	return strings.Join(symbols, " | ")
+}
+
+// GlossText returns the expression (in 〈〉 when ateji) followed by any info symbols in full-width parentheses.
+func (h *headword) GlossText() string {
+	text := h.Expression
+	if h.IsAteji {
+		text = "〈" + text + "〉"
+	}
+	if symbols := h.InfoSymbols(); symbols != "" {
+		text += "（" + symbols + "）"
+	}
+	return text
+}
+
+// TableColHeaderText returns the kanji form, wrapped in 〈〉 when ateji.
+func (h *headword) TableColHeaderText() string {
+	if !h.IsAteji {
+		return h.KanjiForm()
+	}
+	return "〈" + h.KanjiForm() + "〉"
+}
+
+// TableRowHeaderText returns the reading, wrapped in 〈〉 when gikun.
+func (h *headword) TableRowHeaderText() string {
+	if !h.IsGikun {
+		return h.Reading
+	}
+	return "〈" + h.Reading + "〉"
+}
+
+// TableCellText returns the text for this headword's cell in the forms
+// table: its info symbols, or defaultSymbol when it has none.
+func (h *headword) TableCellText() string {
+	if symbols := h.InfoSymbols(); symbols != "" {
+		return symbols
+	}
+	return defaultSymbol
+}
+
+// KanjiForm returns the expression, or "∅" for a kana-only headword.
+func (h *headword) KanjiForm() string {
+	if !h.IsKanaOnly() {
+		return h.Expression
+	}
+	return "∅"
+}
+
+// jmdNeedsFormTable reports whether the headwords need a forms table: true
+// if any is gikun, or if the headwords that are neither search-only nor
+// kana-only have distinct readings (バカがい and ばかがい are not distinct).
+func jmdNeedsFormTable(headwords []headword) bool {
+	firstReading := ""
+	for _, h := range headwords {
+		switch {
+		case h.IsGikun:
+			return true
+		case h.IsSearchOnly, h.IsKanaOnly():
+			continue
+		case firstReading == "":
+			firstReading = kata2hira(h.Reading)
+		case firstReading != kata2hira(h.Reading):
+			return true
+		}
+	}
+	return false
+}
+
+type formTableData struct { // intermediate data for the forms table, filled in by tableData
+	kanjiForms    []string                     // column keys, in first-seen order
+	readings      []string                     // row keys, in first-seen order
+	colHeaderText map[string]string            // kanji form -> column header text
+	rowHeaderText map[string]string            // reading -> row header text
+	cellText      map[string]map[string]string // reading -> kanji form -> cell text
+}
+
+// tableData collects, in first-seen order, the kanji forms (columns) and readings (rows) of all non-search-only headwords, plus header and cell text.
+func tableData(headwords []headword) formTableData {
+	table := formTableData{
+		kanjiForms:    []string{},
+		readings:      []string{},
+		colHeaderText: map[string]string{},
+		rowHeaderText: map[string]string{},
+		cellText:      map[string]map[string]string{},
+	}
+	for _, h := range headwords {
+		if h.IsSearchOnly {
+			continue
+		}
+		col, row := h.KanjiForm(), h.Reading
+		if !slices.Contains(table.kanjiForms, col) {
+			table.kanjiForms = append(table.kanjiForms, col)
+			table.colHeaderText[col] = h.TableColHeaderText()
+		}
+		if !slices.Contains(table.readings, row) {
+			table.readings = append(table.readings, row)
+			table.rowHeaderText[row] = h.TableRowHeaderText()
+			table.cellText[row] = map[string]string{}
+		}
+		table.cellText[row][col] = h.TableCellText()
+	}
+	return table
+}
+
+// formsTableGlossary renders the headwords as a structured-content table:
+// one column per kanji form, one row per reading, and each cell holding
+// the text tableData recorded for that reading/kanji form pair (empty
+// when there is none). The table's data map has "content": "formsTable".
+func formsTableGlossary(headwords []headword) []any {
+	data := tableData(headwords)
+
+	plain := contentAttr{}
+	centered := contentAttr{textAlign: "center"}
+	leftAligned := contentAttr{textAlign: "left"}
+
+	// Header row: an empty corner cell, then one head cell per kanji form.
+	headerCells := []any{contentTableHeadCell(plain, "")}
+	for _, kanjiForm := range data.kanjiForms {
+		headCell := contentTableHeadCell(centered, data.colHeaderText[kanjiForm])
+		headerCells = append(headerCells, headCell)
+	}
+	rows := []any{contentTableRow(plain, headerCells...)}
+
+	// Body rows: the reading as a left-aligned head cell, then its cells.
+	for _, reading := range data.readings {
+		cells := []any{contentTableHeadCell(leftAligned, data.rowHeaderText[reading])}
+		for _, kanjiForm := range data.kanjiForms {
+			cell := contentTableCell(centered, data.cellText[reading][kanjiForm])
+			cells = append(cells, cell)
+		}
+		rows = append(rows, contentTableRow(plain, cells...))
+	}
+
+	tableAttr := contentAttr{data: map[string]string{"content": "formsTable"}}
+	table := contentTable(tableAttr, rows...)
+	return []any{contentStructure(table)}
+}
+
+// formsGlossary returns the gloss text of every headword that is not
+// search-only, in order. The result is empty (not nil) when there are none.
+func formsGlossary(headwords []headword) []any {
+	glosses := []any{}
+	for _, h := range headwords {
+		if !h.IsSearchOnly {
+			glosses = append(glosses, h.GlossText())
+		}
+	}
+	return glosses
+}
+
+// baseFormsTerm builds the term shared by all headwords of entry: its
+// sequence, a forms glossary (table or plain list) and every sense's rules.
+func baseFormsTerm(entry jmdict.JmdictEntry) dbTerm {
+	headwords := extractHeadwords(entry)
+	buildGlossary := formsGlossary
+	if jmdNeedsFormTable(headwords) {
+		buildGlossary = formsTableGlossary
+	}
+	term := dbTerm{Sequence: entry.Sequence, Glossary: buildGlossary(headwords)}
+	for _, sense := range entry.Sense {
+		term.addRules(grammarRules(sense.PartsOfSpeech)...)
+	}
+	return term
+}
+
+// formsExportDb reads the JMdict file at inputPath and writes a dictionary
+// to outputPath in which every headword of an entry carries that entry's
+// forms glossary (see baseFormsTerm). An empty title defaults to
+// "JMdict Forms"; stride and pretty are forwarded to writeDb. languageName
+// is accepted but not used by this function.
+func formsExportDb(inputPath, outputPath, languageName, title string, stride int, pretty bool) error {
+	file, err := os.Open(inputPath)
+	if err != nil {
+		return err
+	}
+	defer file.Close()
+
+	dictionary, _, err := jmdict.LoadJmdictNoTransform(file)
+	if err != nil {
+		return err
+	}
+
+	terms := dbTermList{}
+	for _, entry := range dictionary.Entries {
+		shared := baseFormsTerm(entry)
+		for _, h := range extractHeadwords(entry) {
+			// Copy the shared term, then specialise it for this headword.
+			term := shared
+			term.Expression, term.Reading = h.Expression, h.Reading
+			if h.IsSearchOnly {
+				// Search-only forms are stored under the negated sequence.
+				term.Sequence = -term.Sequence
+			}
+			terms = append(terms, term)
+		}
+	}
+
+	if title == "" {
+		title = "JMdict Forms"
+	}
+
+	// This dictionary writes an empty tag list.
+	recordData := map[string]dbRecordList{
+		"term": terms.crush(),
+		"tag":  {},
+	}
+
+	jmdictDate := jmdictPublicationDate(dictionary)
+
+	index := dbIndex{
+		Title:       title,
+		Revision:    "JMdict." + jmdictDate,
+		Sequenced:   true,
+		Attribution: edrdgAttribution,
+	}
+	index.setDefaults()
+
+	return writeDb(outputPath, index, recordData, stride, pretty)
+}
diff --git a/jmdictGlossary.go b/jmdictGlossary.go
new file mode 100644
index 0000000..0260cbf
--- /dev/null
+++ b/jmdictGlossary.go
@@ -0,0 +1,300 @@
+package yomichan
+
+import (
+	"fmt"
+	"strconv"
+
+	"foosoft.net/projects/jmdict"
+)
+
+// glossaryContainsLanguage reports whether at least one gloss in glossary
+// is in the given language (see glossContainsLanguage).
+func glossaryContainsLanguage(glossary []jmdict.JmdictGlossary, language string) bool {
+	for _, gloss := range glossary {
+		if glossContainsLanguage(gloss, language) {
+			return true
+		}
+	}
+	return false
+}
+
+// glossContainsLanguage reports whether gloss is in the given language.
+// A gloss without a language attribute counts as "eng".
+func glossContainsLanguage(gloss jmdict.JmdictGlossary, language string) bool {
+	glossLanguage := "eng"
+	if gloss.Language != nil {
+		glossLanguage = *gloss.Language
+	}
+	return glossLanguage == language
+}
+
+// makeGlossListItem wraps the gloss text in a list item with default
+// attributes. The language parameter is not used.
+func makeGlossListItem(gloss jmdict.JmdictGlossary, language string) any {
+	return contentListItem(contentAttr{}, gloss.Content)
+}
+
+// makeInfoGlossListItem builds a list item for a gloss that has a type
+// (literal, figurative, trademark, ...); gloss.Type must not be nil. A known,
+// non-empty type name precedes the gloss in italics; an unknown code is printed to stdout and shown in brackets.
+func makeInfoGlossListItem(gloss jmdict.JmdictGlossary, language string) any {
+	typeCode := *gloss.Type
+	contents := []any{}
+	switch name, known := glossTypeCodeToName[LangCode{language, typeCode}]; {
+	case !known:
+		fmt.Println("Unknown glossary type code " + typeCode + " for build language " + language)
+		contents = append(contents, "["+typeCode+"] ")
+	case name != "":
+		italic := contentAttr{fontStyle: "italic"}
+		contents = append(contents, contentSpan(italic, "("+name+")"), " ")
+	}
+	contents = append(contents, gloss.Content)
+	return contentListItem(contentAttr{}, contents...)
+}
+
+func makeSourceLangListItem(sourceLanguage jmdict.JmdictSource, language string) any {
+	contents := []any{}
+
+	var srcLangCode string
+	if sourceLanguage.Language == nil {
+		srcLangCode = "eng"
+	} else {
+		srcLangCode = *sourceLanguage.Language
+	}
+
+	// Format: [Language] ([Partial?], [Wasei?]): [Original word?]
+	// [Language]
+	if langName, ok := langCodeToName[LangCode{language, srcLangCode}]; ok {
+		contents = append(contents, langName)
+	} else {
+		contents = append(contents, srcLangCode)
+		fmt.Println("Unable to convert ISO 639 code " + srcLangCode + " to its full name in language " + language)
+	}
+
+	// ([Partial?], [Wasei?])
+	var sourceLangTypeCode string
+	if sourceLanguage.Type == nil {
+		sourceLangTypeCode = ""
+	} else {
+		sourceLangTypeCode = *sourceLanguage.Type
+	}
+	var sourceLangType string
+	if val, ok := sourceLangTypeCodeToType[LangCode{language, sourceLangTypeCode}]; ok {
+		sourceLangType = val
+	} else {
+		sourceLangType = sourceLangTypeCode
+		fmt.Println("Unknown source language type code " + sourceLangTypeCode + " for build language " + language)
+	}
+	if sourceLangType != "" && sourceLanguage.Wasei == "y" {
+		contents = append(contents, " ("+sourceLangType+", wasei)")
+	} else if sourceLangType != "" {
+		contents = append(contents, " ("+sourceLangType+")")
+	} else if sourceLanguage.Wasei == "y" {
+		contents = append(contents, " (wasei)")
+	}
+
+	// : [Original word?]
+	if sourceLanguage.Content != "" {
+		contents = append(contents, ": ")
+		attr := contentAttr{lang: ISOtoHTML[srcLangCode]}
+		contents = append(contents, contentSpan(attr, sourceLanguage.Content))
+	}
+
+	listItem := contentListItem(contentAttr{}, contents...)
+	return listItem
+}
+
+// makeReferenceListItem builds the list item for a cross-reference
+// (refType "xref") or an antonym (refType "ant"): a hint, a link to the
+// referenced headword and, in small print, the condensed glosses of the
+// referenced sense. A reference that cannot be parsed, or that is missing
+// from meta.referenceToSeq, is shown as plain text in 【】 instead.
+func makeReferenceListItem(reference string, refType string, meta jmdictMetadata) any {
+	hint := refNoteHint[LangCode{meta.language, refType}] + ": "
+
+	// unresolved is the fallback item: the raw reference text in 【】.
+	unresolved := func() any {
+		return contentListItem(contentAttr{}, hint, "【"+reference+"】")
+	}
+
+	refHeadword, senseNumber, ok := parseReference(reference)
+	if !ok {
+		return unresolved()
+	}
+	seq, ok := meta.referenceToSeq[reference]
+	if !ok {
+		return unresolved()
+	}
+	target := senseID{sequence: seq, number: senseNumber}
+
+	// Show the reading only when the expression maps to several readings.
+	showReading := len(meta.expHashToReadings[refHeadword.ExpHash()]) > 1
+	link := refHeadword.ToInternalLink(showReading)
+
+	// Prefix the sense number only when the target entry has several senses.
+	glossText := " " + meta.condensedGlosses[target]
+	if meta.seqToSenseCount[seq] > 1 {
+		glossText = " " + strconv.Itoa(senseNumber) + ". " + meta.condensedGlosses[target]
+	}
+
+	// The glosses of the referenced sense follow the link in small print.
+	glossAttr := contentAttr{
+		fontSize:      "65%",
+		verticalAlign: "middle",
+		data:          map[string]string{"content": "refGlosses"},
+	}
+	glossSpan := contentSpan(glossAttr, glossText)
+
+	return contentListItem(contentAttr{}, hint, link, glossSpan)
+}
+
+// makeExampleListItem builds the list item for one example sentence.
+// Japanese sentences get default attributes; any other language gets its
+// HTML lang and its flag as the list marker.
+func makeExampleListItem(sentence jmdict.JmdictExampleSentence) any {
+	attr := contentAttr{}
+	if sentence.Lang != "jpn" {
+		attr.lang = ISOtoHTML[sentence.Lang]
+		attr.listStyleType = ISOtoFlag[sentence.Lang]
+	}
+	return contentListItem(attr, sentence.Text)
+}
+
+// listAttr returns the attributes for a list: its lang, its listStyleType
+// and a data map whose "content" entry is dataContent.
+func listAttr(lang, listStyleType, dataContent string) contentAttr {
+	attr := contentAttr{lang: lang, listStyleType: listStyleType}
+	attr.data = map[string]string{"content": dataContent}
+	return attr
+}
+
+// needsStructuredContent reports whether a sense has anything beyond
+// untyped glosses: a typed gloss in the given language, source
+// languages, notes, antonyms, cross-references or examples. createGlossary
+// uses it to choose between structured content and plain gloss strings.
+func needsStructuredContent(sense jmdict.JmdictSense, language string) bool {
+	// Typed glosses only count when they are in the requested language.
+	for _, gloss := range sense.Glossary {
+		if gloss.Type != nil && glossContainsLanguage(gloss, language) {
+			return true
+		}
+	}
+
+	// Everything else counts regardless of language.
+	hasExtras := len(sense.SourceLanguages) > 0 ||
+		len(sense.Information) > 0 ||
+		len(sense.Antonyms) > 0 ||
+		len(sense.References) > 0 ||
+		len(sense.Examples) > 0
+	return hasExtras
+}
+
+// createGlossaryContent builds the structured-content glossary for one
+// sense. Each non-empty group of list items becomes an unordered list, and
+// the lists are emitted in this fixed order (named by their data "content"
+// value):
+//
+//	glossary, infoGlossary, sourceLanguages, notes, antonyms,
+//	references, examples
+//
+// Glosses are limited to meta.language; the other groups are not filtered.
+func createGlossaryContent(sense jmdict.JmdictSense, meta jmdictMetadata) any {
+	sections := []any{}
+	buildLang := ISOtoHTML[meta.language]
+	japanese := ISOtoHTML["jpn"]
+
+	// addSection wraps items in an unordered list carrying the given lang,
+	// list marker and data content name. Empty groups are left out.
+	addSection := func(lang, marker, dataContent string, items []any) {
+		if len(items) == 0 {
+			return
+		}
+		list := contentUnorderedList(listAttr(lang, marker, dataContent), items...)
+		sections = append(sections, list)
+	}
+
+	// Normal glosses: glosses in the build language without a type.
+	glossItems := []any{}
+	for _, gloss := range sense.Glossary {
+		if gloss.Type == nil && glossContainsLanguage(gloss, meta.language) {
+			item := makeGlossListItem(gloss, meta.language)
+			glossItems = append(glossItems, item)
+		}
+	}
+	addSection(buildLang, "circle", "glossary", glossItems)
+
+	// Information glosses: glosses in the build language that carry a
+	// type (literal, figurative, trademark, ...).
+	infoItems := []any{}
+	for _, gloss := range sense.Glossary {
+		if gloss.Type != nil && glossContainsLanguage(gloss, meta.language) {
+			item := makeInfoGlossListItem(gloss, meta.language)
+			infoItems = append(infoItems, item)
+		}
+	}
+	addSection(buildLang, infoMarker, "infoGlossary", infoItems)
+
+	// Language-of-origin / loanword information, one item per source
+	// language entry of the sense.
+	sourceItems := []any{}
+	for _, source := range sense.SourceLanguages {
+		item := makeSourceLangListItem(source, meta.language)
+		sourceItems = append(sourceItems, item)
+	}
+	addSection(buildLang, langMarker, "sourceLanguages", sourceItems)
+
+	// Sense notes. The list is marked as Japanese because notes often
+	// contain japanese text.
+	noteItems := []any{}
+	for _, note := range sense.Information {
+		item := contentListItem(contentAttr{}, note)
+		noteItems = append(noteItems, item)
+	}
+	addSection(japanese, noteMarker, "notes", noteItems)
+
+	// Antonyms, rendered like cross-references but with the "ant"
+	// reference type.
+	antonymItems := []any{}
+	for _, antonym := range sense.Antonyms {
+		item := makeReferenceListItem(antonym, "ant", meta)
+		antonymItems = append(antonymItems, item)
+	}
+	addSection(buildLang, antonymMarker, "antonyms", antonymItems)
+
+	// Cross-references, rendered with the "xref" reference type.
+	referenceItems := []any{}
+	for _, reference := range sense.References {
+		item := makeReferenceListItem(reference, "xref", meta)
+		referenceItems = append(referenceItems, item)
+	}
+	addSection(buildLang, refMarker, "references", referenceItems)
+
+	// Example sentences: every sentence of every example, in order. The
+	// list itself uses the Japanese flag marker; makeExampleListItem sets
+	// a different marker on sentences in other languages.
+	exampleItems := []any{}
+	for _, example := range sense.Examples {
+		for _, sentence := range example.Sentences {
+			item := makeExampleListItem(sentence)
+			exampleItems = append(exampleItems, item)
+		}
+	}
+	addSection(japanese, ISOtoFlag["jpn"], "examples", exampleItems)
+
+	return contentStructure(sections...)
+}
+
+// createGlossary returns the glossary for a sense: a single structured
+// content element when one is needed, else the build-language gloss strings.
+func createGlossary(sense jmdict.JmdictSense, meta jmdictMetadata) []any {
+	if needsStructuredContent(sense, meta.language) {
+		return []any{createGlossaryContent(sense, meta)}
+	}
+	glossary := []any{}
+	for _, gloss := range sense.Glossary {
+		if glossContainsLanguage(gloss, meta.language) {
+			glossary = append(glossary, gloss.Content)
+		}
+	}
+	return glossary
+}
diff --git a/jmdictHeadword.go b/jmdictHeadword.go
new file mode 100644
index 0000000..a1a75cb
--- /dev/null
+++ b/jmdictHeadword.go
@@ -0,0 +1,267 @@
+package yomichan
+
+import (
+	"fmt"
+	"hash/fnv"
+	"regexp"
+	"strconv"
+
+	"foosoft.net/projects/jmdict"
+	"golang.org/x/exp/slices"
+)
+
+type headword struct { // one expression/reading pair of a JMdict entry plus flags derived from its tags
+	Expression   string   // kanji form, or the reading when there is no kanji form
+	Reading      string   // kana reading; "" for headwords built from a kanji form alone
+	TermTags     []string // display tags, rebuilt by SetTermTags
+	Index        int      // position in the slice returned by extractHeadwords
+	IsPriority   bool     // has one of ichi1, news1, gai1, spec1, spec2
+	IsIrregular  bool     // info tag iK, ik or io
+	IsOutdated   bool     // info tag oK or ok
+	IsRareKanji  bool     // info tag rK (cleared by SetFlags when also outdated)
+	IsSearchOnly bool     // info tag sK or sk
+	IsAteji      bool     // info tag ateji
+	IsGikun      bool     // info tag gikun
+}
+
+type hash uint64 // 64-bit FNV-1a value produced by hashText
+
+func (h *headword) Hash() hash { // hash of the expression/reading pair
+	return hashText(h.Expression + "␞" + h.Reading)
+}
+
+func (h *headword) ExpHash() hash { // hash of the expression alone (the expression is hashed on both sides of the separator)
+	return hashText(h.Expression + "␞" + h.Expression)
+}
+
+func (h *headword) ReadingHash() hash { // hash of the reading alone (the reading is hashed on both sides of the separator)
+	return hashText(h.Reading + "␞" + h.Reading)
+}
+
+func hashText(s string) hash { // 64-bit FNV-1a of the bytes of s
+	digest := fnv.New64a()
+	digest.Write([]byte(s)) // hash.Hash documents that Write never returns an error
+	return hash(digest.Sum64())
+}
+
+// IsKanaOnly reports whether the expression equals the reading and consists only of kana (or 〜).
+func (h *headword) IsKanaOnly() bool {
+	if h.Expression != h.Reading {
+		return false
+	}
+	for _, r := range h.Expression {
+		switch {
+		case 'ぁ' <= r && r <= 'ヿ':
+			// hiragana and katakana range
+		case '･' <= r && r <= 'ﾟ':
+			// halfwidth katakana range
+		case r == '〜':
+			// also accepted in kana-only forms
+		default:
+			return false
+		}
+	}
+	return true
+}
+
+// Score rates the headword: +1 for priority, -5 for each of irregular, outdated, rare kanji and search-only.
+func (h *headword) Score() int {
+	score := 0
+	if h.IsPriority {
+		score++
+	}
+	penalties := []bool{
+		h.IsIrregular,
+		h.IsOutdated,
+		h.IsRareKanji,
+		h.IsSearchOnly,
+	}
+	for _, penalized := range penalties {
+		if penalized {
+			score -= 5
+		}
+	}
+	return score
+}
+
+// ToInternalLink returns structured content linking to this headword.
+// With includeReading set and a reading that differs from the expression,
+// the result is a span holding two links, "expression（reading）";
+// otherwise it is a single link on the expression. Both carry lang "ja".
+func (h *headword) ToInternalLink(includeReading bool) any {
+	japanese := contentAttr{lang: ISOtoHTML["jpn"]}
+
+	// Two links are only needed when the reading adds information.
+	if includeReading && h.Expression != h.Reading {
+		expressionLink := contentInternalLink(contentAttr{}, h.Expression)
+		readingLink := contentInternalLink(contentAttr{}, h.Reading)
+		return contentSpan(japanese, expressionLink, "（", readingLink, "）")
+	}
+
+	return contentInternalLink(japanese, h.Expression)
+}
+
+// SetFlags derives the boolean flags of h from its JMdict information tags
+// (infoTags) and priority tags (freqTags). Flags are only ever switched on,
+// except that IsRareKanji is cleared when the form is also outdated.
+func (h *headword) SetFlags(infoTags, freqTags []string) {
+	for _, tag := range freqTags {
+		switch tag {
+		case "ichi1", "news1", "gai1", "spec1", "spec2":
+			h.IsPriority = true
+		}
+	}
+	for _, tag := range infoTags {
+		switch tag {
+		case "iK", "ik", "io":
+			h.IsIrregular = true
+		case "oK", "ok":
+			h.IsOutdated = true
+		case "sK", "sk":
+			h.IsSearchOnly = true
+		case "rK":
+			h.IsRareKanji = true
+		case "ateji":
+			h.IsAteji = true
+		case "gikun":
+			h.IsGikun = true
+		}
+	}
+	h.IsRareKanji = h.IsRareKanji && !h.IsOutdated
+}
+
+// newsFreqTagPattern matches "nfXX" news-frequency tags; compiled once, not per tag.
+var newsFreqTagPattern = regexp.MustCompile(`nf\d\d`)
+
+// SetTermTags rebuilds h.TermTags from the flags already set on h and from
+// freqTags (JMdict priority tags such as "ichi1", "news2" or "nf07").
+func (h *headword) SetTermTags(freqTags []string) {
+	h.TermTags = []string{}
+	if h.IsPriority {
+		h.TermTags = append(h.TermTags, priorityTagName)
+	}
+	for _, tag := range freqTags {
+		switch {
+		case newsFreqTagPattern.MatchString(tag):
+			// nf tags are ranks of 500 (nf01 to nf48); 1k, 2k, ... reads easier.
+			var rank int
+			if _, err := fmt.Sscanf(tag, "nf%2d", &rank); err == nil {
+				h.TermTags = append(h.TermTags, "news"+strconv.Itoa((rank+rank%2)/2)+"k")
+			}
+		case tag == "", tag == "news1", tag == "news2":
+			// Not displayed. An empty tag is skipped so the slice below cannot panic.
+		default:
+			h.TermTags = append(h.TermTags, tag[:len(tag)-1]) // "ichi", "gai", or "spec"
+		}
+	}
+	if h.IsIrregular {
+		h.TermTags = append(h.TermTags, irregularTagName)
+	}
+	if h.IsOutdated {
+		h.TermTags = append(h.TermTags, outdatedTagName)
+	}
+	if h.IsRareKanji {
+		h.TermTags = append(h.TermTags, rareKanjiTagName)
+	}
+	if h.IsAteji {
+		h.TermTags = append(h.TermTags, atejiTagName)
+	}
+	if h.IsGikun {
+		h.TermTags = append(h.TermTags, gikunTagName)
+	}
+}
+
+func newHeadword(kanji *jmdict.JmdictKanji, reading *jmdict.JmdictReading) headword {
+	h := headword{}
+	infoTags := []string{}
+	freqTags := []string{}
+	if kanji == nil {
+		h.Expression = reading.Reading
+		h.Reading = reading.Reading
+		infoTags = reading.Information
+		freqTags = reading.Priorities
+	} else if reading == nil {
+		// should only apply to search-only kanji terms
+		h.Expression = kanji.Expression
+		h.Reading = ""
+		infoTags = kanji.Information
+		freqTags = kanji.Priorities
+	} else {
+		h.Expression = kanji.Expression
+		h.Reading = reading.Reading
+		infoTags = union(kanji.Information, reading.Information)
+		freqTags = intersection(kanji.Priorities, reading.Priorities)
+	}
+	h.SetFlags(infoTags, freqTags)
+	h.SetTermTags(freqTags)
+	return h
+}
+
+// areAllKanjiIrregular reports whether there is at least one kanji form
+// and every kanji form is rare, irregular, outdated or search-only.
+// If every kanji form is rare or irregular, then we'll make
+// kana-only headwords for each kana form.
+func areAllKanjiIrregular(allKanji []jmdict.JmdictKanji) bool {
+	for i := range allKanji {
+		// Build a kanji-only headword just to derive the flags.
+		h := newHeadword(&allKanji[i], nil)
+		if !(h.IsRareKanji || h.IsIrregular || h.IsOutdated || h.IsSearchOnly) {
+			return false
+		}
+	}
+	// An entry without kanji forms does not count.
+	return len(allKanji) > 0
+}
+
+// extractHeadwords lists every expression/reading pair of the entry. Index
+// records each headword's position in the returned slice.
+func extractHeadwords(entry jmdict.JmdictEntry) []headword {
+	headwords := []headword{}
+	add := func(kanji *jmdict.JmdictKanji, reading *jmdict.JmdictReading) {
+		h := newHeadword(kanji, reading)
+		h.Index = len(headwords)
+		headwords = append(headwords, h)
+	}
+	allKanjiAreIrregular := areAllKanjiIrregular(entry.Kanji)
+
+	if allKanjiAreIrregular {
+		// Adding the reading-only terms before kanji+reading
+		// terms here for the sake of the Index property,
+		// which affects the yomichan term ranking.
+		for i := range entry.Readings {
+			add(nil, &entry.Readings[i])
+		}
+	}
+
+	for k := range entry.Kanji {
+		kanji := &entry.Kanji[k]
+		if slices.Contains(kanji.Information, "sK") {
+			// Search-only kanji forms do not have associated readings.
+			add(kanji, nil)
+			continue
+		}
+		for r := range entry.Readings {
+			reading := &entry.Readings[r]
+			switch {
+			case reading.NoKanji != nil: // skip: not paired with kanji forms
+			case slices.Contains(reading.Information, "sk"):
+				// Search-only kana forms do not have associated kanji forms.
+			case reading.Restrictions != nil && !slices.Contains(reading.Restrictions, kanji.Expression): // skip: restricted to other kanji forms
+			default:
+				add(kanji, reading)
+			}
+		}
+	}
+
+	if !allKanjiAreIrregular {
+		noKanjiInEntry := len(entry.Kanji) == 0
+		for i := range entry.Readings {
+			reading := &entry.Readings[i]
+			if reading.NoKanji != nil || noKanjiInEntry || slices.Contains(reading.Information, "sk") {
+				add(nil, reading)
+			}
+		}
+	}
+
+	return headwords
+}
diff --git a/jmdictMetadata.go b/jmdictMetadata.go
new file mode 100644
index 0000000..ec92827
--- /dev/null
+++ b/jmdictMetadata.go
@@ -0,0 +1,158 @@
+package yomichan
+
+import (
+	"strings"
+
+	"foosoft.net/projects/jmdict"
+	"golang.org/x/exp/slices"
+)
+
+type sequence = int // alias used for JMdict entry sequence numbers (values of entry.Sequence)
+
+type jmdictMetadata struct { // lookup tables gathered over a whole dictionary by newJmdictMetadata
+	language           string                    // language code that glosses are filtered by
+	condensedGlosses   map[senseID]string        // sense -> its glosses (nil Type only) joined with "; "
+	seqToSenseCount    map[sequence]int          // number of senses having at least one gloss in language
+	seqToMainHeadword  map[sequence]headword     // first headword added for each sequence
+	expHashToReadings  map[hash][]string         // expression hash -> distinct readings seen with it
+	headwordHashToSeqs map[hash][]sequence       // headword hash -> distinct sequences containing it
+	references         []string                  // every x-ref and antonym string collected (may repeat)
+	referenceToSeq     map[string]sequence       // reference string -> resolved sequence; built by MakeReferenceToSeqMap
+	hashToSearchValues map[hash][]searchValue    // inverse of seqToSearchHashes; built by MakeHashToSearchValuesMap
+	seqToSearchHashes  map[sequence][]searchHash // distinct search hashes per sequence, in insertion order
+	hasMultipleForms   map[sequence]bool         // true when an entry has more than one non-search-only headword
+	maxSenseCount      int                       // largest value found in seqToSenseCount
+}
+
+type senseID struct { // map key identifying one sense of one entry
+	sequence sequence // sequence number of the entry
+	number   int      // sense number, counted from 1 by AddHeadword
+}
+
+func (meta *jmdictMetadata) AddHeadword(headword headword, entry jmdict.JmdictEntry) {
+	// Records one headword of entry in the lookup tables held by meta.
+	// Determine how many senses are in this entry for this language
+	if _, ok := meta.seqToSenseCount[entry.Sequence]; !ok {
+		senseCount := 0
+		for _, entrySense := range entry.Sense {
+			for _, gloss := range entrySense.Glossary {
+				if glossContainsLanguage(gloss, meta.language) {
+					senseCount += 1
+					break
+				}
+			}
+		}
+		meta.seqToSenseCount[entry.Sequence] = senseCount
+	}
+
+	if meta.seqToSenseCount[entry.Sequence] == 0 { // no sense has a gloss in this language: nothing to record
+		return
+	}
+
+	// main headwords (first ones that are found in entries).
+	if _, ok := meta.seqToMainHeadword[entry.Sequence]; !ok {
+		meta.seqToMainHeadword[entry.Sequence] = headword
+	}
+
+	// hash the term pair so we can determine if it's used
+	// in more than one JMdict entry later.
+	headwordHash := headword.Hash()
+	if !slices.Contains(meta.headwordHashToSeqs[headwordHash], entry.Sequence) {
+		meta.headwordHashToSeqs[headwordHash] = append(meta.headwordHashToSeqs[headwordHash], entry.Sequence)
+	}
+
+	// hash the expression so that we can determine if we
+	// need to disambiguate it by displaying its reading
+	// in reference notes later.
+	expHash := headword.ExpHash()
+	if !slices.Contains(meta.expHashToReadings[expHash], headword.Reading) {
+		meta.expHashToReadings[expHash] = append(meta.expHashToReadings[expHash], headword.Reading)
+	}
+
+	// e.g. for JMdict (English) we expect to end up with
+	// seqToSearchHashes[1260670] == 【元・もと】、【元・元】、【もと・もと】、【本・もと】、【本・本】、【素・もと】、【素・素】、【基・もと】、【基・基】
+	// used for correlating references to sequence numbers later.
+	searchHashes := []searchHash{
+		searchHash{headwordHash, headword.IsPriority},
+		searchHash{expHash, headword.IsPriority},
+		searchHash{headword.ReadingHash(), headword.IsPriority},
+	}
+	for _, x := range searchHashes {
+		if !slices.Contains(meta.seqToSearchHashes[entry.Sequence], x) {
+			meta.seqToSearchHashes[entry.Sequence] = append(meta.seqToSearchHashes[entry.Sequence], x)
+		}
+	}
+
+	currentSenseNumber := 1 // numbers only the senses that have glosses in meta.language
+	for _, entrySense := range entry.Sense {
+		if !glossaryContainsLanguage(entrySense.Glossary, meta.language) {
+			continue
+		}
+		if entrySense.RestrictedReadings != nil && !slices.Contains(entrySense.RestrictedReadings, headword.Reading) {
+			currentSenseNumber += 1 // sense is skipped for this headword but still consumes a number
+			continue
+		}
+		if entrySense.RestrictedKanji != nil && !slices.Contains(entrySense.RestrictedKanji, headword.Expression) {
+			currentSenseNumber += 1 // sense is skipped for this headword but still consumes a number
+			continue
+		}
+
+		allReferences := append(entrySense.References, entrySense.Antonyms...) // NOTE(review): append may write into References' spare capacity; confirm the aliasing is harmless
+		for _, reference := range allReferences {
+			meta.references = append(meta.references, reference)
+		}
+
+		currentSense := senseID{entry.Sequence, currentSenseNumber}
+		if meta.condensedGlosses[currentSense] == "" { // not stored yet, or stored as an empty string
+			glosses := []string{}
+			for _, gloss := range entrySense.Glossary {
+				if glossContainsLanguage(gloss, meta.language) && gloss.Type == nil { // glosses with a non-nil Type are left out
+					glosses = append(glosses, gloss.Content)
+				}
+			}
+			meta.condensedGlosses[currentSense] = strings.Join(glosses, "; ")
+		}
+		currentSenseNumber += 1
+	}
+}
+
+func newJmdictMetadata(dictionary jmdict.Jmdict, languageName string) jmdictMetadata { // builds the metadata tables for every entry of dictionary
+	meta := jmdictMetadata{
+		language:           langNameToCode[languageName],
+		seqToSenseCount:    make(map[sequence]int),
+		condensedGlosses:   make(map[senseID]string),
+		seqToMainHeadword:  make(map[sequence]headword),
+		expHashToReadings:  make(map[hash][]string),
+		seqToSearchHashes:  make(map[sequence][]searchHash),
+		headwordHashToSeqs: make(map[hash][]sequence),
+		references:         []string{},
+		hashToSearchValues: nil, // allocated later by MakeHashToSearchValuesMap
+		referenceToSeq:     nil, // allocated later by MakeReferenceToSeqMap
+		hasMultipleForms:   make(map[sequence]bool),
+		maxSenseCount:      0,
+	}
+
+	for _, entry := range dictionary.Entries {
+		headwords := extractHeadwords(entry)
+		formCount := 0
+		for _, headword := range headwords {
+			meta.AddHeadword(headword, entry)
+			if !headword.IsSearchOnly { // search-only headwords are not counted as forms
+				formCount += 1
+			}
+		}
+		meta.hasMultipleForms[entry.Sequence] = (formCount > 1)
+	}
+
+	// this correlation process will be unnecessary once JMdict
+	// includes sequence numbers in its cross-reference data
+	meta.MakeReferenceToSeqMap()
+	// maxSenseCount is the largest per-entry sense count recorded above.
+	for _, senseCount := range meta.seqToSenseCount {
+		if meta.maxSenseCount < senseCount {
+			meta.maxSenseCount = senseCount
+		}
+	}
+
+	return meta
+}
diff --git a/jmdictReferences.go b/jmdictReferences.go
new file mode 100644
index 0000000..71a7501
--- /dev/null
+++ b/jmdictReferences.go
@@ -0,0 +1,166 @@
+package yomichan
+
+import (
+	"fmt"
+	"strconv"
+	"strings"
+)
+
+/*
+ * In the future, JMdict will be updated to include sequence numbers
+ * with each cross reference. At that time, most of the functions and
+ * types defined in this file will become unnecessary. See:
+ * https://www.edrdg.org/jmdict_edict_list/2022/msg00008.html
+ */
+
+type searchValue struct { // one candidate entry for a hash, as stored in hashToSearchValues
+	sequence   sequence // sequence number of the candidate entry
+	index      int      // position of the hash within that entry's seqToSearchHashes list
+	isPriority bool     // copied from the originating searchHash
+}
+
+type searchHash struct { // a hash recorded for an entry, as stored in seqToSearchHashes
+	hash       hash // headword, expression, or reading hash
+	isPriority bool // IsPriority of the headword the hash was taken from
+}
+
+func parseReference(reference string) (headword, int, bool) { // returns (headword, sense number, ok); ok is false for an unexpected format
+	// Reference strings in JMdict currently consist of 3 parts at
+	// most, separated by ・ characters. The latter two parts are
+	// optional.  When the sense number is not specified, it is
+	// implied to be the first sense.
+	var h headword
+	var senseNumber int
+	ok := true
+	refParts := strings.Split(reference, "・")
+	if len(refParts) == 1 {
+		// (Kanji) or (Reading)
+		h = headword{Expression: refParts[0], Reading: refParts[0]} // the single part is used as both expression and reading
+		senseNumber = 1
+	} else if len(refParts) == 2 {
+		// [Kanji + (Reading or Sense)] or (Reading + Sense)
+		val, err := strconv.Atoi(refParts[1]) // an integer second part is a sense number, anything else a reading
+		if err == nil {
+			h = headword{Expression: refParts[0], Reading: refParts[0]}
+			senseNumber = val
+		} else {
+			h = headword{Expression: refParts[0], Reading: refParts[1]}
+			senseNumber = 1
+		}
+	} else if len(refParts) == 3 {
+		// Expression + Reading + Sense
+		h = headword{Expression: refParts[0], Reading: refParts[1]}
+		val, err := strconv.Atoi(strings.TrimSpace(refParts[2]))
+		if err == nil {
+			senseNumber = val
+		} else {
+			errortext := "Unexpected format (3rd part not integer) for x-ref \"" + reference + "\""
+			fmt.Println(errortext)
+			ok = false
+		}
+	} else {
+		errortext := "Unexpected format for x-ref \"" + reference + "\""
+		fmt.Println(errortext)
+		ok = false
+	}
+	return h, senseNumber, ok
+}
+
+func (meta *jmdictMetadata) MakeReferenceToSeqMap() {
+	// Resolves each collected reference string to a sequence number and stores it in meta.referenceToSeq.
+	meta.referenceToSeq = make(map[string]sequence)
+	meta.MakeHashToSearchValuesMap()
+
+	for _, reference := range meta.references {
+		if meta.referenceToSeq[reference] != 0 { // already resolved by an earlier occurrence
+			continue
+		}
+		seq := meta.FindBestSequence(reference)
+		if seq != 0 { // FindBestSequence returns 0 when nothing matched
+			meta.referenceToSeq[reference] = seq
+		} else {
+			fmt.Println("Unable to convert reference to sequence number: `" + reference + "`")
+		}
+	}
+}
+
+func (meta *jmdictMetadata) MakeHashToSearchValuesMap() { // inverts seqToSearchHashes into hashToSearchValues
+	meta.hashToSearchValues = make(map[hash][]searchValue)
+	for seq, searchHashes := range meta.seqToSearchHashes {
+		for score, searchHash := range searchHashes { // score is the hash's position in this sequence's list
+			searchValue := searchValue{
+				sequence:   seq,
+				index:      score,
+				isPriority: searchHash.isPriority,
+			}
+			meta.hashToSearchValues[searchHash.hash] =
+				append(meta.hashToSearchValues[searchHash.hash], searchValue)
+		}
+	}
+}
+
+/*
+ * Generally, correspondence is determined by the order in which term
+ * pairs are extracted from each JMdict entry. Take for example the
+ * JMdict entry for ご本, which contains a reference to 本 (without a
+ * reading specified). To correlate this reference with a sequence
+ * number, our program searches each entry for the hash of【本・本】.
+ * There are two entries in which it is found in JMdict (English):
+ *
+ * sequence 1260670: 【元・もと】、【元・元】、【もと・もと】、【本・もと】、【本・本】、【素・もと】、【素・素】、【基・もと】、【基・基】
+ * sequence 1522150: 【本・ほん】、【本・本】、【ほん・ほん】
+ *
+ * Because 【本・本】 is closer to the beginning of the array in the
+ * latter (i.e., has the lowest index), sequence number 1522150 is
+ * returned.
+ *
+ * In situations in which multiple sequences are found with the same
+ * score, the entry with a priority tag ("news1", "ichi1", "spec1",
+ * "spec2", "gai1") is given preference. This mostly affects
+ * katakana-only loanwords like ラグ.
+ *
+ * To improve accuracy, this method also checks to see if the
+ * reference's specified sense number really exists in the
+ * corresponding entry. For example, sequence 1582850 【如何で・いかんで】
+ * has a reference to sense #2 of いかん (no kanji specified), which
+ * could belong to 13 different sequences. However, sequences 1582850
+ * and 2829697 are the only 2 of those 13 which contain more than one
+ * sense. Incidentally, sequence 1582850 is the correct match.
+ *
+ * All else being equal, the entry with the smallest sequence number
+ * is chosen. References in the JMdict file are currently ambiguous,
+ * and getting this perfect won't be possible until sequence numbers
+ * are explicitly identified in these references.  See:
+ * https://github.com/JMdictProject/JMdictIssues/issues/61
+ */
+func (meta *jmdictMetadata) FindBestSequence(reference string) sequence {
+	bestSeq := 0          // returned unchanged (0) when the reference cannot be parsed or nothing matches
+	lowestIndex := 100000 // starting bound; presumably larger than any real index (TODO confirm)
+	bestIsPriority := false
+	headword, senseNumber, ok := parseReference(reference)
+	if !ok {
+		return bestSeq
+	}
+	hash := headword.Hash()
+	for _, seqScore := range meta.hashToSearchValues[hash] {
+		if meta.seqToSenseCount[seqScore.sequence] < senseNumber {
+			// entry must contain the specified sense
+			continue
+		} else if lowestIndex < seqScore.index {
+			// lower indices are better
+			continue
+		} else if (lowestIndex == seqScore.index) && (bestIsPriority && !seqScore.isPriority) {
+			// if scores match, check priority
+			continue
+		} else if (lowestIndex == seqScore.index) && (bestIsPriority == seqScore.isPriority) && (bestSeq < seqScore.sequence) {
+			// if scores and priority match, check sequence number.
+			// lower sequence numbers are better
+			continue
+		} else {
+			lowestIndex = seqScore.index // this candidate becomes the best one seen so far
+			bestSeq = seqScore.sequence
+			bestIsPriority = seqScore.isPriority
+		}
+	}
+	return bestSeq
+}
diff --git a/jmdictTags.go b/jmdictTags.go
new file mode 100644
index 0000000..b444c47
--- /dev/null
+++ b/jmdictTags.go
@@ -0,0 +1,348 @@
+package yomichan
+
+import (
+	"fmt"
+	"strconv"
+
+	"golang.org/x/exp/slices"
+)
+
+func senseNumberTags(maxSenseCount int) []dbTag { // returns one tag per sense number, named "1" through maxSenseCount
+	tags := []dbTag{}
+	for i := 1; i <= maxSenseCount; i++ {
+		tag := dbTag{
+			Name:  strconv.Itoa(i),
+			Order: -10, // these tags will appear on the left side
+			Notes: "JMdict Sense #" + strconv.Itoa(i),
+		}
+		tags = append(tags, tag)
+	}
+	return tags
+}
+
+func newsFrequencyTags() []dbTag { // returns the news-frequency tags, in ascending rank order
+	// 24,000 ranks divided into 24 tags, news1k ... news24k
+	tags := []dbTag{}
+	for i := 1; i <= 24; i++ {
+		tagName := "news" + strconv.Itoa(i) + "k"
+		var startRank string
+		if i == 1 { // the first bucket starts at rank 1 instead of "0,000"
+			startRank = "1"
+		} else {
+			// technically should be ",001", but that looks odd
+			startRank = strconv.Itoa(i-1) + ",000"
+		}
+		endRank := strconv.Itoa(i) + ",000"
+		tag := dbTag{
+			Name:     tagName,
+			Order:    -2,
+			Score:    0,
+			Category: "frequent",
+			Notes:    "ranked between the top " + startRank + " and " + endRank + " words in a frequency analysis of the Mainichi Shimbun (1990s)",
+		}
+		tags = append(tags, tag)
+	}
+	return tags
+}
+
+func entityTags(entities map[string]string) []dbTag { // entities maps tag name -> notes; returns knownEntityTags with those notes filled in
+	tags := knownEntityTags()
+	for name, notes := range entities { // map iteration order is unspecified, so appended unknown tags have no fixed order
+		idx := slices.IndexFunc(tags, func(t dbTag) bool { return t.Name == name })
+		if idx != -1 {
+			tags[idx].Notes = notes // known tag: attach the supplied notes
+		} else {
+			fmt.Println("Unknown tag type \"" + name + "\": " + notes)
+			unknownTag := dbTag{Name: name, Notes: notes} // unknown tag: keep it with zero-valued Order, Score and Category
+			tags = append(tags, unknownTag)
+		}
+	}
+	return tags
+}
+
+func customDbTags() []dbTag { // returns a fixed list of tags whose notes are hard-coded here (entityTags takes its notes from its argument)
+	return []dbTag{
+		dbTag{Name: priorityTagName, Order: -10, Score: 10, Category: "popular", Notes: "high priority term"},
+		dbTag{Name: rareKanjiTagName, Order: 0, Score: -5, Category: "archaism", Notes: "rarely-used kanji form of this expression"},
+		dbTag{Name: irregularTagName, Order: 0, Score: -5, Category: "archaism", Notes: "irregular form of this expression"},
+		dbTag{Name: outdatedTagName, Order: 0, Score: -5, Category: "archaism", Notes: "outdated form of this expression"},
+		dbTag{Name: "ichi", Order: -2, Score: 0, Category: "frequent", Notes: "included in Ichimango Goi Bunruishuu (１万語語彙分類集)"},
+		dbTag{Name: "spec", Order: -2, Score: 0, Category: "frequent", Notes: "specified as common by JMdict editors"},
+		dbTag{Name: "gai", Order: -2, Score: 0, Category: "frequent", Notes: "common loanword (gairaigo・外来語)"},
+		dbTag{Name: "forms", Order: 0, Score: 0, Category: "", Notes: "other surface forms and readings"},
+	}
+}
+
+func knownEntityTags() []dbTag {
+	return []dbTag{
+		// see: https://www.edrdg.org/jmdictdb/cgi-bin/edhelp.py?svc=jmdict&sid=#kwabbr
+		// additional descriptions at the beginning of the JMdict file
+
+		// <re_inf> reading info
+		dbTag{Name: "gikun", Order: 0, Score: 0, Category: ""}, // gikun (meaning as reading) or jukujikun (special kanji reading)
+		dbTag{Name: "ik", Order: 0, Score: -5, Category: ""},   // word containing irregular kana usage
+		dbTag{Name: "ok", Order: 0, Score: -5, Category: ""},   // out-dated or obsolete kana usage
+		dbTag{Name: "sk", Order: 0, Score: -5, Category: ""},   // search-only kana form
+
+		// <ke_inf> kanji info
+		/* kanji info also has a "ik" entity that would go here if not already for the re_inf tag */
+		dbTag{Name: "ateji", Order: 0, Score: 0, Category: ""}, // ateji (phonetic) reading
+		dbTag{Name: "iK", Order: 0, Score: -5, Category: ""},   // word containing irregular kanji usage
+		dbTag{Name: "io", Order: 0, Score: -5, Category: ""},   // irregular okurigana usage
+		dbTag{Name: "oK", Order: 0, Score: -5, Category: ""},   // word containing out-dated kanji or kanji usage
+		dbTag{Name: "rK", Order: 0, Score: -5, Category: ""},   // rarely-used kanji form
+		dbTag{Name: "sK", Order: 0, Score: -5, Category: ""},   // search-only kanji form
+
+		// <misc> miscellaneous sense info
+		dbTag{Name: "abbr", Order: 0, Score: 0, Category: ""},              // abbreviation
+		dbTag{Name: "arch", Order: -4, Score: 0, Category: "archaism"},     // archaism
+		dbTag{Name: "char", Order: 0, Score: 0, Category: ""},              // character
+		dbTag{Name: "chn", Order: 0, Score: 0, Category: ""},               // children's language
+		dbTag{Name: "col", Order: 0, Score: 0, Category: ""},               // colloquialism
+		dbTag{Name: "company", Order: 0, Score: 0, Category: ""},           // company name
+		dbTag{Name: "creat", Order: 0, Score: 0, Category: ""},             // creature
+		dbTag{Name: "dated", Order: -4, Score: 0, Category: "archaism"},    // dated term
+		dbTag{Name: "dei", Order: 0, Score: 0, Category: ""},               // deity
+		dbTag{Name: "derog", Order: 0, Score: 0, Category: ""},             // derogatory
+		dbTag{Name: "doc", Order: 0, Score: 0, Category: ""},               // document
+		dbTag{Name: "euph", Order: 0, Score: 0, Category: ""},              // euphemistic
+		dbTag{Name: "ev", Order: 0, Score: 0, Category: ""},                // event
+		dbTag{Name: "fam", Order: 0, Score: 0, Category: ""},               // familiar language
+		dbTag{Name: "fem", Order: 0, Score: 0, Category: ""},               // female term or language
+		dbTag{Name: "fict", Order: 0, Score: 0, Category: ""},              // fiction
+		dbTag{Name: "form", Order: 0, Score: 0, Category: ""},              // formal or literary term
+		dbTag{Name: "given", Order: 0, Score: 0, Category: ""},             // given name or forename, gender not specified
+		dbTag{Name: "group", Order: 0, Score: 0, Category: ""},             // group
+		dbTag{Name: "hist", Order: 0, Score: 0, Category: ""},              // historical term
+		dbTag{Name: "hon", Order: 0, Score: 0, Category: ""},               // honorific or respectful (sonkeigo) language
+		dbTag{Name: "hum", Order: 0, Score: 0, Category: ""},               // humble (kenjougo) language
+		dbTag{Name: "id", Order: -5, Score: 0, Category: "expression"},     // idiomatic expression
+		dbTag{Name: "joc", Order: 0, Score: 0, Category: ""},               // jocular, humorous term
+		dbTag{Name: "leg", Order: 0, Score: 0, Category: ""},               // legend
+		dbTag{Name: "m-sl", Order: 0, Score: 0, Category: ""},              // manga slang
+		dbTag{Name: "male", Order: 0, Score: 0, Category: ""},              // male term or language
+		dbTag{Name: "myth", Order: 0, Score: 0, Category: ""},              // mythology
+		dbTag{Name: "net-sl", Order: 0, Score: 0, Category: ""},            // Internet slang
+		dbTag{Name: "obj", Order: 0, Score: 0, Category: ""},               // object
+		dbTag{Name: "obs", Order: -4, Score: 0, Category: "archaism"},      // obsolete term
+		dbTag{Name: "on-mim", Order: 0, Score: 0, Category: ""},            // onomatopoeic or mimetic word
+		dbTag{Name: "organization", Order: 0, Score: 0, Category: ""},      // organization name
+		dbTag{Name: "oth", Order: 0, Score: 0, Category: ""},               // other
+		dbTag{Name: "person", Order: 0, Score: 0, Category: ""},            // full name of a particular person
+		dbTag{Name: "place", Order: 0, Score: 0, Category: ""},             // place name
+		dbTag{Name: "poet", Order: 0, Score: 0, Category: ""},              // poetical term
+		dbTag{Name: "pol", Order: 0, Score: 0, Category: ""},               // polite (teineigo) language
+		dbTag{Name: "product", Order: 0, Score: 0, Category: ""},           // product name
+		dbTag{Name: "proverb", Order: 0, Score: 0, Category: "expression"}, // proverb
+		dbTag{Name: "quote", Order: 0, Score: 0, Category: "expression"},   // quotation
+		dbTag{Name: "rare", Order: -4, Score: 0, Category: "archaism"},     // rare
+		dbTag{Name: "relig", Order: 0, Score: 0, Category: ""},             // religion
+		dbTag{Name: "sens", Order: 0, Score: 0, Category: ""},              // sensitive
+		dbTag{Name: "serv", Order: 0, Score: 0, Category: ""},              // service
+		dbTag{Name: "ship", Order: 0, Score: 0, Category: ""},              // ship name
+		dbTag{Name: "sl", Order: 0, Score: 0, Category: ""},                // slang
+		dbTag{Name: "station", Order: 0, Score: 0, Category: ""},           // railway station
+		dbTag{Name: "surname", Order: 0, Score: 0, Category: ""},           // family or surname
+		dbTag{Name: "uk", Order: 0, Score: 0, Category: ""},                // word usually written using kana alone
+		dbTag{Name: "unclass", Order: 0, Score: 0, Category: ""},           // unclassified name
+		dbTag{Name: "vulg", Order: 0, Score: 0, Category: ""},              // vulgar expression or word
+		dbTag{Name: "work", Order: 0, Score: 0, Category: ""},              // work of art, literature, music, etc. name
+		dbTag{Name: "X", Order: 0, Score: 0, Category: ""},                 // rude or X-rated term (not displayed in educational software)
+		dbTag{Name: "yoji", Order: 0, Score: 0, Category: ""},              // yojijukugo
+
+		// <pos> part-of-speech info
+		dbTag{Name: "adj-f", Order: -3, Score: 0, Category: "partOfSpeech"},     // noun or verb acting prenominally
+		dbTag{Name: "adj-i", Order: -3, Score: 0, Category: "partOfSpeech"},     // adjective (keiyoushi)
+		dbTag{Name: "adj-ix", Order: -3, Score: 0, Category: "partOfSpeech"},    // adjective (keiyoushi) - yoi/ii class
+		dbTag{Name: "adj-kari", Order: -3, Score: 0, Category: "partOfSpeech"},  // 'kari' adjective (archaic)
+		dbTag{Name: "adj-ku", Order: -3, Score: 0, Category: "partOfSpeech"},    // 'ku' adjective (archaic)
+		dbTag{Name: "adj-na", Order: -3, Score: 0, Category: "partOfSpeech"},    // adjectival nouns or quasi-adjectives (keiyodoshi)
+		dbTag{Name: "adj-nari", Order: -3, Score: 0, Category: "partOfSpeech"},  // archaic/formal form of na-adjective
+		dbTag{Name: "adj-no", Order: -3, Score: 0, Category: "partOfSpeech"},    // nouns which may take the genitive case particle 'no'
+		dbTag{Name: "adj-pn", Order: -3, Score: 0, Category: "partOfSpeech"},    // pre-noun adjectival (rentaishi)
+		dbTag{Name: "adj-shiku", Order: -3, Score: 0, Category: "partOfSpeech"}, // 'shiku' adjective (archaic)
+		dbTag{Name: "adj-t", Order: -3, Score: 0, Category: "partOfSpeech"},     // 'taru' adjective
+		dbTag{Name: "adv", Order: -3, Score: 0, Category: "partOfSpeech"},       // adverb (fukushi)
+		dbTag{Name: "adv-to", Order: -3, Score: 0, Category: "partOfSpeech"},    // adverb taking the 'to' particle
+		dbTag{Name: "aux", Order: -3, Score: 0, Category: "partOfSpeech"},       // auxiliary
+		dbTag{Name: "aux-adj", Order: -3, Score: 0, Category: "partOfSpeech"},   // auxiliary adjective
+		dbTag{Name: "aux-v", Order: -3, Score: 0, Category: "partOfSpeech"},     // auxiliary verb
+		dbTag{Name: "conj", Order: -3, Score: 0, Category: "partOfSpeech"},      // conjunction
+		dbTag{Name: "cop", Order: -3, Score: 0, Category: "partOfSpeech"},       // copula
+		dbTag{Name: "ctr", Order: -3, Score: 0, Category: "partOfSpeech"},       // counter
+		dbTag{Name: "exp", Order: -5, Score: 0, Category: "expression"},         // expressions (phrases, clauses, etc.)
+		dbTag{Name: "int", Order: -3, Score: 0, Category: "partOfSpeech"},       // interjection (kandoushi)
+		dbTag{Name: "n", Order: -3, Score: 0, Category: "partOfSpeech"},         // noun (common) (futsuumeishi)
+		dbTag{Name: "n-adv", Order: -3, Score: 0, Category: "partOfSpeech"},     // adverbial noun (fukushitekimeishi)
+		dbTag{Name: "n-pr", Order: -3, Score: 0, Category: "partOfSpeech"},      // proper noun
+		dbTag{Name: "n-pref", Order: -3, Score: 0, Category: "partOfSpeech"},    // noun, used as a prefix
+		dbTag{Name: "n-suf", Order: -3, Score: 0, Category: "partOfSpeech"},     // noun, used as a suffix
+		dbTag{Name: "n-t", Order: -3, Score: 0, Category: "partOfSpeech"},       // noun (temporal) (jisoumeishi)
+		dbTag{Name: "num", Order: -3, Score: 0, Category: "partOfSpeech"},       // numeric
+		dbTag{Name: "pn", Order: -3, Score: 0, Category: "partOfSpeech"},        // pronoun
+		dbTag{Name: "pref", Order: -3, Score: 0, Category: "partOfSpeech"},      // prefix
+		dbTag{Name: "prt", Order: -3, Score: 0, Category: "partOfSpeech"},       // particle
+		dbTag{Name: "suf", Order: -3, Score: 0, Category: "partOfSpeech"},       // suffix
+		dbTag{Name: "unc", Order: -3, Score: 0, Category: "partOfSpeech"},       // unclassified
+		dbTag{Name: "v-unspec", Order: -3, Score: 0, Category: "partOfSpeech"},  // verb unspecified
+		dbTag{Name: "v1", Order: -3, Score: 0, Category: "partOfSpeech"},        // Ichidan verb
+		dbTag{Name: "v1-s", Order: -3, Score: 0, Category: "partOfSpeech"},      // Ichidan verb - kureru special class
+		dbTag{Name: "v2a-s", Order: -3, Score: 0, Category: "partOfSpeech"},     // Nidan verb with 'u' ending (archaic)
+		dbTag{Name: "v2b-k", Order: -3, Score: 0, Category: "partOfSpeech"},     // Nidan verb (upper class) with 'bu' ending (archaic)
+		dbTag{Name: "v2b-s", Order: -3, Score: 0, Category: "partOfSpeech"},     // Nidan verb (lower class) with 'bu' ending (archaic)
+		dbTag{Name: "v2d-k", Order: -3, Score: 0, Category: "partOfSpeech"},     // Nidan verb (upper class) with 'dzu' ending (archaic)
+		dbTag{Name: "v2d-s", Order: -3, Score: 0, Category: "partOfSpeech"},     // Nidan verb (lower class) with 'dzu' ending (archaic)
+		dbTag{Name: "v2g-k", Order: -3, Score: 0, Category: "partOfSpeech"},     // Nidan verb (upper class) with 'gu' ending (archaic)
+		dbTag{Name: "v2g-s", Order: -3, Score: 0, Category: "partOfSpeech"},     // Nidan verb (lower class) with 'gu' ending (archaic)
+		dbTag{Name: "v2h-k", Order: -3, Score: 0, Category: "partOfSpeech"},     // Nidan verb (upper class) with 'hu/fu' ending (archaic)
+		dbTag{Name: "v2h-s", Order: -3, Score: 0, Category: "partOfSpeech"},     // Nidan verb (lower class) with 'hu/fu' ending (archaic)
+		dbTag{Name: "v2k-k", Order: -3, Score: 0, Category: "partOfSpeech"},     // Nidan verb (upper class) with 'ku' ending (archaic)
+		dbTag{Name: "v2k-s", Order: -3, Score: 0, Category: "partOfSpeech"},     // Nidan verb (lower class) with 'ku' ending (archaic)
+		dbTag{Name: "v2m-k", Order: -3, Score: 0, Category: "partOfSpeech"},     // Nidan verb (upper class) with 'mu' ending (archaic)
+		dbTag{Name: "v2m-s", Order: -3, Score: 0, Category: "partOfSpeech"},     // Nidan verb (lower class) with 'mu' ending (archaic)
+		dbTag{Name: "v2n-s", Order: -3, Score: 0, Category: "partOfSpeech"},     // Nidan verb (lower class) with 'nu' ending (archaic)
+		dbTag{Name: "v2r-k", Order: -3, Score: 0, Category: "partOfSpeech"},     // Nidan verb (upper class) with 'ru' ending (archaic)
+		dbTag{Name: "v2r-s", Order: -3, Score: 0, Category: "partOfSpeech"},     // Nidan verb (lower class) with 'ru' ending (archaic)
+		dbTag{Name: "v2s-s", Order: -3, Score: 0, Category: "partOfSpeech"},     // Nidan verb (lower class) with 'su' ending (archaic)
+		dbTag{Name: "v2t-k", Order: -3, Score: 0, Category: "partOfSpeech"},     // Nidan verb (upper class) with 'tsu' ending (archaic)
+		dbTag{Name: "v2t-s", Order: -3, Score: 0, Category: "partOfSpeech"},     // Nidan verb (lower class) with 'tsu' ending (archaic)
+		dbTag{Name: "v2w-s", Order: -3, Score: 0, Category: "partOfSpeech"},     // Nidan verb (lower class) with 'u' ending and 'we' conjugation (archaic)
+		dbTag{Name: "v2y-k", Order: -3, Score: 0, Category: "partOfSpeech"},     // Nidan verb (upper class) with 'yu' ending (archaic)
+		dbTag{Name: "v2y-s", Order: -3, Score: 0, Category: "partOfSpeech"},     // Nidan verb (lower class) with 'yu' ending (archaic)
+		dbTag{Name: "v2z-s", Order: -3, Score: 0, Category: "partOfSpeech"},     // Nidan verb (lower class) with 'zu' ending (archaic)
+		dbTag{Name: "v4b", Order: -3, Score: 0, Category: "partOfSpeech"},       // Yodan verb with 'bu' ending (archaic)
+		dbTag{Name: "v4g", Order: -3, Score: 0, Category: "partOfSpeech"},       // Yodan verb with 'gu' ending (archaic)
+		dbTag{Name: "v4h", Order: -3, Score: 0, Category: "partOfSpeech"},       // Yodan verb with 'hu/fu' ending (archaic)
+		dbTag{Name: "v4k", Order: -3, Score: 0, Category: "partOfSpeech"},       // Yodan verb with 'ku' ending (archaic)
+		dbTag{Name: "v4m", Order: -3, Score: 0, Category: "partOfSpeech"},       // Yodan verb with 'mu' ending (archaic)
+		dbTag{Name: "v4n", Order: -3, Score: 0, Category: "partOfSpeech"},       // Yodan verb with 'nu' ending (archaic)
+		dbTag{Name: "v4r", Order: -3, Score: 0, Category: "partOfSpeech"},       // Yodan verb with 'ru' ending (archaic)
+		dbTag{Name: "v4s", Order: -3, Score: 0, Category: "partOfSpeech"},       // Yodan verb with 'su' ending (archaic)
+		dbTag{Name: "v4t", Order: -3, Score: 0, Category: "partOfSpeech"},       // Yodan verb with 'tsu' ending (archaic)
+		dbTag{Name: "v5aru", Order: -3, Score: 0, Category: "partOfSpeech"},     // Godan verb - -aru special class
+		dbTag{Name: "v5b", Order: -3, Score: 0, Category: "partOfSpeech"},       // Godan verb with 'bu' ending
+		dbTag{Name: "v5g", Order: -3, Score: 0, Category: "partOfSpeech"},       // Godan verb with 'gu' ending
+		dbTag{Name: "v5k", Order: -3, Score: 0, Category: "partOfSpeech"},       // Godan verb with 'ku' ending
+		dbTag{Name: "v5k-s", Order: -3, Score: 0, Category: "partOfSpeech"},     // Godan verb - Iku/Yuku special class
+		dbTag{Name: "v5m", Order: -3, Score: 0, Category: "partOfSpeech"},       // Godan verb with 'mu' ending
+		dbTag{Name: "v5n", Order: -3, Score: 0, Category: "partOfSpeech"},       // Godan verb with 'nu' ending
+		dbTag{Name: "v5r", Order: -3, Score: 0, Category: "partOfSpeech"},       // Godan verb with 'ru' ending
+		dbTag{Name: "v5r-i", Order: -3, Score: 0, Category: "partOfSpeech"},     // Godan verb with 'ru' ending (irregular verb)
+		dbTag{Name: "v5s", Order: -3, Score: 0, Category: "partOfSpeech"},       // Godan verb with 'su' ending
+		dbTag{Name: "v5t", Order: -3, Score: 0, Category: "partOfSpeech"},       // Godan verb with 'tsu' ending
+		dbTag{Name: "v5u", Order: -3, Score: 0, Category: "partOfSpeech"},       // Godan verb with 'u' ending
+		dbTag{Name: "v5u-s", Order: -3, Score: 0, Category: "partOfSpeech"},     // Godan verb with 'u' ending (special class)
+		dbTag{Name: "v5uru", Order: -3, Score: 0, Category: "partOfSpeech"},     // Godan verb - Uru old class verb (old form of Eru)
+		dbTag{Name: "vi", Order: -3, Score: 0, Category: "partOfSpeech"},        // intransitive verb
+		dbTag{Name: "vk", Order: -3, Score: 0, Category: "partOfSpeech"},        // Kuru verb - special class
+		dbTag{Name: "vn", Order: -3, Score: 0, Category: "partOfSpeech"},        // irregular nu verb
+		dbTag{Name: "vr", Order: -3, Score: 0, Category: "partOfSpeech"},        // irregular ru verb, plain form ends with -ri
+		dbTag{Name: "vs", Order: -3, Score: 0, Category: "partOfSpeech"},        // noun or participle which takes the aux. verb suru
+		dbTag{Name: "vs-c", Order: -3, Score: 0, Category: "partOfSpeech"},      // su verb - precursor to the modern suru
+		dbTag{Name: "vs-i", Order: -3, Score: 0, Category: "partOfSpeech"},      // suru verb - included
+		dbTag{Name: "vs-s", Order: -3, Score: 0, Category: "partOfSpeech"},      // suru verb - special class
+		dbTag{Name: "vt", Order: -3, Score: 0, Category: "partOfSpeech"},        // transitive verb
+		dbTag{Name: "vz", Order: -3, Score: 0, Category: "partOfSpeech"},        // Ichidan verb - zuru verb (alternative form of -jiru verbs)
+
+		// <field> usage domain
+		dbTag{Name: "agric", Order: 0, Score: 0, Category: ""},    // agriculture
+		dbTag{Name: "anat", Order: 0, Score: 0, Category: ""},     // anatomy
+		dbTag{Name: "archeol", Order: 0, Score: 0, Category: ""},  // archeology
+		dbTag{Name: "archit", Order: 0, Score: 0, Category: ""},   // architecture
+		dbTag{Name: "art", Order: 0, Score: 0, Category: ""},      // art, aesthetics
+		dbTag{Name: "astron", Order: 0, Score: 0, Category: ""},   // astronomy
+		dbTag{Name: "audvid", Order: 0, Score: 0, Category: ""},   // audiovisual
+		dbTag{Name: "aviat", Order: 0, Score: 0, Category: ""},    // aviation
+		dbTag{Name: "baseb", Order: 0, Score: 0, Category: ""},    // baseball
+		dbTag{Name: "biochem", Order: 0, Score: 0, Category: ""},  // biochemistry
+		dbTag{Name: "biol", Order: 0, Score: 0, Category: ""},     // biology
+		dbTag{Name: "bot", Order: 0, Score: 0, Category: ""},      // botany
+		dbTag{Name: "Buddh", Order: 0, Score: 0, Category: ""},    // Buddhism
+		dbTag{Name: "bus", Order: 0, Score: 0, Category: ""},      // business
+		dbTag{Name: "cards", Order: 0, Score: 0, Category: ""},    // card games
+		dbTag{Name: "chem", Order: 0, Score: 0, Category: ""},     // chemistry
+		dbTag{Name: "Christn", Order: 0, Score: 0, Category: ""},  // Christianity
+		dbTag{Name: "cloth", Order: 0, Score: 0, Category: ""},    // clothing
+		dbTag{Name: "comp", Order: 0, Score: 0, Category: ""},     // computing
+		dbTag{Name: "cryst", Order: 0, Score: 0, Category: ""},    // crystallography
+		dbTag{Name: "dent", Order: 0, Score: 0, Category: ""},     // dentistry
+		dbTag{Name: "ecol", Order: 0, Score: 0, Category: ""},     // ecology
+		dbTag{Name: "econ", Order: 0, Score: 0, Category: ""},     // economics
+		dbTag{Name: "elec", Order: 0, Score: 0, Category: ""},     // electricity, elec. eng.
+		dbTag{Name: "electr", Order: 0, Score: 0, Category: ""},   // electronics
+		dbTag{Name: "embryo", Order: 0, Score: 0, Category: ""},   // embryology
+		dbTag{Name: "engr", Order: 0, Score: 0, Category: ""},     // engineering
+		dbTag{Name: "ent", Order: 0, Score: 0, Category: ""},      // entomology
+		dbTag{Name: "film", Order: 0, Score: 0, Category: ""},     // film
+		dbTag{Name: "finc", Order: 0, Score: 0, Category: ""},     // finance
+		dbTag{Name: "fish", Order: 0, Score: 0, Category: ""},     // fishing
+		dbTag{Name: "food", Order: 0, Score: 0, Category: ""},     // food, cooking
+		dbTag{Name: "gardn", Order: 0, Score: 0, Category: ""},    // gardening, horticulture
+		dbTag{Name: "genet", Order: 0, Score: 0, Category: ""},    // genetics
+		dbTag{Name: "geogr", Order: 0, Score: 0, Category: ""},    // geography
+		dbTag{Name: "geol", Order: 0, Score: 0, Category: ""},     // geology
+		dbTag{Name: "geom", Order: 0, Score: 0, Category: ""},     // geometry
+		dbTag{Name: "go", Order: 0, Score: 0, Category: ""},       // go (game)
+		dbTag{Name: "golf", Order: 0, Score: 0, Category: ""},     // golf
+		dbTag{Name: "gramm", Order: 0, Score: 0, Category: ""},    // grammar
+		dbTag{Name: "grmyth", Order: 0, Score: 0, Category: ""},   // Greek mythology
+		dbTag{Name: "hanaf", Order: 0, Score: 0, Category: ""},    // hanafuda
+		dbTag{Name: "horse", Order: 0, Score: 0, Category: ""},    // horse racing
+		dbTag{Name: "kabuki", Order: 0, Score: 0, Category: ""},   // kabuki
+		dbTag{Name: "law", Order: 0, Score: 0, Category: ""},      // law
+		dbTag{Name: "ling", Order: 0, Score: 0, Category: ""},     // linguistics
+		dbTag{Name: "logic", Order: 0, Score: 0, Category: ""},    // logic
+		dbTag{Name: "MA", Order: 0, Score: 0, Category: ""},       // martial arts
+		dbTag{Name: "mahj", Order: 0, Score: 0, Category: ""},     // mahjong
+		dbTag{Name: "manga", Order: 0, Score: 0, Category: ""},    // manga
+		dbTag{Name: "math", Order: 0, Score: 0, Category: ""},     // mathematics
+		dbTag{Name: "mech", Order: 0, Score: 0, Category: ""},     // mechanical engineering
+		dbTag{Name: "med", Order: 0, Score: 0, Category: ""},      // medicine
+		dbTag{Name: "met", Order: 0, Score: 0, Category: ""},      // meteorology
+		dbTag{Name: "mil", Order: 0, Score: 0, Category: ""},      // military
+		dbTag{Name: "mining", Order: 0, Score: 0, Category: ""},   // mining
+		dbTag{Name: "music", Order: 0, Score: 0, Category: ""},    // music
+		dbTag{Name: "noh", Order: 0, Score: 0, Category: ""},      // noh
+		dbTag{Name: "ornith", Order: 0, Score: 0, Category: ""},   // ornithology
+		dbTag{Name: "paleo", Order: 0, Score: 0, Category: ""},    // paleontology
+		dbTag{Name: "pathol", Order: 0, Score: 0, Category: ""},   // pathology
+		dbTag{Name: "pharm", Order: 0, Score: 0, Category: ""},    // pharmacy
+		dbTag{Name: "phil", Order: 0, Score: 0, Category: ""},     // philosophy
+		dbTag{Name: "photo", Order: 0, Score: 0, Category: ""},    // photography
+		dbTag{Name: "physics", Order: 0, Score: 0, Category: ""},  // physics
+		dbTag{Name: "physiol", Order: 0, Score: 0, Category: ""},  // physiology
+		dbTag{Name: "politics", Order: 0, Score: 0, Category: ""}, // politics
+		dbTag{Name: "print", Order: 0, Score: 0, Category: ""},    // printing
+		dbTag{Name: "psy", Order: 0, Score: 0, Category: ""},      // psychiatry
+		dbTag{Name: "psyanal", Order: 0, Score: 0, Category: ""},  // psychoanalysis
+		dbTag{Name: "psych", Order: 0, Score: 0, Category: ""},    // psychology
+		dbTag{Name: "rail", Order: 0, Score: 0, Category: ""},     // railway
+		dbTag{Name: "rommyth", Order: 0, Score: 0, Category: ""},  // Roman mythology
+		dbTag{Name: "Shinto", Order: 0, Score: 0, Category: ""},   // Shinto
+		dbTag{Name: "shogi", Order: 0, Score: 0, Category: ""},    // shogi
+		dbTag{Name: "ski", Order: 0, Score: 0, Category: ""},      // skiing
+		dbTag{Name: "sports", Order: 0, Score: 0, Category: ""},   // sports
+		dbTag{Name: "stat", Order: 0, Score: 0, Category: ""},     // statistics
+		dbTag{Name: "stockm", Order: 0, Score: 0, Category: ""},   // stock market
+		dbTag{Name: "sumo", Order: 0, Score: 0, Category: ""},     // sumo
+		dbTag{Name: "telec", Order: 0, Score: 0, Category: ""},    // telecommunications
+		dbTag{Name: "tradem", Order: 0, Score: 0, Category: ""},   // trademark
+		dbTag{Name: "tv", Order: 0, Score: 0, Category: ""},       // television
+		dbTag{Name: "vidg", Order: 0, Score: 0, Category: ""},     // video games
+		dbTag{Name: "zool", Order: 0, Score: 0, Category: ""},     // zoology
+
+		// <dial> dialect
+		dbTag{Name: "bra", Order: 0, Score: 0, Category: ""},  // Brazilian
+		dbTag{Name: "hob", Order: 0, Score: 0, Category: ""},  // Hokkaido-ben
+		dbTag{Name: "ksb", Order: 0, Score: 0, Category: ""},  // Kansai-ben
+		dbTag{Name: "ktb", Order: 0, Score: 0, Category: ""},  // Kantou-ben
+		dbTag{Name: "kyb", Order: 0, Score: 0, Category: ""},  // Kyoto-ben
+		dbTag{Name: "kyu", Order: 0, Score: 0, Category: ""},  // Kyuushuu-ben
+		dbTag{Name: "nab", Order: 0, Score: 0, Category: ""},  // Nagano-ben
+		dbTag{Name: "osb", Order: 0, Score: 0, Category: ""},  // Osaka-ben
+		dbTag{Name: "rkb", Order: 0, Score: 0, Category: ""},  // Ryuukyuu-ben
+		dbTag{Name: "thb", Order: 0, Score: 0, Category: ""},  // Touhoku-ben
+		dbTag{Name: "tsb", Order: 0, Score: 0, Category: ""},  // Tosa-ben
+		dbTag{Name: "tsug", Order: 0, Score: 0, Category: ""}, // Tsugaru-ben
+	}
+}
diff --git a/structuredContent.go b/structuredContent.go
new file mode 100644
index 0000000..ded8229
--- /dev/null
+++ b/structuredContent.go
@@ -0,0 +1,192 @@
+package yomichan
+
+type contentAttr struct {
+	lang               string
+	fontStyle          string   // normal, italic
+	fontWeight         string   // normal, bold
+	fontSize           string   // small, medium, large, smaller, 80%, 125%, etc.
+	textDecorationLine []string // underline, overline, line-through
+	verticalAlign      string   // baseline, sub, super, text-top, text-bottom, middle, top, bottom
+	textAlign          string   // start, end, left, right, center, justify, justify-all, match-parent
+	marginTop          int
+	marginLeft         int
+	marginRight        int
+	marginBottom       int
+	listStyleType      string
+	data               map[string]string
+}
+
+// contentReduce merges each run of adjacent strings in contents into one string.
+// ex: ["one", "two", content_structure, "four"] -> ["onetwo", content_structure, "four"]
+// If the merged result holds exactly one item, that item is returned unwrapped.
+// ex: ["one", "two"] -> "onetwo"
+func contentReduce(contents []any) any {
+	if len(contents) == 1 {
+		return contents[0]
+	}
+	newContents := []any{}
+	var accumulator string
+	for _, content := range contents {
+		switch v := content.(type) {
+		case string:
+			accumulator = accumulator + v
+		default:
+			if accumulator != "" {
+				newContents = append(newContents, accumulator)
+				accumulator = ""
+			}
+			newContents = append(newContents, content)
+		}
+	}
+	if accumulator != "" {
+		newContents = append(newContents, accumulator)
+	}
+	if len(newContents) == 1 {
+		return newContents[0]
+	} else {
+		return newContents
+	}
+}
+
+func contentStructure(contents ...any) map[string]any {
+	return map[string]any{
+		"type":    "structured-content",
+		"content": contentReduce(contents),
+	}
+}
+
+func contentRuby(attr contentAttr, ruby string, contents ...any) map[string]any {
+	rubyContent := map[string]any{
+		"tag": "ruby",
+		"content": []any{
+			contentReduce(contents),
+			map[string]string{"tag": "rp", "content": "("},
+			map[string]string{"tag": "rt", "content": ruby},
+			map[string]string{"tag": "rp", "content": ")"},
+		},
+	}
+	if attr.lang != "" {
+		rubyContent["lang"] = attr.lang
+	}
+	if len(attr.data) != 0 {
+		rubyContent["data"] = attr.data
+	}
+	return rubyContent
+}
+
+func contentInternalLink(attr contentAttr, query string, contents ...any) map[string]any {
+	linkContent := map[string]any{
+		"tag":  "a",
+		"href": "?query=" + query + "&wildcards=off",
+	}
+	if len(contents) == 0 {
+		linkContent["content"] = query
+	} else {
+		linkContent["content"] = contentReduce(contents)
+	}
+	if attr.lang != "" {
+		linkContent["lang"] = attr.lang
+	}
+	if len(attr.data) != 0 {
+		linkContent["data"] = attr.data
+	}
+	return linkContent
+}
+
+func contentSpan(attr contentAttr, contents ...any) map[string]any {
+	return contentStyledContainer(attr, "span", contents...)
+}
+
+func contentDiv(attr contentAttr, contents ...any) map[string]any {
+	return contentStyledContainer(attr, "div", contents...)
+}
+
+func contentListItem(attr contentAttr, contents ...any) map[string]any {
+	return contentStyledContainer(attr, "li", contents...)
+}
+
+func contentOrderedList(attr contentAttr, contents ...any) map[string]any {
+	return contentStyledContainer(attr, "ol", contents...)
+}
+
+func contentUnorderedList(attr contentAttr, contents ...any) map[string]any {
+	return contentStyledContainer(attr, "ul", contents...)
+}
+
+func contentTable(attr contentAttr, contents ...any) map[string]any {
+	return contentStyledContainer(attr, "table", contents...)
+}
+
+func contentTableHead(attr contentAttr, contents ...any) map[string]any {
+	return contentStyledContainer(attr, "thead", contents...)
+}
+
+func contentTableBody(attr contentAttr, contents ...any) map[string]any {
+	return contentStyledContainer(attr, "tbody", contents...)
+}
+
+func contentTableRow(attr contentAttr, contents ...any) map[string]any {
+	return contentStyledContainer(attr, "tr", contents...)
+}
+
+func contentTableHeadCell(attr contentAttr, contents ...any) map[string]any {
+	return contentStyledContainer(attr, "th", contents...)
+}
+
+func contentTableCell(attr contentAttr, contents ...any) map[string]any {
+	return contentStyledContainer(attr, "td", contents...)
+}
+
+func contentStyledContainer(attr contentAttr, tag string, contents ...any) map[string]any {
+	container := map[string]any{"tag": tag}
+	container["content"] = contentReduce(contents)
+	if attr.lang != "" {
+		container["lang"] = attr.lang
+	}
+	if len(attr.data) != 0 {
+		container["data"] = attr.data
+	}
+	style := contentStyle(attr)
+	if len(style) != 0 {
+		container["style"] = style
+	}
+	return container
+}
+
+func contentStyle(attr contentAttr) map[string]any {
+	style := make(map[string]any)
+	if attr.fontStyle != "" {
+		style["fontStyle"] = attr.fontStyle
+	}
+	if attr.fontWeight != "" {
+		style["fontWeight"] = attr.fontWeight
+	}
+	if attr.fontSize != "" {
+		style["fontSize"] = attr.fontSize
+	}
+	if len(attr.textDecorationLine) != 0 {
+		style["textDecorationLine"] = attr.textDecorationLine
+	}
+	if attr.verticalAlign != "" {
+		style["verticalAlign"] = attr.verticalAlign
+	}
+	if attr.textAlign != "" {
+		style["textAlign"] = attr.textAlign
+	}
+	if attr.marginTop != 0 {
+		style["marginTop"] = attr.marginTop
+	}
+	if attr.marginLeft != 0 {
+		style["marginLeft"] = attr.marginLeft
+	}
+	if attr.marginRight != 0 {
+		style["marginRight"] = attr.marginRight
+	}
+	if attr.marginBottom != 0 {
+		style["marginBottom"] = attr.marginBottom
+	}
+	if attr.listStyleType != "" {
+		style["listStyleType"] = attr.listStyleType
+	}
+	return style
+}

From 972dc6c4e99f21adb8e64e691da9604a23ffb33d Mon Sep 17 00:00:00 2001
From: stephenmk <stephenmk@users.noreply.github.com>
Date: Sun, 22 Jan 2023 14:40:39 -0600
Subject: [PATCH 05/19] Update dictionary build script

---
 scripts/build_dicts.sh | 34 +++++++++++++++++++---------------
 1 file changed, 19 insertions(+), 15 deletions(-)

diff --git a/scripts/build_dicts.sh b/scripts/build_dicts.sh
index b91d9a5..764d1d0 100755
--- a/scripts/build_dicts.sh
+++ b/scripts/build_dicts.sh
@@ -5,13 +5,23 @@ go get foosoft.net/projects/yomichan-import/yomichan
 mkdir -p src
 mkdir -p dst
 
-if [ ! -f src/JMdict ]; then
-    wget http://ftp.monash.edu/pub/nihongo/JMdict.gz
-    gunzip -c JMdict.gz > src/JMdict
-fi
+function refresh_source () {
+    NOW=$(date '+%s')
+    YESTERDAY=$((NOW - 86400)) # 86,400 seconds in 24 hours
+    if [ ! -f "src/$1" ]; then
+        wget "ftp.edrdg.org/pub/Nihongo/$1.gz"
+        gunzip -c "$1.gz" > "src/$1"
+    elif [[ $YESTERDAY -gt $(date -r "src/$1" '+%s') ]]; then
+        rsync "ftp.edrdg.org::nihongo/$1" "src/$1"
+    fi
+}
 
+refresh_source "JMdict_e_examp"
+yomichan -language="english" -title="JMdict" src/JMdict_e_examp dst/jmdict_english_with_examples.zip
+
+refresh_source "JMdict"
+yomichan -language="english"   -title="JMdict"             src/JMdict dst/jmdict_english.zip
 yomichan -language="dutch"     -title="JMdict (Dutch)"     src/JMdict dst/jmdict_dutch.zip
-yomichan -language="english"   -title="JMdict (English)"   src/JMdict dst/jmdict_english.zip
 yomichan -language="french"    -title="JMdict (French)"    src/JMdict dst/jmdict_french.zip
 yomichan -language="german"    -title="JMdict (German)"    src/JMdict dst/jmdict_german.zip
 yomichan -language="hungarian" -title="JMdict (Hungarian)" src/JMdict dst/jmdict_hungarian.zip
@@ -20,19 +30,13 @@ yomichan -language="slovenian" -title="JMdict (Slovenian)" src/JMdict dst/jmdict
 yomichan -language="spanish"   -title="JMdict (Spanish)"   src/JMdict dst/jmdict_spanish.zip
 yomichan -language="swedish"   -title="JMdict (Swedish)"   src/JMdict dst/jmdict_swedish.zip
 
-if [ ! -f src/JMnedict.xml ]; then
-    wget http://ftp.monash.edu/pub/nihongo/JMnedict.xml.gz
-    gunzip -c JMnedict.xml.gz > src/JMnedict.xml
-fi
+yomichan -format="forms"       -title="JMdict Forms"       src/JMdict dst/jmdict_forms.zip
 
+refresh_source "JMnedict.xml"
 yomichan src/JMnedict.xml dst/jmnedict.zip
 
-if [ ! -f src/kanjidic2.xml ]; then
-    wget http://www.edrdg.org/kanjidic/kanjidic2.xml.gz
-    gunzip -c kanjidic2.xml.gz > src/kanjidic2.xml
-fi
-
-yomichan -language="english"    -title="KANJIDIC (English)"    src/kanjidic2.xml dst/kanjidic_english.zip
+refresh_source "kanjidic2.xml"
+yomichan -language="english"    -title="KANJIDIC"              src/kanjidic2.xml dst/kanjidic_english.zip
 yomichan -language="french"     -title="KANJIDIC (French)"     src/kanjidic2.xml dst/kanjidic_french.zip
 yomichan -language="portuguese" -title="KANJIDIC (Portuguese)" src/kanjidic2.xml dst/kanjidic_portuguese.zip
 yomichan -language="spanish"    -title="KANJIDIC (Spanish)"    src/kanjidic2.xml dst/kanjidic_spanish.zip

From 8451803bfd2e6f516e1c55464c96cc209f3336d6 Mon Sep 17 00:00:00 2001
From: stephenmk <stephenmk@users.noreply.github.com>
Date: Sun, 22 Jan 2023 15:00:13 -0600
Subject: [PATCH 06/19] Update copyright

---
 LICENSE | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/LICENSE b/LICENSE
index f13e263..3901c0e 100644
--- a/LICENSE
+++ b/LICENSE
@@ -1,4 +1,4 @@
-Copyright 2016-2022 Alex Yatskov
+Copyright 2016-2023 Yomichan-Import Authors
 
 Permission is hereby granted, free of charge, to any person obtaining a copy of
 this software and associated documentation files (the "Software"), to deal in

From d8a3b420ee8bb56f8c025d4173c8b64588884dcd Mon Sep 17 00:00:00 2001
From: stephenmk <stephenmk@users.noreply.github.com>
Date: Sun, 22 Jan 2023 17:55:27 -0600
Subject: [PATCH 07/19] Exclude "search" and "forms" terms from non-English
 dictionaries

This allows a user to install the English version and another version
without cluttering their setup with duplicated information.

If a user doesn't want to use the English version, they can get the
"search" and "forms" terms by installing the separate jmdict_forms
file.
---
 jmdict.go      | 10 +++++++---
 jmdictForms.go | 12 ++++++++----
 2 files changed, 15 insertions(+), 7 deletions(-)

diff --git a/jmdict.go b/jmdict.go
index 74809e7..6de8877 100644
--- a/jmdict.go
+++ b/jmdict.go
@@ -134,8 +134,12 @@ func extractTerms(headword headword, entry jmdict.JmdictEntry, meta jmdictMetada
 		return nil, false
 	}
 	if headword.IsSearchOnly {
-		searchTerm := createSearchTerm(headword, entry, meta)
-		return []dbTerm{searchTerm}, true
+		if meta.language == "eng" {
+			searchTerm := createSearchTerm(headword, entry, meta)
+			return []dbTerm{searchTerm}, true
+		} else {
+			return nil, false
+		}
 	}
 	terms := []dbTerm{}
 	senseNumber := 1
@@ -156,7 +160,7 @@ func extractTerms(headword headword, entry jmdict.JmdictEntry, meta jmdictMetada
 		terms = append(terms, senseTerm)
 	}
 
-	if meta.hasMultipleForms[entry.Sequence] {
+	if meta.hasMultipleForms[entry.Sequence] && meta.language == "eng" {
 		formsTerm := createFormsTerm(headword, entry, meta)
 		terms = append(terms, formsTerm)
 	}
diff --git a/jmdictForms.go b/jmdictForms.go
index 76eba34..15b894d 100644
--- a/jmdictForms.go
+++ b/jmdictForms.go
@@ -210,17 +210,21 @@ func formsExportDb(inputPath, outputPath, languageName, title string, stride int
 		return err
 	}
 
+	meta := newJmdictMetadata(dictionary, languageName)
+
 	terms := dbTermList{}
 	for _, entry := range dictionary.Entries {
 		baseTerm := baseFormsTerm(entry)
 		headwords := extractHeadwords(entry)
 		for _, h := range headwords {
-			term := baseTerm
+			var term dbTerm
 			if h.IsSearchOnly {
-				term.Sequence = -term.Sequence
+				term = createSearchTerm(h, entry, meta)
+			} else {
+				term = baseTerm
+				term.Expression = h.Expression
+				term.Reading = h.Reading
 			}
-			term.Expression = h.Expression
-			term.Reading = h.Reading
 			terms = append(terms, term)
 		}
 	}

From 6726c5245b0d6b1bd2d5a98bbf025d4feb055353 Mon Sep 17 00:00:00 2001
From: stephenmk <stephenmk@users.noreply.github.com>
Date: Mon, 23 Jan 2023 14:09:50 -0600
Subject: [PATCH 08/19] Rename variables for consistency

---
 jmdictReferences.go | 34 +++++++++++++++++++---------------
 1 file changed, 19 insertions(+), 15 deletions(-)

diff --git a/jmdictReferences.go b/jmdictReferences.go
index 71a7501..aa5d229 100644
--- a/jmdictReferences.go
+++ b/jmdictReferences.go
@@ -87,10 +87,10 @@ func (meta *jmdictMetadata) MakeReferenceToSeqMap() {
 func (meta *jmdictMetadata) MakeHashToSearchValuesMap() {
 	meta.hashToSearchValues = make(map[hash][]searchValue)
 	for seq, searchHashes := range meta.seqToSearchHashes {
-		for score, searchHash := range searchHashes {
+		for idx, searchHash := range searchHashes {
 			searchValue := searchValue{
 				sequence:   seq,
-				index:      score,
+				index:      idx,
 				isPriority: searchHash.isPriority,
 			}
 			meta.hashToSearchValues[searchHash.hash] =
@@ -100,6 +100,10 @@ func (meta *jmdictMetadata) MakeHashToSearchValuesMap() {
 }
 
 /*
+ * This function attempts to convert a JMdict reference string into a
+ * single definite sequence number. These reference strings are often
+ * ambiguous, so we have to resort to using heuristics.
+ *
  * Generally, correspondence is determined by the order in which term
  * pairs are extracted from each JMdict entry. Take for example the
  * JMdict entry for ご本, which contains a reference to 本 (without a
@@ -115,7 +119,7 @@ func (meta *jmdictMetadata) MakeHashToSearchValuesMap() {
  * returned.
  *
  * In situations in which multiple sequences are found with the same
- * score, the entry with a priority tag ("news1", "ichi1", "spec1",
+ * index, the entry with a priority tag ("news1", "ichi1", "spec1",
  * "spec2", "gai1") is given preference. This mostly affects
  * katakana-only loanwords like ラグ.
  *
@@ -129,8 +133,8 @@ func (meta *jmdictMetadata) MakeHashToSearchValuesMap() {
  *
  * All else being equal, the entry with the smallest sequence number
  * is chosen. References in the JMdict file are currently ambiguous,
- * and getting this perfect won't be possible until sequence numbers
- * are explictly identified in these references.  See:
+ * and getting this perfect won't be possible until reference sequence
+ * numbers are included in the file.  See:
  * https://github.com/JMdictProject/JMdictIssues/issues/61
  */
 func (meta *jmdictMetadata) FindBestSequence(reference string) sequence {
@@ -142,24 +146,24 @@ func (meta *jmdictMetadata) FindBestSequence(reference string) sequence {
 		return bestSeq
 	}
 	hash := headword.Hash()
-	for _, seqScore := range meta.hashToSearchValues[hash] {
-		if meta.seqToSenseCount[seqScore.sequence] < senseNumber {
+	for _, v := range meta.hashToSearchValues[hash] {
+		if meta.seqToSenseCount[v.sequence] < senseNumber {
 			// entry must contain the specified sense
 			continue
-		} else if lowestIndex < seqScore.index {
+		} else if lowestIndex < v.index {
 			// lower indices are better
 			continue
-		} else if (lowestIndex == seqScore.index) && (bestIsPriority && !seqScore.isPriority) {
-			// if scores match, check priority
+		} else if (lowestIndex == v.index) && (bestIsPriority && !v.isPriority) {
+			// if indices match, check priority
 			continue
-		} else if (lowestIndex == seqScore.index) && (bestIsPriority == seqScore.isPriority) && (bestSeq < seqScore.sequence) {
-			// if scores and priority match, check sequence number.
+		} else if (lowestIndex == v.index) && (bestIsPriority == v.isPriority) && (bestSeq < v.sequence) {
+			// if indices and priority match, check sequence number.
 			// lower sequence numbers are better
 			continue
 		} else {
-			lowestIndex = seqScore.index
-			bestSeq = seqScore.sequence
-			bestIsPriority = seqScore.isPriority
+			lowestIndex = v.index
+			bestSeq = v.sequence
+			bestIsPriority = v.isPriority
 		}
 	}
 	return bestSeq

From d606f729cfbdf2a5f4eb5e5a7903b9854097f359 Mon Sep 17 00:00:00 2001
From: stephenmk <stephenmk@users.noreply.github.com>
Date: Mon, 23 Jan 2023 14:13:22 -0600
Subject: [PATCH 09/19] Use secondary frequency tags in term score calculation
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

If a term has a frequency tag, it should return higher in search
results than a match which does not have a tag.

For example, a search for 素性 should return すじょう rather than
そせい, because the former has a "news" frequency tag.
---
 jmdictHeadword.go | 7 +++++++
 1 file changed, 7 insertions(+)

diff --git a/jmdictHeadword.go b/jmdictHeadword.go
index a1a75cb..4ead44c 100644
--- a/jmdictHeadword.go
+++ b/jmdictHeadword.go
@@ -16,6 +16,7 @@ type headword struct {
 	TermTags     []string
 	Index        int
 	IsPriority   bool
+	IsFrequent   bool
 	IsIrregular  bool
 	IsOutdated   bool
 	IsRareKanji  bool
@@ -69,6 +70,9 @@ func (h *headword) Score() int {
 	if h.IsPriority {
 		score += 1
 	}
+	if h.IsFrequent {
+		score += 1
+	}
 	if h.IsIrregular {
 		score -= 5
 	}
@@ -109,6 +113,9 @@ func (h *headword) SetFlags(infoTags, freqTags []string) {
 			break
 		}
 	}
+	if len(freqTags) > 1 {
+		h.IsFrequent = true
+	}
 	for _, infoTag := range infoTags {
 		switch infoTag {
 		case "iK", "ik", "io":

From ef1e74447d51854826ee81be7d5a28783f16dff2 Mon Sep 17 00:00:00 2001
From: stephenmk <stephenmk@users.noreply.github.com>
Date: Mon, 23 Jan 2023 23:52:42 -0600
Subject: [PATCH 10/19] Include term tags and scores in standalone forms
 dictionary

---
 jmdictForms.go | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/jmdictForms.go b/jmdictForms.go
index 15b894d..def7f7a 100644
--- a/jmdictForms.go
+++ b/jmdictForms.go
@@ -81,7 +81,7 @@ func (h *headword) KanjiForm() string {
 	}
 }
 
-func jmdNeedsFormTable(headwords []headword) bool {
+func needsFormTable(headwords []headword) bool {
 	// Does the entry contain more than 1 distinct reading?
 	// E.g. バカがい and ばかがい are not distinct.
 	uniqueReading := ""
@@ -186,7 +186,7 @@ func formsGlossary(headwords []headword) []any {
 func baseFormsTerm(entry jmdict.JmdictEntry) dbTerm {
 	term := dbTerm{Sequence: entry.Sequence}
 	headwords := extractHeadwords(entry)
-	if jmdNeedsFormTable(headwords) {
+	if needsFormTable(headwords) {
 		term.Glossary = formsTableGlossary(headwords)
 	} else {
 		term.Glossary = formsGlossary(headwords)
@@ -224,6 +224,8 @@ func formsExportDb(inputPath, outputPath, languageName, title string, stride int
 				term = baseTerm
 				term.Expression = h.Expression
 				term.Reading = h.Reading
+				term.addTermTags(h.TermTags...)
+				term.Score = calculateTermScore(0, h)
 			}
 			terms = append(terms, term)
 		}

From 96358e3eb548d6706b00a203cd5aa126ad622b35 Mon Sep 17 00:00:00 2001
From: stephenmk <stephenmk@users.noreply.github.com>
Date: Tue, 24 Jan 2023 08:55:24 -0600
Subject: [PATCH 11/19] Fix function parameter

Sense numbers start at 1, not 0
---
 jmdict.go      | 2 +-
 jmdictForms.go | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/jmdict.go b/jmdict.go
index 6de8877..7283907 100644
--- a/jmdict.go
+++ b/jmdict.go
@@ -85,7 +85,7 @@ func createSearchTerm(headword headword, entry jmdict.JmdictEntry, meta jmdictMe
 		term.addRules(rules...)
 	}
 	term.addTermTags(headword.TermTags...)
-	term.Score = calculateTermScore(0, headword)
+	term.Score = calculateTermScore(1, headword)
 
 	redirectHeadword := meta.seqToMainHeadword[entry.Sequence]
 	expHash := redirectHeadword.ExpHash()
diff --git a/jmdictForms.go b/jmdictForms.go
index def7f7a..032291d 100644
--- a/jmdictForms.go
+++ b/jmdictForms.go
@@ -225,7 +225,7 @@ func formsExportDb(inputPath, outputPath, languageName, title string, stride int
 				term.Expression = h.Expression
 				term.Reading = h.Reading
 				term.addTermTags(h.TermTags...)
-				term.Score = calculateTermScore(0, h)
+				term.Score = calculateTermScore(1, h)
 			}
 			terms = append(terms, term)
 		}

From 406067eeddf50c1a105234bd4ff34594d2ed90fe Mon Sep 17 00:00:00 2001
From: stephenmk <stephenmk@users.noreply.github.com>
Date: Tue, 24 Jan 2023 13:02:50 -0600
Subject: [PATCH 12/19] Include entity tags in standalone forms dictionary

---
 jmdictForms.go | 9 +++++++--
 1 file changed, 7 insertions(+), 2 deletions(-)

diff --git a/jmdictForms.go b/jmdictForms.go
index 032291d..9d21ac4 100644
--- a/jmdictForms.go
+++ b/jmdictForms.go
@@ -205,7 +205,7 @@ func formsExportDb(inputPath, outputPath, languageName, title string, stride int
 	}
 	defer reader.Close()
 
-	dictionary, _, err := jmdict.LoadJmdictNoTransform(reader)
+	dictionary, entities, err := jmdict.LoadJmdictNoTransform(reader)
 	if err != nil {
 		return err
 	}
@@ -231,13 +231,18 @@ func formsExportDb(inputPath, outputPath, languageName, title string, stride int
 		}
 	}
 
+	tags := dbTagList{}
+	tags = append(tags, entityTags(entities)...)
+	tags = append(tags, newsFrequencyTags()...)
+	tags = append(tags, customDbTags()...)
+
 	if title == "" {
 		title = "JMdict Forms"
 	}
 
 	recordData := map[string]dbRecordList{
 		"term": terms.crush(),
-		"tag":  dbRecordList{},
+		"tag":  tags.crush(),
 	}
 
 	jmdictDate := jmdictPublicationDate(dictionary)

From 7bd967915c4597c38a93d79cb89a5f305ddbedca Mon Sep 17 00:00:00 2001
From: stephenmk <stephenmk@users.noreply.github.com>
Date: Wed, 25 Jan 2023 18:26:47 -0600
Subject: [PATCH 13/19] Add "forms" term in special circumstances
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

If a headword appears in multiple entries, then each entry needs a
corresponding "forms" term in the output dictionary.

For example, 軽卒 is the only headword in entry 2275730, but 軽卒 also
appears as an irregular form in entry 1252910. If a "forms" term is
not included for the former entry, then it will appear that 軽卒 is
irregular for all senses in the output dictionary.
---
 jmdict.go      | 59 ++++++++++++++++++++++++++++++++++----------------
 jmdictForms.go | 19 ++++++++--------
 2 files changed, 50 insertions(+), 28 deletions(-)

diff --git a/jmdict.go b/jmdict.go
index 7283907..362b14d 100644
--- a/jmdict.go
+++ b/jmdict.go
@@ -62,7 +62,21 @@ func jmdictPublicationDate(dictionary jmdict.Jmdict) string {
 	return jmdictDate
 }
 
-func createFormsTerm(headword headword, entry jmdict.JmdictEntry, meta jmdictMetadata) dbTerm {
+func createFormsTerm(headword headword, entry jmdict.JmdictEntry, meta jmdictMetadata) (dbTerm, bool) {
+	// Don't add "forms" terms to non-English dictionaries.
+	// Information would be duplicated if users installed more
+	// than one version.
+	if meta.language != "eng" {
+		return dbTerm{}, false
+	}
+	// Don't need a "forms" term for entries with one unique
+	// headword which does not appear in any other entries.
+	if !meta.hasMultipleForms[entry.Sequence] {
+		if len(meta.headwordHashToSeqs[headword.Hash()]) == 1 {
+			return dbTerm{}, false
+		}
+	}
+
 	term := baseFormsTerm(entry)
 	term.Expression = headword.Expression
 	term.Reading = headword.Reading
@@ -72,10 +86,17 @@ func createFormsTerm(headword headword, entry jmdict.JmdictEntry, meta jmdictMet
 	term.addDefinitionTags("forms")
 	senseNumber := meta.seqToSenseCount[entry.Sequence] + 1
 	term.Score = calculateTermScore(senseNumber, headword)
-	return term
+	return term, true
 }
 
-func createSearchTerm(headword headword, entry jmdict.JmdictEntry, meta jmdictMetadata) dbTerm {
+func createSearchTerm(headword headword, entry jmdict.JmdictEntry, meta jmdictMetadata) (dbTerm, bool) {
+	// Don't add "search" terms to non-English dictionaries.
+	// Information would be duplicated if users installed more
+	// than one version.
+	if meta.language != "eng" {
+		return dbTerm{}, false
+	}
+
 	term := dbTerm{
 		Expression: headword.Expression,
 		Sequence:   -entry.Sequence,
@@ -98,10 +119,17 @@ func createSearchTerm(headword headword, entry jmdict.JmdictEntry, meta jmdictMe
 	)
 
 	term.Glossary = []any{contentStructure(content)}
-	return term
+	return term, true
 }
 
-func createSenseTerm(sense jmdict.JmdictSense, senseNumber int, headword headword, entry jmdict.JmdictEntry, meta jmdictMetadata) dbTerm {
+func createSenseTerm(sense jmdict.JmdictSense, senseNumber int, headword headword, entry jmdict.JmdictEntry, meta jmdictMetadata) (dbTerm, bool) {
+	if sense.RestrictedReadings != nil && !slices.Contains(sense.RestrictedReadings, headword.Reading) {
+		return dbTerm{}, false
+	}
+	if sense.RestrictedKanji != nil && !slices.Contains(sense.RestrictedKanji, headword.Expression) {
+		return dbTerm{}, false
+	}
+
 	term := dbTerm{
 		Expression: headword.Expression,
 		Reading:    headword.Reading,
@@ -126,7 +154,7 @@ func createSenseTerm(sense jmdict.JmdictSense, senseNumber int, headword headwor
 
 	term.Score = calculateTermScore(senseNumber, headword)
 
-	return term
+	return term, true
 }
 
 func extractTerms(headword headword, entry jmdict.JmdictEntry, meta jmdictMetadata) ([]dbTerm, bool) {
@@ -134,8 +162,7 @@ func extractTerms(headword headword, entry jmdict.JmdictEntry, meta jmdictMetada
 		return nil, false
 	}
 	if headword.IsSearchOnly {
-		if meta.language == "eng" {
-			searchTerm := createSearchTerm(headword, entry, meta)
+		if searchTerm, ok := createSearchTerm(headword, entry, meta); ok {
 			return []dbTerm{searchTerm}, true
 		} else {
 			return nil, false
@@ -145,25 +172,19 @@ func extractTerms(headword headword, entry jmdict.JmdictEntry, meta jmdictMetada
 	senseNumber := 1
 	for _, sense := range entry.Sense {
 		if !glossaryContainsLanguage(sense.Glossary, meta.language) {
+			// Do not increment sense number
 			continue
 		}
-		if sense.RestrictedReadings != nil && !slices.Contains(sense.RestrictedReadings, headword.Reading) {
-			senseNumber += 1
-			continue
+		if senseTerm, ok := createSenseTerm(sense, senseNumber, headword, entry, meta); ok {
+			terms = append(terms, senseTerm)
 		}
-		if sense.RestrictedKanji != nil && !slices.Contains(sense.RestrictedKanji, headword.Expression) {
-			senseNumber += 1
-			continue
-		}
-		senseTerm := createSenseTerm(sense, senseNumber, headword, entry, meta)
 		senseNumber += 1
-		terms = append(terms, senseTerm)
 	}
 
-	if meta.hasMultipleForms[entry.Sequence] && meta.language == "eng" {
-		formsTerm := createFormsTerm(headword, entry, meta)
+	if formsTerm, ok := createFormsTerm(headword, entry, meta); ok {
 		terms = append(terms, formsTerm)
 	}
+
 	return terms, true
 }
 
diff --git a/jmdictForms.go b/jmdictForms.go
index 9d21ac4..af4bba6 100644
--- a/jmdictForms.go
+++ b/jmdictForms.go
@@ -210,23 +210,24 @@ func formsExportDb(inputPath, outputPath, languageName, title string, stride int
 		return err
 	}
 
-	meta := newJmdictMetadata(dictionary, languageName)
+	meta := newJmdictMetadata(dictionary, "english")
 
 	terms := dbTermList{}
 	for _, entry := range dictionary.Entries {
 		baseTerm := baseFormsTerm(entry)
 		headwords := extractHeadwords(entry)
 		for _, h := range headwords {
-			var term dbTerm
 			if h.IsSearchOnly {
-				term = createSearchTerm(h, entry, meta)
-			} else {
-				term = baseTerm
-				term.Expression = h.Expression
-				term.Reading = h.Reading
-				term.addTermTags(h.TermTags...)
-				term.Score = calculateTermScore(1, h)
+				if term, ok := createSearchTerm(h, entry, meta); ok {
+					terms = append(terms, term)
+				}
+				continue
 			}
+			term := baseTerm
+			term.Expression = h.Expression
+			term.Reading = h.Reading
+			term.addTermTags(h.TermTags...)
+			term.Score = calculateTermScore(1, h)
 			terms = append(terms, term)
 		}
 	}

From 517ef3d052541731b0821cd32248c028278a29af Mon Sep 17 00:00:00 2001
From: stephenmk <stephenmk@users.noreply.github.com>
Date: Fri, 27 Jan 2023 19:09:12 -0600
Subject: [PATCH 14/19] Fix bug in term score assignments

This commit ensures that terms are grouped among their entries of
origin and displayed in correct sequential order in Yomichan's default
result grouping mode, "Group term-reading pairs."
---
 jmdict.go         | 16 ++++++++++------
 jmdictForms.go    |  2 +-
 jmdictMetadata.go | 23 +++++++++++++++++++++++
 3 files changed, 34 insertions(+), 7 deletions(-)

diff --git a/jmdict.go b/jmdict.go
index 362b14d..4c54db6 100644
--- a/jmdict.go
+++ b/jmdict.go
@@ -29,13 +29,15 @@ func grammarRules(partsOfSpeech []string) []string {
 	return rules
 }
 
-func calculateTermScore(senseNumber int, headword headword) int {
+func calculateTermScore(senseNumber int, depth int, headword headword) int {
 	const senseWeight int = 1
-	const entryPositionWeight int = 100
-	const priorityWeight int = 10000
+	const depthWeight int = 100
+	const entryPositionWeight int = 10000
+	const priorityWeight int = 1000000
 
 	score := 0
 	score -= (senseNumber - 1) * senseWeight
+	score -= depth * depthWeight
 	score -= headword.Index * entryPositionWeight
 	score += headword.Score() * priorityWeight
 
@@ -85,7 +87,8 @@ func createFormsTerm(headword headword, entry jmdict.JmdictEntry, meta jmdictMet
 
 	term.addDefinitionTags("forms")
 	senseNumber := meta.seqToSenseCount[entry.Sequence] + 1
-	term.Score = calculateTermScore(senseNumber, headword)
+	entryDepth := meta.entryDepth[entry.Sequence]
+	term.Score = calculateTermScore(senseNumber, entryDepth, headword)
 	return term, true
 }
 
@@ -106,7 +109,7 @@ func createSearchTerm(headword headword, entry jmdict.JmdictEntry, meta jmdictMe
 		term.addRules(rules...)
 	}
 	term.addTermTags(headword.TermTags...)
-	term.Score = calculateTermScore(1, headword)
+	term.Score = calculateTermScore(1, 0, headword)
 
 	redirectHeadword := meta.seqToMainHeadword[entry.Sequence]
 	expHash := redirectHeadword.ExpHash()
@@ -152,7 +155,8 @@ func createSenseTerm(sense jmdict.JmdictSense, senseNumber int, headword headwor
 	rules := grammarRules(sense.PartsOfSpeech)
 	term.addRules(rules...)
 
-	term.Score = calculateTermScore(senseNumber, headword)
+	entryDepth := meta.entryDepth[entry.Sequence]
+	term.Score = calculateTermScore(senseNumber, entryDepth, headword)
 
 	return term, true
 }
diff --git a/jmdictForms.go b/jmdictForms.go
index af4bba6..4964233 100644
--- a/jmdictForms.go
+++ b/jmdictForms.go
@@ -227,7 +227,7 @@ func formsExportDb(inputPath, outputPath, languageName, title string, stride int
 			term.Expression = h.Expression
 			term.Reading = h.Reading
 			term.addTermTags(h.TermTags...)
-			term.Score = calculateTermScore(1, h)
+			term.Score = calculateTermScore(1, 0, h)
 			terms = append(terms, term)
 		}
 	}
diff --git a/jmdictMetadata.go b/jmdictMetadata.go
index ec92827..99af862 100644
--- a/jmdictMetadata.go
+++ b/jmdictMetadata.go
@@ -20,6 +20,7 @@ type jmdictMetadata struct {
 	referenceToSeq     map[string]sequence
 	hashToSearchValues map[hash][]searchValue
 	seqToSearchHashes  map[sequence][]searchHash
+	entryDepth         map[sequence]int
 	hasMultipleForms   map[sequence]bool
 	maxSenseCount      int
 }
@@ -29,6 +30,26 @@ type senseID struct {
 	number   int
 }
 
+func (meta *jmdictMetadata) CalculateEntryDepth(headwords []headword, entrySequence sequence) {
+	// This is to ensure that terms are grouped among their
+	// entries of origin and displayed in correct sequential order
+	maxDepth := 0
+	for _, headword := range headwords {
+		hash := headword.Hash()
+		for _, seq := range meta.headwordHashToSeqs[hash] {
+			seqDepth := meta.entryDepth[seq]
+			if seqDepth == 0 {
+				meta.entryDepth[seq] = 1
+				seqDepth = 1
+			}
+			if maxDepth < seqDepth+1 {
+				maxDepth = seqDepth + 1
+			}
+		}
+	}
+	meta.entryDepth[entrySequence] = maxDepth
+}
+
 func (meta *jmdictMetadata) AddHeadword(headword headword, entry jmdict.JmdictEntry) {
 
 	// Determine how many senses are in this entry for this language
@@ -128,6 +149,7 @@ func newJmdictMetadata(dictionary jmdict.Jmdict, languageName string) jmdictMeta
 		references:         []string{},
 		hashToSearchValues: nil,
 		referenceToSeq:     nil,
+		entryDepth:         make(map[sequence]int),
 		hasMultipleForms:   make(map[sequence]bool),
 		maxSenseCount:      0,
 	}
@@ -141,6 +163,7 @@ func newJmdictMetadata(dictionary jmdict.Jmdict, languageName string) jmdictMeta
 				formCount += 1
 			}
 		}
+		meta.CalculateEntryDepth(headwords, entry.Sequence)
 		meta.hasMultipleForms[entry.Sequence] = (formCount > 1)
 	}
 

From 184dd45dbcd9350b2556442d129120bf31e60cb1 Mon Sep 17 00:00:00 2001
From: stephenmk <stephenmk@users.noreply.github.com>
Date: Sat, 28 Jan 2023 18:17:06 -0600
Subject: [PATCH 15/19] Use snake_case in filenames

---
 jmdictConstants.go => jmdict_constants.go     | 0
 jmdictForms.go => jmdict_forms.go             | 0
 jmdictGlossary.go => jmdict_glossary.go       | 0
 jmdictHeadword.go => jmdict_headword.go       | 0
 jmdictMetadata.go => jmdict_metadata.go       | 0
 jmdictReferences.go => jmdict_references.go   | 0
 jmdictTags.go => jmdict_tags.go               | 0
 structuredContent.go => structured_content.go | 0
 8 files changed, 0 insertions(+), 0 deletions(-)
 rename jmdictConstants.go => jmdict_constants.go (100%)
 rename jmdictForms.go => jmdict_forms.go (100%)
 rename jmdictGlossary.go => jmdict_glossary.go (100%)
 rename jmdictHeadword.go => jmdict_headword.go (100%)
 rename jmdictMetadata.go => jmdict_metadata.go (100%)
 rename jmdictReferences.go => jmdict_references.go (100%)
 rename jmdictTags.go => jmdict_tags.go (100%)
 rename structuredContent.go => structured_content.go (100%)

diff --git a/jmdictConstants.go b/jmdict_constants.go
similarity index 100%
rename from jmdictConstants.go
rename to jmdict_constants.go
diff --git a/jmdictForms.go b/jmdict_forms.go
similarity index 100%
rename from jmdictForms.go
rename to jmdict_forms.go
diff --git a/jmdictGlossary.go b/jmdict_glossary.go
similarity index 100%
rename from jmdictGlossary.go
rename to jmdict_glossary.go
diff --git a/jmdictHeadword.go b/jmdict_headword.go
similarity index 100%
rename from jmdictHeadword.go
rename to jmdict_headword.go
diff --git a/jmdictMetadata.go b/jmdict_metadata.go
similarity index 100%
rename from jmdictMetadata.go
rename to jmdict_metadata.go
diff --git a/jmdictReferences.go b/jmdict_references.go
similarity index 100%
rename from jmdictReferences.go
rename to jmdict_references.go
diff --git a/jmdictTags.go b/jmdict_tags.go
similarity index 100%
rename from jmdictTags.go
rename to jmdict_tags.go
diff --git a/structuredContent.go b/structured_content.go
similarity index 100%
rename from structuredContent.go
rename to structured_content.go

From abbe18314537935e1680af14d73657085543c249 Mon Sep 17 00:00:00 2001
From: stephenmk <stephenmk@users.noreply.github.com>
Date: Sat, 28 Jan 2023 18:39:08 -0600
Subject: [PATCH 16/19] Simplify logic for `index.json` struct

---
 common.go       | 1 +
 enamdict.go     | 2 --
 epwing.go       | 9 +++------
 frequency.go    | 9 +++------
 jmdict.go       | 1 -
 jmdict_forms.go | 1 -
 kanjidic.go     | 2 --
 rikai.go        | 9 +++------
 8 files changed, 10 insertions(+), 24 deletions(-)

diff --git a/common.go b/common.go
index 9d6b2aa..613a255 100644
--- a/common.go
+++ b/common.go
@@ -214,6 +214,7 @@ func writeDb(outputPath string, index dbIndex, recordData map[string]dbRecordLis
 		}
 	}
 
+	index.setDefaults()
 	bytes, err := marshalJSON(index, pretty)
 	if err != nil {
 		return err
diff --git a/enamdict.go b/enamdict.go
index e0c1cb0..78b886d 100644
--- a/enamdict.go
+++ b/enamdict.go
@@ -105,10 +105,8 @@ func jmnedictExportDb(inputPath, outputPath, language, title string, stride int,
 		Title:       title,
 		Revision:    "jmnedict1",
 		Sequenced:   true,
-		Description: "",
 		Attribution: edrdgAttribution,
 	}
-	index.setDefaults()
 
 	return writeDb(
 		outputPath,
diff --git a/epwing.go b/epwing.go
index 83b54b8..c7b2136 100644
--- a/epwing.go
+++ b/epwing.go
@@ -102,13 +102,10 @@ func epwingExportDb(inputPath, outputPath, language, title string, stride int, p
 	}
 
 	index := dbIndex{
-		Title:       title,
-		Revision:    strings.Join(revisions, ";"),
-		Sequenced:   true,
-		Description: "",
-		Attribution: "",
+		Title:     title,
+		Revision:  strings.Join(revisions, ";"),
+		Sequenced: true,
 	}
-	index.setDefaults()
 
 	return writeDb(
 		outputPath,
diff --git a/frequency.go b/frequency.go
index 5d9f06a..310856c 100644
--- a/frequency.go
+++ b/frequency.go
@@ -56,13 +56,10 @@ func frequncyExportDb(inputPath, outputPath, language, title string, stride int,
 	}
 
 	index := dbIndex{
-		Title:       title,
-		Revision:    "frequency1",
-		Sequenced:   false,
-		Description: "",
-		Attribution: "",
+		Title:     title,
+		Revision:  "frequency1",
+		Sequenced: false,
 	}
-	index.setDefaults()
 
 	return writeDb(
 		outputPath,
diff --git a/jmdict.go b/jmdict.go
index 4c54db6..746f7a0 100644
--- a/jmdict.go
+++ b/jmdict.go
@@ -238,7 +238,6 @@ func jmdExportDb(inputPath string, outputPath string, languageName string, title
 		Sequenced:   true,
 		Attribution: edrdgAttribution,
 	}
-	index.setDefaults()
 
 	return writeDb(
 		outputPath,
diff --git a/jmdict_forms.go b/jmdict_forms.go
index 4964233..59df010 100644
--- a/jmdict_forms.go
+++ b/jmdict_forms.go
@@ -254,7 +254,6 @@ func formsExportDb(inputPath, outputPath, languageName, title string, stride int
 		Sequenced:   true,
 		Attribution: edrdgAttribution,
 	}
-	index.setDefaults()
 
 	return writeDb(
 		outputPath,
diff --git a/kanjidic.go b/kanjidic.go
index e1c42d9..5474aed 100644
--- a/kanjidic.go
+++ b/kanjidic.go
@@ -163,10 +163,8 @@ func kanjidicExportDb(inputPath, outputPath, language, title string, stride int,
 		Title:       title,
 		Revision:    "kanjidic2",
 		Sequenced:   false,
-		Description: "",
 		Attribution: edrdgAttribution,
 	}
-	index.setDefaults()
 
 	return writeDb(
 		outputPath,
diff --git a/rikai.go b/rikai.go
index f3b6b12..bfc5307 100644
--- a/rikai.go
+++ b/rikai.go
@@ -153,13 +153,10 @@ func rikaiExportDb(inputPath, outputPath, language, title string, stride int, pr
 	}
 
 	index := dbIndex{
-		Title:       title,
-		Revision:    "rikai2",
-		Sequenced:   true,
-		Description: "",
-		Attribution: "",
+		Title:     title,
+		Revision:  "rikai2",
+		Sequenced: true,
 	}
-	index.setDefaults()
 
 	return writeDb(
 		outputPath,

From 8b4b8999599d18766910edaa37f08ca9807fad26 Mon Sep 17 00:00:00 2001
From: stephenmk <stephenmk@users.noreply.github.com>
Date: Sun, 29 Jan 2023 14:06:50 -0600
Subject: [PATCH 17/19] Hide new JMdict structured content features behind
 "extra" option

Require `-language=english_extra` to produce the complete version of
the new JMdict dictionary file.

If and when we determine that all the new features are ready to be
included in the dictionary by default, we can remove this logic.
---
 jmdict.go              | 13 +++++++++++--
 jmdict_constants.go    | 23 ++++++++++++-----------
 jmdict_forms.go        |  2 +-
 jmdict_glossary.go     |  2 +-
 jmdict_metadata.go     |  2 ++
 scripts/build_dicts.sh |  5 +++--
 6 files changed, 30 insertions(+), 17 deletions(-)

diff --git a/jmdict.go b/jmdict.go
index 746f7a0..ceb5835 100644
--- a/jmdict.go
+++ b/jmdict.go
@@ -1,6 +1,7 @@
 package yomichan
 
 import (
+	"errors"
 	"os"
 	"regexp"
 	"strconv"
@@ -48,7 +49,11 @@ func doDisplaySenseNumberTag(headword headword, entry jmdict.JmdictEntry, meta j
 	// Display sense numbers if the entry has more than one sense
 	// or if the headword is found in multiple entries.
 	hash := headword.Hash()
-	if meta.seqToSenseCount[entry.Sequence] > 1 {
+	if !meta.extraMode {
+		return false
+	} else if meta.language != "eng" {
+		return false
+	} else if meta.seqToSenseCount[entry.Sequence] > 1 {
 		return true
 	} else if len(meta.headwordHashToSeqs[hash]) > 1 {
 		return true
@@ -68,7 +73,7 @@ func createFormsTerm(headword headword, entry jmdict.JmdictEntry, meta jmdictMet
 	// Don't add "forms" terms to non-English dictionaries.
 	// Information would be duplicated if users installed more
 	// than one version.
-	if meta.language != "eng" {
+	if meta.language != "eng" || !meta.extraMode {
 		return dbTerm{}, false
 	}
 	// Don't need a "forms" term for entries with one unique
@@ -193,6 +198,10 @@ func extractTerms(headword headword, entry jmdict.JmdictEntry, meta jmdictMetada
 }
 
 func jmdExportDb(inputPath string, outputPath string, languageName string, title string, stride int, pretty bool) error {
+	if _, ok := langNameToCode[languageName]; !ok {
+		return errors.New("Unrecognized language parameter: " + languageName)
+	}
+
 	reader, err := os.Open(inputPath)
 	if err != nil {
 		return err
diff --git a/jmdict_constants.go b/jmdict_constants.go
index 1d49194..cb74233 100644
--- a/jmdict_constants.go
+++ b/jmdict_constants.go
@@ -42,17 +42,18 @@ var ISOtoFlag = map[string]string{
 }
 
 var langNameToCode = map[string]string{
-	"":          "eng",
-	"english":   "eng",
-	"dutch":     "dut",
-	"french":    "fre",
-	"german":    "ger",
-	"hungarian": "hun",
-	"italian":   "ita",
-	"russian":   "rus",
-	"slovenian": "slv",
-	"spanish":   "spa",
-	"swedish":   "swe",
+	"":              "eng",
+	"english":       "eng",
+	"english_extra": "eng",
+	"dutch":         "dut",
+	"french":        "fre",
+	"german":        "ger",
+	"hungarian":     "hun",
+	"italian":       "ita",
+	"russian":       "rus",
+	"slovenian":     "slv",
+	"spanish":       "spa",
+	"swedish":       "swe",
 }
 
 var glossTypeCodeToName = map[LangCode]string{
diff --git a/jmdict_forms.go b/jmdict_forms.go
index 59df010..5d01de5 100644
--- a/jmdict_forms.go
+++ b/jmdict_forms.go
@@ -210,7 +210,7 @@ func formsExportDb(inputPath, outputPath, languageName, title string, stride int
 		return err
 	}
 
-	meta := newJmdictMetadata(dictionary, "english")
+	meta := newJmdictMetadata(dictionary, "")
 
 	terms := dbTermList{}
 	for _, entry := range dictionary.Entries {
diff --git a/jmdict_glossary.go b/jmdict_glossary.go
index 0260cbf..d116981 100644
--- a/jmdict_glossary.go
+++ b/jmdict_glossary.go
@@ -287,7 +287,7 @@ func createGlossaryContent(sense jmdict.JmdictSense, meta jmdictMetadata) any {
 
 func createGlossary(sense jmdict.JmdictSense, meta jmdictMetadata) []any {
 	glossary := []any{}
-	if needsStructuredContent(sense, meta.language) {
+	if meta.extraMode && needsStructuredContent(sense, meta.language) {
 		glossary = append(glossary, createGlossaryContent(sense, meta))
 	} else {
 		for _, gloss := range sense.Glossary {
diff --git a/jmdict_metadata.go b/jmdict_metadata.go
index 99af862..98e35d9 100644
--- a/jmdict_metadata.go
+++ b/jmdict_metadata.go
@@ -23,6 +23,7 @@ type jmdictMetadata struct {
 	entryDepth         map[sequence]int
 	hasMultipleForms   map[sequence]bool
 	maxSenseCount      int
+	extraMode          bool
 }
 
 type senseID struct {
@@ -152,6 +153,7 @@ func newJmdictMetadata(dictionary jmdict.Jmdict, languageName string) jmdictMeta
 		entryDepth:         make(map[sequence]int),
 		hasMultipleForms:   make(map[sequence]bool),
 		maxSenseCount:      0,
+		extraMode:          languageName == "english_extra",
 	}
 
 	for _, entry := range dictionary.Entries {
diff --git a/scripts/build_dicts.sh b/scripts/build_dicts.sh
index 764d1d0..df63ac6 100755
--- a/scripts/build_dicts.sh
+++ b/scripts/build_dicts.sh
@@ -17,10 +17,11 @@ function refresh_source () {
 }
 
 refresh_source "JMdict_e_examp"
-yomichan -language="english" -title="JMdict" src/JMdict_e_examp dst/jmdict_english_with_examples.zip
+yomichan -language="english_extra" -title="JMdict" src/JMdict_e_examp dst/jmdict_english_extra_with_examples.zip
 
 refresh_source "JMdict"
-yomichan -language="english"   -title="JMdict"             src/JMdict dst/jmdict_english.zip
+yomichan -language="english_extra" -title="JMdict"         src/JMdict dst/jmdict_english_extra.zip
+yomichan -language="english"   -title="JMdict (English)"   src/JMdict dst/jmdict_english.zip
 yomichan -language="dutch"     -title="JMdict (Dutch)"     src/JMdict dst/jmdict_dutch.zip
 yomichan -language="french"    -title="JMdict (French)"    src/JMdict dst/jmdict_french.zip
 yomichan -language="german"    -title="JMdict (German)"    src/JMdict dst/jmdict_german.zip

From aab031972c4cef099eafb30fbc2ae8a96ee8e842 Mon Sep 17 00:00:00 2001
From: stephenmk <stephenmk@users.noreply.github.com>
Date: Sun, 29 Jan 2023 20:06:46 -0600
Subject: [PATCH 18/19] Simplify declaration of constants

---
 jmdict_constants.go | 36 +++++++++++++++++++-----------------
 1 file changed, 19 insertions(+), 17 deletions(-)

diff --git a/jmdict_constants.go b/jmdict_constants.go
index cb74233..5424836 100644
--- a/jmdict_constants.go
+++ b/jmdict_constants.go
@@ -5,26 +5,28 @@ type LangCode struct {
 	code     string
 }
 
-const edrdgAttribution = "This publication has included material from the JMdict (EDICT, etc.) dictionary files in accordance with the licence provisions of the Electronic Dictionaries Research Group. See http://www.edrdg.org/"
+const (
+	edrdgAttribution = "This publication has included material from the JMdict (EDICT, etc.) dictionary files in accordance with the licence provisions of the Electronic Dictionaries Research Group. See http://www.edrdg.org/"
 
-const prioritySymbol = "★"
-const rareKanjiSymbol = "🅁"
-const irregularSymbol = "⚠"
-const outdatedSymbol = "⛬"
-const defaultSymbol = "㊒"
+	prioritySymbol  = "★"
+	rareKanjiSymbol = "🅁"
+	irregularSymbol = "⚠"
+	outdatedSymbol  = "⛬"
+	defaultSymbol   = "㊒"
 
-const priorityTagName = "⭐"
-const rareKanjiTagName = "R"
-const irregularTagName = "⚠️"
-const outdatedTagName = "⛬"
-const atejiTagName = "ateji"
-const gikunTagName = "gikun"
+	priorityTagName  = "⭐"
+	rareKanjiTagName = "R"
+	irregularTagName = "⚠️"
+	outdatedTagName  = "⛬"
+	atejiTagName     = "ateji"
+	gikunTagName     = "gikun"
 
-const langMarker = "'🌐 '"
-const noteMarker = "'📝 '"
-const infoMarker = "'ℹ️ '"
-const refMarker = "'➡️ '"
-const antonymMarker = "'🔄 '"
+	langMarker    = "'🌐 '"
+	noteMarker    = "'📝 '"
+	infoMarker    = "'ℹ️ '"
+	refMarker     = "'➡️ '"
+	antonymMarker = "'🔄 '"
+)
 
 var ISOtoFlag = map[string]string{
 	"":    "'🇬🇧 '",

From 0b328e1e0715b178c0c335f3c90919d82f0bb45d Mon Sep 17 00:00:00 2001
From: stephenmk <stephenmk@users.noreply.github.com>
Date: Sun, 29 Jan 2023 22:34:13 -0600
Subject: [PATCH 19/19] Add support for undocumented frequency and information
 tags

Custom dictionary files using the JMdict XML format may contain
nonstandard frequency and information tags.
---
 jmdict_headword.go | 18 +++++++++++++-----
 1 file changed, 13 insertions(+), 5 deletions(-)

diff --git a/jmdict_headword.go b/jmdict_headword.go
index 4ead44c..19a4bba 100644
--- a/jmdict_headword.go
+++ b/jmdict_headword.go
@@ -130,6 +130,9 @@ func (h *headword) SetFlags(infoTags, freqTags []string) {
 			h.IsAteji = true
 		case "gikun":
 			h.IsGikun = true
+		default:
+			fmt.Println("Unknown information tag type: " + infoTag)
+			h.TermTags = append(h.TermTags, infoTag)
 		}
 	}
 	if h.IsOutdated && h.IsRareKanji {
@@ -138,16 +141,16 @@ func (h *headword) SetFlags(infoTags, freqTags []string) {
 }
 
 func (h *headword) SetTermTags(freqTags []string) {
-	h.TermTags = []string{}
 	if h.IsPriority {
 		h.TermTags = append(h.TermTags, priorityTagName)
 	}
+	knownFreqTags := []string{"ichi1", "ichi2", "gai1", "gai2", "spec1", "spec2"}
 	for _, tag := range freqTags {
 		isNewsFreqTag, _ := regexp.MatchString(`nf\d\d`, tag)
 		if isNewsFreqTag {
 			// nf tags are divided into ranks of 500
-			// (nf01 to nf48), but it will be easier
-			// for the user to read 1k, 2k, etc.
+			// (nf01 to nf48). Let's combine them into
+			// ranks of 1k (news1k, news2k, ..., news24k).
 			var i int
 			if _, err := fmt.Sscanf(tag, "nf%2d", &i); err == nil {
 				i = (i + (i % 2)) / 2
@@ -155,10 +158,15 @@ func (h *headword) SetTermTags(freqTags []string) {
 				h.TermTags = append(h.TermTags, newsTag)
 			}
 		} else if tag == "news1" || tag == "news2" {
+			// News tags are derived from the nf
+			// rankings, so these are not needed.
 			continue
-		} else {
-			tagWithoutTheNumber := tag[:len(tag)-1] // "ichi", "gai", or "spec"
+		} else if slices.Contains(knownFreqTags, tag) {
+			tagWithoutTheNumber := tag[:len(tag)-1]
 			h.TermTags = append(h.TermTags, tagWithoutTheNumber)
+		} else {
+			fmt.Println("Unknown frequency tag type: " + tag)
+			h.TermTags = append(h.TermTags, tag)
 		}
 	}
 	if h.IsIrregular {