yomichan-import/jmdict.go

package yomichan

import (
	"errors"
	"os"
	"regexp"
	"strconv"
	"strings"

	"foosoft.net/projects/jmdict"
	"golang.org/x/exp/slices"
)

// grammarRules converts JMdict part-of-speech tags into the rule
// names that callers hand to a term's addRules method.
//
// The tags "adj-i", "vk" and "vz" are passed through unchanged. A
// tag beginning with "v5" or "v1" is collapsed to "v5" or "v1", and
// a tag beginning with "vs-" becomes "vs". Every other tag is
// ignored. One rule is emitted per matching tag, in input order, so
// the result may contain repeated values. The returned slice is
// never nil, even when nothing matched.
func grammarRules(partsOfSpeech []string) []string {
	matched := make([]string, 0)
	for _, tag := range partsOfSpeech {
		switch {
		case tag == "adj-i", tag == "vk", tag == "vz":
			matched = append(matched, tag)
		case strings.HasPrefix(tag, "v5"):
			matched = append(matched, "v5")
		case strings.HasPrefix(tag, "v1"):
			matched = append(matched, "v1")
		case strings.HasPrefix(tag, "vs-"):
			matched = append(matched, "vs")
		}
	}
	return matched
}

// calculateTermScore computes the score assigned to a term.
//
// The headword's own Score() raises the result; a later sense
// number, a greater depth and a larger headword.Index each lower it.
// The weights differ by factors of 100, so a component only
// outranks the ones after it while those stay below 100 —
// NOTE(review): that bound is not enforced here.
func calculateTermScore(senseNumber int, depth int, headword headword) int {
	const (
		senseWeight         int = 1
		depthWeight         int = 100
		entryPositionWeight int = 10000
		priorityWeight      int = 1000000
	)

	bonus := headword.Score() * priorityWeight

	// The first sense (senseNumber == 1) carries no sense penalty.
	penalty := (senseNumber-1)*senseWeight +
		depth*depthWeight +
		headword.Index*entryPositionWeight

	return bonus - penalty
}

// doDisplaySenseNumberTag reports whether a sense term should carry
// a sense-number definition tag.
//
// Sense numbers are never shown outside extra mode or for any
// language other than "eng". Otherwise they are shown when the entry
// has more than one sense, or when the headword's hash is associated
// with more than one entry sequence.
func doDisplaySenseNumberTag(headword headword, entry jmdict.JmdictEntry, meta jmdictMetadata) bool {
	headwordHash := headword.Hash()

	if !meta.extraMode || meta.language != "eng" {
		return false
	}

	return meta.seqToSenseCount[entry.Sequence] > 1 ||
		len(meta.headwordHashToSeqs[headwordHash]) > 1
}

// jmdictPublicationDate extracts a publication date from the final
// entry of the dictionary.
//
// It inspects the first gloss of the first sense of the last entry
// and returns the first substring shaped like NNNN-NN-NN (digits).
// If the dictionary has no entries, the last entry has no senses,
// that sense has no glosses, or the gloss holds no such substring,
// the string "unknown" is returned instead.
func jmdictPublicationDate(dictionary jmdict.Jmdict) string {
	const fallback = "unknown"

	entries := dictionary.Entries
	if len(entries) == 0 {
		return fallback
	}

	senses := entries[len(entries)-1].Sense
	if len(senses) == 0 {
		return fallback
	}

	glosses := senses[0].Glossary
	if len(glosses) == 0 {
		return fallback
	}

	datePattern := regexp.MustCompile(`\d{4}-\d{2}-\d{2}`)
	if found := datePattern.FindString(glosses[0].Content); found != "" {
		return found
	}
	return fallback
}

// jmdictFormsTerm builds the "forms" term for one headword of an
// entry. The boolean result is false when no such term should be
// emitted, in which case the returned dbTerm is the zero value.
//
// The term starts from baseFormsTerm(entry, meta), takes the
// headword's expression, reading and term tags, and is tagged with
// the "forms" definition tag.
func jmdictFormsTerm(headword headword, entry jmdict.JmdictEntry, meta jmdictMetadata) (dbTerm, bool) {
	seq := entry.Sequence

	// "forms" terms are only produced for the English dictionary in
	// extra mode. In other language versions the information would
	// be duplicated if users installed more than one version.
	if !(meta.language == "eng" && meta.extraMode) {
		return dbTerm{}, false
	}

	// No "forms" term is needed for an entry with one unique
	// headword that does not appear in any other entry.
	if !meta.hasMultipleForms[seq] && len(meta.headwordHashToSeqs[headword.Hash()]) == 1 {
		return dbTerm{}, false
	}

	formsTerm := baseFormsTerm(entry, meta)
	formsTerm.Expression, formsTerm.Reading = headword.Expression, headword.Reading

	formsTerm.addTermTags(headword.TermTags...)
	formsTerm.addDefinitionTags("forms")

	// Score the term as if it were one sense past the entry's
	// recorded sense count.
	formsTerm.Score = calculateTermScore(
		meta.seqToSenseCount[seq]+1,
		meta.entryDepth[seq],
		headword,
	)

	return formsTerm, true
}

// jmdictSearchTerm builds the "search" term for a headword: a term
// whose only glossary item is an arrow followed by an internal link
// to the entry's main headword. The boolean result is false when no
// such term should be emitted, in which case the returned dbTerm is
// the zero value.
//
// The term has an expression but no reading, and its Sequence is the
// negated entry sequence.
func jmdictSearchTerm(headword headword, entry jmdict.JmdictEntry, meta jmdictMetadata) (dbTerm, bool) {
	// "search" terms are only added to the English dictionary. In
	// other language versions the information would be duplicated if
	// users installed more than one version.
	if meta.language != "eng" {
		return dbTerm{}, false
	}

	var searchTerm dbTerm
	searchTerm.Expression = headword.Expression
	searchTerm.Sequence = -entry.Sequence

	// Rules come from the part-of-speech values cached for the entry.
	searchTerm.addRules(grammarRules(meta.seqToPartsOfSpeech[entry.Sequence])...)

	searchTerm.addTermTags(headword.TermTags...)
	searchTerm.Score = calculateTermScore(1, 0, headword)

	// Show the reading in the link only when more than one reading is
	// recorded for the target's expression hash.
	target := meta.seqToMainHeadword[entry.Sequence]
	showReading := len(meta.expHashToReadings[target.ExpHash()]) > 1

	arrowLink := contentSpan(
		contentAttr{fontSize: "130%"},
		"⟶",
		target.ToInternalLink(showReading),
	)
	searchTerm.Glossary = []any{contentStructure(arrowLink)}

	return searchTerm, true
}

// jmdictSenseTerm builds the term for a single sense of an entry as
// seen from one headword. The boolean result is false when the sense
// does not apply to the headword, in which case the returned dbTerm
// is the zero value.
//
// senseNumber is used both for the optional sense-number definition
// tag and for the term's score.
func jmdictSenseTerm(sense jmdict.JmdictSense, senseNumber int, headword headword, entry jmdict.JmdictEntry, meta jmdictMetadata) (dbTerm, bool) {
	// A non-nil restriction list limits the sense to the readings or
	// expressions it names.
	if readings := sense.RestrictedReadings; readings != nil && !slices.Contains(readings, headword.Reading) {
		return dbTerm{}, false
	}
	if kanji := sense.RestrictedKanji; kanji != nil && !slices.Contains(kanji, headword.Expression) {
		return dbTerm{}, false
	}

	var senseTerm dbTerm
	senseTerm.Expression = headword.Expression
	senseTerm.Reading = headword.Reading
	senseTerm.Sequence = entry.Sequence
	senseTerm.Glossary = createGlossary(sense, meta)

	senseTerm.addTermTags(headword.TermTags...)

	if doDisplaySenseNumberTag(headword, entry, meta) {
		senseTerm.addDefinitionTags(strconv.Itoa(senseNumber))
	}

	// This is a hack to provide part-of-speech info to non-English
	// versions of JMdict: when the sense has none of its own, fall
	// back to the values cached for the whole entry.
	partsOfSpeech := sense.PartsOfSpeech
	if meta.language != "eng" && len(partsOfSpeech) == 0 {
		partsOfSpeech = meta.seqToPartsOfSpeech[entry.Sequence]
	}

	// Definition tags are added group by group, in this order.
	tagGroups := [][]string{partsOfSpeech, sense.Fields, sense.Misc, sense.Dialects}
	for _, group := range tagGroups {
		senseTerm.addDefinitionTags(group...)
	}

	senseTerm.addRules(grammarRules(partsOfSpeech)...)

	senseTerm.Score = calculateTermScore(senseNumber, meta.entryDepth[entry.Sequence], headword)

	return senseTerm, true
}

// jmdictTerms produces every term generated by one headword of an
// entry. The boolean result is false when nothing should be added
// for this headword.
//
// Entries whose recorded sense count is zero produce nothing. A
// search-only headword produces at most a single "search" term.
// Any other headword produces one term per applicable sense,
// followed by a "forms" term when jmdictFormsTerm supplies one.
func jmdictTerms(headword headword, entry jmdict.JmdictEntry, meta jmdictMetadata) ([]dbTerm, bool) {
	if meta.seqToSenseCount[entry.Sequence] == 0 {
		return nil, false
	}

	if headword.IsSearchOnly {
		searchTerm, ok := jmdictSearchTerm(headword, entry, meta)
		if !ok {
			return nil, false
		}
		return []dbTerm{searchTerm}, true
	}

	collected := make([]dbTerm, 0)

	// Sense numbers start at 1 and advance only for senses that pass
	// glossaryContainsLanguage. They still advance when
	// jmdictSenseTerm rejects the sense for this headword.
	senseNumber := 0
	for _, sense := range entry.Sense {
		if !glossaryContainsLanguage(sense.Glossary, meta.language) {
			continue
		}
		senseNumber++
		senseTerm, ok := jmdictSenseTerm(sense, senseNumber, headword, entry, meta)
		if ok {
			collected = append(collected, senseTerm)
		}
	}

	formsTerm, ok := jmdictFormsTerm(headword, entry, meta)
	if ok {
		collected = append(collected, formsTerm)
	}

	return collected, true
}

// jmdictExportDb reads the JMdict file at inputPath and writes the
// resulting dictionary to outputPath via writeDb.
//
// languageName must be a key of langNameToCode; otherwise an error
// is returned before any file is opened. An empty title defaults to
// "JMdict". stride and pretty are passed to writeDb unchanged.
// Errors from opening the input, loading the dictionary, or writing
// the output are returned as-is.
func jmdictExportDb(inputPath string, outputPath string, languageName string, title string, stride int, pretty bool) error {
	if _, known := langNameToCode[languageName]; !known {
		return errors.New("Unrecognized language parameter: " + languageName)
	}

	input, err := os.Open(inputPath)
	if err != nil {
		return err
	}
	defer input.Close()

	dictionary, entities, err := jmdict.LoadJmdictNoTransform(input)
	if err != nil {
		return err
	}

	meta := newJmdictMetadata(dictionary, languageName)

	// Gather the terms of every headword of every entry.
	terms := dbTermList{}
	for _, entry := range dictionary.Entries {
		for _, hw := range extractHeadwords(entry) {
			newTerms, ok := jmdictTerms(hw, entry, meta)
			if !ok {
				continue
			}
			terms = append(terms, newTerms...)
		}
	}

	// Tag groups are appended in a fixed order.
	tags := dbTagList{}
	tags = append(tags, entityTags(entities)...)
	tags = append(tags, senseNumberTags(meta.maxSenseCount)...)
	tags = append(tags, newsFrequencyTags()...)
	tags = append(tags, customDbTags()...)

	recordData := map[string]dbRecordList{
		"term": terms.crush(),
		"tag":  tags.crush(),
	}

	if title == "" {
		title = "JMdict"
	}

	// The revision embeds the publication date found in the
	// dictionary, or "unknown" when none is found.
	index := dbIndex{
		Title:       title,
		Revision:    "JMdict." + jmdictPublicationDate(dictionary),
		Sequenced:   true,
		Attribution: edrdgAttribution,
	}

	return writeDb(outputPath, index, recordData, stride, pretty)
}
Add new JMdict version 2023-01-22 20:37:18 +00:00			`package yomichan`

			`import (`
Hide new JMdict structured content features behind "extra" option Require `-language=english_extra` to produce the complete version of the new JMdict dictionary file. If and when we determine that all the new features are ready to be included in the dictionary by default, we can remove this logic. 2023-01-29 20:06:50 +00:00			`"errors"`
Add new JMdict version 2023-01-22 20:37:18 +00:00			`"os"`
			`"regexp"`
			`"strconv"`
			`"strings"`

			`"foosoft.net/projects/jmdict"`
			`"golang.org/x/exp/slices"`
			`)`

			`func grammarRules(partsOfSpeech []string) []string {`
			`rules := []string{}`
			`for _, partOfSpeech := range partsOfSpeech {`
			`switch partOfSpeech {`
			`case "adj-i", "vk", "vz":`
			`rules = append(rules, partOfSpeech)`
			`default:`
			`if strings.HasPrefix(partOfSpeech, "v5") {`
			`rules = append(rules, "v5")`
			`} else if strings.HasPrefix(partOfSpeech, "v1") {`
			`rules = append(rules, "v1")`
			`} else if strings.HasPrefix(partOfSpeech, "vs-") {`
			`rules = append(rules, "vs")`
			`}`
			`}`
			`}`
			`return rules`
			`}`

Fix bug in term score assignments This commit ensures that terms are grouped among their entries of origin and displayed in correct sequential order in Yomichan's default result grouping mode, "Group term-reading pairs." 2023-01-28 01:09:12 +00:00			`func calculateTermScore(senseNumber int, depth int, headword headword) int {`
Add new JMdict version 2023-01-22 20:37:18 +00:00			`const senseWeight int = 1`
Fix bug in term score assignments This commit ensures that terms are grouped among their entries of origin and displayed in correct sequential order in Yomichan's default result grouping mode, "Group term-reading pairs." 2023-01-28 01:09:12 +00:00			`const depthWeight int = 100`
			`const entryPositionWeight int = 10000`
			`const priorityWeight int = 1000000`
Add new JMdict version 2023-01-22 20:37:18 +00:00
			`score := 0`
			`score -= (senseNumber - 1) * senseWeight`
Fix bug in term score assignments This commit ensures that terms are grouped among their entries of origin and displayed in correct sequential order in Yomichan's default result grouping mode, "Group term-reading pairs." 2023-01-28 01:09:12 +00:00			`score -= depth * depthWeight`
Add new JMdict version 2023-01-22 20:37:18 +00:00			`score -= headword.Index * entryPositionWeight`
			`score += headword.Score() * priorityWeight`

			`return score`
			`}`

			`func doDisplaySenseNumberTag(headword headword, entry jmdict.JmdictEntry, meta jmdictMetadata) bool {`
			`// Display sense numbers if the entry has more than one sense`
			`// or if the headword is found in multiple entries.`
			`hash := headword.Hash()`
Hide new JMdict structured content features behind "extra" option Require `-language=english_extra` to produce the complete version of the new JMdict dictionary file. If and when we determine that all the new features are ready to be included in the dictionary by default, we can remove this logic. 2023-01-29 20:06:50 +00:00			`if !meta.extraMode {`
			`return false`
			`} else if meta.language != "eng" {`
			`return false`
			`} else if meta.seqToSenseCount[entry.Sequence] > 1 {`
Add new JMdict version 2023-01-22 20:37:18 +00:00			`return true`
			`} else if len(meta.headwordHashToSeqs[hash]) > 1 {`
			`return true`
			`} else {`
			`return false`
			`}`
			`}`

			`func jmdictPublicationDate(dictionary jmdict.Jmdict) string {`
Improve readability of publication date functions 2023-02-04 07:42:08 +00:00			`unknownDate := "unknown"`
			`idx := len(dictionary.Entries) - 1`
Add verification logic for date entry in JMdict Very old versions of JMdict and unofficial versions are unlikely to have the publication date entry at the end of the file. 2023-01-30 19:26:26 +00:00			`if len(dictionary.Entries) == 0 {`
Improve readability of publication date functions 2023-02-04 07:42:08 +00:00			`return unknownDate`
			`} else if len(dictionary.Entries[idx].Sense) == 0 {`
			`return unknownDate`
			`} else if len(dictionary.Entries[idx].Sense[0].Glossary) == 0 {`
			`return unknownDate`
Add verification logic for date entry in JMdict Very old versions of JMdict and unofficial versions are unlikely to have the publication date entry at the end of the file. 2023-01-30 19:26:26 +00:00			`}`
Improve readability of publication date functions 2023-02-04 07:42:08 +00:00			`dateGloss := dictionary.Entries[idx].Sense[0].Glossary[0].Content`
Add new JMdict version 2023-01-22 20:37:18 +00:00			r := regexp.MustCompile(`\d{4}-\d{2}-\d{2}`)
Improve readability of publication date functions 2023-02-04 07:42:08 +00:00			`date := r.FindString(dateGloss)`
			`if date != "" {`
			`return date`
Add verification logic for date entry in JMdict Very old versions of JMdict and unofficial versions are unlikely to have the publication date entry at the end of the file. 2023-01-30 19:26:26 +00:00			`} else {`
Improve readability of publication date functions 2023-02-04 07:42:08 +00:00			`return unknownDate`
Add verification logic for date entry in JMdict Very old versions of JMdict and unofficial versions are unlikely to have the publication date entry at the end of the file. 2023-01-30 19:26:26 +00:00			`}`
Add new JMdict version 2023-01-22 20:37:18 +00:00			`}`

Rename some jmdict functions 2023-02-02 01:14:37 +00:00			`func jmdictFormsTerm(headword headword, entry jmdict.JmdictEntry, meta jmdictMetadata) (dbTerm, bool) {`
Add "forms" term in special circumstances If a headword appears in multiple entries, then each entry needs a corresponding "forms" term in the output dictionary. For example, 軽卒 is the only headword in entry 2275730, but 軽卒 also appears as an irregular form in entry 1252910. If a "forms" term is not included for the former entry, then it will appear that 軽卒 is irregular for all senses in the output dictionary. 2023-01-26 00:26:47 +00:00			`// Don't add "forms" terms to non-English dictionaries.`
			`// Information would be duplicated if users installed more`
			`// than one version.`
Hide new JMdict structured content features behind "extra" option Require `-language=english_extra` to produce the complete version of the new JMdict dictionary file. If and when we determine that all the new features are ready to be included in the dictionary by default, we can remove this logic. 2023-01-29 20:06:50 +00:00			`if meta.language != "eng" \|\| !meta.extraMode {`
Add "forms" term in special circumstances If a headword appears in multiple entries, then each entry needs a corresponding "forms" term in the output dictionary. For example, 軽卒 is the only headword in entry 2275730, but 軽卒 also appears as an irregular form in entry 1252910. If a "forms" term is not included for the former entry, then it will appear that 軽卒 is irregular for all senses in the output dictionary. 2023-01-26 00:26:47 +00:00			`return dbTerm{}, false`
			`}`
			`// Don't need a "forms" term for entries with one unique`
			`// headword which does not appear in any other entries.`
			`if !meta.hasMultipleForms[entry.Sequence] {`
			`if len(meta.headwordHashToSeqs[headword.Hash()]) == 1 {`
			`return dbTerm{}, false`
			`}`
			`}`

Use cached part-of-speech values 2023-02-02 21:50:57 +00:00			`term := baseFormsTerm(entry, meta)`
Add new JMdict version 2023-01-22 20:37:18 +00:00			`term.Expression = headword.Expression`
			`term.Reading = headword.Reading`

			`term.addTermTags(headword.TermTags...)`
			`term.addDefinitionTags("forms")`
Use cached part-of-speech values 2023-02-02 21:50:57 +00:00
Add new JMdict version 2023-01-22 20:37:18 +00:00			`senseNumber := meta.seqToSenseCount[entry.Sequence] + 1`
Fix bug in term score assignments This commit ensures that terms are grouped among their entries of origin and displayed in correct sequential order in Yomichan's default result grouping mode, "Group term-reading pairs." 2023-01-28 01:09:12 +00:00			`entryDepth := meta.entryDepth[entry.Sequence]`
			`term.Score = calculateTermScore(senseNumber, entryDepth, headword)`
Use cached part-of-speech values 2023-02-02 21:50:57 +00:00
Add "forms" term in special circumstances If a headword appears in multiple entries, then each entry needs a corresponding "forms" term in the output dictionary. For example, 軽卒 is the only headword in entry 2275730, but 軽卒 also appears as an irregular form in entry 1252910. If a "forms" term is not included for the former entry, then it will appear that 軽卒 is irregular for all senses in the output dictionary. 2023-01-26 00:26:47 +00:00			`return term, true`
Add new JMdict version 2023-01-22 20:37:18 +00:00			`}`

Rename some jmdict functions 2023-02-02 01:14:37 +00:00			`func jmdictSearchTerm(headword headword, entry jmdict.JmdictEntry, meta jmdictMetadata) (dbTerm, bool) {`
Add "forms" term in special circumstances If a headword appears in multiple entries, then each entry needs a corresponding "forms" term in the output dictionary. For example, 軽卒 is the only headword in entry 2275730, but 軽卒 also appears as an irregular form in entry 1252910. If a "forms" term is not included for the former entry, then it will appear that 軽卒 is irregular for all senses in the output dictionary. 2023-01-26 00:26:47 +00:00			`// Don't add "search" terms to non-English dictionaries.`
			`// Information would be duplicated if users installed more`
			`// than one version.`
			`if meta.language != "eng" {`
			`return dbTerm{}, false`
			`}`

Add new JMdict version 2023-01-22 20:37:18 +00:00			`term := dbTerm{`
			`Expression: headword.Expression,`
			`Sequence: -entry.Sequence,`
			`}`
Use cached part-of-speech values 2023-02-02 21:50:57 +00:00
			`partsOfSpeech := meta.seqToPartsOfSpeech[entry.Sequence]`
			`rules := grammarRules(partsOfSpeech)`
			`term.addRules(rules...)`

Add new JMdict version 2023-01-22 20:37:18 +00:00			`term.addTermTags(headword.TermTags...)`
Fix bug in term score assignments This commit ensures that terms are grouped among their entries of origin and displayed in correct sequential order in Yomichan's default result grouping mode, "Group term-reading pairs." 2023-01-28 01:09:12 +00:00			`term.Score = calculateTermScore(1, 0, headword)`
Add new JMdict version 2023-01-22 20:37:18 +00:00
			`redirectHeadword := meta.seqToMainHeadword[entry.Sequence]`
			`expHash := redirectHeadword.ExpHash()`
			`doDisplayReading := (len(meta.expHashToReadings[expHash]) > 1)`

			`content := contentSpan(`
			`contentAttr{fontSize: "130%"},`
			`"⟶",`
			`redirectHeadword.ToInternalLink(doDisplayReading),`
			`)`

			`term.Glossary = []any{contentStructure(content)}`
Add "forms" term in special circumstances If a headword appears in multiple entries, then each entry needs a corresponding "forms" term in the output dictionary. For example, 軽卒 is the only headword in entry 2275730, but 軽卒 also appears as an irregular form in entry 1252910. If a "forms" term is not included for the former entry, then it will appear that 軽卒 is irregular for all senses in the output dictionary. 2023-01-26 00:26:47 +00:00			`return term, true`
Add new JMdict version 2023-01-22 20:37:18 +00:00			`}`

Rename some jmdict functions 2023-02-02 01:14:37 +00:00			`func jmdictSenseTerm(sense jmdict.JmdictSense, senseNumber int, headword headword, entry jmdict.JmdictEntry, meta jmdictMetadata) (dbTerm, bool) {`
Add "forms" term in special circumstances If a headword appears in multiple entries, then each entry needs a corresponding "forms" term in the output dictionary. For example, 軽卒 is the only headword in entry 2275730, but 軽卒 also appears as an irregular form in entry 1252910. If a "forms" term is not included for the former entry, then it will appear that 軽卒 is irregular for all senses in the output dictionary. 2023-01-26 00:26:47 +00:00			`if sense.RestrictedReadings != nil && !slices.Contains(sense.RestrictedReadings, headword.Reading) {`
			`return dbTerm{}, false`
			`}`
			`if sense.RestrictedKanji != nil && !slices.Contains(sense.RestrictedKanji, headword.Expression) {`
			`return dbTerm{}, false`
			`}`

Add new JMdict version 2023-01-22 20:37:18 +00:00			`term := dbTerm{`
			`Expression: headword.Expression,`
			`Reading: headword.Reading,`
			`Sequence: entry.Sequence,`
			`}`

			`term.Glossary = createGlossary(sense, meta)`

			`term.addTermTags(headword.TermTags...)`

			`if doDisplaySenseNumberTag(headword, entry, meta) {`
			`senseNumberTag := strconv.Itoa(senseNumber)`
			`term.addDefinitionTags(senseNumberTag)`
			`}`
JMdict: Ensure part-of-speech info is added in non-English versions Only English-language senses in JMdict contain part-of-speech tags. This info is displayed to users in definition tags and also used for deinflecting verbs and adjectives during term lookups. The old version of Yomichan-Import took the PoS tags from the final sense in the English version of an entry and applied them to every sense of every other language. For example, 川・かわ has two senses in English JMdict: a noun sense and a suffix sense. Therefore every sense of 川・かわ in every other language was tagged as a suffix. Instead, I suggest gathering all distinct PoS tags from each English entry and applying them all to each non-English sense. Every non-English sense of 川・かわ will therefore be tagged as both a noun and suffix. 2023-02-02 16:44:16 +00:00
			`if len(sense.PartsOfSpeech) == 0 && meta.language != "eng" {`
			`// This is a hack to provide part-of-speech info to`
			`// non-English versions of JMdict.`
			`sense.PartsOfSpeech = meta.seqToPartsOfSpeech[entry.Sequence]`
			`}`

Add new JMdict version 2023-01-22 20:37:18 +00:00			`term.addDefinitionTags(sense.PartsOfSpeech...)`
			`term.addDefinitionTags(sense.Fields...)`
			`term.addDefinitionTags(sense.Misc...)`
			`term.addDefinitionTags(sense.Dialects...)`

			`rules := grammarRules(sense.PartsOfSpeech)`
			`term.addRules(rules...)`

Fix bug in term score assignments This commit ensures that terms are grouped among their entries of origin and displayed in correct sequential order in Yomichan's default result grouping mode, "Group term-reading pairs." 2023-01-28 01:09:12 +00:00			`entryDepth := meta.entryDepth[entry.Sequence]`
			`term.Score = calculateTermScore(senseNumber, entryDepth, headword)`
Add new JMdict version 2023-01-22 20:37:18 +00:00
Add "forms" term in special circumstances If a headword appears in multiple entries, then each entry needs a corresponding "forms" term in the output dictionary. For example, 軽卒 is the only headword in entry 2275730, but 軽卒 also appears as an irregular form in entry 1252910. If a "forms" term is not included for the former entry, then it will appear that 軽卒 is irregular for all senses in the output dictionary. 2023-01-26 00:26:47 +00:00			`return term, true`
Add new JMdict version 2023-01-22 20:37:18 +00:00			`}`

Rename some jmdict functions 2023-02-02 01:14:37 +00:00			`func jmdictTerms(headword headword, entry jmdict.JmdictEntry, meta jmdictMetadata) ([]dbTerm, bool) {`
Add new JMdict version 2023-01-22 20:37:18 +00:00			`if meta.seqToSenseCount[entry.Sequence] == 0 {`
			`return nil, false`
			`}`
			`if headword.IsSearchOnly {`
Rename some jmdict functions 2023-02-02 01:14:37 +00:00			`if searchTerm, ok := jmdictSearchTerm(headword, entry, meta); ok {`
Exclude "search" and "forms" terms from non-English dictionaries This allows a user to install the English version and another version without cluttering their setup with duplicated information. If a user doesn't want to use the English version, they can get the "search" and "forms" terms by installing the separate jmdict_forms file. 2023-01-22 23:55:27 +00:00			`return []dbTerm{searchTerm}, true`
			`} else {`
			`return nil, false`
			`}`
Add new JMdict version 2023-01-22 20:37:18 +00:00			`}`
			`terms := []dbTerm{}`
			`senseNumber := 1`
			`for _, sense := range entry.Sense {`
			`if !glossaryContainsLanguage(sense.Glossary, meta.language) {`
Add "forms" term in special circumstances If a headword appears in multiple entries, then each entry needs a corresponding "forms" term in the output dictionary. For example, 軽卒 is the only headword in entry 2275730, but 軽卒 also appears as an irregular form in entry 1252910. If a "forms" term is not included for the former entry, then it will appear that 軽卒 is irregular for all senses in the output dictionary. 2023-01-26 00:26:47 +00:00			`// Do not increment sense number`
Add new JMdict version 2023-01-22 20:37:18 +00:00			`continue`
			`}`
Rename some jmdict functions 2023-02-02 01:14:37 +00:00			`if senseTerm, ok := jmdictSenseTerm(sense, senseNumber, headword, entry, meta); ok {`
Add "forms" term in special circumstances If a headword appears in multiple entries, then each entry needs a corresponding "forms" term in the output dictionary. For example, 軽卒 is the only headword in entry 2275730, but 軽卒 also appears as an irregular form in entry 1252910. If a "forms" term is not included for the former entry, then it will appear that 軽卒 is irregular for all senses in the output dictionary. 2023-01-26 00:26:47 +00:00			`terms = append(terms, senseTerm)`
Add new JMdict version 2023-01-22 20:37:18 +00:00			`}`
			`senseNumber += 1`
			`}`

Rename some jmdict functions 2023-02-02 01:14:37 +00:00			`if formsTerm, ok := jmdictFormsTerm(headword, entry, meta); ok {`
Add new JMdict version 2023-01-22 20:37:18 +00:00			`terms = append(terms, formsTerm)`
			`}`
Add "forms" term in special circumstances If a headword appears in multiple entries, then each entry needs a corresponding "forms" term in the output dictionary. For example, 軽卒 is the only headword in entry 2275730, but 軽卒 also appears as an irregular form in entry 1252910. If a "forms" term is not included for the former entry, then it will appear that 軽卒 is irregular for all senses in the output dictionary. 2023-01-26 00:26:47 +00:00
Add new JMdict version 2023-01-22 20:37:18 +00:00			`return terms, true`
			`}`

Rename some jmdict functions 2023-02-02 01:14:37 +00:00			`func jmdictExportDb(inputPath string, outputPath string, languageName string, title string, stride int, pretty bool) error {`
Hide new JMdict structured content features behind "extra" option Require `-language=english_extra` to produce the complete version of the new JMdict dictionary file. If and when we determine that all the new features are ready to be included in the dictionary by default, we can remove this logic. 2023-01-29 20:06:50 +00:00			`if _, ok := langNameToCode[languageName]; !ok {`
			`return errors.New("Unrecognized language parameter: " + languageName)`
			`}`

Add new JMdict version 2023-01-22 20:37:18 +00:00			`reader, err := os.Open(inputPath)`
			`if err != nil {`
			`return err`
			`}`
			`defer reader.Close()`

			`dictionary, entities, err := jmdict.LoadJmdictNoTransform(reader)`
			`if err != nil {`
			`return err`
			`}`

			`meta := newJmdictMetadata(dictionary, languageName)`

			`terms := dbTermList{}`
			`for _, entry := range dictionary.Entries {`
			`headwords := extractHeadwords(entry)`
			`for _, headword := range headwords {`
Rename some jmdict functions 2023-02-02 01:14:37 +00:00			`if newTerms, ok := jmdictTerms(headword, entry, meta); ok {`
Add new JMdict version 2023-01-22 20:37:18 +00:00			`terms = append(terms, newTerms...)`
			`}`
			`}`
			`}`

			`tags := dbTagList{}`
			`tags = append(tags, entityTags(entities)...)`
			`tags = append(tags, senseNumberTags(meta.maxSenseCount)...)`
			`tags = append(tags, newsFrequencyTags()...)`
			`tags = append(tags, customDbTags()...)`

			`recordData := map[string]dbRecordList{`
			`"term": terms.crush(),`
			`"tag": tags.crush(),`
			`}`

			`if title == "" {`
			`title = "JMdict"`
			`}`
			`jmdictDate := jmdictPublicationDate(dictionary)`

			`index := dbIndex{`
			`Title: title,`
			`Revision: "JMdict." + jmdictDate,`
			`Sequenced: true,`
			`Attribution: edrdgAttribution,`
			`}`

			`return writeDb(`
			`outputPath,`
			`index,`
			`recordData,`
			`stride,`
			`pretty,`
			`)`
			`}`