1

Add support for undocumented frequency and information tags

Custom dictionary files using the JMdict XML format may contain
nonstandard frequency and information tags. Unrecognized tags are now
reported on standard output and kept verbatim as term tags.
This commit is contained in:
stephenmk 2023-01-29 22:34:13 -06:00
parent aab031972c
commit 0b328e1e07
No known key found for this signature in database
GPG Key ID: B6DA730DB06235F1

View File

@ -130,6 +130,9 @@ func (h *headword) SetFlags(infoTags, freqTags []string) {
h.IsAteji = true h.IsAteji = true
case "gikun": case "gikun":
h.IsGikun = true h.IsGikun = true
default:
fmt.Println("Unknown information tag type: " + infoTag)
h.TermTags = append(h.TermTags, infoTag)
} }
} }
if h.IsOutdated && h.IsRareKanji { if h.IsOutdated && h.IsRareKanji {
@ -138,16 +141,16 @@ func (h *headword) SetFlags(infoTags, freqTags []string) {
} }
func (h *headword) SetTermTags(freqTags []string) { func (h *headword) SetTermTags(freqTags []string) {
h.TermTags = []string{}
if h.IsPriority { if h.IsPriority {
h.TermTags = append(h.TermTags, priorityTagName) h.TermTags = append(h.TermTags, priorityTagName)
} }
knownFreqTags := []string{"ichi1", "ichi2", "gai1", "gai2", "spec1", "spec2"}
for _, tag := range freqTags { for _, tag := range freqTags {
isNewsFreqTag, _ := regexp.MatchString(`nf\d\d`, tag) isNewsFreqTag, _ := regexp.MatchString(`nf\d\d`, tag)
if isNewsFreqTag { if isNewsFreqTag {
// nf tags are divided into ranks of 500 // nf tags are divided into ranks of 500
// (nf01 to nf48), but it will be easier // (nf01 to nf48). Let's combine them into
// for the user to read 1k, 2k, etc. // ranks of 1k (news1k, news2k, ..., news24k).
var i int var i int
if _, err := fmt.Sscanf(tag, "nf%2d", &i); err == nil { if _, err := fmt.Sscanf(tag, "nf%2d", &i); err == nil {
i = (i + (i % 2)) / 2 i = (i + (i % 2)) / 2
@ -155,10 +158,15 @@ func (h *headword) SetTermTags(freqTags []string) {
h.TermTags = append(h.TermTags, newsTag) h.TermTags = append(h.TermTags, newsTag)
} }
} else if tag == "news1" || tag == "news2" { } else if tag == "news1" || tag == "news2" {
// News tags are derived from the nf
// rankings, so these are not needed.
continue continue
} else { } else if slices.Contains(knownFreqTags, tag) {
tagWithoutTheNumber := tag[:len(tag)-1] // "ichi", "gai", or "spec" tagWithoutTheNumber := tag[:len(tag)-1]
h.TermTags = append(h.TermTags, tagWithoutTheNumber) h.TermTags = append(h.TermTags, tagWithoutTheNumber)
} else {
fmt.Println("Unknown frequency tag type: " + tag)
h.TermTags = append(h.TermTags, tag)
} }
} }
if h.IsIrregular { if h.IsIrregular {