From a09898c0c54adab32aec8f99a9a9ca9261aead0f Mon Sep 17 00:00:00 2001 From: Alex Yatskov Date: Fri, 4 Nov 2016 09:05:11 -0700 Subject: [PATCH] WIP --- termmeta.go | 94 +++++++++++++++++++++++++++++++++++++++++++++++++++-- 1 file changed, 92 insertions(+), 2 deletions(-) diff --git a/termmeta.go b/termmeta.go index c275796..51eba18 100644 --- a/termmeta.go +++ b/termmeta.go @@ -27,11 +27,35 @@ import ( "io" "os" "path" + + "github.com/FooSoft/jmdict" ) type termMetaEntry struct { - Text string `json:"text"` - Tags string `json:"tags"` + Expression string `json:"exp"` + Reading string `json:"read"` + Tags []string `json:"tags"` +} + +func (meta *termMetaEntry) addTags(tags ...string) { + for _, tag := range tags { + if !hasString(tag, meta.Tags) { + meta.Tags = append(meta.Tags, tag) + } + } +} + +func (meta *termMetaEntry) addTagsPri(tags ...string) { + for _, tag := range tags { + switch tag { + case "news1", "ichi1", "spec1", "gai1": + meta.addTags("P") + fallthrough + case "news2", "ichi2", "spec2", "gai2": + meta.addTags(tag[:len(tag)-1]) + break + } + } } type termMetaIndex struct { @@ -98,6 +122,72 @@ func (index *termMetaIndex) output(dir string, pretty bool) error { return nil } +func extractEdictTermMeta(edictEntry jmdict.JmdictEntry) []termMetaEntry { + var entries []termMetaEntry + + convert := func(reading jmdict.JmdictReading, kanji *jmdict.JmdictKanji) { + if kanji != nil && reading.Restrictions != nil && !hasString(kanji.Expression, reading.Restrictions) { + return + } + + var entryBase termMetaEntry + entryBase.addTags(reading.Information...) + entryBase.addTagsPri(reading.Priorities...) + + if kanji == nil { + entryBase.Expression = reading.Reading + entryBase.addTagsPri(reading.Priorities...) + } else { + entryBase.Expression = kanji.Expression + entryBase.Reading = reading.Reading + entryBase.addTags(kanji.Information...) + + for _, priority := range kanji.Priorities { + if hasString(priority, reading.Priorities) { + entryBase.addTagsPri(priority) + } + } + } + + for _, sense := range edictEntry.Sense { + if sense.RestrictedReadings != nil && !hasString(reading.Reading, sense.RestrictedReadings) { + continue + } + + if kanji != nil && sense.RestrictedKanji != nil && !hasString(kanji.Expression, sense.RestrictedKanji) { + continue + } + + entry := termMetaEntry{ + Reading: entryBase.Reading, + Expression: entryBase.Expression, + } + + entry.addTags(entryBase.Tags...) + entry.addTags(sense.PartsOfSpeech...) + entry.addTags(sense.Fields...) + entry.addTags(sense.Misc...) + entry.addTags(sense.Dialects...) + + entries = append(entries, entry) + } + } + + if len(edictEntry.Kanji) > 0 { + for _, kanji := range edictEntry.Kanji { + for _, reading := range edictEntry.Readings { + convert(reading, &kanji) + } + } + } else { + for _, reading := range edictEntry.Readings { + convert(reading, nil) + } + } + + return entries +} + func outputTermMetaJson(dir string, reader io.Reader, flags int) error { // dict, entities, err := jmdict.LoadJmdictNoTransform(reader) // if err != nil {