1

add non-English language support for edict

This commit is contained in:
Alex Yatskov 2017-06-10 16:53:31 -07:00
parent 7bb72f8bbc
commit 4ff2370fa3
7 changed files with 67 additions and 24 deletions

View File

@ -237,10 +237,15 @@ func detectFormat(path string) string {
} else { } else {
base := filepath.Base(path) base := filepath.Base(path)
switch base { switch base {
case "JMdict":
case "JMdict.xml":
case "JMdict_e":
case "JMdict_e.xml": case "JMdict_e.xml":
return "edict" return "edict"
case "JMnedict":
case "JMnedict.xml": case "JMnedict.xml":
return "enamdict" return "enamdict"
case "kanjidic2":
case "kanjidic2.xml": case "kanjidic2.xml":
return "kanjidic" return "kanjidic"
} }
@ -248,3 +253,28 @@ func detectFormat(path string) string {
return "" return ""
} }
// convertLanguage translates a language name such as "german" into the
// three-letter tag used to pick glossary entries (here, "ger").
//
// Matching is exact and case-sensitive. Any name that is not in the table,
// including "english" and the empty string, yields "".
func convertLanguage(language string) string {
	tagsByName := map[string]string{
		"dutch":     "dut",
		"french":    "fre",
		"german":    "ger",
		"hungarian": "hun",
		"italian":   "ita",
		"russian":   "rus",
		"slovenian": "slv",
		"spanish":   "spa",
		"swedish":   "swe",
	}

	// Indexing a map with an absent key produces the zero value, so unlisted
	// names fall back to "" without an explicit default branch.
	return tagsByName[language]
}

View File

@ -97,7 +97,7 @@ func jmdictBuildTagMeta(entities map[string]string) map[string]dbTagMeta {
return tags return tags
} }
func jmdictExtractTerms(edictEntry jmdict.JmdictEntry) []dbTerm { func jmdictExtractTerms(edictEntry jmdict.JmdictEntry, language string) []dbTerm {
var terms []dbTerm var terms []dbTerm
convert := func(reading jmdict.JmdictReading, kanji *jmdict.JmdictKanji) { convert := func(reading jmdict.JmdictReading, kanji *jmdict.JmdictKanji) {
@ -133,7 +133,21 @@ func jmdictExtractTerms(edictEntry jmdict.JmdictEntry) []dbTerm {
continue continue
} }
term := dbTerm{Reading: termBase.Reading, Expression: termBase.Expression} term := dbTerm{
Reading: termBase.Reading,
Expression: termBase.Expression,
}
for _, glossary := range sense.Glossary {
if glossary.Language == nil && language == "" || glossary.Language != nil && language == *glossary.Language {
term.Glossary = append(term.Glossary, glossary.Content)
}
}
if len(term.Glossary) == 0 {
continue
}
term.addTags(termBase.Tags...) term.addTags(termBase.Tags...)
term.addTags(sense.PartsOfSpeech...) term.addTags(sense.PartsOfSpeech...)
term.addTags(sense.Fields...) term.addTags(sense.Fields...)
@ -146,10 +160,6 @@ func jmdictExtractTerms(edictEntry jmdict.JmdictEntry) []dbTerm {
term.addTags(partsOfSpeech...) term.addTags(partsOfSpeech...)
} }
for _, glossary := range sense.Glossary {
term.Glossary = append(term.Glossary, glossary.Content)
}
jmdictBuildRules(&term) jmdictBuildRules(&term)
jmdictBuildScore(&term) jmdictBuildScore(&term)
@ -172,7 +182,7 @@ func jmdictExtractTerms(edictEntry jmdict.JmdictEntry) []dbTerm {
return terms return terms
} }
func jmdictExportDb(inputPath, outputDir, title string, stride int, pretty bool) error { func jmdictExportDb(inputPath, outputDir, language, title string, stride int, pretty bool) error {
reader, err := os.Open(inputPath) reader, err := os.Open(inputPath)
if err != nil { if err != nil {
return err return err
@ -184,9 +194,11 @@ func jmdictExportDb(inputPath, outputDir, title string, stride int, pretty bool)
return err return err
} }
langTag := convertLanguage(language)
var terms dbTermList var terms dbTermList
for _, entry := range dict.Entries { for _, entry := range dict.Entries {
terms = append(terms, jmdictExtractTerms(entry)...) terms = append(terms, jmdictExtractTerms(entry, langTag)...)
} }
if title == "" { if title == "" {

View File

@ -97,7 +97,7 @@ func jmnedictExtractTerms(enamdictEntry jmdict.JmnedictEntry) []dbTerm {
return terms return terms
} }
func jmnedictExportDb(inputPath, outputDir, title string, stride int, pretty bool) error { func jmnedictExportDb(inputPath, outputDir, language, title string, stride int, pretty bool) error {
reader, err := os.Open(inputPath) reader, err := os.Open(inputPath)
if err != nil { if err != nil {
return err return err
@ -110,8 +110,8 @@ func jmnedictExportDb(inputPath, outputDir, title string, stride int, pretty boo
} }
var terms dbTermList var terms dbTermList
for _, e := range dict.Entries { for _, entry := range dict.Entries {
terms = append(terms, jmnedictExtractTerms(e)...) terms = append(terms, jmnedictExtractTerms(entry)...)
} }
if title == "" { if title == "" {

View File

@ -62,7 +62,7 @@ type epwingExtractor interface {
getRevision() string getRevision() string
} }
func epwingExportDb(inputPath, outputDir, title string, stride int, pretty bool) error { func epwingExportDb(inputPath, outputDir, language, title string, stride int, pretty bool) error {
stat, err := os.Stat(inputPath) stat, err := os.Stat(inputPath)
if err != nil { if err != nil {
return err return err

2
gui.go
View File

@ -124,7 +124,7 @@ func gui() error {
importButton.Enable() importButton.Enable()
}) })
if err := exportDb(inputPath, outputDir, format, title, DEFAULT_STRIDE, false); err != nil { if err := exportDb(inputPath, outputDir, format, "english", title, DEFAULT_STRIDE, false); err != nil {
log.Print(err) log.Print(err)
return return
} }

View File

@ -80,7 +80,7 @@ func kanjidicExtractKanji(entry jmdict.KanjidicCharacter) dbKanji {
return kanji return kanji
} }
func kanjidicExportDb(inputPath, outputDir, title string, stride int, pretty bool) error { func kanjidicExportDb(inputPath, outputDir, language, title string, stride int, pretty bool) error {
reader, err := os.Open(inputPath) reader, err := os.Open(inputPath)
if err != nil { if err != nil {
return err return err

13
main.go
View File

@ -45,8 +45,8 @@ func usage() {
flag.PrintDefaults() flag.PrintDefaults()
} }
func exportDb(inputPath, outputDir, format, title string, stride int, pretty bool) error { func exportDb(inputPath, outputDir, format, language, title string, stride int, pretty bool) error {
handlers := map[string]func(string, string, string, int, bool) error{ handlers := map[string]func(string, string, string, string, int, bool) error{
"edict": jmdictExportDb, "edict": jmdictExportDb,
"enamdict": jmnedictExportDb, "enamdict": jmnedictExportDb,
"kanjidic": kanjidicExportDb, "kanjidic": kanjidicExportDb,
@ -59,7 +59,7 @@ func exportDb(inputPath, outputDir, format, title string, stride int, pretty boo
} }
log.Printf("converting '%s' to '%s' in '%s' format...", inputPath, outputDir, format) log.Printf("converting '%s' to '%s' in '%s' format...", inputPath, outputDir, format)
return handler(inputPath, outputDir, title, stride, pretty) return handler(inputPath, outputDir, language, title, stride, pretty)
} }
func serveDb(serveDir string, port int) error { func serveDb(serveDir string, port int) error {
@ -74,11 +74,12 @@ func makeTmpDir() (string, error) {
func main() { func main() {
var ( var (
format = flag.String("format", "", "dictionary format [edict|enamdict|kanjidic|epwing]") format = flag.String("format", "", "dictionary format [edict|enamdict|kanjidic|epwing]")
language = flag.String("language", "english", "dictionary language (if applicable)")
title = flag.String("title", "", "dictionary title")
port = flag.Int("port", DEFAULT_PORT, "port to serve dictionary JSON on") port = flag.Int("port", DEFAULT_PORT, "port to serve dictionary JSON on")
stride = flag.Int("stride", DEFAULT_STRIDE, "dictionary bank stride")
pretty = flag.Bool("pretty", false, "output prettified dictionary JSON") pretty = flag.Bool("pretty", false, "output prettified dictionary JSON")
serve = flag.Bool("serve", false, "serve dictionary JSON for extension") serve = flag.Bool("serve", false, "serve dictionary JSON for extension")
stride = flag.Int("stride", DEFAULT_STRIDE, "dictionary bank stride")
title = flag.String("title", "", "dictionary title")
) )
flag.Usage = usage flag.Usage = usage
@ -122,7 +123,7 @@ func main() {
*serve = true *serve = true
} }
if err := exportDb(inputPath, outputDir, *format, *title, *stride, *pretty); err != nil { if err := exportDb(inputPath, outputDir, *format, *language, *title, *stride, *pretty); err != nil {
log.Fatal(err) log.Fatal(err)
} }