1

add non-english language support for edict

This commit is contained in:
Alex Yatskov 2017-06-10 16:53:31 -07:00
parent 7bb72f8bbc
commit 4ff2370fa3
7 changed files with 67 additions and 24 deletions

View File

@ -237,10 +237,15 @@ func detectFormat(path string) string {
} else {
base := filepath.Base(path)
switch base {
case "JMdict":
case "JMdict.xml":
case "JMdict_e":
case "JMdict_e.xml":
return "edict"
case "JMnedict":
case "JMnedict.xml":
return "enamdict"
case "kanjidic2":
case "kanjidic2.xml":
return "kanjidic"
}
@ -248,3 +253,28 @@ func detectFormat(path string) string {
return ""
}
// convertLanguage translates a lowercase English language name (for example
// "german") into the three-letter code this file associates with it (for
// example "ger"). The match is exact and case-sensitive. Any name that is not
// in the table, including "english" and the empty string, yields "".
func convertLanguage(language string) string {
	// Indexing a map with a missing key returns the zero value, so names
	// that are not listed fall through to "" without an explicit check.
	codes := map[string]string{
		"dutch":     "dut",
		"french":    "fre",
		"german":    "ger",
		"hungarian": "hun",
		"italian":   "ita",
		"russian":   "rus",
		"slovenian": "slv",
		"spanish":   "spa",
		"swedish":   "swe",
	}
	return codes[language]
}

View File

@ -97,7 +97,7 @@ func jmdictBuildTagMeta(entities map[string]string) map[string]dbTagMeta {
return tags
}
func jmdictExtractTerms(edictEntry jmdict.JmdictEntry) []dbTerm {
func jmdictExtractTerms(edictEntry jmdict.JmdictEntry, language string) []dbTerm {
var terms []dbTerm
convert := func(reading jmdict.JmdictReading, kanji *jmdict.JmdictKanji) {
@ -133,7 +133,21 @@ func jmdictExtractTerms(edictEntry jmdict.JmdictEntry) []dbTerm {
continue
}
term := dbTerm{Reading: termBase.Reading, Expression: termBase.Expression}
term := dbTerm{
Reading: termBase.Reading,
Expression: termBase.Expression,
}
for _, glossary := range sense.Glossary {
if glossary.Language == nil && language == "" || glossary.Language != nil && language == *glossary.Language {
term.Glossary = append(term.Glossary, glossary.Content)
}
}
if len(term.Glossary) == 0 {
continue
}
term.addTags(termBase.Tags...)
term.addTags(sense.PartsOfSpeech...)
term.addTags(sense.Fields...)
@ -146,10 +160,6 @@ func jmdictExtractTerms(edictEntry jmdict.JmdictEntry) []dbTerm {
term.addTags(partsOfSpeech...)
}
for _, glossary := range sense.Glossary {
term.Glossary = append(term.Glossary, glossary.Content)
}
jmdictBuildRules(&term)
jmdictBuildScore(&term)
@ -172,7 +182,7 @@ func jmdictExtractTerms(edictEntry jmdict.JmdictEntry) []dbTerm {
return terms
}
func jmdictExportDb(inputPath, outputDir, title string, stride int, pretty bool) error {
func jmdictExportDb(inputPath, outputDir, language, title string, stride int, pretty bool) error {
reader, err := os.Open(inputPath)
if err != nil {
return err
@ -184,9 +194,11 @@ func jmdictExportDb(inputPath, outputDir, title string, stride int, pretty bool)
return err
}
langTag := convertLanguage(language)
var terms dbTermList
for _, entry := range dict.Entries {
terms = append(terms, jmdictExtractTerms(entry)...)
terms = append(terms, jmdictExtractTerms(entry, langTag)...)
}
if title == "" {

View File

@ -97,7 +97,7 @@ func jmnedictExtractTerms(enamdictEntry jmdict.JmnedictEntry) []dbTerm {
return terms
}
func jmnedictExportDb(inputPath, outputDir, title string, stride int, pretty bool) error {
func jmnedictExportDb(inputPath, outputDir, language, title string, stride int, pretty bool) error {
reader, err := os.Open(inputPath)
if err != nil {
return err
@ -110,8 +110,8 @@ func jmnedictExportDb(inputPath, outputDir, title string, stride int, pretty boo
}
var terms dbTermList
for _, e := range dict.Entries {
terms = append(terms, jmnedictExtractTerms(e)...)
for _, entry := range dict.Entries {
terms = append(terms, jmnedictExtractTerms(entry)...)
}
if title == "" {

View File

@ -62,7 +62,7 @@ type epwingExtractor interface {
getRevision() string
}
func epwingExportDb(inputPath, outputDir, title string, stride int, pretty bool) error {
func epwingExportDb(inputPath, outputDir, language, title string, stride int, pretty bool) error {
stat, err := os.Stat(inputPath)
if err != nil {
return err

2
gui.go
View File

@ -124,7 +124,7 @@ func gui() error {
importButton.Enable()
})
if err := exportDb(inputPath, outputDir, format, title, DEFAULT_STRIDE, false); err != nil {
if err := exportDb(inputPath, outputDir, format, "english", title, DEFAULT_STRIDE, false); err != nil {
log.Print(err)
return
}

View File

@ -80,7 +80,7 @@ func kanjidicExtractKanji(entry jmdict.KanjidicCharacter) dbKanji {
return kanji
}
func kanjidicExportDb(inputPath, outputDir, title string, stride int, pretty bool) error {
func kanjidicExportDb(inputPath, outputDir, language, title string, stride int, pretty bool) error {
reader, err := os.Open(inputPath)
if err != nil {
return err

21
main.go
View File

@ -45,8 +45,8 @@ func usage() {
flag.PrintDefaults()
}
func exportDb(inputPath, outputDir, format, title string, stride int, pretty bool) error {
handlers := map[string]func(string, string, string, int, bool) error{
func exportDb(inputPath, outputDir, format, language, title string, stride int, pretty bool) error {
handlers := map[string]func(string, string, string, string, int, bool) error{
"edict": jmdictExportDb,
"enamdict": jmnedictExportDb,
"kanjidic": kanjidicExportDb,
@ -59,7 +59,7 @@ func exportDb(inputPath, outputDir, format, title string, stride int, pretty boo
}
log.Printf("converting '%s' to '%s' in '%s' format...", inputPath, outputDir, format)
return handler(inputPath, outputDir, title, stride, pretty)
return handler(inputPath, outputDir, language, title, stride, pretty)
}
func serveDb(serveDir string, port int) error {
@ -73,12 +73,13 @@ func makeTmpDir() (string, error) {
func main() {
var (
format = flag.String("format", "", "dictionary format [edict|enamdict|kanjidic|epwing]")
port = flag.Int("port", DEFAULT_PORT, "port to serve dictionary JSON on")
pretty = flag.Bool("pretty", false, "output prettified dictionary JSON")
serve = flag.Bool("serve", false, "serve dictionary JSON for extension")
stride = flag.Int("stride", DEFAULT_STRIDE, "dictionary bank stride")
title = flag.String("title", "", "dictionary title")
format = flag.String("format", "", "dictionary format [edict|enamdict|kanjidic|epwing]")
language = flag.String("language", "english", "dictionary language (if applicable)")
title = flag.String("title", "", "dictionary title")
port = flag.Int("port", DEFAULT_PORT, "port to serve dictionary JSON on")
stride = flag.Int("stride", DEFAULT_STRIDE, "dictionary bank stride")
pretty = flag.Bool("pretty", false, "output prettified dictionary JSON")
serve = flag.Bool("serve", false, "serve dictionary JSON for extension")
)
flag.Usage = usage
@ -122,7 +123,7 @@ func main() {
*serve = true
}
if err := exportDb(inputPath, outputDir, *format, *title, *stride, *pretty); err != nil {
if err := exportDb(inputPath, outputDir, *format, *language, *title, *stride, *pretty); err != nil {
log.Fatal(err)
}