1

Merge branch 'language'

This commit is contained in:
Alex Yatskov 2017-06-26 20:37:21 -07:00
commit b4717f43cf
8 changed files with 223 additions and 218 deletions

View File

@ -19,76 +19,25 @@ contributions are welcome.
## Installation ##
Builds of Yomichan Import are currently available for Linux, Mac OS X, and Windows. The required version of
Builds of Yomichan Import are currently available for Linux, Mac OS X, and Windows. The necessary version of
[Zero-EPWING](https://foosoft.net/projects/zero-epwing) is included for processing EPWING dictionaries.
* [yomichan-import_linux.tar.gz](https://foosoft.net/projects/yomichan-import/dl/yomichan-import_linux.tar.gz): (GTK+ 3 required for GUI)
* [yomichan-import_darwin.tar.gz](https://foosoft.net/projects/yomichan-import/dl/yomichan-import_darwin.tar.gz)
* [yomichan-import_windows.zip](https://foosoft.net/projects/yomichan-import/dl/yomichan-import_windows.zip) (64 bit Vista or above, no console output)
## Using the Graphical Interface ##
## Basic Usage ##
In most cases, it is sufficient to run the application without command line arguments and use the graphical interface.
Follow the steps below to import your dictionary into Yomichan:
Please follow the steps outlined below to import your custom dictionary into Yomichan:
1. Launch the `yomichan-import` executable.
2. Specify the path to the dictionary you wish to convert (path to `CATALOGS` file for EPWING dictionaries).
3. Specify a network port to use (the default port `9876` should be fine for most configurations).
4. Specify the dictionary format from the provided options.
5. Press the button labeled *Import dictionary...* and wait for processing to complete.
6. Once you see the message `starting dictionary server on port 9876...`, the dictionary data is ready to be imported.
7. In Yomichan, open the options page and select the *Local dictionary* item in the dictionary importer drop-down menu.
8. When `http://localhost:9876/index.json` is displayed in the address text-box, press the *Import* button to begin import.
9. Wait for the import progress to complete (a progress bar is displayed during dictionary processing).
10. Close Yomichan Import once the import process has finished.
2. Specify the source path of the dictionary you wish to convert.
3. Specify the target path of the dictionary ZIP archive that you wish to create.
4. Press the button labeled *Import dictionary...* and wait for processing to complete.
5. On the Yomichan options page, browse to the dictionary ZIP archive file you created.
6. Wait for the import progress to complete before closing the options page.
[![Import window](https://foosoft.net/projects/yomichan-import/img/import-thumb.png)](https://foosoft.net/projects/yomichan-import/img/import.png)
## Using the Command Line ##
Yomichan Import can be used as a command line application. When executed with the `--help` argument, usage instructions
will be displayed (except on Windows).
```
Usage: yomichan-import [options] input-path [output-dir]
https://foosoft.net/projects/yomichan-import/
Parameters:
-format string
dictionary format [edict|enamdict|kanjidic|epwing]
-port int
port to serve dictionary JSON on (default 9876)
-pretty
output prettified dictionary JSON
-serve
serve dictionary JSON for extension
-stride int
dictionary bank stride (default 10000)
-title string
dictionary title
```
In most cases it is sufficient to simply provide the path to the dictionary resource you wish to process, without
explicitly specifying a format. Yomichan Import will attempt to automatically determine the format of the dictionary
based on the contents of the path:
| Format | Resource |
| ------------ | ------------------------------------ |
| **edict** | file named `JMdict_e.xml` |
| **enamdict** | file named `JMnedict.xml` |
| **kanjidic** | file named `kanjidic2.xml` |
| **epwing** | directory with file named `CATALOGS` |
For example, if you wanted to process an EPWING dictionary titled Daijirin, you could do so with the following command
(shown on Linux):
```
$ ./yomichan-import dict/Kokugo/Daijirin/
```
Yomichan Import will now begin the conversion process, which can take a couple of minutes to complete. Once you see the
message `starting dictionary server on port 9876...` output to your console, you can use Yomichan to import the
processed dictionary data using the same steps as described in the *Using the Graphical Interface* section.
[![Importer](https://foosoft.net/projects/yomichan-import/img/import-thumb.png)](https://foosoft.net/projects/yomichan-import/img/import.png)
## License ##

View File

@ -23,10 +23,12 @@
package main
import (
"archive/zip"
"bytes"
"encoding/json"
"errors"
"fmt"
"os"
"path"
"path/filepath"
"strings"
)
@ -115,9 +117,12 @@ func (kanji dbKanjiList) crush() [][]interface{} {
return results
}
func writeDb(outputDir, title string, revision string, termRecords [][]interface{}, kanjiRecords [][]interface{}, tagMeta map[string]dbTagMeta, stride int, pretty bool) error {
func writeDb(outputPath, title, revision string, termRecords [][]interface{}, kanjiRecords [][]interface{}, tagMeta map[string]dbTagMeta, stride int, pretty bool) error {
const DB_VERSION = 1
var zbuff bytes.Buffer
zip := zip.NewWriter(&zbuff)
marshalJson := func(obj interface{}, pretty bool) ([]byte, error) {
if pretty {
return json.MarshalIndent(obj, "", " ")
@ -142,13 +147,12 @@ func writeDb(outputDir, title string, revision string, termRecords [][]interface
return 0, err
}
fp, err := os.Create(path.Join(outputDir, fmt.Sprintf("%s_bank_%d.json", prefix, i/stride+1)))
zw, err := zip.Create(fmt.Sprintf("%s_bank_%d.json", prefix, i/stride+1))
if err != nil {
return 0, err
}
defer fp.Close()
if _, err = fp.Write(bytes); err != nil {
if _, err := zw.Write(bytes); err != nil {
return 0, err
}
@ -158,10 +162,6 @@ func writeDb(outputDir, title string, revision string, termRecords [][]interface
return bankCount, nil
}
if err := os.MkdirAll(outputDir, 0755); err != nil {
return err
}
var err error
var db struct {
Title string `json:"title"`
@ -190,17 +190,27 @@ func writeDb(outputDir, title string, revision string, termRecords [][]interface
return err
}
fp, err := os.Create(path.Join(outputDir, "index.json"))
zw, err := zip.Create("index.json")
if err != nil {
return err
}
defer fp.Close()
if _, err := fp.Write(bytes); err != nil {
if _, err := zw.Write(bytes); err != nil {
return err
}
return nil
zip.Close()
fp, err := os.Create(outputPath)
if err != nil {
return err
}
if _, err := fp.Write(zbuff.Bytes()); err != nil {
return err
}
return fp.Close()
}
func appendStringUnique(target []string, source ...string) []string {
@ -223,28 +233,29 @@ func hasString(needle string, haystack []string) bool {
return false
}
func detectFormat(path string) string {
func detectFormat(path string) (string, error) {
switch filepath.Base(path) {
case "JMdict", "JMdict.xml", "JMdict_e", "JMdict_e.xml":
return "edict", nil
case "JMnedict", "JMnedict.xml":
return "enamdict", nil
case "kanjidic2", "kanjidic2.xml":
return "kanjidic", nil
case "CATALOGS":
return "epwing", nil
}
info, err := os.Stat(path)
if err != nil {
return ""
return "", err
}
if info.IsDir() {
_, err := os.Stat(filepath.Join(path, "CATALOGS"))
if err == nil {
return "epwing"
}
} else {
base := filepath.Base(path)
switch base {
case "JMdict_e.xml":
return "edict"
case "JMnedict.xml":
return "enamdict"
case "kanjidic2.xml":
return "kanjidic"
return "epwing", nil
}
}
return ""
return "", errors.New("unrecognized dictionary format")
}

View File

@ -97,7 +97,7 @@ func jmdictBuildTagMeta(entities map[string]string) map[string]dbTagMeta {
return tags
}
func jmdictExtractTerms(edictEntry jmdict.JmdictEntry) []dbTerm {
func jmdictExtractTerms(edictEntry jmdict.JmdictEntry, language string) []dbTerm {
var terms []dbTerm
convert := func(reading jmdict.JmdictReading, kanji *jmdict.JmdictKanji) {
@ -133,7 +133,21 @@ func jmdictExtractTerms(edictEntry jmdict.JmdictEntry) []dbTerm {
continue
}
term := dbTerm{Reading: termBase.Reading, Expression: termBase.Expression}
term := dbTerm{
Reading: termBase.Reading,
Expression: termBase.Expression,
}
for _, glossary := range sense.Glossary {
if glossary.Language == nil && language == "" || glossary.Language != nil && language == *glossary.Language {
term.Glossary = append(term.Glossary, glossary.Content)
}
}
if len(term.Glossary) == 0 {
continue
}
term.addTags(termBase.Tags...)
term.addTags(sense.PartsOfSpeech...)
term.addTags(sense.Fields...)
@ -146,10 +160,6 @@ func jmdictExtractTerms(edictEntry jmdict.JmdictEntry) []dbTerm {
term.addTags(partsOfSpeech...)
}
for _, glossary := range sense.Glossary {
term.Glossary = append(term.Glossary, glossary.Content)
}
jmdictBuildRules(&term)
jmdictBuildScore(&term)
@ -172,7 +182,7 @@ func jmdictExtractTerms(edictEntry jmdict.JmdictEntry) []dbTerm {
return terms
}
func jmdictExportDb(inputPath, outputDir, title string, stride int, pretty bool) error {
func jmdictExportDb(inputPath, outputPath, language, title string, stride int, pretty bool) error {
reader, err := os.Open(inputPath)
if err != nil {
return err
@ -184,9 +194,31 @@ func jmdictExportDb(inputPath, outputDir, title string, stride int, pretty bool)
return err
}
var langTag string
switch language {
case "dutch":
langTag = "dut"
case "french":
langTag = "fre"
case "german":
langTag = "ger"
case "hungarian":
langTag = "hun"
case "italian":
langTag = "ita"
case "russian":
langTag = "rus"
case "slovenian":
langTag = "slv"
case "spanish":
langTag = "spa"
case "swedish":
langTag = "swe"
}
var terms dbTermList
for _, entry := range dict.Entries {
terms = append(terms, jmdictExtractTerms(entry)...)
terms = append(terms, jmdictExtractTerms(entry, langTag)...)
}
if title == "" {
@ -194,7 +226,7 @@ func jmdictExportDb(inputPath, outputDir, title string, stride int, pretty bool)
}
return writeDb(
outputDir,
outputPath,
title,
JMDICT_REVISION,
terms.crush(),

View File

@ -97,7 +97,7 @@ func jmnedictExtractTerms(enamdictEntry jmdict.JmnedictEntry) []dbTerm {
return terms
}
func jmnedictExportDb(inputPath, outputDir, title string, stride int, pretty bool) error {
func jmnedictExportDb(inputPath, outputPath, language, title string, stride int, pretty bool) error {
reader, err := os.Open(inputPath)
if err != nil {
return err
@ -110,8 +110,8 @@ func jmnedictExportDb(inputPath, outputDir, title string, stride int, pretty boo
}
var terms dbTermList
for _, e := range dict.Entries {
terms = append(terms, jmnedictExtractTerms(e)...)
for _, entry := range dict.Entries {
terms = append(terms, jmnedictExtractTerms(entry)...)
}
if title == "" {
@ -119,7 +119,7 @@ func jmnedictExportDb(inputPath, outputDir, title string, stride int, pretty boo
}
return writeDb(
outputDir,
outputPath,
title,
JMNEDICT_REVISION,
terms.crush(),

View File

@ -62,14 +62,22 @@ type epwingExtractor interface {
getRevision() string
}
func epwingExportDb(inputPath, outputDir, title string, stride int, pretty bool) error {
func epwingExportDb(inputPath, outputPath, language, title string, stride int, pretty bool) error {
stat, err := os.Stat(inputPath)
if err != nil {
return err
}
var data []byte
var toolExec bool
if stat.IsDir() {
toolExec = true
} else if filepath.Base(inputPath) == "CATALOGS" {
inputPath = filepath.Dir(inputPath)
toolExec = true
}
var data []byte
if toolExec {
toolPath := filepath.Join("bin", runtime.GOOS, "zero-epwing")
if runtime.GOOS == "windows" {
toolPath += ".exe"
@ -193,7 +201,7 @@ func epwingExportDb(inputPath, outputDir, title string, stride int, pretty bool)
}
return writeDb(
outputDir,
outputPath,
title,
strings.Join(revisions, ";"),
terms.crush(),

102
gui.go
View File

@ -44,36 +44,33 @@ func (l *logger) Write(p []byte) (n int, err error) {
func gui() error {
return ui.Main(func() {
pathEntry := ui.NewEntry()
browseButton := ui.NewButton("Browse...")
pathBox := ui.NewHorizontalBox()
pathBox.Append(pathEntry, true)
pathBox.Append(browseButton, false)
pathSourceEntry := ui.NewEntry()
pathSourceButton := ui.NewButton("Browse...")
pathSourceBox := ui.NewHorizontalBox()
pathSourceBox.Append(pathSourceEntry, true)
pathSourceBox.Append(pathSourceButton, false)
portSpin := ui.NewSpinbox(0, 65535)
portSpin.SetValue(DEFAULT_PORT)
formatCombo := ui.NewCombobox()
formatCombo.Append("EPWING")
formatCombo.Append("EDICT")
formatCombo.Append("ENAMDICT")
formatCombo.Append("KANJIDIC")
formatCombo.SetSelected(0)
pathTargetEntry := ui.NewEntry()
pathTargetButton := ui.NewButton("Browse...")
pathTargetBox := ui.NewHorizontalBox()
pathTargetBox.Append(pathTargetEntry, true)
pathTargetBox.Append(pathTargetButton, false)
titleEntry := ui.NewEntry()
languageEntry := ui.NewEntry()
outputEntry := ui.NewEntry()
importButton := ui.NewButton("Import dictionary...")
mainBox := ui.NewVerticalBox()
mainBox.Append(ui.NewLabel("Path to dictionary source (CATALOGS file for EPWING):"), false)
mainBox.Append(pathBox, false)
mainBox.Append(ui.NewLabel("Dictionary title (leave blank for default):"), false)
mainBox.Append(ui.NewLabel("Path to dictionary source (CATALOGS file for EPWING)"), false)
mainBox.Append(pathSourceBox, false)
mainBox.Append(ui.NewLabel("Path to dictionary target ZIP file"), false)
mainBox.Append(pathTargetBox, false)
mainBox.Append(ui.NewLabel("Dictionary display title (blank for default)"), false)
mainBox.Append(titleEntry, false)
mainBox.Append(ui.NewLabel("Network port for extension server:"), false)
mainBox.Append(portSpin, false)
mainBox.Append(ui.NewLabel("Dictionary format:"), false)
mainBox.Append(formatCombo, false)
mainBox.Append(ui.NewLabel("Application output:"), false)
mainBox.Append(ui.NewLabel("Dictionary glossary language (blank for English)"), false)
mainBox.Append(languageEntry, false)
mainBox.Append(ui.NewLabel("Application output"), false)
mainBox.Append(outputEntry, false)
mainBox.Append(ui.NewVerticalBox(), true)
mainBox.Append(importButton, false)
@ -82,9 +79,19 @@ func gui() error {
window.SetMargined(true)
window.SetChild(mainBox)
browseButton.OnClicked(func(*ui.Button) {
pathSourceButton.OnClicked(func(*ui.Button) {
if path := ui.OpenFile(window); len(path) > 0 {
pathEntry.SetText(path)
pathSourceEntry.SetText(path)
}
})
pathTargetButton.OnClicked(func(*ui.Button) {
if path := ui.SaveFile(window); len(path) > 0 {
if len(filepath.Ext(path)) == 0 {
path += ".zip"
}
pathTargetEntry.SetText(path)
}
})
@ -94,45 +101,42 @@ func gui() error {
importButton.Disable()
outputEntry.SetText("")
var (
outputDir string
err error
)
if outputDir, err = makeTmpDir(); err != nil {
ui.MsgBoxError(window, "Error", err.Error())
return
}
inputPath := pathEntry.Text()
inputPath := pathSourceEntry.Text()
if len(inputPath) == 0 {
ui.MsgBoxError(window, "Error", "You must specify a dictionary source path.")
ui.MsgBoxError(window, "Error", "You must specify a dictionary source path")
importButton.Enable()
return
}
format := []string{"epwing", "edict", "enamdict", "kanjidic"}[formatCombo.Selected()]
if format == "epwing" {
inputPath = filepath.Dir(inputPath)
outputPath := pathTargetEntry.Text()
if len(outputPath) == 0 {
ui.MsgBoxError(window, "Error", "You must specify a dictionary target path")
importButton.Enable()
return
}
format, err := detectFormat(inputPath)
if err != nil {
ui.MsgBoxError(window, "Error", "Unable to detect dictionary format")
importButton.Enable()
return
}
title := titleEntry.Text()
port := portSpin.Value()
language := languageEntry.Text()
go func() {
var success bool
defer ui.QueueMain(func() {
importButton.Enable()
if success {
ui.MsgBox(window, "Success", "Conversion process complete")
} else {
ui.MsgBox(window, "Error", "Conversion process failed")
}
})
if err := exportDb(inputPath, outputDir, format, title, DEFAULT_STRIDE, false); err != nil {
log.Print(err)
return
}
if err := serveDb(outputDir, port); err != nil {
log.Print(err)
return
}
success = exportDb(inputPath, outputPath, format, language, title, DEFAULT_STRIDE, false) == nil
}()
})

View File

@ -32,9 +32,23 @@ import (
const KANJIDIC_REVISION = "kanjidic1"
func kanjidicExtractKanji(entry jmdict.KanjidicCharacter) dbKanji {
func kanjidicExtractKanji(entry jmdict.KanjidicCharacter, language string) *dbKanji {
if entry.ReadingMeaning == nil {
return nil
}
kanji := dbKanji{Character: entry.Literal}
for _, m := range entry.ReadingMeaning.Meanings {
if m.Language == nil && language == "" || m.Language != nil && language == *m.Language {
kanji.Meanings = append(kanji.Meanings, m.Meaning)
}
}
if len(kanji.Meanings) == 0 {
return nil
}
if level := entry.Misc.JlptLevel; level != nil {
kanji.addTags(fmt.Sprintf("jlpt:%s", *level))
}
@ -60,27 +74,19 @@ func kanjidicExtractKanji(entry jmdict.KanjidicCharacter) dbKanji {
kanji.addTags(fmt.Sprintf("strokes:%s", counts[0]))
}
if entry.ReadingMeaning != nil {
for _, m := range entry.ReadingMeaning.Meanings {
if m.Language == nil || *m.Language == "en" {
kanji.Meanings = append(kanji.Meanings, m.Meaning)
}
}
for _, r := range entry.ReadingMeaning.Readings {
switch r.Type {
case "ja_on":
kanji.Onyomi = append(kanji.Onyomi, r.Value)
case "ja_kun":
kanji.Kunyomi = append(kanji.Kunyomi, r.Value)
}
for _, r := range entry.ReadingMeaning.Readings {
switch r.Type {
case "ja_on":
kanji.Onyomi = append(kanji.Onyomi, r.Value)
case "ja_kun":
kanji.Kunyomi = append(kanji.Kunyomi, r.Value)
}
}
return kanji
return &kanji
}
func kanjidicExportDb(inputPath, outputDir, title string, stride int, pretty bool) error {
func kanjidicExportDb(inputPath, outputPath, language, title string, stride int, pretty bool) error {
reader, err := os.Open(inputPath)
if err != nil {
return err
@ -92,9 +98,22 @@ func kanjidicExportDb(inputPath, outputDir, title string, stride int, pretty boo
return err
}
var langTag string
switch language {
case "french":
langTag = "fr"
case "spanish":
langTag = "es"
case "portuguese":
langTag = "pt"
}
var kanji dbKanjiList
for _, entry := range dict.Characters {
kanji = append(kanji, kanjidicExtractKanji(entry))
kanjiCurr := kanjidicExtractKanji(entry, langTag)
if kanjiCurr != nil {
kanji = append(kanji, *kanjiCurr)
}
}
tagMeta := map[string]dbTagMeta{
@ -111,7 +130,7 @@ func kanjidicExportDb(inputPath, outputDir, title string, stride int, pretty boo
}
return writeDb(
outputDir,
outputPath,
title,
KANJIDIC_REVISION,
nil,

76
main.go
View File

@ -28,43 +28,45 @@ import (
"fmt"
"io/ioutil"
"log"
"net/http"
"os"
"path"
"strings"
)
const (
DEFAULT_STRIDE = 10000
DEFAULT_PORT = 9876
DEFAULT_STRIDE = 10000
DEFAULT_PORT = 9876
DEFAULT_LANGUAGE = "english"
)
func usage() {
fmt.Fprintf(os.Stderr, "Usage: %s [options] input-path [output-dir]\n", path.Base(os.Args[0]))
fmt.Fprintf(os.Stderr, "Usage: %s [options] input-path output-path\n", path.Base(os.Args[0]))
fmt.Fprint(os.Stderr, "https://foosoft.net/projects/yomichan-import/\n\n")
fmt.Fprint(os.Stderr, "Parameters:\n")
flag.PrintDefaults()
}
func exportDb(inputPath, outputDir, format, title string, stride int, pretty bool) error {
handlers := map[string]func(string, string, string, int, bool) error{
func exportDb(inputPath, outputPath, format, language, title string, stride int, pretty bool) error {
handlers := map[string]func(string, string, string, string, int, bool) error{
"edict": jmdictExportDb,
"enamdict": jmnedictExportDb,
"kanjidic": kanjidicExportDb,
"epwing": epwingExportDb,
}
handler, ok := handlers[format]
handler, ok := handlers[strings.ToLower(format)]
if !ok {
return errors.New("unrecognized dictionray format")
return errors.New("unrecognized dictionary format")
}
log.Printf("converting '%s' to '%s' in '%s' format...", inputPath, outputDir, format)
return handler(inputPath, outputDir, title, stride, pretty)
}
log.Printf("converting '%s' to '%s' in '%s' format...", inputPath, outputPath, format)
if err := handler(inputPath, outputPath, strings.ToLower(language), title, stride, pretty); err != nil {
log.Printf("conversion process failed: %s", err.Error())
return err
}
func serveDb(serveDir string, port int) error {
log.Printf("starting dictionary server on port %d...\n", port)
return http.ListenAndServe(fmt.Sprintf(":%d", port), http.FileServer(http.Dir(serveDir)))
log.Print("conversion process complete")
return nil
}
func makeTmpDir() (string, error) {
@ -73,62 +75,42 @@ func makeTmpDir() (string, error) {
func main() {
var (
format = flag.String("format", "", "dictionary format [edict|enamdict|kanjidic|epwing]")
port = flag.Int("port", DEFAULT_PORT, "port to serve dictionary JSON on")
pretty = flag.Bool("pretty", false, "output prettified dictionary JSON")
serve = flag.Bool("serve", false, "serve dictionary JSON for extension")
stride = flag.Int("stride", DEFAULT_STRIDE, "dictionary bank stride")
title = flag.String("title", "", "dictionary title")
format = flag.String("format", "", "dictionary format [edict|enamdict|kanjidic|epwing]")
language = flag.String("language", DEFAULT_LANGUAGE, "dictionary language (if supported)")
title = flag.String("title", "", "dictionary title")
stride = flag.Int("stride", DEFAULT_STRIDE, "dictionary bank stride")
pretty = flag.Bool("pretty", false, "output prettified dictionary JSON")
)
flag.Usage = usage
flag.Parse()
var (
inputPath string
outputDir string
)
if flag.NArg() == 0 {
if flag.NArg() != 2 {
if err := gui(); err == nil {
return
} else {
usage()
os.Exit(2)
}
} else {
inputPath = flag.Arg(0)
if flag.NArg() > 1 {
outputDir = flag.Arg(1)
}
}
var (
inputPath = flag.Arg(0)
outputPath = flag.Arg(1)
)
if _, err := os.Stat(inputPath); err != nil {
log.Fatalf("dictionary path '%s' does not exist", inputPath)
}
if *format == "" {
if *format = detectFormat(inputPath); *format == "" {
log.Fatal("failed to detect dictionary format")
}
}
if outputDir == "" {
var err error
if outputDir, err = makeTmpDir(); err != nil {
if *format, err = detectFormat(inputPath); err != nil {
log.Fatal(err)
}
*serve = true
}
if err := exportDb(inputPath, outputDir, *format, *title, *stride, *pretty); err != nil {
if err := exportDb(inputPath, outputPath, *format, *language, *title, *stride, *pretty); err != nil {
log.Fatal(err)
}
if *serve {
if err := serveDb(outputDir, *port); err != nil {
log.Fatal(err)
}
}
}