1
This commit is contained in:
Alex Yatskov 2016-07-28 21:39:35 -07:00
parent 04e38d2270
commit d0c5337c01
2 changed files with 67 additions and 9 deletions

View File

@ -22,7 +22,12 @@
package main
import "io"
import (
"encoding/xml"
"io"
"log"
"regexp"
)
type edictKanji struct {
// This element will contain a word or short phrase in Japanese
@ -228,5 +233,54 @@ type edictEntry struct {
}
// processEdict loads every EDICT entry from reader and logs the decoded
// entries.
//
// The writer argument is accepted so the function matches the
// func(io.Reader, io.Writer) error handler signature; nothing is written
// to it here.
//
// It returns the error reported by loadEdict, or nil on success.
func processEdict(reader io.Reader, writer io.Writer) error {
	entries, err := loadEdict(reader)
	if err != nil {
		// Nothing useful to log when loading failed; let the caller
		// decide how to report the error.
		return err
	}

	log.Print(entries)
	return nil
}
// loadEdict reads an XML stream from reader and returns every <entry>
// element decoded into an edictEntry, in document order.
//
// Each XML directive encountered is passed to parseEntities and the
// resulting map is installed as the decoder's entity table, so entity
// references appearing later in the stream can be resolved.
//
// Reaching the end of the input (io.EOF) ends the loop normally. Any other
// tokenizing, entity-parsing or element-decoding error aborts the load and
// is returned with a nil slice.
func loadEdict(reader io.Reader) ([]edictEntry, error) {
	var entries []edictEntry

	decoder := xml.NewDecoder(reader)
	for {
		token, err := decoder.Token()
		if err == io.EOF {
			// Token reports (nil, io.EOF) at a clean end of input.
			break
		}
		if err != nil {
			// Malformed or truncated XML: do not pretend the partial
			// result is a complete dictionary.
			return nil, err
		}

		switch t := token.(type) {
		case xml.Directive:
			entities, err := parseEntities(&t)
			if err != nil {
				return nil, err
			}
			decoder.Entity = entities
		case xml.StartElement:
			if t.Name.Local != "entry" {
				// Only <entry> elements are collected; other elements are
				// walked through token by token.
				continue
			}

			var entry edictEntry
			if err := decoder.DecodeElement(&entry, &t); err != nil {
				return nil, err
			}
			entries = append(entries, entry)
		}
	}

	return entries, nil
}
// edictEntityRe matches one double-quoted entity declaration of the form
//
//	<!ENTITY name "value">
//
// capturing the name (ASCII letters, digits, '_' and '-') and the value.
// The value is matched as "anything but a double quote" so that several
// declarations sharing a line are kept apart, and so that an empty value
// is accepted.
var edictEntityRe = regexp.MustCompile(`<!ENTITY\s+([0-9A-Za-z_-]+)\s+"([^"]*)"\s*>`)

// parseEntities extracts the entity declarations contained in the XML
// directive d and returns them as a name-to-replacement-text map, in the
// form expected by xml.Decoder.Entity.
//
// A directive without any matching declaration yields an empty, non-nil
// map. If a name is declared more than once, the last declaration wins.
// The returned error is currently always nil.
func parseEntities(d *xml.Directive) (map[string]string, error) {
	matches := edictEntityRe.FindAllStringSubmatch(string(*d), -1)

	entities := make(map[string]string, len(matches))
	for _, match := range matches {
		// match[1] is the entity name, match[2] its replacement text.
		entities[match[1]] = match[2]
	}

	return entities, nil
}

18
main.go
View File

@ -25,10 +25,10 @@ package main
import (
"errors"
"fmt"
"io"
"log"
"os"
"path"
"strings"
)
func usage() {
@ -36,6 +36,15 @@ func usage() {
}
func process(fileFormat, inputFile, outputFile string) error {
handlers := map[string]func(io.Reader, io.Writer) error{
"edict": processEdict,
}
handler, ok := handlers[fileFormat]
if !ok {
return errors.New("unrecognized file format")
}
input, err := os.Open(inputFile)
if err != nil {
return err
@ -46,12 +55,7 @@ func process(fileFormat, inputFile, outputFile string) error {
return err
}
switch strings.ToLower(fileFormat) {
case "edict":
return processEdict(input, output)
default:
return errors.New("unrecognized file format")
}
return handler(input, output)
}
func main() {