diff --git a/edict.go b/edict.go
index 80b3115..ad58e43 100644
--- a/edict.go
+++ b/edict.go
@@ -22,7 +22,12 @@
 
 package main
 
-import "io"
+import (
+	"encoding/xml"
+	"io"
+	"log"
+	"regexp"
+)
 
 type edictKanji struct {
 	// This element will contain a word or short phrase in Japanese
@@ -228,5 +233,82 @@ type edictEntry struct {
 }
 
+// processEdict loads every entry from the dictionary XML read from reader.
+// writer is not used yet: the parsed entries are only logged.
 func processEdict(reader io.Reader, writer io.Writer) error {
-	return nil
+	entries, err := loadEdict(reader)
+	if err != nil {
+		return err
+	}
+
+	log.Print(entries)
+	return nil
+}
+
+// loadEdict decodes every <entry> element of the XML document read from
+// reader. Entities declared in XML directives (such as a DOCTYPE internal
+// subset) are registered with the decoder as they are encountered.
+func loadEdict(reader io.Reader) ([]edictEntry, error) {
+	var entries []edictEntry
+
+	decoder := xml.NewDecoder(reader)
+
+	for {
+		token, err := decoder.Token()
+		if err == io.EOF {
+			break
+		}
+		if err != nil {
+			// Report malformed or truncated input instead of returning
+			// a silently shortened entry list.
+			return nil, err
+		}
+
+		switch token := token.(type) {
+		case xml.Directive:
+			entities, err := parseEntities(&token)
+			if err != nil {
+				return nil, err
+			}
+
+			// Merge rather than replace so a later directive cannot
+			// discard entities declared by an earlier one.
+			if decoder.Entity == nil {
+				decoder.Entity = make(map[string]string, len(entities))
+			}
+			for name, value := range entities {
+				decoder.Entity[name] = value
+			}
+		case xml.StartElement:
+			if token.Name.Local != "entry" {
+				continue
+			}
+
+			var entry edictEntry
+			if err := decoder.DecodeElement(&entry, &token); err != nil {
+				return nil, err
+			}
+
+			entries = append(entries, entry)
+		}
+	}
+
+	return entries, nil
+}
+
+// entityDecl matches one general entity declaration with a double-quoted
+// value, e.g. <!ENTITY name "value">, capturing the name and the value.
+var entityDecl = regexp.MustCompile(`<!ENTITY\s+(\S+)\s+"([^"]*)"\s*>`)
+
+// parseEntities extracts the entity declarations contained in directive d
+// and returns them as a name-to-replacement map suitable for
+// xml.Decoder.Entity. The returned error is always nil.
+func parseEntities(d *xml.Directive) (map[string]string, error) {
+	matches := entityDecl.FindAllStringSubmatch(string(*d), -1)
+
+	entities := make(map[string]string, len(matches))
+	for _, match := range matches {
+		entities[match[1]] = match[2]
+	}
+
+	return entities, nil
 }
diff --git a/main.go b/main.go
index d185550..feba954 100644
--- a/main.go
+++ b/main.go
@@ -25,10 +25,11 @@ package main
 import (
 	"errors"
 	"fmt"
+	"io"
 	"log"
 	"os"
 	"path"
 	"strings"
 )
 
 func usage() {
@@ -36,6 +37,17 @@ func usage() {
 }
 
 func process(fileFormat, inputFile, outputFile string) error {
+	handlers := map[string]func(io.Reader, io.Writer) error{
+		"edict": processEdict,
+	}
+
+	// Format names are matched case-insensitively, as they were before the
+	// handlers were moved into a lookup table.
+	handler, ok := handlers[strings.ToLower(fileFormat)]
+	if !ok {
+		return errors.New("unrecognized file format")
+	}
+
 	input, err := os.Open(inputFile)
 	if err != nil {
 		return err
@@ -46,12 +58,7 @@ func process(fileFormat, inputFile, outputFile string) error {
 		return err
 	}
 
-	switch strings.ToLower(fileFormat) {
-	case "edict":
-		return processEdict(input, output)
-	default:
-		return errors.New("unrecognized file format")
-	}
+	return handler(input, output)
 }
 
 func main() {