This commit is contained in:
Alex Yatskov 2016-07-30 18:04:49 -07:00
parent 15dc6933e5
commit 4df5d16aa1
3 changed files with 46 additions and 60 deletions

View File

@ -24,10 +24,34 @@ package jmdict
import (
"encoding/xml"
"io"
"regexp"
)
func parseEntities(d *xml.Directive) (map[string]string, error) {
// parseEntries reads XML tokens from reader until the end of input.
//
// When a directive token (such as a DOCTYPE declaration) is encountered, the
// entity table returned by parseEntities is installed on the decoder so that
// later entity references are resolved against it. Every start element token
// is handed to callback together with the decoder, so the callback can
// consume the element (for example with decoder.DecodeElement).
//
// NOTE(review): callback is invoked for every start element, including the
// document root; callers that only care about one element name presumably
// need to check element.Name.Local themselves — confirm against callers.
//
// It returns the decoder's entity table once the input is exhausted. If
// reading a token fails for any reason other than end of input, or if
// callback returns an error, that error is returned with a nil map.
func parseEntries(reader io.Reader, callback func(decoder *xml.Decoder, element xml.StartElement) error) (map[string]string, error) {
	decoder := xml.NewDecoder(reader)

	for {
		token, err := decoder.Token()
		if err == io.EOF {
			// Token reports a clean end of input as (nil, io.EOF).
			break
		}
		if err != nil {
			// Syntax errors and read failures must not be mistaken for
			// the end of the document.
			return nil, err
		}

		switch tok := token.(type) {
		case xml.Directive:
			decoder.Entity = parseEntities(&tok)
		case xml.StartElement:
			if err := callback(decoder, tok); err != nil {
				return nil, err
			}
		}
	}

	return decoder.Entity, nil
}
func parseEntities(d *xml.Directive) map[string]string {
re := regexp.MustCompile("<!ENTITY\\s([0-9\\-A-z]+)\\s\"(.+)\">")
matches := re.FindAllStringSubmatch(string(*d), -1)
@ -36,5 +60,5 @@ func parseEntities(d *xml.Directive) (map[string]string, error) {
entities[match[1]] = match[2]
}
return entities, nil
return entities
}

View File

@ -22,11 +22,8 @@
package jmdict
import (
"encoding/xml"
"io"
"log"
)
import "io"
import "encoding/xml"
// Entries consist of kanji elements, reading elements,
// general information and sense elements. Each entry must have at
@ -232,34 +229,17 @@ type edictSense struct {
}
func LoadEdict(reader io.Reader) ([]edictEntry, map[string]string, error) {
decoder := xml.NewDecoder(reader)
var entries []edictEntry
for {
token, _ := decoder.Token()
if token == nil {
break
entities, err := parseEntries(reader, func(decoder *xml.Decoder, element xml.StartElement) error {
var entry edictEntry
if err := decoder.DecodeElement(&entry, &element); err != nil {
return err
}
switch startElement := token.(type) {
case xml.Directive:
directive := token.(xml.Directive)
var err error
if decoder.Entity, err = parseEntities(&directive); err != nil {
return nil, nil, err
}
log.Print(decoder.Entity)
case xml.StartElement:
if startElement.Name.Local == "entry" {
var entry edictEntry
if err := decoder.DecodeElement(&entry, &startElement); err != nil {
return nil, nil, err
}
entries = append(entries, entry)
return nil
})
entries = append(entries, entry)
}
}
}
return entries, decoder.Entity, nil
return entries, entities, err
}

View File

@ -25,7 +25,6 @@ package jmdict
import (
"encoding/xml"
"io"
"log"
)
// Entries consist of kanji elements, reading elements
@ -131,34 +130,17 @@ type enamTranslation struct {
}
func LoadEnamdict(reader io.Reader) ([]enamdictEntry, map[string]string, error) {
decoder := xml.NewDecoder(reader)
var entries []enamdictEntry
for {
token, _ := decoder.Token()
if token == nil {
break
entities, err := parseEntries(reader, func(decoder *xml.Decoder, element xml.StartElement) error {
var entry enamdictEntry
if err := decoder.DecodeElement(&entry, &element); err != nil {
return err
}
switch startElement := token.(type) {
case xml.Directive:
directive := token.(xml.Directive)
var err error
if decoder.Entity, err = parseEntities(&directive); err != nil {
return nil, nil, err
}
log.Print(decoder.Entity)
case xml.StartElement:
if startElement.Name.Local == "entry" {
var entry enamdictEntry
if err := decoder.DecodeElement(&entry, &startElement); err != nil {
return nil, nil, err
}
entries = append(entries, entry)
return nil
})
entries = append(entries, entry)
}
}
}
return entries, decoder.Entity, nil
return entries, entities, err
}