This commit is contained in:
Alex Yatskov 2016-07-30 18:04:49 -07:00
parent 15dc6933e5
commit 4df5d16aa1
3 changed files with 46 additions and 60 deletions

View File

@ -24,10 +24,34 @@ package jmdict
import ( import (
"encoding/xml" "encoding/xml"
"io"
"regexp" "regexp"
) )
func parseEntities(d *xml.Directive) (map[string]string, error) { func parseEntries(reader io.Reader, callback func(decoder *xml.Decoder, element xml.StartElement) error) (map[string]string, error) {
decoder := xml.NewDecoder(reader)
for {
token, _ := decoder.Token()
if token == nil {
break
}
switch startElement := token.(type) {
case xml.Directive:
directive := token.(xml.Directive)
decoder.Entity = parseEntities(&directive)
case xml.StartElement:
if err := callback(decoder, startElement); err != nil {
return nil, err
}
}
}
return decoder.Entity, nil
}
func parseEntities(d *xml.Directive) map[string]string {
re := regexp.MustCompile("<!ENTITY\\s([0-9\\-A-z]+)\\s\"(.+)\">") re := regexp.MustCompile("<!ENTITY\\s([0-9\\-A-z]+)\\s\"(.+)\">")
matches := re.FindAllStringSubmatch(string(*d), -1) matches := re.FindAllStringSubmatch(string(*d), -1)
@ -36,5 +60,5 @@ func parseEntities(d *xml.Directive) (map[string]string, error) {
entities[match[1]] = match[2] entities[match[1]] = match[2]
} }
return entities, nil return entities
} }

View File

@ -22,11 +22,8 @@
package jmdict package jmdict
import ( import "io"
"encoding/xml" import "encoding/xml"
"io"
"log"
)
// Entries consist of kanji elements, reading elements, // Entries consist of kanji elements, reading elements,
// general information and sense elements. Each entry must have at // general information and sense elements. Each entry must have at
@ -232,34 +229,17 @@ type edictSense struct {
} }
func LoadEdict(reader io.Reader) ([]edictEntry, map[string]string, error) { func LoadEdict(reader io.Reader) ([]edictEntry, map[string]string, error) {
decoder := xml.NewDecoder(reader)
var entries []edictEntry var entries []edictEntry
for {
token, _ := decoder.Token() entities, err := parseEntries(reader, func(decoder *xml.Decoder, element xml.StartElement) error {
if token == nil { var entry edictEntry
break if err := decoder.DecodeElement(&entry, &element); err != nil {
return err
} }
switch startElement := token.(type) { entries = append(entries, entry)
case xml.Directive: return nil
directive := token.(xml.Directive) })
var err error
if decoder.Entity, err = parseEntities(&directive); err != nil {
return nil, nil, err
}
log.Print(decoder.Entity)
case xml.StartElement:
if startElement.Name.Local == "entry" {
var entry edictEntry
if err := decoder.DecodeElement(&entry, &startElement); err != nil {
return nil, nil, err
}
entries = append(entries, entry) return entries, entities, err
}
}
}
return entries, decoder.Entity, nil
} }

View File

@ -25,7 +25,6 @@ package jmdict
import ( import (
"encoding/xml" "encoding/xml"
"io" "io"
"log"
) )
// Entries consist of kanji elements, reading elements // Entries consist of kanji elements, reading elements
@ -131,34 +130,17 @@ type enamTranslation struct {
} }
func LoadEnamdict(reader io.Reader) ([]enamdictEntry, map[string]string, error) { func LoadEnamdict(reader io.Reader) ([]enamdictEntry, map[string]string, error) {
decoder := xml.NewDecoder(reader)
var entries []enamdictEntry var entries []enamdictEntry
for {
token, _ := decoder.Token() entities, err := parseEntries(reader, func(decoder *xml.Decoder, element xml.StartElement) error {
if token == nil { var entry enamdictEntry
break if err := decoder.DecodeElement(&entry, &element); err != nil {
return err
} }
switch startElement := token.(type) { entries = append(entries, entry)
case xml.Directive: return nil
directive := token.(xml.Directive) })
var err error
if decoder.Entity, err = parseEntities(&directive); err != nil {
return nil, nil, err
}
log.Print(decoder.Entity)
case xml.StartElement:
if startElement.Name.Local == "entry" {
var entry enamdictEntry
if err := decoder.DecodeElement(&entry, &startElement); err != nil {
return nil, nil, err
}
entries = append(entries, entry) return entries, entities, err
}
}
}
return entries, decoder.Entity, nil
} }