Code cleanup
This commit is contained in:
parent
baaaa1ebf7
commit
5d4089581e
19
scraper.go
19
scraper.go
@ -43,7 +43,7 @@ type review struct {
|
||||
|
||||
type profiler interface {
|
||||
index(doc *goquery.Document) (string, []string)
|
||||
review(doc *goquery.Document) *review
|
||||
review(doc *goquery.Document) (string, string, map[string]float64, error)
|
||||
}
|
||||
|
||||
func makeAbsUrl(ref, base string) (string, error) {
|
||||
@ -83,17 +83,23 @@ func scrapeReview(url string, out chan review, cache *webCache, group *sync.Wait
|
||||
|
||||
doc, err := cache.load(url)
|
||||
if err != nil {
|
||||
log.Printf("failed to scrape review at %s (%v)", url, err)
|
||||
} else if r := prof.review(doc); r != nil {
|
||||
r.url = url
|
||||
out <- *r
|
||||
log.Printf("failed to load review at %s (%v)", url, err)
|
||||
return
|
||||
}
|
||||
|
||||
name, address, features, err := prof.review(doc)
|
||||
if err != nil {
|
||||
log.Printf("failed to scrape review at %s (%v)", url, err)
|
||||
return
|
||||
}
|
||||
|
||||
out <- review{name: name, address: address, features: features, url: url}
|
||||
}
|
||||
|
||||
func scrapeIndex(indexUrl string, out chan review, cache *webCache, prof profiler) {
|
||||
doc, err := cache.load(indexUrl)
|
||||
if err != nil {
|
||||
log.Printf("failed to scrape index at %s (%v)", indexUrl, err)
|
||||
log.Printf("failed to load index at %s (%v)", indexUrl, err)
|
||||
return
|
||||
}
|
||||
|
||||
@ -136,7 +142,6 @@ func scrape(url string, wc *webCache, gc *geoCache, prof profiler) []review {
|
||||
var reviews []review
|
||||
for {
|
||||
if r, ok := <-decodeChan; ok {
|
||||
log.Print(r.name)
|
||||
reviews = append(reviews, r)
|
||||
} else {
|
||||
return reviews
|
||||
|
42
tabelog.go
42
tabelog.go
@ -23,6 +23,7 @@
|
||||
package main
|
||||
|
||||
import (
|
||||
"errors"
|
||||
"strconv"
|
||||
"strings"
|
||||
|
||||
@ -48,34 +49,37 @@ func (tabelog) index(doc *goquery.Document) (string, []string) {
|
||||
return nextIndexUrl, reviewUrls
|
||||
}
|
||||
|
||||
func (tabelog) review(doc *goquery.Document) *review {
|
||||
var r review
|
||||
func (tabelog) review(doc *goquery.Document) (name, address string, features map[string]float64, err error) {
|
||||
name = doc.Find("a.rd-header__rst-name-main").Text()
|
||||
|
||||
r.name = doc.Find("a.rd-header__rst-name-main").Text()
|
||||
if addresses := doc.Find("p.rd-detail-info__rst-address"); addresses.Length() == 2 {
|
||||
r.address = strings.TrimSpace(addresses.First().Text())
|
||||
address = strings.TrimSpace(addresses.First().Text())
|
||||
} else {
|
||||
return nil
|
||||
err = errors.New("invalid value for address")
|
||||
return
|
||||
}
|
||||
|
||||
var err error
|
||||
|
||||
r.features = make(map[string]float64)
|
||||
if r.features["dishes"], err = strconv.ParseFloat(doc.Find("#js-rating-detail > dd:nth-child(2)").Text(), 8); err != nil {
|
||||
return nil
|
||||
features = make(map[string]float64)
|
||||
if features["dishes"], err = strconv.ParseFloat(doc.Find("#js-rating-detail > dd:nth-child(2)").Text(), 8); err != nil {
|
||||
err = errors.New("invalid value for dishes")
|
||||
return
|
||||
}
|
||||
if r.features["service"], err = strconv.ParseFloat(doc.Find("#js-rating-detail > dd:nth-child(4)").Text(), 8); err != nil {
|
||||
return nil
|
||||
if features["service"], err = strconv.ParseFloat(doc.Find("#js-rating-detail > dd:nth-child(4)").Text(), 8); err != nil {
|
||||
err = errors.New("invalid value for service")
|
||||
return
|
||||
}
|
||||
if r.features["atmosphere"], err = strconv.ParseFloat(doc.Find("#js-rating-detail > dd:nth-child(6)").Text(), 8); err != nil {
|
||||
return nil
|
||||
if features["atmosphere"], err = strconv.ParseFloat(doc.Find("#js-rating-detail > dd:nth-child(6)").Text(), 8); err != nil {
|
||||
err = errors.New("invalid value for atmosphere")
|
||||
return
|
||||
}
|
||||
if r.features["cost"], err = strconv.ParseFloat(doc.Find("#js-rating-detail > dd:nth-child(8)").Text(), 8); err != nil {
|
||||
return nil
|
||||
if features["cost"], err = strconv.ParseFloat(doc.Find("#js-rating-detail > dd:nth-child(8)").Text(), 8); err != nil {
|
||||
err = errors.New("invalid value for cost")
|
||||
return
|
||||
}
|
||||
if r.features["drinks"], err = strconv.ParseFloat(doc.Find("#js-rating-detail > dd:nth-child(10)").Text(), 8); err != nil {
|
||||
return nil
|
||||
if features["drinks"], err = strconv.ParseFloat(doc.Find("#js-rating-detail > dd:nth-child(10)").Text(), 8); err != nil {
|
||||
err = errors.New("invalid value for drinks")
|
||||
return
|
||||
}
|
||||
|
||||
return &r
|
||||
return
|
||||
}
|
||||
|
Loading…
x
Reference in New Issue
Block a user