Code cleanup
This commit is contained in:
parent
baaaa1ebf7
commit
5d4089581e
19
scraper.go
19
scraper.go
@ -43,7 +43,7 @@ type review struct {
|
|||||||
|
|
||||||
type profiler interface {
|
type profiler interface {
|
||||||
index(doc *goquery.Document) (string, []string)
|
index(doc *goquery.Document) (string, []string)
|
||||||
review(doc *goquery.Document) *review
|
review(doc *goquery.Document) (string, string, map[string]float64, error)
|
||||||
}
|
}
|
||||||
|
|
||||||
func makeAbsUrl(ref, base string) (string, error) {
|
func makeAbsUrl(ref, base string) (string, error) {
|
||||||
@ -83,17 +83,23 @@ func scrapeReview(url string, out chan review, cache *webCache, group *sync.Wait
|
|||||||
|
|
||||||
doc, err := cache.load(url)
|
doc, err := cache.load(url)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
log.Printf("failed to scrape review at %s (%v)", url, err)
|
log.Printf("failed to load review at %s (%v)", url, err)
|
||||||
} else if r := prof.review(doc); r != nil {
|
return
|
||||||
r.url = url
|
|
||||||
out <- *r
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
name, address, features, err := prof.review(doc)
|
||||||
|
if err != nil {
|
||||||
|
log.Printf("failed to scrape review at %s (%v)", url, err)
|
||||||
|
return
|
||||||
|
}
|
||||||
|
|
||||||
|
out <- review{name: name, address: address, features: features, url: url}
|
||||||
}
|
}
|
||||||
|
|
||||||
func scrapeIndex(indexUrl string, out chan review, cache *webCache, prof profiler) {
|
func scrapeIndex(indexUrl string, out chan review, cache *webCache, prof profiler) {
|
||||||
doc, err := cache.load(indexUrl)
|
doc, err := cache.load(indexUrl)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
log.Printf("failed to scrape index at %s (%v)", indexUrl, err)
|
log.Printf("failed to load index at %s (%v)", indexUrl, err)
|
||||||
return
|
return
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -136,7 +142,6 @@ func scrape(url string, wc *webCache, gc *geoCache, prof profiler) []review {
|
|||||||
var reviews []review
|
var reviews []review
|
||||||
for {
|
for {
|
||||||
if r, ok := <-decodeChan; ok {
|
if r, ok := <-decodeChan; ok {
|
||||||
log.Print(r.name)
|
|
||||||
reviews = append(reviews, r)
|
reviews = append(reviews, r)
|
||||||
} else {
|
} else {
|
||||||
return reviews
|
return reviews
|
||||||
|
42
tabelog.go
42
tabelog.go
@ -23,6 +23,7 @@
|
|||||||
package main
|
package main
|
||||||
|
|
||||||
import (
|
import (
|
||||||
|
"errors"
|
||||||
"strconv"
|
"strconv"
|
||||||
"strings"
|
"strings"
|
||||||
|
|
||||||
@ -48,34 +49,37 @@ func (tabelog) index(doc *goquery.Document) (string, []string) {
|
|||||||
return nextIndexUrl, reviewUrls
|
return nextIndexUrl, reviewUrls
|
||||||
}
|
}
|
||||||
|
|
||||||
func (tabelog) review(doc *goquery.Document) *review {
|
func (tabelog) review(doc *goquery.Document) (name, address string, features map[string]float64, err error) {
|
||||||
var r review
|
name = doc.Find("a.rd-header__rst-name-main").Text()
|
||||||
|
|
||||||
r.name = doc.Find("a.rd-header__rst-name-main").Text()
|
|
||||||
if addresses := doc.Find("p.rd-detail-info__rst-address"); addresses.Length() == 2 {
|
if addresses := doc.Find("p.rd-detail-info__rst-address"); addresses.Length() == 2 {
|
||||||
r.address = strings.TrimSpace(addresses.First().Text())
|
address = strings.TrimSpace(addresses.First().Text())
|
||||||
} else {
|
} else {
|
||||||
return nil
|
err = errors.New("invalid value for address")
|
||||||
|
return
|
||||||
}
|
}
|
||||||
|
|
||||||
var err error
|
features = make(map[string]float64)
|
||||||
|
if features["dishes"], err = strconv.ParseFloat(doc.Find("#js-rating-detail > dd:nth-child(2)").Text(), 8); err != nil {
|
||||||
r.features = make(map[string]float64)
|
err = errors.New("invalid value for dishes")
|
||||||
if r.features["dishes"], err = strconv.ParseFloat(doc.Find("#js-rating-detail > dd:nth-child(2)").Text(), 8); err != nil {
|
return
|
||||||
return nil
|
|
||||||
}
|
}
|
||||||
if r.features["service"], err = strconv.ParseFloat(doc.Find("#js-rating-detail > dd:nth-child(4)").Text(), 8); err != nil {
|
if features["service"], err = strconv.ParseFloat(doc.Find("#js-rating-detail > dd:nth-child(4)").Text(), 8); err != nil {
|
||||||
return nil
|
err = errors.New("invalid value for service")
|
||||||
|
return
|
||||||
}
|
}
|
||||||
if r.features["atmosphere"], err = strconv.ParseFloat(doc.Find("#js-rating-detail > dd:nth-child(6)").Text(), 8); err != nil {
|
if features["atmosphere"], err = strconv.ParseFloat(doc.Find("#js-rating-detail > dd:nth-child(6)").Text(), 8); err != nil {
|
||||||
return nil
|
err = errors.New("invalid value for atmosphere")
|
||||||
|
return
|
||||||
}
|
}
|
||||||
if r.features["cost"], err = strconv.ParseFloat(doc.Find("#js-rating-detail > dd:nth-child(8)").Text(), 8); err != nil {
|
if features["cost"], err = strconv.ParseFloat(doc.Find("#js-rating-detail > dd:nth-child(8)").Text(), 8); err != nil {
|
||||||
return nil
|
err = errors.New("invalid value for cost")
|
||||||
|
return
|
||||||
}
|
}
|
||||||
if r.features["drinks"], err = strconv.ParseFloat(doc.Find("#js-rating-detail > dd:nth-child(10)").Text(), 8); err != nil {
|
if features["drinks"], err = strconv.ParseFloat(doc.Find("#js-rating-detail > dd:nth-child(10)").Text(), 8); err != nil {
|
||||||
return nil
|
err = errors.New("invalid value for drinks")
|
||||||
|
return
|
||||||
}
|
}
|
||||||
|
|
||||||
return &r
|
return
|
||||||
}
|
}
|
||||||
|
Loading…
Reference in New Issue
Block a user