1

Code cleanup

This commit is contained in:
Alex Yatskov 2015-08-17 13:59:19 +09:00
parent baaaa1ebf7
commit 5d4089581e
2 changed files with 35 additions and 26 deletions

View File

@ -43,7 +43,7 @@ type review struct {
// profiler is the set of site-specific parsing hooks the scraper calls on a
// loaded page.
type profiler interface {
	// index processes an index page and returns a string plus a list of
	// strings. The only implementation visible here returns them as
	// (nextIndexUrl, reviewUrls); presumably the next index page to visit and
	// the review pages linked from doc — confirm against the caller.
	index(doc *goquery.Document) (string, []string)

	// review extracts, in order, the name, the address and the numeric
	// feature ratings of a single review page. A non-nil error means the page
	// could not be parsed and the other results must be ignored.
	review(doc *goquery.Document) (string, string, map[string]float64, error)
}
func makeAbsUrl(ref, base string) (string, error) {
@ -83,17 +83,23 @@ func scrapeReview(url string, out chan review, cache *webCache, group *sync.Wait
doc, err := cache.load(url)
if err != nil {
log.Printf("failed to scrape review at %s (%v)", url, err)
} else if r := prof.review(doc); r != nil {
r.url = url
out <- *r
log.Printf("failed to load review at %s (%v)", url, err)
return
}
name, address, features, err := prof.review(doc)
if err != nil {
log.Printf("failed to scrape review at %s (%v)", url, err)
return
}
out <- review{name: name, address: address, features: features, url: url}
}
func scrapeIndex(indexUrl string, out chan review, cache *webCache, prof profiler) {
doc, err := cache.load(indexUrl)
if err != nil {
log.Printf("failed to scrape index at %s (%v)", indexUrl, err)
log.Printf("failed to load index at %s (%v)", indexUrl, err)
return
}
@ -136,7 +142,6 @@ func scrape(url string, wc *webCache, gc *geoCache, prof profiler) []review {
var reviews []review
for {
if r, ok := <-decodeChan; ok {
log.Print(r.name)
reviews = append(reviews, r)
} else {
return reviews

View File

@ -23,6 +23,7 @@
package main
import (
"errors"
"strconv"
"strings"
@ -48,34 +49,37 @@ func (tabelog) index(doc *goquery.Document) (string, []string) {
return nextIndexUrl, reviewUrls
}
func (tabelog) review(doc *goquery.Document) *review {
var r review
func (tabelog) review(doc *goquery.Document) (name, address string, features map[string]float64, err error) {
name = doc.Find("a.rd-header__rst-name-main").Text()
r.name = doc.Find("a.rd-header__rst-name-main").Text()
if addresses := doc.Find("p.rd-detail-info__rst-address"); addresses.Length() == 2 {
r.address = strings.TrimSpace(addresses.First().Text())
address = strings.TrimSpace(addresses.First().Text())
} else {
return nil
err = errors.New("invalid value for address")
return
}
var err error
r.features = make(map[string]float64)
if r.features["dishes"], err = strconv.ParseFloat(doc.Find("#js-rating-detail > dd:nth-child(2)").Text(), 8); err != nil {
return nil
features = make(map[string]float64)
if features["dishes"], err = strconv.ParseFloat(doc.Find("#js-rating-detail > dd:nth-child(2)").Text(), 8); err != nil {
err = errors.New("invalid value for dishes")
return
}
if r.features["service"], err = strconv.ParseFloat(doc.Find("#js-rating-detail > dd:nth-child(4)").Text(), 8); err != nil {
return nil
if features["service"], err = strconv.ParseFloat(doc.Find("#js-rating-detail > dd:nth-child(4)").Text(), 8); err != nil {
err = errors.New("invalid value for service")
return
}
if r.features["atmosphere"], err = strconv.ParseFloat(doc.Find("#js-rating-detail > dd:nth-child(6)").Text(), 8); err != nil {
return nil
if features["atmosphere"], err = strconv.ParseFloat(doc.Find("#js-rating-detail > dd:nth-child(6)").Text(), 8); err != nil {
err = errors.New("invalid value for atmosphere")
return
}
if r.features["cost"], err = strconv.ParseFloat(doc.Find("#js-rating-detail > dd:nth-child(8)").Text(), 8); err != nil {
return nil
if features["cost"], err = strconv.ParseFloat(doc.Find("#js-rating-detail > dd:nth-child(8)").Text(), 8); err != nil {
err = errors.New("invalid value for cost")
return
}
if r.features["drinks"], err = strconv.ParseFloat(doc.Find("#js-rating-detail > dd:nth-child(10)").Text(), 8); err != nil {
return nil
if features["drinks"], err = strconv.ParseFloat(doc.Find("#js-rating-detail > dd:nth-child(10)").Text(), 8); err != nil {
err = errors.New("invalid value for drinks")
return
}
return &r
return
}