1

Code cleanup

This commit is contained in:
Alex Yatskov 2015-08-17 13:59:19 +09:00
parent baaaa1ebf7
commit 5d4089581e
2 changed files with 35 additions and 26 deletions

View File

@ -43,7 +43,7 @@ type review struct {
type profiler interface { type profiler interface {
index(doc *goquery.Document) (string, []string) index(doc *goquery.Document) (string, []string)
review(doc *goquery.Document) *review review(doc *goquery.Document) (string, string, map[string]float64, error)
} }
func makeAbsUrl(ref, base string) (string, error) { func makeAbsUrl(ref, base string) (string, error) {
@ -83,17 +83,23 @@ func scrapeReview(url string, out chan review, cache *webCache, group *sync.Wait
doc, err := cache.load(url) doc, err := cache.load(url)
if err != nil { if err != nil {
log.Printf("failed to scrape review at %s (%v)", url, err) log.Printf("failed to load review at %s (%v)", url, err)
} else if r := prof.review(doc); r != nil { return
r.url = url
out <- *r
} }
name, address, features, err := prof.review(doc)
if err != nil {
log.Printf("failed to scrape review at %s (%v)", url, err)
return
}
out <- review{name: name, address: address, features: features, url: url}
} }
func scrapeIndex(indexUrl string, out chan review, cache *webCache, prof profiler) { func scrapeIndex(indexUrl string, out chan review, cache *webCache, prof profiler) {
doc, err := cache.load(indexUrl) doc, err := cache.load(indexUrl)
if err != nil { if err != nil {
log.Printf("failed to scrape index at %s (%v)", indexUrl, err) log.Printf("failed to load index at %s (%v)", indexUrl, err)
return return
} }
@ -136,7 +142,6 @@ func scrape(url string, wc *webCache, gc *geoCache, prof profiler) []review {
var reviews []review var reviews []review
for { for {
if r, ok := <-decodeChan; ok { if r, ok := <-decodeChan; ok {
log.Print(r.name)
reviews = append(reviews, r) reviews = append(reviews, r)
} else { } else {
return reviews return reviews

View File

@ -23,6 +23,7 @@
package main package main
import ( import (
"errors"
"strconv" "strconv"
"strings" "strings"
@ -48,34 +49,37 @@ func (tabelog) index(doc *goquery.Document) (string, []string) {
return nextIndexUrl, reviewUrls return nextIndexUrl, reviewUrls
} }
func (tabelog) review(doc *goquery.Document) *review { func (tabelog) review(doc *goquery.Document) (name, address string, features map[string]float64, err error) {
var r review name = doc.Find("a.rd-header__rst-name-main").Text()
r.name = doc.Find("a.rd-header__rst-name-main").Text()
if addresses := doc.Find("p.rd-detail-info__rst-address"); addresses.Length() == 2 { if addresses := doc.Find("p.rd-detail-info__rst-address"); addresses.Length() == 2 {
r.address = strings.TrimSpace(addresses.First().Text()) address = strings.TrimSpace(addresses.First().Text())
} else { } else {
return nil err = errors.New("invalid value for address")
return
} }
var err error features = make(map[string]float64)
if features["dishes"], err = strconv.ParseFloat(doc.Find("#js-rating-detail > dd:nth-child(2)").Text(), 8); err != nil {
r.features = make(map[string]float64) err = errors.New("invalid value for dishes")
if r.features["dishes"], err = strconv.ParseFloat(doc.Find("#js-rating-detail > dd:nth-child(2)").Text(), 8); err != nil { return
return nil
} }
if r.features["service"], err = strconv.ParseFloat(doc.Find("#js-rating-detail > dd:nth-child(4)").Text(), 8); err != nil { if features["service"], err = strconv.ParseFloat(doc.Find("#js-rating-detail > dd:nth-child(4)").Text(), 8); err != nil {
return nil err = errors.New("invalid value for service")
return
} }
if r.features["atmosphere"], err = strconv.ParseFloat(doc.Find("#js-rating-detail > dd:nth-child(6)").Text(), 8); err != nil { if features["atmosphere"], err = strconv.ParseFloat(doc.Find("#js-rating-detail > dd:nth-child(6)").Text(), 8); err != nil {
return nil err = errors.New("invalid value for atmosphere")
return
} }
if r.features["cost"], err = strconv.ParseFloat(doc.Find("#js-rating-detail > dd:nth-child(8)").Text(), 8); err != nil { if features["cost"], err = strconv.ParseFloat(doc.Find("#js-rating-detail > dd:nth-child(8)").Text(), 8); err != nil {
return nil err = errors.New("invalid value for cost")
return
} }
if r.features["drinks"], err = strconv.ParseFloat(doc.Find("#js-rating-detail > dd:nth-child(10)").Text(), 8); err != nil { if features["drinks"], err = strconv.ParseFloat(doc.Find("#js-rating-detail > dd:nth-child(10)").Text(), 8); err != nil {
return nil err = errors.New("invalid value for drinks")
return
} }
return &r return
} }