From 5d4089581ecb478beef1d8d05f9c3f8a07c352ba Mon Sep 17 00:00:00 2001 From: Alex Yatskov Date: Mon, 17 Aug 2015 13:59:19 +0900 Subject: [PATCH] Code cleanup --- scraper.go | 19 ++++++++++++------- tabelog.go | 42 +++++++++++++++++++++++------------------- 2 files changed, 35 insertions(+), 26 deletions(-) diff --git a/scraper.go b/scraper.go index 8b8994e..5ea0a79 100644 --- a/scraper.go +++ b/scraper.go @@ -43,7 +43,7 @@ type review struct { type profiler interface { index(doc *goquery.Document) (string, []string) - review(doc *goquery.Document) *review + review(doc *goquery.Document) (string, string, map[string]float64, error) } func makeAbsUrl(ref, base string) (string, error) { @@ -83,17 +83,23 @@ func scrapeReview(url string, out chan review, cache *webCache, group *sync.Wait doc, err := cache.load(url) if err != nil { - log.Printf("failed to scrape review at %s (%v)", url, err) - } else if r := prof.review(doc); r != nil { - r.url = url - out <- *r + log.Printf("failed to load review at %s (%v)", url, err) + return } + + name, address, features, err := prof.review(doc) + if err != nil { + log.Printf("failed to scrape review at %s (%v)", url, err) + return + } + + out <- review{name: name, address: address, features: features, url: url} } func scrapeIndex(indexUrl string, out chan review, cache *webCache, prof profiler) { doc, err := cache.load(indexUrl) if err != nil { - log.Printf("failed to scrape index at %s (%v)", indexUrl, err) + log.Printf("failed to load index at %s (%v)", indexUrl, err) return } @@ -136,7 +142,6 @@ func scrape(url string, wc *webCache, gc *geoCache, prof profiler) []review { var reviews []review for { if r, ok := <-decodeChan; ok { - log.Print(r.name) reviews = append(reviews, r) } else { return reviews diff --git a/tabelog.go b/tabelog.go index 92d7155..f2efda9 100644 --- a/tabelog.go +++ b/tabelog.go @@ -23,6 +23,7 @@ package main import ( + "errors" "strconv" "strings" @@ -48,34 +49,37 @@ func (tabelog) index(doc *goquery.Document) (string, []string) { return nextIndexUrl, reviewUrls } -func (tabelog) review(doc *goquery.Document) *review { - var r review +func (tabelog) review(doc *goquery.Document) (name, address string, features map[string]float64, err error) { + name = doc.Find("a.rd-header__rst-name-main").Text() - r.name = doc.Find("a.rd-header__rst-name-main").Text() if addresses := doc.Find("p.rd-detail-info__rst-address"); addresses.Length() == 2 { - r.address = strings.TrimSpace(addresses.First().Text()) + address = strings.TrimSpace(addresses.First().Text()) } else { - return nil + err = errors.New("invalid value for address") + return } - var err error - - r.features = make(map[string]float64) - if r.features["dishes"], err = strconv.ParseFloat(doc.Find("#js-rating-detail > dd:nth-child(2)").Text(), 8); err != nil { - return nil + features = make(map[string]float64) + if features["dishes"], err = strconv.ParseFloat(doc.Find("#js-rating-detail > dd:nth-child(2)").Text(), 8); err != nil { + err = errors.New("invalid value for dishes") + return } - if r.features["service"], err = strconv.ParseFloat(doc.Find("#js-rating-detail > dd:nth-child(4)").Text(), 8); err != nil { - return nil + if features["service"], err = strconv.ParseFloat(doc.Find("#js-rating-detail > dd:nth-child(4)").Text(), 8); err != nil { + err = errors.New("invalid value for service") + return } - if r.features["atmosphere"], err = strconv.ParseFloat(doc.Find("#js-rating-detail > dd:nth-child(6)").Text(), 8); err != nil { - return nil + if features["atmosphere"], err = strconv.ParseFloat(doc.Find("#js-rating-detail > dd:nth-child(6)").Text(), 8); err != nil { + err = errors.New("invalid value for atmosphere") + return } - if r.features["cost"], err = strconv.ParseFloat(doc.Find("#js-rating-detail > dd:nth-child(8)").Text(), 8); err != nil { - return nil + if features["cost"], err = strconv.ParseFloat(doc.Find("#js-rating-detail > dd:nth-child(8)").Text(), 8); err != nil { + err = errors.New("invalid value for cost") + return } - if r.features["drinks"], err = strconv.ParseFloat(doc.Find("#js-rating-detail > dd:nth-child(10)").Text(), 8); err != nil { - return nil + if features["drinks"], err = strconv.ParseFloat(doc.Find("#js-rating-detail > dd:nth-child(10)").Text(), 8); err != nil { + err = errors.New("invalid value for drinks") + return } - return &r + return }