diff --git a/scrape.go b/scrape.go index 04552ed..f75da90 100644 --- a/scrape.go +++ b/scrape.go @@ -22,5 +22,20 @@ package main +import "log" + func main() { + gc, err := newGeoCache("cache/geocache.json") + if err != nil { + log.Fatal(err) + } + defer gc.save() + + wc, err := newWebCache("cache/webcache") + if err != nil { + log.Fatal(err) + } + + t := tabelog{} + scrape("http://tabelog.com/en/kanagawa/rstLst/1/", wc, gc, t) } diff --git a/scraper.go b/scraper.go index 2956e14..953141f 100644 --- a/scraper.go +++ b/scraper.go @@ -43,7 +43,7 @@ type review struct { type profiler interface { index(doc *goquery.Document) (string, []string) - profile(doc *goquery.Document) review + profile(doc *goquery.Document) *review } func makeAbsUrl(base, ref string) (string, error) { @@ -84,8 +84,8 @@ func scrapeReview(url string, out chan review, cache *webCache, group *sync.Wait doc, err := cache.load(url) if err != nil { log.Printf("failed to scrape review at %s (%v)", url, err) - } else { - out <- prof.profile(doc) + } else if r := prof.profile(doc); r != nil { + out <- *r } } @@ -137,6 +137,7 @@ func scrape(url string, wc *webCache, gc *geoCache, prof profiler) []review { var reviews []review for { if r, ok := <-decodeChan; ok { + log.Print(r.name) reviews = append(reviews, r) } else { return reviews diff --git a/tabelog.go b/tabelog.go index 67842ef..08833d8 100644 --- a/tabelog.go +++ b/tabelog.go @@ -32,7 +32,7 @@ import ( type tabelog struct { } -func (t *tabelog) index(doc *goquery.Document) (string, []string) { +func (tabelog) index(doc *goquery.Document) (string, []string) { var reviewUrls []string doc.Find("div.list-rst__header > p > a").Each(func(index int, sel *goquery.Selection) { if href, ok := sel.Attr("href"); ok { @@ -48,7 +48,7 @@ func (t *tabelog) index(doc *goquery.Document) (string, []string) { return nextIndexUrl, reviewUrls } -func (t *tabelog) profile(doc *goquery.Document) *review { +func (tabelog) profile(doc *goquery.Document) *review { var r review r.url = doc.Url.String()