1
This commit is contained in:
Alex Yatskov 2015-08-16 19:12:16 +09:00
parent d0cf3364ef
commit c8be7c69ea
3 changed files with 21 additions and 5 deletions

View File

@ -22,5 +22,20 @@
package main
import "log"
// main opens the geo cache and the web cache, then starts a scrape of the
// hard-coded tabelog.com listing URL using the tabelog profiler. Failure to
// open either cache is fatal.
func main() {
	geo, geoErr := newGeoCache("cache/geocache.json")
	if geoErr != nil {
		log.Fatal(geoErr)
	}
	// Runs when main returns normally. log.Fatal exits the process via
	// os.Exit, so this deferred call is skipped on the fatal path below.
	defer geo.save()

	web, webErr := newWebCache("cache/webcache")
	if webErr != nil {
		log.Fatal(webErr)
	}

	// The slice of reviews returned by scrape is intentionally not used here.
	scrape("http://tabelog.com/en/kanagawa/rstLst/1/", web, geo, tabelog{})
}

View File

@ -43,7 +43,7 @@ type review struct {
type profiler interface {
index(doc *goquery.Document) (string, []string)
profile(doc *goquery.Document) review
profile(doc *goquery.Document) *review
}
func makeAbsUrl(base, ref string) (string, error) {
@ -84,8 +84,8 @@ func scrapeReview(url string, out chan review, cache *webCache, group *sync.Wait
doc, err := cache.load(url)
if err != nil {
log.Printf("failed to scrape review at %s (%v)", url, err)
} else {
out <- prof.profile(doc)
} else if r := prof.profile(doc); r != nil {
out <- *r
}
}
@ -137,6 +137,7 @@ func scrape(url string, wc *webCache, gc *geoCache, prof profiler) []review {
var reviews []review
for {
if r, ok := <-decodeChan; ok {
log.Print(r.name)
reviews = append(reviews, r)
} else {
return reviews

View File

@ -32,7 +32,7 @@ import (
// tabelog is a stateless type whose index and profile methods let it be
// passed to scrape as the profiler.
type tabelog struct{}
func (t *tabelog) index(doc *goquery.Document) (string, []string) {
func (tabelog) index(doc *goquery.Document) (string, []string) {
var reviewUrls []string
doc.Find("div.list-rst__header > p > a").Each(func(index int, sel *goquery.Selection) {
if href, ok := sel.Attr("href"); ok {
@ -48,7 +48,7 @@ func (t *tabelog) index(doc *goquery.Document) (string, []string) {
return nextIndexUrl, reviewUrls
}
func (t *tabelog) profile(doc *goquery.Document) *review {
func (tabelog) profile(doc *goquery.Document) *review {
var r review
r.url = doc.Url.String()