WIP
This commit is contained in:
parent
d0cf3364ef
commit
c8be7c69ea
15
scrape.go
15
scrape.go
@ -22,5 +22,20 @@
|
||||
|
||||
package main
|
||||
|
||||
import "log"
|
||||
|
||||
func main() {
|
||||
gc, err := newGeoCache("cache/geocache.json")
|
||||
if err != nil {
|
||||
log.Fatal(err)
|
||||
}
|
||||
defer gc.save()
|
||||
|
||||
wc, err := newWebCache("cache/webcache")
|
||||
if err != nil {
|
||||
log.Fatal(err)
|
||||
}
|
||||
|
||||
t := tabelog{}
|
||||
scrape("http://tabelog.com/en/kanagawa/rstLst/1/", wc, gc, t)
|
||||
}
|
||||
|
@ -43,7 +43,7 @@ type review struct {
|
||||
|
||||
type profiler interface {
|
||||
index(doc *goquery.Document) (string, []string)
|
||||
profile(doc *goquery.Document) review
|
||||
profile(doc *goquery.Document) *review
|
||||
}
|
||||
|
||||
func makeAbsUrl(base, ref string) (string, error) {
|
||||
@ -84,8 +84,8 @@ func scrapeReview(url string, out chan review, cache *webCache, group *sync.Wait
|
||||
doc, err := cache.load(url)
|
||||
if err != nil {
|
||||
log.Printf("failed to scrape review at %s (%v)", url, err)
|
||||
} else {
|
||||
out <- prof.profile(doc)
|
||||
} else if r := prof.profile(doc); r != nil {
|
||||
out <- *r
|
||||
}
|
||||
}
|
||||
|
||||
@ -137,6 +137,7 @@ func scrape(url string, wc *webCache, gc *geoCache, prof profiler) []review {
|
||||
var reviews []review
|
||||
for {
|
||||
if r, ok := <-decodeChan; ok {
|
||||
log.Print(r.name)
|
||||
reviews = append(reviews, r)
|
||||
} else {
|
||||
return reviews
|
||||
|
@ -32,7 +32,7 @@ import (
|
||||
// tabelog is the profiler implementation for tabelog.com pages. It has no
// fields; its index and profile methods work only on the document passed in.
type tabelog struct{}
|
||||
|
||||
func (t *tabelog) index(doc *goquery.Document) (string, []string) {
|
||||
func (tabelog) index(doc *goquery.Document) (string, []string) {
|
||||
var reviewUrls []string
|
||||
doc.Find("div.list-rst__header > p > a").Each(func(index int, sel *goquery.Selection) {
|
||||
if href, ok := sel.Attr("href"); ok {
|
||||
@ -48,7 +48,7 @@ func (t *tabelog) index(doc *goquery.Document) (string, []string) {
|
||||
return nextIndexUrl, reviewUrls
|
||||
}
|
||||
|
||||
func (t *tabelog) profile(doc *goquery.Document) *review {
|
||||
func (tabelog) profile(doc *goquery.Document) *review {
|
||||
var r review
|
||||
|
||||
r.url = doc.Url.String()
|
||||
|
Loading…
Reference in New Issue
Block a user