WIP
This commit is contained in:
parent
d0cf3364ef
commit
c8be7c69ea
15
scrape.go
15
scrape.go
@ -22,5 +22,20 @@
|
|||||||
|
|
||||||
package main
|
package main
|
||||||
|
|
||||||
|
import "log"
|
||||||
|
|
||||||
func main() {
|
func main() {
|
||||||
|
gc, err := newGeoCache("cache/geocache.json")
|
||||||
|
if err != nil {
|
||||||
|
log.Fatal(err)
|
||||||
|
}
|
||||||
|
defer gc.save()
|
||||||
|
|
||||||
|
wc, err := newWebCache("cache/webcache")
|
||||||
|
if err != nil {
|
||||||
|
log.Fatal(err)
|
||||||
|
}
|
||||||
|
|
||||||
|
t := tabelog{}
|
||||||
|
scrape("http://tabelog.com/en/kanagawa/rstLst/1/", wc, gc, t)
|
||||||
}
|
}
|
||||||
|
@ -43,7 +43,7 @@ type review struct {
|
|||||||
|
|
||||||
type profiler interface {
|
type profiler interface {
|
||||||
index(doc *goquery.Document) (string, []string)
|
index(doc *goquery.Document) (string, []string)
|
||||||
profile(doc *goquery.Document) review
|
profile(doc *goquery.Document) *review
|
||||||
}
|
}
|
||||||
|
|
||||||
func makeAbsUrl(base, ref string) (string, error) {
|
func makeAbsUrl(base, ref string) (string, error) {
|
||||||
@ -84,8 +84,8 @@ func scrapeReview(url string, out chan review, cache *webCache, group *sync.Wait
|
|||||||
doc, err := cache.load(url)
|
doc, err := cache.load(url)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
log.Printf("failed to scrape review at %s (%v)", url, err)
|
log.Printf("failed to scrape review at %s (%v)", url, err)
|
||||||
} else {
|
} else if r := prof.profile(doc); r != nil {
|
||||||
out <- prof.profile(doc)
|
out <- *r
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -137,6 +137,7 @@ func scrape(url string, wc *webCache, gc *geoCache, prof profiler) []review {
|
|||||||
var reviews []review
|
var reviews []review
|
||||||
for {
|
for {
|
||||||
if r, ok := <-decodeChan; ok {
|
if r, ok := <-decodeChan; ok {
|
||||||
|
log.Print(r.name)
|
||||||
reviews = append(reviews, r)
|
reviews = append(reviews, r)
|
||||||
} else {
|
} else {
|
||||||
return reviews
|
return reviews
|
||||||
|
@ -32,7 +32,7 @@ import (
|
|||||||
type tabelog struct {
|
type tabelog struct {
|
||||||
}
|
}
|
||||||
|
|
||||||
func (t *tabelog) index(doc *goquery.Document) (string, []string) {
|
func (tabelog) index(doc *goquery.Document) (string, []string) {
|
||||||
var reviewUrls []string
|
var reviewUrls []string
|
||||||
doc.Find("div.list-rst__header > p > a").Each(func(index int, sel *goquery.Selection) {
|
doc.Find("div.list-rst__header > p > a").Each(func(index int, sel *goquery.Selection) {
|
||||||
if href, ok := sel.Attr("href"); ok {
|
if href, ok := sel.Attr("href"); ok {
|
||||||
@ -48,7 +48,7 @@ func (t *tabelog) index(doc *goquery.Document) (string, []string) {
|
|||||||
return nextIndexUrl, reviewUrls
|
return nextIndexUrl, reviewUrls
|
||||||
}
|
}
|
||||||
|
|
||||||
func (t *tabelog) profile(doc *goquery.Document) *review {
|
func (tabelog) profile(doc *goquery.Document) *review {
|
||||||
var r review
|
var r review
|
||||||
|
|
||||||
r.url = doc.Url.String()
|
r.url = doc.Url.String()
|
||||||
|
Loading…
Reference in New Issue
Block a user