Updates
This commit is contained in:
parent
50221007aa
commit
cbf71c9123
11
scrape.go
11
scrape.go
@ -43,7 +43,7 @@ func main() {
|
||||
|
||||
tasks := []scrapeTask{
|
||||
{"http://tabelog.com/en/kanagawa/rstLst/1/", tabelog{}},
|
||||
// {"http://www.tripadvisor.com/Restaurants-g298173-Yokohama_Kanagawa_Prefecture_Kanto.html", tripadvisor{}},
|
||||
{"http://www.tripadvisor.com/Restaurants-g298173-Yokohama_Kanagawa_Prefecture_Kanto.html", tripadvisor{}},
|
||||
// {"http://www.tripadvisor.com/Restaurants-g1021277-Fujisawa_Kanagawa_Prefecture_Kanto.html", tripadvisor{}},
|
||||
// {"http://www.tripadvisor.com/Restaurants-g1021279-Chigasaki_Kanagawa_Prefecture_Kanto.html", tripadvisor{}},
|
||||
// {"http://www.tripadvisor.com/Restaurants-g298172-Kawasaki_Kanagawa_Prefecture_Kanto.html", tripadvisor{}},
|
||||
@ -51,15 +51,8 @@ func main() {
|
||||
// {"http://www.tripadvisor.com/Restaurants-g298184-Tokyo_Tokyo_Prefecture_Kanto.html", tripadvisor{}},
|
||||
}
|
||||
|
||||
out := make(chan restaurant)
|
||||
|
||||
for _, task := range tasks {
|
||||
scrape(task.url, out, wc, gc, task.scr)
|
||||
scrape(task.url, wc, gc, task.scr)
|
||||
}
|
||||
|
||||
for {
|
||||
if _, ok := <-out; !ok {
|
||||
return
|
||||
}
|
||||
}
|
||||
}
|
||||
|
13
scraper.go
13
scraper.go
@ -136,8 +136,19 @@ func scrapeIndex(indexUrl string, out chan restaurant, wc *webCache, scr scraper
|
||||
}
|
||||
}
|
||||
|
||||
func scrape(url string, out chan restaurant, wc *webCache, gc *geoCache, scr scraper) {
|
||||
func scrape(url string, wc *webCache, gc *geoCache, scr scraper) []restaurant {
|
||||
out := make(chan restaurant)
|
||||
in := make(chan restaurant)
|
||||
|
||||
go scrapeIndex(url, in, wc, scr)
|
||||
go decodeReviews(in, out, gc)
|
||||
|
||||
var results []restaurant
|
||||
for {
|
||||
if res, ok := <-out; ok {
|
||||
results = append(results, res)
|
||||
} else {
|
||||
return results
|
||||
}
|
||||
}
|
||||
}
|
||||
|
Loading…
Reference in New Issue
Block a user