From cbf71c91235f9548eef6ec11b30b57bebc307a67 Mon Sep 17 00:00:00 2001 From: Alex Yatskov Date: Mon, 17 Aug 2015 16:04:06 +0900 Subject: [PATCH] Updates --- scrape.go | 11 ++--------- scraper.go | 13 ++++++++++++- 2 files changed, 14 insertions(+), 10 deletions(-) diff --git a/scrape.go b/scrape.go index 63bc5e9..b713cb5 100644 --- a/scrape.go +++ b/scrape.go @@ -43,7 +43,7 @@ func main() { tasks := []scrapeTask{ {"http://tabelog.com/en/kanagawa/rstLst/1/", tabelog{}}, - // {"http://www.tripadvisor.com/Restaurants-g298173-Yokohama_Kanagawa_Prefecture_Kanto.html", tripadvisor{}}, + {"http://www.tripadvisor.com/Restaurants-g298173-Yokohama_Kanagawa_Prefecture_Kanto.html", tripadvisor{}}, // {"http://www.tripadvisor.com/Restaurants-g1021277-Fujisawa_Kanagawa_Prefecture_Kanto.html", tripadvisor{}}, // {"http://www.tripadvisor.com/Restaurants-g1021279-Chigasaki_Kanagawa_Prefecture_Kanto.html", tripadvisor{}}, // {"http://www.tripadvisor.com/Restaurants-g298172-Kawasaki_Kanagawa_Prefecture_Kanto.html", tripadvisor{}}, @@ -51,15 +51,8 @@ func main() { // {"http://www.tripadvisor.com/Restaurants-g298184-Tokyo_Tokyo_Prefecture_Kanto.html", tripadvisor{}}, } - out := make(chan restaurant) - for _, task := range tasks { - scrape(task.url, out, wc, gc, task.scr) + scrape(task.url, wc, gc, task.scr) } - for { - if _, ok := <-out; !ok { - return - } - } } diff --git a/scraper.go b/scraper.go index aff2bd0..3a73727 100644 --- a/scraper.go +++ b/scraper.go @@ -136,8 +136,19 @@ func scrapeIndex(indexUrl string, out chan restaurant, wc *webCache, scr scraper } } -func scrape(url string, out chan restaurant, wc *webCache, gc *geoCache, scr scraper) { +func scrape(url string, wc *webCache, gc *geoCache, scr scraper) []restaurant { + out := make(chan restaurant) in := make(chan restaurant) + go scrapeIndex(url, in, wc, scr) go decodeReviews(in, out, gc) + + var results []restaurant + for { + if res, ok := <-out; ok { + results = append(results, res) + } else { + return results + } + } }