From 9e9bd99f0b8d6d8249d32730dbf91d4a89272200 Mon Sep 17 00:00:00 2001 From: Alex Yatskov Date: Fri, 18 Sep 2015 13:46:56 +0900 Subject: [PATCH] WIP --- build/build.go | 21 ++++++++++++++------- build/scrape.go | 6 +++--- 2 files changed, 17 insertions(+), 10 deletions(-) diff --git a/build/build.go b/build/build.go index f532e8f..349334c 100644 --- a/build/build.go +++ b/build/build.go @@ -65,11 +65,11 @@ func scrapeData(urlsPath, geocachePath, webcachePath string) ([]review, error) { } defer file.Close() - var ( - ctx = scrapeCtx{gc, wc} - reviews []review - ) + ctx := scrapeCtx{gc, wc} + tlog := tabelog{scrapeCtx: ctx} + tadv := tripadvisor{scrapeCtx: ctx} + var reviews []review for scanner := bufio.NewScanner(file); scanner.Scan(); { if line := scanner.Text(); len(line) > 0 { parsed, err := url.Parse(line) @@ -77,14 +77,21 @@ func scrapeData(urlsPath, geocachePath, webcachePath string) ([]review, error) { return nil, err } + var revs []review switch parsed.Host { case "tabelog.com": - reviews = append(reviews, scrape(line, tabelog{scrapeCtx: ctx})...) + revs, err = scrape(line, tlog) case "www.tripadvisor.com": - reviews = append(reviews, scrape(line, tripadvisor{scrapeCtx: ctx})...) + revs, err = scrape(line, tadv) default: - return nil, errors.New("unsupported review site") + err = errors.New("unsupported review site") } + + if err != nil { + return nil, err + } + + reviews = append(reviews, revs...) } } diff --git a/build/scrape.go b/build/scrape.go index c423dfe..1c660b4 100644 --- a/build/scrape.go +++ b/build/scrape.go @@ -144,17 +144,17 @@ func scrapeIndex(indexUrl string, out chan review, scr scraper) error { return nil } -func scrape(url string, scr scraper) []review { +func scrape(url string, scr scraper) ([]review, error) { out := make(chan review, 128) in := make(chan review, 128) - go scrapeIndex(url, in, scr) go decodeReviews(in, out, scr) + err := scrapeIndex(url, in, scr) var reviews []review for rev, ok := <-out; ok; { reviews = append(reviews, rev) } - return reviews + return reviews, err }