1
This commit is contained in:
Alex Yatskov 2015-09-18 13:46:56 +09:00
parent dc38ce4a5f
commit 9e9bd99f0b
2 changed files with 17 additions and 10 deletions

View File

@ -65,11 +65,11 @@ func scrapeData(urlsPath, geocachePath, webcachePath string) ([]review, error) {
}
defer file.Close()
var (
ctx = scrapeCtx{gc, wc}
reviews []review
)
ctx := scrapeCtx{gc, wc}
tlog := tabelog{scrapeCtx: ctx}
tadv := tripadvisor{scrapeCtx: ctx}
var reviews []review
for scanner := bufio.NewScanner(file); scanner.Scan(); {
if line := scanner.Text(); len(line) > 0 {
parsed, err := url.Parse(line)
@ -77,14 +77,21 @@ func scrapeData(urlsPath, geocachePath, webcachePath string) ([]review, error) {
return nil, err
}
var revs []review
switch parsed.Host {
case "tabelog.com":
reviews = append(reviews, scrape(line, tabelog{scrapeCtx: ctx})...)
revs, err = scrape(line, tlog)
case "www.tripadvisor.com":
reviews = append(reviews, scrape(line, tripadvisor{scrapeCtx: ctx})...)
revs, err = scrape(line, tadv)
default:
return nil, errors.New("unsupported review site")
err = errors.New("unsupported review site")
}
if err != nil {
return nil, err
}
reviews = append(reviews, revs...)
}
}

View File

@ -144,17 +144,17 @@ func scrapeIndex(indexUrl string, out chan review, scr scraper) error {
return nil
}
func scrape(url string, scr scraper) []review {
func scrape(url string, scr scraper) ([]review, error) {
out := make(chan review, 128)
in := make(chan review, 128)
go scrapeIndex(url, in, scr)
go decodeReviews(in, out, scr)
err := scrapeIndex(url, in, scr)
var reviews []review
for rev, ok := <-out; ok; {
reviews = append(reviews, rev)
}
return reviews
return reviews, err
}