WIP
This commit is contained in:
parent
dc38ce4a5f
commit
9e9bd99f0b
@ -65,11 +65,11 @@ func scrapeData(urlsPath, geocachePath, webcachePath string) ([]review, error) {
|
||||
}
|
||||
defer file.Close()
|
||||
|
||||
var (
|
||||
ctx = scrapeCtx{gc, wc}
|
||||
reviews []review
|
||||
)
|
||||
ctx := scrapeCtx{gc, wc}
|
||||
tlog := tabelog{scrapeCtx: ctx}
|
||||
tadv := tripadvisor{scrapeCtx: ctx}
|
||||
|
||||
var reviews []review
|
||||
for scanner := bufio.NewScanner(file); scanner.Scan(); {
|
||||
if line := scanner.Text(); len(line) > 0 {
|
||||
parsed, err := url.Parse(line)
|
||||
@ -77,14 +77,21 @@ func scrapeData(urlsPath, geocachePath, webcachePath string) ([]review, error) {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
var revs []review
|
||||
switch parsed.Host {
|
||||
case "tabelog.com":
|
||||
reviews = append(reviews, scrape(line, tabelog{scrapeCtx: ctx})...)
|
||||
revs, err = scrape(line, tlog)
|
||||
case "www.tripadvisor.com":
|
||||
reviews = append(reviews, scrape(line, tripadvisor{scrapeCtx: ctx})...)
|
||||
revs, err = scrape(line, tadv)
|
||||
default:
|
||||
return nil, errors.New("unsupported review site")
|
||||
err = errors.New("unsupported review site")
|
||||
}
|
||||
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
reviews = append(reviews, revs...)
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -144,17 +144,17 @@ func scrapeIndex(indexUrl string, out chan review, scr scraper) error {
|
||||
return nil
|
||||
}
|
||||
|
||||
func scrape(url string, scr scraper) []review {
|
||||
func scrape(url string, scr scraper) ([]review, error) {
|
||||
out := make(chan review, 128)
|
||||
in := make(chan review, 128)
|
||||
|
||||
go scrapeIndex(url, in, scr)
|
||||
go decodeReviews(in, out, scr)
|
||||
err := scrapeIndex(url, in, scr)
|
||||
|
||||
var reviews []review
|
||||
for rev, ok := <-out; ok; {
|
||||
reviews = append(reviews, rev)
|
||||
}
|
||||
|
||||
return reviews
|
||||
return reviews, err
|
||||
}
|
||||
|
Loading…
Reference in New Issue
Block a user