WIP
This commit is contained in:
parent
dc38ce4a5f
commit
9e9bd99f0b
@ -65,11 +65,11 @@ func scrapeData(urlsPath, geocachePath, webcachePath string) ([]review, error) {
|
|||||||
}
|
}
|
||||||
defer file.Close()
|
defer file.Close()
|
||||||
|
|
||||||
var (
|
ctx := scrapeCtx{gc, wc}
|
||||||
ctx = scrapeCtx{gc, wc}
|
tlog := tabelog{scrapeCtx: ctx}
|
||||||
reviews []review
|
tadv := tripadvisor{scrapeCtx: ctx}
|
||||||
)
|
|
||||||
|
|
||||||
|
var reviews []review
|
||||||
for scanner := bufio.NewScanner(file); scanner.Scan(); {
|
for scanner := bufio.NewScanner(file); scanner.Scan(); {
|
||||||
if line := scanner.Text(); len(line) > 0 {
|
if line := scanner.Text(); len(line) > 0 {
|
||||||
parsed, err := url.Parse(line)
|
parsed, err := url.Parse(line)
|
||||||
@ -77,14 +77,21 @@ func scrapeData(urlsPath, geocachePath, webcachePath string) ([]review, error) {
|
|||||||
return nil, err
|
return nil, err
|
||||||
}
|
}
|
||||||
|
|
||||||
|
var revs []review
|
||||||
switch parsed.Host {
|
switch parsed.Host {
|
||||||
case "tabelog.com":
|
case "tabelog.com":
|
||||||
reviews = append(reviews, scrape(line, tabelog{scrapeCtx: ctx})...)
|
revs, err = scrape(line, tlog)
|
||||||
case "www.tripadvisor.com":
|
case "www.tripadvisor.com":
|
||||||
reviews = append(reviews, scrape(line, tripadvisor{scrapeCtx: ctx})...)
|
revs, err = scrape(line, tadv)
|
||||||
default:
|
default:
|
||||||
return nil, errors.New("unsupported review site")
|
err = errors.New("unsupported review site")
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if err != nil {
|
||||||
|
return nil, err
|
||||||
|
}
|
||||||
|
|
||||||
|
reviews = append(reviews, revs...)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -144,17 +144,17 @@ func scrapeIndex(indexUrl string, out chan review, scr scraper) error {
|
|||||||
return nil
|
return nil
|
||||||
}
|
}
|
||||||
|
|
||||||
func scrape(url string, scr scraper) []review {
|
func scrape(url string, scr scraper) ([]review, error) {
|
||||||
out := make(chan review, 128)
|
out := make(chan review, 128)
|
||||||
in := make(chan review, 128)
|
in := make(chan review, 128)
|
||||||
|
|
||||||
go scrapeIndex(url, in, scr)
|
|
||||||
go decodeReviews(in, out, scr)
|
go decodeReviews(in, out, scr)
|
||||||
|
err := scrapeIndex(url, in, scr)
|
||||||
|
|
||||||
var reviews []review
|
var reviews []review
|
||||||
for rev, ok := <-out; ok; {
|
for rev, ok := <-out; ok; {
|
||||||
reviews = append(reviews, rev)
|
reviews = append(reviews, rev)
|
||||||
}
|
}
|
||||||
|
|
||||||
return reviews
|
return reviews, err
|
||||||
}
|
}
|
||||||
|
Loading…
Reference in New Issue
Block a user