1
This commit is contained in:
Alex Yatskov 2015-09-18 13:46:56 +09:00
parent dc38ce4a5f
commit 9e9bd99f0b
2 changed files with 17 additions and 10 deletions

View File

@@ -65,11 +65,11 @@ func scrapeData(urlsPath, geocachePath, webcachePath string) ([]review, error) {
} }
defer file.Close() defer file.Close()
var ( ctx := scrapeCtx{gc, wc}
ctx = scrapeCtx{gc, wc} tlog := tabelog{scrapeCtx: ctx}
reviews []review tadv := tripadvisor{scrapeCtx: ctx}
)
var reviews []review
for scanner := bufio.NewScanner(file); scanner.Scan(); { for scanner := bufio.NewScanner(file); scanner.Scan(); {
if line := scanner.Text(); len(line) > 0 { if line := scanner.Text(); len(line) > 0 {
parsed, err := url.Parse(line) parsed, err := url.Parse(line)
@@ -77,14 +77,21 @@ func scrapeData(urlsPath, geocachePath, webcachePath string) ([]review, error) {
return nil, err return nil, err
} }
var revs []review
switch parsed.Host { switch parsed.Host {
case "tabelog.com": case "tabelog.com":
reviews = append(reviews, scrape(line, tabelog{scrapeCtx: ctx})...) revs, err = scrape(line, tlog)
case "www.tripadvisor.com": case "www.tripadvisor.com":
reviews = append(reviews, scrape(line, tripadvisor{scrapeCtx: ctx})...) revs, err = scrape(line, tadv)
default: default:
return nil, errors.New("unsupported review site") err = errors.New("unsupported review site")
} }
if err != nil {
return nil, err
}
reviews = append(reviews, revs...)
} }
} }

View File

@@ -144,17 +144,17 @@ func scrapeIndex(indexUrl string, out chan review, scr scraper) error {
return nil return nil
} }
func scrape(url string, scr scraper) []review { func scrape(url string, scr scraper) ([]review, error) {
out := make(chan review, 128) out := make(chan review, 128)
in := make(chan review, 128) in := make(chan review, 128)
go scrapeIndex(url, in, scr)
go decodeReviews(in, out, scr) go decodeReviews(in, out, scr)
err := scrapeIndex(url, in, scr)
var reviews []review var reviews []review
for rev, ok := <-out; ok; { for rev, ok := <-out; ok; {
reviews = append(reviews, rev) reviews = append(reviews, rev)
} }
return reviews return reviews, err
} }