Scraping correctly again
This commit is contained in:
parent
c8be7c69ea
commit
6138b978d4
@ -46,7 +46,7 @@ type profiler interface {
|
||||
profile(doc *goquery.Document) *review
|
||||
}
|
||||
|
||||
func makeAbsUrl(base, ref string) (string, error) {
|
||||
func makeAbsUrl(ref, base string) (string, error) {
|
||||
b, err := url.Parse(base)
|
||||
if err != nil {
|
||||
return "", err
|
||||
@ -85,13 +85,12 @@ func scrapeReview(url string, out chan review, cache *webCache, group *sync.Wait
|
||||
if err != nil {
|
||||
log.Printf("failed to scrape review at %s (%v)", url, err)
|
||||
} else if r := prof.profile(doc); r != nil {
|
||||
r.url = url
|
||||
out <- *r
|
||||
}
|
||||
}
|
||||
|
||||
func scrapeIndex(indexUrl string, out chan review, cache *webCache, prof profiler) {
|
||||
var group sync.WaitGroup
|
||||
|
||||
doc, err := cache.load(indexUrl)
|
||||
if err != nil {
|
||||
log.Printf("failed to scrape index at %s (%v)", indexUrl, err)
|
||||
@ -103,6 +102,7 @@ func scrapeIndex(indexUrl string, out chan review, cache *webCache, prof profile
|
||||
log.Fatal(err)
|
||||
}
|
||||
|
||||
var group sync.WaitGroup
|
||||
for _, reviewUrl := range reviewUrls {
|
||||
absUrl, err := makeAbsUrl(reviewUrl, indexUrl)
|
||||
if err != nil {
|
||||
@ -112,7 +112,6 @@ func scrapeIndex(indexUrl string, out chan review, cache *webCache, prof profile
|
||||
group.Add(1)
|
||||
go scrapeReview(absUrl, out, cache, &group, prof)
|
||||
}
|
||||
|
||||
group.Wait()
|
||||
|
||||
if nextIndexUrl == "" {
|
||||
|
@ -51,9 +51,7 @@ func (tabelog) index(doc *goquery.Document) (string, []string) {
|
||||
func (tabelog) profile(doc *goquery.Document) *review {
|
||||
var r review
|
||||
|
||||
r.url = doc.Url.String()
|
||||
r.name = doc.Find("a.rd-header__rst-name-main").Text()
|
||||
|
||||
if addresses := doc.Find("p.rd-detail-info__rst-address"); addresses.Length() == 2 {
|
||||
r.address = strings.TrimSpace(addresses.First().Text())
|
||||
} else {
|
||||
@ -61,6 +59,8 @@ func (tabelog) profile(doc *goquery.Document) *review {
|
||||
}
|
||||
|
||||
var err error
|
||||
|
||||
r.features = make(map[string]float64)
|
||||
if r.features["dishes"], err = strconv.ParseFloat(doc.Find("#js-rating-detail > dd:nth-child(2)").Text(), 8); err != nil {
|
||||
return nil
|
||||
}
|
||||
|
Loading…
Reference in New Issue
Block a user