
Work in progress

Alex Yatskov 2015-08-12 18:25:55 +09:00
parent 61267464fb
commit c60b29c270


@@ -24,49 +24,31 @@ package main
 import (
 	"bytes"
-	"errors"
+	"encoding/json"
+	"io/ioutil"
 	"log"
-	"regexp"
 	"strconv"
 	"strings"
-	"sync"
 	"text/template"

 	"github.com/PuerkitoBio/goquery"
 )

-const (
-	tabelogTemplate = "http://tabelog.com/en/rstLst/{{.Page}}/?lat=35.465808055555996&lon=139.61964361111&zoom=16&RdoCosTp=2&LstCos=0&LstCosT=11&LstSitu=0&LstRev=0&LstReserve=0&ChkParking=0&LstSmoking=0"
-)
+const ()

 type tabelogParams struct {
 	Page int
 }

 type tabelogReview struct {
-	name       string
-	address    string
-	dishes     float64
-	service    float64
-	atmosphere float64
-	cost       float64
-	drinks     float64
-	url        string
+	Name       string
+	Address    string
+	Dishes     float64
+	Service    float64
+	Atmosphere float64
+	Cost       float64
+	Drinks     float64
+	Url        string
 }

-func parseCounts(doc *goquery.Document) (from, to, total int64, err error) {
-	t := doc.Find("#js-item-count-downside").Text()
-
-	r := regexp.MustCompile(`(\d+)\D*(\d+)\D*(\d+)`)
-	if c := r.FindStringSubmatch(t); c != nil {
-		from, _ = strconv.ParseInt(c[1], 10, 8)
-		to, _ = strconv.ParseInt(c[2], 10, 8)
-		total, _ = strconv.ParseInt(c[3], 10, 8)
-	} else {
-		err = errors.New("failed to parse counts")
-	}
-
-	return
-}
-
 func scrapeReview(url string, out chan tabelogReview) {
@@ -78,23 +60,23 @@ func scrapeReview(url string, out chan tabelogReview) {
 	var r tabelogReview
-	r.url = url
+	r.Url = url

-	r.name = doc.Find("body > article > header > div.rd-header.l-container > div > div.rd-header__headline > h2 > a").Text()
-	r.address = strings.TrimSpace(doc.Find("#anchor-rd-detail > section > table > tbody > tr > td > p.rd-detail-info__rst-address").First().Text())
+	r.Name = doc.Find("body > article > header > div.rd-header.l-container > div > div.rd-header__headline > h2 > a").Text()
+	r.Address = strings.TrimSpace(doc.Find("#anchor-rd-detail > section > table > tbody > tr > td > p.rd-detail-info__rst-address > span").Text())

-	if r.dishes, err = strconv.ParseFloat(doc.Find("#js-rating-detail > dd:nth-child(2)").Text(), 8); err != nil {
+	if r.Dishes, err = strconv.ParseFloat(doc.Find("#js-rating-detail > dd:nth-child(2)").Text(), 8); err != nil {
 		return
 	}
-	if r.service, err = strconv.ParseFloat(doc.Find("#js-rating-detail > dd:nth-child(4)").Text(), 8); err != nil {
+	if r.Service, err = strconv.ParseFloat(doc.Find("#js-rating-detail > dd:nth-child(4)").Text(), 8); err != nil {
 		return
 	}
-	if r.atmosphere, err = strconv.ParseFloat(doc.Find("#js-rating-detail > dd:nth-child(6)").Text(), 8); err != nil {
+	if r.Atmosphere, err = strconv.ParseFloat(doc.Find("#js-rating-detail > dd:nth-child(6)").Text(), 8); err != nil {
 		return
 	}
-	if r.cost, err = strconv.ParseFloat(doc.Find("#js-rating-detail > dd:nth-child(8)").Text(), 8); err != nil {
+	if r.Cost, err = strconv.ParseFloat(doc.Find("#js-rating-detail > dd:nth-child(8)").Text(), 8); err != nil {
 		return
 	}
-	if r.drinks, err = strconv.ParseFloat(doc.Find("#js-rating-detail > dd:nth-child(10)").Text(), 8); err != nil {
+	if r.Drinks, err = strconv.ParseFloat(doc.Find("#js-rating-detail > dd:nth-child(10)").Text(), 8); err != nil {
 		return
 	}
@@ -116,28 +98,35 @@ func scrapeIndex(url string, out chan tabelogReview) error {
 	return nil
 }

-func dumpReviews(c chan tabelogReview, cond *sync.Cond) {
+func dumpReviews(filename string, in chan tabelogReview, out chan error) {
+	var reviews []tabelogReview
+
 	for {
-		review, ok := <-c
-		if !ok {
+		if review, ok := <-in; ok {
+			log.Println(review.Name)
+			reviews = append(reviews, review)
+		} else {
 			break
 		}
-
-		log.Print(review)
 	}

-	cond.Signal()
+	js, err := json.MarshalIndent(reviews, "", " ")
+	if err != nil {
+		out <- err
+		return
+	}
+
+	out <- ioutil.WriteFile(filename, js, 0644)
 }

-func scrapeTabelog() error {
-	var cond sync.Cond
-
+func scrapeTabelog(filename, url string) error {
 	out := make(chan tabelogReview)
-	go dumpReviews(out, &cond)
+	in := make(chan error)
+	go dumpReviews(filename, out, in)

 	t := template.New("tabelog")
-	t.Parse(tabelogTemplate)
+	t.Parse(url)

-	for i := 1; i <= 60; i++ {
+	for i := 1; i <= 2; i++ {
 		var url bytes.Buffer
 		if err := t.Execute(&url, tabelogParams{i}); err != nil {
 			log.Fatal(err)
@@ -149,7 +138,5 @@ func scrapeTabelog() error {
 	}

 	close(out)
-	cond.Wait()
-
-	return nil
+	return <-in
 }
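
For reference, a minimal sketch (not part of this commit) of how the reworked entry point might be invoked after this change, assuming the caller now supplies the output filename and the listing-URL template that previously lived in the removed tabelogTemplate constant; the filename used here is purely hypothetical.

// Hypothetical caller in the same package; "tabelog.json" is an assumed
// output path, and the URL is the template string removed from the const block.
func main() {
	url := "http://tabelog.com/en/rstLst/{{.Page}}/?lat=35.465808055555996&lon=139.61964361111&zoom=16&RdoCosTp=2&LstCos=0&LstCosT=11&LstSitu=0&LstRev=0&LstReserve=0&ChkParking=0&LstSmoking=0"
	if err := scrapeTabelog("tabelog.json", url); err != nil {
		log.Fatal(err)
	}
}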