1

Modifying data range

This commit is contained in:
Alex Yatskov 2015-08-24 17:03:00 +09:00
parent 90522c0f50
commit b5ee8380a0
4 changed files with 34 additions and 23 deletions

View File

@ -102,10 +102,6 @@ func computeStnData(restaurants []restaurant, stationsPath string) error {
return nil
}
// buildFeatures unpacks the per-category scores held in r.features into four
// separate values. The map keys are translated to result names as follows:
//
//	"food"       -> delicious
//	"service"    -> accommodating
//	"value"      -> affordable
//	"atmosphere" -> atmospheric
//
// A category that is absent from the map (or a nil map) yields 0 for the
// corresponding result, as with any Go map lookup.
func buildFeatures(r restaurant) (delicious, accommodating, affordable, atmospheric float64) {
	scores := r.features

	delicious = scores["food"]
	accommodating = scores["service"]
	affordable = scores["value"]
	atmospheric = scores["atmosphere"]

	return delicious, accommodating, affordable, atmospheric
}
func dumpData(dbPath string, restaraunts []restaurant) error {
db, err := sql.Open("sqlite3", dbPath)
if err != nil {
@ -135,8 +131,6 @@ func dumpData(dbPath string, restaraunts []restaurant) error {
}
for _, r := range restaraunts {
delicious, accommodating, affordable, atmospheric := buildFeatures(r)
_, err = db.Exec(`
INSERT INTO reviews(
name,
@ -153,10 +147,10 @@ func dumpData(dbPath string, restaraunts []restaurant) error {
) VALUES(?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)`,
r.name,
r.url,
delicious,
accommodating,
affordable,
atmospheric,
r.feats.delicious,
r.feats.accommodating,
r.feats.affordable,
r.feats.atmospheric,
r.longitude,
r.latitude,
r.closestStnDist,

View File

@ -30,12 +30,19 @@ import (
"github.com/PuerkitoBio/goquery"
)
// features holds the four per-restaurant scores produced by a review scraper
// and stored on restaurant.feats.
//
// The scrapers shown alongside this type fill each field as rating/2.5 - 1.0.
// NOTE(review): that presumably maps a 0–5 site rating onto [-1, 1]; confirm
// the rating scale of each source site.
//
// Field order is delicious, accommodating, affordable, atmospheric; keep it
// stable in case any code builds the struct with an unkeyed literal.
type features struct {
	delicious, accommodating, affordable, atmospheric float64
}
type restaurant struct {
name string
address string
url string
features map[string]float64
feats features
latitude float64
longitude float64
@ -46,7 +53,7 @@ type restaurant struct {
// scraper is the per-site extraction interface; tabelog and tripadvisor
// implement it with value receivers.
type scraper interface {
// index processes an index page. The implementations shown return the URL
// of the next index page followed by the review URLs found on this page.
index(doc *goquery.Document) (string, []string)
// review processes a single review page and returns, in order, the
// restaurant name, its address, its feature scores, and an error.
//
// NOTE(review): two signatures are listed below, one returning
// map[string]float64 and one returning the features struct. An interface
// cannot declare the same method name twice, so only one can be live —
// presumably the features-returning form; confirm against the real file.
review(doc *goquery.Document) (string, string, map[string]float64, error)
review(doc *goquery.Document) (string, string, features, error)
}
func makeAbsUrl(ref, base string) (string, error) {
@ -90,17 +97,17 @@ func scrapeReview(url string, out chan restaurant, wc *webCache, group *sync.Wai
return
}
name, address, features, err := scr.review(doc)
name, address, feats, err := scr.review(doc)
if err != nil {
log.Printf("failed to scrape review at %s (%v)", url, err)
return
}
out <- restaurant{
name: name,
address: address,
features: features,
url: url}
name: name,
address: address,
feats: feats,
url: url}
}
func scrapeIndex(indexUrl string, out chan restaurant, wc *webCache, scr scraper) {

View File

@ -50,7 +50,7 @@ func (tabelog) index(doc *goquery.Document) (string, []string) {
return nextIndexUrl, reviewUrls
}
func (tabelog) review(doc *goquery.Document) (name, address string, features map[string]float64, err error) {
func (tabelog) review(doc *goquery.Document) (name, address string, feat features, err error) {
name = doc.Find("a.rd-header__rst-name-main").Text()
if addresses := doc.Find("p.rd-detail-info__rst-address"); addresses.Length() == 2 {
@ -60,14 +60,19 @@ func (tabelog) review(doc *goquery.Document) (name, address string, features map
return
}
features = make(map[string]float64)
f := make(map[string]float64)
for index, category := range []string{"dishes", "service", "atmosphere", "cost", "drinks"} {
text := doc.Find(fmt.Sprintf("#js-rating-detail > dd:nth-child(%d)", (index+1)*2)).Text()
if features[category], err = strconv.ParseFloat(text, 8); err != nil {
if f[category], err = strconv.ParseFloat(text, 8); err != nil {
err = fmt.Errorf("invalid value for %s", category)
return
}
}
feat.accommodating = f["service"]/2.5 - 1.0
feat.affordable = f["cost"]/2.5 - 1.0
feat.atmospheric = f["atmosphere"]/2.5 - 1.0
feat.delicious = f["dishes"]/2.5 - 1.0
return
}

View File

@ -50,7 +50,7 @@ func (tripadvisor) index(doc *goquery.Document) (string, []string) {
return nextIndexUrl, reviewUrls
}
func (tripadvisor) review(doc *goquery.Document) (name, address string, features map[string]float64, err error) {
func (tripadvisor) review(doc *goquery.Document) (name, address string, feat features, err error) {
name = strings.TrimSpace(doc.Find("h1#HEADING").Text())
address = strings.TrimSpace(doc.Find("address span.format_address").Text())
@ -60,15 +60,20 @@ func (tripadvisor) review(doc *goquery.Document) (name, address string, features
return
}
features = make(map[string]float64)
f := make(map[string]float64)
for index, category := range []string{"food", "service", "value", "atmosphere"} {
alt, _ := ratings.Eq(index).Attr("alt")
rating := strings.Split(alt, " ")[0]
if features[category], err = strconv.ParseFloat(rating, 8); err != nil {
if f[category], err = strconv.ParseFloat(rating, 8); err != nil {
err = fmt.Errorf("invalid value for %s", category)
return
}
}
feat.accommodating = f["service"]/2.5 - 1.0
feat.affordable = f["value"]/2.5 - 1.0
feat.atmospheric = f["atmosphere"]/2.5 - 1.0
feat.delicious = f["food"]/2.5 - 1.0
return
}