1

Store physical addresses instead of URLs

This commit is contained in:
Alex Yatskov 2015-09-20 12:08:44 +09:00
parent ac8c22aadd
commit 9fbd6ce67c
3 changed files with 55 additions and 42 deletions

View File

@ -33,7 +33,6 @@ import (
"log" "log"
"net/url" "net/url"
"os" "os"
"strings"
"github.com/PuerkitoBio/goquery" "github.com/PuerkitoBio/goquery"
"github.com/fatih/color" "github.com/fatih/color"
@ -78,6 +77,7 @@ func (s semantics) reduce(weight float64) semantics {
type restaurant struct { type restaurant struct {
name string name string
address string
reviews []review reviews []review
sem semantics sem semantics
@ -153,8 +153,11 @@ func collateData(reviews []review) map[uint64]*restaurant {
var rest *restaurant var rest *restaurant
if rest, _ = restaurants[hash.Sum64()]; rest == nil { if rest, _ = restaurants[hash.Sum64()]; rest == nil {
rest = &restaurant{name: rev.name, latitude: rev.latitude, longitude: rev.longitude} restaurants[hash.Sum64()] = &restaurant{
restaurants[hash.Sum64()] = rest name: rev.name,
address: rev.address,
latitude: rev.latitude,
longitude: rev.longitude}
} }
rest.reviews = append(rest.reviews, rev) rest.reviews = append(rest.reviews, rev)
@ -217,7 +220,7 @@ func dumpData(dbPath string, restaraunts map[uint64]*restaurant) error {
DROP TABLE IF EXISTS reviews; DROP TABLE IF EXISTS reviews;
CREATE TABLE reviews( CREATE TABLE reviews(
name VARCHAR(100) NOT NULL, name VARCHAR(100) NOT NULL,
urls VARCHAR(200) NOT NULL, address VARCHAR(400) NOT NULL,
delicious FLOAT NOT NULL, delicious FLOAT NOT NULL,
accommodating FLOAT NOT NULL, accommodating FLOAT NOT NULL,
affordable FLOAT NOT NULL, affordable FLOAT NOT NULL,
@ -235,15 +238,10 @@ func dumpData(dbPath string, restaraunts map[uint64]*restaurant) error {
} }
for _, rest := range restaraunts { for _, rest := range restaraunts {
var urls []string
for _, rev := range rest.reviews {
urls = append(urls, rev.url)
}
_, err = db.Exec(` _, err = db.Exec(`
INSERT INTO reviews( INSERT INTO reviews(
name, name,
urls, address,
delicious, delicious,
accommodating, accommodating,
affordable, affordable,
@ -255,7 +253,7 @@ func dumpData(dbPath string, restaraunts map[uint64]*restaurant) error {
accessCount accessCount
) VALUES(?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)`, ) VALUES(?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)`,
rest.name, rest.name,
strings.Join(urls, ","), rest.address,
rest.sem.delicious, rest.sem.delicious,
rest.sem.accomodating, rest.sem.accomodating,
rest.sem.affordable, rest.sem.affordable,

View File

@ -63,14 +63,14 @@ func (tabelog) index(doc *goquery.Document) (string, []string) {
func (tabelog) review(doc *goquery.Document) (name, address string, features map[string]float64, weight float64, err error) { func (tabelog) review(doc *goquery.Document) (name, address string, features map[string]float64, weight float64, err error) {
if name = doc.Find("a.rd-header__rst-name-main").Text(); len(name) == 0 { if name = doc.Find("a.rd-header__rst-name-main").Text(); len(name) == 0 {
err = errors.New("invalid value for name") err = errors.New("invalid name")
return return
} }
if addresses := doc.Find("p.rd-detail-info__rst-address"); addresses.Length() == 2 { if addresses := doc.Find("p.rd-detail-info__rst-address"); addresses.Length() == 2 {
address = strings.TrimSpace(addresses.First().Text()) address = strings.TrimSpace(addresses.First().Text())
} else { } else {
err = errors.New("invalid value for address") err = errors.New("invalid address")
return return
} }
@ -80,7 +80,7 @@ func (tabelog) review(doc *goquery.Document) (name, address string, features map
var value float64 var value float64
if value, err = strconv.ParseFloat(valueText, 8); err != nil { if value, err = strconv.ParseFloat(valueText, 8); err != nil {
err = fmt.Errorf("invalid value for %s", category) err = fmt.Errorf("invalid rating for %s", category)
return return
} }
@ -89,7 +89,7 @@ func (tabelog) review(doc *goquery.Document) (name, address string, features map
weight, err = strconv.ParseFloat(doc.Find("a.rd-header__rst-reviews-target > b").Text(), 8) weight, err = strconv.ParseFloat(doc.Find("a.rd-header__rst-reviews-target > b").Text(), 8)
if err != nil { if err != nil {
err = fmt.Errorf("invalid value for review count") err = fmt.Errorf("invalid review count")
return return
} }

View File

@ -62,44 +62,59 @@ func (tripadvisor) index(doc *goquery.Document) (string, []string) {
func (tripadvisor) review(doc *goquery.Document) (name, address string, features map[string]float64, weight float64, err error) { func (tripadvisor) review(doc *goquery.Document) (name, address string, features map[string]float64, weight float64, err error) {
if name = strings.TrimSpace(doc.Find("h1#HEADING").Text()); len(name) == 0 { if name = strings.TrimSpace(doc.Find("h1#HEADING").Text()); len(name) == 0 {
err = errors.New("invalid value for name name") err = errors.New("invalid name")
return return
} }
if address = strings.TrimSpace(doc.Find("address span.format_address").Text()); len(address) == 0 { {
err = errors.New("invalid value for address") var addressParts []string
return doc.Find("address span.format_address > span").Each(func(index int, sel *goquery.Selection) {
} addressParts = append(addressParts, strings.TrimSpace(sel.Text()))
})
ratings := doc.Find("ul.barChart div.ratingRow img.sprite-rating_s_fill") if len(addressParts) == 0 {
if ratings.Length() != 4 { err = errors.New("invalid address")
err = errors.New("missing rating data")
return
}
features = make(map[string]float64)
for index, category := range []string{"food", "service", "value", "atmosphere"} {
altText, _ := ratings.Eq(index).Attr("alt")
valueText := strings.Split(altText, " ")[0]
var value float64
if value, err = strconv.ParseFloat(valueText, 8); err != nil {
err = fmt.Errorf("invalid value for %s", category)
return return
} }
features[category] = value/2.5 - 1.0 address = strings.Join(addressParts, " ")
} }
weightParts := strings.Split(doc.Find("h3.reviews_header").Text(), " ") {
if len(weightParts) == 0 { ratings := doc.Find("ul.barChart div.ratingRow img.sprite-rating_s_fill")
err = fmt.Errorf("missing review count") if ratings.Length() != 4 {
return err = errors.New("invalid ratings")
return
}
features = make(map[string]float64)
for index, category := range []string{"food", "service", "value", "atmosphere"} {
altText, _ := ratings.Eq(index).Attr("alt")
valueText := strings.Split(altText, " ")[0]
var value float64
if value, err = strconv.ParseFloat(valueText, 8); err != nil {
err = fmt.Errorf("invalid rating for %s", category)
return
}
features[category] = value/2.5 - 1.0
}
} }
if weight, err = strconv.ParseFloat(weightParts[0], 8); err != nil { {
err = fmt.Errorf("invalid value for review count") weightValid := false
return if weightParts := strings.Split(doc.Find("h3.reviews_header").Text(), " "); len(weightParts) > 0 {
if weight, err = strconv.ParseFloat(weightParts[0], 8); err == nil {
weightValid = true
return
}
}
if !weightValid {
err = fmt.Errorf("invalid review count")
return
}
} }
return return