1

Store physical addresses instead of URLs

This commit is contained in:
Alex Yatskov 2015-09-20 12:08:44 +09:00
parent ac8c22aadd
commit 9fbd6ce67c
3 changed files with 55 additions and 42 deletions

View File

@ -33,7 +33,6 @@ import (
"log"
"net/url"
"os"
"strings"
"github.com/PuerkitoBio/goquery"
"github.com/fatih/color"
@ -78,6 +77,7 @@ func (s semantics) reduce(weight float64) semantics {
type restaurant struct {
name string
address string
reviews []review
sem semantics
@ -153,8 +153,11 @@ func collateData(reviews []review) map[uint64]*restaurant {
var rest *restaurant
if rest, _ = restaurants[hash.Sum64()]; rest == nil {
rest = &restaurant{name: rev.name, latitude: rev.latitude, longitude: rev.longitude}
restaurants[hash.Sum64()] = rest
restaurants[hash.Sum64()] = &restaurant{
name: rev.name,
address: rev.address,
latitude: rev.latitude,
longitude: rev.longitude}
}
rest.reviews = append(rest.reviews, rev)
@ -217,7 +220,7 @@ func dumpData(dbPath string, restaraunts map[uint64]*restaurant) error {
DROP TABLE IF EXISTS reviews;
CREATE TABLE reviews(
name VARCHAR(100) NOT NULL,
urls VARCHAR(200) NOT NULL,
address VARCHAR(400) NOT NULL,
delicious FLOAT NOT NULL,
accommodating FLOAT NOT NULL,
affordable FLOAT NOT NULL,
@ -235,15 +238,10 @@ func dumpData(dbPath string, restaraunts map[uint64]*restaurant) error {
}
for _, rest := range restaraunts {
var urls []string
for _, rev := range rest.reviews {
urls = append(urls, rev.url)
}
_, err = db.Exec(`
INSERT INTO reviews(
name,
urls,
address,
delicious,
accommodating,
affordable,
@ -255,7 +253,7 @@ func dumpData(dbPath string, restaraunts map[uint64]*restaurant) error {
accessCount
) VALUES(?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)`,
rest.name,
strings.Join(urls, ","),
rest.address,
rest.sem.delicious,
rest.sem.accomodating,
rest.sem.affordable,

View File

@ -63,14 +63,14 @@ func (tabelog) index(doc *goquery.Document) (string, []string) {
func (tabelog) review(doc *goquery.Document) (name, address string, features map[string]float64, weight float64, err error) {
if name = doc.Find("a.rd-header__rst-name-main").Text(); len(name) == 0 {
err = errors.New("invalid value for name")
err = errors.New("invalid name")
return
}
if addresses := doc.Find("p.rd-detail-info__rst-address"); addresses.Length() == 2 {
address = strings.TrimSpace(addresses.First().Text())
} else {
err = errors.New("invalid value for address")
err = errors.New("invalid address")
return
}
@ -80,7 +80,7 @@ func (tabelog) review(doc *goquery.Document) (name, address string, features map
var value float64
if value, err = strconv.ParseFloat(valueText, 8); err != nil {
err = fmt.Errorf("invalid value for %s", category)
err = fmt.Errorf("invalid rating for %s", category)
return
}
@ -89,7 +89,7 @@ func (tabelog) review(doc *goquery.Document) (name, address string, features map
weight, err = strconv.ParseFloat(doc.Find("a.rd-header__rst-reviews-target > b").Text(), 8)
if err != nil {
err = fmt.Errorf("invalid value for review count")
err = fmt.Errorf("invalid review count")
return
}

View File

@ -62,18 +62,28 @@ func (tripadvisor) index(doc *goquery.Document) (string, []string) {
func (tripadvisor) review(doc *goquery.Document) (name, address string, features map[string]float64, weight float64, err error) {
if name = strings.TrimSpace(doc.Find("h1#HEADING").Text()); len(name) == 0 {
err = errors.New("invalid value for name name")
err = errors.New("invalid name")
return
}
if address = strings.TrimSpace(doc.Find("address span.format_address").Text()); len(address) == 0 {
err = errors.New("invalid value for address")
{
var addressParts []string
doc.Find("address span.format_address > span").Each(func(index int, sel *goquery.Selection) {
addressParts = append(addressParts, strings.TrimSpace(sel.Text()))
})
if len(addressParts) == 0 {
err = errors.New("invalid address")
return
}
address = strings.Join(addressParts, " ")
}
{
ratings := doc.Find("ul.barChart div.ratingRow img.sprite-rating_s_fill")
if ratings.Length() != 4 {
err = errors.New("missing rating data")
err = errors.New("invalid ratings")
return
}
@ -84,23 +94,28 @@ func (tripadvisor) review(doc *goquery.Document) (name, address string, features
var value float64
if value, err = strconv.ParseFloat(valueText, 8); err != nil {
err = fmt.Errorf("invalid value for %s", category)
err = fmt.Errorf("invalid rating for %s", category)
return
}
features[category] = value/2.5 - 1.0
}
weightParts := strings.Split(doc.Find("h3.reviews_header").Text(), " ")
if len(weightParts) == 0 {
err = fmt.Errorf("missing review count")
return
}
if weight, err = strconv.ParseFloat(weightParts[0], 8); err != nil {
err = fmt.Errorf("invalid value for review count")
{
weightValid := false
if weightParts := strings.Split(doc.Find("h3.reviews_header").Text(), " "); len(weightParts) > 0 {
if weight, err = strconv.ParseFloat(weightParts[0], 8); err == nil {
weightValid = true
return
}
}
if !weightValid {
err = fmt.Errorf("invalid review count")
return
}
}
return
}