Store physical address instead of URLs
This commit is contained in:
parent
ac8c22aadd
commit
9fbd6ce67c
@ -33,7 +33,6 @@ import (
|
||||
"log"
|
||||
"net/url"
|
||||
"os"
|
||||
"strings"
|
||||
|
||||
"github.com/PuerkitoBio/goquery"
|
||||
"github.com/fatih/color"
|
||||
@ -78,6 +77,7 @@ func (s semantics) reduce(weight float64) semantics {
|
||||
|
||||
type restaurant struct {
|
||||
name string
|
||||
address string
|
||||
reviews []review
|
||||
sem semantics
|
||||
|
||||
@ -153,8 +153,11 @@ func collateData(reviews []review) map[uint64]*restaurant {
|
||||
|
||||
var rest *restaurant
|
||||
if rest, _ = restaurants[hash.Sum64()]; rest == nil {
|
||||
rest = &restaurant{name: rev.name, latitude: rev.latitude, longitude: rev.longitude}
|
||||
restaurants[hash.Sum64()] = rest
|
||||
restaurants[hash.Sum64()] = &restaurant{
|
||||
name: rev.name,
|
||||
address: rev.address,
|
||||
latitude: rev.latitude,
|
||||
longitude: rev.longitude}
|
||||
}
|
||||
|
||||
rest.reviews = append(rest.reviews, rev)
|
||||
@ -217,7 +220,7 @@ func dumpData(dbPath string, restaraunts map[uint64]*restaurant) error {
|
||||
DROP TABLE IF EXISTS reviews;
|
||||
CREATE TABLE reviews(
|
||||
name VARCHAR(100) NOT NULL,
|
||||
urls VARCHAR(200) NOT NULL,
|
||||
address VARCHAR(400) NOT NULL,
|
||||
delicious FLOAT NOT NULL,
|
||||
accommodating FLOAT NOT NULL,
|
||||
affordable FLOAT NOT NULL,
|
||||
@ -235,15 +238,10 @@ func dumpData(dbPath string, restaraunts map[uint64]*restaurant) error {
|
||||
}
|
||||
|
||||
for _, rest := range restaraunts {
|
||||
var urls []string
|
||||
for _, rev := range rest.reviews {
|
||||
urls = append(urls, rev.url)
|
||||
}
|
||||
|
||||
_, err = db.Exec(`
|
||||
INSERT INTO reviews(
|
||||
name,
|
||||
urls,
|
||||
address,
|
||||
delicious,
|
||||
accommodating,
|
||||
affordable,
|
||||
@ -255,7 +253,7 @@ func dumpData(dbPath string, restaraunts map[uint64]*restaurant) error {
|
||||
accessCount
|
||||
) VALUES(?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)`,
|
||||
rest.name,
|
||||
strings.Join(urls, ","),
|
||||
rest.address,
|
||||
rest.sem.delicious,
|
||||
rest.sem.accomodating,
|
||||
rest.sem.affordable,
|
||||
|
@ -63,14 +63,14 @@ func (tabelog) index(doc *goquery.Document) (string, []string) {
|
||||
|
||||
func (tabelog) review(doc *goquery.Document) (name, address string, features map[string]float64, weight float64, err error) {
|
||||
if name = doc.Find("a.rd-header__rst-name-main").Text(); len(name) == 0 {
|
||||
err = errors.New("invalid value for name")
|
||||
err = errors.New("invalid name")
|
||||
return
|
||||
}
|
||||
|
||||
if addresses := doc.Find("p.rd-detail-info__rst-address"); addresses.Length() == 2 {
|
||||
address = strings.TrimSpace(addresses.First().Text())
|
||||
} else {
|
||||
err = errors.New("invalid value for address")
|
||||
err = errors.New("invalid address")
|
||||
return
|
||||
}
|
||||
|
||||
@ -80,7 +80,7 @@ func (tabelog) review(doc *goquery.Document) (name, address string, features map
|
||||
|
||||
var value float64
|
||||
if value, err = strconv.ParseFloat(valueText, 8); err != nil {
|
||||
err = fmt.Errorf("invalid value for %s", category)
|
||||
err = fmt.Errorf("invalid rating for %s", category)
|
||||
return
|
||||
}
|
||||
|
||||
@ -89,7 +89,7 @@ func (tabelog) review(doc *goquery.Document) (name, address string, features map
|
||||
|
||||
weight, err = strconv.ParseFloat(doc.Find("a.rd-header__rst-reviews-target > b").Text(), 8)
|
||||
if err != nil {
|
||||
err = fmt.Errorf("invalid value for review count")
|
||||
err = fmt.Errorf("invalid review count")
|
||||
return
|
||||
}
|
||||
|
||||
|
@ -62,44 +62,59 @@ func (tripadvisor) index(doc *goquery.Document) (string, []string) {
|
||||
|
||||
func (tripadvisor) review(doc *goquery.Document) (name, address string, features map[string]float64, weight float64, err error) {
|
||||
if name = strings.TrimSpace(doc.Find("h1#HEADING").Text()); len(name) == 0 {
|
||||
err = errors.New("invalid value for name name")
|
||||
err = errors.New("invalid name")
|
||||
return
|
||||
}
|
||||
|
||||
if address = strings.TrimSpace(doc.Find("address span.format_address").Text()); len(address) == 0 {
|
||||
err = errors.New("invalid value for address")
|
||||
return
|
||||
}
|
||||
{
|
||||
var addressParts []string
|
||||
doc.Find("address span.format_address > span").Each(func(index int, sel *goquery.Selection) {
|
||||
addressParts = append(addressParts, strings.TrimSpace(sel.Text()))
|
||||
})
|
||||
|
||||
ratings := doc.Find("ul.barChart div.ratingRow img.sprite-rating_s_fill")
|
||||
if ratings.Length() != 4 {
|
||||
err = errors.New("missing rating data")
|
||||
return
|
||||
}
|
||||
|
||||
features = make(map[string]float64)
|
||||
for index, category := range []string{"food", "service", "value", "atmosphere"} {
|
||||
altText, _ := ratings.Eq(index).Attr("alt")
|
||||
valueText := strings.Split(altText, " ")[0]
|
||||
|
||||
var value float64
|
||||
if value, err = strconv.ParseFloat(valueText, 8); err != nil {
|
||||
err = fmt.Errorf("invalid value for %s", category)
|
||||
if len(addressParts) == 0 {
|
||||
err = errors.New("invalid address")
|
||||
return
|
||||
}
|
||||
|
||||
features[category] = value/2.5 - 1.0
|
||||
address = strings.Join(addressParts, " ")
|
||||
}
|
||||
|
||||
weightParts := strings.Split(doc.Find("h3.reviews_header").Text(), " ")
|
||||
if len(weightParts) == 0 {
|
||||
err = fmt.Errorf("missing review count")
|
||||
return
|
||||
{
|
||||
ratings := doc.Find("ul.barChart div.ratingRow img.sprite-rating_s_fill")
|
||||
if ratings.Length() != 4 {
|
||||
err = errors.New("invalid ratings")
|
||||
return
|
||||
}
|
||||
|
||||
features = make(map[string]float64)
|
||||
for index, category := range []string{"food", "service", "value", "atmosphere"} {
|
||||
altText, _ := ratings.Eq(index).Attr("alt")
|
||||
valueText := strings.Split(altText, " ")[0]
|
||||
|
||||
var value float64
|
||||
if value, err = strconv.ParseFloat(valueText, 8); err != nil {
|
||||
err = fmt.Errorf("invalid rating for %s", category)
|
||||
return
|
||||
}
|
||||
|
||||
features[category] = value/2.5 - 1.0
|
||||
}
|
||||
}
|
||||
|
||||
if weight, err = strconv.ParseFloat(weightParts[0], 8); err != nil {
|
||||
err = fmt.Errorf("invalid value for review count")
|
||||
return
|
||||
{
|
||||
weightValid := false
|
||||
if weightParts := strings.Split(doc.Find("h3.reviews_header").Text(), " "); len(weightParts) > 0 {
|
||||
if weight, err = strconv.ParseFloat(weightParts[0], 8); err == nil {
|
||||
weightValid = true
|
||||
return
|
||||
}
|
||||
}
|
||||
|
||||
if !weightValid {
|
||||
err = fmt.Errorf("invalid review count")
|
||||
return
|
||||
}
|
||||
}
|
||||
|
||||
return
|
||||
|
Loading…
Reference in New Issue
Block a user