Store physical address instead of urls
This commit is contained in:
parent
ac8c22aadd
commit
9fbd6ce67c
@ -33,7 +33,6 @@ import (
|
|||||||
"log"
|
"log"
|
||||||
"net/url"
|
"net/url"
|
||||||
"os"
|
"os"
|
||||||
"strings"
|
|
||||||
|
|
||||||
"github.com/PuerkitoBio/goquery"
|
"github.com/PuerkitoBio/goquery"
|
||||||
"github.com/fatih/color"
|
"github.com/fatih/color"
|
||||||
@ -78,6 +77,7 @@ func (s semantics) reduce(weight float64) semantics {
|
|||||||
|
|
||||||
type restaurant struct {
|
type restaurant struct {
|
||||||
name string
|
name string
|
||||||
|
address string
|
||||||
reviews []review
|
reviews []review
|
||||||
sem semantics
|
sem semantics
|
||||||
|
|
||||||
@ -153,8 +153,11 @@ func collateData(reviews []review) map[uint64]*restaurant {
|
|||||||
|
|
||||||
var rest *restaurant
|
var rest *restaurant
|
||||||
if rest, _ = restaurants[hash.Sum64()]; rest == nil {
|
if rest, _ = restaurants[hash.Sum64()]; rest == nil {
|
||||||
rest = &restaurant{name: rev.name, latitude: rev.latitude, longitude: rev.longitude}
|
restaurants[hash.Sum64()] = &restaurant{
|
||||||
restaurants[hash.Sum64()] = rest
|
name: rev.name,
|
||||||
|
address: rev.address,
|
||||||
|
latitude: rev.latitude,
|
||||||
|
longitude: rev.longitude}
|
||||||
}
|
}
|
||||||
|
|
||||||
rest.reviews = append(rest.reviews, rev)
|
rest.reviews = append(rest.reviews, rev)
|
||||||
@ -217,7 +220,7 @@ func dumpData(dbPath string, restaraunts map[uint64]*restaurant) error {
|
|||||||
DROP TABLE IF EXISTS reviews;
|
DROP TABLE IF EXISTS reviews;
|
||||||
CREATE TABLE reviews(
|
CREATE TABLE reviews(
|
||||||
name VARCHAR(100) NOT NULL,
|
name VARCHAR(100) NOT NULL,
|
||||||
urls VARCHAR(200) NOT NULL,
|
address VARCHAR(400) NOT NULL,
|
||||||
delicious FLOAT NOT NULL,
|
delicious FLOAT NOT NULL,
|
||||||
accommodating FLOAT NOT NULL,
|
accommodating FLOAT NOT NULL,
|
||||||
affordable FLOAT NOT NULL,
|
affordable FLOAT NOT NULL,
|
||||||
@ -235,15 +238,10 @@ func dumpData(dbPath string, restaraunts map[uint64]*restaurant) error {
|
|||||||
}
|
}
|
||||||
|
|
||||||
for _, rest := range restaraunts {
|
for _, rest := range restaraunts {
|
||||||
var urls []string
|
|
||||||
for _, rev := range rest.reviews {
|
|
||||||
urls = append(urls, rev.url)
|
|
||||||
}
|
|
||||||
|
|
||||||
_, err = db.Exec(`
|
_, err = db.Exec(`
|
||||||
INSERT INTO reviews(
|
INSERT INTO reviews(
|
||||||
name,
|
name,
|
||||||
urls,
|
address,
|
||||||
delicious,
|
delicious,
|
||||||
accommodating,
|
accommodating,
|
||||||
affordable,
|
affordable,
|
||||||
@ -255,7 +253,7 @@ func dumpData(dbPath string, restaraunts map[uint64]*restaurant) error {
|
|||||||
accessCount
|
accessCount
|
||||||
) VALUES(?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)`,
|
) VALUES(?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)`,
|
||||||
rest.name,
|
rest.name,
|
||||||
strings.Join(urls, ","),
|
rest.address,
|
||||||
rest.sem.delicious,
|
rest.sem.delicious,
|
||||||
rest.sem.accomodating,
|
rest.sem.accomodating,
|
||||||
rest.sem.affordable,
|
rest.sem.affordable,
|
||||||
|
@ -63,14 +63,14 @@ func (tabelog) index(doc *goquery.Document) (string, []string) {
|
|||||||
|
|
||||||
func (tabelog) review(doc *goquery.Document) (name, address string, features map[string]float64, weight float64, err error) {
|
func (tabelog) review(doc *goquery.Document) (name, address string, features map[string]float64, weight float64, err error) {
|
||||||
if name = doc.Find("a.rd-header__rst-name-main").Text(); len(name) == 0 {
|
if name = doc.Find("a.rd-header__rst-name-main").Text(); len(name) == 0 {
|
||||||
err = errors.New("invalid value for name")
|
err = errors.New("invalid name")
|
||||||
return
|
return
|
||||||
}
|
}
|
||||||
|
|
||||||
if addresses := doc.Find("p.rd-detail-info__rst-address"); addresses.Length() == 2 {
|
if addresses := doc.Find("p.rd-detail-info__rst-address"); addresses.Length() == 2 {
|
||||||
address = strings.TrimSpace(addresses.First().Text())
|
address = strings.TrimSpace(addresses.First().Text())
|
||||||
} else {
|
} else {
|
||||||
err = errors.New("invalid value for address")
|
err = errors.New("invalid address")
|
||||||
return
|
return
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -80,7 +80,7 @@ func (tabelog) review(doc *goquery.Document) (name, address string, features map
|
|||||||
|
|
||||||
var value float64
|
var value float64
|
||||||
if value, err = strconv.ParseFloat(valueText, 8); err != nil {
|
if value, err = strconv.ParseFloat(valueText, 8); err != nil {
|
||||||
err = fmt.Errorf("invalid value for %s", category)
|
err = fmt.Errorf("invalid rating for %s", category)
|
||||||
return
|
return
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -89,7 +89,7 @@ func (tabelog) review(doc *goquery.Document) (name, address string, features map
|
|||||||
|
|
||||||
weight, err = strconv.ParseFloat(doc.Find("a.rd-header__rst-reviews-target > b").Text(), 8)
|
weight, err = strconv.ParseFloat(doc.Find("a.rd-header__rst-reviews-target > b").Text(), 8)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
err = fmt.Errorf("invalid value for review count")
|
err = fmt.Errorf("invalid review count")
|
||||||
return
|
return
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -62,44 +62,59 @@ func (tripadvisor) index(doc *goquery.Document) (string, []string) {
|
|||||||
|
|
||||||
func (tripadvisor) review(doc *goquery.Document) (name, address string, features map[string]float64, weight float64, err error) {
|
func (tripadvisor) review(doc *goquery.Document) (name, address string, features map[string]float64, weight float64, err error) {
|
||||||
if name = strings.TrimSpace(doc.Find("h1#HEADING").Text()); len(name) == 0 {
|
if name = strings.TrimSpace(doc.Find("h1#HEADING").Text()); len(name) == 0 {
|
||||||
err = errors.New("invalid value for name name")
|
err = errors.New("invalid name")
|
||||||
return
|
return
|
||||||
}
|
}
|
||||||
|
|
||||||
if address = strings.TrimSpace(doc.Find("address span.format_address").Text()); len(address) == 0 {
|
{
|
||||||
err = errors.New("invalid value for address")
|
var addressParts []string
|
||||||
return
|
doc.Find("address span.format_address > span").Each(func(index int, sel *goquery.Selection) {
|
||||||
}
|
addressParts = append(addressParts, strings.TrimSpace(sel.Text()))
|
||||||
|
})
|
||||||
|
|
||||||
ratings := doc.Find("ul.barChart div.ratingRow img.sprite-rating_s_fill")
|
if len(addressParts) == 0 {
|
||||||
if ratings.Length() != 4 {
|
err = errors.New("invalid address")
|
||||||
err = errors.New("missing rating data")
|
|
||||||
return
|
|
||||||
}
|
|
||||||
|
|
||||||
features = make(map[string]float64)
|
|
||||||
for index, category := range []string{"food", "service", "value", "atmosphere"} {
|
|
||||||
altText, _ := ratings.Eq(index).Attr("alt")
|
|
||||||
valueText := strings.Split(altText, " ")[0]
|
|
||||||
|
|
||||||
var value float64
|
|
||||||
if value, err = strconv.ParseFloat(valueText, 8); err != nil {
|
|
||||||
err = fmt.Errorf("invalid value for %s", category)
|
|
||||||
return
|
return
|
||||||
}
|
}
|
||||||
|
|
||||||
features[category] = value/2.5 - 1.0
|
address = strings.Join(addressParts, " ")
|
||||||
}
|
}
|
||||||
|
|
||||||
weightParts := strings.Split(doc.Find("h3.reviews_header").Text(), " ")
|
{
|
||||||
if len(weightParts) == 0 {
|
ratings := doc.Find("ul.barChart div.ratingRow img.sprite-rating_s_fill")
|
||||||
err = fmt.Errorf("missing review count")
|
if ratings.Length() != 4 {
|
||||||
return
|
err = errors.New("invalid ratings")
|
||||||
|
return
|
||||||
|
}
|
||||||
|
|
||||||
|
features = make(map[string]float64)
|
||||||
|
for index, category := range []string{"food", "service", "value", "atmosphere"} {
|
||||||
|
altText, _ := ratings.Eq(index).Attr("alt")
|
||||||
|
valueText := strings.Split(altText, " ")[0]
|
||||||
|
|
||||||
|
var value float64
|
||||||
|
if value, err = strconv.ParseFloat(valueText, 8); err != nil {
|
||||||
|
err = fmt.Errorf("invalid rating for %s", category)
|
||||||
|
return
|
||||||
|
}
|
||||||
|
|
||||||
|
features[category] = value/2.5 - 1.0
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
if weight, err = strconv.ParseFloat(weightParts[0], 8); err != nil {
|
{
|
||||||
err = fmt.Errorf("invalid value for review count")
|
weightValid := false
|
||||||
return
|
if weightParts := strings.Split(doc.Find("h3.reviews_header").Text(), " "); len(weightParts) > 0 {
|
||||||
|
if weight, err = strconv.ParseFloat(weightParts[0], 8); err == nil {
|
||||||
|
weightValid = true
|
||||||
|
return
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if !weightValid {
|
||||||
|
err = fmt.Errorf("invalid review count")
|
||||||
|
return
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
return
|
return
|
||||||
|
Loading…
Reference in New Issue
Block a user