Modifying data range
This commit is contained in:
parent
90522c0f50
commit
b5ee8380a0
@ -102,10 +102,6 @@ func computeStnData(restaurants []restaurant, stationsPath string) error {
|
||||
return nil
|
||||
}
|
||||
|
||||
func buildFeatures(r restaurant) (delicious, accommodating, affordable, atmospheric float64) {
|
||||
return r.features["food"], r.features["service"], r.features["value"], r.features["atmosphere"]
|
||||
}
|
||||
|
||||
func dumpData(dbPath string, restaraunts []restaurant) error {
|
||||
db, err := sql.Open("sqlite3", dbPath)
|
||||
if err != nil {
|
||||
@ -135,8 +131,6 @@ func dumpData(dbPath string, restaraunts []restaurant) error {
|
||||
}
|
||||
|
||||
for _, r := range restaraunts {
|
||||
delicious, accommodating, affordable, atmospheric := buildFeatures(r)
|
||||
|
||||
_, err = db.Exec(`
|
||||
INSERT INTO reviews(
|
||||
name,
|
||||
@ -153,10 +147,10 @@ func dumpData(dbPath string, restaraunts []restaurant) error {
|
||||
) VALUES(?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)`,
|
||||
r.name,
|
||||
r.url,
|
||||
delicious,
|
||||
accommodating,
|
||||
affordable,
|
||||
atmospheric,
|
||||
r.feats.delicious,
|
||||
r.feats.accommodating,
|
||||
r.feats.affordable,
|
||||
r.feats.atmospheric,
|
||||
r.longitude,
|
||||
r.latitude,
|
||||
r.closestStnDist,
|
||||
|
@ -30,12 +30,19 @@ import (
|
||||
"github.com/PuerkitoBio/goquery"
|
||||
)
|
||||
|
||||
type features struct {
|
||||
delicious float64
|
||||
accommodating float64
|
||||
affordable float64
|
||||
atmospheric float64
|
||||
}
|
||||
|
||||
type restaurant struct {
|
||||
name string
|
||||
address string
|
||||
url string
|
||||
|
||||
features map[string]float64
|
||||
feats features
|
||||
|
||||
latitude float64
|
||||
longitude float64
|
||||
@ -46,7 +53,7 @@ type restaurant struct {
|
||||
|
||||
type scraper interface {
|
||||
index(doc *goquery.Document) (string, []string)
|
||||
review(doc *goquery.Document) (string, string, map[string]float64, error)
|
||||
review(doc *goquery.Document) (string, string, features, error)
|
||||
}
|
||||
|
||||
func makeAbsUrl(ref, base string) (string, error) {
|
||||
@ -90,17 +97,17 @@ func scrapeReview(url string, out chan restaurant, wc *webCache, group *sync.Wai
|
||||
return
|
||||
}
|
||||
|
||||
name, address, features, err := scr.review(doc)
|
||||
name, address, feats, err := scr.review(doc)
|
||||
if err != nil {
|
||||
log.Printf("failed to scrape review at %s (%v)", url, err)
|
||||
return
|
||||
}
|
||||
|
||||
out <- restaurant{
|
||||
name: name,
|
||||
address: address,
|
||||
features: features,
|
||||
url: url}
|
||||
name: name,
|
||||
address: address,
|
||||
feats: feats,
|
||||
url: url}
|
||||
}
|
||||
|
||||
func scrapeIndex(indexUrl string, out chan restaurant, wc *webCache, scr scraper) {
|
||||
|
@ -50,7 +50,7 @@ func (tabelog) index(doc *goquery.Document) (string, []string) {
|
||||
return nextIndexUrl, reviewUrls
|
||||
}
|
||||
|
||||
func (tabelog) review(doc *goquery.Document) (name, address string, features map[string]float64, err error) {
|
||||
func (tabelog) review(doc *goquery.Document) (name, address string, feat features, err error) {
|
||||
name = doc.Find("a.rd-header__rst-name-main").Text()
|
||||
|
||||
if addresses := doc.Find("p.rd-detail-info__rst-address"); addresses.Length() == 2 {
|
||||
@ -60,14 +60,19 @@ func (tabelog) review(doc *goquery.Document) (name, address string, features map
|
||||
return
|
||||
}
|
||||
|
||||
features = make(map[string]float64)
|
||||
f := make(map[string]float64)
|
||||
for index, category := range []string{"dishes", "service", "atmosphere", "cost", "drinks"} {
|
||||
text := doc.Find(fmt.Sprintf("#js-rating-detail > dd:nth-child(%d)", (index+1)*2)).Text()
|
||||
if features[category], err = strconv.ParseFloat(text, 8); err != nil {
|
||||
if f[category], err = strconv.ParseFloat(text, 8); err != nil {
|
||||
err = fmt.Errorf("invalid value for %s", category)
|
||||
return
|
||||
}
|
||||
}
|
||||
|
||||
feat.accommodating = f["service"]/2.5 - 1.0
|
||||
feat.affordable = f["cost"]/2.5 - 1.0
|
||||
feat.atmospheric = f["atmosphere"]/2.5 - 1.0
|
||||
feat.delicious = f["dishes"]/2.5 - 1.0
|
||||
|
||||
return
|
||||
}
|
||||
|
@ -50,7 +50,7 @@ func (tripadvisor) index(doc *goquery.Document) (string, []string) {
|
||||
return nextIndexUrl, reviewUrls
|
||||
}
|
||||
|
||||
func (tripadvisor) review(doc *goquery.Document) (name, address string, features map[string]float64, err error) {
|
||||
func (tripadvisor) review(doc *goquery.Document) (name, address string, feat features, err error) {
|
||||
name = strings.TrimSpace(doc.Find("h1#HEADING").Text())
|
||||
address = strings.TrimSpace(doc.Find("address span.format_address").Text())
|
||||
|
||||
@ -60,15 +60,20 @@ func (tripadvisor) review(doc *goquery.Document) (name, address string, features
|
||||
return
|
||||
}
|
||||
|
||||
features = make(map[string]float64)
|
||||
f := make(map[string]float64)
|
||||
for index, category := range []string{"food", "service", "value", "atmosphere"} {
|
||||
alt, _ := ratings.Eq(index).Attr("alt")
|
||||
rating := strings.Split(alt, " ")[0]
|
||||
if features[category], err = strconv.ParseFloat(rating, 8); err != nil {
|
||||
if f[category], err = strconv.ParseFloat(rating, 8); err != nil {
|
||||
err = fmt.Errorf("invalid value for %s", category)
|
||||
return
|
||||
}
|
||||
}
|
||||
|
||||
feat.accommodating = f["service"]/2.5 - 1.0
|
||||
feat.affordable = f["value"]/2.5 - 1.0
|
||||
feat.atmospheric = f["atmosphere"]/2.5 - 1.0
|
||||
feat.delicious = f["food"]/2.5 - 1.0
|
||||
|
||||
return
|
||||
}
|
||||
|
Loading…
Reference in New Issue
Block a user