Modifying data range
This commit is contained in:
parent
90522c0f50
commit
b5ee8380a0
@ -102,10 +102,6 @@ func computeStnData(restaurants []restaurant, stationsPath string) error {
|
|||||||
return nil
|
return nil
|
||||||
}
|
}
|
||||||
|
|
||||||
func buildFeatures(r restaurant) (delicious, accommodating, affordable, atmospheric float64) {
|
|
||||||
return r.features["food"], r.features["service"], r.features["value"], r.features["atmosphere"]
|
|
||||||
}
|
|
||||||
|
|
||||||
func dumpData(dbPath string, restaraunts []restaurant) error {
|
func dumpData(dbPath string, restaraunts []restaurant) error {
|
||||||
db, err := sql.Open("sqlite3", dbPath)
|
db, err := sql.Open("sqlite3", dbPath)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
@ -135,8 +131,6 @@ func dumpData(dbPath string, restaraunts []restaurant) error {
|
|||||||
}
|
}
|
||||||
|
|
||||||
for _, r := range restaraunts {
|
for _, r := range restaraunts {
|
||||||
delicious, accommodating, affordable, atmospheric := buildFeatures(r)
|
|
||||||
|
|
||||||
_, err = db.Exec(`
|
_, err = db.Exec(`
|
||||||
INSERT INTO reviews(
|
INSERT INTO reviews(
|
||||||
name,
|
name,
|
||||||
@ -153,10 +147,10 @@ func dumpData(dbPath string, restaraunts []restaurant) error {
|
|||||||
) VALUES(?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)`,
|
) VALUES(?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)`,
|
||||||
r.name,
|
r.name,
|
||||||
r.url,
|
r.url,
|
||||||
delicious,
|
r.feats.delicious,
|
||||||
accommodating,
|
r.feats.accommodating,
|
||||||
affordable,
|
r.feats.affordable,
|
||||||
atmospheric,
|
r.feats.atmospheric,
|
||||||
r.longitude,
|
r.longitude,
|
||||||
r.latitude,
|
r.latitude,
|
||||||
r.closestStnDist,
|
r.closestStnDist,
|
||||||
|
@ -30,12 +30,19 @@ import (
|
|||||||
"github.com/PuerkitoBio/goquery"
|
"github.com/PuerkitoBio/goquery"
|
||||||
)
|
)
|
||||||
|
|
||||||
|
type features struct {
|
||||||
|
delicious float64
|
||||||
|
accommodating float64
|
||||||
|
affordable float64
|
||||||
|
atmospheric float64
|
||||||
|
}
|
||||||
|
|
||||||
type restaurant struct {
|
type restaurant struct {
|
||||||
name string
|
name string
|
||||||
address string
|
address string
|
||||||
url string
|
url string
|
||||||
|
|
||||||
features map[string]float64
|
feats features
|
||||||
|
|
||||||
latitude float64
|
latitude float64
|
||||||
longitude float64
|
longitude float64
|
||||||
@ -46,7 +53,7 @@ type restaurant struct {
|
|||||||
|
|
||||||
type scraper interface {
|
type scraper interface {
|
||||||
index(doc *goquery.Document) (string, []string)
|
index(doc *goquery.Document) (string, []string)
|
||||||
review(doc *goquery.Document) (string, string, map[string]float64, error)
|
review(doc *goquery.Document) (string, string, features, error)
|
||||||
}
|
}
|
||||||
|
|
||||||
func makeAbsUrl(ref, base string) (string, error) {
|
func makeAbsUrl(ref, base string) (string, error) {
|
||||||
@ -90,17 +97,17 @@ func scrapeReview(url string, out chan restaurant, wc *webCache, group *sync.Wai
|
|||||||
return
|
return
|
||||||
}
|
}
|
||||||
|
|
||||||
name, address, features, err := scr.review(doc)
|
name, address, feats, err := scr.review(doc)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
log.Printf("failed to scrape review at %s (%v)", url, err)
|
log.Printf("failed to scrape review at %s (%v)", url, err)
|
||||||
return
|
return
|
||||||
}
|
}
|
||||||
|
|
||||||
out <- restaurant{
|
out <- restaurant{
|
||||||
name: name,
|
name: name,
|
||||||
address: address,
|
address: address,
|
||||||
features: features,
|
feats: feats,
|
||||||
url: url}
|
url: url}
|
||||||
}
|
}
|
||||||
|
|
||||||
func scrapeIndex(indexUrl string, out chan restaurant, wc *webCache, scr scraper) {
|
func scrapeIndex(indexUrl string, out chan restaurant, wc *webCache, scr scraper) {
|
||||||
|
@ -50,7 +50,7 @@ func (tabelog) index(doc *goquery.Document) (string, []string) {
|
|||||||
return nextIndexUrl, reviewUrls
|
return nextIndexUrl, reviewUrls
|
||||||
}
|
}
|
||||||
|
|
||||||
func (tabelog) review(doc *goquery.Document) (name, address string, features map[string]float64, err error) {
|
func (tabelog) review(doc *goquery.Document) (name, address string, feat features, err error) {
|
||||||
name = doc.Find("a.rd-header__rst-name-main").Text()
|
name = doc.Find("a.rd-header__rst-name-main").Text()
|
||||||
|
|
||||||
if addresses := doc.Find("p.rd-detail-info__rst-address"); addresses.Length() == 2 {
|
if addresses := doc.Find("p.rd-detail-info__rst-address"); addresses.Length() == 2 {
|
||||||
@ -60,14 +60,19 @@ func (tabelog) review(doc *goquery.Document) (name, address string, features map
|
|||||||
return
|
return
|
||||||
}
|
}
|
||||||
|
|
||||||
features = make(map[string]float64)
|
f := make(map[string]float64)
|
||||||
for index, category := range []string{"dishes", "service", "atmosphere", "cost", "drinks"} {
|
for index, category := range []string{"dishes", "service", "atmosphere", "cost", "drinks"} {
|
||||||
text := doc.Find(fmt.Sprintf("#js-rating-detail > dd:nth-child(%d)", (index+1)*2)).Text()
|
text := doc.Find(fmt.Sprintf("#js-rating-detail > dd:nth-child(%d)", (index+1)*2)).Text()
|
||||||
if features[category], err = strconv.ParseFloat(text, 8); err != nil {
|
if f[category], err = strconv.ParseFloat(text, 8); err != nil {
|
||||||
err = fmt.Errorf("invalid value for %s", category)
|
err = fmt.Errorf("invalid value for %s", category)
|
||||||
return
|
return
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
feat.accommodating = f["service"]/2.5 - 1.0
|
||||||
|
feat.affordable = f["cost"]/2.5 - 1.0
|
||||||
|
feat.atmospheric = f["atmosphere"]/2.5 - 1.0
|
||||||
|
feat.delicious = f["dishes"]/2.5 - 1.0
|
||||||
|
|
||||||
return
|
return
|
||||||
}
|
}
|
||||||
|
@ -50,7 +50,7 @@ func (tripadvisor) index(doc *goquery.Document) (string, []string) {
|
|||||||
return nextIndexUrl, reviewUrls
|
return nextIndexUrl, reviewUrls
|
||||||
}
|
}
|
||||||
|
|
||||||
func (tripadvisor) review(doc *goquery.Document) (name, address string, features map[string]float64, err error) {
|
func (tripadvisor) review(doc *goquery.Document) (name, address string, feat features, err error) {
|
||||||
name = strings.TrimSpace(doc.Find("h1#HEADING").Text())
|
name = strings.TrimSpace(doc.Find("h1#HEADING").Text())
|
||||||
address = strings.TrimSpace(doc.Find("address span.format_address").Text())
|
address = strings.TrimSpace(doc.Find("address span.format_address").Text())
|
||||||
|
|
||||||
@ -60,15 +60,20 @@ func (tripadvisor) review(doc *goquery.Document) (name, address string, features
|
|||||||
return
|
return
|
||||||
}
|
}
|
||||||
|
|
||||||
features = make(map[string]float64)
|
f := make(map[string]float64)
|
||||||
for index, category := range []string{"food", "service", "value", "atmosphere"} {
|
for index, category := range []string{"food", "service", "value", "atmosphere"} {
|
||||||
alt, _ := ratings.Eq(index).Attr("alt")
|
alt, _ := ratings.Eq(index).Attr("alt")
|
||||||
rating := strings.Split(alt, " ")[0]
|
rating := strings.Split(alt, " ")[0]
|
||||||
if features[category], err = strconv.ParseFloat(rating, 8); err != nil {
|
if f[category], err = strconv.ParseFloat(rating, 8); err != nil {
|
||||||
err = fmt.Errorf("invalid value for %s", category)
|
err = fmt.Errorf("invalid value for %s", category)
|
||||||
return
|
return
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
feat.accommodating = f["service"]/2.5 - 1.0
|
||||||
|
feat.affordable = f["value"]/2.5 - 1.0
|
||||||
|
feat.atmospheric = f["atmosphere"]/2.5 - 1.0
|
||||||
|
feat.delicious = f["food"]/2.5 - 1.0
|
||||||
|
|
||||||
return
|
return
|
||||||
}
|
}
|
||||||
|
Loading…
Reference in New Issue
Block a user