diff --git a/build/build.go b/build/build.go index 598ab52..3efaebd 100644 --- a/build/build.go +++ b/build/build.go @@ -33,7 +33,6 @@ import ( "log" "net/url" "os" - "strings" "github.com/PuerkitoBio/goquery" "github.com/fatih/color" @@ -78,6 +77,7 @@ func (s semantics) reduce(weight float64) semantics { type restaurant struct { name string + address string reviews []review sem semantics @@ -153,8 +153,11 @@ func collateData(reviews []review) map[uint64]*restaurant { var rest *restaurant if rest, _ = restaurants[hash.Sum64()]; rest == nil { - rest = &restaurant{name: rev.name, latitude: rev.latitude, longitude: rev.longitude} - restaurants[hash.Sum64()] = rest + restaurants[hash.Sum64()] = &restaurant{ + name: rev.name, + address: rev.address, + latitude: rev.latitude, + longitude: rev.longitude} } rest.reviews = append(rest.reviews, rev) @@ -217,7 +220,7 @@ func dumpData(dbPath string, restaraunts map[uint64]*restaurant) error { DROP TABLE IF EXISTS reviews; CREATE TABLE reviews( name VARCHAR(100) NOT NULL, - urls VARCHAR(200) NOT NULL, + address VARCHAR(400) NOT NULL, delicious FLOAT NOT NULL, accommodating FLOAT NOT NULL, affordable FLOAT NOT NULL, @@ -235,15 +238,10 @@ func dumpData(dbPath string, restaraunts map[uint64]*restaurant) error { } for _, rest := range restaraunts { - var urls []string - for _, rev := range rest.reviews { - urls = append(urls, rev.url) - } - _, err = db.Exec(` INSERT INTO reviews( name, - urls, + address, delicious, accommodating, affordable, @@ -255,7 +253,7 @@ func dumpData(dbPath string, restaraunts map[uint64]*restaurant) error { accessCount ) VALUES(?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)`, rest.name, - strings.Join(urls, ","), + rest.address, rest.sem.delicious, rest.sem.accomodating, rest.sem.affordable, diff --git a/build/tabelog.go b/build/tabelog.go index cd83148..f801074 100644 --- a/build/tabelog.go +++ b/build/tabelog.go @@ -63,14 +63,14 @@ func (tabelog) index(doc *goquery.Document) (string, []string) { func (tabelog) review(doc *goquery.Document) (name, address string, features map[string]float64, weight float64, err error) { if name = doc.Find("a.rd-header__rst-name-main").Text(); len(name) == 0 { - err = errors.New("invalid value for name") + err = errors.New("invalid name") return } if addresses := doc.Find("p.rd-detail-info__rst-address"); addresses.Length() == 2 { address = strings.TrimSpace(addresses.First().Text()) } else { - err = errors.New("invalid value for address") + err = errors.New("invalid address") return } @@ -80,7 +80,7 @@ func (tabelog) review(doc *goquery.Document) (name, address string, features map var value float64 if value, err = strconv.ParseFloat(valueText, 8); err != nil { - err = fmt.Errorf("invalid value for %s", category) + err = fmt.Errorf("invalid rating for %s", category) return } @@ -89,7 +89,7 @@ func (tabelog) review(doc *goquery.Document) (name, address string, features map weight, err = strconv.ParseFloat(doc.Find("a.rd-header__rst-reviews-target > b").Text(), 8) if err != nil { - err = fmt.Errorf("invalid value for review count") + err = fmt.Errorf("invalid review count") return } diff --git a/build/tripadvisor.go b/build/tripadvisor.go index 1daf529..c44bc9f 100644 --- a/build/tripadvisor.go +++ b/build/tripadvisor.go @@ -62,44 +62,59 @@ func (tripadvisor) index(doc *goquery.Document) (string, []string) { func (tripadvisor) review(doc *goquery.Document) (name, address string, features map[string]float64, weight float64, err error) { if name = strings.TrimSpace(doc.Find("h1#HEADING").Text()); len(name) == 0 { - err = errors.New("invalid value for name name") + err = errors.New("invalid name") return } - if address = strings.TrimSpace(doc.Find("address span.format_address").Text()); len(address) == 0 { - err = errors.New("invalid value for address") - return - } + { + var addressParts []string + doc.Find("address span.format_address > span").Each(func(index int, sel *goquery.Selection) { + addressParts = append(addressParts, strings.TrimSpace(sel.Text())) + }) - ratings := doc.Find("ul.barChart div.ratingRow img.sprite-rating_s_fill") - if ratings.Length() != 4 { - err = errors.New("missing rating data") - return - } - - features = make(map[string]float64) - for index, category := range []string{"food", "service", "value", "atmosphere"} { - altText, _ := ratings.Eq(index).Attr("alt") - valueText := strings.Split(altText, " ")[0] - - var value float64 - if value, err = strconv.ParseFloat(valueText, 8); err != nil { - err = fmt.Errorf("invalid value for %s", category) + if len(addressParts) == 0 { + err = errors.New("invalid address") return } - features[category] = value/2.5 - 1.0 + address = strings.Join(addressParts, " ") } - weightParts := strings.Split(doc.Find("h3.reviews_header").Text(), " ") - if len(weightParts) == 0 { - err = fmt.Errorf("missing review count") - return + { + ratings := doc.Find("ul.barChart div.ratingRow img.sprite-rating_s_fill") + if ratings.Length() != 4 { + err = errors.New("invalid ratings") + return + } + + features = make(map[string]float64) + for index, category := range []string{"food", "service", "value", "atmosphere"} { + altText, _ := ratings.Eq(index).Attr("alt") + valueText := strings.Split(altText, " ")[0] + + var value float64 + if value, err = strconv.ParseFloat(valueText, 8); err != nil { + err = fmt.Errorf("invalid rating for %s", category) + return + } + + features[category] = value/2.5 - 1.0 + } } - if weight, err = strconv.ParseFloat(weightParts[0], 8); err != nil { - err = fmt.Errorf("invalid value for review count") - return + { + weightValid := false + if weightParts := strings.Split(doc.Find("h3.reviews_header").Text(), " "); len(weightParts) > 0 { + if weight, err = strconv.ParseFloat(weightParts[0], 8); err == nil { + weightValid = true + return + } + } + + if !weightValid { + err = fmt.Errorf("invalid review count") + return + } } return