Semantics update
This commit is contained in:
parent
02c6cf9767
commit
018033466b
@ -60,15 +60,30 @@ type semantics struct {
|
||||
delicious float64
|
||||
}
|
||||
|
||||
func (s semantics) combine(other semantics, weight float64) semantics {
|
||||
return semantics{
|
||||
s.accomodating + other.accomodating*weight,
|
||||
s.affordable + other.affordable*weight,
|
||||
s.atmospheric + other.atmospheric*weight,
|
||||
s.delicious + other.delicious*weight}
|
||||
}
|
||||
|
||||
func (s semantics) reduce(weight float64) semantics {
|
||||
return semantics{
|
||||
s.accomodating / weight,
|
||||
s.affordable / weight,
|
||||
s.atmospheric / weight,
|
||||
s.delicious / weight}
|
||||
}
|
||||
|
||||
type restaurant struct {
|
||||
name string
|
||||
name string
|
||||
reviews []review
|
||||
sem semantics
|
||||
|
||||
latitude float64
|
||||
longitude float64
|
||||
|
||||
sem semantics
|
||||
reviews []review
|
||||
|
||||
closestStnName string
|
||||
closestStnDist float64
|
||||
}
|
||||
@ -165,6 +180,30 @@ func computeSemantics(restaraunts map[uint64]*restaurant) {
|
||||
type definer interface {
|
||||
define(keyword string) semantics
|
||||
}
|
||||
|
||||
for _, rest := range restaraunts {
|
||||
var (
|
||||
sem semantics
|
||||
weight float64
|
||||
)
|
||||
|
||||
for _, rev := range rest.reviews {
|
||||
def, ok := rev.scr.(definer)
|
||||
if !ok {
|
||||
continue
|
||||
}
|
||||
|
||||
for name, value := range rev.features {
|
||||
sem = sem.combine(def.define(name), rev.weight*value)
|
||||
}
|
||||
|
||||
weight += rev.weight
|
||||
}
|
||||
|
||||
if weight > 0.0 {
|
||||
rest.sem = sem.reduce(weight)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func dumpData(dbPath string, restaraunts map[uint64]*restaurant) error {
|
||||
|
@ -31,17 +31,12 @@ import (
|
||||
"github.com/fatih/color"
|
||||
)
|
||||
|
||||
type feature struct {
|
||||
value float64
|
||||
weight float64
|
||||
}
|
||||
|
||||
type review struct {
|
||||
name string
|
||||
address string
|
||||
url string
|
||||
|
||||
features map[string]feature
|
||||
name string
|
||||
address string
|
||||
url string
|
||||
features map[string]float64
|
||||
weight float64
|
||||
|
||||
latitude float64
|
||||
longitude float64
|
||||
@ -52,7 +47,7 @@ type review struct {
|
||||
|
||||
type scraper interface {
|
||||
index(doc *goquery.Document) (string, []string)
|
||||
review(doc *goquery.Document) (string, string, map[string]feature, error)
|
||||
review(doc *goquery.Document) (string, string, map[string]float64, float64, error)
|
||||
decode(address string) (float64, float64, error)
|
||||
load(url string) (*goquery.Document, error)
|
||||
}
|
||||
@ -92,7 +87,7 @@ func scrapeReview(url string, out chan review, scr scraper, group *sync.WaitGrou
|
||||
)
|
||||
|
||||
if doc, rev.err = scr.load(rev.url); rev.err == nil {
|
||||
rev.name, rev.address, rev.features, rev.err = scr.review(doc)
|
||||
rev.name, rev.address, rev.features, rev.weight, rev.err = scr.review(doc)
|
||||
}
|
||||
|
||||
out <- rev
|
||||
|
@ -61,7 +61,8 @@ func (tabelog) index(doc *goquery.Document) (string, []string) {
|
||||
return nextIndexUrl, reviewUrls
|
||||
}
|
||||
|
||||
func (tabelog) review(doc *goquery.Document) (name, address string, features map[string]feature, err error) {
|
||||
func (tabelog) review(doc *goquery.Document) (name, address string, features map[string]float64, weight float64, err error) {
|
||||
weight = 1.0
|
||||
name = doc.Find("a.rd-header__rst-name-main").Text()
|
||||
|
||||
if addresses := doc.Find("p.rd-detail-info__rst-address"); addresses.Length() == 2 {
|
||||
@ -71,7 +72,7 @@ func (tabelog) review(doc *goquery.Document) (name, address string, features map
|
||||
return
|
||||
}
|
||||
|
||||
features = make(map[string]feature)
|
||||
features = make(map[string]float64)
|
||||
|
||||
for index, category := range []string{"dishes", "service", "atmosphere", "cost", "drinks"} {
|
||||
valueText := doc.Find(fmt.Sprintf("#js-rating-detail > dd:nth-child(%d)", (index+1)*2)).Text()
|
||||
@ -82,7 +83,7 @@ func (tabelog) review(doc *goquery.Document) (name, address string, features map
|
||||
return
|
||||
}
|
||||
|
||||
features[category] = feature{value/2.5 - 1.0, 1.0}
|
||||
features[category] = value/2.5 - 1.0
|
||||
}
|
||||
|
||||
return
|
||||
|
@ -60,7 +60,8 @@ func (tripadvisor) index(doc *goquery.Document) (string, []string) {
|
||||
return nextIndexUrl, reviewUrls
|
||||
}
|
||||
|
||||
func (tripadvisor) review(doc *goquery.Document) (name, address string, features map[string]feature, err error) {
|
||||
func (tripadvisor) review(doc *goquery.Document) (name, address string, features map[string]float64, weight float64, err error) {
|
||||
weight = 1.0
|
||||
name = strings.TrimSpace(doc.Find("h1#HEADING").Text())
|
||||
address = strings.TrimSpace(doc.Find("address span.format_address").Text())
|
||||
|
||||
@ -70,7 +71,7 @@ func (tripadvisor) review(doc *goquery.Document) (name, address string, features
|
||||
return
|
||||
}
|
||||
|
||||
features = make(map[string]feature)
|
||||
features = make(map[string]float64)
|
||||
|
||||
for index, category := range []string{"food", "service", "value", "atmosphere"} {
|
||||
altText, _ := ratings.Eq(index).Attr("alt")
|
||||
@ -82,7 +83,7 @@ func (tripadvisor) review(doc *goquery.Document) (name, address string, features
|
||||
return
|
||||
}
|
||||
|
||||
features[category] = feature{value/2.5 - 1.0, 1.0}
|
||||
features[category] = value/2.5 - 1.0
|
||||
}
|
||||
|
||||
return
|
||||
|
Loading…
x
Reference in New Issue
Block a user