Semantics update
This commit is contained in:
parent
02c6cf9767
commit
018033466b
@ -60,15 +60,30 @@ type semantics struct {
|
|||||||
delicious float64
|
delicious float64
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func (s semantics) combine(other semantics, weight float64) semantics {
|
||||||
|
return semantics{
|
||||||
|
s.accomodating + other.accomodating*weight,
|
||||||
|
s.affordable + other.affordable*weight,
|
||||||
|
s.atmospheric + other.atmospheric*weight,
|
||||||
|
s.delicious + other.delicious*weight}
|
||||||
|
}
|
||||||
|
|
||||||
|
func (s semantics) reduce(weight float64) semantics {
|
||||||
|
return semantics{
|
||||||
|
s.accomodating / weight,
|
||||||
|
s.affordable / weight,
|
||||||
|
s.atmospheric / weight,
|
||||||
|
s.delicious / weight}
|
||||||
|
}
|
||||||
|
|
||||||
type restaurant struct {
|
type restaurant struct {
|
||||||
name string
|
name string
|
||||||
|
reviews []review
|
||||||
|
sem semantics
|
||||||
|
|
||||||
latitude float64
|
latitude float64
|
||||||
longitude float64
|
longitude float64
|
||||||
|
|
||||||
sem semantics
|
|
||||||
reviews []review
|
|
||||||
|
|
||||||
closestStnName string
|
closestStnName string
|
||||||
closestStnDist float64
|
closestStnDist float64
|
||||||
}
|
}
|
||||||
@ -165,6 +180,30 @@ func computeSemantics(restaraunts map[uint64]*restaurant) {
|
|||||||
type definer interface {
|
type definer interface {
|
||||||
define(keyword string) semantics
|
define(keyword string) semantics
|
||||||
}
|
}
|
||||||
|
|
||||||
|
for _, rest := range restaraunts {
|
||||||
|
var (
|
||||||
|
sem semantics
|
||||||
|
weight float64
|
||||||
|
)
|
||||||
|
|
||||||
|
for _, rev := range rest.reviews {
|
||||||
|
def, ok := rev.scr.(definer)
|
||||||
|
if !ok {
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
|
||||||
|
for name, value := range rev.features {
|
||||||
|
sem = sem.combine(def.define(name), rev.weight*value)
|
||||||
|
}
|
||||||
|
|
||||||
|
weight += rev.weight
|
||||||
|
}
|
||||||
|
|
||||||
|
if weight > 0.0 {
|
||||||
|
rest.sem = sem.reduce(weight)
|
||||||
|
}
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
func dumpData(dbPath string, restaraunts map[uint64]*restaurant) error {
|
func dumpData(dbPath string, restaraunts map[uint64]*restaurant) error {
|
||||||
|
@ -31,17 +31,12 @@ import (
|
|||||||
"github.com/fatih/color"
|
"github.com/fatih/color"
|
||||||
)
|
)
|
||||||
|
|
||||||
type feature struct {
|
|
||||||
value float64
|
|
||||||
weight float64
|
|
||||||
}
|
|
||||||
|
|
||||||
type review struct {
|
type review struct {
|
||||||
name string
|
name string
|
||||||
address string
|
address string
|
||||||
url string
|
url string
|
||||||
|
features map[string]float64
|
||||||
features map[string]feature
|
weight float64
|
||||||
|
|
||||||
latitude float64
|
latitude float64
|
||||||
longitude float64
|
longitude float64
|
||||||
@ -52,7 +47,7 @@ type review struct {
|
|||||||
|
|
||||||
type scraper interface {
|
type scraper interface {
|
||||||
index(doc *goquery.Document) (string, []string)
|
index(doc *goquery.Document) (string, []string)
|
||||||
review(doc *goquery.Document) (string, string, map[string]feature, error)
|
review(doc *goquery.Document) (string, string, map[string]float64, float64, error)
|
||||||
decode(address string) (float64, float64, error)
|
decode(address string) (float64, float64, error)
|
||||||
load(url string) (*goquery.Document, error)
|
load(url string) (*goquery.Document, error)
|
||||||
}
|
}
|
||||||
@ -92,7 +87,7 @@ func scrapeReview(url string, out chan review, scr scraper, group *sync.WaitGrou
|
|||||||
)
|
)
|
||||||
|
|
||||||
if doc, rev.err = scr.load(rev.url); rev.err == nil {
|
if doc, rev.err = scr.load(rev.url); rev.err == nil {
|
||||||
rev.name, rev.address, rev.features, rev.err = scr.review(doc)
|
rev.name, rev.address, rev.features, rev.weight, rev.err = scr.review(doc)
|
||||||
}
|
}
|
||||||
|
|
||||||
out <- rev
|
out <- rev
|
||||||
|
@ -61,7 +61,8 @@ func (tabelog) index(doc *goquery.Document) (string, []string) {
|
|||||||
return nextIndexUrl, reviewUrls
|
return nextIndexUrl, reviewUrls
|
||||||
}
|
}
|
||||||
|
|
||||||
func (tabelog) review(doc *goquery.Document) (name, address string, features map[string]feature, err error) {
|
func (tabelog) review(doc *goquery.Document) (name, address string, features map[string]float64, weight float64, err error) {
|
||||||
|
weight = 1.0
|
||||||
name = doc.Find("a.rd-header__rst-name-main").Text()
|
name = doc.Find("a.rd-header__rst-name-main").Text()
|
||||||
|
|
||||||
if addresses := doc.Find("p.rd-detail-info__rst-address"); addresses.Length() == 2 {
|
if addresses := doc.Find("p.rd-detail-info__rst-address"); addresses.Length() == 2 {
|
||||||
@ -71,7 +72,7 @@ func (tabelog) review(doc *goquery.Document) (name, address string, features map
|
|||||||
return
|
return
|
||||||
}
|
}
|
||||||
|
|
||||||
features = make(map[string]feature)
|
features = make(map[string]float64)
|
||||||
|
|
||||||
for index, category := range []string{"dishes", "service", "atmosphere", "cost", "drinks"} {
|
for index, category := range []string{"dishes", "service", "atmosphere", "cost", "drinks"} {
|
||||||
valueText := doc.Find(fmt.Sprintf("#js-rating-detail > dd:nth-child(%d)", (index+1)*2)).Text()
|
valueText := doc.Find(fmt.Sprintf("#js-rating-detail > dd:nth-child(%d)", (index+1)*2)).Text()
|
||||||
@ -82,7 +83,7 @@ func (tabelog) review(doc *goquery.Document) (name, address string, features map
|
|||||||
return
|
return
|
||||||
}
|
}
|
||||||
|
|
||||||
features[category] = feature{value/2.5 - 1.0, 1.0}
|
features[category] = value/2.5 - 1.0
|
||||||
}
|
}
|
||||||
|
|
||||||
return
|
return
|
||||||
|
@ -60,7 +60,8 @@ func (tripadvisor) index(doc *goquery.Document) (string, []string) {
|
|||||||
return nextIndexUrl, reviewUrls
|
return nextIndexUrl, reviewUrls
|
||||||
}
|
}
|
||||||
|
|
||||||
func (tripadvisor) review(doc *goquery.Document) (name, address string, features map[string]feature, err error) {
|
func (tripadvisor) review(doc *goquery.Document) (name, address string, features map[string]float64, weight float64, err error) {
|
||||||
|
weight = 1.0
|
||||||
name = strings.TrimSpace(doc.Find("h1#HEADING").Text())
|
name = strings.TrimSpace(doc.Find("h1#HEADING").Text())
|
||||||
address = strings.TrimSpace(doc.Find("address span.format_address").Text())
|
address = strings.TrimSpace(doc.Find("address span.format_address").Text())
|
||||||
|
|
||||||
@ -70,7 +71,7 @@ func (tripadvisor) review(doc *goquery.Document) (name, address string, features
|
|||||||
return
|
return
|
||||||
}
|
}
|
||||||
|
|
||||||
features = make(map[string]feature)
|
features = make(map[string]float64)
|
||||||
|
|
||||||
for index, category := range []string{"food", "service", "value", "atmosphere"} {
|
for index, category := range []string{"food", "service", "value", "atmosphere"} {
|
||||||
altText, _ := ratings.Eq(index).Attr("alt")
|
altText, _ := ratings.Eq(index).Attr("alt")
|
||||||
@ -82,7 +83,7 @@ func (tripadvisor) review(doc *goquery.Document) (name, address string, features
|
|||||||
return
|
return
|
||||||
}
|
}
|
||||||
|
|
||||||
features[category] = feature{value/2.5 - 1.0, 1.0}
|
features[category] = value/2.5 - 1.0
|
||||||
}
|
}
|
||||||
|
|
||||||
return
|
return
|
||||||
|
Loading…
Reference in New Issue
Block a user