1

Semantics update

This commit is contained in:
Alex Yatskov 2015-09-18 18:41:01 +09:00
parent 02c6cf9767
commit 018033466b
4 changed files with 58 additions and 22 deletions

View File

@ -60,15 +60,30 @@ type semantics struct {
delicious float64 delicious float64
} }
// combine returns a copy of s in which each component has been increased by
// the matching component of other multiplied by weight. The receiver is passed
// by value, so the caller's semantics is left untouched.
func (s semantics) combine(other semantics, weight float64) semantics {
	merged := s // value copy; accumulate into it field by field
	merged.accomodating += other.accomodating * weight
	merged.affordable += other.affordable * weight
	merged.atmospheric += other.atmospheric * weight
	merged.delicious += other.delicious * weight
	return merged
}
// reduce returns a new semantics whose components are those of s divided by
// weight. No check is made on weight here; callers are expected to pass a
// non-zero divisor.
func (s semantics) reduce(weight float64) semantics {
	return semantics{
		accomodating: s.accomodating / weight,
		affordable:   s.affordable / weight,
		atmospheric:  s.atmospheric / weight,
		delicious:    s.delicious / weight,
	}
}
type restaurant struct { type restaurant struct {
name string name string
reviews []review
sem semantics
latitude float64 latitude float64
longitude float64 longitude float64
sem semantics
reviews []review
closestStnName string closestStnName string
closestStnDist float64 closestStnDist float64
} }
@ -165,6 +180,30 @@ func computeSemantics(restaraunts map[uint64]*restaurant) {
type definer interface { type definer interface {
define(keyword string) semantics define(keyword string) semantics
} }
for _, rest := range restaraunts {
var (
sem semantics
weight float64
)
for _, rev := range rest.reviews {
def, ok := rev.scr.(definer)
if !ok {
continue
}
for name, value := range rev.features {
sem = sem.combine(def.define(name), rev.weight*value)
}
weight += rev.weight
}
if weight > 0.0 {
rest.sem = sem.reduce(weight)
}
}
} }
func dumpData(dbPath string, restaraunts map[uint64]*restaurant) error { func dumpData(dbPath string, restaraunts map[uint64]*restaurant) error {

View File

@ -31,17 +31,12 @@ import (
"github.com/fatih/color" "github.com/fatih/color"
) )
// feature is a single scored attribute of a review: a numeric value paired
// with a weight.
type feature struct {
// value is the numeric score recorded for the feature.
value float64
// weight accompanies the value.
// NOTE(review): presumably the relative importance used when aggregating — confirm against callers.
weight float64
}
type review struct { type review struct {
name string name string
address string address string
url string url string
features map[string]float64
features map[string]feature weight float64
latitude float64 latitude float64
longitude float64 longitude float64
@ -52,7 +47,7 @@ type review struct {
type scraper interface { type scraper interface {
index(doc *goquery.Document) (string, []string) index(doc *goquery.Document) (string, []string)
review(doc *goquery.Document) (string, string, map[string]feature, error) review(doc *goquery.Document) (string, string, map[string]float64, float64, error)
decode(address string) (float64, float64, error) decode(address string) (float64, float64, error)
load(url string) (*goquery.Document, error) load(url string) (*goquery.Document, error)
} }
@ -92,7 +87,7 @@ func scrapeReview(url string, out chan review, scr scraper, group *sync.WaitGrou
) )
if doc, rev.err = scr.load(rev.url); rev.err == nil { if doc, rev.err = scr.load(rev.url); rev.err == nil {
rev.name, rev.address, rev.features, rev.err = scr.review(doc) rev.name, rev.address, rev.features, rev.weight, rev.err = scr.review(doc)
} }
out <- rev out <- rev

View File

@ -61,7 +61,8 @@ func (tabelog) index(doc *goquery.Document) (string, []string) {
return nextIndexUrl, reviewUrls return nextIndexUrl, reviewUrls
} }
func (tabelog) review(doc *goquery.Document) (name, address string, features map[string]feature, err error) { func (tabelog) review(doc *goquery.Document) (name, address string, features map[string]float64, weight float64, err error) {
weight = 1.0
name = doc.Find("a.rd-header__rst-name-main").Text() name = doc.Find("a.rd-header__rst-name-main").Text()
if addresses := doc.Find("p.rd-detail-info__rst-address"); addresses.Length() == 2 { if addresses := doc.Find("p.rd-detail-info__rst-address"); addresses.Length() == 2 {
@ -71,7 +72,7 @@ func (tabelog) review(doc *goquery.Document) (name, address string, features map
return return
} }
features = make(map[string]feature) features = make(map[string]float64)
for index, category := range []string{"dishes", "service", "atmosphere", "cost", "drinks"} { for index, category := range []string{"dishes", "service", "atmosphere", "cost", "drinks"} {
valueText := doc.Find(fmt.Sprintf("#js-rating-detail > dd:nth-child(%d)", (index+1)*2)).Text() valueText := doc.Find(fmt.Sprintf("#js-rating-detail > dd:nth-child(%d)", (index+1)*2)).Text()
@ -82,7 +83,7 @@ func (tabelog) review(doc *goquery.Document) (name, address string, features map
return return
} }
features[category] = feature{value/2.5 - 1.0, 1.0} features[category] = value/2.5 - 1.0
} }
return return

View File

@ -60,7 +60,8 @@ func (tripadvisor) index(doc *goquery.Document) (string, []string) {
return nextIndexUrl, reviewUrls return nextIndexUrl, reviewUrls
} }
func (tripadvisor) review(doc *goquery.Document) (name, address string, features map[string]feature, err error) { func (tripadvisor) review(doc *goquery.Document) (name, address string, features map[string]float64, weight float64, err error) {
weight = 1.0
name = strings.TrimSpace(doc.Find("h1#HEADING").Text()) name = strings.TrimSpace(doc.Find("h1#HEADING").Text())
address = strings.TrimSpace(doc.Find("address span.format_address").Text()) address = strings.TrimSpace(doc.Find("address span.format_address").Text())
@ -70,7 +71,7 @@ func (tripadvisor) review(doc *goquery.Document) (name, address string, features
return return
} }
features = make(map[string]feature) features = make(map[string]float64)
for index, category := range []string{"food", "service", "value", "atmosphere"} { for index, category := range []string{"food", "service", "value", "atmosphere"} {
altText, _ := ratings.Eq(index).Attr("alt") altText, _ := ratings.Eq(index).Attr("alt")
@ -82,7 +83,7 @@ func (tripadvisor) review(doc *goquery.Document) (name, address string, features
return return
} }
features[category] = feature{value/2.5 - 1.0, 1.0} features[category] = value/2.5 - 1.0
} }
return return