diff --git a/build/build.go b/build/build.go index f2bc5f7..598ab52 100644 --- a/build/build.go +++ b/build/build.go @@ -60,15 +60,30 @@ type semantics struct { delicious float64 } +func (s semantics) combine(other semantics, weight float64) semantics { + return semantics{ + s.accomodating + other.accomodating*weight, + s.affordable + other.affordable*weight, + s.atmospheric + other.atmospheric*weight, + s.delicious + other.delicious*weight} +} + +func (s semantics) reduce(weight float64) semantics { + return semantics{ + s.accomodating / weight, + s.affordable / weight, + s.atmospheric / weight, + s.delicious / weight} +} + type restaurant struct { - name string + name string + reviews []review + sem semantics latitude float64 longitude float64 - sem semantics - reviews []review - closestStnName string closestStnDist float64 } @@ -165,6 +180,30 @@ func computeSemantics(restaraunts map[uint64]*restaurant) { type definer interface { define(keyword string) semantics } + + for _, rest := range restaraunts { + var ( + sem semantics + weight float64 + ) + + for _, rev := range rest.reviews { + def, ok := rev.scr.(definer) + if !ok { + continue + } + + for name, value := range rev.features { + sem = sem.combine(def.define(name), rev.weight*value) + } + + weight += rev.weight + } + + if weight > 0.0 { + rest.sem = sem.reduce(weight) + } + } } func dumpData(dbPath string, restaraunts map[uint64]*restaurant) error { diff --git a/build/scrape.go b/build/scrape.go index 3f9bb21..ee9fa85 100644 --- a/build/scrape.go +++ b/build/scrape.go @@ -31,17 +31,12 @@ import ( "github.com/fatih/color" ) -type feature struct { - value float64 - weight float64 -} - type review struct { - name string - address string - url string - - features map[string]feature + name string + address string + url string + features map[string]float64 + weight float64 latitude float64 longitude float64 @@ -52,7 +47,7 @@ type review struct { type scraper interface { index(doc *goquery.Document) (string, []string) - review(doc *goquery.Document) (string, string, map[string]feature, error) + review(doc *goquery.Document) (string, string, map[string]float64, float64, error) decode(address string) (float64, float64, error) load(url string) (*goquery.Document, error) } @@ -92,7 +87,7 @@ func scrapeReview(url string, out chan review, scr scraper, group *sync.WaitGrou ) if doc, rev.err = scr.load(rev.url); rev.err == nil { - rev.name, rev.address, rev.features, rev.err = scr.review(doc) + rev.name, rev.address, rev.features, rev.weight, rev.err = scr.review(doc) } out <- rev diff --git a/build/tabelog.go b/build/tabelog.go index da9ec0a..928589c 100644 --- a/build/tabelog.go +++ b/build/tabelog.go @@ -61,7 +61,8 @@ func (tabelog) index(doc *goquery.Document) (string, []string) { return nextIndexUrl, reviewUrls } -func (tabelog) review(doc *goquery.Document) (name, address string, features map[string]feature, err error) { +func (tabelog) review(doc *goquery.Document) (name, address string, features map[string]float64, weight float64, err error) { + weight = 1.0 name = doc.Find("a.rd-header__rst-name-main").Text() if addresses := doc.Find("p.rd-detail-info__rst-address"); addresses.Length() == 2 { @@ -71,7 +72,7 @@ func (tabelog) review(doc *goquery.Document) (name, address string, features map return } - features = make(map[string]feature) + features = make(map[string]float64) for index, category := range []string{"dishes", "service", "atmosphere", "cost", "drinks"} { valueText := doc.Find(fmt.Sprintf("#js-rating-detail > dd:nth-child(%d)", (index+1)*2)).Text() @@ -82,7 +83,7 @@ func (tabelog) review(doc *goquery.Document) (name, address string, features map return } - features[category] = feature{value/2.5 - 1.0, 1.0} + features[category] = value/2.5 - 1.0 } return diff --git a/build/tripadvisor.go b/build/tripadvisor.go index 21d6b57..6f413c6 100644 --- a/build/tripadvisor.go +++ b/build/tripadvisor.go @@ -60,7 +60,8 @@ func (tripadvisor) index(doc *goquery.Document) (string, []string) { return nextIndexUrl, reviewUrls } -func (tripadvisor) review(doc *goquery.Document) (name, address string, features map[string]feature, err error) { +func (tripadvisor) review(doc *goquery.Document) (name, address string, features map[string]float64, weight float64, err error) { + weight = 1.0 name = strings.TrimSpace(doc.Find("h1#HEADING").Text()) address = strings.TrimSpace(doc.Find("address span.format_address").Text()) @@ -70,7 +71,7 @@ func (tripadvisor) review(doc *goquery.Document) (name, address string, features return } - features = make(map[string]feature) + features = make(map[string]float64) for index, category := range []string{"food", "service", "value", "atmosphere"} { altText, _ := ratings.Eq(index).Attr("alt") @@ -82,7 +83,7 @@ func (tripadvisor) review(doc *goquery.Document) (name, address string, features return } - features[category] = feature{value/2.5 - 1.0, 1.0} + features[category] = value/2.5 - 1.0 } return