// restaurant-search/util.go

/*
* Copyright (c) 2015 Alex Yatskov <alex@foosoft.net>
* Author: Alex Yatskov <alex@foosoft.net>
*
* Permission is hereby granted, free of charge, to any person obtaining a copy of
* this software and associated documentation files (the "Software"), to deal in
* the Software without restriction, including without limitation the rights to
* use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of
* the Software, and to permit persons to whom the Software is furnished to do so,
* subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in all
* copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS
* FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR
* COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER
* IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*/
package main
import (
"log"
"math"
"strconv"
"sync"
"github.com/kellydunn/golang-geo"
)
func fixFeatures(features featureMap) featureMap {
fixedFeatures := featureMap{
2015-04-27 04:53:16 +00:00
"nearby": 0.0,
"accessible": 0.0,
"delicious": 0.0,
"accommodating": 0.0,
"affordable": 0.0,
"atmospheric": 0.0}
2015-03-25 04:12:22 +00:00
2015-06-29 04:13:54 +00:00
for name := range fixedFeatures {
2015-06-29 10:09:34 +00:00
if value, ok := features[name]; ok {
fixedFeatures[name] = value
}
2015-03-25 04:12:22 +00:00
}
return fixedFeatures
}
func fixModes(modes map[string]string) modeMap {
2015-06-29 04:13:54 +00:00
fixedModes := modeMap{
2015-06-29 10:09:34 +00:00
"nearby": modeTypeProd,
"accessible": modeTypeProd,
"delicious": modeTypeProd,
"accommodating": modeTypeProd,
"affordable": modeTypeProd,
"atmospheric": modeTypeProd}
2015-06-29 04:13:54 +00:00
for name := range fixedModes {
2015-06-29 04:20:48 +00:00
if value, ok := modes[name]; ok {
2015-06-29 10:09:34 +00:00
if mode, err := parseModeType(value); err == nil {
fixedModes[name] = mode
}
2015-06-29 04:20:48 +00:00
}
2015-06-29 04:06:26 +00:00
}
2015-06-29 04:13:54 +00:00
return fixedModes
2015-06-29 04:06:26 +00:00
}
func similarity(features1 featureMap, features2 featureMap) float64 {
2015-06-30 04:50:25 +00:00
var result float64
2015-06-29 10:09:34 +00:00
2015-03-24 03:45:18 +00:00
for key, value1 := range features1 {
if value2, ok := features2[key]; ok {
result += value1 * value2
}
2015-03-24 03:45:18 +00:00
}
2015-06-30 04:50:25 +00:00
return result
2015-03-24 03:45:18 +00:00
}
func compare(features1 featureMap, features2 featureMap, modes modeMap) float64 {
var result float64
2015-06-29 10:09:34 +00:00
2015-06-29 04:06:26 +00:00
for key, value1 := range features1 {
value2, _ := features2[key]
switch mode, _ := modes[key]; mode {
2015-06-29 10:09:34 +00:00
case modeTypeDist:
2015-06-29 04:31:04 +00:00
result += 1 - math.Abs(value1-value2)
2015-06-29 10:09:34 +00:00
case modeTypeProd:
2015-06-29 04:06:26 +00:00
result += value1 * value2
2015-06-29 10:09:34 +00:00
default:
log.Fatal("unsupported compare mode")
2015-06-29 04:06:26 +00:00
}
}
return result
}
func walkMatches(entries records, features featureMap, modes modeMap, minScore float64, callback func(record, float64)) {
2015-03-26 03:51:49 +00:00
for _, entry := range entries {
2015-06-29 04:06:26 +00:00
if score := compare(features, entry.features, modes); score >= minScore {
2015-03-26 03:51:49 +00:00
callback(entry, score)
2015-03-24 03:45:18 +00:00
}
}
}
func statRecords(entries records, features featureMap, modes modeMap, minScore float64) (float64, int) {
2015-03-25 11:17:12 +00:00
var compatibility float64
var count int
2015-06-29 04:06:26 +00:00
walkMatches(entries, features, modes, minScore, func(entry record, score float64) {
2015-03-26 03:51:49 +00:00
compatibility += entry.compatibility
2015-03-25 11:17:12 +00:00
count++
2015-03-24 04:17:39 +00:00
})
2015-03-25 11:17:12 +00:00
return compatibility, count
2015-03-24 04:17:39 +00:00
}
// stepRange divides the interval [min, max] into steps equal-width buckets and
// calls callback once per bucket with the bucket's midpoint.
//
// Buckets are visited from the top of the range downwards, so the first
// midpoint is the one closest to max and the last is the one closest to min.
// When steps is zero or negative the callback is never invoked.
func stepRange(min, max float64, steps int, callback func(float64)) {
	if steps <= 0 {
		// Nothing to visit; returning here also avoids dividing by zero below.
		return
	}

	stepSize := (max - min) / float64(steps)

	for i := 0; i < steps; i++ {
		stepMax := max - stepSize*float64(i)
		stepMin := stepMax - stepSize
		stepMid := (stepMin + stepMax) / 2
		callback(stepMid)
	}
}
func findRecords(entries records, features featureMap, modes modeMap, minScore float64) records {
2015-03-25 03:00:54 +00:00
var foundEntries records
2015-03-24 06:16:58 +00:00
2015-06-29 04:06:26 +00:00
walkMatches(entries, features, modes, minScore, func(entry record, score float64) {
2015-03-26 03:51:49 +00:00
entry.score = score
foundEntries = append(foundEntries, entry)
2015-03-24 06:16:58 +00:00
})
2015-03-25 03:00:54 +00:00
return foundEntries
2015-03-24 06:16:58 +00:00
}
func project(entries records, features featureMap, modes modeMap, featureName string, minScore float64, steps int) []queryProjection {
2015-03-24 11:04:52 +00:00
sampleFeatures := make(featureMap)
2015-03-24 06:16:58 +00:00
for key, value := range features {
sampleFeatures[key] = value
}
2015-03-24 11:04:52 +00:00
var projection []queryProjection
2015-03-25 11:17:12 +00:00
stepRange(-1.0, 1.0, steps, func(sample float64) {
2015-04-18 09:24:45 +00:00
sample, sampleFeatures[featureName] = sampleFeatures[featureName], sample
2015-06-29 04:06:26 +00:00
compatibility, count := statRecords(entries, sampleFeatures, modes, minScore)
2015-04-18 09:24:45 +00:00
sample, sampleFeatures[featureName] = sampleFeatures[featureName], sample
2015-03-25 11:17:12 +00:00
projection = append(projection, queryProjection{compatibility, count, sample})
2015-03-24 06:16:58 +00:00
})
return projection
}
func computeRecordsGeo(entries records, context queryContext) {
distUserMin := math.MaxFloat64
distUserMax := 0.0
2015-03-25 09:22:57 +00:00
for index := range entries {
entry := &entries[index]
2015-03-24 13:55:25 +00:00
if context.geo != nil {
2015-03-24 08:58:35 +00:00
userPoint := geo.NewPoint(context.geo.latitude, context.geo.longitude)
2015-03-25 09:22:57 +00:00
entryPoint := geo.NewPoint(entry.geo.latitude, context.geo.longitude)
entry.distanceToUser = userPoint.GreatCircleDistance(entryPoint)
}
2015-03-25 09:22:57 +00:00
distUserMin = math.Min(entry.distanceToUser, distUserMin)
distUserMax = math.Max(entry.distanceToUser, distUserMax)
}
distUserRange := distUserMax - distUserMin
2015-03-25 09:22:57 +00:00
for index := range entries {
entry := &entries[index]
var accessible, nearby float64
if distUserRange > 0 {
nearby = -((entry.distanceToUser-distUserMin)/distUserRange - 0.5) * 2.0
2015-07-31 04:30:59 +00:00
accessible = 1.0 - (entry.distanceToStn / (context.walkingDist * 1000))
2015-03-25 09:22:57 +00:00
accessible = math.Max(accessible, -1.0)
accessible = math.Min(accessible, 1.0)
}
2015-03-25 09:22:57 +00:00
entry.features["nearby"] = nearby
entry.features["accessible"] = accessible
}
}
func computeRecordCompat(entry *record, context queryContext, wg *sync.WaitGroup) {
historyRows, err := db.Query("SELECT id FROM history WHERE reviewId = (?)", entry.id)
if err != nil {
log.Fatal(err)
}
defer historyRows.Close()
var groupSum float64
var groupCount int
for historyRows.Next() {
var historyId int
if err := historyRows.Scan(&historyId); err != nil {
log.Fatal(err)
}
2015-03-25 09:41:19 +00:00
groupRows, err := db.Query("SELECT categoryId, categoryValue FROM historyGroups WHERE historyId = (?)", historyId)
if err != nil {
log.Fatal(err)
}
defer groupRows.Close()
recordProfile := make(featureMap)
for groupRows.Next() {
var categoryId int
var categoryValue float64
if err := groupRows.Scan(&categoryId, &categoryValue); err != nil {
log.Fatal(err)
}
recordProfile[strconv.Itoa(categoryId)] = categoryValue
}
if err := groupRows.Err(); err != nil {
log.Fatal(err)
}
2015-08-05 04:35:38 +00:00
groupSum += similarity(recordProfile, context.profile)
groupCount++
}
if err := historyRows.Err(); err != nil {
log.Fatal(err)
}
if groupCount > 0 {
entry.compatibility = groupSum / float64(groupCount)
}
wg.Done()
}
// computeRecordsCompat runs computeRecordCompat for every entry, writing the
// results into entries in place.
//
// The entries are processed in consecutive batches of at most 32. Each batch
// starts one goroutine per entry and waits for all of them to finish before
// the next batch begins, which caps the number of concurrent computations.
func computeRecordsCompat(entries records, context queryContext) {
	const batchSize = 32

	total := len(entries)
	for start := 0; start < total; start += batchSize {
		end := start + batchSize
		if end > total {
			end = total
		}

		var wg sync.WaitGroup
		wg.Add(end - start)

		for index := start; index < end; index++ {
			go computeRecordCompat(&entries[index], context, &wg)
		}

		wg.Wait()
	}
}
func getRecords(context queryContext) records {
2015-08-23 08:17:38 +00:00
recordRows, err := db.Query("SELECT name, url, delicious, accommodating, affordable, atmospheric, latitude, longitude, closestStnDist, closestStnName, accessCount, id FROM reviews")
2015-03-25 03:00:54 +00:00
if err != nil {
log.Fatal(err)
}
2015-03-25 10:25:14 +00:00
defer recordRows.Close()
2015-03-25 03:00:54 +00:00
var entries []record
2015-03-25 10:25:14 +00:00
for recordRows.Next() {
2015-03-25 03:00:54 +00:00
var name, url, closestStn string
2015-04-27 04:53:16 +00:00
var delicious, accommodating, affordable, atmospheric, latitude, longitude, distanceToStn float64
2015-03-25 03:00:54 +00:00
var accessCount, id int
2015-03-25 10:25:14 +00:00
recordRows.Scan(
2015-03-25 03:33:41 +00:00
&name,
&url,
&delicious,
2015-04-27 04:53:16 +00:00
&accommodating,
2015-03-25 03:33:41 +00:00
&affordable,
&atmospheric,
&latitude,
&longitude,
&distanceToStn,
&closestStn,
&accessCount,
&id)
entry := record{
name: name,
2015-03-26 03:18:43 +00:00
url: "http://www.tripadvisor.com" + url,
2015-03-25 03:33:41 +00:00
distanceToStn: distanceToStn,
closestStn: closestStn,
accessCount: accessCount,
2015-03-25 11:17:12 +00:00
geo: geoData{latitude, longitude},
2015-03-25 03:33:41 +00:00
id: id}
2015-03-25 03:00:54 +00:00
2015-03-25 09:22:57 +00:00
entry.features = featureMap{
2015-04-27 04:53:16 +00:00
"delicious": delicious,
"accommodating": accommodating,
"affordable": affordable,
"atmospheric": atmospheric}
2015-03-25 03:00:54 +00:00
entries = append(entries, entry)
}
2015-03-25 10:25:14 +00:00
if err := recordRows.Err(); err != nil {
2015-03-25 03:00:54 +00:00
log.Fatal(err)
}
computeRecordsCompat(entries, context)
computeRecordsGeo(entries, context)
2015-03-25 09:22:57 +00:00
2015-03-25 03:00:54 +00:00
return entries
}