2015-03-24 03:45:18 +00:00
|
|
|
/*
 * Copyright (c) 2015 Alex Yatskov <alex@foosoft.net>
 * Author: Alex Yatskov <alex@foosoft.net>
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy of
 * this software and associated documentation files (the "Software"), to deal in
 * the Software without restriction, including without limitation the rights to
 * use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of
 * the Software, and to permit persons to whom the Software is furnished to do so,
 * subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in all
 * copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS
 * FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR
 * COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER
 * IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
 */
|
|
|
|
|
2015-09-01 07:52:19 +00:00
|
|
|
package search
|
2015-03-24 03:45:18 +00:00
|
|
|
|
2015-03-24 08:29:24 +00:00
|
|
|
import (
|
2015-08-24 06:42:16 +00:00
|
|
|
"database/sql"
|
2015-03-24 08:29:24 +00:00
|
|
|
"math"
|
2015-03-25 03:33:41 +00:00
|
|
|
"strconv"
|
2015-06-24 10:11:43 +00:00
|
|
|
|
|
|
|
"github.com/kellydunn/golang-geo"
|
2015-03-24 08:29:24 +00:00
|
|
|
)
|
|
|
|
|
2015-08-23 11:03:26 +00:00
|
|
|
// fixFeatures normalizes a feature map to the fixed set of known feature
// names. The result always contains exactly those names: a name present in
// features keeps its value, a missing name gets 0.0, and any other keys in
// features are dropped. The input map is not modified.
func fixFeatures(features map[string]float64) map[string]float64 {
	knownNames := [...]string{
		"nearby",
		"accessible",
		"delicious",
		"accommodating",
		"affordable",
		"atmospheric",
	}

	normalized := make(map[string]float64, len(knownNames))
	for _, name := range knownNames {
		// Indexing a map (even a nil one) with an absent key yields 0.0,
		// which is exactly the default wanted for missing features.
		normalized[name] = features[name]
	}

	return normalized
}
|
|
|
|
|
2015-08-23 11:03:26 +00:00
|
|
|
func fixModes(modes map[string]string) map[string]modeType {
|
|
|
|
fixedModes := map[string]modeType{
|
2015-06-29 10:09:34 +00:00
|
|
|
"nearby": modeTypeProd,
|
|
|
|
"accessible": modeTypeProd,
|
|
|
|
"delicious": modeTypeProd,
|
|
|
|
"accommodating": modeTypeProd,
|
|
|
|
"affordable": modeTypeProd,
|
|
|
|
"atmospheric": modeTypeProd}
|
2015-06-29 04:13:54 +00:00
|
|
|
|
|
|
|
for name := range fixedModes {
|
2015-06-29 04:20:48 +00:00
|
|
|
if value, ok := modes[name]; ok {
|
2015-06-29 10:09:34 +00:00
|
|
|
if mode, err := parseModeType(value); err == nil {
|
|
|
|
fixedModes[name] = mode
|
|
|
|
}
|
2015-06-29 04:20:48 +00:00
|
|
|
}
|
2015-06-29 04:06:26 +00:00
|
|
|
}
|
|
|
|
|
2015-06-29 04:13:54 +00:00
|
|
|
return fixedModes
|
2015-06-29 04:06:26 +00:00
|
|
|
}
|
|
|
|
|
2015-08-24 06:42:16 +00:00
|
|
|
// semanticSimilarity returns the sum of value1*value2 over every key that is
// present in both features1 and features2. Keys found in only one of the maps
// contribute nothing.
func semanticSimilarity(features1 map[string]float64, features2 map[string]float64) float64 {
	var total float64

	for name, lhs := range features1 {
		rhs, shared := features2[name]
		if !shared {
			continue
		}

		total += lhs * rhs
	}

	return total
}
|
|
|
|
|
2015-08-24 06:42:16 +00:00
|
|
|
func semanticCompare(features1 map[string]float64, features2 map[string]float64, modes map[string]modeType) float64 {
|
2015-06-29 04:06:26 +00:00
|
|
|
var result float64
|
2015-06-29 10:09:34 +00:00
|
|
|
|
2015-06-29 04:06:26 +00:00
|
|
|
for key, value1 := range features1 {
|
|
|
|
value2, _ := features2[key]
|
|
|
|
|
|
|
|
switch mode, _ := modes[key]; mode {
|
2015-06-29 10:09:34 +00:00
|
|
|
case modeTypeDist:
|
2015-06-29 04:31:04 +00:00
|
|
|
result += 1 - math.Abs(value1-value2)
|
2015-06-29 10:09:34 +00:00
|
|
|
case modeTypeProd:
|
2015-06-29 04:06:26 +00:00
|
|
|
result += value1 * value2
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
return result
|
|
|
|
}
|
|
|
|
|
2015-08-23 11:03:26 +00:00
|
|
|
func walkMatches(entries []record, features map[string]float64, modes map[string]modeType, minScore float64, callback func(record, float64)) {
|
2015-03-26 03:51:49 +00:00
|
|
|
for _, entry := range entries {
|
2015-08-24 06:42:16 +00:00
|
|
|
if score := semanticCompare(features, entry.features, modes); score >= minScore {
|
2015-03-26 03:51:49 +00:00
|
|
|
callback(entry, score)
|
2015-03-24 03:45:18 +00:00
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
2015-03-24 04:17:39 +00:00
|
|
|
|
2015-08-23 11:03:26 +00:00
|
|
|
func statRecords(entries []record, features map[string]float64, modes map[string]modeType, minScore float64) (float64, int) {
|
2015-08-23 14:20:32 +00:00
|
|
|
var (
|
|
|
|
compatibility float64
|
|
|
|
count int
|
|
|
|
)
|
2015-03-25 11:17:12 +00:00
|
|
|
|
2015-06-29 04:06:26 +00:00
|
|
|
walkMatches(entries, features, modes, minScore, func(entry record, score float64) {
|
2015-08-23 10:56:07 +00:00
|
|
|
compatibility += entry.Compatibility
|
2015-03-25 11:17:12 +00:00
|
|
|
count++
|
2015-03-24 04:17:39 +00:00
|
|
|
})
|
|
|
|
|
2015-03-25 11:17:12 +00:00
|
|
|
return compatibility, count
|
2015-03-24 04:17:39 +00:00
|
|
|
}
|
|
|
|
|
2015-03-25 11:17:12 +00:00
|
|
|
// stepRange divides [min, max] into steps equally sized intervals and calls
// callback with the midpoint of each interval, starting from the interval
// that ends at max and working down towards min. Nothing is called when
// steps is zero or negative.
func stepRange(min, max float64, steps int, callback func(float64)) {
	width := (max - min) / float64(steps)

	for index := 0; index < steps; index++ {
		upper := max - width*float64(index)
		lower := upper - width

		callback((lower + upper) / 2)
	}
}
|
|
|
|
|
2015-08-23 11:03:26 +00:00
|
|
|
func findRecords(entries []record, features map[string]float64, modes map[string]modeType, minScore float64) []record {
|
2015-08-23 10:56:07 +00:00
|
|
|
var matchedEntries []record
|
2015-03-24 06:16:58 +00:00
|
|
|
|
2015-06-29 04:06:26 +00:00
|
|
|
walkMatches(entries, features, modes, minScore, func(entry record, score float64) {
|
2015-08-23 10:56:07 +00:00
|
|
|
entry.Score = score
|
|
|
|
matchedEntries = append(matchedEntries, entry)
|
2015-03-24 06:16:58 +00:00
|
|
|
})
|
|
|
|
|
2015-08-23 10:56:07 +00:00
|
|
|
return matchedEntries
|
2015-03-24 06:16:58 +00:00
|
|
|
}
|
|
|
|
|
2015-08-23 11:03:26 +00:00
|
|
|
func project(entries []record, features map[string]float64, modes map[string]modeType, featureName string, minScore float64, steps int) []projection {
|
|
|
|
sampleFeatures := make(map[string]float64)
|
2015-03-24 06:16:58 +00:00
|
|
|
for key, value := range features {
|
|
|
|
sampleFeatures[key] = value
|
|
|
|
}
|
|
|
|
|
2015-08-23 10:56:07 +00:00
|
|
|
var projections []projection
|
2015-03-25 11:17:12 +00:00
|
|
|
stepRange(-1.0, 1.0, steps, func(sample float64) {
|
2015-04-18 09:24:45 +00:00
|
|
|
sample, sampleFeatures[featureName] = sampleFeatures[featureName], sample
|
2015-06-29 04:06:26 +00:00
|
|
|
compatibility, count := statRecords(entries, sampleFeatures, modes, minScore)
|
2015-04-18 09:24:45 +00:00
|
|
|
sample, sampleFeatures[featureName] = sampleFeatures[featureName], sample
|
|
|
|
|
2015-08-23 10:56:07 +00:00
|
|
|
projections = append(projections, projection{compatibility, count, sample})
|
2015-03-24 06:16:58 +00:00
|
|
|
})
|
|
|
|
|
2015-08-23 10:56:07 +00:00
|
|
|
return projections
|
2015-03-24 06:16:58 +00:00
|
|
|
}
|
2015-03-24 08:29:24 +00:00
|
|
|
|
2015-08-24 06:42:16 +00:00
|
|
|
func computeRecordGeo(entries []record, context queryContext) {
|
2015-03-24 08:29:24 +00:00
|
|
|
distUserMin := math.MaxFloat64
|
|
|
|
distUserMax := 0.0
|
|
|
|
|
2015-03-25 09:22:57 +00:00
|
|
|
for index := range entries {
|
|
|
|
entry := &entries[index]
|
|
|
|
|
2015-03-24 13:55:25 +00:00
|
|
|
if context.geo != nil {
|
2015-08-23 10:56:07 +00:00
|
|
|
userPoint := geo.NewPoint(context.geo.Latitude, context.geo.Longitude)
|
|
|
|
entryPoint := geo.NewPoint(entry.geo.Latitude, context.geo.Longitude)
|
|
|
|
entry.DistanceToUser = userPoint.GreatCircleDistance(entryPoint)
|
2015-03-24 08:29:24 +00:00
|
|
|
}
|
|
|
|
|
2015-08-23 10:56:07 +00:00
|
|
|
distUserMin = math.Min(entry.DistanceToUser, distUserMin)
|
|
|
|
distUserMax = math.Max(entry.DistanceToUser, distUserMax)
|
2015-03-24 08:29:24 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
distUserRange := distUserMax - distUserMin
|
|
|
|
|
2015-03-25 09:22:57 +00:00
|
|
|
for index := range entries {
|
|
|
|
entry := &entries[index]
|
|
|
|
|
|
|
|
var accessible, nearby float64
|
|
|
|
if distUserRange > 0 {
|
2015-08-23 10:56:07 +00:00
|
|
|
nearby = -((entry.DistanceToUser-distUserMin)/distUserRange - 0.5) * 2.0
|
2015-03-24 08:29:24 +00:00
|
|
|
|
2015-08-24 09:50:52 +00:00
|
|
|
accessible = 1.0 - entry.DistanceToStn/context.walkingDist
|
2015-03-25 09:22:57 +00:00
|
|
|
accessible = math.Max(accessible, -1.0)
|
|
|
|
accessible = math.Min(accessible, 1.0)
|
2015-03-24 08:29:24 +00:00
|
|
|
}
|
|
|
|
|
2015-03-25 09:22:57 +00:00
|
|
|
entry.features["nearby"] = nearby
|
|
|
|
entry.features["accessible"] = accessible
|
2015-03-24 08:29:24 +00:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2015-08-24 06:42:16 +00:00
|
|
|
func computeRecordCompat(db *sql.DB, entries []record, context queryContext) error {
|
|
|
|
for i := range entries {
|
|
|
|
entry := &entries[i]
|
2015-08-23 14:20:32 +00:00
|
|
|
|
2015-08-24 06:42:16 +00:00
|
|
|
historyRows, err := db.Query("SELECT id FROM history WHERE reviewId = (?)", entry.Id)
|
2015-03-24 08:29:24 +00:00
|
|
|
if err != nil {
|
2015-08-24 06:42:16 +00:00
|
|
|
return err
|
2015-03-24 08:29:24 +00:00
|
|
|
}
|
2015-08-24 06:42:16 +00:00
|
|
|
defer historyRows.Close()
|
2015-03-24 08:29:24 +00:00
|
|
|
|
2015-08-24 06:42:16 +00:00
|
|
|
var (
|
|
|
|
groupSum float64
|
|
|
|
groupCount int
|
|
|
|
)
|
2015-03-24 08:29:24 +00:00
|
|
|
|
2015-08-24 06:42:16 +00:00
|
|
|
for historyRows.Next() {
|
|
|
|
var historyId int
|
|
|
|
if err := historyRows.Scan(&historyId); err != nil {
|
|
|
|
return err
|
2015-03-24 08:29:24 +00:00
|
|
|
}
|
|
|
|
|
2015-08-24 06:42:16 +00:00
|
|
|
groupRows, err := db.Query("SELECT categoryId, categoryValue FROM historyGroups WHERE historyId = (?)", historyId)
|
|
|
|
if err != nil {
|
|
|
|
return err
|
|
|
|
}
|
|
|
|
defer groupRows.Close()
|
2015-03-24 08:29:24 +00:00
|
|
|
|
2015-08-24 06:42:16 +00:00
|
|
|
recordProfile := make(map[string]float64)
|
|
|
|
for groupRows.Next() {
|
|
|
|
var (
|
|
|
|
categoryId int
|
|
|
|
categoryValue float64
|
|
|
|
)
|
2015-03-24 08:29:24 +00:00
|
|
|
|
2015-08-24 06:42:16 +00:00
|
|
|
if err := groupRows.Scan(&categoryId, &categoryValue); err != nil {
|
|
|
|
return err
|
|
|
|
}
|
2015-03-24 08:29:24 +00:00
|
|
|
|
2015-08-24 06:42:16 +00:00
|
|
|
recordProfile[strconv.Itoa(categoryId)] = categoryValue
|
|
|
|
}
|
|
|
|
if err := groupRows.Err(); err != nil {
|
|
|
|
return err
|
|
|
|
}
|
2015-07-31 05:41:44 +00:00
|
|
|
|
2015-08-24 06:42:16 +00:00
|
|
|
groupSum += semanticSimilarity(recordProfile, context.profile)
|
|
|
|
groupCount++
|
2015-03-24 08:29:24 +00:00
|
|
|
}
|
2015-08-24 06:42:16 +00:00
|
|
|
if err := historyRows.Err(); err != nil {
|
|
|
|
return err
|
2015-03-24 08:29:24 +00:00
|
|
|
}
|
2015-07-31 05:41:44 +00:00
|
|
|
|
2015-08-24 06:42:16 +00:00
|
|
|
if groupCount > 0 {
|
|
|
|
entry.Compatibility = groupSum / float64(groupCount)
|
|
|
|
}
|
2015-03-24 08:29:24 +00:00
|
|
|
}
|
2015-08-24 06:42:16 +00:00
|
|
|
|
|
|
|
return nil
|
2015-03-24 08:29:24 +00:00
|
|
|
}
|
2015-03-25 03:00:54 +00:00
|
|
|
|
2015-08-24 06:42:16 +00:00
|
|
|
func fetchRecords(db *sql.DB, context queryContext) ([]record, error) {
|
|
|
|
rows, err := db.Query("SELECT name, url, delicious, accommodating, affordable, atmospheric, latitude, longitude, closestStnDist, closestStnName, accessCount, id FROM reviews")
|
2015-03-25 03:00:54 +00:00
|
|
|
if err != nil {
|
2015-08-24 06:42:16 +00:00
|
|
|
return nil, err
|
2015-03-25 03:00:54 +00:00
|
|
|
}
|
2015-08-24 06:42:16 +00:00
|
|
|
defer rows.Close()
|
2015-03-25 03:00:54 +00:00
|
|
|
|
|
|
|
var entries []record
|
2015-08-24 06:42:16 +00:00
|
|
|
for rows.Next() {
|
2015-08-23 14:20:32 +00:00
|
|
|
var (
|
|
|
|
name, url, closestStn string
|
|
|
|
delicious, accommodating, affordable, atmospheric float64
|
|
|
|
latitude, longitude, distanceToStn float64
|
|
|
|
accessCount, id int
|
|
|
|
)
|
2015-03-25 03:00:54 +00:00
|
|
|
|
2015-08-24 06:42:16 +00:00
|
|
|
rows.Scan(
|
2015-03-25 03:33:41 +00:00
|
|
|
&name,
|
|
|
|
&url,
|
|
|
|
&delicious,
|
2015-04-27 04:53:16 +00:00
|
|
|
&accommodating,
|
2015-03-25 03:33:41 +00:00
|
|
|
&affordable,
|
|
|
|
&atmospheric,
|
|
|
|
&latitude,
|
|
|
|
&longitude,
|
|
|
|
&distanceToStn,
|
|
|
|
&closestStn,
|
|
|
|
&accessCount,
|
|
|
|
&id)
|
|
|
|
|
|
|
|
entry := record{
|
2015-08-23 10:56:07 +00:00
|
|
|
Name: name,
|
|
|
|
Url: url,
|
|
|
|
DistanceToStn: distanceToStn,
|
|
|
|
ClosestStn: closestStn,
|
|
|
|
AccessCount: accessCount,
|
2015-03-25 11:17:12 +00:00
|
|
|
geo: geoData{latitude, longitude},
|
2015-08-23 10:56:07 +00:00
|
|
|
Id: id}
|
2015-03-25 03:00:54 +00:00
|
|
|
|
2015-08-23 11:03:26 +00:00
|
|
|
entry.features = map[string]float64{
|
2015-04-27 04:53:16 +00:00
|
|
|
"delicious": delicious,
|
|
|
|
"accommodating": accommodating,
|
|
|
|
"affordable": affordable,
|
|
|
|
"atmospheric": atmospheric}
|
2015-03-25 03:00:54 +00:00
|
|
|
|
|
|
|
entries = append(entries, entry)
|
|
|
|
}
|
2015-08-24 06:42:16 +00:00
|
|
|
if err := rows.Err(); err != nil {
|
|
|
|
return nil, err
|
2015-03-25 03:00:54 +00:00
|
|
|
}
|
|
|
|
|
2015-08-24 06:42:16 +00:00
|
|
|
computeRecordGeo(entries, context)
|
|
|
|
if err := computeRecordCompat(db, entries, context); err != nil {
|
|
|
|
return nil, err
|
|
|
|
}
|
2015-03-25 09:22:57 +00:00
|
|
|
|
2015-08-24 06:42:16 +00:00
|
|
|
return entries, nil
|
2015-03-25 03:00:54 +00:00
|
|
|
}
|