2014-10-17 09:07:06 +00:00
|
|
|
#!/usr/bin/env node
|
|
|
|
|
2014-10-03 02:43:56 +00:00
|
|
|
/*

The MIT License (MIT)

Copyright (c) 2014 Alex Yatskov

Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:

The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
THE SOFTWARE.

*/
|
|
|
|
|
2014-09-17 07:39:46 +00:00
|
|
|
'use strict';
|
|
|
|
|
2014-11-17 06:58:32 +00:00
|
|
|
var _ = require('underscore');
var geolib = require('geolib');
var mysql = require('mysql');

// Module-wide MySQL connection pool; null until loadDb() assigns it.
var pool = null;
|
|
|
|
|
2014-09-17 07:39:46 +00:00
|
|
|
|
|
|
|
/**
 * Computes the inner (dot) product of two feature maps.
 *
 * Each own enumerable key of `values1` is multiplied by the value stored
 * under the same key in `values2`, and the products are summed.
 *
 * @param {Object} values1 - Feature name -> numeric weight; its keys drive the iteration.
 * @param {Object} values2 - Feature name -> numeric weight.
 * @returns {number} Sum of the products. NaN when `values2` has no value for
 *     a key present in `values1` (number * undefined).
 */
function innerProduct(values1, values2) {
    // Own keys only: a for-in loop would also visit enumerable properties
    // inherited through the prototype chain, which the key-count check
    // below does not take into account.
    var features = Object.keys(values1 || {});

    console.assert(features.length === Object.keys(values2 || {}).length);

    var result = 0.0;
    for (var i = 0, count = features.length; i < count; ++i) {
        var feature = features[i];
        result += values1[feature] * values2[feature];
    }

    return result;
}
|
|
|
|
|
2014-11-08 02:23:42 +00:00
|
|
|
/**
 * Invokes `callback` for every record whose score reaches `minScore`.
 *
 * The score of a record is innerProduct(features, record.features).
 * Records are visited in array order.
 *
 * @param {Object} data - Object holding a `records` array; every record exposes `features`.
 * @param {Object} features - Feature weights of the query.
 * @param {number} minScore - Inclusive lower bound for a record to be reported.
 * @param {function(Object, number)} callback - Receives the record and its score.
 */
function walkMatches(data, features, minScore, callback) {
    var records = data.records;

    for (var i = 0, count = records.length; i < count; ++i) {
        var record = records[i];
        var score = innerProduct(features, record.features);

        // A NaN score fails this comparison, so such records are skipped.
        if (score >= minScore) {
            callback(record, score);
        }
    }
}
|
|
|
|
|
2014-11-08 02:23:42 +00:00
|
|
|
/**
 * Counts the records that walkMatches() reports for the given query.
 *
 * @param {Object} data - Data set handed to walkMatches().
 * @param {Object} features - Feature weights of the query.
 * @param {number} minScore - Minimum score handed to walkMatches().
 * @returns {number} Number of matching records.
 */
function countRecords(data, features, minScore) {
    var count = 0;

    // Only the number of invocations matters; the record and score are ignored.
    walkMatches(data, features, minScore, function() {
        ++count;
    });

    return count;
}
|
|
|
|
|
2014-11-17 08:14:41 +00:00
|
|
|
/**
 * Collects every record reported by walkMatches() and returns them ordered
 * by descending score.
 *
 * @param {Object} data - Data set handed to walkMatches().
 * @param {Object} features - Feature weights of the query.
 * @param {number} minScore - Minimum score handed to walkMatches().
 * @returns {Array<Object>} Entries of the form {name, url, score, distance, id}.
 */
function findRecords(data, features, minScore) {
    var results = [];

    walkMatches(data, features, minScore, function(record, score) {
        results.push({
            name: record.name,
            // Records only carry a site-relative URL; prefix the host here.
            url: 'http://www.tripadvisor.com' + record.relativeUrl,
            score: score,
            distance: record.distance,
            id: record.id
        });
    });

    // Highest score first.
    results.sort(function(a, b) {
        return b.score - a.score;
    });

    return results;
}
|
|
|
|
|
|
|
|
/**
 * Splits [range.min, range.max] into `steps` intervals of equal width and
 * calls `callback` with the midpoint of each interval, starting with the
 * interval that ends at range.max and moving towards range.min.
 *
 * @param {{min: number, max: number}} range - Interval to sample.
 * @param {number} steps - Number of intervals; nothing is reported when it is not positive.
 * @param {function(number)} callback - Receives each interval midpoint.
 */
function step(range, steps, callback) {
    var stepSize = (range.max - range.min) / steps;

    for (var i = 0; i < steps; ++i) {
        var stepMax = range.max - stepSize * i;
        var stepMin = stepMax - stepSize;
        var stepMid = (stepMin + stepMax) / 2;

        callback(stepMid);
    }
}
|
|
|
|
|
2014-11-08 02:23:42 +00:00
|
|
|
/**
 * Counts the matching records for a series of alternative values of one
 * feature while all other feature weights stay as given.
 *
 * @param {Object} data - Data set handed to countRecords().
 * @param {Object} features - Feature weights of the query; not modified.
 * @param {string} feature - Name of the feature that is varied.
 * @param {number} minScore - Minimum score handed to countRecords().
 * @param {{min: number, max: number}} range - Interval handed to step().
 * @param {number} steps - Number of sample positions handed to step().
 * @returns {Array<{sample: number, count: number}>} One entry per position
 *     reported by step(), in that order.
 */
function project(data, features, feature, minScore, range, steps) {
    // Work on a copy so the caller's weights are left untouched.
    var sample = _.clone(features);
    var results = [];

    step(range, steps, function(position) {
        sample[feature] = position;
        results.push({
            sample: position,
            count: countRecords(data, sample, minScore)
        });
    });

    return results;
}
|
|
|
|
|
2014-11-08 02:23:42 +00:00
|
|
|
/**
 * Builds the hint list for one feature: the result of project() reduced to
 * fresh {sample, count} objects.
 *
 * @param {Object} data - Data set handed to project().
 * @param {Object} features - Feature weights of the query.
 * @param {string} feature - Name of the feature that is varied.
 * @param {number} minScore - Minimum score handed to project().
 * @param {{min: number, max: number}} range - Interval handed to project().
 * @param {number} steps - Number of sample positions handed to project().
 * @returns {Array<{sample: number, count: number}>} Hints in projection order.
 */
function buildHints(data, features, feature, minScore, range, steps) {
    var projection = project(
        data,
        features,
        feature,
        minScore,
        range,
        steps
    );

    return _.map(projection, function(result) {
        return {
            sample: result.sample,
            count: result.count
        };
    });
}
|
|
|
|
|
|
|
|
/**
 * Creates the module-wide MySQL connection pool used by all queries in
 * this file.
 *
 * @param {Object} params - Passed unchanged to mysql.createPool().
 */
function loadDb(params) {
    pool = mysql.createPool(params);
}
|
|
|
|
|
2014-09-18 08:22:57 +00:00
|
|
|
/**
 * Stores a keyword together with its five feature weights in the
 * `keywords` table.
 *
 * The keyword must be a non-empty string made of ASCII letters, digits,
 * whitespace and '-'; anything else is rejected without touching the
 * database.
 *
 * @param {Object} query - Carries `keyword` and `features`
 *     ({food, service, value, atmosphere, proximity}).
 * @param {function(Object)} callback - Receives {keyword, success}.
 */
function addKeyword(query, callback) {
    var keyword = query.keyword;

    // RegExp.prototype.test() converts its argument to a string, so a missing
    // keyword would be validated as the text "undefined" and then inserted.
    // Require a real string before applying the pattern.
    if (typeof keyword !== 'string' || !/^[a-zA-Z0-9\s\-]+$/.test(keyword)) {
        callback({
            keyword: keyword,
            success: false
        });
        return;
    }

    // NOTE(review): the keywords loaded here are not used; the call only
    // delays the INSERT until the SELECT has succeeded. Confirm whether a
    // duplicate check was intended before removing it.
    getKeywords(function() {
        var values = [
            keyword,
            query.features.food,
            query.features.service,
            query.features.value,
            query.features.atmosphere,
            query.features.proximity
        ];

        pool.query('INSERT INTO keywords VALUES(?, ?, ?, ?, ?, ?)', values, function(err) {
            callback({
                keyword: keyword,
                success: err === null
            });
        });
    });
}
|
|
|
|
|
|
|
|
/**
 * Deletes a keyword from the `keywords` table. The statement excludes names
 * that are also listed in the `presets` table, so those are never removed.
 *
 * @param {Object} query - Carries the `keyword` to delete.
 * @param {function(Object)} callback - Receives {keyword, success}; `success`
 *     is true only when the query succeeded and at least one row was affected.
 */
function removeKeyword(query, callback) {
    var sql = 'DELETE FROM keywords WHERE name=? AND name NOT IN (SELECT name FROM presets)';

    pool.query(sql, [query.keyword], function(err, result) {
        callback({
            keyword: query.keyword,
            // `result` is only inspected when no error occurred.
            success: err === null && result.affectedRows > 0
        });
    });
}
|
|
|
|
|
2014-09-17 07:39:46 +00:00
|
|
|
/**
 * Loads all rows of the `keywords` table and indexes them by name.
 *
 * A query error is thrown from inside the driver callback; `callback` has
 * no error argument and is not invoked in that case.
 *
 * @param {function(Object)} callback - Receives a map of keyword name to
 *     {food, service, value, atmosphere, proximity}.
 */
function getKeywords(callback) {
    pool.query('SELECT * FROM keywords', function(err, rows) {
        if (err) {
            throw err;
        }

        var keywords = {};
        for (var i = 0, count = rows.length; i < count; ++i) {
            var row = rows[i];
            keywords[row.name] = {
                food: row.food,
                service: row.service,
                value: row.value,
                atmosphere: row.atmosphere,
                proximity: row.proximity
            };
        }

        callback(keywords);
    });
}
|
|
|
|
|
2014-11-17 08:43:27 +00:00
|
|
|
/**
 * Loads all rows of the `reviews` table, converts them into record objects
 * and lets computeRecordGeo() add the location-dependent fields.
 *
 * A query error is thrown from inside the driver callback; `callback` has
 * no error argument and is not invoked in that case.
 *
 * @param {Object} geo - Reference location handed to computeRecordGeo().
 * @param {function(Array<Object>)} callback - Receives records of the form
 *     {name, id, relativeUrl, geo: {latitude, longitude},
 *      features: {food, service, value, atmosphere}} after
 *     computeRecordGeo() has processed them.
 */
function getRecords(geo, callback) {
    pool.query('SELECT * FROM reviews', function(err, rows) {
        if (err) {
            throw err;
        }

        var records = _.map(rows, function(row) {
            return {
                name: row.name,
                id: row.id,
                relativeUrl: row.url,
                geo: {
                    latitude: row.latitude,
                    longitude: row.longitude
                },
                features: {
                    food: row.food,
                    service: row.service,
                    value: row.value,
                    atmosphere: row.atmosphere
                }
            };
        });

        computeRecordGeo(records, geo);
        callback(records);
    });
}
|
|
|
|
|
2014-11-17 08:14:41 +00:00
|
|
|
/**
 * Adds `distance` and `features.proximity` to every record.
 *
 * `distance` is geolib.getDistance(record.geo, geo) / 1000 when `geo` is
 * given and 0 otherwise (geolib documents its result in metres, so this is
 * presumably kilometres). `proximity` rescales the distances linearly so
 * that the closest record gets 1 and the farthest gets -1.
 *
 * @param {Array<Object>} records - Records with `geo` and `features`; modified in place.
 * @param {Object} [geo] - Reference location; when falsy all distances are 0.
 */
function computeRecordGeo(records, geo) {
    var distMin = Infinity;
    var distMax = -Infinity;

    _.each(records, function(record) {
        record.distance = 0.0;
        if (geo) {
            record.distance = geolib.getDistance(record.geo, geo) / 1000.0;
        }

        distMin = Math.min(distMin, record.distance);
        distMax = Math.max(distMax, record.distance);
    });

    var distRange = distMax - distMin;

    _.each(records, function(record) {
        // When every record is equally far away (a single record, or no
        // reference location) the range is 0; dividing by it would give NaN,
        // which poisons every score. Treat all records as closest instead.
        var normalized = distRange > 0 ? (record.distance - distMin) / distRange : 0.0;
        record.features.proximity = -(normalized - 0.5) * 2.0;
    });
}
|
|
|
|
|
|
|
|
/**
 * Loads the keywords and then the records and hands both to `callback`.
 *
 * @param {Object} geo - Reference location handed to getRecords().
 * @param {function(Object)} callback - Receives {keywords, records}.
 */
function getData(geo, callback) {
    getKeywords(function(keywords) {
        // Records are requested only after the keywords have arrived.
        getRecords(geo, function(records) {
            callback({
                keywords: keywords,
                records: records
            });
        });
    });
}
|
|
|
|
|
2014-11-08 05:33:36 +00:00
|
|
|
/**
 * Loads the keywords and reports them wrapped in an object.
 *
 * @param {function(Object)} callback - Receives {keywords}.
 */
function getParameters(callback) {
    getKeywords(function(keywords) {
        callback({keywords: keywords});
    });
}
|
|
|
|
|
2014-09-17 07:39:46 +00:00
|
|
|
/**
 * Runs a search: loads the data, finds the matching records and builds the
 * hint column of every feature named in the query.
 *
 * @param {Object} query - Reads `geo`, `features`, `minScore`, `range`,
 *     `hintSteps` and `maxResults`.
 * @param {function(Object)} callback - Receives
 *     {columns, items, count}: `columns` maps each feature name to
 *     {value, hints, steps}, `items` holds at most `maxResults` matches and
 *     `count` is the total number of matches.
 */
function execQuery(query, callback) {
    getData(query.geo, function(data) {
        var searchResults = findRecords(
            data,
            query.features,
            query.minScore
        );

        var graphColumns = {};
        for (var feature in query.features) {
            var searchHints = buildHints(
                data,
                query.features,
                feature,
                query.minScore,
                query.range,
                query.hintSteps
            );

            graphColumns[feature] = {
                value: query.features[feature],
                hints: searchHints,
                steps: query.hintSteps
            };
        }

        callback({
            columns: graphColumns,
            // Only the first maxResults matches are returned; count reports all of them.
            items: searchResults.slice(0, query.maxResults),
            count: searchResults.length
        });
    });
}
|
|
|
|
|
|
|
|
module.exports = {
|
2014-11-17 07:29:02 +00:00
|
|
|
loadDb: loadDb,
|
|
|
|
addKeyword: addKeyword,
|
|
|
|
removeKeyword: removeKeyword,
|
|
|
|
getParameters: getParameters,
|
|
|
|
execQuery: execQuery
|
2014-09-17 07:39:46 +00:00
|
|
|
};
|