1

Stop using JSON files for data; switch completely to MySQL

This commit is contained in:
Alex Yatskov 2014-09-17 16:39:46 +09:00
parent 28d419f8be
commit ed36d33fa9
5 changed files with 239 additions and 68080 deletions

File diff suppressed because it is too large Load Diff

View File

@ -1,50 +0,0 @@
{
"food": {
"food": 1.0
},
"service": {
"service": 1.0
},
"value": {
"value": 1.0
},
"atmosphere": {
"atmosphere": 1.0
},
"hole in the wall": {
"food": 0.82,
"service": 0.10,
"value": 0.22,
"atmosphere": -0.79
},
"high class": {
"food": 0.93,
"service": 0.80,
"value": -0.05,
"atmosphere": 0.95
},
"cheap grub": {
"food": -0.42,
"service": -0.64,
"value": 0.87,
"atmosphere": -0.91
},
"ripoff": {
"food": -0.82,
"service": -0.64,
"value": -0.98,
"atmosphere": -0.23
},
"uninspired": {
"food": -0.83,
"service": 0.0,
"value": 0.0,
"atmosphere": -0.68
},
"moody": {
"food": 0.32,
"service": -0.33,
"value": -0.42,
"atmosphere": 0.61
}
}

View File

@ -1,192 +0,0 @@
'use strict';
var _ = require('underscore');
var db_keywords = require('./keywords.json');
var db_data = require('./data.json');
var mysql = require('mysql');
var connection = null;
/**
 * Sums the products of matching features of two feature maps.
 *
 * Every enumerable key of `values1` is visited; a feature whose value in
 * `values2` is missing (or otherwise falsy) contributes zero to the sum.
 *
 * @param {Object} values1 feature name -> number; drives the iteration
 * @param {Object} values2 feature name -> number; looked up per feature
 * @returns {number} the accumulated sum of products
 */
function innerProduct(values1, values2) {
    var total = 0.0;
    for (var key in values1) {
        var counterpart = values2[key] || 0.0;
        total += values1[key] * counterpart;
    }
    return total;
}
/**
 * Builds a new feature map in which every value of `values` is multiplied
 * by `factor`. The input object is not modified.
 *
 * @param {Object} values feature name -> number
 * @param {number} factor multiplier applied to each value
 * @returns {Object} a fresh object with the same keys and scaled values
 */
function scale(values, factor) {
    var scaled = {};
    for (var key in values) {
        var original = values[key];
        scaled[key] = original * factor;
    }
    return scaled;
}
/**
 * Counts the records in `db_data` whose score reaches `minScore`.
 *
 * A record's score is the sum, over every keyword in `searchParams`, of the
 * inner product between that keyword's feature vector (from `db_keywords`,
 * scaled by the keyword's weight) and the record's `rating`.
 *
 * @param {Object} searchParams keyword -> weight
 * @param {number} minScore inclusive score threshold
 * @returns {number} how many records scored at least `minScore`
 */
function countData(searchParams, minScore) {
    var matches = 0;
    var total = db_data.length;
    for (var index = 0; index < total; index += 1) {
        var entry = db_data[index];
        var entryScore = 0.0;
        for (var term in searchParams) {
            var weighted = scale(db_keywords[term], searchParams[term]);
            entryScore += innerProduct(weighted, entry.rating);
        }
        if (entryScore >= minScore) {
            matches += 1;
        }
    }
    return matches;
}
/**
 * Collects the records in `db_data` whose score reaches `minScore`, ordered
 * from highest score to lowest.
 *
 * Scoring is the same as in `countData`: for each keyword in `searchParams`
 * the keyword's feature vector is scaled by its weight and combined with the
 * record's `rating` through an inner product.
 *
 * @param {Object} searchParams keyword -> weight
 * @param {number} minScore inclusive score threshold
 * @returns {Array} objects of the form `{name, url, score}`
 */
function findData(searchParams, minScore) {
    var matches = [];
    var total = db_data.length;
    for (var index = 0; index < total; index += 1) {
        var entry = db_data[index];
        var entryScore = 0.0;
        for (var term in searchParams) {
            var weighted = scale(db_keywords[term], searchParams[term]);
            entryScore += innerProduct(weighted, entry.rating);
        }
        if (entryScore >= minScore) {
            var hit = {
                name: entry.name,
                // The stored URL is site-relative, so the host is prefixed here.
                url: 'http://www.tripadvisor.com' + entry.relativeUrl,
                score: entryScore
            };
            matches.push(hit);
        }
    }
    // Best matches first.
    matches.sort(function(left, right) {
        return right.score - left.score;
    });
    return matches;
}
/**
 * Splits `range` into `steps` equal-width intervals and invokes `callback`
 * with the midpoint of each interval, walking from `range.max` downwards.
 *
 * @param {Object} range object with numeric `min` and `max`
 * @param {number} steps number of intervals
 * @param {Function} callback called once per interval with its midpoint
 */
function searchStepper(range, steps, callback) {
    var width = (range.max - range.min) / steps;
    var index = 0;
    while (index < steps) {
        var upper = range.max - width * index;
        var lower = upper - width;
        callback((lower + upper) / 2);
        index += 1;
    }
}
/**
 * Samples how many records would match if the weight of `keyword` were
 * varied across `range` while all other search weights stay unchanged.
 *
 * Works on a shallow copy of `searchParams`, so the caller's object is left
 * untouched.
 *
 * @param {Object} searchParams keyword -> weight
 * @param {number} minScore inclusive score threshold passed to `countData`
 * @param {string} keyword the keyword whose weight is varied
 * @param {Object} range object with `min` and `max`
 * @param {number} steps number of sample positions
 * @returns {Array} objects of the form `{sample, count}`, one per step
 */
function searchProjection(searchParams, minScore, keyword, range, steps) {
    var samples = [];
    var probe = _.clone(searchParams);

    function recordSample(position) {
        probe[keyword] = position;
        samples.push({
            sample: position,
            count: countData(probe, minScore)
        });
    }

    searchStepper(range, steps, recordSample);
    return samples;
}
/**
 * Produces the hint list for one keyword: the projection computed by
 * `searchProjection`, copied into fresh `{sample, count}` objects.
 *
 * @param {Object} searchParams keyword -> weight
 * @param {number} minScore inclusive score threshold
 * @param {string} keyword the keyword to build hints for
 * @param {Object} range object with `min` and `max`
 * @param {number} steps number of sample positions
 * @returns {Array} objects of the form `{sample, count}`
 */
function searchBuildHints(searchParams, minScore, keyword, range, steps) {
    var projection = searchProjection(searchParams, minScore, keyword, range, steps);
    var hints = [];
    for (var index = 0; index < projection.length; index += 1) {
        var entry = projection[index];
        hints.push({ sample: entry.sample, count: entry.count });
    }
    return hints;
}
/**
 * Creates the module-wide MySQL connection from `params`.
 *
 * Any connection created by an earlier call is released through `freeDb`
 * before the new one is stored.
 *
 * @param {Object|string} params passed straight to `mysql.createConnection`
 */
module.exports.loadDb = function(params) {
    // Release the previous connection first so it is not overwritten while open.
    module.exports.freeDb();
    var fresh = mysql.createConnection(params);
    connection = fresh;
};
/**
 * Ends the module-wide connection, if there is one, and clears the reference.
 * Does nothing when no connection is currently stored.
 */
module.exports.freeDb = function() {
    if (!connection) {
        return;
    }
    connection.end();
    connection = null;
};
/**
 * Queries the `keywords` table for all names in ascending order and hands
 * them to `callback` as an array of strings.
 *
 * A query error is thrown from inside the query callback; `callback` is not
 * invoked in that case.
 *
 * @param {Function} callback called with the array of keyword names
 */
module.exports.getKeywords = function(callback) {
    var sql = 'SELECT name FROM keywords ORDER BY name ASC';

    function onRows(err, rows) {
        if (err) {
            throw err;
        }
        var names = _.map(rows, function(row) {
            return row.name;
        });
        callback(names);
    }

    connection.query(sql, onRows);
};
/**
 * Runs a search and builds the response payload for the client.
 *
 * When `query.searchParams` is absent it is created on `query` itself (the
 * caller's object is mutated, as before): every entry of `query.keywords`
 * that exists in `db_keywords` is given a weight of 1.0.
 *
 * The score threshold is `query.minScore` multiplied by the number of active
 * search parameters.
 *
 * @param {Object} query search request with `searchParams` and/or `keywords`,
 *     plus `minScore`, `searchRange`, `hintSteps` and `maxResults`
 * @returns {Object} `{columns, params, items, count}` where `items` holds at
 *     most `query.maxResults` results and `count` is the untruncated total
 */
module.exports.execQuery = function(query) {
    if (!query.searchParams) {
        query.searchParams = {};
        // A request may carry neither `searchParams` nor `keywords`; treat a
        // missing keyword list as empty rather than throwing on `.length`.
        var requested = query.keywords || [];
        for (var i = 0, count = requested.length; i < count; ++i) {
            var name = requested[i];
            if (_.has(db_keywords, name)) {
                query.searchParams[name] = 1.0;
            }
        }
    }

    // The threshold depends only on the parameter set, which is not modified
    // below, so it is computed once instead of once per keyword.
    var threshold = query.minScore * _.keys(query.searchParams).length;

    var searchResults = findData(query.searchParams, threshold);

    var graphColumns = {};
    for (var keyword in query.searchParams) {
        var searchHints = searchBuildHints(
            query.searchParams,
            threshold,
            keyword,
            query.searchRange,
            query.hintSteps
        );
        graphColumns[keyword] = {
            color: '#607080',
            value: query.searchParams[keyword],
            hints: searchHints,
            steps: query.hintSteps
        };
    }

    return {
        columns: graphColumns,
        params: query.searchParams,
        items: searchResults.slice(0, query.maxResults),
        count: searchResults.length
    };
};

233
server/search.js Normal file
View File

@ -0,0 +1,233 @@
'use strict';
var _ = require('underscore');
var mysql = require('mysql');
var connection = null;
/**
 * Computes the inner product of two feature maps.
 *
 * Only the enumerable keys of `values1` are considered; when `values2` has
 * no (or a falsy) value for a feature, zero is used in its place.
 *
 * @param {Object} values1 feature name -> number; drives the iteration
 * @param {Object} values2 feature name -> number; looked up per feature
 * @returns {number} the sum of the per-feature products
 */
function innerProduct(values1, values2) {
    var sum = 0.0;
    var feature;
    for (feature in values1) {
        var other = values2[feature] || 0.0;
        sum = sum + values1[feature] * other;
    }
    return sum;
}
/**
 * Returns a copy of `values` with every entry multiplied by `factor`.
 * The original object is left unchanged.
 *
 * @param {Object} values feature name -> number
 * @param {number} factor multiplier applied to each value
 * @returns {Object} new object with the same keys and scaled values
 */
function scale(values, factor) {
    var output = {};
    var feature;
    for (feature in values) {
        var value = values[feature];
        output[feature] = value * factor;
    }
    return output;
}
/**
 * Counts the entries of `data.records` whose score reaches `minScore`.
 *
 * For each keyword in `searchParams`, the keyword's feature vector from
 * `data.keywords` is scaled by the keyword's weight and combined with the
 * record's `rating` via an inner product; the per-keyword results are summed.
 *
 * @param {Object} data object with `keywords` (name -> features) and `records`
 * @param {Object} searchParams keyword -> weight
 * @param {number} minScore inclusive score threshold
 * @returns {number} how many records scored at least `minScore`
 */
function countRecords(data, searchParams, minScore) {
    var matched = 0;
    var total = data.records.length;
    var index = 0;
    while (index < total) {
        var entry = data.records[index];
        var entryScore = 0.0;
        for (var term in searchParams) {
            var weighted = scale(data.keywords[term], searchParams[term]);
            entryScore += innerProduct(weighted, entry.rating);
        }
        if (entryScore >= minScore) {
            matched += 1;
        }
        index += 1;
    }
    return matched;
}
/**
 * Returns the entries of `data.records` whose score reaches `minScore`,
 * sorted by descending score.
 *
 * Scoring matches `countRecords`: each keyword's feature vector is scaled by
 * its weight and combined with the record's `rating` via an inner product.
 *
 * @param {Object} data object with `keywords` (name -> features) and `records`
 * @param {Object} searchParams keyword -> weight
 * @param {number} minScore inclusive score threshold
 * @returns {Array} objects of the form `{name, url, score}`
 */
function findRecords(data, searchParams, minScore) {
    function byScoreDescending(left, right) {
        return right.score - left.score;
    }

    var matches = [];
    var total = data.records.length;
    var index = 0;
    while (index < total) {
        var entry = data.records[index];
        var entryScore = 0.0;
        for (var term in searchParams) {
            var weighted = scale(data.keywords[term], searchParams[term]);
            entryScore += innerProduct(weighted, entry.rating);
        }
        if (entryScore >= minScore) {
            matches.push({
                name: entry.name,
                // The stored URL is site-relative, so the host is prefixed here.
                url: 'http://www.tripadvisor.com' + entry.relativeUrl,
                score: entryScore
            });
        }
        index += 1;
    }
    matches.sort(byScoreDescending);
    return matches;
}
/**
 * Divides `range` into `steps` intervals of equal width and calls `callback`
 * with the midpoint of each one, starting at the `range.max` end.
 *
 * @param {Object} range object with numeric `min` and `max`
 * @param {number} steps number of intervals
 * @param {Function} callback called once per interval with its midpoint
 */
function step(range, steps, callback) {
    var width = (range.max - range.min) / steps;
    for (var n = 0; n < steps; n += 1) {
        var high = range.max - width * n;
        var low = high - width;
        var middle = (low + high) / 2;
        callback(middle);
    }
}
/**
 * Samples the number of matching records while the weight of `keyword` is
 * swept across `range`, keeping every other search weight as given.
 *
 * The sweep is performed on a shallow copy of `searchParams`; the caller's
 * object is not modified.
 *
 * @param {Object} data object with `keywords` and `records`
 * @param {Object} searchParams keyword -> weight
 * @param {number} minScore inclusive score threshold passed to `countRecords`
 * @param {string} keyword the keyword whose weight is varied
 * @param {Object} range object with `min` and `max`
 * @param {number} steps number of sample positions
 * @returns {Array} objects of the form `{sample, count}`, one per step
 */
function project(data, searchParams, minScore, keyword, range, steps) {
    var samples = [];
    var probe = _.clone(searchParams);

    function recordSample(position) {
        probe[keyword] = position;
        samples.push({
            sample: position,
            count: countRecords(data, probe, minScore)
        });
    }

    step(range, steps, recordSample);
    return samples;
}
/**
 * Builds the hint list for one keyword by copying each entry of the
 * projection returned by `project` into a fresh `{sample, count}` object.
 *
 * @param {Object} data object with `keywords` and `records`
 * @param {Object} searchParams keyword -> weight
 * @param {number} minScore inclusive score threshold
 * @param {string} keyword the keyword to build hints for
 * @param {Object} range object with `min` and `max`
 * @param {number} steps number of sample positions
 * @returns {Array} objects of the form `{sample, count}`
 */
function buildHints(data, searchParams, minScore, keyword, range, steps) {
    var projection = project(data, searchParams, minScore, keyword, range, steps);
    var hints = [];
    for (var index = 0; index < projection.length; index += 1) {
        var entry = projection[index];
        hints.push({ sample: entry.sample, count: entry.count });
    }
    return hints;
}
/**
 * Creates the module-wide MySQL connection from `params`.
 *
 * If a connection from an earlier call is still stored, it is ended before
 * being replaced; otherwise the old handle would be overwritten while still
 * open and could no longer be closed.
 *
 * @param {Object|string} params passed straight to `mysql.createConnection`
 */
function loadDb(params) {
    if (connection) {
        connection.end();
        connection = null;
    }
    connection = mysql.createConnection(params);
}
/**
 * Loads every row of the `keywords` table and passes `callback` an object
 * mapping each keyword name to its `{food, service, value, atmosphere}`
 * feature values.
 *
 * A query error is thrown from inside the query callback; `callback` is not
 * invoked in that case.
 *
 * @param {Function} callback called with the name -> features object
 */
function getKeywords(callback) {
    function onRows(err, rows) {
        if (err) {
            throw err;
        }
        var byName = {};
        var total = rows.length;
        for (var index = 0; index < total; index += 1) {
            var entry = rows[index];
            byName[entry.name] = {
                food: entry.food,
                service: entry.service,
                value: entry.value,
                atmosphere: entry.atmosphere
            };
        }
        callback(byName);
    }

    connection.query('SELECT * FROM keywords', onRows);
}
/**
 * Loads every row of the `reviews` table and passes `callback` an array of
 * records shaped as `{name, relativeUrl, rating}`, where `rating` holds the
 * row's `food`, `service`, `value` and `atmosphere` columns.
 *
 * A query error is thrown from inside the query callback; `callback` is not
 * invoked in that case.
 *
 * @param {Function} callback called with the array of records
 */
function getRecords(callback) {
    // Reshapes one database row into the record layout used by the scorers.
    function toRecord(row) {
        var rating = {
            food: row.food,
            service: row.service,
            value: row.value,
            atmosphere: row.atmosphere
        };
        return {
            name: row.name,
            relativeUrl: row.url,
            rating: rating
        };
    }

    connection.query('SELECT * FROM reviews', function(err, rows) {
        if (err) {
            throw err;
        }
        callback(_.map(rows, toRecord));
    });
}
/**
 * Fetches keywords and then records, and passes both to `callback` bundled
 * as `{keywords, records}`. The records query starts only after the keywords
 * have arrived.
 *
 * @param {Function} callback called with the `{keywords, records}` object
 */
function getData(callback) {
    getKeywords(function(keywordTable) {
        getRecords(function(recordList) {
            var bundle = {
                keywords: keywordTable,
                records: recordList
            };
            callback(bundle);
        });
    });
}
/**
 * Loads the data set, runs a search against it and passes the response
 * payload to `callback`.
 *
 * When `query.searchParams` is absent it is created on `query` itself (the
 * caller's object is mutated, as before): every entry of `query.keywords`
 * that exists among the loaded keywords is given a weight of 1.0.
 *
 * The score threshold is `query.minScore` multiplied by the number of active
 * search parameters.
 *
 * @param {Object} query search request with `searchParams` and/or `keywords`,
 *     plus `minScore`, `searchRange`, `hintSteps` and `maxResults`
 * @param {Function} callback called with `{columns, params, items, count}`;
 *     `items` holds at most `query.maxResults` results and `count` is the
 *     untruncated total
 */
function execQuery(query, callback) {
    getData(function(data) {
        if (!query.searchParams) {
            query.searchParams = {};
            // A request may carry neither `searchParams` nor `keywords`.
            // This code runs inside an asynchronous callback, so treat a
            // missing keyword list as empty rather than throwing on `.length`.
            var requested = query.keywords || [];
            for (var i = 0, count = requested.length; i < count; ++i) {
                var name = requested[i];
                if (_.has(data.keywords, name)) {
                    query.searchParams[name] = 1.0;
                }
            }
        }

        // The threshold depends only on the parameter set, which is not
        // modified below, so it is computed once instead of once per keyword.
        var threshold = query.minScore * _.keys(query.searchParams).length;

        var searchResults = findRecords(data, query.searchParams, threshold);

        var graphColumns = {};
        for (var keyword in query.searchParams) {
            var searchHints = buildHints(
                data,
                query.searchParams,
                threshold,
                keyword,
                query.searchRange,
                query.hintSteps
            );
            graphColumns[keyword] = {
                color: '#607080',
                value: query.searchParams[keyword],
                hints: searchHints,
                steps: query.hintSteps
            };
        }

        callback({
            columns: graphColumns,
            params: query.searchParams,
            items: searchResults.slice(0, query.maxResults),
            count: searchResults.length
        });
    });
}
module.exports = {
'loadDb': loadDb,
'getKeywords': getKeywords,
'execQuery': execQuery
};

View File

@ -2,8 +2,9 @@
'use strict'; 'use strict';
var _ = require('underscore');
var express = require('express'); var express = require('express');
var search = require('./db/search.js'); var search = require('./search.js');
function main() { function main() {
@ -16,16 +17,15 @@ function main() {
}); });
app.use('/keywords', function(req, res) { app.use('/keywords', function(req, res) {
console.log('Requesting keywords');
search.getKeywords(function(keywords) { search.getKeywords(function(keywords) {
res.json(keywords); res.json(_.keys(keywords).sort());
}); });
}); });
app.use('/search', function(req, res) { app.use('/search', function(req, res) {
console.log('Requesting search'); search.execQuery(req.query, function(results) {
console.log(req.query); res.json(results);
res.json(search.execQuery(req.query)); });
}); });
app.listen(3000); app.listen(3000);