Switching to sqlite3
This commit is contained in:
parent
6f57277ed3
commit
c63c6a835f
1
db/.gitignore
vendored
1
db/.gitignore
vendored
@ -1 +0,0 @@
|
||||
node_modules
|
101
db/build_db.js
101
db/build_db.js
@ -1,101 +0,0 @@
|
||||
#!/usr/bin/env node
|
||||
|
||||
/*
|
||||
* Copyright (c) 2015 Alex Yatskov <alex@foosoft.net>
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a copy of
|
||||
* this software and associated documentation files (the "Software"), to deal in
|
||||
* the Software without restriction, including without limitation the rights to
|
||||
* use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of
|
||||
* the Software, and to permit persons to whom the Software is furnished to do so,
|
||||
* subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be included in all
|
||||
* copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS
|
||||
* FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR
|
||||
* COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER
|
||||
* IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
|
||||
* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
var mysql = require('mysql');
|
||||
var data = require('./data.json');
|
||||
|
||||
|
||||
//
|
||||
// Setup
|
||||
//
|
||||
|
||||
//
// Setup: connect to the local server as 'hscd' and recreate the hscd
// database from scratch. Every statement below is issued, in order, on this
// single connection.
//

var conn = mysql.createConnection({host: 'localhost', user: 'hscd'});

[
    'DROP DATABASE IF EXISTS hscd',
    'CREATE DATABASE hscd',
    'USE hscd'
].forEach(function(statement) {
    conn.query(statement);
});


//
// Reviews: one row per record loaded from data.json; accessCount starts at 0.
//

conn.query('DROP TABLE IF EXISTS reviews');
conn.query('CREATE TABLE reviews(name VARCHAR(100) NOT NULL, url VARCHAR(200) NOT NULL, delicious FLOAT NOT NULL, accommodating FLOAT NOT NULL, affordable FLOAT NOT NULL, atmospheric FLOAT NOT NULL, latitude FLOAT NOT NULL, longitude FLOAT NOT NULL, distanceToStn FLOAT NOT NULL, closestStn VARCHAR(100) NOT NULL, accessCount INT NOT NULL, id INT NOT NULL AUTO_INCREMENT PRIMARY KEY) DEFAULT CHARACTER SET utf8');

var insertReviewSql = 'INSERT INTO reviews(name, url, delicious, accommodating, affordable, atmospheric, latitude, longitude, distanceToStn, closestStn, accessCount) VALUES(?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)';
var reviewIndex = 0;

while (reviewIndex < data.length) {
    var review = data[reviewIndex];
    var scores = review.rating;
    var position = review.geo;

    // Rating keys from the scraper map onto the adjective-named columns:
    // food -> delicious, service -> accommodating, value -> affordable,
    // atmosphere -> atmospheric.
    conn.query(insertReviewSql, [
        review.name,
        review.relativeUrl,
        scores.food,
        scores.service,
        scores.value,
        scores.atmosphere,
        position.latitude,
        position.longitude,
        review.distanceToStn,
        review.closestStn,
        0
    ]);

    reviewIndex += 1;
}


//
// Categories: a fixed list of descriptions inserted in the order given.
//

conn.query('DROP TABLE IF EXISTS categories');
conn.query('CREATE TABLE categories(description VARCHAR(200) NOT NULL, id INT NOT NULL AUTO_INCREMENT PRIMARY KEY)');

[
    'I prefer quiet places',
    'I enjoy Mexican Food',
    'I drive a car'
].forEach(function(description) {
    conn.query('INSERT INTO categories(description) VALUES (?)', [description]);
});


//
// History: created empty; each row references a review.
//

conn.query('DROP TABLE IF EXISTS history');
conn.query('CREATE TABLE history(date DATETIME NOT NULL, reviewId INT NOT NULL, id INT NOT NULL AUTO_INCREMENT PRIMARY KEY, FOREIGN KEY(reviewId) REFERENCES reviews(id))');


//
// HistoryGroup: created empty; each row references a history entry and a category.
//

conn.query('DROP TABLE IF EXISTS historyGroups');
conn.query('CREATE TABLE historyGroups(categoryId INT NOT NULL, categoryValue FLOAT NOT NULL, historyId INT NOT NULL, FOREIGN KEY(historyId) REFERENCES history(id), FOREIGN KEY(categoryId) REFERENCES categories(id))');


//
// Cleanup
//

conn.end();
|
2
db/cache/.gitignore
vendored
2
db/cache/.gitignore
vendored
@ -1,2 +0,0 @@
|
||||
*.html*
|
||||
!.gitignore
|
15626
db/cache/geo.json
vendored
15626
db/cache/geo.json
vendored
File diff suppressed because it is too large
Load Diff
27627
db/data.json
27627
db/data.json
File diff suppressed because it is too large
Load Diff
@ -1,2 +0,0 @@
|
||||
#!/bin/bash
|
||||
mysqldump -u hscd hscd > hscd.sql
|
102
db/geocode.js
102
db/geocode.js
@ -1,102 +0,0 @@
|
||||
#!/usr/bin/env node
|
||||
|
||||
/*
|
||||
* Copyright (c) 2015 Alex Yatskov <alex@foosoft.net>
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a copy of
|
||||
* this software and associated documentation files (the "Software"), to deal in
|
||||
* the Software without restriction, including without limitation the rights to
|
||||
* use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of
|
||||
* the Software, and to permit persons to whom the Software is furnished to do so,
|
||||
* subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be included in all
|
||||
* copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS
|
||||
* FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR
|
||||
* COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER
|
||||
* IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
|
||||
* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
var _ = require('underscore');
|
||||
var geocoder = require('node-geocoder');
|
||||
var geolib = require('geolib');
|
||||
var jf = require('jsonfile');
|
||||
|
||||
|
||||
/**
 * Resolve the position of an address, consulting the cache before the geocoder.
 *
 * A cache hit invokes `callback` synchronously. A miss schedules a geocoder
 * request delayed by `sequence * 200` ms, so successive misses are spaced out
 * rather than fired all at once.
 *
 * @param {Object} gc - geocoder exposing `geocode(address, function(err, results))`.
 * @param {string} address - address to resolve; also the cache key.
 * @param {Object} cache - address -> position map, updated on successful lookups.
 * @param {number} sequence - count of geocoder requests scheduled so far.
 * @param {function} callback - called with the position, or `null` when the
 *     lookup fails or yields no results.
 * @returns {number} `sequence` on a cache hit, `sequence + 1` when a request
 *     was scheduled.
 */
function queryPosition(gc, address, cache, sequence, callback) {
    // Own-property check: an address must never match something inherited
    // from Object.prototype.
    if (Object.prototype.hasOwnProperty.call(cache, address)) {
        console.log('Cache lookup success for:\n\t%s', address);
        callback(cache[address]);
        return sequence;
    }

    setTimeout(function() {
        gc.geocode(address, function(err, res) {
            // An empty (or missing) result list is a failed lookup too:
            // reporting it as success would hand `undefined` to the caller and
            // store an `undefined` entry in the cache.
            if (err || !res || res.length === 0) {
                console.log('Geocode lookup fail for: \n\t%s', address);
                callback(null);
                return;
            }

            console.log('Geocode lookup success for: \n\t%s', address);
            cache[address] = res[0];
            callback(res[0]);
        });
    }, sequence * 200);

    return sequence + 1;
}
|
||||
|
||||
/**
 * Tag every review with the station closest to it.
 *
 * For each review, the distance from `review.geo` to every station's `geo` is
 * computed with geolib; the smallest distance and the key of the matching
 * station are written back onto the review as `distanceToStn` and
 * `closestStn`. With no stations these stay at `Number.MAX_VALUE` and `''`.
 *
 * @param {Array} reviewData - reviews to annotate in place; each needs `name` and `geo`.
 * @param {Object} stationData - stations keyed by name; each needs `geo`.
 * @param {*} accessibility - unused.
 */
function buildAccess(reviewData, stationData, accessibility) {
    function annotate(review) {
        console.log('Computing access for: \n\t%s', review.name);

        var nearestName = '';
        var nearestDistance = Number.MAX_VALUE;

        _.each(stationData, function(candidate, candidateName) {
            var separation = geolib.getDistance(review.geo, candidate.geo);
            if (separation < nearestDistance) {
                nearestDistance = separation;
                nearestName = candidateName;
            }
        });

        review.distanceToStn = nearestDistance;
        review.closestStn = nearestName;
    }

    _.each(reviewData, function(review) {
        annotate(review);
    });
}
|
||||
|
||||
/**
 * Geocode every review in data.json and rewrite the file with positions.
 *
 * Reads stations.json, cache/geo.json (an unreadable cache yields an empty
 * one) and data.json, then resolves each review's address via queryPosition.
 * Reviews whose lookup yields no position are dropped. Once every review has
 * reported back, the survivors are annotated by buildAccess, and both
 * data.json and cache/geo.json are written out.
 */
function main() {
    var gc = geocoder.getGeocoder('google', 'http', {});
    var stations = jf.readFileSync('stations.json');
    var geoCache = jf.readFileSync('cache/geo.json', {throws: false}) || {};
    var reviews = jf.readFileSync('data.json');

    var located = [];
    var outstanding = reviews.length;
    var sequence = 0;

    // Runs once, after the final lookup has reported back.
    function finish() {
        buildAccess(located, stations);

        jf.writeFileSync('data.json', located);
        jf.writeFileSync('cache/geo.json', geoCache);
    }

    _.each(reviews, function(review) {
        function onPosition(geo) {
            if (geo) {
                // Work on a copy so the record read from disk stays untouched.
                var copy = _.clone(review);
                copy.geo = geo;
                located.push(copy);
            }

            outstanding -= 1;
            if (outstanding === 0) {
                finish();
            }
        }

        sequence = queryPosition(gc, review.address, geoCache, sequence, onPosition);
    });
}

// Only run when executed directly, not when loaded through require().
if (module === require.main) {
    main();
}
|
138
db/hscd.sql
138
db/hscd.sql
File diff suppressed because one or more lines are too long
@ -1,2 +0,0 @@
|
||||
#!/bin/bash
|
||||
mysql -p -u root < init.sql
|
@ -1,4 +0,0 @@
|
||||
CREATE DATABASE hscd;
|
||||
USE hscd;
|
||||
CREATE USER 'hscd'@'localhost';
|
||||
GRANT ALL PRIVILEGES ON hscd . * to 'hscd'@'localhost';
|
@ -1,2 +0,0 @@
|
||||
#!/bin/bash
|
||||
mysql -u hscd hscd < hscd.sql
|
@ -1,19 +0,0 @@
|
||||
{
|
||||
"name": "hscd_scrape",
|
||||
"version": "0.0.0",
|
||||
"description": "",
|
||||
"main": "scrape.js",
|
||||
"scripts": {
|
||||
"test": "echo \"Error: no test specified\" && exit 1"
|
||||
},
|
||||
"author": "",
|
||||
"license": "MIT",
|
||||
"dependencies": {
|
||||
"cheerio": "~0.17.0",
|
||||
"mysql": "^2.5.0",
|
||||
"underscore": "~1.6.0",
|
||||
"node-geocoder": "~2.11.0",
|
||||
"geolib": "~2.0.14",
|
||||
"jsonfile": "~2.0.0"
|
||||
}
|
||||
}
|
185
db/scrape.js
185
db/scrape.js
@ -1,185 +0,0 @@
|
||||
#!/usr/bin/env node
|
||||
|
||||
/*
|
||||
* Copyright (c) 2015 Alex Yatskov <alex@foosoft.net>
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a copy of
|
||||
* this software and associated documentation files (the "Software"), to deal in
|
||||
* the Software without restriction, including without limitation the rights to
|
||||
* use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of
|
||||
* the Software, and to permit persons to whom the Software is furnished to do so,
|
||||
* subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be included in all
|
||||
* copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS
|
||||
* FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR
|
||||
* COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER
|
||||
* IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
|
||||
* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
var cheerio = require('cheerio');
|
||||
var request = require('request');
|
||||
var underscore = require('underscore');
|
||||
var url = require('url');
|
||||
var path = require('path');
|
||||
var fs = require('fs');
|
||||
var _ = require('underscore');
|
||||
|
||||
|
||||
/**
 * Fetch a page from www.tripadvisor.com, serving it from the cache when possible.
 *
 * The cache is a file at `cache/<relativeUrl>`. A readable cache file is
 * passed straight to `callback`; otherwise the page is requested over HTTP.
 *
 * The cache file is written only after the request completes with status
 * 200. Opening the file before the response arrives would leave an empty or
 * error-page file behind whenever the request fails, and every later run
 * would then treat that file as a valid cache hit.
 *
 * @param {string} relativeUrl - site-relative URL, also used as the cache path.
 * @param {function} callback - `function(err, resp, body)`; on a cache hit
 *     `err` and `resp` are null and `body` is the cached file content.
 */
function requestCached(relativeUrl, callback) {
    var absoluteUrl = url.resolve('http://www.tripadvisor.com', relativeUrl);
    var cachePath = path.join('cache', relativeUrl);

    fs.readFile(cachePath, function(readErr, data) {
        if (!readErr) {
            callback(null, null, data);
            return;
        }

        request(absoluteUrl, function(err, resp, body) {
            if (!err && resp && resp.statusCode === 200) {
                // Failing to cache is not fatal: the page was still fetched.
                fs.writeFile(cachePath, body, function(writeErr) {
                    if (writeErr) {
                        console.warn('Warning: unable to cache %s: %s', relativeUrl, writeErr);
                    }
                });
            }

            callback(err, resp, body);
        });
    });
}
|
||||
|
||||
/**
 * Convert a rating bar element into a fraction.
 *
 * @param {Object} bar - wrapped element exposing `attr(name)`.
 * @returns {number} the bar's `alt` attribute parsed as a float and divided
 *     by 5; NaN when the attribute does not start with a number.
 */
function getBarPercent(bar) {
    var altText = bar.attr('alt');
    var score = parseFloat(altText);
    return score / 5.0;
}
|
||||
|
||||
/**
 * Request callback that turns one review page into a review record.
 *
 * Must be invoked with `this` bound to an object carrying `relativeUrl` (the
 * page being scraped) and `callback` (receives the finished record). The
 * callback is NOT invoked when the page is skipped; a warning is logged
 * instead. Pages are skipped when they have no address, when the name contains
 * 'CLOSED', when there are not exactly four rating bars, when any bar cannot
 * be parsed, or when all four ratings are zero.
 *
 * Each rating fraction from getBarPercent is mapped to `(fraction - 0.5) * 2`
 * before being stored.
 *
 * @param {*} err - request error, if any; logged and the page is abandoned.
 * @param {*} resp - response object (unused).
 * @param {string|Buffer} html - page markup.
 */
function reviewScraped(err, resp, html) {
    if (err) {
        return console.error('Error: %s', err);
    }

    var $ = cheerio.load(html);

    var address = $('address span.format_address').text().trim();
    if (!address) {
        console.warn('Warning: review skipped, no address');
        return;
    }

    var storeName = $('h1#HEADING').text().trim();
    if (storeName.indexOf('CLOSED') !== -1) {
        console.warn('Warning: review skipped, closed');
        return;
    }

    var rating = $('ul.barChart img.rating_s_fill');
    if (rating.length !== 4) {
        console.warn('Warning: review skipped, no summary');
        return;
    }

    var rateFood = getBarPercent($(rating[0]));
    var rateService = getBarPercent($(rating[1]));
    var rateValue = getBarPercent($(rating[2]));
    var rateAtmosphere = getBarPercent($(rating[3]));
    var rates = [rateFood, rateService, rateValue, rateAtmosphere];

    // A bar whose value cannot be parsed yields NaN, which would otherwise
    // end up in the record and serialise to JSON as null.
    var hasInvalidRate = rates.some(function(rate) {
        return Number.isNaN(rate);
    });
    if (hasInvalidRate) {
        console.warn('Warning: review skipped, invalid rating');
        return;
    }

    var isEmptyReview = rates.every(function(rate) {
        return rate === 0.0;
    });
    if (isEmptyReview) {
        console.warn('Warning: review skipped, empty review');
        return;
    }

    var data = {
        name: storeName,
        relativeUrl: this.relativeUrl,
        address: address,
        rating: {
            food: (rateFood - 0.5) * 2.0,
            service: (rateService - 0.5) * 2.0,
            value: (rateValue - 0.5) * 2.0,
            atmosphere: (rateAtmosphere - 0.5) * 2.0
        }
    };

    this.callback(data);
}
|
||||
|
||||
/**
 * Fetch one review page and hand the parsed record to `callback`.
 *
 * The page is fetched through requestCached and parsed by reviewScraped,
 * which is bound to a context carrying the callback and the page URL.
 *
 * @param {string} relativeUrl - site-relative URL of the review page.
 * @param {function} callback - receives the review record built by reviewScraped.
 */
function scrapeReview(relativeUrl, callback) {
    console.log('Scraping review %s...', relativeUrl);

    var context = {
        callback: callback,
        relativeUrl: relativeUrl
    };
    var onPage = _.bind(reviewScraped, context);

    requestCached(relativeUrl, onPage);
}
|
||||
|
||||
/**
 * Request callback that walks one index (listing) page.
 *
 * Must be invoked with `this` bound to an object carrying `callback`. The
 * href of every `a.property_title` link is passed to that callback; a truthy
 * return value stops the walk, so no further links are reported and the next
 * page is not followed. Otherwise, if the page has an `a.sprite-pageNext`
 * link, scraping continues there with the same callback.
 *
 * @param {*} err - request error, if any; logged and the page is abandoned.
 * @param {*} resp - response object (unused).
 * @param {string|Buffer} html - page markup.
 */
function indexScraped(err, resp, html) {
    if (err) {
        return console.error('Error: %s', err);
    }

    var $ = cheerio.load(html);
    var self = this;
    var stopped = false;

    $('a.property_title').each(function(position, link) {
        // Once the callback has asked to stop, ignore the remaining links.
        if (!stopped && self.callback($(link).attr('href'))) {
            stopped = true;
        }
    });

    if (stopped) {
        return;
    }

    var nextPageUrl = $('a.sprite-pageNext').attr('href');
    if (nextPageUrl) {
        scrapeIndices(nextPageUrl, this.callback);
    }
}
|
||||
|
||||
/**
 * Fetch one index (listing) page and report each review link on it.
 *
 * The page is fetched through requestCached and handled by indexScraped,
 * which is bound to a context carrying `callback`.
 *
 * @param {string} relativeUrl - site-relative URL of the index page.
 * @param {function} callback - called with each review URL found.
 */
function scrapeIndices(relativeUrl, callback) {
    console.log('Scraping index %s...', relativeUrl);

    var context = { callback: callback };
    requestCached(relativeUrl, _.bind(indexScraped, context));
}
|
||||
|
||||
/**
 * Scrape the listed index pages and dump every review found to data.json.
 *
 * Each index page is walked with scrapeIndices and every review link on it is
 * scraped with scrapeReview. SIGINT sets a flag that is returned to the index
 * walker after each link, telling it to stop. The collected reviews are
 * written out as pretty-printed JSON when the process exits.
 */
function main() {
    var outputPath = 'data.json';
    var indexPages = [
        '/Restaurants-g298173-Yokohama_Kanagawa_Prefecture_Kanto.html',
        '/Restaurants-g1021277-Fujisawa_Kanagawa_Prefecture_Kanto.html',
        '/Restaurants-g1021279-Chigasaki_Kanagawa_Prefecture_Kanto.html',
        '/Restaurants-g298172-Kawasaki_Kanagawa_Prefecture_Kanto.html',
        '/Restaurants-g1066854-Shinagawa_Tokyo_Tokyo_Prefecture_Kanto.html',
        '/Restaurants-g298184-Tokyo_Tokyo_Prefecture_Kanto.html'
    ];

    var interrupted = false;
    process.on('SIGINT', function() {
        console.warn('Caught SIGINT, aborting...');
        interrupted = true;
    });

    var collected = [];

    function collect(review) {
        collected.push(review);
    }

    // Scrape the review, then tell the index walker whether to stop.
    function visitReview(reviewUrl) {
        scrapeReview(reviewUrl, collect);
        return interrupted;
    }

    _.each(indexPages, function(indexPage) {
        scrapeIndices(indexPage, visitReview);
    });

    process.on('exit', function() {
        console.log('Total reviews scraped: %d', collected.length);
        fs.writeFileSync(outputPath, JSON.stringify(collected, null, 4));
    });
}


// Only run when executed directly, not when loaded through require().
if (module === require.main) {
    main();
}
|
6876
db/stations.json
6876
db/stations.json
File diff suppressed because it is too large
Load Diff
@ -40,7 +40,7 @@ import (
|
||||
"time"
|
||||
|
||||
"github.com/GaryBoone/GoStats/stats"
|
||||
_ "github.com/go-sql-driver/mysql"
|
||||
_ "github.com/mattn/go-sqlite3"
|
||||
)
|
||||
|
||||
var db *sql.DB
|
||||
@ -312,12 +312,12 @@ func main() {
|
||||
|
||||
staticDir := flag.String("static", "static", "path to static files")
|
||||
portNum := flag.Int("port", 8080, "port to serve content on")
|
||||
dataSrc := flag.String("data", "hscd@/hscd", "data source for database")
|
||||
dataSrc := flag.String("data", "db.sqlite3", "data source for database")
|
||||
profile := flag.String("profile", "", "write cpu profile to file")
|
||||
flag.Parse()
|
||||
|
||||
var err error
|
||||
if db, err = sql.Open("mysql", *dataSrc); err != nil {
|
||||
if db, err = sql.Open("sqlite3", *dataSrc); err != nil {
|
||||
log.Fatal(err)
|
||||
}
|
||||
defer db.Close()
|
||||
|
2
util.go
2
util.go
@ -270,7 +270,7 @@ func computeRecordsCompat(entries records, context queryContext) {
|
||||
}
|
||||
|
||||
func getRecords(context queryContext) records {
|
||||
recordRows, err := db.Query("SELECT name, url, delicious, accommodating, affordable, atmospheric, latitude, longitude, distanceToStn, closestStn, accessCount, id FROM reviews")
|
||||
recordRows, err := db.Query("SELECT name, url, delicious, accommodating, affordable, atmospheric, latitude, longitude, closestStnDist, closestStnName, accessCount, id FROM reviews")
|
||||
if err != nil {
|
||||
log.Fatal(err)
|
||||
}
|
||||
|
Loading…
x
Reference in New Issue
Block a user