From eece4dd8761fff5b93ee72658b0a3aaf68a2a9db Mon Sep 17 00:00:00 2001 From: Alex Yatskov Date: Tue, 13 Jan 2015 15:59:09 +0900 Subject: [PATCH] Removing keyword table generation, cleanup --- scrape/build_db.js | 27 +-------------------- {db => scrape}/dump.sh | 0 scrape/geocode.js | 1 + {db => scrape}/hscd.sql | 54 +---------------------------------------- {db => scrape}/load.sh | 0 scrape/scrape.js | 21 ++++++++++++++++ 6 files changed, 24 insertions(+), 79 deletions(-) rename {db => scrape}/dump.sh (100%) rename {db => scrape}/hscd.sql (99%) rename {db => scrape}/load.sh (100%) diff --git a/scrape/build_db.js b/scrape/build_db.js index 3e22c9c..481c91d 100755 --- a/scrape/build_db.js +++ b/scrape/build_db.js @@ -56,34 +56,9 @@ for (var i = 0, count = data.length; i < count; ++i) { // -// Keywords +// Cleanup // conn.query('DROP TABLE IF EXISTS keywords'); -conn.query('CREATE TABLE keywords(name VARCHAR(50) NOT NULL, delicious FLOAT NOT NULL, accomodating FLOAT NOT NULL, affordable FLOAT NOT NULL, atmospheric FLOAT NOT NULL, nearby FLOAT NOT NULL, access FLOAT NOT NULL, PRIMARY KEY(name)) DEFAULT CHARACTER SET utf8'); - -var keywords = { - delicious: [1.0, 0.0, 0.0, 0.0, 0.0, 0.0], - accommodating: [0.0, 1.0, 0.0, 0.0, 0.0, 0.0], - affordable: [0.0, 0.0, 1.0, 0.0, 0.0, 0.0], - atmospheric: [0.0, 0.0, 0.0, 1.0, 0.0, 0.0] -}; - -for (var keyword in keywords) { - var record = keywords[keyword]; - conn.query('INSERT INTO keywords VALUES(?, ?, ?, ?, ?, ?, ?)', [keyword].concat(record)); -} - - -// -// Presets -// - conn.query('DROP TABLE IF EXISTS presets'); -conn.query('CREATE TABLE presets(name VARCHAR(50) NOT NULL, PRIMARY KEY(name)) DEFAULT CHARACTER SET utf8'); - -for (var keyword in keywords) { - conn.query('INSERT INTO presets VALUES(?)', [keyword]); -} - conn.end(); diff --git a/db/dump.sh b/scrape/dump.sh similarity index 100% rename from db/dump.sh rename to scrape/dump.sh diff --git a/scrape/geocode.js b/scrape/geocode.js index 808570b..fc5c738 100755 --- a/scrape/geocode.js +++ b/scrape/geocode.js @@ -26,6 +26,7 @@ var geocoder = require('node-geocoder'); var geolib = require('geolib'); var jf = require('jsonfile'); + function queryPosition(gc, address, cache, sequence, callback) { if (_.has(cache, address)) { console.log('Cache lookup success for:\n\t%s', address); diff --git a/db/hscd.sql b/scrape/hscd.sql similarity index 99% rename from db/hscd.sql rename to scrape/hscd.sql index 5617e6a..07ce0b0 100644 --- a/db/hscd.sql +++ b/scrape/hscd.sql @@ -15,58 +15,6 @@ /*!40101 SET @OLD_SQL_MODE=@@SQL_MODE, SQL_MODE='NO_AUTO_VALUE_ON_ZERO' */; /*!40111 SET @OLD_SQL_NOTES=@@SQL_NOTES, SQL_NOTES=0 */; --- --- Table structure for table `keywords` --- - -DROP TABLE IF EXISTS `keywords`; -/*!40101 SET @saved_cs_client = @@character_set_client */; -/*!40101 SET character_set_client = utf8 */; -CREATE TABLE `keywords` ( - `name` varchar(50) NOT NULL, - `delicious` float NOT NULL, - `accomodating` float NOT NULL, - `affordable` float NOT NULL, - `atmospheric` float NOT NULL, - `nearby` float NOT NULL, - `access` float NOT NULL, - PRIMARY KEY (`name`) -) ENGINE=InnoDB DEFAULT CHARSET=utf8; -/*!40101 SET character_set_client = @saved_cs_client */; - --- --- Dumping data for table `keywords` --- - -LOCK TABLES `keywords` WRITE; -/*!40000 ALTER TABLE `keywords` DISABLE KEYS */; -INSERT INTO `keywords` VALUES ('accommodating',0,1,0,0,0,0),('affordable',0,0,1,0,0,0),('atmospheric',0,0,0,1,0,0),('delicious',1,0,0,0,0,0); -/*!40000 ALTER TABLE `keywords` ENABLE KEYS */; -UNLOCK TABLES; - --- --- Table structure for table `presets` --- - -DROP TABLE IF EXISTS `presets`; -/*!40101 SET @saved_cs_client = @@character_set_client */; -/*!40101 SET character_set_client = utf8 */; -CREATE TABLE `presets` ( - `name` varchar(50) NOT NULL, - PRIMARY KEY (`name`) -) ENGINE=InnoDB DEFAULT CHARSET=utf8; -/*!40101 SET character_set_client = @saved_cs_client */; - --- --- Dumping data for table `presets` --- - -LOCK TABLES `presets` WRITE; -/*!40000 ALTER TABLE `presets` DISABLE KEYS */; -INSERT INTO `presets` VALUES ('accommodating'),('affordable'),('atmospheric'),('delicious'); -/*!40000 ALTER TABLE `presets` ENABLE KEYS */; -UNLOCK TABLES; - -- -- Table structure for table `reviews` -- @@ -109,4 +57,4 @@ UNLOCK TABLES; /*!40101 SET COLLATION_CONNECTION=@OLD_COLLATION_CONNECTION */; /*!40111 SET SQL_NOTES=@OLD_SQL_NOTES */; --- Dump completed on 2015-01-05 16:09:59 +-- Dump completed on 2015-01-13 15:58:36 diff --git a/db/load.sh b/scrape/load.sh similarity index 100% rename from db/load.sh rename to scrape/load.sh diff --git a/scrape/scrape.js b/scrape/scrape.js index abc1bb9..d0df9ae 100755 --- a/scrape/scrape.js +++ b/scrape/scrape.js @@ -1,5 +1,26 @@ #!/usr/bin/env node +/* + * Copyright (c) 2015 Alex Yatskov + * + * Permission is hereby granted, free of charge, to any person obtaining a copy of + * this software and associated documentation files (the "Software"), to deal in + * the Software without restriction, including without limitation the rights to + * use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of + * the Software, and to permit persons to whom the Software is furnished to do so, + * subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS + * FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR + * COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER + * IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + */ + var cheerio = require('cheerio'); var request = require('request'); var url = require('url');