diff --git a/package.json b/package.json
index 33842f7..831458c 100644
--- a/package.json
+++ b/package.json
@@ -22,6 +22,7 @@
     "gulp-nodemon": "^1.0.4",
     "gulp-replace": "^0.4.0",
     "gulp-uglify": "^1.0.1",
-    "main-bower-files": "^2.0.0"
+    "main-bower-files": "^2.0.0",
+    "jsonfile": "~2.0.0"
   }
 }
diff --git a/scrape/geocode.js b/scrape/geocode.js
new file mode 100755
index 0000000..cc5dead
--- /dev/null
+++ b/scrape/geocode.js
@@ -0,0 +1,92 @@
+#!/usr/bin/env node
+
+var fs = require('fs');
+var path = require('path');
+
+var geocoder = require('node-geocoder');
+var jf = require('jsonfile');
+var _ = require('underscore');
+
+var DATA_FILE = 'data.json';
+var CACHE_FILE = 'cache/geo.json';
+
+
+/**
+ * Resolve an address to a geocoding result, consulting the cache first.
+ *
+ * On a cache hit the callback runs synchronously with the cached entry.
+ * On a miss the geocoder is queried; its first result is stored in `cache`
+ * under `address` and handed to the callback. When the lookup errors or
+ * returns no results the callback receives null and the cache is untouched.
+ *
+ * @param {Object} gc Geocoder exposing geocode(address, function(err, res)).
+ * @param {string} address Address to look up; doubles as the cache key.
+ * @param {Object} cache Map of address to result; mutated on a successful lookup.
+ * @param {function(?Object)} callback Receives the result, or null on failure.
+ */
+function queryPosition(gc, address, cache, callback) {
+  if (_.has(cache, address)) {
+    console.log('Cache lookup success for:\n\t%s', address);
+    callback(cache[address]);
+    return;
+  }
+
+  gc.geocode(address, function(err, res) {
+    // An empty result set counts as a failure: caching res[0] (undefined)
+    // would record a bogus hit that later lookups of this address reuse.
+    if (err || !res || !res.length) {
+      console.log('Geocode lookup fail for: \n\t%s', address);
+      if (err) {
+        console.log('\t%s', err.message || err);
+      }
+      callback(null);
+      return;
+    }
+
+    console.log('Geocode lookup success for: \n\t%s', address);
+    callback(cache[address] = res[0]);
+  });
+}
+
+
+/**
+ * Geocode every entry of data.json and rewrite the file with a `geo` field
+ * added to each entry; entries whose address could not be resolved are
+ * omitted. The address cache in cache/geo.json is loaded first and saved
+ * back, including any new lookups, once all entries have been processed.
+ */
+function main() {
+  var gc = geocoder.getGeocoder('google', 'http', {});
+  var srcData = jf.readFileSync(DATA_FILE);
+  var cacheData = jf.readFileSync(CACHE_FILE, {throws: false}) || {};
+  var pending = srcData.length;
+  // One slot per source entry, so the output keeps the input order no
+  // matter in which order the lookups complete.
+  var destData = [];
+
+  _.each(srcData, function(srcItem, index) {
+    queryPosition(gc, srcItem.address, cacheData, function(geo) {
+      if (geo) {
+        var destItem = _.clone(srcItem);
+        destItem.geo = geo;
+        destData[index] = destItem;
+      }
+
+      if (--pending === 0) {
+        // The cache is optional on read, so its directory may not exist yet.
+        var cacheDir = path.dirname(CACHE_FILE);
+        if (!fs.existsSync(cacheDir)) {
+          fs.mkdirSync(cacheDir);
+        }
+
+        // _.compact drops the slots left empty by failed lookups.
+        jf.writeFileSync(DATA_FILE, _.compact(destData));
+        jf.writeFileSync(CACHE_FILE, cacheData);
+      }
+    });
+  });
+}
+
+if (require.main === module) {
+  main();
+}