2015-08-11 11:30:42 +00:00
|
|
|
/*
|
|
|
|
* Copyright (c) 2015 Alex Yatskov <alex@foosoft.net>
|
|
|
|
* Author: Alex Yatskov <alex@foosoft.net>
|
|
|
|
*
|
|
|
|
* Permission is hereby granted, free of charge, to any person obtaining a copy of
|
|
|
|
* this software and associated documentation files (the "Software"), to deal in
|
|
|
|
* the Software without restriction, including without limitation the rights to
|
|
|
|
* use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of
|
|
|
|
* the Software, and to permit persons to whom the Software is furnished to do so,
|
|
|
|
* subject to the following conditions:
|
|
|
|
*
|
|
|
|
* The above copyright notice and this permission notice shall be included in all
|
|
|
|
* copies or substantial portions of the Software.
|
|
|
|
*
|
|
|
|
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
|
|
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS
|
|
|
|
* FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR
|
|
|
|
* COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER
|
|
|
|
* IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
|
|
|
|
* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
|
|
|
*/
|
|
|
|
|
|
|
|
package main
|
|
|
|
|
2015-08-16 10:12:16 +00:00
|
|
|
import "log"
|
|
|
|
|
2015-08-17 05:23:03 +00:00
|
|
|
type scrapeTask struct {
|
|
|
|
url string
|
|
|
|
scr scraper
|
|
|
|
}
|
|
|
|
|
2015-08-11 11:30:42 +00:00
|
|
|
func main() {
|
2015-08-16 10:12:16 +00:00
|
|
|
gc, err := newGeoCache("cache/geocache.json")
|
|
|
|
if err != nil {
|
|
|
|
log.Fatal(err)
|
|
|
|
}
|
|
|
|
defer gc.save()
|
|
|
|
|
|
|
|
wc, err := newWebCache("cache/webcache")
|
|
|
|
if err != nil {
|
|
|
|
log.Fatal(err)
|
|
|
|
}
|
|
|
|
|
2015-08-17 05:23:03 +00:00
|
|
|
tasks := []scrapeTask{
|
2015-08-17 06:47:20 +00:00
|
|
|
{"http://tabelog.com/en/kanagawa/rstLst/1/", tabelog{}},
|
2015-08-17 09:00:29 +00:00
|
|
|
|
2015-08-17 07:04:06 +00:00
|
|
|
{"http://www.tripadvisor.com/Restaurants-g298173-Yokohama_Kanagawa_Prefecture_Kanto.html", tripadvisor{}},
|
2015-08-17 09:00:29 +00:00
|
|
|
{"http://www.tripadvisor.com/Restaurants-g298172-Kawasaki_Kanagawa_Prefecture_Kanto.html", tripadvisor{}},
|
|
|
|
{"http://www.tripadvisor.com/Restaurants-g1021282-Sagamihara_Kanagawa_Prefecture_Kanto.html", tripadvisor{}},
|
|
|
|
{"http://www.tripadvisor.com/Restaurants-g1021277-Fujisawa_Kanagawa_Prefecture_Kanto.html", tripadvisor{}},
|
|
|
|
{"http://www.tripadvisor.com/Restaurants-g303156-Kamakura_Kanagawa_Prefecture_Kanto.html", tripadvisor{}},
|
|
|
|
{"http://www.tripadvisor.com/Restaurants-g298174-Yokosuka_Kanagawa_Prefecture_Kanto.html", tripadvisor{}},
|
|
|
|
{"http://www.tripadvisor.com/Restaurants-g1021278-Odawara_Kanagawa_Prefecture_Kanto.html", tripadvisor{}},
|
|
|
|
{"http://www.tripadvisor.com/Restaurants-g681222-Hiratsuka_Kanagawa_Prefecture_Kanto.html", tripadvisor{}},
|
|
|
|
{"http://www.tripadvisor.com/Restaurants-g298169-Atsugi_Kanagawa_Prefecture_Kanto.html", tripadvisor{}},
|
|
|
|
{"http://www.tripadvisor.com/Restaurants-g1021286-Yamato_Kanagawa_Prefecture_Kanto.html", tripadvisor{}},
|
|
|
|
{"http://www.tripadvisor.com/Restaurants-g1021279-Chigasaki_Kanagawa_Prefecture_Kanto.html", tripadvisor{}},
|
|
|
|
{"http://www.tripadvisor.com/Restaurants-g1021285-Hadano_Kanagawa_Prefecture_Kanto.html", tripadvisor{}},
|
2015-08-17 05:23:03 +00:00
|
|
|
}
|
|
|
|
|
2015-08-17 09:00:29 +00:00
|
|
|
count := 0
|
2015-08-17 05:23:03 +00:00
|
|
|
for _, task := range tasks {
|
2015-08-17 09:00:29 +00:00
|
|
|
restaraunts := scrape(task.url, wc, gc, task.scr)
|
|
|
|
count += len(restaraunts)
|
2015-08-17 05:23:03 +00:00
|
|
|
}
|
2015-08-17 05:16:07 +00:00
|
|
|
|
2015-08-17 09:00:29 +00:00
|
|
|
log.Print(count)
|
2015-08-11 11:30:42 +00:00
|
|
|
}
|