1
This commit is contained in:
Alex Yatskov 2015-08-23 18:02:46 +09:00
parent 37161c7206
commit 02f58a3ed5

View File

@@ -26,14 +26,16 @@ import (
"bufio" "bufio"
"database/sql" "database/sql"
"errors" "errors"
"flag"
"log"
"net/url" "net/url"
"os" "os"
_ "github.com/mattn/go-sqlite3" _ "github.com/mattn/go-sqlite3"
) )
func scrapeDataUrls(filename string, wc *webCache, gc *geoCache) ([]restaurant, error) { func scrapeDataUrls(urlsPath string, wc *webCache, gc *geoCache) ([]restaurant, error) {
file, err := os.Open(filename) file, err := os.Open(urlsPath)
if err != nil { if err != nil {
return nil, err return nil, err
} }
@@ -211,16 +213,23 @@ func dumpData(dbPath string, restaraunts []restaurant) error {
} }
func main() { func main() {
restaurants, err := scrapeData("data/urls.txt", "cache/geocache.json", "cache/webcache") dbPath := flag.String("db", "data/db.sqlite3", "output database")
urlsPath := flag.String("urls", "data/urls.txt", "index URLs to scrape")
stationsPath := flag.String("stations", "data/stations.json", "station geolocation data")
geocachePath := flag.String("geocache", "cache/geocache.json", "geolocation data cache")
webcachePath := flag.String("webcache", "cache/webcache", "web data cache")
flag.Parse()
restaurants, err := scrapeData(*urlsPath, *geocachePath, *webcachePath)
if err != nil { if err != nil {
panic(err) log.Fatal(err)
} }
if err := computeStnData(restaurants, "data/stations.json"); err != nil { if err := computeStnData(restaurants, *stationsPath); err != nil {
panic(err) log.Fatal(err)
} }
if err := dumpData("data/db.sqlite3", restaurants); err != nil { if err := dumpData(*dbPath, restaurants); err != nil {
panic(err) log.Fatal(err)
} }
} }