From e480b4cb61c11113a5970778ed43112162ada197 Mon Sep 17 00:00:00 2001 From: Alex Yatskov Date: Fri, 18 Sep 2015 16:33:47 +0900 Subject: [PATCH] WIP --- build/build.go | 207 +++++++++++++++++++++++++----------------------- build/scrape.go | 3 +- 2 files changed, 110 insertions(+), 100 deletions(-) diff --git a/build/build.go b/build/build.go index b12f1c3..b25b84c 100644 --- a/build/build.go +++ b/build/build.go @@ -25,6 +25,7 @@ package main import ( "bufio" "bytes" + "database/sql" "encoding/binary" "errors" "flag" @@ -32,6 +33,7 @@ import ( "log" "net/url" "os" + "strings" "github.com/PuerkitoBio/goquery" _ "github.com/mattn/go-sqlite3" @@ -51,6 +53,11 @@ func (s scrapeCtx) load(url string) (*goquery.Document, error) { } type restaurant struct { + name string + + latitude float64 + longitude float64 + reviews []review accomodating float64 @@ -127,7 +134,7 @@ func collateData(reviews []review) map[uint64]*restaurant { var rest *restaurant if rest, _ = restaurants[hash.Sum64()]; rest == nil { - rest = new(restaurant) + rest = &restaurant{name: rev.name, latitude: rev.latitude, longitude: rev.longitude} restaurants[hash.Sum64()] = rest } @@ -144,118 +151,120 @@ func computeStnData(restaurants map[uint64]*restaurant, stationsPath string) err } for _, rest := range restaurants { - if len(rest.reviews) > 0 { - rev := rest.reviews[0] - rest.closestStnName, rest.closestStnDist = sq.closestStation(rev.latitude, rev.longitude) - } + rest.closestStnName, rest.closestStnDist = sq.closestStation(rest.latitude, rest.longitude) } return nil } -// func dumpData(dbPath string, restaraunts []restaurant) error { -// db, err := sql.Open("sqlite3", dbPath) -// if err != nil { -// return err -// } -// defer db.Close() +func dumpData(dbPath string, restaraunts map[uint64]*restaurant) error { + db, err := sql.Open("sqlite3", dbPath) + if err != nil { + return err + } + defer db.Close() -// _, err = db.Exec(` -// DROP TABLE IF EXISTS reviews; -// CREATE TABLE reviews( -// name VARCHAR(100) NOT NULL, -// url VARCHAR(200) NOT NULL, -// delicious FLOAT NOT NULL, -// accommodating FLOAT NOT NULL, -// affordable FLOAT NOT NULL, -// atmospheric FLOAT NOT NULL, -// latitude FLOAT NOT NULL, -// longitude FLOAT NOT NULL, -// closestStnDist FLOAT NOT NULL, -// closestStnName VARCHAR(100) NOT NULL, -// accessCount INTEGER NOT NULL, -// id INTEGER PRIMARY KEY -// )`) + _, err = db.Exec(` + DROP TABLE IF EXISTS reviews; + CREATE TABLE reviews( + name VARCHAR(100) NOT NULL, + urls VARCHAR(200) NOT NULL, + delicious FLOAT NOT NULL, + accommodating FLOAT NOT NULL, + affordable FLOAT NOT NULL, + atmospheric FLOAT NOT NULL, + latitude FLOAT NOT NULL, + longitude FLOAT NOT NULL, + closestStnDist FLOAT NOT NULL, + closestStnName VARCHAR(100) NOT NULL, + accessCount INTEGER NOT NULL, + id INTEGER PRIMARY KEY + )`) -// if err != nil { -// return err -// } + if err != nil { + return err + } -// for _, r := range restaraunts { -// _, err = db.Exec(` -// INSERT INTO reviews( -// name, -// url, -// delicious, -// accommodating, -// affordable, -// atmospheric, -// latitude, -// longitude, -// closestStnDist, -// closestStnName, -// accessCount -// ) VALUES(?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)`, -// r.name, -// r.url, -// r.feats.delicious, -// r.feats.accommodating, -// r.feats.affordable, -// r.feats.atmospheric, -// r.latitude, -// r.longitude, -// r.closestStnDist, -// r.closestStnName, -// 0) + for _, rest := range restaraunts { + var urls []string + for _, rev := range rest.reviews { + urls = append(urls, rev.url) + } -// if err != nil { -// return err -// } -// } + _, err = db.Exec(` + INSERT INTO reviews( + name, + urls, + delicious, + accommodating, + affordable, + atmospheric, + latitude, + longitude, + closestStnDist, + closestStnName, + accessCount + ) VALUES(?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)`, + rest.name, + strings.Join(urls, ","), + rest.delicious, + rest.accomodating, + rest.affordable, + rest.atmospheric, + rest.latitude, + rest.longitude, + rest.closestStnDist, + rest.closestStnName, + 0) -// _, err = db.Exec(` -// DROP TABLE IF EXISTS categories; -// CREATE TABLE categories( -// description VARCHAR(200) NOT NULL, -// id INTEGER PRIMARY KEY)`) + if err != nil { + return err + } + } -// if err != nil { -// return err -// } + _, err = db.Exec(` + DROP TABLE IF EXISTS categories; + CREATE TABLE categories( + description VARCHAR(200) NOT NULL, + id INTEGER PRIMARY KEY)`) -// for _, category := range []string{"I prefer quiet places", "I enjoy Mexican Food", "I drive a car"} { -// if _, err := db.Exec("INSERT INTO categories(description) VALUES (?)", category); err != nil { -// return err -// } -// } + if err != nil { + return err + } -// _, err = db.Exec(` -// DROP TABLE IF EXISTS history; -// CREATE TABLE history( -// date DATETIME NOT NULL, -// reviewId INTEGER NOT NULL, -// id INTEGER PRIMARY KEY, -// FOREIGN KEY(reviewId) REFERENCES reviews(id))`) + for _, category := range []string{"I prefer quiet places", "I enjoy Mexican Food", "I drive a car"} { + if _, err := db.Exec("INSERT INTO categories(description) VALUES (?)", category); err != nil { + return err + } + } -// if err != nil { -// return err -// } + _, err = db.Exec(` + DROP TABLE IF EXISTS history; + CREATE TABLE history( + date DATETIME NOT NULL, + reviewId INTEGER NOT NULL, + id INTEGER PRIMARY KEY, + FOREIGN KEY(reviewId) REFERENCES reviews(id))`) -// _, err = db.Exec(` -// DROP TABLE IF EXISTS historyGroups; -// CREATE TABLE historyGroups( -// categoryId INTEGER NOT NULL, -// categoryValue FLOAT NOT NULL, -// historyId INTEGER NOT NULL, -// FOREIGN KEY(historyId) REFERENCES history(id), -// FOREIGN KEY(categoryId) REFERENCES categories(id))`) + if err != nil { + return err + } -// if err != nil { -// return err -// } + _, err = db.Exec(` + DROP TABLE IF EXISTS historyGroups; + CREATE TABLE historyGroups( + categoryId INTEGER NOT NULL, + categoryValue FLOAT NOT NULL, + historyId INTEGER NOT NULL, + FOREIGN KEY(historyId) REFERENCES history(id), + FOREIGN KEY(categoryId) REFERENCES categories(id))`) -// return nil -// } + if err != nil { + return err + } + + return nil +} func main() { dbPath := flag.String("db", "data/db.sqlite3", "database output path") @@ -276,7 +285,7 @@ func main() { log.Fatal(err) } - // if err := dumpData(*dbPath, reviews); err != nil { - // log.Fatal(err) - // } + if err := dumpData(*dbPath, restaurants); err != nil { + log.Fatal(err) + } } diff --git a/build/scrape.go b/build/scrape.go index 4c754e4..8fd6911 100644 --- a/build/scrape.go +++ b/build/scrape.go @@ -44,6 +44,7 @@ type review struct { latitude float64 longitude float64 + scr scraper err error } @@ -85,7 +86,7 @@ func scrapeReview(url string, out chan review, scr scraper, group *sync.WaitGrou var ( doc *goquery.Document - rev = review{url: url} + rev = review{url: url, scr: scr} ) if doc, rev.err = scr.load(rev.url); rev.err == nil {