WIP
This commit is contained in:
parent
227329c187
commit
e480b4cb61
207
build/build.go
207
build/build.go
@ -25,6 +25,7 @@ package main
|
|||||||
import (
|
import (
|
||||||
"bufio"
|
"bufio"
|
||||||
"bytes"
|
"bytes"
|
||||||
|
"database/sql"
|
||||||
"encoding/binary"
|
"encoding/binary"
|
||||||
"errors"
|
"errors"
|
||||||
"flag"
|
"flag"
|
||||||
@ -32,6 +33,7 @@ import (
|
|||||||
"log"
|
"log"
|
||||||
"net/url"
|
"net/url"
|
||||||
"os"
|
"os"
|
||||||
|
"strings"
|
||||||
|
|
||||||
"github.com/PuerkitoBio/goquery"
|
"github.com/PuerkitoBio/goquery"
|
||||||
_ "github.com/mattn/go-sqlite3"
|
_ "github.com/mattn/go-sqlite3"
|
||||||
@ -51,6 +53,11 @@ func (s scrapeCtx) load(url string) (*goquery.Document, error) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
type restaurant struct {
|
type restaurant struct {
|
||||||
|
name string
|
||||||
|
|
||||||
|
latitude float64
|
||||||
|
longitude float64
|
||||||
|
|
||||||
reviews []review
|
reviews []review
|
||||||
|
|
||||||
accomodating float64
|
accomodating float64
|
||||||
@ -127,7 +134,7 @@ func collateData(reviews []review) map[uint64]*restaurant {
|
|||||||
|
|
||||||
var rest *restaurant
|
var rest *restaurant
|
||||||
if rest, _ = restaurants[hash.Sum64()]; rest == nil {
|
if rest, _ = restaurants[hash.Sum64()]; rest == nil {
|
||||||
rest = new(restaurant)
|
rest = &restaurant{name: rev.name, latitude: rev.latitude, longitude: rev.longitude}
|
||||||
restaurants[hash.Sum64()] = rest
|
restaurants[hash.Sum64()] = rest
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -144,118 +151,120 @@ func computeStnData(restaurants map[uint64]*restaurant, stationsPath string) err
|
|||||||
}
|
}
|
||||||
|
|
||||||
for _, rest := range restaurants {
|
for _, rest := range restaurants {
|
||||||
if len(rest.reviews) > 0 {
|
rest.closestStnName, rest.closestStnDist = sq.closestStation(rest.latitude, rest.longitude)
|
||||||
rev := rest.reviews[0]
|
|
||||||
rest.closestStnName, rest.closestStnDist = sq.closestStation(rev.latitude, rev.longitude)
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
return nil
|
return nil
|
||||||
}
|
}
|
||||||
|
|
||||||
// func dumpData(dbPath string, restaraunts []restaurant) error {
|
func dumpData(dbPath string, restaraunts map[uint64]*restaurant) error {
|
||||||
// db, err := sql.Open("sqlite3", dbPath)
|
db, err := sql.Open("sqlite3", dbPath)
|
||||||
// if err != nil {
|
if err != nil {
|
||||||
// return err
|
return err
|
||||||
// }
|
}
|
||||||
// defer db.Close()
|
defer db.Close()
|
||||||
|
|
||||||
// _, err = db.Exec(`
|
_, err = db.Exec(`
|
||||||
// DROP TABLE IF EXISTS reviews;
|
DROP TABLE IF EXISTS reviews;
|
||||||
// CREATE TABLE reviews(
|
CREATE TABLE reviews(
|
||||||
// name VARCHAR(100) NOT NULL,
|
name VARCHAR(100) NOT NULL,
|
||||||
// url VARCHAR(200) NOT NULL,
|
urls VARCHAR(200) NOT NULL,
|
||||||
// delicious FLOAT NOT NULL,
|
delicious FLOAT NOT NULL,
|
||||||
// accommodating FLOAT NOT NULL,
|
accommodating FLOAT NOT NULL,
|
||||||
// affordable FLOAT NOT NULL,
|
affordable FLOAT NOT NULL,
|
||||||
// atmospheric FLOAT NOT NULL,
|
atmospheric FLOAT NOT NULL,
|
||||||
// latitude FLOAT NOT NULL,
|
latitude FLOAT NOT NULL,
|
||||||
// longitude FLOAT NOT NULL,
|
longitude FLOAT NOT NULL,
|
||||||
// closestStnDist FLOAT NOT NULL,
|
closestStnDist FLOAT NOT NULL,
|
||||||
// closestStnName VARCHAR(100) NOT NULL,
|
closestStnName VARCHAR(100) NOT NULL,
|
||||||
// accessCount INTEGER NOT NULL,
|
accessCount INTEGER NOT NULL,
|
||||||
// id INTEGER PRIMARY KEY
|
id INTEGER PRIMARY KEY
|
||||||
// )`)
|
)`)
|
||||||
|
|
||||||
// if err != nil {
|
if err != nil {
|
||||||
// return err
|
return err
|
||||||
// }
|
}
|
||||||
|
|
||||||
// for _, r := range restaraunts {
|
for _, rest := range restaraunts {
|
||||||
// _, err = db.Exec(`
|
var urls []string
|
||||||
// INSERT INTO reviews(
|
for _, rev := range rest.reviews {
|
||||||
// name,
|
urls = append(urls, rev.url)
|
||||||
// url,
|
}
|
||||||
// delicious,
|
|
||||||
// accommodating,
|
|
||||||
// affordable,
|
|
||||||
// atmospheric,
|
|
||||||
// latitude,
|
|
||||||
// longitude,
|
|
||||||
// closestStnDist,
|
|
||||||
// closestStnName,
|
|
||||||
// accessCount
|
|
||||||
// ) VALUES(?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)`,
|
|
||||||
// r.name,
|
|
||||||
// r.url,
|
|
||||||
// r.feats.delicious,
|
|
||||||
// r.feats.accommodating,
|
|
||||||
// r.feats.affordable,
|
|
||||||
// r.feats.atmospheric,
|
|
||||||
// r.latitude,
|
|
||||||
// r.longitude,
|
|
||||||
// r.closestStnDist,
|
|
||||||
// r.closestStnName,
|
|
||||||
// 0)
|
|
||||||
|
|
||||||
// if err != nil {
|
_, err = db.Exec(`
|
||||||
// return err
|
INSERT INTO reviews(
|
||||||
// }
|
name,
|
||||||
// }
|
urls,
|
||||||
|
delicious,
|
||||||
|
accommodating,
|
||||||
|
affordable,
|
||||||
|
atmospheric,
|
||||||
|
latitude,
|
||||||
|
longitude,
|
||||||
|
closestStnDist,
|
||||||
|
closestStnName,
|
||||||
|
accessCount
|
||||||
|
) VALUES(?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)`,
|
||||||
|
rest.name,
|
||||||
|
strings.Join(urls, ","),
|
||||||
|
rest.delicious,
|
||||||
|
rest.accomodating,
|
||||||
|
rest.affordable,
|
||||||
|
rest.atmospheric,
|
||||||
|
rest.latitude,
|
||||||
|
rest.longitude,
|
||||||
|
rest.closestStnDist,
|
||||||
|
rest.closestStnName,
|
||||||
|
0)
|
||||||
|
|
||||||
// _, err = db.Exec(`
|
if err != nil {
|
||||||
// DROP TABLE IF EXISTS categories;
|
return err
|
||||||
// CREATE TABLE categories(
|
}
|
||||||
// description VARCHAR(200) NOT NULL,
|
}
|
||||||
// id INTEGER PRIMARY KEY)`)
|
|
||||||
|
|
||||||
// if err != nil {
|
_, err = db.Exec(`
|
||||||
// return err
|
DROP TABLE IF EXISTS categories;
|
||||||
// }
|
CREATE TABLE categories(
|
||||||
|
description VARCHAR(200) NOT NULL,
|
||||||
|
id INTEGER PRIMARY KEY)`)
|
||||||
|
|
||||||
// for _, category := range []string{"I prefer quiet places", "I enjoy Mexican Food", "I drive a car"} {
|
if err != nil {
|
||||||
// if _, err := db.Exec("INSERT INTO categories(description) VALUES (?)", category); err != nil {
|
return err
|
||||||
// return err
|
}
|
||||||
// }
|
|
||||||
// }
|
|
||||||
|
|
||||||
// _, err = db.Exec(`
|
for _, category := range []string{"I prefer quiet places", "I enjoy Mexican Food", "I drive a car"} {
|
||||||
// DROP TABLE IF EXISTS history;
|
if _, err := db.Exec("INSERT INTO categories(description) VALUES (?)", category); err != nil {
|
||||||
// CREATE TABLE history(
|
return err
|
||||||
// date DATETIME NOT NULL,
|
}
|
||||||
// reviewId INTEGER NOT NULL,
|
}
|
||||||
// id INTEGER PRIMARY KEY,
|
|
||||||
// FOREIGN KEY(reviewId) REFERENCES reviews(id))`)
|
|
||||||
|
|
||||||
// if err != nil {
|
_, err = db.Exec(`
|
||||||
// return err
|
DROP TABLE IF EXISTS history;
|
||||||
// }
|
CREATE TABLE history(
|
||||||
|
date DATETIME NOT NULL,
|
||||||
|
reviewId INTEGER NOT NULL,
|
||||||
|
id INTEGER PRIMARY KEY,
|
||||||
|
FOREIGN KEY(reviewId) REFERENCES reviews(id))`)
|
||||||
|
|
||||||
// _, err = db.Exec(`
|
if err != nil {
|
||||||
// DROP TABLE IF EXISTS historyGroups;
|
return err
|
||||||
// CREATE TABLE historyGroups(
|
}
|
||||||
// categoryId INTEGER NOT NULL,
|
|
||||||
// categoryValue FLOAT NOT NULL,
|
|
||||||
// historyId INTEGER NOT NULL,
|
|
||||||
// FOREIGN KEY(historyId) REFERENCES history(id),
|
|
||||||
// FOREIGN KEY(categoryId) REFERENCES categories(id))`)
|
|
||||||
|
|
||||||
// if err != nil {
|
_, err = db.Exec(`
|
||||||
// return err
|
DROP TABLE IF EXISTS historyGroups;
|
||||||
// }
|
CREATE TABLE historyGroups(
|
||||||
|
categoryId INTEGER NOT NULL,
|
||||||
|
categoryValue FLOAT NOT NULL,
|
||||||
|
historyId INTEGER NOT NULL,
|
||||||
|
FOREIGN KEY(historyId) REFERENCES history(id),
|
||||||
|
FOREIGN KEY(categoryId) REFERENCES categories(id))`)
|
||||||
|
|
||||||
// return nil
|
if err != nil {
|
||||||
// }
|
return err
|
||||||
|
}
|
||||||
|
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
|
||||||
func main() {
|
func main() {
|
||||||
dbPath := flag.String("db", "data/db.sqlite3", "database output path")
|
dbPath := flag.String("db", "data/db.sqlite3", "database output path")
|
||||||
@ -276,7 +285,7 @@ func main() {
|
|||||||
log.Fatal(err)
|
log.Fatal(err)
|
||||||
}
|
}
|
||||||
|
|
||||||
// if err := dumpData(*dbPath, reviews); err != nil {
|
if err := dumpData(*dbPath, restaurants); err != nil {
|
||||||
// log.Fatal(err)
|
log.Fatal(err)
|
||||||
// }
|
}
|
||||||
}
|
}
|
||||||
|
@ -44,6 +44,7 @@ type review struct {
|
|||||||
latitude float64
|
latitude float64
|
||||||
longitude float64
|
longitude float64
|
||||||
|
|
||||||
|
scr scraper
|
||||||
err error
|
err error
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -85,7 +86,7 @@ func scrapeReview(url string, out chan review, scr scraper, group *sync.WaitGrou
|
|||||||
|
|
||||||
var (
|
var (
|
||||||
doc *goquery.Document
|
doc *goquery.Document
|
||||||
rev = review{url: url}
|
rev = review{url: url, scr: scr}
|
||||||
)
|
)
|
||||||
|
|
||||||
if doc, rev.err = scr.load(rev.url); rev.err == nil {
|
if doc, rev.err = scr.load(rev.url); rev.err == nil {
|
||||||
|
Loading…
Reference in New Issue
Block a user