WIP
This commit is contained in:
parent
227329c187
commit
e480b4cb61
207
build/build.go
207
build/build.go
@ -25,6 +25,7 @@ package main
|
||||
import (
|
||||
"bufio"
|
||||
"bytes"
|
||||
"database/sql"
|
||||
"encoding/binary"
|
||||
"errors"
|
||||
"flag"
|
||||
@ -32,6 +33,7 @@ import (
|
||||
"log"
|
||||
"net/url"
|
||||
"os"
|
||||
"strings"
|
||||
|
||||
"github.com/PuerkitoBio/goquery"
|
||||
_ "github.com/mattn/go-sqlite3"
|
||||
@ -51,6 +53,11 @@ func (s scrapeCtx) load(url string) (*goquery.Document, error) {
|
||||
}
|
||||
|
||||
type restaurant struct {
|
||||
name string
|
||||
|
||||
latitude float64
|
||||
longitude float64
|
||||
|
||||
reviews []review
|
||||
|
||||
accomodating float64
|
||||
@ -127,7 +134,7 @@ func collateData(reviews []review) map[uint64]*restaurant {
|
||||
|
||||
var rest *restaurant
|
||||
if rest, _ = restaurants[hash.Sum64()]; rest == nil {
|
||||
rest = new(restaurant)
|
||||
rest = &restaurant{name: rev.name, latitude: rev.latitude, longitude: rev.longitude}
|
||||
restaurants[hash.Sum64()] = rest
|
||||
}
|
||||
|
||||
@ -144,118 +151,120 @@ func computeStnData(restaurants map[uint64]*restaurant, stationsPath string) err
|
||||
}
|
||||
|
||||
for _, rest := range restaurants {
|
||||
if len(rest.reviews) > 0 {
|
||||
rev := rest.reviews[0]
|
||||
rest.closestStnName, rest.closestStnDist = sq.closestStation(rev.latitude, rev.longitude)
|
||||
}
|
||||
rest.closestStnName, rest.closestStnDist = sq.closestStation(rest.latitude, rest.longitude)
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
// func dumpData(dbPath string, restaraunts []restaurant) error {
|
||||
// db, err := sql.Open("sqlite3", dbPath)
|
||||
// if err != nil {
|
||||
// return err
|
||||
// }
|
||||
// defer db.Close()
|
||||
func dumpData(dbPath string, restaraunts map[uint64]*restaurant) error {
|
||||
db, err := sql.Open("sqlite3", dbPath)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
defer db.Close()
|
||||
|
||||
// _, err = db.Exec(`
|
||||
// DROP TABLE IF EXISTS reviews;
|
||||
// CREATE TABLE reviews(
|
||||
// name VARCHAR(100) NOT NULL,
|
||||
// url VARCHAR(200) NOT NULL,
|
||||
// delicious FLOAT NOT NULL,
|
||||
// accommodating FLOAT NOT NULL,
|
||||
// affordable FLOAT NOT NULL,
|
||||
// atmospheric FLOAT NOT NULL,
|
||||
// latitude FLOAT NOT NULL,
|
||||
// longitude FLOAT NOT NULL,
|
||||
// closestStnDist FLOAT NOT NULL,
|
||||
// closestStnName VARCHAR(100) NOT NULL,
|
||||
// accessCount INTEGER NOT NULL,
|
||||
// id INTEGER PRIMARY KEY
|
||||
// )`)
|
||||
_, err = db.Exec(`
|
||||
DROP TABLE IF EXISTS reviews;
|
||||
CREATE TABLE reviews(
|
||||
name VARCHAR(100) NOT NULL,
|
||||
urls VARCHAR(200) NOT NULL,
|
||||
delicious FLOAT NOT NULL,
|
||||
accommodating FLOAT NOT NULL,
|
||||
affordable FLOAT NOT NULL,
|
||||
atmospheric FLOAT NOT NULL,
|
||||
latitude FLOAT NOT NULL,
|
||||
longitude FLOAT NOT NULL,
|
||||
closestStnDist FLOAT NOT NULL,
|
||||
closestStnName VARCHAR(100) NOT NULL,
|
||||
accessCount INTEGER NOT NULL,
|
||||
id INTEGER PRIMARY KEY
|
||||
)`)
|
||||
|
||||
// if err != nil {
|
||||
// return err
|
||||
// }
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
// for _, r := range restaraunts {
|
||||
// _, err = db.Exec(`
|
||||
// INSERT INTO reviews(
|
||||
// name,
|
||||
// url,
|
||||
// delicious,
|
||||
// accommodating,
|
||||
// affordable,
|
||||
// atmospheric,
|
||||
// latitude,
|
||||
// longitude,
|
||||
// closestStnDist,
|
||||
// closestStnName,
|
||||
// accessCount
|
||||
// ) VALUES(?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)`,
|
||||
// r.name,
|
||||
// r.url,
|
||||
// r.feats.delicious,
|
||||
// r.feats.accommodating,
|
||||
// r.feats.affordable,
|
||||
// r.feats.atmospheric,
|
||||
// r.latitude,
|
||||
// r.longitude,
|
||||
// r.closestStnDist,
|
||||
// r.closestStnName,
|
||||
// 0)
|
||||
for _, rest := range restaraunts {
|
||||
var urls []string
|
||||
for _, rev := range rest.reviews {
|
||||
urls = append(urls, rev.url)
|
||||
}
|
||||
|
||||
// if err != nil {
|
||||
// return err
|
||||
// }
|
||||
// }
|
||||
_, err = db.Exec(`
|
||||
INSERT INTO reviews(
|
||||
name,
|
||||
urls,
|
||||
delicious,
|
||||
accommodating,
|
||||
affordable,
|
||||
atmospheric,
|
||||
latitude,
|
||||
longitude,
|
||||
closestStnDist,
|
||||
closestStnName,
|
||||
accessCount
|
||||
) VALUES(?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)`,
|
||||
rest.name,
|
||||
strings.Join(urls, ","),
|
||||
rest.delicious,
|
||||
rest.accomodating,
|
||||
rest.affordable,
|
||||
rest.atmospheric,
|
||||
rest.latitude,
|
||||
rest.longitude,
|
||||
rest.closestStnDist,
|
||||
rest.closestStnName,
|
||||
0)
|
||||
|
||||
// _, err = db.Exec(`
|
||||
// DROP TABLE IF EXISTS categories;
|
||||
// CREATE TABLE categories(
|
||||
// description VARCHAR(200) NOT NULL,
|
||||
// id INTEGER PRIMARY KEY)`)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
|
||||
// if err != nil {
|
||||
// return err
|
||||
// }
|
||||
_, err = db.Exec(`
|
||||
DROP TABLE IF EXISTS categories;
|
||||
CREATE TABLE categories(
|
||||
description VARCHAR(200) NOT NULL,
|
||||
id INTEGER PRIMARY KEY)`)
|
||||
|
||||
// for _, category := range []string{"I prefer quiet places", "I enjoy Mexican Food", "I drive a car"} {
|
||||
// if _, err := db.Exec("INSERT INTO categories(description) VALUES (?)", category); err != nil {
|
||||
// return err
|
||||
// }
|
||||
// }
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
// _, err = db.Exec(`
|
||||
// DROP TABLE IF EXISTS history;
|
||||
// CREATE TABLE history(
|
||||
// date DATETIME NOT NULL,
|
||||
// reviewId INTEGER NOT NULL,
|
||||
// id INTEGER PRIMARY KEY,
|
||||
// FOREIGN KEY(reviewId) REFERENCES reviews(id))`)
|
||||
for _, category := range []string{"I prefer quiet places", "I enjoy Mexican Food", "I drive a car"} {
|
||||
if _, err := db.Exec("INSERT INTO categories(description) VALUES (?)", category); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
|
||||
// if err != nil {
|
||||
// return err
|
||||
// }
|
||||
_, err = db.Exec(`
|
||||
DROP TABLE IF EXISTS history;
|
||||
CREATE TABLE history(
|
||||
date DATETIME NOT NULL,
|
||||
reviewId INTEGER NOT NULL,
|
||||
id INTEGER PRIMARY KEY,
|
||||
FOREIGN KEY(reviewId) REFERENCES reviews(id))`)
|
||||
|
||||
// _, err = db.Exec(`
|
||||
// DROP TABLE IF EXISTS historyGroups;
|
||||
// CREATE TABLE historyGroups(
|
||||
// categoryId INTEGER NOT NULL,
|
||||
// categoryValue FLOAT NOT NULL,
|
||||
// historyId INTEGER NOT NULL,
|
||||
// FOREIGN KEY(historyId) REFERENCES history(id),
|
||||
// FOREIGN KEY(categoryId) REFERENCES categories(id))`)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
// if err != nil {
|
||||
// return err
|
||||
// }
|
||||
_, err = db.Exec(`
|
||||
DROP TABLE IF EXISTS historyGroups;
|
||||
CREATE TABLE historyGroups(
|
||||
categoryId INTEGER NOT NULL,
|
||||
categoryValue FLOAT NOT NULL,
|
||||
historyId INTEGER NOT NULL,
|
||||
FOREIGN KEY(historyId) REFERENCES history(id),
|
||||
FOREIGN KEY(categoryId) REFERENCES categories(id))`)
|
||||
|
||||
// return nil
|
||||
// }
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
func main() {
|
||||
dbPath := flag.String("db", "data/db.sqlite3", "database output path")
|
||||
@ -276,7 +285,7 @@ func main() {
|
||||
log.Fatal(err)
|
||||
}
|
||||
|
||||
// if err := dumpData(*dbPath, reviews); err != nil {
|
||||
// log.Fatal(err)
|
||||
// }
|
||||
if err := dumpData(*dbPath, restaurants); err != nil {
|
||||
log.Fatal(err)
|
||||
}
|
||||
}
|
||||
|
@ -44,6 +44,7 @@ type review struct {
|
||||
latitude float64
|
||||
longitude float64
|
||||
|
||||
scr scraper
|
||||
err error
|
||||
}
|
||||
|
||||
@ -85,7 +86,7 @@ func scrapeReview(url string, out chan review, scr scraper, group *sync.WaitGrou
|
||||
|
||||
var (
|
||||
doc *goquery.Document
|
||||
rev = review{url: url}
|
||||
rev = review{url: url, scr: scr}
|
||||
)
|
||||
|
||||
if doc, rev.err = scr.load(rev.url); rev.err == nil {
|
||||
|
Loading…
x
Reference in New Issue
Block a user