1
This commit is contained in:
Alex Yatskov 2015-09-18 16:33:47 +09:00
parent 227329c187
commit e480b4cb61
2 changed files with 110 additions and 100 deletions

View File

@ -25,6 +25,7 @@ package main
import (
"bufio"
"bytes"
"database/sql"
"encoding/binary"
"errors"
"flag"
@ -32,6 +33,7 @@ import (
"log"
"net/url"
"os"
"strings"
"github.com/PuerkitoBio/goquery"
_ "github.com/mattn/go-sqlite3"
@ -51,6 +53,11 @@ func (s scrapeCtx) load(url string) (*goquery.Document, error) {
}
type restaurant struct {
name string
latitude float64
longitude float64
reviews []review
accomodating float64
@ -127,7 +134,7 @@ func collateData(reviews []review) map[uint64]*restaurant {
var rest *restaurant
if rest, _ = restaurants[hash.Sum64()]; rest == nil {
rest = new(restaurant)
rest = &restaurant{name: rev.name, latitude: rev.latitude, longitude: rev.longitude}
restaurants[hash.Sum64()] = rest
}
@ -144,118 +151,120 @@ func computeStnData(restaurants map[uint64]*restaurant, stationsPath string) err
}
for _, rest := range restaurants {
if len(rest.reviews) > 0 {
rev := rest.reviews[0]
rest.closestStnName, rest.closestStnDist = sq.closestStation(rev.latitude, rev.longitude)
}
rest.closestStnName, rest.closestStnDist = sq.closestStation(rest.latitude, rest.longitude)
}
return nil
}
// dumpData writes the collated restaurants to the SQLite database at dbPath.
//
// The reviews, categories, history and historyGroups tables are dropped and
// recreated on every call. One row is inserted into reviews per restaurant,
// with the URLs of all of its reviews joined by commas in the urls column and
// accessCount initialised to 0. The categories table is seeded with a fixed
// list of descriptions; history and historyGroups are created empty.
//
// All statements run inside a single transaction: either the whole dump is
// committed, or (on any error) nothing is changed and the error is returned.
// Batching the inserts into one transaction also avoids a separate implicit
// transaction per row.
func dumpData(dbPath string, restaraunts map[uint64]*restaurant) error {
	db, err := sql.Open("sqlite3", dbPath)
	if err != nil {
		return err
	}
	defer db.Close()

	tx, err := db.Begin()
	if err != nil {
		return err
	}
	// Undo everything on an early return. After a successful Commit this
	// only yields sql.ErrTxDone, so the result is deliberately ignored.
	defer tx.Rollback()

	_, err = tx.Exec(`
		DROP TABLE IF EXISTS reviews;
		CREATE TABLE reviews(
			name VARCHAR(100) NOT NULL,
			urls VARCHAR(200) NOT NULL,
			delicious FLOAT NOT NULL,
			accommodating FLOAT NOT NULL,
			affordable FLOAT NOT NULL,
			atmospheric FLOAT NOT NULL,
			latitude FLOAT NOT NULL,
			longitude FLOAT NOT NULL,
			closestStnDist FLOAT NOT NULL,
			closestStnName VARCHAR(100) NOT NULL,
			accessCount INTEGER NOT NULL,
			id INTEGER PRIMARY KEY
		)`)
	if err != nil {
		return err
	}

	// Prepare the insert once instead of re-parsing it for every restaurant.
	insertReview, err := tx.Prepare(`
		INSERT INTO reviews(
			name,
			urls,
			delicious,
			accommodating,
			affordable,
			atmospheric,
			latitude,
			longitude,
			closestStnDist,
			closestStnName,
			accessCount
		) VALUES(?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)`)
	if err != nil {
		return err
	}
	defer insertReview.Close()

	for _, rest := range restaraunts {
		urls := make([]string, 0, len(rest.reviews))
		for _, rev := range rest.reviews {
			urls = append(urls, rev.url)
		}

		_, err = insertReview.Exec(
			rest.name,
			strings.Join(urls, ","),
			rest.delicious,
			rest.accomodating,
			rest.affordable,
			rest.atmospheric,
			rest.latitude,
			rest.longitude,
			rest.closestStnDist,
			rest.closestStnName,
			0)
		if err != nil {
			return err
		}
	}

	_, err = tx.Exec(`
		DROP TABLE IF EXISTS categories;
		CREATE TABLE categories(
			description VARCHAR(200) NOT NULL,
			id INTEGER PRIMARY KEY)`)
	if err != nil {
		return err
	}

	for _, category := range []string{"I prefer quiet places", "I enjoy Mexican Food", "I drive a car"} {
		if _, err := tx.Exec("INSERT INTO categories(description) VALUES (?)", category); err != nil {
			return err
		}
	}

	_, err = tx.Exec(`
		DROP TABLE IF EXISTS history;
		CREATE TABLE history(
			date DATETIME NOT NULL,
			reviewId INTEGER NOT NULL,
			id INTEGER PRIMARY KEY,
			FOREIGN KEY(reviewId) REFERENCES reviews(id))`)
	if err != nil {
		return err
	}

	_, err = tx.Exec(`
		DROP TABLE IF EXISTS historyGroups;
		CREATE TABLE historyGroups(
			categoryId INTEGER NOT NULL,
			categoryValue FLOAT NOT NULL,
			historyId INTEGER NOT NULL,
			FOREIGN KEY(historyId) REFERENCES history(id),
			FOREIGN KEY(categoryId) REFERENCES categories(id))`)
	if err != nil {
		return err
	}

	return tx.Commit()
}
func main() {
dbPath := flag.String("db", "data/db.sqlite3", "database output path")
@ -276,7 +285,7 @@ func main() {
log.Fatal(err)
}
// if err := dumpData(*dbPath, reviews); err != nil {
// log.Fatal(err)
// }
if err := dumpData(*dbPath, restaurants); err != nil {
log.Fatal(err)
}
}

View File

@ -44,6 +44,7 @@ type review struct {
latitude float64
longitude float64
scr scraper
err error
}
@ -85,7 +86,7 @@ func scrapeReview(url string, out chan review, scr scraper, group *sync.WaitGrou
var (
doc *goquery.Document
rev = review{url: url}
rev = review{url: url, scr: scr}
)
if doc, rev.err = scr.load(rev.url); rev.err == nil {