1
This commit is contained in:
Alex Yatskov 2015-09-18 16:33:47 +09:00
parent 227329c187
commit e480b4cb61
2 changed files with 110 additions and 100 deletions

View File

@ -25,6 +25,7 @@ package main
import ( import (
"bufio" "bufio"
"bytes" "bytes"
"database/sql"
"encoding/binary" "encoding/binary"
"errors" "errors"
"flag" "flag"
@ -32,6 +33,7 @@ import (
"log" "log"
"net/url" "net/url"
"os" "os"
"strings"
"github.com/PuerkitoBio/goquery" "github.com/PuerkitoBio/goquery"
_ "github.com/mattn/go-sqlite3" _ "github.com/mattn/go-sqlite3"
@ -51,6 +53,11 @@ func (s scrapeCtx) load(url string) (*goquery.Document, error) {
} }
type restaurant struct { type restaurant struct {
name string
latitude float64
longitude float64
reviews []review reviews []review
accomodating float64 accomodating float64
@ -127,7 +134,7 @@ func collateData(reviews []review) map[uint64]*restaurant {
var rest *restaurant var rest *restaurant
if rest, _ = restaurants[hash.Sum64()]; rest == nil { if rest, _ = restaurants[hash.Sum64()]; rest == nil {
rest = new(restaurant) rest = &restaurant{name: rev.name, latitude: rev.latitude, longitude: rev.longitude}
restaurants[hash.Sum64()] = rest restaurants[hash.Sum64()] = rest
} }
@ -144,118 +151,120 @@ func computeStnData(restaurants map[uint64]*restaurant, stationsPath string) err
} }
for _, rest := range restaurants { for _, rest := range restaurants {
if len(rest.reviews) > 0 { rest.closestStnName, rest.closestStnDist = sq.closestStation(rest.latitude, rest.longitude)
rev := rest.reviews[0]
rest.closestStnName, rest.closestStnDist = sq.closestStation(rev.latitude, rev.longitude)
}
} }
return nil return nil
} }
// func dumpData(dbPath string, restaraunts []restaurant) error { func dumpData(dbPath string, restaraunts map[uint64]*restaurant) error {
// db, err := sql.Open("sqlite3", dbPath) db, err := sql.Open("sqlite3", dbPath)
// if err != nil { if err != nil {
// return err return err
// } }
// defer db.Close() defer db.Close()
// _, err = db.Exec(` _, err = db.Exec(`
// DROP TABLE IF EXISTS reviews; DROP TABLE IF EXISTS reviews;
// CREATE TABLE reviews( CREATE TABLE reviews(
// name VARCHAR(100) NOT NULL, name VARCHAR(100) NOT NULL,
// url VARCHAR(200) NOT NULL, urls VARCHAR(200) NOT NULL,
// delicious FLOAT NOT NULL, delicious FLOAT NOT NULL,
// accommodating FLOAT NOT NULL, accommodating FLOAT NOT NULL,
// affordable FLOAT NOT NULL, affordable FLOAT NOT NULL,
// atmospheric FLOAT NOT NULL, atmospheric FLOAT NOT NULL,
// latitude FLOAT NOT NULL, latitude FLOAT NOT NULL,
// longitude FLOAT NOT NULL, longitude FLOAT NOT NULL,
// closestStnDist FLOAT NOT NULL, closestStnDist FLOAT NOT NULL,
// closestStnName VARCHAR(100) NOT NULL, closestStnName VARCHAR(100) NOT NULL,
// accessCount INTEGER NOT NULL, accessCount INTEGER NOT NULL,
// id INTEGER PRIMARY KEY id INTEGER PRIMARY KEY
// )`) )`)
// if err != nil { if err != nil {
// return err return err
// } }
// for _, r := range restaraunts { for _, rest := range restaraunts {
// _, err = db.Exec(` var urls []string
// INSERT INTO reviews( for _, rev := range rest.reviews {
// name, urls = append(urls, rev.url)
// url, }
// delicious,
// accommodating,
// affordable,
// atmospheric,
// latitude,
// longitude,
// closestStnDist,
// closestStnName,
// accessCount
// ) VALUES(?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)`,
// r.name,
// r.url,
// r.feats.delicious,
// r.feats.accommodating,
// r.feats.affordable,
// r.feats.atmospheric,
// r.latitude,
// r.longitude,
// r.closestStnDist,
// r.closestStnName,
// 0)
// if err != nil { _, err = db.Exec(`
// return err INSERT INTO reviews(
// } name,
// } urls,
delicious,
accommodating,
affordable,
atmospheric,
latitude,
longitude,
closestStnDist,
closestStnName,
accessCount
) VALUES(?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)`,
rest.name,
strings.Join(urls, ","),
rest.delicious,
rest.accomodating,
rest.affordable,
rest.atmospheric,
rest.latitude,
rest.longitude,
rest.closestStnDist,
rest.closestStnName,
0)
// _, err = db.Exec(` if err != nil {
// DROP TABLE IF EXISTS categories; return err
// CREATE TABLE categories( }
// description VARCHAR(200) NOT NULL, }
// id INTEGER PRIMARY KEY)`)
// if err != nil { _, err = db.Exec(`
// return err DROP TABLE IF EXISTS categories;
// } CREATE TABLE categories(
description VARCHAR(200) NOT NULL,
id INTEGER PRIMARY KEY)`)
// for _, category := range []string{"I prefer quiet places", "I enjoy Mexican Food", "I drive a car"} { if err != nil {
// if _, err := db.Exec("INSERT INTO categories(description) VALUES (?)", category); err != nil { return err
// return err }
// }
// }
// _, err = db.Exec(` for _, category := range []string{"I prefer quiet places", "I enjoy Mexican Food", "I drive a car"} {
// DROP TABLE IF EXISTS history; if _, err := db.Exec("INSERT INTO categories(description) VALUES (?)", category); err != nil {
// CREATE TABLE history( return err
// date DATETIME NOT NULL, }
// reviewId INTEGER NOT NULL, }
// id INTEGER PRIMARY KEY,
// FOREIGN KEY(reviewId) REFERENCES reviews(id))`)
// if err != nil { _, err = db.Exec(`
// return err DROP TABLE IF EXISTS history;
// } CREATE TABLE history(
date DATETIME NOT NULL,
reviewId INTEGER NOT NULL,
id INTEGER PRIMARY KEY,
FOREIGN KEY(reviewId) REFERENCES reviews(id))`)
// _, err = db.Exec(` if err != nil {
// DROP TABLE IF EXISTS historyGroups; return err
// CREATE TABLE historyGroups( }
// categoryId INTEGER NOT NULL,
// categoryValue FLOAT NOT NULL,
// historyId INTEGER NOT NULL,
// FOREIGN KEY(historyId) REFERENCES history(id),
// FOREIGN KEY(categoryId) REFERENCES categories(id))`)
// if err != nil { _, err = db.Exec(`
// return err DROP TABLE IF EXISTS historyGroups;
// } CREATE TABLE historyGroups(
categoryId INTEGER NOT NULL,
categoryValue FLOAT NOT NULL,
historyId INTEGER NOT NULL,
FOREIGN KEY(historyId) REFERENCES history(id),
FOREIGN KEY(categoryId) REFERENCES categories(id))`)
// return nil if err != nil {
// } return err
}
return nil
}
func main() { func main() {
dbPath := flag.String("db", "data/db.sqlite3", "database output path") dbPath := flag.String("db", "data/db.sqlite3", "database output path")
@ -276,7 +285,7 @@ func main() {
log.Fatal(err) log.Fatal(err)
} }
// if err := dumpData(*dbPath, reviews); err != nil { if err := dumpData(*dbPath, restaurants); err != nil {
// log.Fatal(err) log.Fatal(err)
// } }
} }

View File

@ -44,6 +44,7 @@ type review struct {
latitude float64 latitude float64
longitude float64 longitude float64
scr scraper
err error err error
} }
@ -85,7 +86,7 @@ func scrapeReview(url string, out chan review, scr scraper, group *sync.WaitGrou
var ( var (
doc *goquery.Document doc *goquery.Document
rev = review{url: url} rev = review{url: url, scr: scr}
) )
if doc, rev.err = scr.load(rev.url); rev.err == nil { if doc, rev.err = scr.load(rev.url); rev.err == nil {