1

Semantics

This commit is contained in:
Alex Yatskov 2015-09-18 18:19:39 +09:00
parent c3929aff1d
commit 02c6cf9767
4 changed files with 46 additions and 11 deletions

View File

@ -53,19 +53,22 @@ func (s scrapeCtx) load(url string) (*goquery.Document, error) {
return s.wc.load(url) return s.wc.load(url)
} }
// semantics groups four float64 scores, one for each quality dimension
// tracked for a restaurant: how accomodating, affordable, atmospheric and
// delicious it is. The zero value has every score set to 0.
type semantics struct {
	accomodating, affordable, atmospheric, delicious float64
}
type restaurant struct { type restaurant struct {
name string name string
latitude float64 latitude float64
longitude float64 longitude float64
sem semantics
reviews []review reviews []review
accomodating float64
affordable float64
atmospheric float64
delicious float64
closestStnName string closestStnName string
closestStnDist float64 closestStnDist float64
} }
@ -145,7 +148,7 @@ func collateData(reviews []review) map[uint64]*restaurant {
return restaurants return restaurants
} }
func computeStnData(restaurants map[uint64]*restaurant, stationsPath string) error { func computeStations(restaurants map[uint64]*restaurant, stationsPath string) error {
sq, err := newStationQuery(stationsPath) sq, err := newStationQuery(stationsPath)
if err != nil { if err != nil {
return err return err
@ -158,6 +161,12 @@ func computeStnData(restaurants map[uint64]*restaurant, stationsPath string) err
return nil return nil
} }
// computeSemantics is currently a placeholder: its body only declares the
// local definer interface and neither reads nor modifies restaraunts.
func computeSemantics(restaraunts map[uint64]*restaurant) {
	// definer is satisfied by any type that can translate a keyword into a
	// semantics value through a define method.
	type definer interface {
		define(keyword string) semantics
	}
}
func dumpData(dbPath string, restaraunts map[uint64]*restaurant) error { func dumpData(dbPath string, restaraunts map[uint64]*restaurant) error {
db, err := sql.Open("sqlite3", dbPath) db, err := sql.Open("sqlite3", dbPath)
if err != nil { if err != nil {
@ -208,10 +217,10 @@ func dumpData(dbPath string, restaraunts map[uint64]*restaurant) error {
) VALUES(?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)`, ) VALUES(?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)`,
rest.name, rest.name,
strings.Join(urls, ","), strings.Join(urls, ","),
rest.delicious, rest.sem.delicious,
rest.accomodating, rest.sem.accomodating,
rest.affordable, rest.sem.affordable,
rest.atmospheric, rest.sem.atmospheric,
rest.latitude, rest.latitude,
rest.longitude, rest.longitude,
rest.closestStnDist, rest.closestStnDist,
@ -284,8 +293,11 @@ func main() {
log.Print(color.BlueString("collating data...")) log.Print(color.BlueString("collating data..."))
restaurants := collateData(reviews) restaurants := collateData(reviews)
log.Print(color.BlueString("computing data semantics.."))
computeSemantics(restaurants)
log.Print(color.BlueString("computing station data...")) log.Print(color.BlueString("computing station data..."))
if err := computeStnData(restaurants, *stationsPath); err != nil { if err := computeStations(restaurants, *stationsPath); err != nil {
log.Fatal(err) log.Fatal(err)
} }

View File

@ -27,6 +27,10 @@
"Latitude": 35.5252002, "Latitude": 35.5252002,
"Longitude": 139.6930519 "Longitude": 139.6930519
}, },
"1,396,279,436": {
"Latitude": 39.886922,
"Longitude": 32.8780643
},
"1-1 Akebonocho Naka-ku Yokohama Kanagawa": { "1-1 Akebonocho Naka-ku Yokohama Kanagawa": {
"Latitude": 35.4419997, "Latitude": 35.4419997,
"Longitude": 139.6302873 "Longitude": 139.6302873

View File

@ -35,6 +35,16 @@ type tabelog struct {
scrapeCtx scrapeCtx
} }
// define translates a tabelog rating keyword into its semantics scores.
//
// "dishes" and "drinks" both feed the delicious score (0.8 and 0.2
// respectively), while "service", "cost" and "atmosphere" each map fully
// (1.0) onto accomodating, affordable and atmospheric. Any other keyword
// yields the zero-valued semantics.
func (tabelog) define(keyword string) semantics {
	switch keyword {
	case "dishes":
		return semantics{delicious: 0.8}
	case "drinks":
		return semantics{delicious: 0.2}
	case "service":
		return semantics{accomodating: 1.0}
	case "cost":
		return semantics{affordable: 1.0}
	case "atmosphere":
		return semantics{atmospheric: 1.0}
	default:
		// Unknown keywords carry no weight on any dimension.
		return semantics{}
	}
}
func (tabelog) index(doc *goquery.Document) (string, []string) { func (tabelog) index(doc *goquery.Document) (string, []string) {
var reviewUrls []string var reviewUrls []string
doc.Find("div.list-rst__header > p > a").Each(func(index int, sel *goquery.Selection) { doc.Find("div.list-rst__header > p > a").Each(func(index int, sel *goquery.Selection) {

View File

@ -35,6 +35,15 @@ type tripadvisor struct {
scrapeCtx scrapeCtx
} }
// define translates a tripadvisor rating keyword into its semantics scores.
//
// "food", "service", "value" and "atmosphere" each map fully (1.0) onto
// delicious, accomodating, affordable and atmospheric respectively. Any
// other keyword yields the zero-valued semantics.
func (tripadvisor) define(keyword string) semantics {
	switch keyword {
	case "food":
		return semantics{delicious: 1.0}
	case "service":
		return semantics{accomodating: 1.0}
	case "value":
		return semantics{affordable: 1.0}
	case "atmosphere":
		return semantics{atmospheric: 1.0}
	default:
		// Unknown keywords carry no weight on any dimension.
		return semantics{}
	}
}
func (tripadvisor) index(doc *goquery.Document) (string, []string) { func (tripadvisor) index(doc *goquery.Document) (string, []string) {
var reviewUrls []string var reviewUrls []string
doc.Find("a.property_title").Each(func(index int, sel *goquery.Selection) { doc.Find("a.property_title").Each(func(index int, sel *goquery.Selection) {