Improvements
This commit is contained in:
parent
b7a07980e5
commit
adf3e20004
2
.gitignore
vendored
2
.gitignore
vendored
@ -1 +1 @@
|
|||||||
search-scrape
|
scrape
|
||||||
|
1856
cache/geocache.json
vendored
1856
cache/geocache.json
vendored
File diff suppressed because it is too large
Load Diff
9244
data/tabelog.json
9244
data/tabelog.json
File diff suppressed because it is too large
Load Diff
@ -26,6 +26,7 @@ import (
|
|||||||
"encoding/json"
|
"encoding/json"
|
||||||
"io/ioutil"
|
"io/ioutil"
|
||||||
"os"
|
"os"
|
||||||
|
"time"
|
||||||
|
|
||||||
"github.com/kellydunn/golang-geo"
|
"github.com/kellydunn/golang-geo"
|
||||||
)
|
)
|
||||||
@ -38,13 +39,16 @@ type geoCoord struct {
|
|||||||
type geoCache struct {
|
type geoCache struct {
|
||||||
filename string
|
filename string
|
||||||
data map[string]geoCoord
|
data map[string]geoCoord
|
||||||
|
ticker *time.Ticker
|
||||||
coder geo.GoogleGeocoder
|
coder geo.GoogleGeocoder
|
||||||
}
|
}
|
||||||
|
|
||||||
func newGeoCache(filename string) (*geoCache, error) {
|
func newGeoCache(filename string) (*geoCache, error) {
|
||||||
cache := &geoCache{
|
cache := &geoCache{
|
||||||
filename: filename,
|
filename: filename,
|
||||||
data: make(map[string]geoCoord)}
|
data: make(map[string]geoCoord),
|
||||||
|
ticker: time.NewTicker(time.Millisecond * 200),
|
||||||
|
}
|
||||||
|
|
||||||
if err := cache.load(); err != nil {
|
if err := cache.load(); err != nil {
|
||||||
return nil, err
|
return nil, err
|
||||||
@ -80,6 +84,8 @@ func (c *geoCache) decode(address string) (geoCoord, error) {
|
|||||||
return coord, nil
|
return coord, nil
|
||||||
}
|
}
|
||||||
|
|
||||||
|
<-c.ticker.C
|
||||||
|
|
||||||
point, err := c.coder.Geocode(address)
|
point, err := c.coder.Geocode(address)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return geoCoord{}, err
|
return geoCoord{}, err
|
||||||
|
@ -94,7 +94,7 @@ func decodeReviews(in chan tabelogReview, out chan tabelogReview, wg *sync.WaitG
|
|||||||
|
|
||||||
for {
|
for {
|
||||||
if review, ok := <-in; ok {
|
if review, ok := <-in; ok {
|
||||||
log.Print("decoding %s", review.Name)
|
log.Printf("decoding %s", review.Name)
|
||||||
|
|
||||||
coord, err := gc.decode(review.Address)
|
coord, err := gc.decode(review.Address)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
@ -107,6 +107,7 @@ func decodeReviews(in chan tabelogReview, out chan tabelogReview, wg *sync.WaitG
|
|||||||
out <- review
|
out <- review
|
||||||
} else {
|
} else {
|
||||||
close(out)
|
close(out)
|
||||||
|
return
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -185,6 +186,7 @@ func scrapeTabelog(url, resultFile, webCacheDir, geoCacheFile string) {
|
|||||||
|
|
||||||
var wg sync.WaitGroup
|
var wg sync.WaitGroup
|
||||||
wg.Add(2)
|
wg.Add(2)
|
||||||
|
|
||||||
go decodeReviews(scrapeChan, decodeChan, &wg, gc)
|
go decodeReviews(scrapeChan, decodeChan, &wg, gc)
|
||||||
go dumpReviews(resultFile, decodeChan, &wg)
|
go dumpReviews(resultFile, decodeChan, &wg)
|
||||||
|
|
||||||
|
Loading…
Reference in New Issue
Block a user