Improvements
This commit is contained in:
parent
b7a07980e5
commit
adf3e20004
2
.gitignore
vendored
2
.gitignore
vendored
@ -1 +1 @@
|
||||
search-scrape
|
||||
scrape
|
||||
|
1856
cache/geocache.json
vendored
1856
cache/geocache.json
vendored
File diff suppressed because it is too large
Load Diff
9244
data/tabelog.json
9244
data/tabelog.json
File diff suppressed because it is too large
Load Diff
@ -26,6 +26,7 @@ import (
|
||||
"encoding/json"
|
||||
"io/ioutil"
|
||||
"os"
|
||||
"time"
|
||||
|
||||
"github.com/kellydunn/golang-geo"
|
||||
)
|
||||
@ -38,13 +39,16 @@ type geoCoord struct {
|
||||
type geoCache struct {
|
||||
filename string
|
||||
data map[string]geoCoord
|
||||
ticker *time.Ticker
|
||||
coder geo.GoogleGeocoder
|
||||
}
|
||||
|
||||
func newGeoCache(filename string) (*geoCache, error) {
|
||||
cache := &geoCache{
|
||||
filename: filename,
|
||||
data: make(map[string]geoCoord)}
|
||||
data: make(map[string]geoCoord),
|
||||
ticker: time.NewTicker(time.Millisecond * 200),
|
||||
}
|
||||
|
||||
if err := cache.load(); err != nil {
|
||||
return nil, err
|
||||
@ -80,6 +84,8 @@ func (c *geoCache) decode(address string) (geoCoord, error) {
|
||||
return coord, nil
|
||||
}
|
||||
|
||||
<-c.ticker.C
|
||||
|
||||
point, err := c.coder.Geocode(address)
|
||||
if err != nil {
|
||||
return geoCoord{}, err
|
||||
|
@ -94,7 +94,7 @@ func decodeReviews(in chan tabelogReview, out chan tabelogReview, wg *sync.WaitG
|
||||
|
||||
for {
|
||||
if review, ok := <-in; ok {
|
||||
log.Print("decoding %s", review.Name)
|
||||
log.Printf("decoding %s", review.Name)
|
||||
|
||||
coord, err := gc.decode(review.Address)
|
||||
if err != nil {
|
||||
@ -107,6 +107,7 @@ func decodeReviews(in chan tabelogReview, out chan tabelogReview, wg *sync.WaitG
|
||||
out <- review
|
||||
} else {
|
||||
close(out)
|
||||
return
|
||||
}
|
||||
}
|
||||
}
|
||||
@ -185,6 +186,7 @@ func scrapeTabelog(url, resultFile, webCacheDir, geoCacheFile string) {
|
||||
|
||||
var wg sync.WaitGroup
|
||||
wg.Add(2)
|
||||
|
||||
go decodeReviews(scrapeChan, decodeChan, &wg, gc)
|
||||
go dumpReviews(resultFile, decodeChan, &wg)
|
||||
|
||||
|
Loading…
Reference in New Issue
Block a user