1

Improvements

This commit is contained in:
Alex Yatskov 2015-08-14 16:56:44 +09:00
parent b7a07980e5
commit adf3e20004
5 changed files with 9567 additions and 1547 deletions

2
.gitignore vendored
View File

@@ -1 +1 @@
search-scrape
scrape

1856
cache/geocache.json vendored

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

View File

@@ -26,6 +26,7 @@ import (
"encoding/json"
"io/ioutil"
"os"
"time"
"github.com/kellydunn/golang-geo"
)
@@ -38,13 +39,16 @@ type geoCoord struct {
// geoCache bundles the state used for geocoding: a backing file name, an
// in-memory map of coordinates, a ticker that decode waits on before each
// geocoder request, and the geocoder itself.
type geoCache struct {
// filename is the cache file name handed to newGeoCache.
filename string
// data holds coordinates keyed by string
// (presumably the address passed to decode — TODO confirm).
data map[string]geoCoord
// ticker paces geocoder requests: decode receives from ticker.C before
// calling coder.Geocode. newGeoCache creates it with a 200ms period.
ticker *time.Ticker
// coder performs the lookup via coder.Geocode(address).
coder geo.GoogleGeocoder
}
func newGeoCache(filename string) (*geoCache, error) {
cache := &geoCache{
filename: filename,
data: make(map[string]geoCoord)}
data: make(map[string]geoCoord),
ticker: time.NewTicker(time.Millisecond * 200),
}
if err := cache.load(); err != nil {
return nil, err
@@ -80,6 +84,8 @@ func (c *geoCache) decode(address string) (geoCoord, error) {
return coord, nil
}
<-c.ticker.C
point, err := c.coder.Geocode(address)
if err != nil {
return geoCoord{}, err

View File

@@ -94,7 +94,7 @@ func decodeReviews(in chan tabelogReview, out chan tabelogReview, wg *sync.WaitG
for {
if review, ok := <-in; ok {
log.Print("decoding %s", review.Name)
log.Printf("decoding %s", review.Name)
coord, err := gc.decode(review.Address)
if err != nil {
@@ -107,6 +107,7 @@ func decodeReviews(in chan tabelogReview, out chan tabelogReview, wg *sync.WaitG
out <- review
} else {
close(out)
return
}
}
}
@@ -185,6 +186,7 @@ func scrapeTabelog(url, resultFile, webCacheDir, geoCacheFile string) {
var wg sync.WaitGroup
wg.Add(2)
go decodeReviews(scrapeChan, decodeChan, &wg, gc)
go dumpReviews(resultFile, decodeChan, &wg)