1

Improvements

This commit is contained in:
Alex Yatskov 2015-08-14 16:56:44 +09:00
parent b7a07980e5
commit adf3e20004
5 changed files with 9567 additions and 1547 deletions

2
.gitignore vendored
View File

@ -1 +1 @@
search-scrape scrape

1856
cache/geocache.json vendored

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

View File

@ -26,6 +26,7 @@ import (
"encoding/json" "encoding/json"
"io/ioutil" "io/ioutil"
"os" "os"
"time"
"github.com/kellydunn/golang-geo" "github.com/kellydunn/golang-geo"
) )
@ -38,13 +39,16 @@ type geoCoord struct {
type geoCache struct { type geoCache struct {
filename string filename string
data map[string]geoCoord data map[string]geoCoord
ticker *time.Ticker
coder geo.GoogleGeocoder coder geo.GoogleGeocoder
} }
func newGeoCache(filename string) (*geoCache, error) { func newGeoCache(filename string) (*geoCache, error) {
cache := &geoCache{ cache := &geoCache{
filename: filename, filename: filename,
data: make(map[string]geoCoord)} data: make(map[string]geoCoord),
ticker: time.NewTicker(time.Millisecond * 200),
}
if err := cache.load(); err != nil { if err := cache.load(); err != nil {
return nil, err return nil, err
@ -80,6 +84,8 @@ func (c *geoCache) decode(address string) (geoCoord, error) {
return coord, nil return coord, nil
} }
<-c.ticker.C
point, err := c.coder.Geocode(address) point, err := c.coder.Geocode(address)
if err != nil { if err != nil {
return geoCoord{}, err return geoCoord{}, err

View File

@ -94,7 +94,7 @@ func decodeReviews(in chan tabelogReview, out chan tabelogReview, wg *sync.WaitG
for { for {
if review, ok := <-in; ok { if review, ok := <-in; ok {
log.Print("decoding %s", review.Name) log.Printf("decoding %s", review.Name)
coord, err := gc.decode(review.Address) coord, err := gc.decode(review.Address)
if err != nil { if err != nil {
@ -107,6 +107,7 @@ func decodeReviews(in chan tabelogReview, out chan tabelogReview, wg *sync.WaitG
out <- review out <- review
} else { } else {
close(out) close(out)
return
} }
} }
} }
@ -185,6 +186,7 @@ func scrapeTabelog(url, resultFile, webCacheDir, geoCacheFile string) {
var wg sync.WaitGroup var wg sync.WaitGroup
wg.Add(2) wg.Add(2)
go decodeReviews(scrapeChan, decodeChan, &wg, gc) go decodeReviews(scrapeChan, decodeChan, &wg, gc)
go dumpReviews(resultFile, decodeChan, &wg) go dumpReviews(resultFile, decodeChan, &wg)