Compare commits
10 Commits
b6e08ef993
...
713a5770ae
Author | SHA1 | Date | |
---|---|---|---|
713a5770ae | |||
3836bdadc9 | |||
0b789b4485 | |||
41282e6ac8 | |||
c51cf9738f | |||
f9a80d3dac | |||
9f4eec10ee | |||
0968af8d65 | |||
dc567c859d | |||
fcac80c590 |
1
.gitignore
vendored
Normal file
1
.gitignore
vendored
Normal file
@ -0,0 +1 @@
|
|||||||
|
build
|
2
LICENSE
2
LICENSE
@ -1,4 +1,4 @@
|
|||||||
Copyright 2016-2019 Alex Yatskov
|
Copyright 2016-2021 Alex Yatskov
|
||||||
|
|
||||||
Permission is hereby granted, free of charge, to any person obtaining a copy of
|
Permission is hereby granted, free of charge, to any person obtaining a copy of
|
||||||
this software and associated documentation files (the "Software"), to deal in
|
this software and associated documentation files (the "Software"), to deal in
|
||||||
|
50
Makefile
Normal file
50
Makefile
Normal file
@ -0,0 +1,50 @@
|
|||||||
|
appname := scrawl
|
||||||
|
sources := $(wildcard *.go)
|
||||||
|
|
||||||
|
build = GOOS=$(1) GOARCH=$(2) go build -o build/$(appname)$(3)
|
||||||
|
tar = cd build && tar -cvzf $(appname)_$(1)_$(2).tar.gz $(appname)$(3) && rm $(appname)$(3)
|
||||||
|
zip = cd build && zip $(appname)_$(1)_$(2).zip $(appname)$(3) && rm $(appname)$(3)
|
||||||
|
|
||||||
|
.PHONY: all windows darwin linux clean
|
||||||
|
|
||||||
|
all: windows darwin linux
|
||||||
|
|
||||||
|
clean:
|
||||||
|
rm -rf build/
|
||||||
|
|
||||||
|
# linux builds
|
||||||
|
linux: build/$(appname)_linux_arm.tar.gz build/$(appname)_linux_arm64.tar.gz build/$(appname)_linux_386.tar.gz build/$(appname)_linux_amd64.tar.gz
|
||||||
|
|
||||||
|
build/$(appname)_linux_386.tar.gz: $(sources)
|
||||||
|
$(call build,linux,386,)
|
||||||
|
$(call tar,linux,386)
|
||||||
|
|
||||||
|
build/$(appname)_linux_amd64.tar.gz: $(sources)
|
||||||
|
$(call build,linux,amd64,)
|
||||||
|
$(call tar,linux,amd64)
|
||||||
|
|
||||||
|
build/$(appname)_linux_arm.tar.gz: $(sources)
|
||||||
|
$(call build,linux,arm,)
|
||||||
|
$(call tar,linux,arm)
|
||||||
|
|
||||||
|
build/$(appname)_linux_arm64.tar.gz: $(sources)
|
||||||
|
$(call build,linux,arm64,)
|
||||||
|
$(call tar,linux,arm64)
|
||||||
|
|
||||||
|
# darwin builds
|
||||||
|
darwin: build/$(appname)_darwin_amd64.tar.gz
|
||||||
|
|
||||||
|
build/$(appname)_darwin_amd64.tar.gz: $(sources)
|
||||||
|
$(call build,darwin,amd64,)
|
||||||
|
$(call tar,darwin,amd64)
|
||||||
|
|
||||||
|
# windows builds
|
||||||
|
windows: build/$(appname)_windows_386.zip build/$(appname)_windows_amd64.zip
|
||||||
|
|
||||||
|
build/$(appname)_windows_386.zip: $(sources)
|
||||||
|
$(call build,windows,386,.exe)
|
||||||
|
$(call zip,windows,386,.exe)
|
||||||
|
|
||||||
|
build/$(appname)_windows_amd64.zip: $(sources)
|
||||||
|
$(call build,windows,amd64,.exe)
|
||||||
|
$(call zip,windows,amd64,.exe)
|
87
README.md
87
README.md
@ -1,4 +1,4 @@
|
|||||||
# Scrawl #
|
# Scrawl
|
||||||
|
|
||||||
Scrawl is a simple command line tool for downloading files referenced on websites using [CSS
|
Scrawl is a simple command line tool for downloading files referenced on websites using [CSS
|
||||||
selectors](http://www.w3schools.com/cssref/css_selectors.asp). This application is not meant to be a replacement for
|
selectors](http://www.w3schools.com/cssref/css_selectors.asp). This application is not meant to be a replacement for
|
||||||
@ -6,66 +6,41 @@ selectors](http://www.w3schools.com/cssref/css_selectors.asp). This application
|
|||||||
files when the context in which they are presented is known. This capability is particularly useful when the path of
|
files when the context in which they are presented is known. This capability is particularly useful when the path of
|
||||||
the desired file is not known but the URL of the website that links to it is (common for download pages).
|
the desired file is not known but the URL of the website that links to it is (common for download pages).
|
||||||
|
|
||||||
## Installation ##
|
## Installation
|
||||||
|
|
||||||
If you already have the Go environment and toolchain set up, you can get the latest version by running:
|
If you already have the Go environment and toolchain set up, you can get the latest version by running:
|
||||||
|
|
||||||
```
|
```
|
||||||
$ go get github.com/FooSoft/scrawl
|
go install foosoft.net/projects/scrawl@latest
|
||||||
```
|
```
|
||||||
|
|
||||||
Otherwise, you can use the pre-built binaries for the platforms below:
|
Otherwise, you can use the [pre-built binaries](https://github.com/FooSoft/scrawl/releases) from the project page.
|
||||||
|
|
||||||
* [scrawl\_darwin\_386.tar.gz](https://foosoft.net/projects/scrawl/dl/scrawl_darwin_386.tar.gz)
|
## Usage
|
||||||
* [scrawl\_darwin\_amd64.tar.gz](https://foosoft.net/projects/scrawl/dl/scrawl_darwin_amd64.tar.gz)
|
|
||||||
* [scrawl\_linux\_386.tar.gz](https://foosoft.net/projects/scrawl/dl/scrawl_linux_386.tar.gz)
|
|
||||||
* [scrawl\_linux\_amd64.tar.gz](https://foosoft.net/projects/scrawl/dl/scrawl_linux_amd64.tar.gz)
|
|
||||||
* [scrawl\_linux\_arm.tar.gz](https://foosoft.net/projects/scrawl/dl/scrawl_linux_arm.tar.gz)
|
|
||||||
* [scrawl\_windows\_386.tar.gz](https://foosoft.net/projects/scrawl/dl/scrawl_windows_386.tar.gz)
|
|
||||||
* [scrawl\_windows\_amd64.tar.gz](https://foosoft.net/projects/scrawl/dl/scrawl_windows_amd64.tar.gz)
|
|
||||||
|
|
||||||
## Usage ##
|
Executing Scrawl with the `-help` command line argument will trigger online help to be displayed. Below is a more
|
||||||
|
detailed description of what the parameters do.
|
||||||
|
|
||||||
Executing Scrawl with the `-help` command line argument will trigger online help to be displayed. The list below
|
* `attr`: The attribute containing the desired download path is specified by this argument.
|
||||||
provides a more detailed description of what the parameters do.
|
* `dir`: This argument specifies the output directory for downloaded files.
|
||||||
|
* `verbose`: Scrawl will output more details about what it is currently doing when this flag is set.
|
||||||
|
|
||||||
* **attr**
|
## Example
|
||||||
|
|
||||||
The attribute containing the desired download path is specified by this argument.
|
|
||||||
|
|
||||||
* **dir**
|
|
||||||
|
|
||||||
This argument specifies the output directory for downloaded files.
|
|
||||||
|
|
||||||
* **verbose**
|
|
||||||
|
|
||||||
Scrawl will output more details about what it is currently doing when this flag is set.
|
|
||||||
|
|
||||||
## Example ##
|
|
||||||
|
|
||||||
Let's say we want to create a script to download the latest Debian package of [Anki](http://ankisrs.net/):
|
Let's say we want to create a script to download the latest Debian package of [Anki](http://ankisrs.net/):
|
||||||
|
|
||||||
1. We load up the homepage and are presented with a big download button as shown in the screenshot below:
|
1. We load up the homepage and are presented with a big download button as shown in the screenshot below: \
|
||||||
|
![](img/anki.png)
|
||||||
[![Anki Homepage](https://foosoft.net/projects/scrawl/img/anki-thumb.png)](https://foosoft.net/projects/scrawl/img/anki.png)
|
2. Let's copy that link so we can download the latest version with wGet or curl from our script at any time! Hmm, it
|
||||||
|
looks like the path `http://ankisrs.net/download/mirror/anki-2.0.33.deb` has the version number embedded in the
|
||||||
2. Let's copy that link so we can download the latest version with wGet or curl from our script at any time!
|
filename. This means that even after a new version of Anki is released, our script will keep getting version
|
||||||
|
|
||||||
Hmm, it looks like the path `http://ankisrs.net/download/mirror/anki-2.0.33.deb` has the version number embedded in
|
|
||||||
the filename. This means that even after a new version of Anki is released, our script will keep getting version
|
|
||||||
`2.0.33` (unless of course it gets deleted).
|
`2.0.33` (unless of course it gets deleted).
|
||||||
|
3. Let's inspect the download link in your favorite browser to see what additional information we can get: \
|
||||||
3. Let's inspect the download link in your favorite browser to see what additional information we can get:
|
![](img/inspect.png)
|
||||||
|
4. It appears that we can easily create a selector for this element: `#linux > a:nth-child(2)`. Note that
|
||||||
[![Inspector](https://foosoft.net/projects/scrawl/img/inspect-thumb.png)](https://foosoft.net/projects/scrawl/img/inspect.png)
|
[Chrome](https://www.google.com/chrome/) provides the option to copy the CSS selector for any element, making
|
||||||
|
knowledge of web technology optional for this step.
|
||||||
4. It appears that we can easily create a selector for this element: `#linux > a:nth-child(2)`.
|
|
||||||
|
|
||||||
Note that [Chrome](https://www.google.com/chrome/) provides the option to copy the CSS selector for any element,
|
|
||||||
making knowledge of web technology optional for this step.
|
|
||||||
|
|
||||||
5. Now let's create a simple download and install script:
|
5. Now let's create a simple download and install script:
|
||||||
|
|
||||||
```bash
|
```bash
|
||||||
#!/bin/sh
|
#!/bin/sh
|
||||||
rm -rf /tmp/anki
|
rm -rf /tmp/anki
|
||||||
@ -74,26 +49,6 @@ Let's say we want to create a script to download the latest Debian package of [A
|
|||||||
sudo dpkg -i /tmp/anki/*.deb
|
sudo dpkg -i /tmp/anki/*.deb
|
||||||
sudo apt-get install -y -f
|
sudo apt-get install -y -f
|
||||||
```
|
```
|
||||||
|
|
||||||
In this script, we prepare an empty download directory and tell Scrawl to scrape `http://ankisrs.net/`, extracting
|
In this script, we prepare an empty download directory and tell Scrawl to scrape `http://ankisrs.net/`, extracting
|
||||||
the `href` property of the download link identified by the CSS selector `#linux > a:nth-child(2)`. We then install
|
the `href` property of the download link identified by the CSS selector `#linux > a:nth-child(2)`. We then install
|
||||||
the package and bring in any unsatisfied dependencies.
|
the package and bring in any unsatisfied dependencies.
|
||||||
|
|
||||||
## License ##
|
|
||||||
|
|
||||||
Permission is hereby granted, free of charge, to any person obtaining a copy of
|
|
||||||
this software and associated documentation files (the "Software"), to deal in
|
|
||||||
the Software without restriction, including without limitation the rights to
|
|
||||||
use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of
|
|
||||||
the Software, and to permit persons to whom the Software is furnished to do so,
|
|
||||||
subject to the following conditions:
|
|
||||||
|
|
||||||
The above copyright notice and this permission notice shall be included in all
|
|
||||||
copies or substantial portions of the Software.
|
|
||||||
|
|
||||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
||||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS
|
|
||||||
FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR
|
|
||||||
COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER
|
|
||||||
IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
|
|
||||||
CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
|
||||||
|
10
go.mod
Normal file
10
go.mod
Normal file
@ -0,0 +1,10 @@
|
|||||||
|
module foosoft.net/projects/scrawl
|
||||||
|
|
||||||
|
go 1.18
|
||||||
|
|
||||||
|
require github.com/PuerkitoBio/goquery v1.8.0
|
||||||
|
|
||||||
|
require (
|
||||||
|
github.com/andybalholm/cascadia v1.3.1 // indirect
|
||||||
|
golang.org/x/net v0.0.0-20210916014120-12bc252f5db8 // indirect
|
||||||
|
)
|
11
go.sum
Normal file
11
go.sum
Normal file
@ -0,0 +1,11 @@
|
|||||||
|
github.com/PuerkitoBio/goquery v1.8.0 h1:PJTF7AmFCFKk1N6V6jmKfrNH9tV5pNE6lZMkG0gta/U=
|
||||||
|
github.com/PuerkitoBio/goquery v1.8.0/go.mod h1:ypIiRMtY7COPGk+I/YbZLbxsxn9g5ejnI2HSMtkjZvI=
|
||||||
|
github.com/andybalholm/cascadia v1.3.1 h1:nhxRkql1kdYCc8Snf7D5/D3spOX+dBgjA6u8x004T2c=
|
||||||
|
github.com/andybalholm/cascadia v1.3.1/go.mod h1:R4bJ1UQfqADjvDa4P6HZHLh/3OxWWEqc0Sk8XGwHqvA=
|
||||||
|
golang.org/x/net v0.0.0-20210916014120-12bc252f5db8 h1:/6y1LfuqNuQdHAm0jjtPtgRcxIxjVZgm5OTu8/QhZvk=
|
||||||
|
golang.org/x/net v0.0.0-20210916014120-12bc252f5db8/go.mod h1:9nx3DQGgdP8bBQD5qxJ1jj9UTztislL4KSBs9R2vV5Y=
|
||||||
|
golang.org/x/sys v0.0.0-20201119102817-f84b799fce68/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
|
||||||
|
golang.org/x/sys v0.0.0-20210423082822-04245dca01da/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
|
||||||
|
golang.org/x/term v0.0.0-20201126162022-7de9c90e9dd1/go.mod h1:bj7SfCRtBDWHUb9snDiAeCFNEtKQo2Wmx5Cou7ajbmo=
|
||||||
|
golang.org/x/text v0.3.6/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ=
|
||||||
|
golang.org/x/tools v0.0.0-20180917221912-90fa682c2a6e/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ=
|
BIN
img/anki.png
Normal file
BIN
img/anki.png
Normal file
Binary file not shown.
After Width: | Height: | Size: 38 KiB |
BIN
img/inspect.png
Normal file
BIN
img/inspect.png
Normal file
Binary file not shown.
After Width: | Height: | Size: 25 KiB |
22
scrawl.go
22
scrawl.go
@ -1,25 +1,3 @@
|
|||||||
/*
|
|
||||||
* Copyright (c) 2016 Alex Yatskov <alex@foosoft.net>
|
|
||||||
* Author: Alex Yatskov <alex@foosoft.net>
|
|
||||||
*
|
|
||||||
* Permission is hereby granted, free of charge, to any person obtaining a copy of
|
|
||||||
* this software and associated documentation files (the "Software"), to deal in
|
|
||||||
* the Software without restriction, including without limitation the rights to
|
|
||||||
* use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of
|
|
||||||
* the Software, and to permit persons to whom the Software is furnished to do so,
|
|
||||||
* subject to the following conditions:
|
|
||||||
*
|
|
||||||
* The above copyright notice and this permission notice shall be included in all
|
|
||||||
* copies or substantial portions of the Software.
|
|
||||||
*
|
|
||||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
||||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS
|
|
||||||
* FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR
|
|
||||||
* COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER
|
|
||||||
* IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
|
|
||||||
* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
|
||||||
*/
|
|
||||||
|
|
||||||
package main
|
package main
|
||||||
|
|
||||||
import (
|
import (
|
||||||
|
Loading…
Reference in New Issue
Block a user