Compare commits
10 Commits
b6e08ef993
...
713a5770ae
Author | SHA1 | Date | |
---|---|---|---|
713a5770ae | |||
3836bdadc9 | |||
0b789b4485 | |||
41282e6ac8 | |||
c51cf9738f | |||
f9a80d3dac | |||
9f4eec10ee | |||
0968af8d65 | |||
dc567c859d | |||
fcac80c590 |
1
.gitignore
vendored
Normal file
1
.gitignore
vendored
Normal file
@ -0,0 +1 @@
|
||||
build
|
2
LICENSE
2
LICENSE
@ -1,4 +1,4 @@
|
||||
Copyright 2016-2019 Alex Yatskov
|
||||
Copyright 2016-2021 Alex Yatskov
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy of
|
||||
this software and associated documentation files (the "Software"), to deal in
|
||||
|
50
Makefile
Normal file
50
Makefile
Normal file
@ -0,0 +1,50 @@
|
||||
appname := scrawl
|
||||
sources := $(wildcard *.go)
|
||||
|
||||
build = GOOS=$(1) GOARCH=$(2) go build -o build/$(appname)$(3)
|
||||
tar = cd build && tar -cvzf $(appname)_$(1)_$(2).tar.gz $(appname)$(3) && rm $(appname)$(3)
|
||||
zip = cd build && zip $(appname)_$(1)_$(2).zip $(appname)$(3) && rm $(appname)$(3)
|
||||
|
||||
.PHONY: all windows darwin linux clean
|
||||
|
||||
all: windows darwin linux
|
||||
|
||||
clean:
|
||||
rm -rf build/
|
||||
|
||||
# linux builds
|
||||
linux: build/$(appname)_linux_arm.tar.gz build/$(appname)_linux_arm64.tar.gz build/$(appname)_linux_386.tar.gz build/$(appname)_linux_amd64.tar.gz
|
||||
|
||||
build/$(appname)_linux_386.tar.gz: $(sources)
|
||||
$(call build,linux,386,)
|
||||
$(call tar,linux,386)
|
||||
|
||||
build/$(appname)_linux_amd64.tar.gz: $(sources)
|
||||
$(call build,linux,amd64,)
|
||||
$(call tar,linux,amd64)
|
||||
|
||||
build/$(appname)_linux_arm.tar.gz: $(sources)
|
||||
$(call build,linux,arm,)
|
||||
$(call tar,linux,arm)
|
||||
|
||||
build/$(appname)_linux_arm64.tar.gz: $(sources)
|
||||
$(call build,linux,arm64,)
|
||||
$(call tar,linux,arm64)
|
||||
|
||||
# darwin builds
|
||||
darwin: build/$(appname)_darwin_amd64.tar.gz
|
||||
|
||||
build/$(appname)_darwin_amd64.tar.gz: $(sources)
|
||||
$(call build,darwin,amd64,)
|
||||
$(call tar,darwin,amd64)
|
||||
|
||||
# windows builds
|
||||
windows: build/$(appname)_windows_386.zip build/$(appname)_windows_amd64.zip
|
||||
|
||||
build/$(appname)_windows_386.zip: $(sources)
|
||||
$(call build,windows,386,.exe)
|
||||
$(call zip,windows,386,.exe)
|
||||
|
||||
build/$(appname)_windows_amd64.zip: $(sources)
|
||||
$(call build,windows,amd64,.exe)
|
||||
$(call zip,windows,amd64,.exe)
|
87
README.md
87
README.md
@ -1,4 +1,4 @@
|
||||
# Scrawl #
|
||||
# Scrawl
|
||||
|
||||
Scrawl is a simple command line tool for downloading files referenced on websites using [CSS
|
||||
selectors](http://www.w3schools.com/cssref/css_selectors.asp). This application is not meant to be a replacement for
|
||||
@ -6,66 +6,41 @@ selectors](http://www.w3schools.com/cssref/css_selectors.asp). This application
|
||||
files when the context in which they are presented is known to. This capability is particularly useful when the path of
|
||||
the desired file is not known but the URL of the website that links to it is (common for download pages).
|
||||
|
||||
## Installation ##
|
||||
## Installation
|
||||
|
||||
If you already have the Go environment and toolchain set up, you can get the latest version by running:
|
||||
|
||||
```
|
||||
$ go get github.com/FooSoft/scrawl
|
||||
go install foosoft.net/projects/scrawl@latest
|
||||
```
|
||||
|
||||
Otherwise, you can use the pre-built binaries for the platforms below:
|
||||
Otherwise, you can use the [pre-built binaries](https://github.com/FooSoft/scrawl/releases) from the project page.
|
||||
|
||||
* [scrawl\_darwin\_386.tar.gz](https://foosoft.net/projects/scrawl/dl/scrawl_darwin_386.tar.gz)
|
||||
* [scrawl\_darwin\_amd64.tar.gz](https://foosoft.net/projects/scrawl/dl/scrawl_darwin_amd64.tar.gz)
|
||||
* [scrawl\_linux\_386.tar.gz](https://foosoft.net/projects/scrawl/dl/scrawl_linux_386.tar.gz)
|
||||
* [scrawl\_linux\_amd64.tar.gz](https://foosoft.net/projects/scrawl/dl/scrawl_linux_amd64.tar.gz)
|
||||
* [scrawl\_linux\_arm.tar.gz](https://foosoft.net/projects/scrawl/dl/scrawl_linux_arm.tar.gz)
|
||||
* [scrawl\_windows\_386.tar.gz](https://foosoft.net/projects/scrawl/dl/scrawl_windows_386.tar.gz)
|
||||
* [scrawl\_windows\_amd64.tar.gz](https://foosoft.net/projects/scrawl/dl/scrawl_windows_amd64.tar.gz)
|
||||
## Usage
|
||||
|
||||
## Usage ##
|
||||
Executing Scrawl with the `-help` command line argument will trigger online help to be displayed. Below is a more
|
||||
detailed description of what the parameters do.
|
||||
|
||||
Executing Scrawl with the `-help` command line argument will trigger online help to be displayed. The list below
|
||||
provides a more detailed description of what the parameters do.
|
||||
* `attr`: The attribute containing the desired download path is specified by this argument.
|
||||
* `dir`: This argument specifies the output directory for downloaded files.
|
||||
* `verbose`: Scrawl will output more details about what it is currently doing when this flag is set.
|
||||
|
||||
* **attr**
|
||||
|
||||
The attribute containing the desired download path is specified by this argument.
|
||||
|
||||
* **dir**
|
||||
|
||||
This argument specifies the output directory for downloaded files.
|
||||
|
||||
* **verbose**
|
||||
|
||||
Scrawl will output more details about what it is currently doing when this flag is set.
|
||||
|
||||
## Example ##
|
||||
## Example
|
||||
|
||||
Let's say we want to create a script to download the latest Debian package of [Anki](http://ankisrs.net/):
|
||||
|
||||
1. We load up the homepage and are presented with a big download button as shown in the screenshot below:
|
||||
|
||||
[![Anki Homepage](https://foosoft.net/projects/scrawl/img/anki-thumb.png)](https://foosoft.net/projects/scrawl/img/anki.png)
|
||||
|
||||
2. Let's copy that link so we can download the latest version with wget or curl from our script at any time!
|
||||
|
||||
Hmm, it looks like the path `http://ankisrs.net/download/mirror/anki-2.0.33.deb` has the version number embedded in
|
||||
the filename. This means that even after a new version of Anki is released, our script will keep getting version
|
||||
1. We load up the homepage and are presented with a big download button as shown in the screenshot below: \
|
||||
![](img/anki.png)
|
||||
2. Let's copy that link so we can download the latest version with wget or curl from our script at any time! Hmm, it
|
||||
looks like the path `http://ankisrs.net/download/mirror/anki-2.0.33.deb` has the version number embedded in the
|
||||
filename. This means that even after a new version of Anki is released, our script will keep getting version
|
||||
`2.0.33` (unless of course it gets deleted).
|
||||
|
||||
3. Let's inspect the download link in your favorite browser to see what additional information we can get:
|
||||
|
||||
[![Inspector](https://foosoft.net/projects/scrawl/img/inspect-thumb.png)](https://foosoft.net/projects/scrawl/img/inspect.png)
|
||||
|
||||
4. It appears that we can easily create a selector for this element: `#linux > a:nth-child(2)`.
|
||||
|
||||
Note that [Chrome](https://www.google.com/chrome/) provides the option to copy the CSS selector for any element,
|
||||
making knowledge of web technology optional for this step.
|
||||
|
||||
3. Let's inspect the download link in your favorite browser to see what additional information we can get: \
|
||||
![](img/inspect.png)
|
||||
4. It appears that we can easily create a selector for this element: `#linux > a:nth-child(2)`. Note that
|
||||
[Chrome](https://www.google.com/chrome/) provides the option to copy the CSS selector for any element, making
|
||||
knowledge of web technology optional for this step.
|
||||
5. Now let's create a simple download and install script:
|
||||
|
||||
```bash
|
||||
#!/bin/sh
|
||||
rm -rf /tmp/anki
|
||||
@ -74,26 +49,6 @@ Let's say we want to create a script to download the latest Debian package of [A
|
||||
sudo dpkg -i /tmp/anki/*.deb
|
||||
sudo apt-get install -y -f
|
||||
```
|
||||
|
||||
In this script, we prepare an empty download directory and tell Scrawl to scrape `http://ankisrs.net/`, extracting
|
||||
the `href` property of the download link identified by the CSS selector `#linux > a:nth-child(2)`. We then install
|
||||
the package and bring in any unsatisfied dependencies.
|
||||
|
||||
## License ##
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy of
|
||||
this software and associated documentation files (the "Software"), to deal in
|
||||
the Software without restriction, including without limitation the rights to
|
||||
use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of
|
||||
the Software, and to permit persons to whom the Software is furnished to do so,
|
||||
subject to the following conditions:
|
||||
|
||||
The above copyright notice and this permission notice shall be included in all
|
||||
copies or substantial portions of the Software.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS
|
||||
FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR
|
||||
COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER
|
||||
IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
|
||||
CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
||||
|
10
go.mod
Normal file
10
go.mod
Normal file
@ -0,0 +1,10 @@
|
||||
module foosoft.net/projects/scrawl
|
||||
|
||||
go 1.18
|
||||
|
||||
require github.com/PuerkitoBio/goquery v1.8.0
|
||||
|
||||
require (
|
||||
github.com/andybalholm/cascadia v1.3.1 // indirect
|
||||
golang.org/x/net v0.0.0-20210916014120-12bc252f5db8 // indirect
|
||||
)
|
11
go.sum
Normal file
11
go.sum
Normal file
@ -0,0 +1,11 @@
|
||||
github.com/PuerkitoBio/goquery v1.8.0 h1:PJTF7AmFCFKk1N6V6jmKfrNH9tV5pNE6lZMkG0gta/U=
|
||||
github.com/PuerkitoBio/goquery v1.8.0/go.mod h1:ypIiRMtY7COPGk+I/YbZLbxsxn9g5ejnI2HSMtkjZvI=
|
||||
github.com/andybalholm/cascadia v1.3.1 h1:nhxRkql1kdYCc8Snf7D5/D3spOX+dBgjA6u8x004T2c=
|
||||
github.com/andybalholm/cascadia v1.3.1/go.mod h1:R4bJ1UQfqADjvDa4P6HZHLh/3OxWWEqc0Sk8XGwHqvA=
|
||||
golang.org/x/net v0.0.0-20210916014120-12bc252f5db8 h1:/6y1LfuqNuQdHAm0jjtPtgRcxIxjVZgm5OTu8/QhZvk=
|
||||
golang.org/x/net v0.0.0-20210916014120-12bc252f5db8/go.mod h1:9nx3DQGgdP8bBQD5qxJ1jj9UTztislL4KSBs9R2vV5Y=
|
||||
golang.org/x/sys v0.0.0-20201119102817-f84b799fce68/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
|
||||
golang.org/x/sys v0.0.0-20210423082822-04245dca01da/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
|
||||
golang.org/x/term v0.0.0-20201126162022-7de9c90e9dd1/go.mod h1:bj7SfCRtBDWHUb9snDiAeCFNEtKQo2Wmx5Cou7ajbmo=
|
||||
golang.org/x/text v0.3.6/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ=
|
||||
golang.org/x/tools v0.0.0-20180917221912-90fa682c2a6e/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ=
|
BIN
img/anki.png
Normal file
BIN
img/anki.png
Normal file
Binary file not shown.
After Width: | Height: | Size: 38 KiB |
BIN
img/inspect.png
Normal file
BIN
img/inspect.png
Normal file
Binary file not shown.
After Width: | Height: | Size: 25 KiB |
22
scrawl.go
22
scrawl.go
@ -1,25 +1,3 @@
|
||||
/*
|
||||
* Copyright (c) 2016 Alex Yatskov <alex@foosoft.net>
|
||||
* Author: Alex Yatskov <alex@foosoft.net>
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a copy of
|
||||
* this software and associated documentation files (the "Software"), to deal in
|
||||
* the Software without restriction, including without limitation the rights to
|
||||
* use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of
|
||||
* the Software, and to permit persons to whom the Software is furnished to do so,
|
||||
* subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be included in all
|
||||
* copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS
|
||||
* FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR
|
||||
* COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER
|
||||
* IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
|
||||
* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
package main
|
||||
|
||||
import (
|
||||
|
Loading…
Reference in New Issue
Block a user