Compare commits
No commits in common. "713a5770ae968cfd48250de79422eb21cc530086" and "b6e08ef993e2e9b39ebe6b4d0c8fd3d9c97342c5" have entirely different histories.
713a5770ae
...
b6e08ef993
1
.gitignore
vendored
1
.gitignore
vendored
@ -1 +0,0 @@
|
|||||||
build
|
|
2
LICENSE
2
LICENSE
@ -1,4 +1,4 @@
|
|||||||
Copyright 2016-2021 Alex Yatskov
|
Copyright 2016-2019 Alex Yatskov
|
||||||
|
|
||||||
Permission is hereby granted, free of charge, to any person obtaining a copy of
|
Permission is hereby granted, free of charge, to any person obtaining a copy of
|
||||||
this software and associated documentation files (the "Software"), to deal in
|
this software and associated documentation files (the "Software"), to deal in
|
||||||
|
50
Makefile
50
Makefile
@ -1,50 +0,0 @@
|
|||||||
appname := scrawl
|
|
||||||
sources := $(wildcard *.go)
|
|
||||||
|
|
||||||
build = GOOS=$(1) GOARCH=$(2) go build -o build/$(appname)$(3)
|
|
||||||
tar = cd build && tar -cvzf $(appname)_$(1)_$(2).tar.gz $(appname)$(3) && rm $(appname)$(3)
|
|
||||||
zip = cd build && zip $(appname)_$(1)_$(2).zip $(appname)$(3) && rm $(appname)$(3)
|
|
||||||
|
|
||||||
.PHONY: all windows darwin linux clean
|
|
||||||
|
|
||||||
all: windows darwin linux
|
|
||||||
|
|
||||||
clean:
|
|
||||||
rm -rf build/
|
|
||||||
|
|
||||||
# linux builds
|
|
||||||
linux: build/$(appname)_linux_arm.tar.gz build/$(appname)_linux_arm64.tar.gz build/$(appname)_linux_386.tar.gz build/$(appname)_linux_amd64.tar.gz
|
|
||||||
|
|
||||||
build/$(appname)_linux_386.tar.gz: $(sources)
|
|
||||||
$(call build,linux,386,)
|
|
||||||
$(call tar,linux,386)
|
|
||||||
|
|
||||||
build/$(appname)_linux_amd64.tar.gz: $(sources)
|
|
||||||
$(call build,linux,amd64,)
|
|
||||||
$(call tar,linux,amd64)
|
|
||||||
|
|
||||||
build/$(appname)_linux_arm.tar.gz: $(sources)
|
|
||||||
$(call build,linux,arm,)
|
|
||||||
$(call tar,linux,arm)
|
|
||||||
|
|
||||||
build/$(appname)_linux_arm64.tar.gz: $(sources)
|
|
||||||
$(call build,linux,arm64,)
|
|
||||||
$(call tar,linux,arm64)
|
|
||||||
|
|
||||||
# darwin builds
|
|
||||||
darwin: build/$(appname)_darwin_amd64.tar.gz
|
|
||||||
|
|
||||||
build/$(appname)_darwin_amd64.tar.gz: $(sources)
|
|
||||||
$(call build,darwin,amd64,)
|
|
||||||
$(call tar,darwin,amd64)
|
|
||||||
|
|
||||||
# windows builds
|
|
||||||
windows: build/$(appname)_windows_386.zip build/$(appname)_windows_amd64.zip
|
|
||||||
|
|
||||||
build/$(appname)_windows_386.zip: $(sources)
|
|
||||||
$(call build,windows,386,.exe)
|
|
||||||
$(call zip,windows,386,.exe)
|
|
||||||
|
|
||||||
build/$(appname)_windows_amd64.zip: $(sources)
|
|
||||||
$(call build,windows,amd64,.exe)
|
|
||||||
$(call zip,windows,amd64,.exe)
|
|
87
README.md
87
README.md
@ -1,4 +1,4 @@
|
|||||||
# Scrawl
|
# Scrawl #
|
||||||
|
|
||||||
Scrawl is a simple command line tool for downloading files referenced on websites using [CSS
|
Scrawl is a simple command line tool for downloading files referenced on websites using [CSS
|
||||||
selectors](http://www.w3schools.com/cssref/css_selectors.asp). This application is not meant to be a replacement for
|
selectors](http://www.w3schools.com/cssref/css_selectors.asp). This application is not meant to be a replacement for
|
||||||
@ -6,41 +6,66 @@ selectors](http://www.w3schools.com/cssref/css_selectors.asp). This application
|
|||||||
files when the context in which they are presented is known. This capability is particularly useful when the path of
|
files when the context in which they are presented is known. This capability is particularly useful when the path of
|
||||||
the desired file is not known but the URL of the website that links to it is (common for download pages).
|
the desired file is not known but the URL of the website that links to it is (common for download pages).
|
||||||
|
|
||||||
## Installation
|
## Installation ##
|
||||||
|
|
||||||
If you already have the Go environment and toolchain set up, you can get the latest version by running:
|
If you already have the Go environment and toolchain set up, you can get the latest version by running:
|
||||||
|
|
||||||
```
|
```
|
||||||
go install foosoft.net/projects/scrawl@latest
|
$ go get github.com/FooSoft/scrawl
|
||||||
```
|
```
|
||||||
|
|
||||||
Otherwise, you can use the [pre-built binaries](https://github.com/FooSoft/scrawl/releases) from the project page.
|
Otherwise, you can use the pre-built binaries for the platforms below:
|
||||||
|
|
||||||
## Usage
|
* [scrawl\_darwin\_386.tar.gz](https://foosoft.net/projects/scrawl/dl/scrawl_darwin_386.tar.gz)
|
||||||
|
* [scrawl\_darwin\_amd64.tar.gz](https://foosoft.net/projects/scrawl/dl/scrawl_darwin_amd64.tar.gz)
|
||||||
|
* [scrawl\_linux\_386.tar.gz](https://foosoft.net/projects/scrawl/dl/scrawl_linux_386.tar.gz)
|
||||||
|
* [scrawl\_linux\_amd64.tar.gz](https://foosoft.net/projects/scrawl/dl/scrawl_linux_amd64.tar.gz)
|
||||||
|
* [scrawl\_linux\_arm.tar.gz](https://foosoft.net/projects/scrawl/dl/scrawl_linux_arm.tar.gz)
|
||||||
|
* [scrawl\_windows\_386.tar.gz](https://foosoft.net/projects/scrawl/dl/scrawl_windows_386.tar.gz)
|
||||||
|
* [scrawl\_windows\_amd64.tar.gz](https://foosoft.net/projects/scrawl/dl/scrawl_windows_amd64.tar.gz)
|
||||||
|
|
||||||
Executing Scrawl with the `-help` command line argument will trigger online help to be displayed. Below is a more
|
## Usage ##
|
||||||
detailed description of what the parameters do.
|
|
||||||
|
|
||||||
* `attr`: The attribute containing the desired download path is specified by this argument.
|
Executing Scrawl with the `-help` command line argument will trigger online help to be displayed. The list below
|
||||||
* `dir`: This argument specifies the output directory for downloaded files.
|
provides a more detailed description of what the parameters do.
|
||||||
* `verbose`: Scrawl will output more details about what it is currently doing when this flag is set.
|
|
||||||
|
|
||||||
## Example
|
* **attr**
|
||||||
|
|
||||||
|
The attribute containing the desired download path is specified by this argument.
|
||||||
|
|
||||||
|
* **dir**
|
||||||
|
|
||||||
|
This argument specifies the output directory for downloaded files.
|
||||||
|
|
||||||
|
* **verbose**
|
||||||
|
|
||||||
|
Scrawl will output more details about what it is currently doing when this flag is set.
|
||||||
|
|
||||||
|
## Example ##
|
||||||
|
|
||||||
Let's say we want to create a script to download the latest Debian package of [Anki](http://ankisrs.net/):
|
Let's say we want to create a script to download the latest Debian package of [Anki](http://ankisrs.net/):
|
||||||
|
|
||||||
1. We load up the homepage and are presented with a big download button as shown in the screenshot below: \
|
1. We load up the homepage and are presented with a big download button as shown in the screenshot below:
|
||||||
![](img/anki.png)
|
|
||||||
2. Let's copy that link so we can download the latest version with wget or curl from our script at any time! Hmm, it
|
[![Anki Homepage](https://foosoft.net/projects/scrawl/img/anki-thumb.png)](https://foosoft.net/projects/scrawl/img/anki.png)
|
||||||
looks like the path `http://ankisrs.net/download/mirror/anki-2.0.33.deb` has the version number embedded in the
|
|
||||||
filename. This means that even after a new version of Anki is released, our script will keep getting version
|
2. Let's copy that link so we can download the latest version with wget or curl from our script at any time!
|
||||||
|
|
||||||
|
Hmm, it looks like the path `http://ankisrs.net/download/mirror/anki-2.0.33.deb` has the version number embedded in
|
||||||
|
the filename. This means that even after a new version of Anki is released, our script will keep getting version
|
||||||
`2.0.33` (unless of course it gets deleted).
|
`2.0.33` (unless of course it gets deleted).
|
||||||
3. Let's inspect the download link in your favorite browser to see what additional information we can get: \
|
|
||||||
![](img/inspect.png)
|
3. Let's inspect the download link in your favorite browser to see what additional information we can get:
|
||||||
4. It appears that we can easily create a selector for this element: `#linux > a:nth-child(2)`. Note that
|
|
||||||
[Chrome](https://www.google.com/chrome/) provides the option to copy the CSS selector for any element, making
|
[![Inspector](https://foosoft.net/projects/scrawl/img/inspect-thumb.png)](https://foosoft.net/projects/scrawl/img/inspect.png)
|
||||||
knowledge of web technology optional for this step.
|
|
||||||
|
4. It appears that we can easily create a selector for this element: `#linux > a:nth-child(2)`.
|
||||||
|
|
||||||
|
Note that [Chrome](https://www.google.com/chrome/) provides the option to copy the CSS selector for any element,
|
||||||
|
making knowledge of web technology optional for this step.
|
||||||
|
|
||||||
5. Now let's create a simple download and install script:
|
5. Now let's create a simple download and install script:
|
||||||
|
|
||||||
```bash
|
```bash
|
||||||
#!/bin/sh
|
#!/bin/sh
|
||||||
rm -rf /tmp/anki
|
rm -rf /tmp/anki
|
||||||
@ -49,6 +74,26 @@ Let's say we want to create a script to download the latest Debian package of [A
|
|||||||
sudo dpkg -i /tmp/anki/*.deb
|
sudo dpkg -i /tmp/anki/*.deb
|
||||||
sudo apt-get install -y -f
|
sudo apt-get install -y -f
|
||||||
```
|
```
|
||||||
|
|
||||||
In this script, we prepare an empty download directory and tell Scrawl to scrape `http://ankisrs.net/`, extracting
|
In this script, we prepare an empty download directory and tell Scrawl to scrape `http://ankisrs.net/`, extracting
|
||||||
the `href` property of the download link identified by the CSS selector `#linux > a:nth-child(2)`. We then install
|
the `href` property of the download link identified by the CSS selector `#linux > a:nth-child(2)`. We then install
|
||||||
the package and bring in any unsatisfied dependencies.
|
the package and bring in any unsatisfied dependencies.
|
||||||
|
|
||||||
|
## License ##
|
||||||
|
|
||||||
|
Permission is hereby granted, free of charge, to any person obtaining a copy of
|
||||||
|
this software and associated documentation files (the "Software"), to deal in
|
||||||
|
the Software without restriction, including without limitation the rights to
|
||||||
|
use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of
|
||||||
|
the Software, and to permit persons to whom the Software is furnished to do so,
|
||||||
|
subject to the following conditions:
|
||||||
|
|
||||||
|
The above copyright notice and this permission notice shall be included in all
|
||||||
|
copies or substantial portions of the Software.
|
||||||
|
|
||||||
|
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||||
|
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS
|
||||||
|
FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR
|
||||||
|
COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER
|
||||||
|
IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
|
||||||
|
CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
||||||
|
10
go.mod
10
go.mod
@ -1,10 +0,0 @@
|
|||||||
module foosoft.net/projects/scrawl
|
|
||||||
|
|
||||||
go 1.18
|
|
||||||
|
|
||||||
require github.com/PuerkitoBio/goquery v1.8.0
|
|
||||||
|
|
||||||
require (
|
|
||||||
github.com/andybalholm/cascadia v1.3.1 // indirect
|
|
||||||
golang.org/x/net v0.0.0-20210916014120-12bc252f5db8 // indirect
|
|
||||||
)
|
|
11
go.sum
11
go.sum
@ -1,11 +0,0 @@
|
|||||||
github.com/PuerkitoBio/goquery v1.8.0 h1:PJTF7AmFCFKk1N6V6jmKfrNH9tV5pNE6lZMkG0gta/U=
|
|
||||||
github.com/PuerkitoBio/goquery v1.8.0/go.mod h1:ypIiRMtY7COPGk+I/YbZLbxsxn9g5ejnI2HSMtkjZvI=
|
|
||||||
github.com/andybalholm/cascadia v1.3.1 h1:nhxRkql1kdYCc8Snf7D5/D3spOX+dBgjA6u8x004T2c=
|
|
||||||
github.com/andybalholm/cascadia v1.3.1/go.mod h1:R4bJ1UQfqADjvDa4P6HZHLh/3OxWWEqc0Sk8XGwHqvA=
|
|
||||||
golang.org/x/net v0.0.0-20210916014120-12bc252f5db8 h1:/6y1LfuqNuQdHAm0jjtPtgRcxIxjVZgm5OTu8/QhZvk=
|
|
||||||
golang.org/x/net v0.0.0-20210916014120-12bc252f5db8/go.mod h1:9nx3DQGgdP8bBQD5qxJ1jj9UTztislL4KSBs9R2vV5Y=
|
|
||||||
golang.org/x/sys v0.0.0-20201119102817-f84b799fce68/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
|
|
||||||
golang.org/x/sys v0.0.0-20210423082822-04245dca01da/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
|
|
||||||
golang.org/x/term v0.0.0-20201126162022-7de9c90e9dd1/go.mod h1:bj7SfCRtBDWHUb9snDiAeCFNEtKQo2Wmx5Cou7ajbmo=
|
|
||||||
golang.org/x/text v0.3.6/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ=
|
|
||||||
golang.org/x/tools v0.0.0-20180917221912-90fa682c2a6e/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ=
|
|
BIN
img/anki.png
BIN
img/anki.png
Binary file not shown.
Before Width: | Height: | Size: 38 KiB |
BIN
img/inspect.png
BIN
img/inspect.png
Binary file not shown.
Before Width: | Height: | Size: 25 KiB |
22
scrawl.go
22
scrawl.go
@ -1,3 +1,25 @@
|
|||||||
|
/*
|
||||||
|
* Copyright (c) 2016 Alex Yatskov <alex@foosoft.net>
|
||||||
|
* Author: Alex Yatskov <alex@foosoft.net>
|
||||||
|
*
|
||||||
|
* Permission is hereby granted, free of charge, to any person obtaining a copy of
|
||||||
|
* this software and associated documentation files (the "Software"), to deal in
|
||||||
|
* the Software without restriction, including without limitation the rights to
|
||||||
|
* use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of
|
||||||
|
* the Software, and to permit persons to whom the Software is furnished to do so,
|
||||||
|
* subject to the following conditions:
|
||||||
|
*
|
||||||
|
* The above copyright notice and this permission notice shall be included in all
|
||||||
|
* copies or substantial portions of the Software.
|
||||||
|
*
|
||||||
|
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||||
|
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS
|
||||||
|
* FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR
|
||||||
|
* COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER
|
||||||
|
* IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
|
||||||
|
* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
||||||
|
*/
|
||||||
|
|
||||||
package main
|
package main
|
||||||
|
|
||||||
import (
|
import (
|
||||||
|
Loading…
Reference in New Issue
Block a user