Flesh out README and documentation
This commit is contained in:
parent
07a5acceaf
commit
c6361ea488
14
LICENSE.txt
Normal file
14
LICENSE.txt
Normal file
@ -0,0 +1,14 @@
|
|||||||
|
DO WHAT THE FUCK YOU WANT TO PUBLIC LICENSE
|
||||||
|
Version 2, December 2004
|
||||||
|
|
||||||
|
Copyright (C) 2004 Sam Hocevar <sam@hocevar.net>
|
||||||
|
|
||||||
|
Everyone is permitted to copy and distribute verbatim or modified
|
||||||
|
copies of this license document, and changing it is allowed as long
|
||||||
|
as the name is changed.
|
||||||
|
|
||||||
|
DO WHAT THE FUCK YOU WANT TO PUBLIC LICENSE
|
||||||
|
TERMS AND CONDITIONS FOR COPYING, DISTRIBUTION AND MODIFICATION
|
||||||
|
|
||||||
|
0. You just DO WHAT THE FUCK YOU WANT TO.
|
||||||
|
|
44
README.md
Normal file
44
README.md
Normal file
@ -0,0 +1,44 @@
|
|||||||
|
# DeadLinks
|
||||||
|
|
||||||
|
A tool for crawling and finding links to URLs which no longer exist. DeadLinks
|
||||||
|
supports the HTTP(S) and Gemini protocols, and is intended for periodically
|
||||||
|
checking links on personal websites and blogs.
|
||||||
|
|
||||||
|
## Library
|
||||||
|
|
||||||
|
The `deadlinks` package is designed to be easily embedded into a process and
|
||||||
|
have its results displayed in something like a status page.
|
||||||
|
|
||||||
|
[See the godocs for more info.](https://godocs.io/code.betamike.com/mediocregopher/deadlinks)
|
||||||
|
|
||||||
|
## Command-Line
|
||||||
|
|
||||||
|
The command-line utility can be installed using `go install`:
|
||||||
|
|
||||||
|
```
|
||||||
|
go install code.betamike.com/mediocregopher/deadlinks/cmd/deadlinks@latest
|
||||||
|
```
|
||||||
|
|
||||||
|
The `-urls` parameter is required. Given one or more URLs it will check each one
|
||||||
|
for any dead links:
|
||||||
|
|
||||||
|
```
|
||||||
|
deadlinks -urls 'https://mediocregopher.com,gemini://mediocregopher.com'
|
||||||
|
```
|
||||||
|
|
||||||
|
Any links which are dead will be output to stdout as YAML objects, each
|
||||||
|
containing the dead URL, the error encountered, and which pages link to it.
|
||||||
|
|
||||||
|
In order to recursively crawl through links you can give one or more regex
|
||||||
|
patterns. Any URL which matches a pattern will have its links checked as well
|
||||||
|
(and if any of those link URLs match a pattern their links will be checked, and
|
||||||
|
so on):
|
||||||
|
|
||||||
|
```
|
||||||
|
deadlinks \
|
||||||
|
-urls 'https://mediocregopher.com,gemini://mediocregopher.com' \
|
||||||
|
-patterns '://mediocregopher.com'
|
||||||
|
```
|
||||||
|
|
||||||
|
There are further options available which affect the utility's behavior; see
|
||||||
|
`deadlinks -h` for more.
|
57
deadlinks.go
57
deadlinks.go
@ -1,6 +1,50 @@
|
|||||||
// Package deadlinks implements a liveness checker for hyperlinks in HTML and
|
// Package deadlinks implements a liveness checker for hyperlinks in HTML and
|
||||||
// gemtext documents.
|
// gemtext documents.
|
||||||
//
|
//
|
||||||
|
// # URLs
|
||||||
|
//
|
||||||
|
// DeadLinks crawls and keeps track of hyperlinks between different
|
||||||
|
// resources, such as webpages and gemtext documents. If a resource is not
|
||||||
|
// linked to from any other resources then DeadLinks forgets about it.
|
||||||
|
//
|
||||||
|
// For this reason it is required to have a starting set of URLs which DeadLinks
|
||||||
|
// will not forget about; these are the pinned URLs. Pinned URLs act as the
|
||||||
|
// starting point for crawling.
|
||||||
|
//
|
||||||
|
// When DeadLinks traverses a URL link, it will check the liveness of that URL's
|
||||||
|
// resource, but it will not by default recur into _that_ resource's links. It
|
||||||
|
// will only do so if the URL matches one of the given regex patterns which
|
||||||
|
// DeadLinks was configured with.
|
||||||
|
//
|
||||||
|
// # Basic Usage
|
||||||
|
//
|
||||||
|
// DeadLinks can be initialized using `New`:
|
||||||
|
//
|
||||||
|
// store := deadlinks.NewSQLiteStore(nil)
|
||||||
|
// defer store.Close()
|
||||||
|
//
|
||||||
|
// pinnedURLs := []string{"https://some.website.com"}
|
||||||
|
// patterns := []string{"website.com"}
|
||||||
|
//
|
||||||
|
// dl, err := deadlinks.New(ctx, store, pinnedURLs, patterns, nil)
|
||||||
|
//
|
||||||
|
// `Update` is then used to crawl all links, starting with `pinnedURLs`:
|
||||||
|
//
|
||||||
|
// err := dl.Update(ctx, time.Now())
|
||||||
|
//
|
||||||
|
// Finally, `GetByStatus` can be used to query all discovered resources based on
|
||||||
|
// their current status. To retrieve all resources which have some error
|
||||||
|
// (indicating a broken link):
|
||||||
|
//
|
||||||
|
// erroredResources, err := miter.ToSlice(
|
||||||
|
// ctx, dl.GetByStatus(deadlinks.ResourceStatusError),
|
||||||
|
// )
|
||||||
|
//
|
||||||
|
// Note that `GetByStatus` returns a `miter.Iterator`, see its documentation for
|
||||||
|
// more options on how to use it beyond `ToSlice`:
|
||||||
|
//
|
||||||
|
// https://godocs.io/code.betamike.com/mediocregopher/mediocre-go-lib/miter
|
||||||
|
//
|
||||||
// # Storage
|
// # Storage
|
||||||
//
|
//
|
||||||
// By default DeadLinks uses an in-memory SQLite database for tracking the
|
// By default DeadLinks uses an in-memory SQLite database for tracking the
|
||||||
@ -12,9 +56,12 @@
|
|||||||
// })
|
// })
|
||||||
// defer store.Close()
|
// defer store.Close()
|
||||||
//
|
//
|
||||||
// dl, err := deadlinks.New(
|
// dl, err := deadlinks.New(ctx, store, pinnedURLs, patterns, nil)
|
||||||
// ctx, store, pinnedURLs, patterns, nil,
|
//
|
||||||
// )
|
// # Further Customization
|
||||||
|
//
|
||||||
|
// Most functionality of DeadLinks can be extended or superseded by injecting
|
||||||
|
// alternate interface implementations via the various Opts structs.
|
||||||
package deadlinks
|
package deadlinks
|
||||||
|
|
||||||
import (
|
import (
|
||||||
@ -49,7 +96,7 @@ type Opts struct {
|
|||||||
// RequestTimeout determines how long a request for a resource can run
|
// RequestTimeout determines how long a request for a resource can run
|
||||||
// before the resource is considered unavailable.
|
// before the resource is considered unavailable.
|
||||||
//
|
//
|
||||||
// Default: 10 * time.Second
|
// Default: 1 * time.Minute
|
||||||
RequestTimeout time.Duration
|
RequestTimeout time.Duration
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -71,7 +118,7 @@ func (o *Opts) withDefaults() *Opts {
|
|||||||
}
|
}
|
||||||
|
|
||||||
if o.RequestTimeout == 0 {
|
if o.RequestTimeout == 0 {
|
||||||
o.RequestTimeout = 10 * time.Second
|
o.RequestTimeout = 1 * time.Minute
|
||||||
}
|
}
|
||||||
|
|
||||||
return o
|
return o
|
||||||
|
Loading…
Reference in New Issue
Block a user