Flesh out README and documentation
This commit is contained in:
parent
07a5acceaf
commit
c6361ea488
14
LICENSE.txt
Normal file
14
LICENSE.txt
Normal file
@ -0,0 +1,14 @@
|
||||
DO WHAT THE FUCK YOU WANT TO PUBLIC LICENSE
|
||||
Version 2, December 2004
|
||||
|
||||
Copyright (C) 2004 Sam Hocevar <sam@hocevar.net>
|
||||
|
||||
Everyone is permitted to copy and distribute verbatim or modified
|
||||
copies of this license document, and changing it is allowed as long
|
||||
as the name is changed.
|
||||
|
||||
DO WHAT THE FUCK YOU WANT TO PUBLIC LICENSE
|
||||
TERMS AND CONDITIONS FOR COPYING, DISTRIBUTION AND MODIFICATION
|
||||
|
||||
0. You just DO WHAT THE FUCK YOU WANT TO.
|
||||
|
44
README.md
Normal file
44
README.md
Normal file
@ -0,0 +1,44 @@
|
||||
# DeadLinks
|
||||
|
||||
A tool for crawling and finding links to URLs which no longer exist. DeadLinks
|
||||
supports the HTTP(S) and Gemini protocols, and is intended for periodically
|
||||
checking links on personal websites and blogs.
|
||||
|
||||
## Library
|
||||
|
||||
The `deadlinks` package is designed to be easily embedded into a process and
|
||||
have its results displayed in something like a status page.
|
||||
|
||||
[See the godocs for more info.](https://godocs.io/code.betamike.com/mediocregopher/deadlinks)
|
||||
|
||||
## Command-Line
|
||||
|
||||
The command-line utility can be installed using `go install`:
|
||||
|
||||
```
|
||||
go install code.betamike.com/mediocregopher/deadlinks/cmd/deadlinks@latest
|
||||
```
|
||||
|
||||
The `-urls` parameter is required. Given one or more URLs it will check each one
|
||||
for any dead links:
|
||||
|
||||
```
|
||||
deadlinks -urls 'https://mediocregopher.com,gemini://mediocregopher.com'
|
||||
```
|
||||
|
||||
Any links which are dead will be output to stdout as YAML objects, each
|
||||
containing the dead URL, the error encountered, and which pages link to it.
|
||||
|
||||
In order to recursively crawl through links you can give one or more regex
|
||||
patterns. Any URL which matches a pattern will have its links checked as well
|
||||
(and if any of those link URLs match a pattern their links will be checked, and
|
||||
so on):
|
||||
|
||||
```
|
||||
deadlinks \
|
||||
-urls 'https://mediocregopher.com,gemini://mediocregopher.com' \
|
||||
-patterns '://mediocregopher.com'
|
||||
```
|
||||
|
||||
There are further options available which affect the utility's behavior; see
|
||||
`deadlinks -h` for more.
|
57
deadlinks.go
57
deadlinks.go
@ -1,6 +1,50 @@
|
||||
// Package deadlinks implements a liveness checker for hyperlinks in HTML and
|
||||
// gemtext documents.
|
||||
//
|
||||
// # URLs
|
||||
//
|
||||
// DeadLinks crawls and keeps track of hyperlinks between different
|
||||
// resources, such as webpages and gemtext documents. If a resource is not
|
||||
// linked to from any other resources then DeadLinks forgets about it.
|
||||
//
|
||||
// For this reason it is required to have a starting set of URLs which DeadLinks
|
||||
// will not forget about; these are the pinned URLs. Pinned URLs act as the
|
||||
// starting point for crawling.
|
||||
//
|
||||
// When DeadLinks traverses a URL link, it will check the liveness of that URL's
|
||||
// resource, but it will not by default recur into _that_ resource's links. It
|
||||
// will only do so if the URL matches one of the given regex patterns which
|
||||
// DeadLinks was configured with.
|
||||
//
|
||||
// # Basic Usage
|
||||
//
|
||||
// DeadLinks can be initialized using `New`:
|
||||
//
|
||||
// store := deadlinks.NewSQLiteStore(nil)
|
||||
// defer store.Close()
|
||||
//
|
||||
// pinnedURLs := []string{"https://some.website.com"}
|
||||
// patterns := []string{"website.com"}
|
||||
//
|
||||
// dl, err := deadlinks.New(ctx, store, pinnedURLs, patterns, nil)
|
||||
//
|
||||
// `Update` is then used to crawl all links, starting with `pinnedURLs`:
|
||||
//
|
||||
// err := dl.Update(ctx, time.Now())
|
||||
//
|
||||
// Finally, `GetByStatus` can be used to query all discovered resources based on
|
||||
// their current status. To retrieve all resources which have some error
|
||||
// (indicating a broken link):
|
||||
//
|
||||
// erroredResources, err := miter.ToSlice(
|
||||
// ctx, dl.GetByStatus(deadlinks.ResourceStatusError),
|
||||
// )
|
||||
//
|
||||
// Note that `GetByStatus` returns a `miter.Iterator`, see its documentation for
|
||||
// more options on how to use it beyond `ToSlice`:
|
||||
//
|
||||
// https://godocs.io/code.betamike.com/mediocregopher/mediocre-go-lib/miter
|
||||
//
|
||||
// # Storage
|
||||
//
|
||||
// By default DeadLinks uses an in-memory SQLite database for tracking the
|
||||
@ -12,9 +56,12 @@
|
||||
// })
|
||||
// defer store.Close()
|
||||
//
|
||||
// dl, err := deadlinks.New(
|
||||
// ctx, store, pinnedURLs, patterns, nil,
|
||||
// )
|
||||
// dl, err := deadlinks.New(ctx, store, pinnedURLs, patterns, nil)
|
||||
//
|
||||
// # Further Customization
|
||||
//
|
||||
// Most functionality of DeadLinks can be extended or superseded by injecting
|
||||
// alternate interface implementations via the various Opts structs.
|
||||
package deadlinks
|
||||
|
||||
import (
|
||||
@ -49,7 +96,7 @@ type Opts struct {
|
||||
// RequestTimeout determines how long a request for a resource can run
|
||||
// before the resource is considered unavailable.
|
||||
//
|
||||
// Default: 10 * time.Second
|
||||
// Default: 1 * time.Minute
|
||||
RequestTimeout time.Duration
|
||||
}
|
||||
|
||||
@ -71,7 +118,7 @@ func (o *Opts) withDefaults() *Opts {
|
||||
}
|
||||
|
||||
if o.RequestTimeout == 0 {
|
||||
o.RequestTimeout = 10 * time.Second
|
||||
o.RequestTimeout = 1 * time.Minute
|
||||
}
|
||||
|
||||
return o
|
||||
|
Loading…
Reference in New Issue
Block a user