From c6361ea4886d6502ba16377ea6494acb74a68c4d Mon Sep 17 00:00:00 2001 From: Brian Picciano Date: Sat, 30 Dec 2023 13:43:06 +0100 Subject: [PATCH] Flesh out README and documentation --- LICENSE.txt | 14 +++++++++++++ README.md | 44 ++++++++++++++++++++++++++++++++++++++++ deadlinks.go | 57 +++++++++++++++++++++++++++++++++++++++++++++++----- flake.nix | 1 - 4 files changed, 110 insertions(+), 6 deletions(-) create mode 100644 LICENSE.txt create mode 100644 README.md diff --git a/LICENSE.txt b/LICENSE.txt new file mode 100644 index 0000000..5a8e332 --- /dev/null +++ b/LICENSE.txt @@ -0,0 +1,14 @@ + DO WHAT THE FUCK YOU WANT TO PUBLIC LICENSE + Version 2, December 2004 + + Copyright (C) 2004 Sam Hocevar + + Everyone is permitted to copy and distribute verbatim or modified + copies of this license document, and changing it is allowed as long + as the name is changed. + + DO WHAT THE FUCK YOU WANT TO PUBLIC LICENSE + TERMS AND CONDITIONS FOR COPYING, DISTRIBUTION AND MODIFICATION + + 0. You just DO WHAT THE FUCK YOU WANT TO. + diff --git a/README.md b/README.md new file mode 100644 index 0000000..7c9d8f6 --- /dev/null +++ b/README.md @@ -0,0 +1,44 @@ +# DeadLinks + +A tool for crawling and finding links to URLs which no longer exist. deadlinks +supports the HTTP(s) and gemini protocols, and is intended for periodically +checking links on personal websites and blogs. + +## Library + +The `deadlinks` package is designed to be easily embedded into a process and +have its results displayed in something like a status page. + +[See the godocs for more info.](https://godocs.io/code.betamike.com/mediocregopher/deadlinks) + +## Command-Line + +The command-line utility can be installed using `go install`: + +``` +go install code.betamike.com/mediocregopher/deadlinks/cmd/deadlinks@latest +``` + +The `-urls` parameter is required. 
Given one or more URLs it will check each one +for any dead links: + +``` +deadlinks -urls 'https://mediocregopher.com,gemini://mediocregopher.com' +``` + +Any links which are dead will be output to stdout as YAML objects, each +containing the dead URL, the error encountered, and which pages link to it. + +In order to recursively crawl through links you can give one or more regex +patterns. Any URL which matches a pattern will have its links checked as well +(and if any of those link URLs match a pattern their links will be checked, and +so on): + +``` +deadlinks \ + -urls 'https://mediocregopher.com,gemini://mediocregopher.com' \ + -patterns '://mediocregopher.com' +``` + +There are further options available which affect the utility's behavior, see +`deadlinks -h` for more. diff --git a/deadlinks.go b/deadlinks.go index ba51eb6..0785ccd 100644 --- a/deadlinks.go +++ b/deadlinks.go @@ -1,6 +1,50 @@ // Package deadlinks implements a liveness checker for hyperlinks in HTML and // gemtext documents. // +// # URLs +// +// DeadLinks crawls and keeps track of hyperlinks between different +// resources, such as webpages and gemtext documents. If a resource is not +// linked to from any other resources then DeadLinks forgets about it. +// +// For this reason it is required to have a starting set of URLs which DeadLinks +// will not forget about; these are the pinned URLs. Pinned URLs act as the +// starting point for crawling. +// +// When DeadLinks traverses a URL link, it will check the liveness of that URL's +// resource, but it will not by default recur into _that_ resource's links. It +// will only do so if the URL matches one of the given regex patterns which +// DeadLinks was configured with. 
+// +// # Basic Usage +// +// DeadLinks can be initialized using `New`: +// +// store := deadlinks.NewSQLiteStore(nil) +// defer store.Close() +// +// pinnedURLs := []string{"https://some.website.com"} +// patterns := []string{"website.com"} +// +// dl, err := deadlinks.New(ctx, store, pinnedURLs, patterns, nil) +// +// `Update` is then used to crawl all links, starting with `pinnedURLs`: +// +// err := dl.Update(ctx, time.Now()) +// +// Finally, `GetByStatus` can be used to query all discovered resources based on +// their current status. To retrieve all resources which have some error +// (indicating a broken link): +// +// erroredResources, err := miter.ToSlice( +// ctx, dl.GetByStatus(deadlinks.ResourceStatusError), +// ) +// +// Note that `GetByStatus` returns a `miter.Iterator`, see its documentation for +// more options on how to use it beyond `ToSlice`: +// +// https://godocs.io/code.betamike.com/mediocregopher/mediocre-go-lib/miter +// // # Storage // // By default DeadLinks uses an in-memory SQLite database for tracking the @@ -12,9 +56,12 @@ // }) // defer store.Close() // -// dl, err := deadlinks.New( -// ctx, store, pinnedURLs, patterns, nil, -// ) +// dl, err := deadlinks.New(ctx, store, pinnedURLs, patterns, nil) +// +// # Further Customization +// +// Most functionality of DeadLinks can be extended or superseded by injecting +// alternate interface implementations via the various Opts structs. package deadlinks import ( @@ -49,7 +96,7 @@ type Opts struct { // RequestTimeout determines how long a request for a resource can run // before the resource is considered unavailable. 
// - // Default: 10 * time.Second + // Default: 1 * time.Minute RequestTimeout time.Duration } @@ -71,7 +118,7 @@ func (o *Opts) withDefaults() *Opts { } if o.RequestTimeout == 0 { - o.RequestTimeout = 10 * time.Second + o.RequestTimeout = 1 * time.Minute } return o diff --git a/flake.nix b/flake.nix index 314ec80..f687217 100644 --- a/flake.nix +++ b/flake.nix @@ -35,7 +35,6 @@ pkgs.go pkgs.gotools pkgs.golangci-lint - pkgs.sqlite ]; }; });