A tool for crawling and finding links to URLs which no longer exist
You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
 
 
deadlinks/deadlinks.go

78 lines
1.7 KiB

// Package deadlinks implements a liveness checker for hyperlinks in HTML and
// gemtext documents.
package deadlinks
import (
"errors"
"fmt"
"net/url"
"time"
)
// URL is a standard universal resource identifier, normalized particularly for
// this package.
type URL string
// ParseURL parses and returns a URL based on the given string, or an error.
func ParseURL(urlStr string) (URL, error) {
u, err := url.Parse(urlStr)
if err != nil {
return "", err
}
return URL(u.String()), nil
}
func parseURLs(urlStrs []string) ([]URL, error) {
var (
res = make([]URL, 0, len(urlStrs))
errs []error
)
for _, urlStr := range urlStrs {
u, err := ParseURL(urlStr)
if err == nil {
res = append(res, u)
} else {
errs = append(errs, err)
}
}
return res, errors.Join(errs...)
}
// ResourceStatus describes what state a particular Resource is in.
type ResourceStatus int
// Enumeration of ResourceStatus values.
const (
ResourceStatusUnknown ResourceStatus = iota
ResourceStatusOK
ResourceStatusError
)
func (ds ResourceStatus) String() string {
switch ds {
case ResourceStatusUnknown:
return "UNKNOWN"
case ResourceStatusOK:
return "OK"
case ResourceStatusError:
return "ERROR"
default:
panic(fmt.Sprintf("unknown ResourceStatus: %#v", ds))
}
}
// Resource describes the current state of a resource, with the resource being
// uniquely identified by a URL.
type Resource struct {
URL URL
Status ResourceStatus
Pinned bool
LastChecked time.Time
// only set if Status == ResourceStatusError
ErrorString string
// Indicate the URLs of resources which link to/are linked from this
// resource.
IncomingLinkURLs, OutgoingLinkURLs []URL
}