A tool for crawling and finding links to URLs which no longer exist
You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.
 
 
deadlinks/client.go

141 lines
3.1 KiB

package deadlinks
import (
"context"
"errors"
"fmt"
"io"
"git.sr.ht/~adnano/go-gemini"
)
// Client fetches the resource located at a URL. Implementations must be safe
// for concurrent use.
//
// Get returns the MIME type of the resource along with its body. When the MIME
// type is not known, the empty string should be returned in its place.
type Client interface {
	Get(ctx context.Context, url URL) (mimeType string, body io.ReadCloser, err error)
}
// ClientOpts are optional fields which can be provided to NewClient. A nil
// ClientOpts is equivalent to an empty one; any field left at its zero value
// is replaced by the default documented on that field.
type ClientOpts struct {
// GeminiClient will be used for retrieving resources via the gemini
// protocol. Any value providing a matching Do method may be supplied.
//
// Defaults to `new(gemini.Client)`.
GeminiClient interface {
Do(context.Context, *gemini.Request) (*gemini.Response, error)
}
// MaxRedirects indicates the maximum number of redirects which will be
// allowed when resolving a resource. A negative value indicates no
// redirects are allowed. Zero is treated as "unset" and selects the
// default, so it cannot be used to disable redirects.
//
// Default: 10.
MaxRedirects int
}
// withDefaults returns a copy of o in which every unset (zero-valued) field
// has been replaced by its documented default. A nil receiver is treated as an
// empty ClientOpts.
//
// The receiver itself is never modified: defaults are applied to a copy so
// that a ClientOpts supplied by a caller is not changed behind their back and
// can be reused or shared.
func (o *ClientOpts) withDefaults() *ClientOpts {
	var out ClientOpts
	if o != nil {
		out = *o
	}

	if out.GeminiClient == nil {
		out.GeminiClient = new(gemini.Client)
	}

	// Zero means "not set"; negative values are kept as-is and disable
	// redirects entirely.
	if out.MaxRedirects == 0 {
		out.MaxRedirects = 10
	}

	return &out
}
// client is the Client implementation returned by NewClient. It holds the
// options it was constructed with, after defaults have been applied.
type client struct {
opts ClientOpts
}
// NewClient builds a Client capable of speaking the commonly used transport
// protocols. Fetching a URL whose protocol is not recognized results in an
// error from the returned Client.
//
// opts may be nil, in which case all defaults are used.
//
// Supported URL schemas:
//   - gemini
//   - http/https (TODO)
func NewClient(opts *ClientOpts) Client {
	resolved := opts.withDefaults()
	return &client{opts: *resolved}
}
// getGemini fetches url over the gemini protocol, following redirects.
//
// redirectDepth is the number of redirects which have already been followed in
// order to reach url; once it reaches the configured MaxRedirects any further
// redirect response is reported as an error.
//
// For "input required" and "success" responses the still-open response body is
// returned, together with the response's meta field as MIME type in the
// success case. For every other outcome the response body is closed before
// returning.
func (c *client) getGemini(
	ctx context.Context, url URL, redirectDepth int,
) (
	string, io.ReadCloser, error,
) {
	req, err := gemini.NewRequest(string(url))
	if err != nil {
		return "", nil, fmt.Errorf("building request: %w", err)
	}

	// TODO allow specifying client cert
	res, err := c.opts.GeminiClient.Do(ctx, req)
	if err != nil {
		return "", nil, fmt.Errorf("performing request: %w", err)
	}

	// all status numbers are grouped by their first digit, and actions taken
	// can be entirely based on that.
	switch res.Status / 10 {
	case 1: // input required
		// Assume that input required is fine, even though we don't know the
		// MIME type.
		return "", res.Body, nil

	case 2: // success
		return res.Meta, res.Body, nil

	case 3: // redirect
		// Only the meta field of a redirect is needed, so release the body
		// right away rather than deferring: a deferred Close would keep it
		// open until the whole remaining redirect chain has been resolved.
		// The Close error is ignored since the body is being discarded.
		res.Body.Close()

		if redirectDepth >= c.opts.MaxRedirects {
			return "", nil, errors.New("too many redirects")
		}

		metaURL, err := ParseURL(res.Meta)
		if err != nil {
			return "", nil, fmt.Errorf("parsing redirect URL %q: %w", res.Meta, err)
		}

		// Resolve the redirect target against the URL which was just
		// requested, then go back through get so the scheme of the new URL is
		// checked again.
		newURL := url.ResolveReference(metaURL)
		return c.get(ctx, newURL, redirectDepth+1)

	default:
		// Any other status class is a failure; the body is of no use.
		res.Body.Close()
		return "", nil, fmt.Errorf(
			"response code %d (%v): %q", res.Status, res.Status, res.Meta,
		)
	}
}
// get dispatches the fetch of url to the handler for its scheme, passing
// along how many redirects have been followed so far. URLs with a scheme other
// than "gemini" are rejected with an error.
func (c *client) get(ctx context.Context, url URL, redirectDepth int) (string, io.ReadCloser, error) {
	if scheme := url.toStd().Scheme; scheme != "gemini" {
		return "", nil, fmt.Errorf("unsupported scheme %q", scheme)
	}
	return c.getGemini(ctx, url, redirectDepth)
}
// Get implements the Client interface. It fetches url starting from a
// redirect depth of zero.
func (c *client) Get(ctx context.Context, url URL) (string, io.ReadCloser, error) {
	// A fresh request has not followed any redirects yet.
	const initialDepth = 0
	return c.get(ctx, url, initialDepth)
}