Compare commits

..

3 Commits

Author SHA1 Message Date
f012eeebbf Multi-args instead of comma-separated 2024-01-04 21:17:57 +01:00
db3e6029b9 Add option to set http user agent 2024-01-04 20:51:36 +01:00
f5a91f918e Don't share http roundtripper between clients
This seems to fix the issues with concurrency.
2024-01-04 20:31:17 +01:00
4 changed files with 59 additions and 22 deletions

View File

@ -19,11 +19,11 @@ The command-line utility can be installed using `go install`:
go install code.betamike.com/mediocregopher/deadlinks/cmd/deadlinks go install code.betamike.com/mediocregopher/deadlinks/cmd/deadlinks
``` ```
The `-urls` parameter is required. Given one or more URLs it will check each one The `-url` parameter is required. Given a URL it will check it for
for any dead links: any dead links. Can be specified more than once:
``` ```
deadlinks -urls 'https://mediocregopher.com,gemini://mediocregopher.com' deadlinks -url='https://mediocregopher.com' -url='gemini://mediocregopher.com'
``` ```
Any links which are dead will be output to stdout as YAML objects, each Any links which are dead will be output to stdout as YAML objects, each
@ -36,8 +36,8 @@ so on):
``` ```
deadlinks \ deadlinks \
-urls 'https://mediocregopher.com,gemini://mediocregopher.com' \ -url='https://mediocregopher.com' -url='gemini://mediocregopher.com' \
-patterns '://mediocregopher.com' -pattern='://mediocregopher.com'
``` ```
There are further options available which affect the utility's behavior, see There are further options available which affect the utility's behavior, see

View File

@ -36,6 +36,11 @@ type ClientOpts struct {
Do(*http.Request) (*http.Response, error) Do(*http.Request) (*http.Response, error)
} }
// HTTPUserAgent overwrites the user agent used by the HTTPClient.
//
// Defaults to whatever http.Client uses by default.
HTTPUserAgent string
// MaxRedirects indicates the maximum number of redirects which will be // MaxRedirects indicates the maximum number of redirects which will be
// allowed when resolving a resource. A negative value indicates no // allowed when resolving a resource. A negative value indicates no
// redirects are allowed. // redirects are allowed.
@ -54,7 +59,9 @@ func (o *ClientOpts) withDefaults() *ClientOpts {
} }
if o.HTTPClient == nil { if o.HTTPClient == nil {
o.HTTPClient = new(http.Client) o.HTTPClient = &http.Client{
Transport: http.DefaultTransport.(*http.Transport).Clone(),
}
} }
if o.MaxRedirects == 0 { if o.MaxRedirects == 0 {
@ -188,6 +195,10 @@ func (c *client) getHTTP(
return "", nil, fmt.Errorf("building request: %w", err) return "", nil, fmt.Errorf("building request: %w", err)
} }
if c.opts.HTTPUserAgent != "" {
req.Header.Set("User-Agent", c.opts.HTTPUserAgent)
}
res, err := c.opts.HTTPClient.Do(req) res, err := c.opts.HTTPClient.Do(req)
if err != nil { if err != nil {
return "", nil, fmt.Errorf("performing request: %w", err) return "", nil, fmt.Errorf("performing request: %w", err)

29
cmd/deadlinks/flag.go Normal file
View File

@ -0,0 +1,29 @@
package main
import (
	"errors"
	"flag"
	"strings"
)
// arrayFlags is a flag.Value which collects every occurrence of a repeated
// command-line flag, so that multiple inputs can be accepted (for example
// `-url=a -url=b`).
//
// The collected values live behind a pointer so that every copy of an
// arrayFlags (it is passed and returned by value) observes the values appended
// by Set.
type arrayFlags struct {
	strs *[]string
}

// arrayFlags must satisfy flag.Value in order to be registered with flag.Var.
var _ flag.Value = arrayFlags{}

// errArrayFlagsUninitialized is returned by Set when it is called on an
// arrayFlags which has no backing slice, i.e. one not created by flagStrings.
var errArrayFlagsUninitialized = errors.New("arrayFlags is not initialized, create it with flagStrings")

// flagStrings registers a repeatable string flag with the given name and usage
// on the default flag set, and returns the arrayFlags which accumulates the
// values passed to Set for that flag.
func flagStrings(name, usage string) arrayFlags {
	f := arrayFlags{strs: new([]string)}
	flag.Var(&f, name, usage)
	return f
}

// String returns the collected values joined by ", ". It returns an empty
// string for a zero-value arrayFlags, so it is safe to call on one.
func (a arrayFlags) String() string {
	if a.strs == nil {
		return ""
	}
	return strings.Join(*a.strs, ", ")
}

// Set appends value, with surrounding whitespace trimmed, to the collected
// values.
//
// An error is returned, rather than panicking on a nil dereference, if the
// arrayFlags is a zero value with nowhere to store the value.
func (a arrayFlags) Set(value string) error {
	if a.strs == nil {
		return errArrayFlagsUninitialized
	}
	*a.strs = append(*a.strs, strings.TrimSpace(value))
	return nil
}

View File

@ -8,7 +8,6 @@ import (
"os" "os"
"os/signal" "os/signal"
"runtime" "runtime"
"strings"
"time" "time"
"code.betamike.com/mediocregopher/deadlinks" "code.betamike.com/mediocregopher/deadlinks"
@ -33,20 +32,16 @@ func main() {
var ( var (
storePath = flag.String("store-path", "", "Path to sqlite storage file. If not given then a temporary in-memory storage is used") storePath = flag.String("store-path", "", "Path to sqlite storage file. If not given then a temporary in-memory storage is used")
maxAge = flag.Duration("max-age", 0, "Maximum duration since last check of a resource, before it must be checked again. Must be used with -store-path") maxAge = flag.Duration("max-age", 0, "Maximum duration since last check of a resource, before it must be checked again. Must be used with -store-path")
urls = flag.String("urls", "", `Comma-separated list of URLs which are always checked. At least one is required`) urls = flagStrings("url", "URL which is always checked. Must be given at least once")
patternsStr = flag.String("patterns", "", "Comma-separated list of regexps. All URLs which match one of these will have their links checked as well") patterns = flagStrings("pattern", "URLs matching this regex will have their links checked as well. Can be specified multiple times")
concurrency = flag.Int("concurrency", runtime.NumCPU()/2, "Number simultaneous requests to make at a time") concurrency = flag.Int("concurrency", runtime.NumCPU()/2, "Number simultaneous requests to make at a time")
httpUserAgent = flag.String("http-user-agent", "", "User-agent to use for http requests")
) )
flag.Parse() flag.Parse()
if *urls == "" { if len(*urls.strs) == 0 {
log.Fatal("-urls is required") log.Fatal("at least one -url is required")
}
var patterns []string
if *patternsStr != "" {
patterns = strings.Split(*patternsStr, ",")
} }
ctx, cancel := signal.NotifyContext(context.Background(), os.Interrupt) ctx, cancel := signal.NotifyContext(context.Background(), os.Interrupt)
@ -60,11 +55,13 @@ func main() {
dl, err := deadlinks.New( dl, err := deadlinks.New(
ctx, ctx,
store, store,
strings.Split(*urls, ","), *urls.strs,
patterns, *patterns.strs,
&deadlinks.Opts{ &deadlinks.Opts{
NewClient: func() deadlinks.Client { NewClient: func() deadlinks.Client {
return loggingClient{deadlinks.NewClient(nil)} return loggingClient{deadlinks.NewClient(&deadlinks.ClientOpts{
HTTPUserAgent: *httpUserAgent,
})}
}, },
Concurrency: *concurrency, Concurrency: *concurrency,
OnError: func(err error) { OnError: func(err error) {