Rename 'patterns' to 'follows'

This commit is contained in:
Brian Picciano 2024-01-04 21:31:32 +01:00
parent f012eeebbf
commit ff553fa8cb
3 changed files with 35 additions and 29 deletions

View File

@ -30,14 +30,14 @@ Any links which are dead will be output to stdout as YAML objects, each
containing the dead URL, the error encountered, and which pages link to it.
In order to recursively crawl through links you can give one or more regex
patterns. Any URL which matches a pattern will have its links checked as well
(and if any of those link URLs match a pattern their links will be checked, and
so on):
patterns. Any URL which matches a pattern will have its links followed and
checked as well (and if any of those linked URLs match a pattern their links
will be checked, and so on):
```
deadlinks \
-url='https://mediocregopher.com' -url='gemini://mediocregopher.com' \
-pattern='://mediocregopher.com'
-follow='://mediocregopher.com'
```
There are further options available which affect the utility's behavior, see

View File

@ -33,7 +33,7 @@ func main() {
storePath = flag.String("store-path", "", "Path to sqlite storage file. If not given then a temporary in-memory storage is used")
maxAge = flag.Duration("max-age", 0, "Maximum duration since last check of a resource, before it must be checked again. Must be used with -store-path")
urls = flagStrings("url", "URL which is always checked. Must be given at least once")
patterns = flagStrings("pattern", "URLs matching this regex will have their links checked as well. Can be specified multiple times")
follows = flagStrings("follow", "URLs matching this regex will have their links checked as well. Can be specified multiple times")
concurrency = flag.Int("concurrency", runtime.NumCPU()/2, "Number simultaneous requests to make at a time")
httpUserAgent = flag.String("http-user-agent", "", "User-agent to use for http requests")
)
@ -56,14 +56,14 @@ func main() {
ctx,
store,
*urls.strs,
*patterns.strs,
&deadlinks.Opts{
NewClient: func() deadlinks.Client {
return loggingClient{deadlinks.NewClient(&deadlinks.ClientOpts{
HTTPUserAgent: *httpUserAgent,
})}
},
Concurrency: *concurrency,
FollowRegexps: *follows.strs,
Concurrency: *concurrency,
OnError: func(err error) {
log.Printf("runtime error: %v", err)
},

View File

@ -24,9 +24,11 @@
// defer store.Close()
//
// pinnedURLs := []string{"https://some.website.com"}
// patterns := []string{"website.com"}
// followRegexps := []string{"website.com"}
//
// dl, err := deadlinks.New(ctx, store, pinnedURLs, patterns, nil)
// dl, err := deadlinks.New(ctx, store, pinnedURLs, &deadlinks.Opts{
// FollowRegexps: followRegexps,
// })
//
// `Update` is then used to crawl all links, starting with `pinnedURLs`:
//
@ -56,7 +58,7 @@
// })
// defer store.Close()
//
// dl, err := deadlinks.New(ctx, store, pinnedURLs, patterns, nil)
// dl, err := deadlinks.New(ctx, store, pinnedURLs, nil)
//
// # Further Customization
//
@ -83,6 +85,10 @@ type Opts struct {
NewClient func() Client // Defaults to `func () Client { return NewClient(nil) }`
Parser Parser // Defaults to `NewParser()`
// If a URL matches any of these regexps then any links found within it will
// be followed and checked for liveness themselves.
FollowRegexps []string
// Concurrency determines the maximum number of URLs which can be checked
// simultaneously.
//
@ -131,32 +137,32 @@ func (o *Opts) withDefaults() *Opts {
// out-of-the-box, and will traverse between them as necessary based on URL
// schemes. See the `NewClient` and `NewParser` functions for more details.
type DeadLinks struct {
opts Opts
store Store
patterns []*regexp.Regexp
clients []Client
opts Opts
store Store
follows []*regexp.Regexp
clients []Client
}
// New initializes and returns a DeadLinks instance which will track the
// liveness of the given set of pinned URLs, as well as all URLs linked to from
// those. If a linked URL matches one of the given regexp patterns then any
// URLs linked to from it will be tracked as well.
// liveness of the given set of pinned URLs, and potentially URLs linked to from
// those.
//
// If a non-empty Store is passed to New then whatever set of previously pinned
// URLs were present will be overwritten with the given ones.
// If a previously used Store is passed to New then whatever set of previously
// pinned URLs were present will be overwritten with the given ones.
func New(
ctx context.Context,
store Store,
pinnedURLStrs,
patternStrs []string,
pinnedURLStrs []string,
opts *Opts,
) (
*DeadLinks, error,
) {
opts = opts.withDefaults()
var (
err error
pinnedURLs = make([]URL, len(pinnedURLStrs))
patterns = make([]*regexp.Regexp, len(patternStrs))
follows = make([]*regexp.Regexp, len(opts.FollowRegexps))
)
for i, u := range pinnedURLStrs {
@ -165,16 +171,16 @@ func New(
}
}
for i, p := range patternStrs {
if patterns[i], err = regexp.Compile(p); err != nil {
for i, p := range opts.FollowRegexps {
if follows[i], err = regexp.Compile(p); err != nil {
return nil, fmt.Errorf("compiling regexp %q: %w", p, err)
}
}
d := &DeadLinks{
opts: *opts.withDefaults(),
store: store,
patterns: patterns,
opts: *opts,
store: store,
follows: follows,
}
d.clients = make([]Client, d.opts.Concurrency)
@ -200,8 +206,8 @@ func (d *DeadLinks) onError(ctx context.Context, err error) {
func (d *DeadLinks) shouldFollowURL(url URL) bool {
urlStr := string(url)
for _, pattern := range d.patterns {
if pattern.MatchString(urlStr) {
for _, follow := range d.follows {
if follow.MatchString(urlStr) {
return true
}
}