|
|
|
@ -89,6 +89,9 @@ type Opts struct { |
|
|
|
|
// be followed and checked for liveness themselves.
|
|
|
|
|
FollowRegexps []string |
|
|
|
|
|
|
|
|
|
// If a URL matches any of these regexps then it will not be checked at all.
|
|
|
|
|
IgnoreRegexps []string |
|
|
|
|
|
|
|
|
|
// Concurrency determines the maximum number of URLs which can be checked
|
|
|
|
|
// simultaneously.
|
|
|
|
|
//
|
|
|
|
@ -137,10 +140,23 @@ func (o *Opts) withDefaults() *Opts { |
|
|
|
|
// out-of-the-box, and will traverse between them as necessary based on URL
|
|
|
|
|
// schemes. See the `NewClient` and `NewParser` functions for more details.
|
|
|
|
|
type DeadLinks struct { |
|
|
|
|
opts Opts |
|
|
|
|
store Store |
|
|
|
|
follows []*regexp.Regexp |
|
|
|
|
clients []Client |
|
|
|
|
opts Opts |
|
|
|
|
store Store |
|
|
|
|
follows, ignores []*regexp.Regexp |
|
|
|
|
clients []Client |
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
// compileRegexps compiles every pattern in strs and returns the resulting
// regexps in the same order, so that the i'th result corresponds to strs[i].
//
// Compilation stops at the first pattern which fails to compile. In that case
// the returned slice is nil and the error wraps the underlying compile error,
// prefixed with the offending pattern.
func compileRegexps(strs []string) ([]*regexp.Regexp, error) {
	rr := make([]*regexp.Regexp, len(strs))

	for i, str := range strs {
		r, err := regexp.Compile(str)
		if err != nil {
			return nil, fmt.Errorf("compiling regexp %q: %w", str, err)
		}

		rr[i] = r
	}

	return rr, nil
}
|
|
|
|
|
|
|
|
|
// New initializes and returns a DeadLinks instance which will track the
|
|
|
|
@ -162,7 +178,6 @@ func New( |
|
|
|
|
var ( |
|
|
|
|
err error |
|
|
|
|
pinnedURLs = make([]URL, len(pinnedURLStrs)) |
|
|
|
|
follows = make([]*regexp.Regexp, len(opts.FollowRegexps)) |
|
|
|
|
) |
|
|
|
|
|
|
|
|
|
for i, u := range pinnedURLStrs { |
|
|
|
@ -171,16 +186,21 @@ func New( |
|
|
|
|
} |
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
for i, p := range opts.FollowRegexps { |
|
|
|
|
if follows[i], err = regexp.Compile(p); err != nil { |
|
|
|
|
return nil, fmt.Errorf("compiling regexp %q: %w", p, err) |
|
|
|
|
} |
|
|
|
|
follows, err := compileRegexps(opts.FollowRegexps) |
|
|
|
|
if err != nil { |
|
|
|
|
return nil, fmt.Errorf("compiling follows: %w", err) |
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
ignores, err := compileRegexps(opts.IgnoreRegexps) |
|
|
|
|
if err != nil { |
|
|
|
|
return nil, fmt.Errorf("compiling ignores: %w", err) |
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
d := &DeadLinks{ |
|
|
|
|
opts: *opts, |
|
|
|
|
store: store, |
|
|
|
|
follows: follows, |
|
|
|
|
ignores: ignores, |
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
d.clients = make([]Client, d.opts.Concurrency) |
|
|
|
@ -204,10 +224,10 @@ func (d *DeadLinks) onError(ctx context.Context, err error) { |
|
|
|
|
d.opts.OnError(err) |
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
func (d *DeadLinks) shouldFollowURL(url URL) bool { |
|
|
|
|
func matchesAnyRegexp(url URL, rr []*regexp.Regexp) bool { |
|
|
|
|
urlStr := string(url) |
|
|
|
|
for _, follow := range d.follows { |
|
|
|
|
if follow.MatchString(urlStr) { |
|
|
|
|
for _, r := range rr { |
|
|
|
|
if r.MatchString(urlStr) { |
|
|
|
|
return true |
|
|
|
|
} |
|
|
|
|
} |
|
|
|
@ -219,6 +239,10 @@ func (d *DeadLinks) getURL( |
|
|
|
|
) ( |
|
|
|
|
[]URL, error, |
|
|
|
|
) { |
|
|
|
|
if matchesAnyRegexp(url, d.ignores) { |
|
|
|
|
return nil, nil |
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
ctx, cancel := context.WithTimeout(ctx, d.opts.RequestTimeout) |
|
|
|
|
defer cancel() |
|
|
|
|
|
|
|
|
@ -233,7 +257,7 @@ func (d *DeadLinks) getURL( |
|
|
|
|
mimeType = mimeType[:i] |
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
if !d.shouldFollowURL(url) { |
|
|
|
|
if !matchesAnyRegexp(url, d.follows) { |
|
|
|
|
return nil, nil |
|
|
|
|
} |
|
|
|
|
|
|
|
|
|