|
|
|
@ -24,9 +24,11 @@ |
|
|
|
|
// defer store.Close()
|
|
|
|
|
//
|
|
|
|
|
// pinnedURLs := []string{"https://some.website.com"}
|
|
|
|
|
// patterns := []string{"website.com"}
|
|
|
|
|
// followRegexps := []string{"website.com"}
|
|
|
|
|
//
|
|
|
|
|
// dl, err := deadlinks.New(ctx, store, pinnedURLs, patterns, nil)
|
|
|
|
|
// dl, err := deadlinks.New(ctx, store, pinnedURLs, &deadlinks.Opts{
|
|
|
|
|
// FollowRegexps: followRegexps,
|
|
|
|
|
// })
|
|
|
|
|
//
|
|
|
|
|
// `Update` is then used to crawl all links, starting with `pinnedURLs`:
|
|
|
|
|
//
|
|
|
|
@ -56,7 +58,7 @@ |
|
|
|
|
// })
|
|
|
|
|
// defer store.Close()
|
|
|
|
|
//
|
|
|
|
|
// dl, err := deadlinks.New(ctx, store, pinnedURLs, patterns, nil)
|
|
|
|
|
// dl, err := deadlinks.New(ctx, store, pinnedURLs, nil)
|
|
|
|
|
//
|
|
|
|
|
// # Further Customization
|
|
|
|
|
//
|
|
|
|
@ -83,6 +85,10 @@ type Opts struct { |
|
|
|
|
NewClient func() Client // Defaults to `func () Client { return NewClient(nil) }`
|
|
|
|
|
Parser Parser // Defaults to `NewParser()`
|
|
|
|
|
|
|
|
|
|
// If a URL matches any of these regexps then any links found within it will
|
|
|
|
|
// be followed and checked for liveness themselves.
|
|
|
|
|
FollowRegexps []string |
|
|
|
|
|
|
|
|
|
// Concurrency determines the maximum number of URLs which can be checked
|
|
|
|
|
// simultaneously.
|
|
|
|
|
//
|
|
|
|
@ -131,32 +137,32 @@ func (o *Opts) withDefaults() *Opts { |
|
|
|
|
// out-of-the-box, and will traverse between them as necessary based on URL
|
|
|
|
|
// schemes. See the `NewClient` and `NewParser` functions for more details.
|
|
|
|
|
type DeadLinks struct { |
|
|
|
|
opts Opts |
|
|
|
|
store Store |
|
|
|
|
patterns []*regexp.Regexp |
|
|
|
|
clients []Client |
|
|
|
|
opts Opts |
|
|
|
|
store Store |
|
|
|
|
follows []*regexp.Regexp |
|
|
|
|
clients []Client |
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
// New initializes and returns a DeadLinks instance which will track the
|
|
|
|
|
// liveness of the given set of pinned URLs, as well as all URLs linked to from
|
|
|
|
|
// those. If a linked URL matches one of the given regexp patterns then any
|
|
|
|
|
// URLs linked to from it will be tracked as well.
|
|
|
|
|
// liveness of the given set of pinned URLs, and potentially URLs linked to from
|
|
|
|
|
// those.
|
|
|
|
|
//
|
|
|
|
|
// If a non-empty Store is passed to New then whatever set of previously pinned
|
|
|
|
|
// URLs were present will be overwritten with the given ones.
|
|
|
|
|
// If a previously used Store is passed to New then whatever set of previously
|
|
|
|
|
// pinned URLs were present will be overwritten with the given ones.
|
|
|
|
|
func New( |
|
|
|
|
ctx context.Context, |
|
|
|
|
store Store, |
|
|
|
|
pinnedURLStrs, |
|
|
|
|
patternStrs []string, |
|
|
|
|
pinnedURLStrs []string, |
|
|
|
|
opts *Opts, |
|
|
|
|
) ( |
|
|
|
|
*DeadLinks, error, |
|
|
|
|
) { |
|
|
|
|
opts = opts.withDefaults() |
|
|
|
|
|
|
|
|
|
var ( |
|
|
|
|
err error |
|
|
|
|
pinnedURLs = make([]URL, len(pinnedURLStrs)) |
|
|
|
|
patterns = make([]*regexp.Regexp, len(patternStrs)) |
|
|
|
|
follows = make([]*regexp.Regexp, len(opts.FollowRegexps)) |
|
|
|
|
) |
|
|
|
|
|
|
|
|
|
for i, u := range pinnedURLStrs { |
|
|
|
@ -165,16 +171,16 @@ func New( |
|
|
|
|
} |
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
for i, p := range patternStrs { |
|
|
|
|
if patterns[i], err = regexp.Compile(p); err != nil { |
|
|
|
|
for i, p := range opts.FollowRegexps { |
|
|
|
|
if follows[i], err = regexp.Compile(p); err != nil { |
|
|
|
|
return nil, fmt.Errorf("compiling regexp %q: %w", p, err) |
|
|
|
|
} |
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
d := &DeadLinks{ |
|
|
|
|
opts: *opts.withDefaults(), |
|
|
|
|
store: store, |
|
|
|
|
patterns: patterns, |
|
|
|
|
opts: *opts, |
|
|
|
|
store: store, |
|
|
|
|
follows: follows, |
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
d.clients = make([]Client, d.opts.Concurrency) |
|
|
|
@ -200,8 +206,8 @@ func (d *DeadLinks) onError(ctx context.Context, err error) { |
|
|
|
|
|
|
|
|
|
func (d *DeadLinks) shouldFollowURL(url URL) bool { |
|
|
|
|
urlStr := string(url) |
|
|
|
|
for _, pattern := range d.patterns { |
|
|
|
|
if pattern.MatchString(urlStr) { |
|
|
|
|
for _, follow := range d.follows { |
|
|
|
|
if follow.MatchString(urlStr) { |
|
|
|
|
return true |
|
|
|
|
} |
|
|
|
|
} |
|
|
|
|