Rename 'patterns' to 'follows'
This commit is contained in:
parent
f012eeebbf
commit
ff553fa8cb
@ -30,14 +30,14 @@ Any links which are dead will be output to stdout as YAML objects, each
|
|||||||
containing the dead URL, the error encountered, and which pages link to it.
|
containing the dead URL, the error encountered, and which pages link to it.
|
||||||
|
|
||||||
In order to recursively crawl through links you can give one or more regex
|
In order to recursively crawl through links you can give one or more regex
|
||||||
patterns. Any URL which matches a pattern will have its links checked as well
|
patterns. Any URL which matches a pattern will have its links followed and
|
||||||
(and if any of those link URLs match a pattern their links will be checked, and
|
checked as well (and if any of those linked URLs match a pattern their links
|
||||||
so on):
|
will be checked, and so on):
|
||||||
|
|
||||||
```
|
```
|
||||||
deadlinks \
|
deadlinks \
|
||||||
-url='https://mediocregopher.com' -url='gemini://mediocregopher.com' \
|
-url='https://mediocregopher.com' -url='gemini://mediocregopher.com' \
|
||||||
-pattern='://mediocregopher.com'
|
-follow='://mediocregopher.com'
|
||||||
```
|
```
|
||||||
|
|
||||||
There are further options available which affect the utility's behavior, see
|
There are further options available which affect the utility's behavior, see
|
||||||
|
@ -33,7 +33,7 @@ func main() {
|
|||||||
storePath = flag.String("store-path", "", "Path to sqlite storage file. If not given then a temporary in-memory storage is used")
|
storePath = flag.String("store-path", "", "Path to sqlite storage file. If not given then a temporary in-memory storage is used")
|
||||||
maxAge = flag.Duration("max-age", 0, "Maximum duration since last check of a resource, before it must be checked again. Must be used with -store-path")
|
maxAge = flag.Duration("max-age", 0, "Maximum duration since last check of a resource, before it must be checked again. Must be used with -store-path")
|
||||||
urls = flagStrings("url", "URL which is always checked. Must be given at least once")
|
urls = flagStrings("url", "URL which is always checked. Must be given at least once")
|
||||||
patterns = flagStrings("pattern", "URLs matching this regex will have their links checked as well. Can be specified multiple times")
|
follows = flagStrings("follow", "URLs matching this regex will have their links checked as well. Can be specified multiple times")
|
||||||
concurrency = flag.Int("concurrency", runtime.NumCPU()/2, "Number simultaneous requests to make at a time")
|
concurrency = flag.Int("concurrency", runtime.NumCPU()/2, "Number simultaneous requests to make at a time")
|
||||||
httpUserAgent = flag.String("http-user-agent", "", "User-agent to use for http requests")
|
httpUserAgent = flag.String("http-user-agent", "", "User-agent to use for http requests")
|
||||||
)
|
)
|
||||||
@ -56,14 +56,14 @@ func main() {
|
|||||||
ctx,
|
ctx,
|
||||||
store,
|
store,
|
||||||
*urls.strs,
|
*urls.strs,
|
||||||
*patterns.strs,
|
|
||||||
&deadlinks.Opts{
|
&deadlinks.Opts{
|
||||||
NewClient: func() deadlinks.Client {
|
NewClient: func() deadlinks.Client {
|
||||||
return loggingClient{deadlinks.NewClient(&deadlinks.ClientOpts{
|
return loggingClient{deadlinks.NewClient(&deadlinks.ClientOpts{
|
||||||
HTTPUserAgent: *httpUserAgent,
|
HTTPUserAgent: *httpUserAgent,
|
||||||
})}
|
})}
|
||||||
},
|
},
|
||||||
Concurrency: *concurrency,
|
FollowRegexps: *follows.strs,
|
||||||
|
Concurrency: *concurrency,
|
||||||
OnError: func(err error) {
|
OnError: func(err error) {
|
||||||
log.Printf("runtime error: %v", err)
|
log.Printf("runtime error: %v", err)
|
||||||
},
|
},
|
||||||
|
50
deadlinks.go
50
deadlinks.go
@ -24,9 +24,11 @@
|
|||||||
// defer store.Close()
|
// defer store.Close()
|
||||||
//
|
//
|
||||||
// pinnedURLs := []string{"https://some.website.com"}
|
// pinnedURLs := []string{"https://some.website.com"}
|
||||||
// patterns := []string{"website.com"}
|
// followRegexps := []string{"website.com"}
|
||||||
//
|
//
|
||||||
// dl, err := deadlinks.New(ctx, store, pinnedURLs, patterns, nil)
|
// dl, err := deadlinks.New(ctx, store, pinnedURLs, &deadlinks.Opts{
|
||||||
|
// FollowRegexps: followRegexps,
|
||||||
|
// })
|
||||||
//
|
//
|
||||||
// `Update` is then used to crawl all links, starting with `pinnedURLs`:
|
// `Update` is then used to crawl all links, starting with `pinnedURLs`:
|
||||||
//
|
//
|
||||||
@ -56,7 +58,7 @@
|
|||||||
// })
|
// })
|
||||||
// defer store.Close()
|
// defer store.Close()
|
||||||
//
|
//
|
||||||
// dl, err := deadlinks.New(ctx, store, pinnedURLs, patterns, nil)
|
// dl, err := deadlinks.New(ctx, store, pinnedURLs, nil)
|
||||||
//
|
//
|
||||||
// # Further Customization
|
// # Further Customization
|
||||||
//
|
//
|
||||||
@ -83,6 +85,10 @@ type Opts struct {
|
|||||||
NewClient func() Client // Defaults to `func () Client { return NewClient(nil) }`
|
NewClient func() Client // Defaults to `func () Client { return NewClient(nil) }`
|
||||||
Parser Parser // Defaults to `NewParser()`
|
Parser Parser // Defaults to `NewParser()`
|
||||||
|
|
||||||
|
// If a URL matches any of these regexps then any links found within it will
|
||||||
|
// be followed and checked for liveness themselves.
|
||||||
|
FollowRegexps []string
|
||||||
|
|
||||||
// Concurrency determines the maximum number of URLs which can be checked
|
// Concurrency determines the maximum number of URLs which can be checked
|
||||||
// simultaneously.
|
// simultaneously.
|
||||||
//
|
//
|
||||||
@ -131,32 +137,32 @@ func (o *Opts) withDefaults() *Opts {
|
|||||||
// out-of-the-box, and will traverse between them as necessary based on URL
|
// out-of-the-box, and will traverse between them as necessary based on URL
|
||||||
// schemas. See the `NewClient` and `NewParser` functions for more details.
|
// schemas. See the `NewClient` and `NewParser` functions for more details.
|
||||||
type DeadLinks struct {
|
type DeadLinks struct {
|
||||||
opts Opts
|
opts Opts
|
||||||
store Store
|
store Store
|
||||||
patterns []*regexp.Regexp
|
follows []*regexp.Regexp
|
||||||
clients []Client
|
clients []Client
|
||||||
}
|
}
|
||||||
|
|
||||||
// New initializes and returns a DeadLinks instance which will track the
|
// New initializes and returns a DeadLinks instance which will track the
|
||||||
// liveness of the given set of pinned URLs, as well as all URLs linked to from
|
// liveness of the given set of pinned URLs, and potentially URLs linked to from
|
||||||
// those. If a linked URL matches one of the given regexp patterns then any
|
// those.
|
||||||
// URLs linked to from it will be tracked as well.
|
|
||||||
//
|
//
|
||||||
// If a non-empty Store is passed to New then whatever set of previously pinned
|
// If a previously used Store is passed to New then whatever set of previously
|
||||||
// URLs were present will be overwritten with the given ones.
|
// pinned URLs were present will be overwritten with the given ones.
|
||||||
func New(
|
func New(
|
||||||
ctx context.Context,
|
ctx context.Context,
|
||||||
store Store,
|
store Store,
|
||||||
pinnedURLStrs,
|
pinnedURLStrs []string,
|
||||||
patternStrs []string,
|
|
||||||
opts *Opts,
|
opts *Opts,
|
||||||
) (
|
) (
|
||||||
*DeadLinks, error,
|
*DeadLinks, error,
|
||||||
) {
|
) {
|
||||||
|
opts = opts.withDefaults()
|
||||||
|
|
||||||
var (
|
var (
|
||||||
err error
|
err error
|
||||||
pinnedURLs = make([]URL, len(pinnedURLStrs))
|
pinnedURLs = make([]URL, len(pinnedURLStrs))
|
||||||
patterns = make([]*regexp.Regexp, len(patternStrs))
|
follows = make([]*regexp.Regexp, len(opts.FollowRegexps))
|
||||||
)
|
)
|
||||||
|
|
||||||
for i, u := range pinnedURLStrs {
|
for i, u := range pinnedURLStrs {
|
||||||
@ -165,16 +171,16 @@ func New(
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
for i, p := range patternStrs {
|
for i, p := range opts.FollowRegexps {
|
||||||
if patterns[i], err = regexp.Compile(p); err != nil {
|
if follows[i], err = regexp.Compile(p); err != nil {
|
||||||
return nil, fmt.Errorf("compiling regexp %q: %w", p, err)
|
return nil, fmt.Errorf("compiling regexp %q: %w", p, err)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
d := &DeadLinks{
|
d := &DeadLinks{
|
||||||
opts: *opts.withDefaults(),
|
opts: *opts,
|
||||||
store: store,
|
store: store,
|
||||||
patterns: patterns,
|
follows: follows,
|
||||||
}
|
}
|
||||||
|
|
||||||
d.clients = make([]Client, d.opts.Concurrency)
|
d.clients = make([]Client, d.opts.Concurrency)
|
||||||
@ -200,8 +206,8 @@ func (d *DeadLinks) onError(ctx context.Context, err error) {
|
|||||||
|
|
||||||
func (d *DeadLinks) shouldFollowURL(url URL) bool {
|
func (d *DeadLinks) shouldFollowURL(url URL) bool {
|
||||||
urlStr := string(url)
|
urlStr := string(url)
|
||||||
for _, pattern := range d.patterns {
|
for _, follow := range d.follows {
|
||||||
if pattern.MatchString(urlStr) {
|
if follow.MatchString(urlStr) {
|
||||||
return true
|
return true
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
Loading…
Reference in New Issue
Block a user