A tool for crawling and finding links to URLs which no longer exist
You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.
 
 
deadlinks/store_test.go

273 lines
7.4 KiB

package deadlinks
import (
"context"
"fmt"
"sort"
"testing"
"time"
"code.betamike.com/mediocregopher/mediocre-go-lib/miter"
"github.com/stretchr/testify/assert"
)
// sqliteStoreHarness bundles the values shared by the SQLiteStore tests: a
// context, a reference timestamp, and the store under test.
//
// NOTE: newSQLiteStoreHarness fills this struct with a positional literal, so
// the field order here must stay in sync with that constructor.
type sqliteStoreHarness struct {
// ctx is the context handed to store and iterator calls made by the tests.
ctx context.Context
// now is the reference timestamp the tests pass to Update and compare
// against Resource.LastChecked.
now time.Time
// store is the SQLiteStore instance being exercised.
store *SQLiteStore
}
// newSQLiteStoreHarness returns a harness wrapping a store freshly created via
// NewSQLiteStore(nil), a background context, and the current time in UTC
// truncated to whole seconds.
//
// NOTE(review): passing nil presumably selects the store's default options —
// confirm against NewSQLiteStore.
func newSQLiteStoreHarness() *sqliteStoreHarness {
	return &sqliteStoreHarness{
		ctx:   context.Background(),
		now:   time.Now().Truncate(time.Second).UTC(),
		store: NewSQLiteStore(nil),
	}
}
// assertGetByStatus collects every Resource the store yields for status and
// asserts that the result equals want, ignoring ordering.
//
// Both want and the fetched slice are normalized in place before being
// compared: each Resource's IncomingLinkURLs and OutgoingLinkURLs are sorted,
// and the Resources themselves are sorted by URL. Because sorting happens in
// place, the caller's want slice (and the link slices it references) may be
// reordered.
//
// If fetching fails, the error is reported on t and no comparison is made.
func (h *sqliteStoreHarness) assertGetByStatus(
	t *testing.T, want []Resource, status ResourceStatus,
) {
	// Attribute failures to the calling test line rather than to this helper.
	t.Helper()

	norm := func(rr []Resource) {
		for i := range rr {
			// The link slices share their backing arrays with rr[i], so
			// sorting them here is visible through rr without a write-back.
			in, out := rr[i].IncomingLinkURLs, rr[i].OutgoingLinkURLs
			sort.Slice(in, func(a, b int) bool { return in[a] < in[b] })
			sort.Slice(out, func(a, b int) bool { return out[a] < out[b] })
		}
		sort.Slice(rr, func(a, b int) bool { return rr[a].URL < rr[b].URL })
	}

	got, err := miter.ToSlice(h.ctx, h.store.GetByStatus(status))
	if !assert.NoError(t, err) {
		// The result is meaningless once the fetch failed; comparing it would
		// only add a second, misleading failure.
		return
	}

	norm(want)
	norm(got)
	assert.Equal(t, want, got)
}
// TestSQLiteStore exercises SQLiteStore's SetPinned, Update, GetByStatus,
// GetURLsByLastChecked, and GC methods. Every subtest builds its own harness
// (and therefore its own store), and all subtests are marked parallel.
func TestSQLiteStore(t *testing.T) {
	t.Parallel()

	// SetPinned: the set of pinned URLs is replaced on every call, and newly
	// pinned resources start out with an unknown status.
	t.Run("SetPinned", func(t *testing.T) {
		t.Parallel()

		var (
			h = newSQLiteStoreHarness()

			urlA = URL("https://a.com")
			urlB = URL("https://b.com")

			a = Resource{URL: urlA, Pinned: true}
			b = Resource{URL: urlB, Pinned: true}
		)

		assert.NoError(t, h.store.SetPinned(h.ctx, []URL{urlA}))
		h.assertGetByStatus(t, nil, ResourceStatusOK)
		h.assertGetByStatus(t, []Resource{a}, ResourceStatusUnknown)

		assert.NoError(t, h.store.SetPinned(h.ctx, []URL{urlA, urlB}))
		h.assertGetByStatus(t, nil, ResourceStatusOK)
		h.assertGetByStatus(t, []Resource{a, b}, ResourceStatusUnknown)

		assert.NoError(t, h.store.SetPinned(h.ctx, []URL{urlB}))
		h.assertGetByStatus(t, nil, ResourceStatusOK)

		// GetByStatus should not return resources which are not pinned and have
		// no incoming links
		h.assertGetByStatus(t, []Resource{b}, ResourceStatusUnknown)
	})

	// Update/general: Update records status, error string, check time and
	// outgoing links, and linked-to URLs show up as resources with incoming
	// links.
	t.Run("Update/general", func(t *testing.T) {
		t.Parallel()

		var (
			h = newSQLiteStoreHarness()

			urlA = URL("https://a.com")
			urlB = URL("https://b.com")
			urlC = URL("https://c.com")

			a = Resource{URL: urlA, Pinned: true}
			b = Resource{URL: urlB, Pinned: true}
			c = Resource{URL: urlC}
		)

		// updating a non-existing URL should fail and make no changes
		assert.Error(t, h.store.Update(h.ctx, h.now, urlA, ResourceStatusOK, "errstr", nil))
		h.assertGetByStatus(t, nil, ResourceStatusUnknown)
		h.assertGetByStatus(t, nil, ResourceStatusOK)

		assert.NoError(t, h.store.SetPinned(h.ctx, []URL{urlA, urlB}))

		// Mark A as OK; B is still unchecked.
		assert.NoError(t, h.store.Update(h.ctx, h.now, urlA, ResourceStatusOK, "", nil))
		a.LastChecked = h.now
		a.Status = ResourceStatusOK
		h.assertGetByStatus(t, []Resource{b}, ResourceStatusUnknown)
		h.assertGetByStatus(t, []Resource{a}, ResourceStatusOK)

		// Mark B as errored; nothing is left with an unknown status.
		assert.NoError(t, h.store.Update(h.ctx, h.now, urlB, ResourceStatusError, "error!", nil))
		b.LastChecked = h.now
		b.Status = ResourceStatusError
		b.ErrorString = "error!"
		h.assertGetByStatus(t, nil, ResourceStatusUnknown)
		h.assertGetByStatus(t, []Resource{a}, ResourceStatusOK)
		h.assertGetByStatus(t, []Resource{b}, ResourceStatusError)

		// A now links to C, so C appears as an unchecked resource with A as
		// its incoming link.
		assert.NoError(t, h.store.Update(
			h.ctx, h.now, urlA, ResourceStatusOK, "", []URL{urlC},
		))
		a.OutgoingLinkURLs = []URL{urlC}
		c.IncomingLinkURLs = []URL{urlA}
		h.assertGetByStatus(t, []Resource{c}, ResourceStatusUnknown)
		h.assertGetByStatus(t, []Resource{a}, ResourceStatusOK)
		h.assertGetByStatus(t, []Resource{b}, ResourceStatusError)

		// B recovers and also links to C; its previous error string is
		// expected to be cleared.
		assert.NoError(t, h.store.Update(
			h.ctx, h.now, urlB, ResourceStatusOK, "", []URL{urlC},
		))
		b.Status = ResourceStatusOK
		b.ErrorString = ""
		b.OutgoingLinkURLs = []URL{urlC}
		c.IncomingLinkURLs = []URL{urlA, urlB}
		h.assertGetByStatus(t, []Resource{c}, ResourceStatusUnknown)
		h.assertGetByStatus(t, []Resource{a, b}, ResourceStatusOK)
		h.assertGetByStatus(t, nil, ResourceStatusError)
	})

	// Update/while_GetByStatus: calling Update from inside an in-progress
	// GetByStatus iteration must work. The expected result includes C being
	// visited by the same iteration (it ends up OK and linking to itself).
	t.Run("Update/while_GetByStatus", func(t *testing.T) {
		t.Parallel()

		var (
			h = newSQLiteStoreHarness()

			urlA = URL("https://a.com")
			urlB = URL("https://b.com")
			urlC = URL("https://c.com")

			a = Resource{
				URL:              urlA,
				Status:           ResourceStatusOK,
				Pinned:           true,
				LastChecked:      h.now,
				OutgoingLinkURLs: []URL{urlC},
			}
			b = Resource{
				URL:              urlB,
				Status:           ResourceStatusOK,
				Pinned:           true,
				LastChecked:      h.now,
				OutgoingLinkURLs: []URL{urlC},
			}
			c = Resource{
				URL:              urlC,
				Status:           ResourceStatusOK,
				LastChecked:      h.now,
				IncomingLinkURLs: []URL{urlA, urlB, urlC},
				OutgoingLinkURLs: []URL{urlC},
			}
		)

		assert.NoError(t, h.store.SetPinned(h.ctx, []URL{urlA, urlB}))

		iter := h.store.GetByStatus(ResourceStatusUnknown)
		err := miter.ForEach(h.ctx, iter, func(r Resource) error {
			err := h.store.Update(
				h.ctx, h.now, r.URL, ResourceStatusOK, "", []URL{urlC},
			)
			if err != nil {
				return fmt.Errorf("updating %+v: %w", r, err)
			}
			return nil
		})
		assert.NoError(t, err)

		h.assertGetByStatus(t, []Resource{a, b, c}, ResourceStatusOK)
	})

	// GetURLsByLastChecked: only URLs last checked strictly before the given
	// time are expected; a never-checked URL (C) is expected for every cutoff
	// while it is pinned.
	t.Run("GetURLsByLastChecked", func(t *testing.T) {
		t.Parallel()

		var (
			h = newSQLiteStoreHarness()

			nowA = h.now
			nowB = h.now.Add(1 * time.Minute)

			urlA = URL("https://a.com")
			urlB = URL("https://b.com")
			urlC = URL("https://c.com")
		)

		assert.NoError(t, h.store.SetPinned(h.ctx, []URL{urlA, urlB, urlC}))
		assert.NoError(t, h.store.Update(
			h.ctx, nowA, urlA, ResourceStatusOK, "", nil,
		))
		assert.NoError(t, h.store.Update(
			h.ctx, nowB, urlB, ResourceStatusOK, "", nil,
		))

		// assertGetURLsByLastChecked compares the URLs returned for olderThan
		// against want, ignoring order.
		assertGetURLsByLastChecked := func(want []URL, olderThan time.Time) {
			// Attribute failures to the calling line rather than this closure.
			t.Helper()

			got, err := miter.ToSlice(h.ctx, h.store.GetURLsByLastChecked(olderThan))
			assert.NoError(t, err)
			assert.ElementsMatch(t, want, got)
		}

		assertGetURLsByLastChecked([]URL{urlA, urlB, urlC}, nowB.Add(1*time.Second))
		assertGetURLsByLastChecked([]URL{urlA, urlC}, nowB)
		assertGetURLsByLastChecked([]URL{urlA, urlC}, nowA.Add(1*time.Second))
		assertGetURLsByLastChecked([]URL{urlC}, nowA)

		// A Resource which is not pinned and has no incoming links should not
		// be returned
		assert.NoError(t, h.store.SetPinned(h.ctx, []URL{urlA, urlB}))
		assertGetURLsByLastChecked([]URL{urlA, urlB}, nowB.Add(1*time.Second))
		assertGetURLsByLastChecked([]URL{}, nowA)
	})

	// GC: after collection only the pinned resource A and B (linked from A)
	// are expected to remain; C and D, which were unpinned, must not be
	// returned even though C links to D.
	t.Run("GC", func(t *testing.T) {
		t.Parallel()

		var (
			h = newSQLiteStoreHarness()

			urlA = URL("https://a.com")
			urlB = URL("https://b.com")
			urlC = URL("https://c.com")
			urlD = URL("https://d.com")

			a = Resource{URL: urlA, Pinned: true, LastChecked: h.now, OutgoingLinkURLs: []URL{urlB}}
			b = Resource{URL: urlB, IncomingLinkURLs: []URL{urlA}}
		)

		// Create all four resources, then leave only A pinned.
		assert.NoError(t, h.store.SetPinned(h.ctx, []URL{urlA, urlB, urlC, urlD}))
		assert.NoError(t, h.store.SetPinned(h.ctx, []URL{urlA}))

		assert.NoError(t, h.store.Update(
			h.ctx, h.now, urlA, ResourceStatusUnknown, "", []URL{urlB},
		))
		assert.NoError(t, h.store.Update(
			h.ctx, h.now, urlC, ResourceStatusUnknown, "", []URL{urlD},
		))

		assert.NoError(t, h.store.GC(h.ctx))
		h.assertGetByStatus(t, []Resource{a, b}, ResourceStatusUnknown)

		// Calling again shouldn't do anything
		assert.NoError(t, h.store.GC(h.ctx))
		h.assertGetByStatus(t, []Resource{a, b}, ResourceStatusUnknown)
	})
}