// ResourceStatus describes what state a particular Resource is in.
type ResourceStatus int

// Enumeration of ResourceStatus values.
const (
	// ResourceStatusUnknown is the zero value of ResourceStatus.
	ResourceStatusUnknown ResourceStatus = iota

	// ResourceStatusOK is the status reported as "OK" by String.
	ResourceStatusOK

	// ResourceStatusError is the status reported as "ERROR" by String.
	ResourceStatusError
)

// String implements fmt.Stringer. The three declared statuses are rendered as
// "UNKNOWN", "OK" and "ERROR". Any other value is rendered as
// "ResourceStatus(N)" rather than panicking, so that an unexpected value (for
// example one scanned out of a database) can still be logged safely.
func (rs ResourceStatus) String() string {
	switch rs {
	case ResourceStatusUnknown:
		return "UNKNOWN"
	case ResourceStatusOK:
		return "OK"
	case ResourceStatusError:
		return "ERROR"
	default:
		// Convert to int so the numeric value is formatted directly.
		return fmt.Sprintf("ResourceStatus(%d)", int(rs))
	}
}
+ IncomingLinkURLs, OutgoingLinkURLs []URL +} diff --git a/flake.lock b/flake.lock new file mode 100644 index 0000000..525a591 --- /dev/null +++ b/flake.lock @@ -0,0 +1,26 @@ +{ + "nodes": { + "nixpkgs": { + "locked": { + "lastModified": 1703351344, + "narHash": "sha256-9FEelzftkE9UaJ5nqxidaJJPEhe9TPhbypLHmc2Mysc=", + "owner": "NixOS", + "repo": "nixpkgs", + "rev": "7790e078f8979a9fcd543f9a47427eeaba38f268", + "type": "github" + }, + "original": { + "id": "nixpkgs", + "ref": "nixos-23.05", + "type": "indirect" + } + }, + "root": { + "inputs": { + "nixpkgs": "nixpkgs" + } + } + }, + "root": "root", + "version": 7 +} diff --git a/flake.nix b/flake.nix new file mode 100644 index 0000000..314ec80 --- /dev/null +++ b/flake.nix @@ -0,0 +1,44 @@ +{ + description = "radix development environment"; + + # Nixpkgs / NixOS version to use. + inputs.nixpkgs.url = "nixpkgs/nixos-23.05"; + + outputs = { self, nixpkgs }: + let + + # to work with older version of flakes + lastModifiedDate = self.lastModifiedDate or self.lastModified or "19700101"; + + # Generate a user-friendly version number. + version = builtins.substring 0 8 lastModifiedDate; + + # System types to support. + supportedSystems = [ "x86_64-linux" "x86_64-darwin" "aarch64-linux" "aarch64-darwin" ]; + + # Helper function to generate an attrset '{ x86_64-linux = f "x86_64-linux"; ... }'. + forAllSystems = nixpkgs.lib.genAttrs supportedSystems; + + # Nixpkgs instantiated for supported system types. 
+ nixpkgsFor = forAllSystems (system: import nixpkgs { inherit system; }); + + in + { + + # Add dependencies that are only needed for development + devShells = forAllSystems (system: + let + pkgs = nixpkgsFor.${system}; + in { + default = pkgs.mkShell { + buildInputs = [ + pkgs.go + pkgs.gotools + pkgs.golangci-lint + pkgs.sqlite + ]; + }; + }); + }; +} + diff --git a/go.mod b/go.mod new file mode 100644 index 0000000..36fbbf5 --- /dev/null +++ b/go.mod @@ -0,0 +1,15 @@ +module code.betamike.com/mediocregopher/deadlinks + +go 1.20 + +require ( + code.betamike.com/mediocregopher/mediocre-go-lib v0.0.0-20231226160338-0b5bdf3dfb03 // indirect + github.com/davecgh/go-spew v1.1.1 // indirect + github.com/go-gorp/gorp/v3 v3.1.0 // indirect + github.com/mattn/go-sqlite3 v1.14.19 // indirect + github.com/pmezard/go-difflib v1.0.0 // indirect + github.com/rubenv/sql-migrate v1.6.0 // indirect + github.com/stretchr/objx v0.5.0 // indirect + github.com/stretchr/testify v1.8.4 // indirect + gopkg.in/yaml.v3 v3.0.1 // indirect +) diff --git a/go.sum b/go.sum new file mode 100644 index 0000000..1006625 --- /dev/null +++ b/go.sum @@ -0,0 +1,27 @@ +code.betamike.com/mediocregopher/mediocre-go-lib v0.0.0-20231226155808-e8376ef263a7 h1:Dm7oXBLmdPXVo8tPZzrmUvON6zXEdWxbGiO4chqrNHw= +code.betamike.com/mediocregopher/mediocre-go-lib v0.0.0-20231226155808-e8376ef263a7/go.mod h1:GJhpoMNnN/OT6O9NmeQBV02yq9kQP8zPyY1IvsslHak= +code.betamike.com/mediocregopher/mediocre-go-lib v0.0.0-20231226160338-0b5bdf3dfb03 h1:wJ6X1vc289RpHVGClD1P33yijPoNIdgCXbTn7DjVWYs= +code.betamike.com/mediocregopher/mediocre-go-lib v0.0.0-20231226160338-0b5bdf3dfb03/go.mod h1:GJhpoMNnN/OT6O9NmeQBV02yq9kQP8zPyY1IvsslHak= +github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= +github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c= +github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= +github.com/go-gorp/gorp/v3 v3.1.0 
h1:ItKF/Vbuj31dmV4jxA1qblpSwkl9g1typ24xoe70IGs= +github.com/go-gorp/gorp/v3 v3.1.0/go.mod h1:dLEjIyyRNiXvNZ8PSmzpt1GsWAUK8kjVhEpjH8TixEw= +github.com/mattn/go-sqlite3 v1.14.19 h1:fhGleo2h1p8tVChob4I9HpmVFIAkKGpiukdrgQbWfGI= +github.com/mattn/go-sqlite3 v1.14.19/go.mod h1:2eHXhiwb8IkHr+BDWZGa96P6+rkvnG63S2DGjv9HUNg= +github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM= +github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4= +github.com/rubenv/sql-migrate v1.6.0 h1:IZpcTlAx/VKXphWEpwWJ7BaMq05tYtE80zYz+8a5Il8= +github.com/rubenv/sql-migrate v1.6.0/go.mod h1:m3ilnKP7sNb4eYkLsp6cGdPOl4OBcXM6rcbzU+Oqc5k= +github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME= +github.com/stretchr/objx v0.4.0/go.mod h1:YvHI0jy2hoMjB+UWwv71VJQ9isScKT/TqJzVSSt89Yw= +github.com/stretchr/objx v0.5.0 h1:1zr/of2m5FGMsad5YfcqgdqdWrIhu+EBEJRhR1U7z/c= +github.com/stretchr/objx v0.5.0/go.mod h1:Yh+to48EsGEfYuaHDzXPcE3xhTkx73EhmCGUpEOglKo= +github.com/stretchr/testify v1.7.1/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg= +github.com/stretchr/testify v1.8.0/go.mod h1:yNjHg4UonilssWZ8iaSj1OCr/vHnekPRkoO+kdMU+MU= +github.com/stretchr/testify v1.8.4 h1:CcVxjf3Q8PM0mHUKJCdn+eZZtm5yQwehR5yeSVQQcUk= +github.com/stretchr/testify v1.8.4/go.mod h1:sz/lmYIOXD/1dqDmKjjqLyZ2RngseejIcXlSw2iwfAo= +gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0= +gopkg.in/yaml.v3 v3.0.0-20200313102051-9f266ea9e77c/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM= +gopkg.in/yaml.v3 v3.0.1 h1:fxVm/GzAzEWqLHuvctI91KS9hhNmmWOoWu0XTYJS7CA= +gopkg.in/yaml.v3 v3.0.1/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM= diff --git a/store.go b/store.go new file mode 100644 index 0000000..3dd0faf --- /dev/null +++ b/store.go @@ -0,0 +1,407 @@ +package deadlinks + +import ( + "context" + "database/sql" + "errors" + "fmt" + "strings" + "time" + + 
"code.betamike.com/mediocregopher/mediocre-go-lib/miter" + + _ "github.com/mattn/go-sqlite3" + migrate "github.com/rubenv/sql-migrate" +) + +// Store keeps track of the current status of all discovered Resources. +// Resources with no incoming links will be periodically cleaned out. +// +// An implementation of Store must be thread-safe. +type Store interface { + // GetByStatus returns all Resources with the given Status. + GetByStatus(ResourceStatus) miter.Iterator[Resource] + + // GetURLsByLastChecked returns the URLs of all Resources with LastChecked + // values older than the given timestamp, or which have never been checked. + GetURLsByLastChecked(olderThan time.Time) miter.Iterator[URL] + + // SetPinned overwrites the set of pinned URLs with the given one. + SetPinned(context.Context, []URL) error + + // Update updates the Resource identified by the given URL with the given + // arguments. The Resource must have been Touch'd previously, or this + // returns an error. + Update( + ctx context.Context, + now time.Time, + url URL, + status ResourceStatus, + errorString string, + outgoing []URL, + ) error +} + +var migrations = &migrate.MemoryMigrationSource{Migrations: []*migrate.Migration{ + { + Id: "1", + Up: []string{ + `CREATE TABLE urls ( + id INTEGER NOT NULL PRIMARY KEY, + url TEXT NOT NULL, + UNIQUE(url) + )`, + + `CREATE TABLE resources ( + url_id INTEGER NOT NULL PRIMARY KEY, + status INTEGER NOT NULL DEFAULT 0, + pinned INTEGER NOT NULL, + last_checked INTEGER NOT NULL DEFAULT 0, + error_string TEXT NOT NULL DEFAULT '', + FOREIGN KEY(url_id) REFERENCES urls(id) ON DELETE CASCADE + )`, + + `CREATE TABLE links ( + from_url_id INTEGER NOT NULL, + to_url_id INTEGER NOT NULL, + FOREIGN KEY(from_url_id) REFERENCES urls(id) ON DELETE CASCADE, + FOREIGN KEY(to_url_id) REFERENCES urls(id) ON DELETE CASCADE, + PRIMARY KEY(from_url_id, to_url_id) + )`, + + `CREATE INDEX links_outgoing_idx ON links (to_url_id)`, + }, + }, +}} + +/* +TODO +- initialization options 
+ - cleanup period +- document SQLiteStore properly +- teardown the cleanup goroutine +*/ + +type SQLiteStore struct { + db *sql.DB +} + +var _ Store = (*SQLiteStore)(nil) + +// NewInMemStore returns a Store implementation which uses an in-memory SQLite +// db. +func NewInMemStore() *SQLiteStore { + db, err := sql.Open("sqlite3", ":memory:?_foreign_keys=1") + if err != nil { + panic(fmt.Errorf("opening sqlite in memory: %w", err)) + } + + if _, err := migrate.Exec(db, "sqlite3", migrations, migrate.Up); err != nil { + panic(fmt.Errorf("running migrations: %w", err)) + } + + return &SQLiteStore{db} +} + +// Close cleans up all resources held by the SQLiteStore store, if any. It must +// be the final method call to the SQLiteStore. +func (s *SQLiteStore) Close() error { + return s.db.Close() +} + +// GetByStatus implements the method for the Store interface. +func (s *SQLiteStore) GetByStatus(status ResourceStatus) miter.Iterator[Resource] { + const query = ` + WITH + incoming(url_id, urls) AS ( + SELECT + to_url_id, + GROUP_CONCAT(url, char(0)) + FROM links + JOIN urls ON (urls.id = links.from_url_id) + GROUP BY to_url_id + ), + outgoing(url_id, urls) AS ( + SELECT + from_url_id, + GROUP_CONCAT(url, char(0)) + FROM links + JOIN urls ON (urls.id = links.to_url_id) + GROUP BY from_url_id + ) + SELECT + url, + status, + pinned, + last_checked, + error_string, + incoming.urls, + outgoing.urls + FROM resources + JOIN urls ON (urls.id = resources.url_id) + LEFT JOIN incoming ON (incoming.url_id = resources.url_id) + LEFT JOIN outgoing ON (outgoing.url_id = resources.url_id) + WHERE status = ?` + + return miter.Lazily(func(ctx context.Context) (miter.Iterator[Resource], error) { + rows, err := s.db.QueryContext(ctx, query, status) + if err != nil { + return nil, fmt.Errorf("executing query: %w", err) + } + + return miter.FromFunc(func(ctx context.Context) (Resource, error) { + var ( + r Resource + lastChecked int64 + incoming, outgoing sql.NullString + ) + + if 
!rows.Next() { + return Resource{}, errors.Join(rows.Close(), miter.ErrEnd) + } + + if err := rows.Scan( + &r.URL, + &r.Status, + &r.Pinned, + &lastChecked, + &r.ErrorString, + &incoming, + &outgoing, + ); err != nil { + return Resource{}, errors.Join( + rows.Close(), fmt.Errorf("scanning row: %w", err), + ) + } + + if lastChecked != 0 { + r.LastChecked = time.Unix(lastChecked, 0).UTC() + } + + if incoming.String != "" { + if r.IncomingLinkURLs, err = parseURLs( + strings.Split(incoming.String, "\x00"), + ); err != nil { + return Resource{}, errors.Join( + rows.Close(), fmt.Errorf("parsing incoming links: %w", err), + ) + } + } + + if outgoing.String != "" { + if r.OutgoingLinkURLs, err = parseURLs( + strings.Split(outgoing.String, "\x00"), + ); err != nil { + return Resource{}, errors.Join( + rows.Close(), fmt.Errorf("parsing outgoing links: %w", err), + ) + } + } + + return r, nil + }), nil + }) +} + +// GetURLsByLastChecked implements the method for the Store interface. +func (s *SQLiteStore) GetURLsByLastChecked( + olderThan time.Time, +) miter.Iterator[URL] { + const query = ` + SELECT url + FROM resources + JOIN urls ON (urls.id = resources.url_id) + WHERE last_checked < ?` + + return miter.Lazily(func(ctx context.Context) (miter.Iterator[URL], error) { + rows, err := s.db.QueryContext(ctx, query, olderThan.Unix()) + if err != nil { + return nil, fmt.Errorf("executing query: %w", err) + } + + return miter.FromFunc(func(ctx context.Context) (URL, error) { + if !rows.Next() { + return "", errors.Join(rows.Close(), miter.ErrEnd) + } + + var urlStr string + if err := rows.Scan(&urlStr); err != nil { + return "", errors.Join( + rows.Close(), fmt.Errorf("scanning url: %w", err), + ) + } + + url, err := ParseURL(urlStr) + if err != nil { + return "", errors.Join( + rows.Close(), + fmt.Errorf("parsing url %q from db: %w", urlStr, err), + ) + } + + return url, nil + }), nil + }) +} + +func (s *SQLiteStore) touch(ctx context.Context, urls []URL, pinned bool) ( + []int, 
error, +) { + var ( + urlsQueryParams = make([]any, len(urls)) + resourcesQueryParams = make([]any, 0, (len(urls)*2)+1) + ids = make([]int, 0, len(urls)) + ) + + for i := range urls { + urlsQueryParams[i] = urls[i] + } + + urlsQuery := ` + INSERT INTO urls (url) + VALUES ` + joinRepeated("(?)", ",", len(urls)) + ` + ON CONFLICT DO UPDATE SET url=url + RETURNING id` + + rows, err := s.db.QueryContext(ctx, urlsQuery, urlsQueryParams...) + if err != nil { + return nil, fmt.Errorf("inserting into urls: %w", err) + } + + for range urls { + if !rows.Next() { + return nil, errors.Join( + errors.New("expected a returned row"), rows.Close(), + ) + } + + var id int + if err := rows.Scan(&id); err != nil { + return nil, errors.Join( + fmt.Errorf("scanning return from insert into urls: %w", err), + rows.Close(), + ) + } + + resourcesQueryParams = append(resourcesQueryParams, id, pinned) + ids = append(ids, id) + } + + rows.Close() + + resourcesQuery := ` + INSERT INTO resources (url_id, pinned) + VALUES ` + joinRepeated("(?,?)", ",", len(urls)) + ` + ON CONFLICT DO UPDATE SET pinned = ?` + + resourcesQueryParams = append(resourcesQueryParams, pinned) + + _, err = s.db.ExecContext(ctx, resourcesQuery, resourcesQueryParams...) + if err != nil { + return nil, fmt.Errorf("inserting into resources: %w", err) + } + + return ids, nil +} + +// SetPinned implements the method for the Store interface. +func (s *SQLiteStore) SetPinned(ctx context.Context, urls []URL) error { + _, err := s.db.ExecContext(ctx, `UPDATE resources SET pinned = 0`) + if err != nil { + return fmt.Errorf("unsetting pinned on all resources: %w", err) + } + + if _, err := s.touch(ctx, urls, true); err != nil { + return fmt.Errorf("pinning resources: %w", err) + } + + return err +} + +// Update implements the method for the Store interface. 
+func (s *SQLiteStore) Update( + ctx context.Context, + now time.Time, + url URL, + status ResourceStatus, + errorString string, + outgoing []URL, +) error { + const resourcesQuery = ` + UPDATE resources + SET + status = ?, + last_checked = ?, + error_string = ? + WHERE url_id = (SELECT id FROM urls WHERE url = ?) + RETURNING url_id` + + var urlID int + + err := s.db.QueryRowContext( + ctx, resourcesQuery, status, now.Unix(), errorString, url, + ).Scan(&urlID) + if err != nil { + return fmt.Errorf("inserting into resources: %w", err) + } + + _, err = s.db.ExecContext( + ctx, `DELETE FROM links WHERE from_url_id = ?`, urlID, + ) + if err != nil { + return fmt.Errorf("deleting from links: %w", err) + } + + if len(outgoing) == 0 { + return nil + } + + outgoingIDs, err := s.touch(ctx, outgoing, false) + if err != nil { + return fmt.Errorf("touching outgoing links: %w", err) + } + + linksQueryParams := make([]any, 0, len(outgoingIDs)*2) + for i := range outgoingIDs { + linksQueryParams = append(linksQueryParams, urlID, outgoingIDs[i]) + } + + linksQuery := ` + INSERT INTO links (from_url_id, to_url_id) + VALUES ` + joinRepeated("(?,?)", ",", len(outgoing)) + ` + ON CONFLICT DO NOTHING` + + _, err = s.db.ExecContext(ctx, linksQuery, linksQueryParams...) 
// joinRepeated returns str repeated n times with sep placed between
// consecutive repetitions, e.g. joinRepeated("(?)", ",", 3) returns
// "(?),(?),(?)". It returns the empty string when n is zero or negative.
func joinRepeated(str, sep string, n int) string {
	if n <= 0 {
		return ""
	}

	// Emit n-1 "str+sep" pairs followed by a final str, so there is no
	// trailing separator to trim.
	return strings.Repeat(str+sep, n-1) + str
}
assert.NoError(t, err) + + norm(want) + norm(got) + assert.Equal(t, want, got) +} + +func TestSQLiteStore(t *testing.T) { + t.Parallel() + + t.Run("SetPinned", func(t *testing.T) { + t.Parallel() + + var ( + h = newSQLiteStoreHarness() + urlA = URL("https://a.com") + urlB = URL("https://b.com") + a = Resource{URL: urlA, Pinned: true} + b = Resource{URL: urlB, Pinned: true} + ) + + assert.NoError(t, h.store.SetPinned(h.ctx, []URL{urlA})) + h.assertGetByStatus(t, nil, ResourceStatusOK) + h.assertGetByStatus(t, []Resource{a}, ResourceStatusUnknown) + + assert.NoError(t, h.store.SetPinned(h.ctx, []URL{urlA, urlB})) + h.assertGetByStatus(t, nil, ResourceStatusOK) + h.assertGetByStatus(t, []Resource{a, b}, ResourceStatusUnknown) + + a.Pinned = false + assert.NoError(t, h.store.SetPinned(h.ctx, []URL{urlB})) + h.assertGetByStatus(t, nil, ResourceStatusOK) + h.assertGetByStatus(t, []Resource{a, b}, ResourceStatusUnknown) + }) + + t.Run("Update", func(t *testing.T) { + t.Parallel() + + var ( + h = newSQLiteStoreHarness() + + urlA = URL("https://a.com") + urlB = URL("https://b.com") + urlC = URL("https://c.com") + + a = Resource{URL: urlA, Pinned: true} + b = Resource{URL: urlB, Pinned: true} + c = Resource{URL: urlC} + ) + + // updating a non-existing URL should fail and make no changes + assert.Error(t, h.store.Update(h.ctx, h.now, urlA, ResourceStatusOK, "errstr", nil)) + h.assertGetByStatus(t, nil, ResourceStatusUnknown) + h.assertGetByStatus(t, nil, ResourceStatusOK) + + assert.NoError(t, h.store.SetPinned(h.ctx, []URL{urlA, urlB})) + + assert.NoError(t, h.store.Update(h.ctx, h.now, urlA, ResourceStatusOK, "", nil)) + a.LastChecked = h.now + a.Status = ResourceStatusOK + h.assertGetByStatus(t, []Resource{b}, ResourceStatusUnknown) + h.assertGetByStatus(t, []Resource{a}, ResourceStatusOK) + + assert.NoError(t, h.store.Update(h.ctx, h.now, urlB, ResourceStatusError, "error!", nil)) + b.LastChecked = h.now + b.Status = ResourceStatusError + b.ErrorString = "error!" 
+ h.assertGetByStatus(t, nil, ResourceStatusUnknown) + h.assertGetByStatus(t, nil, ResourceStatusUnknown) + h.assertGetByStatus(t, []Resource{a}, ResourceStatusOK) + h.assertGetByStatus(t, []Resource{b}, ResourceStatusError) + + assert.NoError(t, h.store.Update( + h.ctx, h.now, urlA, ResourceStatusOK, "", []URL{urlC}, + )) + a.OutgoingLinkURLs = []URL{urlC} + c.IncomingLinkURLs = []URL{urlA} + h.assertGetByStatus(t, []Resource{c}, ResourceStatusUnknown) + h.assertGetByStatus(t, []Resource{a}, ResourceStatusOK) + h.assertGetByStatus(t, []Resource{b}, ResourceStatusError) + + assert.NoError(t, h.store.Update( + h.ctx, h.now, urlB, ResourceStatusOK, "", []URL{urlC}, + )) + b.Status = ResourceStatusOK + b.ErrorString = "" + b.OutgoingLinkURLs = []URL{urlC} + c.IncomingLinkURLs = []URL{urlA, urlB} + h.assertGetByStatus(t, []Resource{c}, ResourceStatusUnknown) + h.assertGetByStatus(t, []Resource{a, b}, ResourceStatusOK) + h.assertGetByStatus(t, nil, ResourceStatusError) + }) + + t.Run("GetURLsByLastChecked", func(t *testing.T) { + t.Parallel() + + var ( + h = newSQLiteStoreHarness() + nowA = h.now + nowB = h.now.Add(1 * time.Minute) + + urlA = URL("https://a.com") + urlB = URL("https://b.com") + urlC = URL("https://c.com") + ) + + assert.NoError(t, h.store.SetPinned(h.ctx, []URL{urlA, urlB, urlC})) + + assert.NoError(t, h.store.Update( + h.ctx, nowA, urlA, ResourceStatusOK, "", nil, + )) + + assert.NoError(t, h.store.Update( + h.ctx, nowB, urlB, ResourceStatusOK, "", nil, + )) + + assertGetURLsByLastChecked := func(want []URL, olderThan time.Time) { + got, err := miter.ToSlice(h.ctx, h.store.GetURLsByLastChecked(olderThan)) + assert.NoError(t, err) + assert.ElementsMatch(t, want, got) + } + + assertGetURLsByLastChecked([]URL{urlA, urlB, urlC}, nowB.Add(1*time.Second)) + assertGetURLsByLastChecked([]URL{urlA, urlC}, nowB) + assertGetURLsByLastChecked([]URL{urlA, urlC}, nowA.Add(1*time.Second)) + assertGetURLsByLastChecked([]URL{urlC}, nowA) + 
assertGetURLsByLastChecked([]URL{urlC}, h.now) + }) + + t.Run("deleteOrphans", func(t *testing.T) { + t.Parallel() + + var ( + h = newSQLiteStoreHarness() + + urlA = URL("https://a.com") + urlB = URL("https://b.com") + urlC = URL("https://c.com") + urlD = URL("https://d.com") + + a = Resource{URL: urlA, Pinned: true, LastChecked: h.now, OutgoingLinkURLs: []URL{urlB}} + b = Resource{URL: urlB, IncomingLinkURLs: []URL{urlA}} + ) + + assert.NoError(t, h.store.SetPinned(h.ctx, []URL{urlA, urlB, urlC, urlD})) + assert.NoError(t, h.store.SetPinned(h.ctx, []URL{urlA})) + + assert.NoError(t, h.store.Update( + h.ctx, h.now, urlA, ResourceStatusUnknown, "", []URL{urlB}, + )) + + assert.NoError(t, h.store.Update( + h.ctx, h.now, urlC, ResourceStatusUnknown, "", []URL{urlD}, + )) + + assert.NoError(t, h.store.deleteOrphans(h.ctx)) + h.assertGetByStatus(t, []Resource{a, b}, ResourceStatusUnknown) + }) +}