Finish up Store

This commit is contained in:
Brian Picciano 2023-12-28 15:40:07 +01:00
parent f08d66b247
commit 571da7e2ac
3 changed files with 76 additions and 23 deletions

View File

@ -1,5 +1,17 @@
// Package deadlinks implements a liveness checker for hyperlinks in HTML and // Package deadlinks implements a liveness checker for hyperlinks in HTML and
// gemtext documents. // gemtext documents.
//
// # Storage
//
// By default DeadLinks uses an in-memory SQLite database for tracking the
// status of resources and the links between them. If memory usage becomes a
// problem it is also possible to use a SQLite database file:
//
// store := deadlinks.NewSQLiteStore(&deadlinks.SQLiteStoreOpts{
// Path: "/path/to/db/file.sqlite",
// })
//
// // TODO initialize DeadLinks
package deadlinks package deadlinks
import ( import (

View File

@ -14,8 +14,9 @@ import (
migrate "github.com/rubenv/sql-migrate" migrate "github.com/rubenv/sql-migrate"
) )
// Store keeps track of the current status of all discovered Resources. // Store keeps track of the current status of all discovered Resources, and
// Resources with no incoming links will be periodically cleaned out. // links between them. A Resource which is neither pinned nor linked to from
// another Resource is considered to not exist.
// //
// An implementation of Store must be thread-safe. // An implementation of Store must be thread-safe.
type Store interface { type Store interface {
@ -30,8 +31,10 @@ type Store interface {
SetPinned(context.Context, []URL) error SetPinned(context.Context, []URL) error
// Update updates the Resource identified by the given URL with the given // Update updates the Resource identified by the given URL with the given
// arguments. The Resource must have been Touch'd previously, or this // arguments.
// returns an error. //
// Update returns an error if the URL has neither been pinned nor been
// referenced as an outgoing URL of a different Resource.
Update( Update(
ctx context.Context, ctx context.Context,
now time.Time, now time.Time,
@ -40,6 +43,9 @@ type Store interface {
errorString string, errorString string,
outgoing []URL, outgoing []URL,
) error ) error
// GC will garbage collect the store, removing any orphaned Resources.
GC(context.Context) error
} }
var migrations = &migrate.MemoryMigrationSource{Migrations: []*migrate.Migration{ var migrations = &migrate.MemoryMigrationSource{Migrations: []*migrate.Migration{
@ -74,13 +80,26 @@ var migrations = &migrate.MemoryMigrationSource{Migrations: []*migrate.Migration
}, },
}} }}
/* // SQLiteStoreOpts are optional fields which can be provided to NewSQLiteStore.
TODO // A nil SQLiteStoreOpts is equivalent to an empty one.
- initialization options type SQLiteStoreOpts struct {
- cleanup period // Path to the database file to use.
- document SQLiteStore properly //
- teardown the cleanup goroutine // Defaults to ":memory:", indicating an in-memory database will be used.
*/ Path string
}
// withDefaults returns a copy of the options in which every unset field has
// been replaced by its default value. A nil receiver is treated the same as an
// empty SQLiteStoreOpts.
//
// The receiver is never modified, so an options struct owned by the caller is
// left exactly as it was passed in.
func (o *SQLiteStoreOpts) withDefaults() *SQLiteStoreOpts {
	var out SQLiteStoreOpts
	if o != nil {
		out = *o // work on a copy so the caller's struct stays untouched
	}

	if out.Path == "" {
		// ":memory:" indicates that an in-memory database will be used.
		out.Path = ":memory:"
	}

	return &out
}
type SQLiteStore struct { type SQLiteStore struct {
db *sql.DB db *sql.DB
@ -88,10 +107,12 @@ type SQLiteStore struct {
var _ Store = (*SQLiteStore)(nil) var _ Store = (*SQLiteStore)(nil)
// NewInMemStore returns a Store implementation which uses an in-memory SQLite // NewSQLiteStore returns a Store implementation which uses a SQLite db. The db
// db. // is in-memory unless a Path is given in the SQLiteStoreOpts.
func NewInMemStore() *SQLiteStore { func NewSQLiteStore(o *SQLiteStoreOpts) *SQLiteStore {
db, err := sql.Open("sqlite3", ":memory:?_foreign_keys=1") o = o.withDefaults()
db, err := sql.Open("sqlite3", o.Path+"?_foreign_keys=1")
if err != nil { if err != nil {
panic(fmt.Errorf("opening sqlite in memory: %w", err)) panic(fmt.Errorf("opening sqlite in memory: %w", err))
} }
@ -141,7 +162,8 @@ func (s *SQLiteStore) GetByStatus(status ResourceStatus) miter.Iterator[Resource
JOIN urls ON (urls.id = resources.url_id) JOIN urls ON (urls.id = resources.url_id)
LEFT JOIN incoming ON (incoming.url_id = resources.url_id) LEFT JOIN incoming ON (incoming.url_id = resources.url_id)
LEFT JOIN outgoing ON (outgoing.url_id = resources.url_id) LEFT JOIN outgoing ON (outgoing.url_id = resources.url_id)
WHERE status = ?` WHERE status = ?
AND (pinned OR incoming.urls IS NOT NULL)`
return miter.Lazily(func(ctx context.Context) (miter.Iterator[Resource], error) { return miter.Lazily(func(ctx context.Context) (miter.Iterator[Resource], error) {
rows, err := s.db.QueryContext(ctx, query, status) rows, err := s.db.QueryContext(ctx, query, status)
@ -208,10 +230,18 @@ func (s *SQLiteStore) GetURLsByLastChecked(
olderThan time.Time, olderThan time.Time,
) miter.Iterator[URL] { ) miter.Iterator[URL] {
const query = ` const query = `
WITH
incoming(url_id, urls) AS (
SELECT to_url_id, COUNT(1)
FROM links
GROUP BY to_url_id
)
SELECT url SELECT url
FROM resources FROM resources
JOIN urls ON (urls.id = resources.url_id) JOIN urls ON (urls.id = resources.url_id)
WHERE last_checked < ?` LEFT JOIN incoming ON (incoming.url_id = resources.url_id)
WHERE last_checked < ?
AND (pinned OR incoming.urls IS NOT NULL)`
return miter.Lazily(func(ctx context.Context) (miter.Iterator[URL], error) { return miter.Lazily(func(ctx context.Context) (miter.Iterator[URL], error) {
rows, err := s.db.QueryContext(ctx, query, olderThan.Unix()) rows, err := s.db.QueryContext(ctx, query, olderThan.Unix())
@ -379,7 +409,8 @@ func (s *SQLiteStore) Update(
return nil return nil
} }
func (s *SQLiteStore) deleteOrphans(ctx context.Context) error { // GC implements the method for the Store interface.
func (s *SQLiteStore) GC(ctx context.Context) error {
const query = ` const query = `
WITH orphans AS ( WITH orphans AS (
SELECT url_id FROM resources SELECT url_id FROM resources

View File

@ -20,7 +20,7 @@ func newSQLiteStoreHarness() *sqliteStoreHarness {
var ( var (
ctx = context.Background() ctx = context.Background()
now = time.Now().Truncate(time.Second).UTC() now = time.Now().Truncate(time.Second).UTC()
store = NewInMemStore() store = NewSQLiteStore(nil)
) )
return &sqliteStoreHarness{ return &sqliteStoreHarness{
@ -77,10 +77,11 @@ func TestSQLiteStore(t *testing.T) {
h.assertGetByStatus(t, nil, ResourceStatusOK) h.assertGetByStatus(t, nil, ResourceStatusOK)
h.assertGetByStatus(t, []Resource{a, b}, ResourceStatusUnknown) h.assertGetByStatus(t, []Resource{a, b}, ResourceStatusUnknown)
a.Pinned = false
assert.NoError(t, h.store.SetPinned(h.ctx, []URL{urlB})) assert.NoError(t, h.store.SetPinned(h.ctx, []URL{urlB}))
h.assertGetByStatus(t, nil, ResourceStatusOK) h.assertGetByStatus(t, nil, ResourceStatusOK)
h.assertGetByStatus(t, []Resource{a, b}, ResourceStatusUnknown) // GetByStatus should not return resources which are not pinned and have
// no incoming links
h.assertGetByStatus(t, []Resource{b}, ResourceStatusUnknown)
}) })
t.Run("Update", func(t *testing.T) { t.Run("Update", func(t *testing.T) {
@ -174,10 +175,15 @@ func TestSQLiteStore(t *testing.T) {
assertGetURLsByLastChecked([]URL{urlA, urlC}, nowB) assertGetURLsByLastChecked([]URL{urlA, urlC}, nowB)
assertGetURLsByLastChecked([]URL{urlA, urlC}, nowA.Add(1*time.Second)) assertGetURLsByLastChecked([]URL{urlA, urlC}, nowA.Add(1*time.Second))
assertGetURLsByLastChecked([]URL{urlC}, nowA) assertGetURLsByLastChecked([]URL{urlC}, nowA)
assertGetURLsByLastChecked([]URL{urlC}, h.now)
// A Resource which is not pinned and has no incoming links should not
// be returned
assert.NoError(t, h.store.SetPinned(h.ctx, []URL{urlA, urlB}))
assertGetURLsByLastChecked([]URL{urlA, urlB}, nowB.Add(1*time.Second))
assertGetURLsByLastChecked([]URL{}, nowA)
}) })
t.Run("deleteOrphans", func(t *testing.T) { t.Run("GC", func(t *testing.T) {
t.Parallel() t.Parallel()
var ( var (
@ -203,7 +209,11 @@ func TestSQLiteStore(t *testing.T) {
h.ctx, h.now, urlC, ResourceStatusUnknown, "", []URL{urlD}, h.ctx, h.now, urlC, ResourceStatusUnknown, "", []URL{urlD},
)) ))
assert.NoError(t, h.store.deleteOrphans(h.ctx)) assert.NoError(t, h.store.GC(h.ctx))
h.assertGetByStatus(t, []Resource{a, b}, ResourceStatusUnknown)
// Calling again shouldn't do anything
assert.NoError(t, h.store.GC(h.ctx))
h.assertGetByStatus(t, []Resource{a, b}, ResourceStatusUnknown) h.assertGetByStatus(t, []Resource{a, b}, ResourceStatusUnknown)
}) })
} }