Finish up Store

main
Brian Picciano 5 months ago
parent f08d66b247
commit 571da7e2ac
  1. 12
      deadlinks.go
  2. 65
      store.go
  3. 22
      store_test.go

@ -1,5 +1,17 @@
// Package deadlinks implements a liveness checker for hyperlinks in HTML and
// gemtext documents.
//
// # Storage
//
// By default DeadLinks uses an in-memory SQLite database for tracking the
// status of resources and the links between them. If memory usage becomes a
// problem it is also possible to use a SQLite database file:
//
// store := deadlinks.NewSQLiteStore(&deadlinks.SQLiteStoreOpts{
// Path: "/path/to/db/file.sqlite",
// })
//
// // TODO initialize DeadLinks
package deadlinks
import (

@ -14,8 +14,9 @@ import (
migrate "github.com/rubenv/sql-migrate"
)
// Store keeps track of the current status of all discovered Resources.
// Resources with no incoming links will be periodically cleaned out.
// Store keeps track of the current status of all discovered Resources, and
// links between them. A Resource which is neither pinned nor linked to from
// another Resource is considered to not exist.
//
// An implementation of Store must be thread-safe.
type Store interface {
@ -30,8 +31,10 @@ type Store interface {
SetPinned(context.Context, []URL) error
// Update updates the Resource identified by the given URL with the given
// arguments. The Resource must have been Touch'd previously, or this
// returns an error.
// arguments.
//
// Update returns an error if the URL has not been pinned nor referenced as
// an outgoing URL of a different Resource.
Update(
ctx context.Context,
now time.Time,
@ -40,6 +43,9 @@ type Store interface {
errorString string,
outgoing []URL,
) error
// GC will garbage collect the store, removing any orphaned Resources.
GC(context.Context) error
}
var migrations = &migrate.MemoryMigrationSource{Migrations: []*migrate.Migration{
@ -74,13 +80,26 @@ var migrations = &migrate.MemoryMigrationSource{Migrations: []*migrate.Migration
},
}}
/*
TODO
- initialization options
- cleanup period
- document SQLiteStore properly
- teardown the cleanup goroutine
*/
// SQLiteStoreOpts are optional fields which can be provided to NewSQLiteStore.
// A nil SQLiteStoreOpts is equivalent to an empty one.
type SQLiteStoreOpts struct {
// Path to the database file to use.
//
// Defaults to ":memory:", indicating an in-memory database will be used.
//
// NewSQLiteStore appends "?_foreign_keys=1" to this value when opening the
// database.
// NOTE(review): a Path which already contains a "?" query string presumably
// will not combine cleanly with that suffix — confirm against the driver.
Path string
}
// withDefaults returns a copy of o in which every unset field has been
// replaced by its default value. A nil receiver is treated as an empty
// SQLiteStoreOpts.
//
// The receiver itself is never modified, so the options value a caller passes
// in is left exactly as it was provided.
func (o *SQLiteStoreOpts) withDefaults() *SQLiteStoreOpts {
	var out SQLiteStoreOpts
	if o != nil {
		// Work on a copy rather than filling defaults in through the caller's
		// pointer.
		out = *o
	}

	if out.Path == "" {
		out.Path = ":memory:"
	}

	return &out
}
type SQLiteStore struct {
db *sql.DB
@ -88,10 +107,12 @@ type SQLiteStore struct {
// Compile-time assertion that *SQLiteStore implements the Store interface.
var _ Store = (*SQLiteStore)(nil)
// NewInMemStore returns a Store implementation which uses an in-memory SQLite
// NewSQLiteStore returns a Store implementation which uses a SQLite db. The db
// is in-memory unless a Path is set in the given SQLiteStoreOpts.
func NewInMemStore() *SQLiteStore {
db, err := sql.Open("sqlite3", ":memory:?_foreign_keys=1")
func NewSQLiteStore(o *SQLiteStoreOpts) *SQLiteStore {
o = o.withDefaults()
db, err := sql.Open("sqlite3", o.Path+"?_foreign_keys=1")
if err != nil {
panic(fmt.Errorf("opening sqlite in memory: %w", err))
}
@ -141,7 +162,8 @@ func (s *SQLiteStore) GetByStatus(status ResourceStatus) miter.Iterator[Resource
JOIN urls ON (urls.id = resources.url_id)
LEFT JOIN incoming ON (incoming.url_id = resources.url_id)
LEFT JOIN outgoing ON (outgoing.url_id = resources.url_id)
WHERE status = ?`
WHERE status = ?
AND (pinned OR incoming.urls IS NOT NULL)`
return miter.Lazily(func(ctx context.Context) (miter.Iterator[Resource], error) {
rows, err := s.db.QueryContext(ctx, query, status)
@ -208,10 +230,18 @@ func (s *SQLiteStore) GetURLsByLastChecked(
olderThan time.Time,
) miter.Iterator[URL] {
const query = `
WITH
incoming(url_id, urls) AS (
SELECT to_url_id, COUNT(1)
FROM links
GROUP BY to_url_id
)
SELECT url
FROM resources
JOIN urls ON (urls.id = resources.url_id)
WHERE last_checked < ?`
LEFT JOIN incoming ON (incoming.url_id = resources.url_id)
WHERE last_checked < ?
AND (pinned OR incoming.urls IS NOT NULL)`
return miter.Lazily(func(ctx context.Context) (miter.Iterator[URL], error) {
rows, err := s.db.QueryContext(ctx, query, olderThan.Unix())
@ -379,7 +409,8 @@ func (s *SQLiteStore) Update(
return nil
}
func (s *SQLiteStore) deleteOrphans(ctx context.Context) error {
// GC implements the method for the Store interface.
func (s *SQLiteStore) GC(ctx context.Context) error {
const query = `
WITH orphans AS (
SELECT url_id FROM resources

@ -20,7 +20,7 @@ func newSQLiteStoreHarness() *sqliteStoreHarness {
var (
ctx = context.Background()
now = time.Now().Truncate(time.Second).UTC()
store = NewInMemStore()
store = NewSQLiteStore(nil)
)
return &sqliteStoreHarness{
@ -77,10 +77,11 @@ func TestSQLiteStore(t *testing.T) {
h.assertGetByStatus(t, nil, ResourceStatusOK)
h.assertGetByStatus(t, []Resource{a, b}, ResourceStatusUnknown)
a.Pinned = false
assert.NoError(t, h.store.SetPinned(h.ctx, []URL{urlB}))
h.assertGetByStatus(t, nil, ResourceStatusOK)
h.assertGetByStatus(t, []Resource{a, b}, ResourceStatusUnknown)
// GetByStatus should not return resources which are not pinned and have
// no incoming links
h.assertGetByStatus(t, []Resource{b}, ResourceStatusUnknown)
})
t.Run("Update", func(t *testing.T) {
@ -174,10 +175,15 @@ func TestSQLiteStore(t *testing.T) {
assertGetURLsByLastChecked([]URL{urlA, urlC}, nowB)
assertGetURLsByLastChecked([]URL{urlA, urlC}, nowA.Add(1*time.Second))
assertGetURLsByLastChecked([]URL{urlC}, nowA)
assertGetURLsByLastChecked([]URL{urlC}, h.now)
// A Resource which is not pinned and has no incoming links should not
// be returned
assert.NoError(t, h.store.SetPinned(h.ctx, []URL{urlA, urlB}))
assertGetURLsByLastChecked([]URL{urlA, urlB}, nowB.Add(1*time.Second))
assertGetURLsByLastChecked([]URL{}, nowA)
})
t.Run("deleteOrphans", func(t *testing.T) {
t.Run("GC", func(t *testing.T) {
t.Parallel()
var (
@ -203,7 +209,11 @@ func TestSQLiteStore(t *testing.T) {
h.ctx, h.now, urlC, ResourceStatusUnknown, "", []URL{urlD},
))
assert.NoError(t, h.store.deleteOrphans(h.ctx))
assert.NoError(t, h.store.GC(h.ctx))
h.assertGetByStatus(t, []Resource{a, b}, ResourceStatusUnknown)
// Calling again shouldn't do anything
assert.NoError(t, h.store.GC(h.ctx))
h.assertGetByStatus(t, []Resource{a, b}, ResourceStatusUnknown)
})
}

Loading…
Cancel
Save