Finish up Store
This commit is contained in:
parent
f08d66b247
commit
571da7e2ac
12
deadlinks.go
12
deadlinks.go
@ -1,5 +1,17 @@
|
||||
// Package deadlinks implements a liveness checker for hyperlinks in HTML and
|
||||
// gemtext documents.
|
||||
//
|
||||
// # Storage
|
||||
//
|
||||
// By default DeadLinks uses an in-memory SQLite database for tracking the
|
||||
// status of resources and the links between them. If memory usage becomes a
|
||||
// problem it is also possible to use a SQLite database file:
|
||||
//
|
||||
// store := deadlinks.NewSQLiteStore(&deadlinks.SQLiteStoreOpts{
|
||||
// Path: "/path/to/db/file.sqlite",
|
||||
// })
|
||||
//
|
||||
// // TODO initialize DeadLinks
|
||||
package deadlinks
|
||||
|
||||
import (
|
||||
|
65
store.go
65
store.go
@ -14,8 +14,9 @@ import (
|
||||
migrate "github.com/rubenv/sql-migrate"
|
||||
)
|
||||
|
||||
// Store keeps track of the current status of all discovered Resources.
|
||||
// Resources with no incoming links will be periodically cleaned out.
|
||||
// Store keeps track of the current status of all discovered Resources, and
|
||||
// links between them. A Resource which is neither pinned nor linked to from
|
||||
// another Resource is considered to not exist.
|
||||
//
|
||||
// An implementation of Store must be thread-safe.
|
||||
type Store interface {
|
||||
@ -30,8 +31,10 @@ type Store interface {
|
||||
SetPinned(context.Context, []URL) error
|
||||
|
||||
// Update updates the Resource identified by the given URL with the given
|
||||
// arguments. The Resource must have been Touch'd previously, or this
|
||||
// returns an error.
|
||||
// arguments.
|
||||
//
|
||||
// Update returns an error if the URL has neither been pinned nor referenced as
|
||||
// an outgoing URL of a different Resource.
|
||||
Update(
|
||||
ctx context.Context,
|
||||
now time.Time,
|
||||
@ -40,6 +43,9 @@ type Store interface {
|
||||
errorString string,
|
||||
outgoing []URL,
|
||||
) error
|
||||
|
||||
// GC will garbage collect the store, removing any orphaned Resources.
|
||||
GC(context.Context) error
|
||||
}
|
||||
|
||||
var migrations = &migrate.MemoryMigrationSource{Migrations: []*migrate.Migration{
|
||||
@ -74,13 +80,26 @@ var migrations = &migrate.MemoryMigrationSource{Migrations: []*migrate.Migration
|
||||
},
|
||||
}}
|
||||
|
||||
/*
|
||||
TODO
|
||||
- initialization options
|
||||
- cleanup period
|
||||
- document SQLiteStore properly
|
||||
- teardown the cleanup goroutine
|
||||
*/
|
||||
// SQLiteStoreOpts are optional fields which can be provided to NewSQLiteStore.
// A nil SQLiteStoreOpts is equivalent to an empty one.
type SQLiteStoreOpts struct {
	// Path to the database file to use.
	//
	// Defaults to ":memory:", indicating an in-memory database will be used.
	Path string
}

// withDefaults returns a copy of o in which every unset field has been
// populated with its default value. It may be called on a nil receiver, in
// which case the returned options contain only defaults.
//
// The receiver is never modified, so a caller's options struct can be safely
// reused after being passed in.
func (o *SQLiteStoreOpts) withDefaults() *SQLiteStoreOpts {
	// Work on a private copy so that defaults are not written back into the
	// struct owned by the caller.
	var opts SQLiteStoreOpts
	if o != nil {
		opts = *o
	}

	if opts.Path == "" {
		opts.Path = ":memory:"
	}

	return &opts
}
|
||||
|
||||
type SQLiteStore struct {
|
||||
db *sql.DB
|
||||
@ -88,10 +107,12 @@ type SQLiteStore struct {
|
||||
|
||||
var _ Store = (*SQLiteStore)(nil)
|
||||
|
||||
// NewInMemStore returns a Store implementation which uses an in-memory SQLite
|
||||
// NewSQLiteStore returns a Store implementation which uses a SQLite
|
||||
// db.
|
||||
func NewInMemStore() *SQLiteStore {
|
||||
db, err := sql.Open("sqlite3", ":memory:?_foreign_keys=1")
|
||||
func NewSQLiteStore(o *SQLiteStoreOpts) *SQLiteStore {
|
||||
o = o.withDefaults()
|
||||
|
||||
db, err := sql.Open("sqlite3", o.Path+"?_foreign_keys=1")
|
||||
if err != nil {
|
||||
panic(fmt.Errorf("opening sqlite in memory: %w", err))
|
||||
}
|
||||
@ -141,7 +162,8 @@ func (s *SQLiteStore) GetByStatus(status ResourceStatus) miter.Iterator[Resource
|
||||
JOIN urls ON (urls.id = resources.url_id)
|
||||
LEFT JOIN incoming ON (incoming.url_id = resources.url_id)
|
||||
LEFT JOIN outgoing ON (outgoing.url_id = resources.url_id)
|
||||
WHERE status = ?`
|
||||
WHERE status = ?
|
||||
AND (pinned OR incoming.urls IS NOT NULL)`
|
||||
|
||||
return miter.Lazily(func(ctx context.Context) (miter.Iterator[Resource], error) {
|
||||
rows, err := s.db.QueryContext(ctx, query, status)
|
||||
@ -208,10 +230,18 @@ func (s *SQLiteStore) GetURLsByLastChecked(
|
||||
olderThan time.Time,
|
||||
) miter.Iterator[URL] {
|
||||
const query = `
|
||||
WITH
|
||||
incoming(url_id, urls) AS (
|
||||
SELECT to_url_id, COUNT(1)
|
||||
FROM links
|
||||
GROUP BY to_url_id
|
||||
)
|
||||
SELECT url
|
||||
FROM resources
|
||||
JOIN urls ON (urls.id = resources.url_id)
|
||||
WHERE last_checked < ?`
|
||||
LEFT JOIN incoming ON (incoming.url_id = resources.url_id)
|
||||
WHERE last_checked < ?
|
||||
AND (pinned OR incoming.urls IS NOT NULL)`
|
||||
|
||||
return miter.Lazily(func(ctx context.Context) (miter.Iterator[URL], error) {
|
||||
rows, err := s.db.QueryContext(ctx, query, olderThan.Unix())
|
||||
@ -379,7 +409,8 @@ func (s *SQLiteStore) Update(
|
||||
return nil
|
||||
}
|
||||
|
||||
func (s *SQLiteStore) deleteOrphans(ctx context.Context) error {
|
||||
// GC implements the method for the Store interface.
|
||||
func (s *SQLiteStore) GC(ctx context.Context) error {
|
||||
const query = `
|
||||
WITH orphans AS (
|
||||
SELECT url_id FROM resources
|
||||
|
@ -20,7 +20,7 @@ func newSQLiteStoreHarness() *sqliteStoreHarness {
|
||||
var (
|
||||
ctx = context.Background()
|
||||
now = time.Now().Truncate(time.Second).UTC()
|
||||
store = NewInMemStore()
|
||||
store = NewSQLiteStore(nil)
|
||||
)
|
||||
|
||||
return &sqliteStoreHarness{
|
||||
@ -77,10 +77,11 @@ func TestSQLiteStore(t *testing.T) {
|
||||
h.assertGetByStatus(t, nil, ResourceStatusOK)
|
||||
h.assertGetByStatus(t, []Resource{a, b}, ResourceStatusUnknown)
|
||||
|
||||
a.Pinned = false
|
||||
assert.NoError(t, h.store.SetPinned(h.ctx, []URL{urlB}))
|
||||
h.assertGetByStatus(t, nil, ResourceStatusOK)
|
||||
h.assertGetByStatus(t, []Resource{a, b}, ResourceStatusUnknown)
|
||||
// GetByStatus should not return resources which are not pinned and have
|
||||
// no incoming links
|
||||
h.assertGetByStatus(t, []Resource{b}, ResourceStatusUnknown)
|
||||
})
|
||||
|
||||
t.Run("Update", func(t *testing.T) {
|
||||
@ -174,10 +175,15 @@ func TestSQLiteStore(t *testing.T) {
|
||||
assertGetURLsByLastChecked([]URL{urlA, urlC}, nowB)
|
||||
assertGetURLsByLastChecked([]URL{urlA, urlC}, nowA.Add(1*time.Second))
|
||||
assertGetURLsByLastChecked([]URL{urlC}, nowA)
|
||||
assertGetURLsByLastChecked([]URL{urlC}, h.now)
|
||||
|
||||
// A Resource which is not pinned and has no incoming links should not
|
||||
// be returned
|
||||
assert.NoError(t, h.store.SetPinned(h.ctx, []URL{urlA, urlB}))
|
||||
assertGetURLsByLastChecked([]URL{urlA, urlB}, nowB.Add(1*time.Second))
|
||||
assertGetURLsByLastChecked([]URL{}, nowA)
|
||||
})
|
||||
|
||||
t.Run("deleteOrphans", func(t *testing.T) {
|
||||
t.Run("GC", func(t *testing.T) {
|
||||
t.Parallel()
|
||||
|
||||
var (
|
||||
@ -203,7 +209,11 @@ func TestSQLiteStore(t *testing.T) {
|
||||
h.ctx, h.now, urlC, ResourceStatusUnknown, "", []URL{urlD},
|
||||
))
|
||||
|
||||
assert.NoError(t, h.store.deleteOrphans(h.ctx))
|
||||
assert.NoError(t, h.store.GC(h.ctx))
|
||||
h.assertGetByStatus(t, []Resource{a, b}, ResourceStatusUnknown)
|
||||
|
||||
// Calling again shouldn't do anything
|
||||
assert.NoError(t, h.store.GC(h.ctx))
|
||||
h.assertGetByStatus(t, []Resource{a, b}, ResourceStatusUnknown)
|
||||
})
|
||||
}
|
||||
|
Loading…
Reference in New Issue
Block a user