Finish up Store
This commit is contained in:
parent
f08d66b247
commit
571da7e2ac
12
deadlinks.go
12
deadlinks.go
@ -1,5 +1,17 @@
|
|||||||
// Package deadlinks implements a liveness checker for hyperlinks in HTML and
|
// Package deadlinks implements a liveness checker for hyperlinks in HTML and
|
||||||
// gemtext documents.
|
// gemtext documents.
|
||||||
|
//
|
||||||
|
// # Storage
|
||||||
|
//
|
||||||
|
// By default DeadLinks uses an in-memory SQLite database for tracking the
|
||||||
|
// status of resources and the links between them. If memory usage becomes a
|
||||||
|
// problem it is also possible to use a SQLite database file:
|
||||||
|
//
|
||||||
|
// store := deadlinks.NewSQLiteStore(&deadlinks.SQLiteStoreOpts{
|
||||||
|
// Path: "/path/to/db/file.sqlite",
|
||||||
|
// })
|
||||||
|
//
|
||||||
|
// // TODO initialize DeadLinks
|
||||||
package deadlinks
|
package deadlinks
|
||||||
|
|
||||||
import (
|
import (
|
||||||
|
65
store.go
65
store.go
@ -14,8 +14,9 @@ import (
|
|||||||
migrate "github.com/rubenv/sql-migrate"
|
migrate "github.com/rubenv/sql-migrate"
|
||||||
)
|
)
|
||||||
|
|
||||||
// Store keeps track of the current status of all discovered Resources.
|
// Store keeps track of the current status of all discovered Resources, and
|
||||||
// Resources with no incoming links will be periodically cleaned out.
|
// links between them. A Resource which is neither pinned nor linked to from
|
||||||
|
// another Resource is considered to not exist.
|
||||||
//
|
//
|
||||||
// An implementation of Store must be thread-safe.
|
// An implementation of Store must be thread-safe.
|
||||||
type Store interface {
|
type Store interface {
|
||||||
@ -30,8 +31,10 @@ type Store interface {
|
|||||||
SetPinned(context.Context, []URL) error
|
SetPinned(context.Context, []URL) error
|
||||||
|
|
||||||
// Update updates the Resource identified by the given URL with the given
|
// Update updates the Resource identified by the given URL with the given
|
||||||
// arguments. The Resource must have been Touch'd previously, or this
|
// arguments.
|
||||||
// returns an error.
|
//
|
||||||
|
// Update returns an error if the URL has neither been pinned nor referenced as
|
||||||
|
// an outgoing URL of a different Resource.
|
||||||
Update(
|
Update(
|
||||||
ctx context.Context,
|
ctx context.Context,
|
||||||
now time.Time,
|
now time.Time,
|
||||||
@ -40,6 +43,9 @@ type Store interface {
|
|||||||
errorString string,
|
errorString string,
|
||||||
outgoing []URL,
|
outgoing []URL,
|
||||||
) error
|
) error
|
||||||
|
|
||||||
|
// GC will garbage collect the store, removing any orphaned Resources.
|
||||||
|
GC(context.Context) error
|
||||||
}
|
}
|
||||||
|
|
||||||
var migrations = &migrate.MemoryMigrationSource{Migrations: []*migrate.Migration{
|
var migrations = &migrate.MemoryMigrationSource{Migrations: []*migrate.Migration{
|
||||||
@ -74,13 +80,26 @@ var migrations = &migrate.MemoryMigrationSource{Migrations: []*migrate.Migration
|
|||||||
},
|
},
|
||||||
}}
|
}}
|
||||||
|
|
||||||
/*
|
// SQLiteStoreOpts are optional fields which can be provided to NewSQLiteStore.
|
||||||
TODO
|
// A nil SQLiteStoreOpts is equivalent to an empty one.
|
||||||
- initialization options
|
type SQLiteStoreOpts struct {
|
||||||
- cleanup period
|
// Path to the database file to use.
|
||||||
- document SQLiteStore properly
|
//
|
||||||
- teardown the cleanup goroutine
|
// Defaults to ":memory:", indicating an in-memory database will be used.
|
||||||
*/
|
Path string
|
||||||
|
}
|
||||||
|
|
||||||
|
func (o *SQLiteStoreOpts) withDefaults() *SQLiteStoreOpts {
|
||||||
|
if o == nil {
|
||||||
|
o = new(SQLiteStoreOpts)
|
||||||
|
}
|
||||||
|
|
||||||
|
if o.Path == "" {
|
||||||
|
o.Path = ":memory:"
|
||||||
|
}
|
||||||
|
|
||||||
|
return o
|
||||||
|
}
|
||||||
|
|
||||||
type SQLiteStore struct {
|
type SQLiteStore struct {
|
||||||
db *sql.DB
|
db *sql.DB
|
||||||
@ -88,10 +107,12 @@ type SQLiteStore struct {
|
|||||||
|
|
||||||
var _ Store = (*SQLiteStore)(nil)
|
var _ Store = (*SQLiteStore)(nil)
|
||||||
|
|
||||||
// NewInMemStore returns a Store implementation which uses an in-memory SQLite
|
// NewSQLiteStore returns a Store implementation which is backed by a SQLite
|
||||||
// db.
|
// db.
|
||||||
func NewInMemStore() *SQLiteStore {
|
func NewSQLiteStore(o *SQLiteStoreOpts) *SQLiteStore {
|
||||||
db, err := sql.Open("sqlite3", ":memory:?_foreign_keys=1")
|
o = o.withDefaults()
|
||||||
|
|
||||||
|
db, err := sql.Open("sqlite3", o.Path+"?_foreign_keys=1")
|
||||||
if err != nil {
|
if err != nil {
|
||||||
panic(fmt.Errorf("opening sqlite in memory: %w", err))
|
panic(fmt.Errorf("opening sqlite in memory: %w", err))
|
||||||
}
|
}
|
||||||
@ -141,7 +162,8 @@ func (s *SQLiteStore) GetByStatus(status ResourceStatus) miter.Iterator[Resource
|
|||||||
JOIN urls ON (urls.id = resources.url_id)
|
JOIN urls ON (urls.id = resources.url_id)
|
||||||
LEFT JOIN incoming ON (incoming.url_id = resources.url_id)
|
LEFT JOIN incoming ON (incoming.url_id = resources.url_id)
|
||||||
LEFT JOIN outgoing ON (outgoing.url_id = resources.url_id)
|
LEFT JOIN outgoing ON (outgoing.url_id = resources.url_id)
|
||||||
WHERE status = ?`
|
WHERE status = ?
|
||||||
|
AND (pinned OR incoming.urls IS NOT NULL)`
|
||||||
|
|
||||||
return miter.Lazily(func(ctx context.Context) (miter.Iterator[Resource], error) {
|
return miter.Lazily(func(ctx context.Context) (miter.Iterator[Resource], error) {
|
||||||
rows, err := s.db.QueryContext(ctx, query, status)
|
rows, err := s.db.QueryContext(ctx, query, status)
|
||||||
@ -208,10 +230,18 @@ func (s *SQLiteStore) GetURLsByLastChecked(
|
|||||||
olderThan time.Time,
|
olderThan time.Time,
|
||||||
) miter.Iterator[URL] {
|
) miter.Iterator[URL] {
|
||||||
const query = `
|
const query = `
|
||||||
|
WITH
|
||||||
|
incoming(url_id, urls) AS (
|
||||||
|
SELECT to_url_id, COUNT(1)
|
||||||
|
FROM links
|
||||||
|
GROUP BY to_url_id
|
||||||
|
)
|
||||||
SELECT url
|
SELECT url
|
||||||
FROM resources
|
FROM resources
|
||||||
JOIN urls ON (urls.id = resources.url_id)
|
JOIN urls ON (urls.id = resources.url_id)
|
||||||
WHERE last_checked < ?`
|
LEFT JOIN incoming ON (incoming.url_id = resources.url_id)
|
||||||
|
WHERE last_checked < ?
|
||||||
|
AND (pinned OR incoming.urls IS NOT NULL)`
|
||||||
|
|
||||||
return miter.Lazily(func(ctx context.Context) (miter.Iterator[URL], error) {
|
return miter.Lazily(func(ctx context.Context) (miter.Iterator[URL], error) {
|
||||||
rows, err := s.db.QueryContext(ctx, query, olderThan.Unix())
|
rows, err := s.db.QueryContext(ctx, query, olderThan.Unix())
|
||||||
@ -379,7 +409,8 @@ func (s *SQLiteStore) Update(
|
|||||||
return nil
|
return nil
|
||||||
}
|
}
|
||||||
|
|
||||||
func (s *SQLiteStore) deleteOrphans(ctx context.Context) error {
|
// GC implements the method for the Store interface.
|
||||||
|
func (s *SQLiteStore) GC(ctx context.Context) error {
|
||||||
const query = `
|
const query = `
|
||||||
WITH orphans AS (
|
WITH orphans AS (
|
||||||
SELECT url_id FROM resources
|
SELECT url_id FROM resources
|
||||||
|
@ -20,7 +20,7 @@ func newSQLiteStoreHarness() *sqliteStoreHarness {
|
|||||||
var (
|
var (
|
||||||
ctx = context.Background()
|
ctx = context.Background()
|
||||||
now = time.Now().Truncate(time.Second).UTC()
|
now = time.Now().Truncate(time.Second).UTC()
|
||||||
store = NewInMemStore()
|
store = NewSQLiteStore(nil)
|
||||||
)
|
)
|
||||||
|
|
||||||
return &sqliteStoreHarness{
|
return &sqliteStoreHarness{
|
||||||
@ -77,10 +77,11 @@ func TestSQLiteStore(t *testing.T) {
|
|||||||
h.assertGetByStatus(t, nil, ResourceStatusOK)
|
h.assertGetByStatus(t, nil, ResourceStatusOK)
|
||||||
h.assertGetByStatus(t, []Resource{a, b}, ResourceStatusUnknown)
|
h.assertGetByStatus(t, []Resource{a, b}, ResourceStatusUnknown)
|
||||||
|
|
||||||
a.Pinned = false
|
|
||||||
assert.NoError(t, h.store.SetPinned(h.ctx, []URL{urlB}))
|
assert.NoError(t, h.store.SetPinned(h.ctx, []URL{urlB}))
|
||||||
h.assertGetByStatus(t, nil, ResourceStatusOK)
|
h.assertGetByStatus(t, nil, ResourceStatusOK)
|
||||||
h.assertGetByStatus(t, []Resource{a, b}, ResourceStatusUnknown)
|
// GetByStatus should not return resources which are not pinned and have
|
||||||
|
// no incoming links
|
||||||
|
h.assertGetByStatus(t, []Resource{b}, ResourceStatusUnknown)
|
||||||
})
|
})
|
||||||
|
|
||||||
t.Run("Update", func(t *testing.T) {
|
t.Run("Update", func(t *testing.T) {
|
||||||
@ -174,10 +175,15 @@ func TestSQLiteStore(t *testing.T) {
|
|||||||
assertGetURLsByLastChecked([]URL{urlA, urlC}, nowB)
|
assertGetURLsByLastChecked([]URL{urlA, urlC}, nowB)
|
||||||
assertGetURLsByLastChecked([]URL{urlA, urlC}, nowA.Add(1*time.Second))
|
assertGetURLsByLastChecked([]URL{urlA, urlC}, nowA.Add(1*time.Second))
|
||||||
assertGetURLsByLastChecked([]URL{urlC}, nowA)
|
assertGetURLsByLastChecked([]URL{urlC}, nowA)
|
||||||
assertGetURLsByLastChecked([]URL{urlC}, h.now)
|
|
||||||
|
// A Resource which is not pinned and has no incoming links should not
|
||||||
|
// be returned
|
||||||
|
assert.NoError(t, h.store.SetPinned(h.ctx, []URL{urlA, urlB}))
|
||||||
|
assertGetURLsByLastChecked([]URL{urlA, urlB}, nowB.Add(1*time.Second))
|
||||||
|
assertGetURLsByLastChecked([]URL{}, nowA)
|
||||||
})
|
})
|
||||||
|
|
||||||
t.Run("deleteOrphans", func(t *testing.T) {
|
t.Run("GC", func(t *testing.T) {
|
||||||
t.Parallel()
|
t.Parallel()
|
||||||
|
|
||||||
var (
|
var (
|
||||||
@ -203,7 +209,11 @@ func TestSQLiteStore(t *testing.T) {
|
|||||||
h.ctx, h.now, urlC, ResourceStatusUnknown, "", []URL{urlD},
|
h.ctx, h.now, urlC, ResourceStatusUnknown, "", []URL{urlD},
|
||||||
))
|
))
|
||||||
|
|
||||||
assert.NoError(t, h.store.deleteOrphans(h.ctx))
|
assert.NoError(t, h.store.GC(h.ctx))
|
||||||
|
h.assertGetByStatus(t, []Resource{a, b}, ResourceStatusUnknown)
|
||||||
|
|
||||||
|
// Calling again shouldn't do anything
|
||||||
|
assert.NoError(t, h.store.GC(h.ctx))
|
||||||
h.assertGetByStatus(t, []Resource{a, b}, ResourceStatusUnknown)
|
h.assertGetByStatus(t, []Resource{a, b}, ResourceStatusUnknown)
|
||||||
})
|
})
|
||||||
}
|
}
|
||||||
|
Loading…
Reference in New Issue
Block a user