Initial commit, SQLiteStore is mostly implemented
This commit is contained in:
commit
f08d66b247
78
deadlinks.go
Normal file
78
deadlinks.go
Normal file
@ -0,0 +1,78 @@
|
||||
// Package deadlinks implements a liveness checker for hyperlinks in HTML and
|
||||
// gemtext documents.
|
||||
package deadlinks
|
||||
|
||||
import (
|
||||
"errors"
|
||||
"fmt"
|
||||
"net/url"
|
||||
"time"
|
||||
)
|
||||
|
||||
// URL is a universal resource identifier held in the string form this package
// uses internally (the output of ParseURL).
type URL string

// ParseURL parses urlStr using net/url and returns the re-serialized result as
// a URL. If urlStr is not parseable an empty URL and the parse error are
// returned.
func ParseURL(urlStr string) (URL, error) {
	parsed, parseErr := url.Parse(urlStr)
	if parseErr != nil {
		return "", parseErr
	}

	normalized := parsed.String()
	return URL(normalized), nil
}
|
||||
|
||||
func parseURLs(urlStrs []string) ([]URL, error) {
|
||||
var (
|
||||
res = make([]URL, 0, len(urlStrs))
|
||||
errs []error
|
||||
)
|
||||
for _, urlStr := range urlStrs {
|
||||
u, err := ParseURL(urlStr)
|
||||
if err == nil {
|
||||
res = append(res, u)
|
||||
} else {
|
||||
errs = append(errs, err)
|
||||
}
|
||||
}
|
||||
return res, errors.Join(errs...)
|
||||
}
|
||||
|
||||
// ResourceStatus describes what state a particular Resource is in.
type ResourceStatus int

// Enumeration of ResourceStatus values.
const (
	ResourceStatusUnknown ResourceStatus = iota
	ResourceStatusOK
	ResourceStatusError
)

// String returns the upper-case name of the status ("UNKNOWN", "OK" or
// "ERROR"). It panics if ds is not one of the enumerated values.
func (ds ResourceStatus) String() string {
	// Lookup table indexed by the enumeration values themselves.
	names := [...]string{
		ResourceStatusUnknown: "UNKNOWN",
		ResourceStatusOK:      "OK",
		ResourceStatusError:   "ERROR",
	}

	if ds < 0 || int(ds) >= len(names) {
		panic(fmt.Sprintf("unknown ResourceStatus: %#v", ds))
	}

	return names[ds]
}
|
||||
|
||||
// Resource describes the current state of a resource, with the resource being
// uniquely identified by a URL.
type Resource struct {
	// URL identifies the resource.
	URL URL

	// Status is the most recently recorded state of the resource.
	Status ResourceStatus

	// Pinned reports whether the resource is in the pinned set (see
	// Store.SetPinned).
	Pinned bool

	// LastChecked is the time of the most recent Store.Update for this
	// resource. SQLiteStore leaves it as the zero time when the stored
	// timestamp is 0, i.e. the resource has never been updated.
	LastChecked time.Time

	// only set if Status == ResourceStatusError
	ErrorString string

	// Indicate the URLs of resources which link to/are linked from this
	// resource.
	IncomingLinkURLs, OutgoingLinkURLs []URL
}
|
26
flake.lock
Normal file
26
flake.lock
Normal file
@ -0,0 +1,26 @@
|
||||
{
|
||||
"nodes": {
|
||||
"nixpkgs": {
|
||||
"locked": {
|
||||
"lastModified": 1703351344,
|
||||
"narHash": "sha256-9FEelzftkE9UaJ5nqxidaJJPEhe9TPhbypLHmc2Mysc=",
|
||||
"owner": "NixOS",
|
||||
"repo": "nixpkgs",
|
||||
"rev": "7790e078f8979a9fcd543f9a47427eeaba38f268",
|
||||
"type": "github"
|
||||
},
|
||||
"original": {
|
||||
"id": "nixpkgs",
|
||||
"ref": "nixos-23.05",
|
||||
"type": "indirect"
|
||||
}
|
||||
},
|
||||
"root": {
|
||||
"inputs": {
|
||||
"nixpkgs": "nixpkgs"
|
||||
}
|
||||
}
|
||||
},
|
||||
"root": "root",
|
||||
"version": 7
|
||||
}
|
44
flake.nix
Normal file
44
flake.nix
Normal file
@ -0,0 +1,44 @@
|
||||
{
  description = "deadlinks development environment";

  # Nixpkgs / NixOS version to use.
  inputs.nixpkgs.url = "nixpkgs/nixos-23.05";

  outputs = { self, nixpkgs }:
    let

      # System types to support.
      supportedSystems = [ "x86_64-linux" "x86_64-darwin" "aarch64-linux" "aarch64-darwin" ];

      # Helper function to generate an attrset '{ x86_64-linux = f "x86_64-linux"; ... }'.
      forAllSystems = nixpkgs.lib.genAttrs supportedSystems;

      # Nixpkgs instantiated for supported system types.
      nixpkgsFor = forAllSystems (system: import nixpkgs { inherit system; });

    in
    {

      # Add dependencies that are only needed for development
      devShells = forAllSystems (system:
        let
          pkgs = nixpkgsFor.${system};
        in {
          default = pkgs.mkShell {
            buildInputs = [
              pkgs.go
              pkgs.gotools
              pkgs.golangci-lint
              pkgs.sqlite
            ];
          };
        });
    };
}
|
||||
|
15
go.mod
Normal file
15
go.mod
Normal file
@ -0,0 +1,15 @@
|
||||
module code.betamike.com/mediocregopher/deadlinks

go 1.20

// Modules imported directly by packages (or tests) in this module.
require (
	code.betamike.com/mediocregopher/mediocre-go-lib v0.0.0-20231226160338-0b5bdf3dfb03
	github.com/mattn/go-sqlite3 v1.14.19
	github.com/rubenv/sql-migrate v1.6.0
	github.com/stretchr/testify v1.8.4
)

require (
	github.com/davecgh/go-spew v1.1.1 // indirect
	github.com/go-gorp/gorp/v3 v3.1.0 // indirect
	github.com/pmezard/go-difflib v1.0.0 // indirect
	github.com/stretchr/objx v0.5.0 // indirect
	gopkg.in/yaml.v3 v3.0.1 // indirect
)
|
27
go.sum
Normal file
27
go.sum
Normal file
@ -0,0 +1,27 @@
|
||||
code.betamike.com/mediocregopher/mediocre-go-lib v0.0.0-20231226155808-e8376ef263a7 h1:Dm7oXBLmdPXVo8tPZzrmUvON6zXEdWxbGiO4chqrNHw=
|
||||
code.betamike.com/mediocregopher/mediocre-go-lib v0.0.0-20231226155808-e8376ef263a7/go.mod h1:GJhpoMNnN/OT6O9NmeQBV02yq9kQP8zPyY1IvsslHak=
|
||||
code.betamike.com/mediocregopher/mediocre-go-lib v0.0.0-20231226160338-0b5bdf3dfb03 h1:wJ6X1vc289RpHVGClD1P33yijPoNIdgCXbTn7DjVWYs=
|
||||
code.betamike.com/mediocregopher/mediocre-go-lib v0.0.0-20231226160338-0b5bdf3dfb03/go.mod h1:GJhpoMNnN/OT6O9NmeQBV02yq9kQP8zPyY1IvsslHak=
|
||||
github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
|
||||
github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c=
|
||||
github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
|
||||
github.com/go-gorp/gorp/v3 v3.1.0 h1:ItKF/Vbuj31dmV4jxA1qblpSwkl9g1typ24xoe70IGs=
|
||||
github.com/go-gorp/gorp/v3 v3.1.0/go.mod h1:dLEjIyyRNiXvNZ8PSmzpt1GsWAUK8kjVhEpjH8TixEw=
|
||||
github.com/mattn/go-sqlite3 v1.14.19 h1:fhGleo2h1p8tVChob4I9HpmVFIAkKGpiukdrgQbWfGI=
|
||||
github.com/mattn/go-sqlite3 v1.14.19/go.mod h1:2eHXhiwb8IkHr+BDWZGa96P6+rkvnG63S2DGjv9HUNg=
|
||||
github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM=
|
||||
github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4=
|
||||
github.com/rubenv/sql-migrate v1.6.0 h1:IZpcTlAx/VKXphWEpwWJ7BaMq05tYtE80zYz+8a5Il8=
|
||||
github.com/rubenv/sql-migrate v1.6.0/go.mod h1:m3ilnKP7sNb4eYkLsp6cGdPOl4OBcXM6rcbzU+Oqc5k=
|
||||
github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME=
|
||||
github.com/stretchr/objx v0.4.0/go.mod h1:YvHI0jy2hoMjB+UWwv71VJQ9isScKT/TqJzVSSt89Yw=
|
||||
github.com/stretchr/objx v0.5.0 h1:1zr/of2m5FGMsad5YfcqgdqdWrIhu+EBEJRhR1U7z/c=
|
||||
github.com/stretchr/objx v0.5.0/go.mod h1:Yh+to48EsGEfYuaHDzXPcE3xhTkx73EhmCGUpEOglKo=
|
||||
github.com/stretchr/testify v1.7.1/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg=
|
||||
github.com/stretchr/testify v1.8.0/go.mod h1:yNjHg4UonilssWZ8iaSj1OCr/vHnekPRkoO+kdMU+MU=
|
||||
github.com/stretchr/testify v1.8.4 h1:CcVxjf3Q8PM0mHUKJCdn+eZZtm5yQwehR5yeSVQQcUk=
|
||||
github.com/stretchr/testify v1.8.4/go.mod h1:sz/lmYIOXD/1dqDmKjjqLyZ2RngseejIcXlSw2iwfAo=
|
||||
gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0=
|
||||
gopkg.in/yaml.v3 v3.0.0-20200313102051-9f266ea9e77c/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM=
|
||||
gopkg.in/yaml.v3 v3.0.1 h1:fxVm/GzAzEWqLHuvctI91KS9hhNmmWOoWu0XTYJS7CA=
|
||||
gopkg.in/yaml.v3 v3.0.1/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM=
|
407
store.go
Normal file
407
store.go
Normal file
@ -0,0 +1,407 @@
|
||||
package deadlinks
|
||||
|
||||
import (
|
||||
"context"
|
||||
"database/sql"
|
||||
"errors"
|
||||
"fmt"
|
||||
"strings"
|
||||
"time"
|
||||
|
||||
"code.betamike.com/mediocregopher/mediocre-go-lib/miter"
|
||||
|
||||
_ "github.com/mattn/go-sqlite3"
|
||||
migrate "github.com/rubenv/sql-migrate"
|
||||
)
|
||||
|
||||
// Store keeps track of the current status of all discovered Resources.
// Unpinned Resources with no incoming links will be periodically cleaned out.
//
// An implementation of Store must be thread-safe.
type Store interface {
	// GetByStatus returns all Resources with the given Status.
	GetByStatus(ResourceStatus) miter.Iterator[Resource]

	// GetURLsByLastChecked returns the URLs of all Resources with LastChecked
	// values older than the given timestamp, or which have never been checked.
	GetURLsByLastChecked(olderThan time.Time) miter.Iterator[URL]

	// SetPinned overwrites the set of pinned URLs with the given one. URLs
	// which are not yet known to the Store are added to it.
	SetPinned(context.Context, []URL) error

	// Update records the outcome of checking the Resource identified by the
	// given URL: its status and error string are overwritten, its LastChecked
	// is set to now, and its set of outgoing links is replaced by outgoing.
	//
	// The Resource must already be known to the Store (e.g. because it was
	// pinned via SetPinned, or was an outgoing link of a previous Update), or
	// this returns an error.
	Update(
		ctx context.Context,
		now time.Time,
		url URL,
		status ResourceStatus,
		errorString string,
		outgoing []URL,
	) error
}
|
||||
|
||||
// migrations is the ordered set of schema migrations applied to the SQLite
// database when a store is created.
var migrations = &migrate.MemoryMigrationSource{Migrations: []*migrate.Migration{
	{
		Id: "1",
		Up: []string{
			// urls assigns a stable integer id to every distinct URL string.
			`CREATE TABLE urls (
				id INTEGER NOT NULL PRIMARY KEY,
				url TEXT NOT NULL,
				UNIQUE(url)
			)`,

			// resources holds the per-URL state; a row is removed
			// automatically when its urls row is deleted.
			`CREATE TABLE resources (
				url_id INTEGER NOT NULL PRIMARY KEY,
				status INTEGER NOT NULL DEFAULT 0,
				pinned INTEGER NOT NULL,
				last_checked INTEGER NOT NULL DEFAULT 0,
				error_string TEXT NOT NULL DEFAULT '',
				FOREIGN KEY(url_id) REFERENCES urls(id) ON DELETE CASCADE
			)`,

			// links holds one row per directed (from, to) pair of URLs; rows
			// are removed automatically when either endpoint is deleted.
			`CREATE TABLE links (
				from_url_id INTEGER NOT NULL,
				to_url_id INTEGER NOT NULL,
				FOREIGN KEY(from_url_id) REFERENCES urls(id) ON DELETE CASCADE,
				FOREIGN KEY(to_url_id) REFERENCES urls(id) ON DELETE CASCADE,
				PRIMARY KEY(from_url_id, to_url_id)
			)`,

			// The primary key already covers lookups by from_url_id; this
			// index covers lookups by to_url_id.
			`CREATE INDEX links_outgoing_idx ON links (to_url_id)`,
		},
	},
}}
|
||||
|
||||
/*
|
||||
TODO
|
||||
- initialization options
|
||||
- cleanup period
|
||||
- document SQLiteStore properly
|
||||
- teardown the cleanup goroutine
|
||||
*/
|
||||
|
||||
// SQLiteStore is an implementation of Store which keeps all of its state in a
// SQLite database accessed through database/sql.
type SQLiteStore struct {
	db *sql.DB
}
|
||||
|
||||
var _ Store = (*SQLiteStore)(nil)
|
||||
|
||||
// NewInMemStore returns a Store implementation which uses an in-memory SQLite
|
||||
// db.
|
||||
func NewInMemStore() *SQLiteStore {
|
||||
db, err := sql.Open("sqlite3", ":memory:?_foreign_keys=1")
|
||||
if err != nil {
|
||||
panic(fmt.Errorf("opening sqlite in memory: %w", err))
|
||||
}
|
||||
|
||||
if _, err := migrate.Exec(db, "sqlite3", migrations, migrate.Up); err != nil {
|
||||
panic(fmt.Errorf("running migrations: %w", err))
|
||||
}
|
||||
|
||||
return &SQLiteStore{db}
|
||||
}
|
||||
|
||||
// Close cleans up all resources held by the SQLiteStore store, if any. It must
// be the final method call to the SQLiteStore.
//
// It closes the underlying database handle and returns any error from doing
// so.
func (s *SQLiteStore) Close() error {
	return s.db.Close()
}
|
||||
|
||||
// GetByStatus implements the method for the Store interface.
|
||||
func (s *SQLiteStore) GetByStatus(status ResourceStatus) miter.Iterator[Resource] {
|
||||
const query = `
|
||||
WITH
|
||||
incoming(url_id, urls) AS (
|
||||
SELECT
|
||||
to_url_id,
|
||||
GROUP_CONCAT(url, char(0))
|
||||
FROM links
|
||||
JOIN urls ON (urls.id = links.from_url_id)
|
||||
GROUP BY to_url_id
|
||||
),
|
||||
outgoing(url_id, urls) AS (
|
||||
SELECT
|
||||
from_url_id,
|
||||
GROUP_CONCAT(url, char(0))
|
||||
FROM links
|
||||
JOIN urls ON (urls.id = links.to_url_id)
|
||||
GROUP BY from_url_id
|
||||
)
|
||||
SELECT
|
||||
url,
|
||||
status,
|
||||
pinned,
|
||||
last_checked,
|
||||
error_string,
|
||||
incoming.urls,
|
||||
outgoing.urls
|
||||
FROM resources
|
||||
JOIN urls ON (urls.id = resources.url_id)
|
||||
LEFT JOIN incoming ON (incoming.url_id = resources.url_id)
|
||||
LEFT JOIN outgoing ON (outgoing.url_id = resources.url_id)
|
||||
WHERE status = ?`
|
||||
|
||||
return miter.Lazily(func(ctx context.Context) (miter.Iterator[Resource], error) {
|
||||
rows, err := s.db.QueryContext(ctx, query, status)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("executing query: %w", err)
|
||||
}
|
||||
|
||||
return miter.FromFunc(func(ctx context.Context) (Resource, error) {
|
||||
var (
|
||||
r Resource
|
||||
lastChecked int64
|
||||
incoming, outgoing sql.NullString
|
||||
)
|
||||
|
||||
if !rows.Next() {
|
||||
return Resource{}, errors.Join(rows.Close(), miter.ErrEnd)
|
||||
}
|
||||
|
||||
if err := rows.Scan(
|
||||
&r.URL,
|
||||
&r.Status,
|
||||
&r.Pinned,
|
||||
&lastChecked,
|
||||
&r.ErrorString,
|
||||
&incoming,
|
||||
&outgoing,
|
||||
); err != nil {
|
||||
return Resource{}, errors.Join(
|
||||
rows.Close(), fmt.Errorf("scanning row: %w", err),
|
||||
)
|
||||
}
|
||||
|
||||
if lastChecked != 0 {
|
||||
r.LastChecked = time.Unix(lastChecked, 0).UTC()
|
||||
}
|
||||
|
||||
if incoming.String != "" {
|
||||
if r.IncomingLinkURLs, err = parseURLs(
|
||||
strings.Split(incoming.String, "\x00"),
|
||||
); err != nil {
|
||||
return Resource{}, errors.Join(
|
||||
rows.Close(), fmt.Errorf("parsing incoming links: %w", err),
|
||||
)
|
||||
}
|
||||
}
|
||||
|
||||
if outgoing.String != "" {
|
||||
if r.OutgoingLinkURLs, err = parseURLs(
|
||||
strings.Split(outgoing.String, "\x00"),
|
||||
); err != nil {
|
||||
return Resource{}, errors.Join(
|
||||
rows.Close(), fmt.Errorf("parsing outgoing links: %w", err),
|
||||
)
|
||||
}
|
||||
}
|
||||
|
||||
return r, nil
|
||||
}), nil
|
||||
})
|
||||
}
|
||||
|
||||
// GetURLsByLastChecked implements the method for the Store interface.
|
||||
func (s *SQLiteStore) GetURLsByLastChecked(
|
||||
olderThan time.Time,
|
||||
) miter.Iterator[URL] {
|
||||
const query = `
|
||||
SELECT url
|
||||
FROM resources
|
||||
JOIN urls ON (urls.id = resources.url_id)
|
||||
WHERE last_checked < ?`
|
||||
|
||||
return miter.Lazily(func(ctx context.Context) (miter.Iterator[URL], error) {
|
||||
rows, err := s.db.QueryContext(ctx, query, olderThan.Unix())
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("executing query: %w", err)
|
||||
}
|
||||
|
||||
return miter.FromFunc(func(ctx context.Context) (URL, error) {
|
||||
if !rows.Next() {
|
||||
return "", errors.Join(rows.Close(), miter.ErrEnd)
|
||||
}
|
||||
|
||||
var urlStr string
|
||||
if err := rows.Scan(&urlStr); err != nil {
|
||||
return "", errors.Join(
|
||||
rows.Close(), fmt.Errorf("scanning url: %w", err),
|
||||
)
|
||||
}
|
||||
|
||||
url, err := ParseURL(urlStr)
|
||||
if err != nil {
|
||||
return "", errors.Join(
|
||||
rows.Close(),
|
||||
fmt.Errorf("parsing url %q from db: %w", urlStr, err),
|
||||
)
|
||||
}
|
||||
|
||||
return url, nil
|
||||
}), nil
|
||||
})
|
||||
}
|
||||
|
||||
func (s *SQLiteStore) touch(ctx context.Context, urls []URL, pinned bool) (
|
||||
[]int, error,
|
||||
) {
|
||||
var (
|
||||
urlsQueryParams = make([]any, len(urls))
|
||||
resourcesQueryParams = make([]any, 0, (len(urls)*2)+1)
|
||||
ids = make([]int, 0, len(urls))
|
||||
)
|
||||
|
||||
for i := range urls {
|
||||
urlsQueryParams[i] = urls[i]
|
||||
}
|
||||
|
||||
urlsQuery := `
|
||||
INSERT INTO urls (url)
|
||||
VALUES ` + joinRepeated("(?)", ",", len(urls)) + `
|
||||
ON CONFLICT DO UPDATE SET url=url
|
||||
RETURNING id`
|
||||
|
||||
rows, err := s.db.QueryContext(ctx, urlsQuery, urlsQueryParams...)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("inserting into urls: %w", err)
|
||||
}
|
||||
|
||||
for range urls {
|
||||
if !rows.Next() {
|
||||
return nil, errors.Join(
|
||||
errors.New("expected a returned row"), rows.Close(),
|
||||
)
|
||||
}
|
||||
|
||||
var id int
|
||||
if err := rows.Scan(&id); err != nil {
|
||||
return nil, errors.Join(
|
||||
fmt.Errorf("scanning return from insert into urls: %w", err),
|
||||
rows.Close(),
|
||||
)
|
||||
}
|
||||
|
||||
resourcesQueryParams = append(resourcesQueryParams, id, pinned)
|
||||
ids = append(ids, id)
|
||||
}
|
||||
|
||||
rows.Close()
|
||||
|
||||
resourcesQuery := `
|
||||
INSERT INTO resources (url_id, pinned)
|
||||
VALUES ` + joinRepeated("(?,?)", ",", len(urls)) + `
|
||||
ON CONFLICT DO UPDATE SET pinned = ?`
|
||||
|
||||
resourcesQueryParams = append(resourcesQueryParams, pinned)
|
||||
|
||||
_, err = s.db.ExecContext(ctx, resourcesQuery, resourcesQueryParams...)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("inserting into resources: %w", err)
|
||||
}
|
||||
|
||||
return ids, nil
|
||||
}
|
||||
|
||||
// SetPinned implements the method for the Store interface.
|
||||
func (s *SQLiteStore) SetPinned(ctx context.Context, urls []URL) error {
|
||||
_, err := s.db.ExecContext(ctx, `UPDATE resources SET pinned = 0`)
|
||||
if err != nil {
|
||||
return fmt.Errorf("unsetting pinned on all resources: %w", err)
|
||||
}
|
||||
|
||||
if _, err := s.touch(ctx, urls, true); err != nil {
|
||||
return fmt.Errorf("pinning resources: %w", err)
|
||||
}
|
||||
|
||||
return err
|
||||
}
|
||||
|
||||
// Update implements the method for the Store interface.
|
||||
func (s *SQLiteStore) Update(
|
||||
ctx context.Context,
|
||||
now time.Time,
|
||||
url URL,
|
||||
status ResourceStatus,
|
||||
errorString string,
|
||||
outgoing []URL,
|
||||
) error {
|
||||
const resourcesQuery = `
|
||||
UPDATE resources
|
||||
SET
|
||||
status = ?,
|
||||
last_checked = ?,
|
||||
error_string = ?
|
||||
WHERE url_id = (SELECT id FROM urls WHERE url = ?)
|
||||
RETURNING url_id`
|
||||
|
||||
var urlID int
|
||||
|
||||
err := s.db.QueryRowContext(
|
||||
ctx, resourcesQuery, status, now.Unix(), errorString, url,
|
||||
).Scan(&urlID)
|
||||
if err != nil {
|
||||
return fmt.Errorf("inserting into resources: %w", err)
|
||||
}
|
||||
|
||||
_, err = s.db.ExecContext(
|
||||
ctx, `DELETE FROM links WHERE from_url_id = ?`, urlID,
|
||||
)
|
||||
if err != nil {
|
||||
return fmt.Errorf("deleting from links: %w", err)
|
||||
}
|
||||
|
||||
if len(outgoing) == 0 {
|
||||
return nil
|
||||
}
|
||||
|
||||
outgoingIDs, err := s.touch(ctx, outgoing, false)
|
||||
if err != nil {
|
||||
return fmt.Errorf("touching outgoing links: %w", err)
|
||||
}
|
||||
|
||||
linksQueryParams := make([]any, 0, len(outgoingIDs)*2)
|
||||
for i := range outgoingIDs {
|
||||
linksQueryParams = append(linksQueryParams, urlID, outgoingIDs[i])
|
||||
}
|
||||
|
||||
linksQuery := `
|
||||
INSERT INTO links (from_url_id, to_url_id)
|
||||
VALUES ` + joinRepeated("(?,?)", ",", len(outgoing)) + `
|
||||
ON CONFLICT DO NOTHING`
|
||||
|
||||
_, err = s.db.ExecContext(ctx, linksQuery, linksQueryParams...)
|
||||
if err != nil {
|
||||
return fmt.Errorf("inserting into links: %w", err)
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
func (s *SQLiteStore) deleteOrphans(ctx context.Context) error {
|
||||
const query = `
|
||||
WITH orphans AS (
|
||||
SELECT url_id FROM resources
|
||||
LEFT JOIN links ON (links.to_url_id = resources.url_id)
|
||||
WHERE pinned = 0 AND from_url_id IS NULL
|
||||
)
|
||||
DELETE FROM urls WHERE id IN orphans
|
||||
`
|
||||
|
||||
for {
|
||||
res, err := s.db.ExecContext(ctx, query)
|
||||
if err != nil {
|
||||
return fmt.Errorf("performing delete: %w", err)
|
||||
} else if n, err := res.RowsAffected(); err != nil {
|
||||
return fmt.Errorf("determining rows affected: %w", err)
|
||||
} else if n == 0 {
|
||||
return nil
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// joinRepeated returns str repeated n times with sep between consecutive
// copies, e.g. joinRepeated("(?)", ",", 3) returns "(?),(?),(?)". It returns
// the empty string if n <= 0.
func joinRepeated(str, sep string, n int) string {
	if n <= 0 {
		return ""
	}

	return strings.Repeat(str+sep, n-1) + str
}
|
209
store_test.go
Normal file
209
store_test.go
Normal file
@ -0,0 +1,209 @@
|
||||
package deadlinks
|
||||
|
||||
import (
|
||||
"context"
|
||||
"sort"
|
||||
"testing"
|
||||
"time"
|
||||
|
||||
"code.betamike.com/mediocregopher/mediocre-go-lib/miter"
|
||||
"github.com/stretchr/testify/assert"
|
||||
)
|
||||
|
||||
type sqliteStoreHarness struct {
|
||||
ctx context.Context
|
||||
now time.Time
|
||||
store *SQLiteStore
|
||||
}
|
||||
|
||||
func newSQLiteStoreHarness() *sqliteStoreHarness {
|
||||
var (
|
||||
ctx = context.Background()
|
||||
now = time.Now().Truncate(time.Second).UTC()
|
||||
store = NewInMemStore()
|
||||
)
|
||||
|
||||
return &sqliteStoreHarness{
|
||||
ctx, now, store,
|
||||
}
|
||||
}
|
||||
|
||||
func (h *sqliteStoreHarness) assertGetByStatus(
|
||||
t *testing.T, want []Resource, status ResourceStatus,
|
||||
) {
|
||||
norm := func(rr []Resource) {
|
||||
for i, r := range rr {
|
||||
sort.Slice(r.IncomingLinkURLs, func(i, j int) bool {
|
||||
return r.IncomingLinkURLs[i] < r.IncomingLinkURLs[j]
|
||||
})
|
||||
sort.Slice(r.OutgoingLinkURLs, func(i, j int) bool {
|
||||
return r.OutgoingLinkURLs[i] < r.OutgoingLinkURLs[j]
|
||||
})
|
||||
rr[i] = r
|
||||
}
|
||||
|
||||
sort.Slice(rr, func(i, j int) bool {
|
||||
return rr[i].URL < rr[j].URL
|
||||
})
|
||||
}
|
||||
|
||||
got, err := miter.ToSlice(h.ctx, h.store.GetByStatus(status))
|
||||
assert.NoError(t, err)
|
||||
|
||||
norm(want)
|
||||
norm(got)
|
||||
assert.Equal(t, want, got)
|
||||
}
|
||||
|
||||
// TestSQLiteStore exercises SQLiteStore's methods. Every sub-test runs in
// parallel against its own in-memory store.
func TestSQLiteStore(t *testing.T) {
	t.Parallel()

	t.Run("SetPinned", func(t *testing.T) {
		t.Parallel()

		var (
			h    = newSQLiteStoreHarness()
			urlA = URL("https://a.com")
			urlB = URL("https://b.com")
			a    = Resource{URL: urlA, Pinned: true}
			b    = Resource{URL: urlB, Pinned: true}
		)

		// pinning an unknown URL creates its resource with unknown status
		assert.NoError(t, h.store.SetPinned(h.ctx, []URL{urlA}))
		h.assertGetByStatus(t, nil, ResourceStatusOK)
		h.assertGetByStatus(t, []Resource{a}, ResourceStatusUnknown)

		assert.NoError(t, h.store.SetPinned(h.ctx, []URL{urlA, urlB}))
		h.assertGetByStatus(t, nil, ResourceStatusOK)
		h.assertGetByStatus(t, []Resource{a, b}, ResourceStatusUnknown)

		// leaving a URL out of the pinned set un-pins it but does not remove
		// it
		a.Pinned = false
		assert.NoError(t, h.store.SetPinned(h.ctx, []URL{urlB}))
		h.assertGetByStatus(t, nil, ResourceStatusOK)
		h.assertGetByStatus(t, []Resource{a, b}, ResourceStatusUnknown)
	})

	t.Run("Update", func(t *testing.T) {
		t.Parallel()

		var (
			h = newSQLiteStoreHarness()

			urlA = URL("https://a.com")
			urlB = URL("https://b.com")
			urlC = URL("https://c.com")

			a = Resource{URL: urlA, Pinned: true}
			b = Resource{URL: urlB, Pinned: true}
			c = Resource{URL: urlC}
		)

		// updating a non-existing URL should fail and make no changes
		assert.Error(t, h.store.Update(h.ctx, h.now, urlA, ResourceStatusOK, "errstr", nil))
		h.assertGetByStatus(t, nil, ResourceStatusUnknown)
		h.assertGetByStatus(t, nil, ResourceStatusOK)

		assert.NoError(t, h.store.SetPinned(h.ctx, []URL{urlA, urlB}))

		// a successful check moves the resource to the OK status
		assert.NoError(t, h.store.Update(h.ctx, h.now, urlA, ResourceStatusOK, "", nil))
		a.LastChecked = h.now
		a.Status = ResourceStatusOK
		h.assertGetByStatus(t, []Resource{b}, ResourceStatusUnknown)
		h.assertGetByStatus(t, []Resource{a}, ResourceStatusOK)

		// a failed check records the error string
		assert.NoError(t, h.store.Update(h.ctx, h.now, urlB, ResourceStatusError, "error!", nil))
		b.LastChecked = h.now
		b.Status = ResourceStatusError
		b.ErrorString = "error!"
		// NOTE(review): the following assertion appears twice in a row; the
		// second looks like a copy-paste leftover.
		h.assertGetByStatus(t, nil, ResourceStatusUnknown)
		h.assertGetByStatus(t, nil, ResourceStatusUnknown)
		h.assertGetByStatus(t, []Resource{a}, ResourceStatusOK)
		h.assertGetByStatus(t, []Resource{b}, ResourceStatusError)

		// an outgoing link to an unknown URL creates that resource and links
		// the two
		assert.NoError(t, h.store.Update(
			h.ctx, h.now, urlA, ResourceStatusOK, "", []URL{urlC},
		))
		a.OutgoingLinkURLs = []URL{urlC}
		c.IncomingLinkURLs = []URL{urlA}
		h.assertGetByStatus(t, []Resource{c}, ResourceStatusUnknown)
		h.assertGetByStatus(t, []Resource{a}, ResourceStatusOK)
		h.assertGetByStatus(t, []Resource{b}, ResourceStatusError)

		// a second resource linking to the same URL adds another incoming
		// link; the previous error state of b is cleared
		assert.NoError(t, h.store.Update(
			h.ctx, h.now, urlB, ResourceStatusOK, "", []URL{urlC},
		))
		b.Status = ResourceStatusOK
		b.ErrorString = ""
		b.OutgoingLinkURLs = []URL{urlC}
		c.IncomingLinkURLs = []URL{urlA, urlB}
		h.assertGetByStatus(t, []Resource{c}, ResourceStatusUnknown)
		h.assertGetByStatus(t, []Resource{a, b}, ResourceStatusOK)
		h.assertGetByStatus(t, nil, ResourceStatusError)
	})

	t.Run("GetURLsByLastChecked", func(t *testing.T) {
		t.Parallel()

		var (
			h    = newSQLiteStoreHarness()
			nowA = h.now
			nowB = h.now.Add(1 * time.Minute)

			urlA = URL("https://a.com")
			urlB = URL("https://b.com")
			urlC = URL("https://c.com")
		)

		assert.NoError(t, h.store.SetPinned(h.ctx, []URL{urlA, urlB, urlC}))

		// urlA is checked at nowA, urlB at nowB, urlC is never checked
		assert.NoError(t, h.store.Update(
			h.ctx, nowA, urlA, ResourceStatusOK, "", nil,
		))

		assert.NoError(t, h.store.Update(
			h.ctx, nowB, urlB, ResourceStatusOK, "", nil,
		))

		assertGetURLsByLastChecked := func(want []URL, olderThan time.Time) {
			got, err := miter.ToSlice(h.ctx, h.store.GetURLsByLastChecked(olderThan))
			assert.NoError(t, err)
			assert.ElementsMatch(t, want, got)
		}

		// the comparison is strict: a resource checked exactly at olderThan
		// is not returned
		assertGetURLsByLastChecked([]URL{urlA, urlB, urlC}, nowB.Add(1*time.Second))
		assertGetURLsByLastChecked([]URL{urlA, urlC}, nowB)
		assertGetURLsByLastChecked([]URL{urlA, urlC}, nowA.Add(1*time.Second))
		assertGetURLsByLastChecked([]URL{urlC}, nowA)
		// NOTE(review): h.now is the same instant as nowA, so this repeats
		// the previous assertion.
		assertGetURLsByLastChecked([]URL{urlC}, h.now)
	})

	t.Run("deleteOrphans", func(t *testing.T) {
		t.Parallel()

		var (
			h = newSQLiteStoreHarness()

			urlA = URL("https://a.com")
			urlB = URL("https://b.com")
			urlC = URL("https://c.com")
			urlD = URL("https://d.com")

			a = Resource{URL: urlA, Pinned: true, LastChecked: h.now, OutgoingLinkURLs: []URL{urlB}}
			b = Resource{URL: urlB, IncomingLinkURLs: []URL{urlA}}
		)

		// create all four resources, then leave only urlA pinned
		assert.NoError(t, h.store.SetPinned(h.ctx, []URL{urlA, urlB, urlC, urlD}))
		assert.NoError(t, h.store.SetPinned(h.ctx, []URL{urlA}))

		// urlB is kept alive by the link from the pinned urlA
		assert.NoError(t, h.store.Update(
			h.ctx, h.now, urlA, ResourceStatusUnknown, "", []URL{urlB},
		))

		// urlD is only linked from urlC, which is itself unpinned and has no
		// incoming links
		assert.NoError(t, h.store.Update(
			h.ctx, h.now, urlC, ResourceStatusUnknown, "", []URL{urlD},
		))

		// urlC goes first, which leaves urlD without incoming links, so it
		// goes too
		assert.NoError(t, h.store.deleteOrphans(h.ctx))
		h.assertGetByStatus(t, []Resource{a, b}, ResourceStatusUnknown)
	})
}
|
Loading…
Reference in New Issue
Block a user