Compare commits

...

3 Commits

30 changed files with 383 additions and 556 deletions

View File

@ -118,19 +118,6 @@ in rec {
'';
};
# tests is a script named "isle-tests" which runs the shell-based test suite.
# It restricts PATH to the bin directories of the packages listed below, sets
# SHELL to the nix-provided bash, and then hands control (along with all
# command-line arguments) to tests/entrypoint.sh.
tests = pkgs.writeScript "isle-tests" ''
export PATH=${pkgs.lib.makeBinPath [
build.appImage
pkgs.busybox
pkgs.yq-go
pkgs.jq
pkgs.dig
pkgs.nebula
]}
export SHELL=${pkgs.bash}/bin/bash
exec ${pkgs.bash}/bin/bash ${./tests}/entrypoint.sh "$@"
'';
devShell = pkgs.mkShell {
buildInputs = [
pkgs.go

View File

@ -11,5 +11,6 @@ to better understand how to navigate and work on the codebase.
These pages can be helpful in specific situations.
* [Building Isle](./building.md)
* [Testing Isle](./testing.md)
* [Rebuilding Documentation](./rebuilding-documentation.md)
* [Releases](./releases.md)

39
docs/dev/testing.md Normal file
View File

@ -0,0 +1,39 @@
# Testing Isle
All tests are currently written as Go tests, and as such can be run from the
`go` directory using the standard `go test` tool.
```
cd go
go test -run Foo ./daemon
go test ./... # Test everything
```
## Integration Tests
Integration tests are those which require processes or state external to the
test itself. Integration tests are marked using the
`toolkit.MarkIntegrationTest` function, which will cause them to be skipped
unless being run in the integration test environment.
Besides a normal nix installation (like all Isle development needs), integration
tests also require `sudo` and [capsh][capsh] to be installed on the system.
[capsh]: https://www.man7.org/linux/man-pages/man1/capsh.1.html
Running tests via the `go/integration_test.sh` script will automatically run
them in the integration test environment. All arguments are passed directly to
the go testing tool.
```
cd go
./integration_test.sh -run Foo ./daemon
```
`integration_test.sh` wraps a call to `go test` in a bash shell which has all
required binaries available to it, and which has acquired necessary
[capabilities][capabilities] to use the binaries as needed. Acquiring
capabilities is done by elevating the user to root using `sudo`, and then
dropping them back down to a shell of the original user with capabilities set.
[capabilities]: https://wiki.archlinux.org/title/Capabilities

View File

@ -4,26 +4,26 @@ import (
"isle/garage"
)
// GaragePeers returns a Peer for each known garage instance in the network.
func (b Bootstrap) GaragePeers() []garage.RemotePeer {
var peers []garage.RemotePeer
// GarageNodes returns a Node for each known garage instance in the network.
func (b Bootstrap) GarageNodes() []garage.RemoteNode {
var nodes []garage.RemoteNode
for _, host := range b.Hosts {
peers = append(peers, host.GaragePeers()...)
nodes = append(nodes, host.GarageNodes()...)
}
return peers
return nodes
}
// ChooseGaragePeer returns a Peer for a garage instance from the network. It
// will prefer a garage instance on this particular host, if there is one, but
// will otherwise return a random endpoint.
func (b Bootstrap) ChooseGaragePeer() garage.RemotePeer {
// ChooseGarageNode returns a RemoteNode for a garage instance from the network.
// It will prefer a garage instance on this particular host, if there is one,
// but will otherwise return a random endpoint.
func (b Bootstrap) ChooseGarageNode() garage.RemoteNode {
thisHost := b.ThisHost()
if len(thisHost.Garage.Instances) > 0 {
return thisHost.GaragePeers()[0]
return thisHost.GarageNodes()[0]
}
for _, peer := range b.GaragePeers() {
return peer
for _, node := range b.GarageNodes() {
return node
}
panic("no garage instances configured")

View File

@ -93,17 +93,17 @@ func (h Host) IP() netip.Addr {
return addr
}
// GaragePeers returns a RemotePeer for each garage instance advertised by this
// GarageNodes returns a RemoteNode for each garage instance advertised by this
// Host.
func (h Host) GaragePeers() []garage.RemotePeer {
var peers []garage.RemotePeer
func (h Host) GarageNodes() []garage.RemoteNode {
var nodes []garage.RemoteNode
for _, instance := range h.Garage.Instances {
peers = append(peers, garage.RemotePeer{
nodes = append(nodes, garage.RemoteNode{
ID: instance.ID,
IP: h.IP().String(),
RPCPort: instance.RPCPort,
S3APIPort: instance.S3APIPort,
})
}
return peers
return nodes
}

View File

@ -56,7 +56,7 @@ var subCmdGarageMC = subCmd{
return fmt.Errorf("calling GetGarageClientParams: %w", err)
}
s3APIAddr := clientParams.Peer.S3APIAddr()
s3APIAddr := clientParams.Node.S3APIAddr()
if *keyID == "" {
*keyID = clientParams.GlobalBucketS3APICredentials.ID
@ -135,7 +135,7 @@ var subCmdGarageCLI = subCmd{
args = append([]string{"garage"}, ctx.args...)
cliEnv = append(
os.Environ(),
"GARAGE_RPC_HOST="+clientParams.Peer.RPCPeerAddr(),
"GARAGE_RPC_HOST="+clientParams.Node.RPCNodeAddr(),
"GARAGE_RPC_SECRET="+clientParams.RPCSecret,
)
)

View File

@ -184,7 +184,6 @@ func (c *Children) reloadNebula(
return nil
}
// TODO this doesn't handle removing garage nodes
func (c *Children) reloadGarage(
ctx context.Context,
networkConfig daecommon.NetworkConfig,
@ -206,6 +205,8 @@ func (c *Children) reloadGarage(
)
)
// TODO it's possible that the config changed, but only the bootstrap
// peers, in which case we don't need to restart the node.
childConfigPath, changed, err := garageWriteChildConfig(
ctx,
c.logger,

View File

@ -76,8 +76,8 @@ func garageWriteChildConfig(
thisHost = hostBootstrap.ThisHost()
id = daecommon.BootstrapGarageHostForAlloc(thisHost, alloc).ID
peer = garage.LocalPeer{
RemotePeer: garage.RemotePeer{
node = garage.LocalNode{
RemoteNode: garage.RemoteNode{
ID: id,
IP: thisHost.IP().String(),
RPCPort: alloc.RPCPort,
@ -102,8 +102,8 @@ func garageWriteChildConfig(
RPCSecret: rpcSecret,
AdminToken: adminToken,
LocalPeer: peer,
BootstrapPeers: hostBootstrap.GaragePeers(),
LocalNode: node,
BootstrapPeers: hostBootstrap.GarageNodes(),
},
)

View File

@ -12,13 +12,16 @@ import (
"isle/nebula"
"isle/secrets"
"net"
"net/netip"
"path/filepath"
"slices"
"strconv"
"time"
"dev.mediocregopher.com/mediocre-go-lib.git/mctx"
"dev.mediocregopher.com/mediocre-go-lib.git/mlog"
"github.com/minio/minio-go/v7"
"golang.org/x/exp/maps"
)
// Paths within garage's global bucket.
@ -46,7 +49,7 @@ func getGarageClientParams(
}
return GarageClientParams{
Peer: currBootstrap.ChooseGaragePeer(),
Node: currBootstrap.ChooseGarageNode(),
GlobalBucketS3APICredentials: creds,
RPCSecret: rpcSecret,
}, nil
@ -88,8 +91,8 @@ func garageApplyLayout(
)
hostName = currHost.Name
allocs = networkConfig.Storage.Allocations
peers = make([]garage.PeerLayout, len(allocs))
peerIDs = map[string]struct{}{}
roles = make([]garage.Role, len(allocs))
roleIDs = map[string]struct{}{}
idsToRemove = make([]string, 0, len(prevHost.Garage.Instances))
)
@ -98,14 +101,14 @@ func garageApplyLayout(
for i, alloc := range allocs {
id := daecommon.BootstrapGarageHostForAlloc(currHost, alloc).ID
peerIDs[id] = struct{}{}
roleIDs[id] = struct{}{}
zone := string(hostName)
if alloc.Zone != "" {
zone = alloc.Zone
}
peers[i] = garage.PeerLayout{
roles[i] = garage.Role{
ID: id,
Capacity: alloc.Capacity * 1_000_000_000,
Zone: zone,
@ -114,12 +117,12 @@ func garageApplyLayout(
}
for _, prevInst := range prevHost.Garage.Instances {
if _, ok := peerIDs[prevInst.ID]; !ok {
if _, ok := roleIDs[prevInst.ID]; !ok {
idsToRemove = append(idsToRemove, prevInst.ID)
}
}
return adminClient.ApplyLayout(ctx, peers, idsToRemove)
return adminClient.ApplyLayout(ctx, roles, idsToRemove)
}
func garageInitializeGlobalBucket(
@ -341,3 +344,67 @@ func garageWaitForAlloc(
return nil
}
}
// garageNodeBuddyPeers picks the host which the given host is responsible for
// (its "buddy") and returns that host's IP along with the layout roles of the
// buddy's garage nodes, based on the given garage cluster status. Zero values
// are returned when there is no buddy to pick.
//
// When one host has to modify the cluster layout on behalf of another host's
// nodes, we want a single host to be the one doing it. This is not meant to
// be 100% race-proof (garage handles that); it only tries to avoid all hosts
// modifying the same host's layout at the same time.
//
// Only nodes which have a role in the layout are considered. The IPs of those
// nodes are sorted, and each host is responsible for the IP immediately after
// its own in that order. The host with the last IP wraps around and is
// responsible for the first.
//
// NOTE(review): a host whose IP does not appear in the sorted list at all is
// also handed the first IP as its buddy, exactly like the last host in the
// list. Confirm that this fallback is intended.
func garageNodeBuddyPeers(
	status garage.ClusterStatus, host bootstrap.Host,
) (
	netip.Addr, []garage.Role,
) {
	self := host.IP()

	// Index the layout's roles so each known node can be matched to its role.
	roleIndex := make(map[string]garage.Role, len(status.Layout.Roles))
	for _, r := range status.Layout.Roles {
		roleIndex[r.ID] = r
	}

	// Group roles by the IP of the node they belong to. Nodes without a role
	// in the layout are ignored.
	rolesPerIP := map[netip.Addr][]garage.Role{}
	for _, knownNode := range status.Nodes {
		if r, found := roleIndex[knownNode.ID]; found {
			addr := knownNode.Addr.Addr()
			rolesPerIP[addr] = append(rolesPerIP[addr], r)
		}
	}

	// If there is only a single host in the cluster (or, somehow, none) then
	// that host has no buddy.
	if len(rolesPerIP) < 2 {
		return netip.Addr{}, nil
	}

	sortedIPs := maps.Keys(rolesPerIP)
	slices.SortFunc(sortedIPs, netip.Addr.Compare)

	// Default to wrapping around to the first IP. That default is only
	// overridden when this host sits somewhere before the final position, in
	// which case the buddy is simply the next IP in line.
	buddy := sortedIPs[0]
	if idx := slices.Index(sortedIPs[:len(sortedIPs)-1], self); idx >= 0 {
		buddy = sortedIPs[idx+1]
	}

	return buddy, rolesPerIP[buddy]
}

View File

@ -31,7 +31,7 @@ import (
// GarageClientParams contains all the data needed to instantiate garage
// clients.
type GarageClientParams struct {
Peer garage.RemotePeer
Node garage.RemoteNode
GlobalBucketS3APICredentials garage.S3APICredentials
// RPCSecret may be empty, if the secret is not available on the host.
@ -44,7 +44,7 @@ type GarageClientParams struct {
// the global bucket.
func (p GarageClientParams) GlobalBucketS3APIClient() *garage.S3APIClient {
var (
addr = p.Peer.S3APIAddr()
addr = p.Node.S3APIAddr()
creds = p.GlobalBucketS3APICredentials
)
return garage.NewS3APIClient(addr, creds)
@ -419,7 +419,7 @@ func (n *network) initializeDirs(mayExist bool) error {
}
func (n *network) periodically(
logger *mlog.Logger,
label string,
fn func(context.Context) error,
period time.Duration,
) {
@ -427,13 +427,13 @@ func (n *network) periodically(
go func() {
defer n.wg.Done()
ctx := mctx.Annotate(n.workerCtx, "period", period)
ctx := mctx.Annotate(n.workerCtx, "workerLabel", label)
ticker := time.NewTicker(period)
defer ticker.Stop()
logger.Info(ctx, "Starting background job runner")
defer logger.Info(ctx, "Stopping background job runner")
n.logger.Info(ctx, "Starting background job runner")
defer n.logger.Info(ctx, "Stopping background job runner")
for {
select {
@ -441,9 +441,9 @@ func (n *network) periodically(
return
case <-ticker.C:
logger.Info(ctx, "Background job running")
n.logger.Info(ctx, "Background job running")
if err := fn(ctx); err != nil {
logger.Error(ctx, "Background job failed", err)
n.logger.Error(ctx, "Background job failed", err)
}
}
}
@ -510,10 +510,10 @@ func (n *network) initialize(
return fmt.Errorf("Reloading network bootstrap: %w", err)
}
n.periodically("reloadHosts", n.reloadHosts, 3*time.Minute)
n.periodically(
n.logger.WithNamespace("reloadHosts"),
n.reloadHosts,
3*time.Minute,
"removeOrphanGarageNodes", n.removeOrphanGarageNodes, 1*time.Minute,
)
return nil
@ -531,7 +531,7 @@ func (n *network) postChildrenInit(
thisHost := n.currBootstrap.ThisHost()
if len(prevThisHost.Garage.Instances)+len(thisHost.Garage.Instances) > 0 {
if len(thisHost.Garage.Instances) > 0 {
n.logger.Info(ctx, "Applying garage layout")
if err := garageApplyLayout(
ctx,
@ -618,6 +618,77 @@ func (n *network) reloadHosts(ctx context.Context) error {
return nil
}
// removeOrphanGarageNodes removes from the garage cluster layout any nodes
// belonging to this host's "buddy" (see garageNodeBuddyPeers), if that buddy
// no longer advertises any garage instances in the bootstrap or is no longer
// part of the network at all.
//
// Normally every host maintains the layout of its own storage allocations
// using garageApplyLayout. That is not possible in these situations:
//
//   - A host removes all of its allocations via SetConfig.
//   - A host removes all of its allocations by calling Load with no
//     allocations in the provided daecommon.NetworkConfig.
//   - A host is removed from the network by another host.
//
// In each of them the affected host has no running garage instance left, so
// it cannot call garageApplyLayout for itself. To make up for that, hosts
// which do have garage instances periodically run this method to clean up
// nodes which were left orphaned in the cluster layout.
func (n *network) removeOrphanGarageNodes(ctx context.Context) error {
	n.l.RLock()
	defer n.l.RUnlock()

	localHost := n.currBootstrap.ThisHost()
	if len(localHost.Garage.Instances) == 0 {
		n.logger.Info(ctx, "No local garage instances, cannot remove orphans")
		return nil
	}

	client := newGarageAdminClient(
		n.logger, n.networkConfig, n.opts.GarageAdminToken, localHost,
	)
	defer client.Close()

	status, err := client.Status(ctx)
	if err != nil {
		return fmt.Errorf("retrieving garage cluster status: %w", err)
	}

	buddyIP, buddyRoles := garageNodeBuddyPeers(status, localHost)
	if len(buddyRoles) == 0 {
		return nil
	}

	ctx = mctx.Annotate(ctx, "buddyIP", buddyIP)

	// Look the buddy up in the bootstrap. If it is still there and still
	// advertises garage instances then its nodes are not orphans.
	for _, h := range n.currBootstrap.Hosts {
		if h.IP() != buddyIP {
			continue
		}

		if len(h.Garage.Instances) > 0 {
			n.logger.Info(ctx, "Buddy instance has garage nodes configured in its bootstrap, doing nothing")
			return nil
		}

		break
	}

	// Either the host is no longer in the network, or it no longer has any
	// garage instances set on it. Either way, remove its nodes from the
	// cluster layout.
	orphanIDs := make([]string, 0, len(buddyRoles))
	for _, role := range buddyRoles {
		orphanIDs = append(orphanIDs, role.ID)
	}

	n.logger.Info(ctx, "Applying garage layout to remove orphaned garage nodes")
	if err := client.ApplyLayout(ctx, nil, orphanIDs); err != nil {
		return fmt.Errorf(
			"applying garage cluster layout, removing nodes %+v: %w",
			buddyRoles,
			err,
		)
	}

	return nil
}
// returns the bootstrap prior to the reload being applied.
func (n *network) reload(
ctx context.Context,

View File

@ -54,19 +54,44 @@ func TestLoad(t *testing.T) {
}
func TestJoin(t *testing.T) {
var (
h = newIntegrationHarness(t)
primus = h.createNetwork(t, "primus", nil)
secondus = h.joinNetwork(t, primus, "secondus", nil)
)
t.Run("simple", func(t *testing.T) {
var (
h = newIntegrationHarness(t)
primus = h.createNetwork(t, "primus", nil)
secondus = h.joinNetwork(t, primus, "secondus", nil)
)
primusHosts, err := primus.GetHosts(h.ctx)
assert.NoError(t, err)
primusHosts, err := primus.GetHosts(h.ctx)
assert.NoError(t, err)
secondusHosts, err := secondus.GetHosts(h.ctx)
assert.NoError(t, err)
secondusHosts, err := secondus.GetHosts(h.ctx)
assert.NoError(t, err)
assert.Equal(t, primusHosts, secondusHosts)
assert.Equal(t, primusHosts, secondusHosts)
})
t.Run("with alloc", func(t *testing.T) {
var (
h = newIntegrationHarness(t)
primus = h.createNetwork(t, "primus", nil)
secondus = h.joinNetwork(t, primus, "secondus", &joinNetworkOpts{
networkConfigOpts: &networkConfigOpts{
numStorageAllocs: 1,
},
})
)
t.Log("reloading primus' hosts")
assert.NoError(t, primus.Network.(*network).reloadHosts(h.ctx))
primusHosts, err := primus.GetHosts(h.ctx)
assert.NoError(t, err)
secondusHosts, err := secondus.GetHosts(h.ctx)
assert.NoError(t, err)
assert.Equal(t, primusHosts, secondusHosts)
})
}
func TestNetwork_GetConfig(t *testing.T) {
@ -82,19 +107,19 @@ func TestNetwork_GetConfig(t *testing.T) {
}
func TestNetwork_SetConfig(t *testing.T) {
allocsToPeerLayouts := func(
allocsToRoles := func(
hostName nebula.HostName, allocs []bootstrap.GarageHostInstance,
) []garage.PeerLayout {
peers := make([]garage.PeerLayout, len(allocs))
) []garage.Role {
roles := make([]garage.Role, len(allocs))
for i := range allocs {
peers[i] = garage.PeerLayout{
roles[i] = garage.Role{
ID: allocs[i].ID,
Capacity: 1_000_000_000,
Zone: string(hostName),
Tags: []string{},
}
}
return peers
return roles
}
t.Run("add storage alloc", func(t *testing.T) {
@ -142,10 +167,10 @@ func TestNetwork_SetConfig(t *testing.T) {
assert.Equal(t, newHostsByName, storedBootstrap.Hosts)
t.Log("Checking that garage layout contains the new allocation")
expPeers := allocsToPeerLayouts(network.hostName, allocs)
expRoles := allocsToRoles(network.hostName, allocs)
layout, err := network.garageAdminClient(t).GetLayout(h.ctx)
assert.NoError(t, err)
assert.ElementsMatch(t, expPeers, layout.Peers)
assert.ElementsMatch(t, expRoles, layout.Roles)
})
t.Run("remove storage alloc", func(t *testing.T) {
@ -183,11 +208,39 @@ func TestNetwork_SetConfig(t *testing.T) {
assert.Equal(t, newHostsByName, storedBootstrap.Hosts)
t.Log("Checking that garage layout contains the new allocation")
expPeers := allocsToPeerLayouts(network.hostName, allocs)
expRoles := allocsToRoles(network.hostName, allocs)
layout, err := network.garageAdminClient(t).GetLayout(h.ctx)
assert.NoError(t, err)
assert.ElementsMatch(t, expPeers, layout.Peers)
assert.ElementsMatch(t, expRoles, layout.Roles)
})
// TODO a host having allocs but removing all of them
t.Run("remove all storage allocs", func(t *testing.T) {
var (
h = newIntegrationHarness(t)
primus = h.createNetwork(t, "primus", nil)
secondus = h.joinNetwork(t, primus, "secondus", &joinNetworkOpts{
networkConfigOpts: &networkConfigOpts{
numStorageAllocs: 1,
},
})
networkConfig = secondus.getConfig(t)
prevHost = secondus.getHostsByName(t)[secondus.hostName]
//removedAlloc = networkConfig.Storage.Allocations[0]
removedRole = allocsToRoles(
secondus.hostName, prevHost.Garage.Instances,
)[0]
//removedGarageInst = daecommon.BootstrapGarageHostForAlloc(
// prevHost, removedAlloc,
//)
)
networkConfig.Storage.Allocations = nil
assert.NoError(t, secondus.SetConfig(h.ctx, networkConfig))
t.Log("Checking that garage layout still contains the old allocation")
layout, err := secondus.garageAdminClient(t).GetLayout(h.ctx)
assert.NoError(t, err)
assert.Contains(t, layout.Roles, removedRole)
})
}

View File

@ -8,6 +8,7 @@ import (
"io"
"net/http"
"net/http/httputil"
"net/netip"
"time"
"dev.mediocregopher.com/mediocre-go-lib.git/mctx"
@ -158,6 +159,45 @@ func (c *AdminClient) do(
return nil
}
// KnownNode describes the fields of a known node in the cluster, as returned
// as part of [ClusterStatus].
type KnownNode struct {
// ID identifies the node. It is matched against [Role] IDs to find the role
// which the node has in the cluster layout.
ID string `json:"id"`
// Addr is the address of the node as reported by the cluster status.
Addr netip.AddrPort `json:"addr"`
// IsUp indicates whether the node is reported as being up.
IsUp bool `json:"isUp"`
// LastSeenSecsAgo is reported as null for the node being queried.
// NOTE(review): presumably it is left as zero in that case -- confirm.
LastSeenSecsAgo int `json:"lastSeenSecsAgo"`
// HostName can still be empty for a short while after IsUp has become true,
// until garage has loaded the full state of the node.
HostName string `json:"hostname"`
}
// Role describes a node's role in the garage cluster, i.e. what storage it is
// providing.
type Role struct {
// ID is the ID of the node which this role belongs to.
ID string `json:"id"`
// NOTE(review): callers visible in this change set fill Capacity with an
// allocation's capacity multiplied by 1_000_000_000, which suggests the value
// is in bytes rather than gigabytes -- confirm and correct the unit below.
Capacity int `json:"capacity"` // Gb (SI units)
// Zone is the zone which the node is placed in. Callers use the allocation's
// zone if one is set, and the name of the host otherwise.
Zone string `json:"zone"`
Tags []string `json:"tags"`
}
// ClusterLayout describes the layout of the cluster as a whole.
type ClusterLayout struct {
// Roles holds the role of each node which is part of the layout.
Roles []Role `json:"roles"`
}
// ClusterStatus is returned from the Status endpoint, describing the currently
// known state of the cluster.
type ClusterStatus struct {
// Nodes lists the nodes which are known to the cluster.
Nodes []KnownNode `json:"nodes"`
// Layout is the cluster layout, as included in the status response.
Layout ClusterLayout `json:"layout"`
}
// Status fetches the current state of the cluster from the admin API's
// /v1/status endpoint.
func (c *AdminClient) Status(ctx context.Context) (ClusterStatus, error) {
	// https://garagehq.deuxfleurs.fr/api/garage-admin-v1.html#tag/Nodes/operation/GetNodes
	var res ClusterStatus
	if err := c.do(ctx, &res, "GET", "/v1/status", nil); err != nil {
		return res, err
	}
	return res, nil
}
// Wait will block until the instance connected to can see at least
// ReplicationFactor other garage instances. If the context is canceled it
// will return the context error.
@ -166,30 +206,32 @@ func (c *AdminClient) Wait(ctx context.Context) error {
for first := true; ; first = false {
if !first {
time.Sleep(250 * time.Millisecond)
select {
case <-time.After(500 * time.Millisecond):
case <-ctx.Done():
return ctx.Err()
}
}
// https://garagehq.deuxfleurs.fr/api/garage-admin-v1.html#tag/Nodes/operation/GetNodes
var clusterStatus struct {
Nodes []struct {
IsUp bool `json:"isUp"`
} `json:"nodes"`
}
err := c.do(ctx, &clusterStatus, "GET", "/v1/status", nil)
c.logger.Debug(ctx, "Getting cluster status")
clusterStatus, err := c.Status(ctx)
if ctxErr := ctx.Err(); ctxErr != nil {
return ctxErr
} else if err != nil {
c.logger.Warn(ctx, "waiting for instance to become ready", err)
c.logger.Warn(ctx, "Instance is not yet ready", err)
continue
}
var numUp int
for _, node := range clusterStatus.Nodes {
if node.IsUp {
// There seems to be some kind of step between IsUp becoming true
// and garage actually loading the full state of a node, so we check
// for the HostName as well. We could also use LastSeenSecsAgo, but
// that remains null for the node being queried so it's more
// annoying to use.
if node.IsUp && node.HostName != "" {
numUp++
}
}
@ -204,7 +246,7 @@ func (c *AdminClient) Wait(ctx context.Context) error {
return nil
}
c.logger.Debug(ctx, "instance not online yet, will continue waiting")
c.logger.Debug(ctx, "Instance not online yet, will continue waiting")
}
}
@ -283,20 +325,6 @@ func (c *AdminClient) GrantBucketPermissions(
})
}
// PeerLayout describes the properties of a garage peer in the context of the
// layout of the cluster.
type PeerLayout struct {
ID string `json:"id"`
Capacity int `json:"capacity"` // Gb (SI units)
Zone string `json:"zone"`
Tags []string `json:"tags"`
}
// ClusterLayout describes the layout of the cluster as a whole.
type ClusterLayout struct {
Peers []PeerLayout `json:"roles"`
}
// GetLayout returns the currently applied ClusterLayout.
func (c *AdminClient) GetLayout(ctx context.Context) (ClusterLayout, error) {
// https://garagehq.deuxfleurs.fr/api/garage-admin-v1.html#tag/Layout/operation/GetLayout
@ -306,42 +334,34 @@ func (c *AdminClient) GetLayout(ctx context.Context) (ClusterLayout, error) {
}
// ApplyLayout modifies the layout of the garage cluster. Only layout of the
// given peers will be modified/created/removed, other peers are not affected.
// given roles will be modified/created/removed, other roles are not affected.
func (c *AdminClient) ApplyLayout(
ctx context.Context, addModifyPeers []PeerLayout, removePeerIDs []string,
ctx context.Context, addModifyRoles []Role, removeRoleIDs []string,
) error {
type removePeer struct {
type removeRole struct {
ID string `json:"id"`
Remove bool `json:"remove"`
}
peers := make([]any, 0, len(addModifyPeers)+len(removePeerIDs))
for _, p := range addModifyPeers {
peers = append(peers, p)
roles := make([]any, 0, len(addModifyRoles)+len(removeRoleIDs))
for _, p := range addModifyRoles {
roles = append(roles, p)
}
for _, id := range removePeerIDs {
peers = append(peers, removePeer{ID: id, Remove: true})
}
{
// https://garagehq.deuxfleurs.fr/api/garage-admin-v1.html#tag/Layout/operation/ApplyLayout
err := c.do(ctx, nil, "POST", "/v1/layout", peers)
if err != nil {
return fmt.Errorf("staging layout changes: %w", err)
}
for _, id := range removeRoleIDs {
roles = append(roles, removeRole{ID: id, Remove: true})
}
// https://garagehq.deuxfleurs.fr/api/garage-admin-v1.html#tag/Layout/operation/GetLayout
var clusterLayout struct {
Version int `json:"version"`
StagedRoleChanges []PeerLayout `json:"stagedRoleChanges"`
Version int `json:"version"`
StagedRoleChanges []Role `json:"stagedRoleChanges"`
}
if err := c.do(ctx, &clusterLayout, "GET", "/v1/layout", nil); err != nil {
return fmt.Errorf("retrieving staged layout change: %w", err)
}
if len(clusterLayout.StagedRoleChanges) == 0 {
// https://garagehq.deuxfleurs.fr/api/garage-admin-v1.html#tag/Layout/operation/ApplyLayout
err := c.do(ctx, &clusterLayout, "POST", "/v1/layout", roles)
if err != nil {
return fmt.Errorf("staging layout changes: %w", err)
} else if len(clusterLayout.StagedRoleChanges) == 0 {
return nil
}
@ -352,7 +372,7 @@ func (c *AdminClient) ApplyLayout(
Version: clusterLayout.Version + 1,
}
err := c.do(ctx, nil, "POST", "/v1/layout/apply", applyClusterLayout)
err = c.do(ctx, nil, "POST", "/v1/layout/apply", applyClusterLayout)
if err != nil {
return fmt.Errorf("applying new layout (new version:%d): %w", applyClusterLayout.Version, err)
}

View File

@ -23,8 +23,8 @@ type GarageTomlData struct {
RPCSecret string
AdminToken string
garage.LocalPeer
BootstrapPeers []garage.RemotePeer
garage.LocalNode
BootstrapPeers []garage.RemoteNode
}
var garageTomlTpl = template.Must(template.New("").Parse(`
@ -38,7 +38,7 @@ rpc_bind_addr = "{{ .RPCAddr }}"
rpc_public_addr = "{{ .RPCAddr }}"
bootstrap_peers = [{{- range .BootstrapPeers }}
"{{ .RPCPeerAddr }}",
"{{ .RPCNodeAddr }}",
{{ end -}}]
[s3_api]
@ -66,7 +66,7 @@ func WriteGarageTomlFile(
) (
bool, error,
) {
slices.SortFunc(data.BootstrapPeers, func(i, j garage.RemotePeer) int {
slices.SortFunc(data.BootstrapPeers, func(i, j garage.RemoteNode) int {
return cmp.Or(
cmp.Compare(i.IP, j.IP),
cmp.Compare(i.RPCPort, j.RPCPort),

View File

@ -6,39 +6,39 @@ import (
"strconv"
)
// RemotePeer describes all information necessary to connect to a given garage
// RemoteNode describes all information necessary to connect to a given garage
// node.
type RemotePeer struct {
type RemoteNode struct {
ID string
IP string
RPCPort int
S3APIPort int
}
// LocalPeer describes the configuration of a local garage instance.
type LocalPeer struct {
RemotePeer
// LocalNode describes the configuration of a local garage instance.
type LocalNode struct {
RemoteNode
AdminPort int
}
// RPCAddr returns the address of the peer's RPC port.
func (p RemotePeer) RPCAddr() string {
// RPCAddr returns the address of the node's RPC port.
func (p RemoteNode) RPCAddr() string {
return net.JoinHostPort(p.IP, strconv.Itoa(p.RPCPort))
}
// RPCPeerAddr returns the full peer address (e.g. "id@ip:port") of the garage
// RPCNodeAddr returns the full node address (e.g. "id@ip:port") of the garage
// node for use in communicating over RPC.
func (p RemotePeer) RPCPeerAddr() string {
func (p RemoteNode) RPCNodeAddr() string {
return fmt.Sprintf("%s@%s", p.ID, p.RPCAddr())
}
// S3APIAddr returns the address of the peer's S3 API port.
func (p RemotePeer) S3APIAddr() string {
// S3APIAddr returns the address of the node's S3 API port.
func (p RemoteNode) S3APIAddr() string {
return net.JoinHostPort(p.IP, strconv.Itoa(p.S3APIPort))
}
// AdminAddr returns the address of the peer's S3 API port.
func (p LocalPeer) AdminAddr() string {
// AdminAddr returns the address of the node's admin API port.
func (p LocalNode) AdminAddr() string {
return net.JoinHostPort(p.IP, strconv.Itoa(p.AdminPort))
}

View File

@ -1,16 +0,0 @@
#!/usr/bin/env bash
# Builds the `tests` nix attribute and runs the resulting script via capsh,
# passing all command-line arguments through to it. sudo is only used so that
# capsh can set up capabilities; capsh then switches back to the invoking user.
set -e

runner="$(nix-build --no-out-link -A tests)"
invoking_user="$(whoami)"

echo "Requesting sudo in order to set thread capabilities, will drop back down to user '$invoking_user' immediately"

capsh_args=(
    --caps="cap_net_admin,cap_net_bind_service+eip cap_setpcap,cap_setuid,cap_setgid+ep"
    --keep=1
    --user="$invoking_user"
    --addamb=cap_net_admin
    --addamb=cap_net_bind_service
)

sudo -E capsh "${capsh_args[@]}" -- "$runner" "$@"

View File

@ -1,10 +0,0 @@
Ctrl+A X -> exits
qemu-system-aarch64 -nographic -cdrom tests/alpine-virt-3.18.4-aarch64.iso
Ctrl+Alt+G -> Escape mouse capture
qemu-system-x86_64 \
-cdrom tests/virt/Win11_23H2_English_x64.iso \
-m 8G \
-boot order=d \
-drive file=./tests/virt/winblows.qcow2

View File

@ -1,3 +0,0 @@
# Checks that the output of `isle version` contains each of the expected
# labels. grep -q prints nothing and exits non-zero if the label is missing.
isle version | grep -q 'Release:'
isle version | grep -q 'Platform:'
isle version | grep -q 'Build Platform:'

View File

@ -1,17 +0,0 @@
# Checks that DNS queries for host names resolve to the expected IPs, when run
# as both primus and secondus.
# shellcheck source=../../utils/with-1-data-1-empty-node-network.sh
source "$UTILS"/with-1-data-1-empty-node-network.sh
# assert_a WANT_IP HOSTNAME
#
# Queries the DNS server at $current_ip for HOSTNAME and checks that WANT_IP
# appears in the answer.
# NOTE(review): current_ip is presumably set by as_primus/as_secondus -- confirm
# against the sourced utils script.
function assert_a {
want_ip="$1"
hostname="$2"
r="$(dig @"$current_ip" +noall +answer "$hostname")"
echo "$r" | grep -q "$want_ip"
}
as_primus
assert_a "$primus_ip" primus.hosts.shared.test
assert_a "$secondus_ip" secondus.hosts.shared.test
as_secondus
assert_a "$primus_ip" primus.hosts.shared.test
assert_a "$secondus_ip" secondus.hosts.shared.test

View File

@ -1,21 +0,0 @@
# Checks the output of `isle garage cli` sub-commands, when run as both primus
# and secondus.
# shellcheck source=../../utils/with-1-data-1-empty-node-network.sh
source "$UTILS"/with-1-data-1-empty-node-network.sh
function do_tests {
# The first two lines of the status output are dropped; exactly three lines
# are expected to remain, containing the three addresses checked below.
status="$(isle garage cli status | tail -n+3)"
[ "$(echo "$status" | wc -l)" = "3" ]
echo "$status" | grep -q '10.6.9.1:3900'
echo "$status" | grep -q '10.6.9.1:3910'
echo "$status" | grep -q '10.6.9.1:3920'
# The first line of the bucket list is dropped; the only remaining line is
# expected to be the global-shared bucket.
buckets="$(isle garage cli bucket list | tail -n+2)"
[ "$(echo "$buckets" | wc -l)" = 1 ]
echo "$buckets" | grep -q 'global-shared'
}
as_primus
do_tests
as_secondus
do_tests

View File

@ -1,16 +0,0 @@
# Checks that `isle garage mc` can list and read files, when run as both primus
# and secondus.
# shellcheck source=../../utils/with-1-data-1-empty-node-network.sh
source "$UTILS"/with-1-data-1-empty-node-network.sh
function do_tests {
# `tree --json` emits a stream of JSON values; jq -s collects them into a
# single array so that they can be counted and indexed.
files="$(isle garage mc -- tree --json garage)"
[ "$(echo "$files" | jq -s '.|length')" -ge "1" ]
# The first listed file must be readable and non-empty.
file="$(echo "$files" | jq -sr '.[0].key')"
[ "$(isle garage mc -- cat "garage/$file" | wc -c)" -gt "0" ]
}
as_primus
do_tests
as_secondus
do_tests

View File

@ -1,20 +0,0 @@
# Checks the output of `isle hosts list`, when run as both primus and secondus.
# shellcheck source=../../utils/with-1-data-1-empty-node-network.sh
source "$UTILS"/with-1-data-1-empty-node-network.sh
function do_tests {
hosts="$(isle hosts list)"
# primus is expected as the first entry, with three storage instances.
[ "$(echo "$hosts" | jq -r '.[0].Name')" = "primus" ]
[ "$(echo "$hosts" | jq -r '.[0].VPN.IP')" = "10.6.9.1" ]
[ "$(echo "$hosts" | jq -r '.[0].Storage.Instances|length')" = "3" ]
# secondus is expected as the second entry, with no storage instances.
[ "$(echo "$hosts" | jq -r '.[1].Name')" = "secondus" ]
[ "$(echo "$hosts" | jq -r '.[1].VPN.IP')" = "$secondus_ip" ]
[ "$(echo "$hosts" | jq -r '.[1].Storage.Instances|length')" = "0" ]
}
as_primus
do_tests
as_secondus
do_tests

View File

@ -1,16 +0,0 @@
# Checks the contents of the bootstrap file which was created for secondus.
# shellcheck source=../../utils/with-1-data-1-empty-node-network.sh
source "$UTILS"/with-1-data-1-empty-node-network.sh
bs="$secondus_bootstrap" # set in with-1-data-1-empty-node-network.sh
[ "$(jq -r <"$bs" '.Bootstrap.NetworkCreationParams.Domain')" = "shared.test" ]
[ "$(jq -r <"$bs" '.Bootstrap.NetworkCreationParams.Name')" = "testing" ]
[ "$(jq -r <"$bs" '.Bootstrap.SignedHostAssigned.Body.Name')" = "secondus" ]
# The public credentials listed for primus must match those found in the file
# at $BOOTSTRAP_FILE.
[ "$(jq -r <"$bs" '.Bootstrap.Hosts.primus.PublicCredentials')" \
= "$(jq -r <"$BOOTSTRAP_FILE" '.SignedHostAssigned.Body.PublicCredentials')" ]
[ "$(jq <"$bs" '.Bootstrap.Hosts.primus.Garage.Instances|length')" = "3" ]
# The garage RPC secret must be present in the file.
[ "$(jq <"$bs" '.Secrets["garage-rpc-secret"]')" != "null" ]

View File

@ -1,12 +0,0 @@
# Checks the output of `isle nebula show`.
# shellcheck source=../../utils/with-1-data-1-empty-node-network.sh
source "$UTILS"/with-1-data-1-empty-node-network.sh
info="$(isle nebula show)"
# The reported CA certificate must match the one in the file at
# $BOOTSTRAP_FILE.
[ "$(echo "$info" | jq -r '.CACert')" \
= "$(jq -r <"$BOOTSTRAP_FILE" '.CAPublicCredentials.Cert')" ]
[ "$(echo "$info" | jq -r '.SubnetCIDR')" = "10.6.9.0/24" ]
# Exactly one lighthouse is expected.
[ "$(echo "$info" | jq -r '.Lighthouses|length')" = "1" ]
[ "$(echo "$info" | jq -r '.Lighthouses[0].PublicAddr')" = "127.0.0.1:60000" ]
[ "$(echo "$info" | jq -r '.Lighthouses[0].IP')" = "10.6.9.1" ]

View File

@ -1,17 +0,0 @@
# Checks `isle nebula create-cert`, both for an unknown host name and for a
# known one.
# shellcheck source=../../utils/with-1-data-1-empty-node-network.sh
source "$UTILS"/with-1-data-1-empty-node-network.sh
# Generate a key pair; only the public key is kept, in the file `pubkey`.
nebula-cert keygen -out-key /dev/null -out-pub pubkey
cat pubkey
# Creating a cert for an unknown host name is expected to fail with the "Host
# not found" error. `|| true` keeps the failing command from aborting the
# script, so that its output can be checked by grep.
(
isle nebula create-cert \
--hostname non-esiste \
--public-key-path pubkey \
2>&1 || true \
) | grep '\[1002\] Host not found'
# Creating a cert for a known host name is expected to output a certificate.
isle nebula create-cert \
--hostname primus \
--public-key-path pubkey \
| grep -- '-----BEGIN NEBULA CERTIFICATE-----'

View File

@ -1,12 +0,0 @@
# Checks the rpc_port files found in the a, b and c directories, as well as the
# contents of the file at $BOOTSTRAP_FILE.
# shellcheck source=../../utils/with-1-data-1-empty-node-network.sh
source "$UTILS"/with-1-data-1-empty-node-network.sh
[ "$(cat a/meta/isle/rpc_port)" = "3900" ]
[ "$(cat b/meta/isle/rpc_port)" = "3910" ]
[ "$(cat c/meta/isle/rpc_port)" = "3920" ]
# The network ID only needs to be non-empty; name and domain must match
# exactly.
[ "$(jq -r <"$BOOTSTRAP_FILE" '.NetworkCreationParams.ID')" != "" ]
[ "$(jq -r <"$BOOTSTRAP_FILE" '.NetworkCreationParams.Name')" = "testing" ]
[ "$(jq -r <"$BOOTSTRAP_FILE" '.NetworkCreationParams.Domain')" = "shared.test" ]
[ "$(jq -r <"$BOOTSTRAP_FILE" '.SignedHostAssigned.Body.Name')" = "primus" ]

View File

@ -1,115 +0,0 @@
# Entrypoint of the shell-based test suite.
#
# Runs every test case found under ./cases (relative to this script), in sorted
# order, stopping at the first failure. Positional arguments are regexes; when
# given, only test cases whose path matches all of them are run.
#
# Flags:
#   --keep-tmp      Do not delete the temporary directory on exit.
#   --verbose (-v)  Trace this script and print test output directly, rather
#                   than only printing the output of a failed test.
#   --help (-h)     Print usage and exit.
#
# Exits non-zero if any test case failed.
set -e

# cd into script's directory
cd -- "$( dirname -- "${BASH_SOURCE[0]}" )" &> /dev/null
root=$(pwd)

export UTILS="$root"/utils

REGEXS=()

while [[ $# -gt 0 ]]; do
    case $1 in
        -h|--help)
            cat <<EOF
USAGE: [flags] [test regexs...]
FLAGS
--keep-tmp
--verbose (-v)
--help (-h)
EOF
            exit 1
            ;;
        -v|--verbose)
            VERBOSE=1
            shift
            ;;
        --keep-tmp)
            KEEP_TMP=1
            shift
            ;;
        *)
            REGEXS+=("$1")
            shift
            ;;
    esac
done

[ -n "$VERBOSE" ] && set -x

ROOT_TMPDIR="$(mktemp --tmpdir -d isle-tests.XXXXXX)"
# The variable is quoted inside the trap so that a temporary directory whose
# path contains whitespace is still removed correctly.
if [ -z "$KEEP_TMP" ]; then trap 'rm -rf "$ROOT_TMPDIR"' EXIT; fi

TMPDIR="$ROOT_TMPDIR"

export ROOT_TMPDIR TMPDIR
echo "tmp dir is $ROOT_TMPDIR"

# Blackhole these directories so that tests don't accidentally use the host's
# real ones.
export XDG_RUNTIME_DIR=/dev/null
export XDG_STATE_HOME=/dev/null

test_files=$(
    find ./cases -type f -name '*.sh' \
        | sed "s|^\./cases/||" \
        | grep -v entrypoint.sh \
        | sort
)

# Narrow the list down to the test cases matching every given regex.
for r in "${REGEXS[@]}"; do
    test_files="$(echo "$test_files" | grep "$r")"
done

echo -e "number of tests: $(echo "$test_files" | wc -l)\n"
for file in $test_files; do
    echo "Running test case: $file"

    # File descriptor 3 is where the test's output goes: a per-test log file
    # normally, or this script's stdout when running verbosely.
    if [ -z "$VERBOSE" ]; then
        output="$TMPDIR/$file.log"
        mkdir -p "$(dirname "$output")"
        exec 3>"$output"
    else
        exec 3>&1
    fi

    # The test runs in a subshell so that its exported variables and its exit
    # do not leak into the runner.
    (
        export TEST_CASE_FILE="$file"

        if ! $SHELL -e -x "$root/cases/$file" >&3 2>&1; then
            echo "$file FAILED"
            if [ -z "$VERBOSE" ]; then
                echo "output of test is as follows"
                echo "------------------------------"
                cat "$output"
                echo "------------------------------"
            fi
            exit 1
        fi
    ) || TESTS_FAILED=1

    if [ -n "$TESTS_FAILED" ]; then break; fi
done

# Clean up any shared running networks. Each cleanup script is responsible for
# figuring out if its shared network was actually instantiated during any tests.
if [ -e "$ROOT_TMPDIR/cleanup-pids" ]; then
    echo "Cleaning up running pids"

    # Each line holds a pid followed by a description. Processes are killed in
    # the reverse of the order in which they were registered.
    tac "$ROOT_TMPDIR/cleanup-pids" | while read -r line; do
        pid="$(echo "$line" | cut -d' ' -f1)"
        descr="$(echo "$line" | cut -d' ' -f2-)"
        echo "Killing $descr ($pid)"
        kill "$pid"
    done

    # This is easier than checking if the pids are still running, and for some
    # reason it doesn't occur until after the pids have died anyway
    echo "Waiting for appimage mounts to unmount"
    while [ "$(find "$ROOT_TMPDIR" -type d -name '*.mount_isle*' | wc -l)" -ge "1" ]; do
        sleep 1
    done
fi

# Report failure through the exit status. Without this the script would end
# with status 0 even though a test case failed.
if [ -n "$TESTS_FAILED" ]; then exit 1; fi

echo -e '\nall tests succeeded!'

View File

@ -1,3 +0,0 @@
# Registers a background process to be killed once the test run is over.
#
#   $1  pid of the process
#   $2  human readable description of the process
#
# The "<pid> <description>" line is appended to "$ROOT_TMPDIR/cleanup-pids".
set -o errexit
printf '%s %s\n' "$1" "$2" >> "$ROOT_TMPDIR/cleanup-pids"

View File

@ -1,18 +0,0 @@
# Prepares the directories of a shared daemon and prints, on stdout, the shell
# statements which switch a shell into that daemon's environment. The output is
# meant to be `eval`ed by the caller.
#
#   $1  base directory of the daemon, relative to $ROOT_TMPDIR
#
# Reads ROOT_TMPDIR and NETWORK_ID from the environment.
set -o errexit

daemon_base="$1"

TMPDIR="$ROOT_TMPDIR/$daemon_base"
XDG_RUNTIME_DIR="$TMPDIR/.run"
XDG_STATE_HOME="$TMPDIR/.state"

for dir in "$TMPDIR" "$XDG_RUNTIME_DIR" "$XDG_STATE_HOME"; do
    mkdir -p "$dir"
done

printf 'export TMPDIR="%s"\n' "$TMPDIR"
printf 'export XDG_RUNTIME_DIR="%s"\n' "$XDG_RUNTIME_DIR"
printf 'export XDG_STATE_HOME="%s"\n' "$XDG_STATE_HOME"
printf 'export ISLE_DAEMON_HTTP_SOCKET_PATH="%s/%s-daemon.sock"\n' "$ROOT_TMPDIR" "$daemon_base"
printf 'BOOTSTRAP_FILE="%s/isle/networks/%s/bootstrap.json"\n' "$XDG_STATE_HOME" "$NETWORK_ID"
printf 'cd "%s"\n' "$TMPDIR"

View File

@ -1,107 +0,0 @@
# Constants describing the shared "1-data-1-empty" network. This file is
# sourced by test cases, so these names are visible to them.
set -o errexit

# Base directories (relative to $ROOT_TMPDIR) of the network and its hosts.
base="shared/1-data-1-empty"
primus_base="${base}/primus"
secondus_base="${base}/secondus"

# Address range of the network, and the address given to primus.
ipNet="10.6.9.0/24"
primus_ip="10.6.9.1"
# Switches the calling shell into primus' daemon environment by evaluating the
# output of shared-daemon-env.sh, and records primus' IP in current_ip.
as_primus() {
    eval "$($SHELL "$UTILS/shared-daemon-env.sh" "$primus_base")"
    current_ip="$primus_ip"
}
# Switches the calling shell into secondus' daemon environment by evaluating
# the output of shared-daemon-env.sh, and records secondus' IP in current_ip.
as_secondus() {
    eval "$($SHELL "$UTILS/shared-daemon-env.sh" "$secondus_base")"
    current_ip="$secondus_ip"
}
# Even if it's already initialized, we want to put the caller in primus'
# environment
as_primus
# as_primus has cd'd into primus' directory, so this is an absolute path to a
# file inside of it.
secondus_bootstrap="$(pwd)/secondus-bootstrap.json"
# The presence of the isle runtime directory is used as the marker that the
# shared network was already set up by an earlier test case.
# NOTE(review): presumably the daemon creates this directory on startup --
# confirm.
if [ ! -d "$XDG_RUNTIME_DIR/isle" ]; then
echo "Initializing shared single node network"
# Parent directories of the three storage allocations configured below.
mkdir a
mkdir b
mkdir c
cat >daemon.yml <<EOF
networks:
testing:
vpn:
public_addr: 127.0.0.1:60000
tun:
device: isle-primus
storage:
allocations:
- data_path: a/data
meta_path: a/meta
capacity: 1
- data_path: b/data
meta_path: b/meta
capacity: 1
- data_path: c/data
meta_path: c/meta
capacity: 1
EOF
# Start primus' daemon in the background and register it to be killed at the
# end of the test run.
isle daemon -l debug --config-path daemon.yml >daemon.log 2>&1 &
pid="$!"
$SHELL "$UTILS/register-cleanup.sh" "$pid" "1-data-1-empty-node-network/primus"
echo "Waiting for primus daemon (process $pid) to start"
# Polls once a second until the daemon's socket file exists; there is no
# timeout.
while ! [ -e "$ISLE_DAEMON_HTTP_SOCKET_PATH" ]; do sleep 1; done
echo "Creating 1-data-1-empty network"
isle network create \
--domain shared.test \
--hostname primus \
--ip-net "$ipNet" \
--name "testing"
echo "Creating secondus bootstrap"
isle hosts create \
--hostname secondus \
> "$secondus_bootstrap"
# Run in a subshell so that switching to secondus' environment (variables and
# working directory) does not leak into the caller.
(
as_secondus
cat >daemon.yml <<EOF
vpn:
tun:
device: isle-secondus
EOF
isle daemon -l debug -c daemon.yml >daemon.log 2>&1 &
pid="$!"
$SHELL "$UTILS/register-cleanup.sh" "$pid" "1-data-1-empty-node-network/secondus"
echo "Waiting for secondus daemon (process $!) to start"
# Same socket polling as for primus, again without a timeout.
while ! [ -e "$ISLE_DAEMON_HTTP_SOCKET_PATH" ]; do sleep 1; done
echo "Joining secondus to the network"
isle network join -b "$secondus_bootstrap"
)
fi
# Read secondus' IP out of the nebula certificate embedded in its bootstrap
# file. `cut` drops everything from the first "/" of the certificate's first IP
# entry onwards.
secondus_ip="$(
    nebula-cert print -json \
        -path <(jq -r '.Bootstrap.Hosts["secondus"].PublicCredentials.Cert' "$secondus_bootstrap") \
    | jq -r '.details.ips[0]' \
    | cut -d/ -f1
)"

# -r makes jq print the raw string. Without it NETWORK_ID would carry the
# literal JSON double quotes, which only happened to be stripped again when the
# output of shared-daemon-env.sh was eval'd.
NETWORK_ID="$(jq -r '.Bootstrap.NetworkCreationParams.ID' "$secondus_bootstrap")"
export NETWORK_ID

# shared-daemon-env.sh depends on NETWORK_ID, so we re-call as_primus in order
# to fully populate the envvars we need.
as_primus

View File

@ -1,9 +0,0 @@
# Gives the sourcing test case its own scratch area: TMPDIR is narrowed to a
# directory named after the test case file, the XDG runtime and state
# directories are placed inside of it, and the shell is moved there.
set -o errexit

TMPDIR="${TMPDIR}/${TEST_CASE_FILE}.tmp"
XDG_RUNTIME_DIR="${TMPDIR}/.run"
XDG_STATE_HOME="${TMPDIR}/.state"

for dir in "$TMPDIR" "$XDG_RUNTIME_DIR" "$XDG_STATE_HOME"; do
    mkdir -p "$dir"
done

cd "$TMPDIR"