Have hosts update garage cluster layout to remove other nodes if necessary

Brian Picciano 2024-11-05 22:31:57 +01:00
parent 53a06af9ba
commit 032bdb9e43
6 changed files with 300 additions and 62 deletions

View File

@@ -1,8 +1,8 @@
# Contributing Storage
-If your host machine can be reasonably sure of being online most, if not all, of
-the time, and has 1GB or more of unused drive space you'd like to contribute to
-the network, then this document is for you.
+This document is for you if your host machine can reliably be online at all
+times and has 1GB or more of unused drive space you'd like to contribute to the
+network.
## Edit `daemon.yml`
@@ -16,7 +16,7 @@ one allocation listed.
The comments in the file should be self-explanatory, but ask your admin if you
need any clarification.
-Here are an example set of allocations for a host which is contributing space
+Here is an example set of allocations for a host which is contributing space
from two separate drives:
```
@@ -47,6 +47,16 @@ process.
The `isle daemon` will automatically allow the ports used for your
storage allocations in the vpn firewall.
## Removing Allocations
If you later decide to no longer provide storage, simply remove the
`storage.allocations` item from your `/etc/isle/daemon.yml` file and restart the
`isle daemon` process.
Once removed, it is advisable to wait some time before removing storage
allocations from other hosts. This ensures that all data which was previously
on this host has had a chance to fully replicate to multiple other hosts.
## Further Reading
Isle uses the [garage][garage] project for its storage system. See the

View File

@@ -184,7 +184,6 @@ func (c *Children) reloadNebula(
return nil
}
-// TODO this doesn't handle removing garage nodes
func (c *Children) reloadGarage(
ctx context.Context,
networkConfig daecommon.NetworkConfig,
@@ -206,6 +205,8 @@ func (c *Children) reloadGarage(
)
)
// TODO it's possible that the config changed, but only the bootstrap
// peers, in which case we don't need to restart the node.
childConfigPath, changed, err := garageWriteChildConfig(
ctx,
c.logger,

View File

@@ -12,13 +12,16 @@ import (
"isle/nebula"
"isle/secrets"
"net"
"net/netip"
"path/filepath" "path/filepath"
"slices"
"strconv" "strconv"
"time" "time"
"dev.mediocregopher.com/mediocre-go-lib.git/mctx" "dev.mediocregopher.com/mediocre-go-lib.git/mctx"
"dev.mediocregopher.com/mediocre-go-lib.git/mlog" "dev.mediocregopher.com/mediocre-go-lib.git/mlog"
"github.com/minio/minio-go/v7" "github.com/minio/minio-go/v7"
"golang.org/x/exp/maps"
)
// Paths within garage's global bucket.
@@ -341,3 +344,59 @@ func garageWaitForAlloc(
return nil
}
}
// garageNodeBuddyPeers returns the "buddy" peers of the given host, based on
// the given garage cluster status. It will return zero values if the host has
// no buddy.
//
// For situations where we want one host to affect the cluster layout of another
// host's peers, we use a simple system to determine a single host which is
// responsible. The goal is not to be 100% race-proof (garage handles that), but
// rather to try to prevent all hosts from modifying the same host's layout at
// the same time.
//
// The system is to order all hosts by their IP, and say that each host is
// responsible for (aka the "buddy" of) the host immediately after their own in
// that list. The last host in that list is responsible for the first.
func garageNodeBuddyPeers(
status garage.ClusterStatus, host bootstrap.Host,
) (
netip.Addr, []garage.Role,
) {
var (
thisIP = host.IP()
nodeRolesByIP = map[netip.Addr][]garage.Role{}
)
for _, node := range status.Nodes {
if node.Role == nil {
continue
}
ip := node.Addr.Addr()
nodeRolesByIP[ip] = append(nodeRolesByIP[ip], *node.Role)
}
// If there is only a single host in the cluster (or, somehow, none) then
// that host has no buddy.
if len(nodeRolesByIP) < 2 {
return netip.Addr{}, nil
}
nodeIPs := maps.Keys(nodeRolesByIP)
slices.SortFunc(nodeIPs, netip.Addr.Compare)
for i, nodeIP := range nodeIPs {
var buddyIP netip.Addr
if i == len(nodeIPs)-1 {
buddyIP = nodeIPs[0]
} else if nodeIP == thisIP {
buddyIP = nodeIPs[i+1]
} else {
continue
}
return buddyIP, nodeRolesByIP[buddyIP]
}
panic("Unreachable")
}
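As an editorial illustration (not part of the commit): the sort-and-wrap ordering described in the comment above can be exercised in isolation with the standalone sketch below. It uses made-up IPs and only the standard library; the real function additionally maps each IP to its garage roles via the cluster status.
```go
package main

import (
	"fmt"
	"net/netip"
	"slices"
)

// buddyOf mirrors the selection scheme used by garageNodeBuddyPeers, but over
// plain IPs: sort all host IPs, then each host is the "buddy" of the host
// immediately after it, with the last host wrapping around to the first.
func buddyOf(thisIP netip.Addr, ips []netip.Addr) netip.Addr {
	sorted := slices.Clone(ips)
	slices.SortFunc(sorted, netip.Addr.Compare)
	for i, ip := range sorted {
		if ip == thisIP {
			return sorted[(i+1)%len(sorted)]
		}
	}
	return netip.Addr{} // thisIP isn't part of the cluster
}

func main() {
	// Hypothetical cluster IPs, deliberately unsorted.
	ips := []netip.Addr{
		netip.MustParseAddr("10.0.0.3"),
		netip.MustParseAddr("10.0.0.1"),
		netip.MustParseAddr("10.0.0.2"),
	}
	// Prints: .1 -> .2, .2 -> .3, and .3 wraps around to .1.
	for _, ip := range ips {
		fmt.Println(ip, "is responsible for", buddyOf(ip, ips))
	}
}
```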

View File

@@ -419,7 +419,7 @@ func (n *network) initializeDirs(mayExist bool) error {
}
func (n *network) periodically(
-logger *mlog.Logger,
+label string,
fn func(context.Context) error,
period time.Duration,
) {
@@ -427,13 +427,13 @@ func (n *network) periodically(
go func() {
defer n.wg.Done()
-ctx := mctx.Annotate(n.workerCtx, "period", period)
+ctx := mctx.Annotate(n.workerCtx, "workerLabel", label)
ticker := time.NewTicker(period)
defer ticker.Stop()
-logger.Info(ctx, "Starting background job runner")
-defer logger.Info(ctx, "Stopping background job runner")
+n.logger.Info(ctx, "Starting background job runner")
+defer n.logger.Info(ctx, "Stopping background job runner")
for {
select {
@@ -441,9 +441,9 @@ func (n *network) periodically(
return
case <-ticker.C:
-logger.Info(ctx, "Background job running")
+n.logger.Info(ctx, "Background job running")
if err := fn(ctx); err != nil {
-logger.Error(ctx, "Background job failed", err)
+n.logger.Error(ctx, "Background job failed", err)
}
}
}
@@ -510,10 +510,10 @@ func (n *network) initialize(
return fmt.Errorf("Reloading network bootstrap: %w", err)
}
-n.periodically(
-n.logger.WithNamespace("reloadHosts"),
-n.reloadHosts,
-3*time.Minute,
-)
+n.periodically("reloadHosts", n.reloadHosts, 3*time.Minute)
+n.periodically(
+"removeOrphanGarageNodes", n.removeOrphanGarageNodes, 1*time.Minute,
+)
return nil
@@ -531,7 +531,7 @@ func (n *network) postChildrenInit(
thisHost := n.currBootstrap.ThisHost()
-if len(prevThisHost.Garage.Instances)+len(thisHost.Garage.Instances) > 0 {
+if len(thisHost.Garage.Instances) > 0 {
n.logger.Info(ctx, "Applying garage layout")
if err := garageApplyLayout(
ctx,
@@ -618,6 +618,77 @@ func (n *network) reloadHosts(ctx context.Context) error {
return nil
}
// In general each host will manage the garage cluster layout of its own storage
// allocations via garageApplyLayout. There are three situations which are
// handled here, rather than garageApplyLayout:
//
// - A host removes all of its allocations via SetConfig.
// - A host removes all of its allocations by calling Load with no allocations
// in the provided daecommon.NetworkConfig.
// - A host is removed from the network by another host.
//
// In all of these cases the host no longer has any garage instances running,
// and so can't call garageApplyLayout on itself. To combat this we have all
// hosts which do have garage instances running periodically check whether any
// garage nodes have been orphaned in the cluster layout, and remove them if so.
func (n *network) removeOrphanGarageNodes(ctx context.Context) error {
n.l.RLock()
defer n.l.RUnlock()
thisHost := n.currBootstrap.ThisHost()
if len(thisHost.Garage.Instances) == 0 {
n.logger.Info(ctx, "No local garage instances, cannot remove orphans")
return nil
}
adminClient := newGarageAdminClient(
n.logger, n.networkConfig, n.opts.GarageAdminToken, thisHost,
)
defer adminClient.Close()
clusterStatus, err := adminClient.Status(ctx)
if err != nil {
return fmt.Errorf("retrieving garage cluster status: %w", err)
}
buddyIP, buddyNodes := garageNodeBuddyPeers(clusterStatus, thisHost)
if len(buddyNodes) == 0 {
return nil
}
ctx = mctx.Annotate(ctx, "buddyIP", buddyIP)
for _, host := range n.currBootstrap.Hosts {
if host.IP() != buddyIP {
continue
} else if len(host.Garage.Instances) > 0 {
n.logger.Info(ctx, "Buddy instance has garage nodes configured in its bootstrap, doing nothing")
return nil
}
break
}
// Either the host is no longer in the network, or it no longer has any
// garage instances set on it. Either way, remove its nodes from the cluster
// layout.
buddyNodeIDs := make([]string, len(buddyNodes))
for i, buddyNode := range buddyNodes {
buddyNodeIDs[i] = buddyNode.ID
}
n.logger.Info(ctx, "Applying garage layout to remove orphaned garage nodes")
if err := adminClient.ApplyLayout(ctx, nil, buddyNodeIDs); err != nil {
return fmt.Errorf(
"applying garage cluster layout, removing nodes %+v: %w",
buddyNodes,
err,
)
}
return nil
}
// returns the bootstrap prior to the reload being applied.
func (n *network) reload(
ctx context.Context,

View File

@@ -54,19 +54,44 @@ func TestLoad(t *testing.T) {
}
func TestJoin(t *testing.T) {
+t.Run("simple", func(t *testing.T) {
var (
h = newIntegrationHarness(t)
primus = h.createNetwork(t, "primus", nil)
secondus = h.joinNetwork(t, primus, "secondus", nil)
)
primusHosts, err := primus.GetHosts(h.ctx)
assert.NoError(t, err)
secondusHosts, err := secondus.GetHosts(h.ctx)
assert.NoError(t, err)
assert.Equal(t, primusHosts, secondusHosts)
+})
t.Run("with alloc", func(t *testing.T) {
var (
h = newIntegrationHarness(t)
primus = h.createNetwork(t, "primus", nil)
secondus = h.joinNetwork(t, primus, "secondus", &joinNetworkOpts{
networkConfigOpts: &networkConfigOpts{
numStorageAllocs: 1,
},
})
)
t.Log("reloading primus' hosts")
assert.NoError(t, primus.Network.(*network).reloadHosts(h.ctx))
primusHosts, err := primus.GetHosts(h.ctx)
assert.NoError(t, err)
secondusHosts, err := secondus.GetHosts(h.ctx)
assert.NoError(t, err)
assert.Equal(t, primusHosts, secondusHosts)
})
}
func TestNetwork_GetConfig(t *testing.T) {
@@ -189,5 +214,54 @@ func TestNetwork_SetConfig(t *testing.T) {
assert.ElementsMatch(t, expRoles, layout.Roles)
})
-// TODO a host having allocs but removing all of them
+t.Run("remove all storage allocs", func(t *testing.T) {
var (
h = newIntegrationHarness(t)
primus = h.createNetwork(t, "primus", nil)
secondus = h.joinNetwork(t, primus, "secondus", &joinNetworkOpts{
networkConfigOpts: &networkConfigOpts{
numStorageAllocs: 1,
},
})
networkConfig = secondus.getConfig(t)
prevHost = secondus.getHostsByName(t)[secondus.hostName]
removedAlloc = networkConfig.Storage.Allocations[0]
removedRole = allocsToRoles(
secondus.hostName, prevHost.Garage.Instances,
)[0]
removedGarageInst = daecommon.BootstrapGarageHostForAlloc(
prevHost, removedAlloc,
)
primusGarageAdminClient = primus.garageAdminClient(t)
)
networkConfig.Storage.Allocations = nil
assert.NoError(t, secondus.SetConfig(h.ctx, networkConfig))
t.Log("Checking that the Host information was updated")
newHostsByName := primus.getHostsByName(t)
newHost, ok := newHostsByName[secondus.hostName]
assert.True(t, ok)
allocs := newHost.HostConfigured.Garage.Instances
assert.Len(t, allocs, 0)
assert.NotContains(t, allocs, removedGarageInst)
t.Log("Checking that garage layout still contains the old allocation")
layout, err := primusGarageAdminClient.GetLayout(h.ctx)
assert.NoError(t, err)
assert.Contains(t, layout.Roles, removedRole)
t.Log("Removing orphan garage nodes with primus")
assert.NoError(
t, primus.Network.(*network).removeOrphanGarageNodes(h.ctx),
)
t.Log("Checking that garage layout no longer contains the old allocation")
layout, err = primusGarageAdminClient.GetLayout(h.ctx)
assert.NoError(t, err)
assert.NotContains(t, layout.Roles, removedRole)
})
}

View File

@@ -8,6 +8,7 @@ import (
"io"
"net/http"
"net/http/httputil"
"net/netip"
"time" "time"
"dev.mediocregopher.com/mediocre-go-lib.git/mctx" "dev.mediocregopher.com/mediocre-go-lib.git/mctx"
@ -158,38 +159,81 @@ func (c *AdminClient) do(
return nil return nil
} }
// KnownNode describes the fields of a known node in the cluster, as returned
// as part of [ClusterStatus].
type KnownNode struct {
ID string `json:"id"`
Role *Role `json:"role"`
Addr netip.AddrPort `json:"addr"`
IsUp bool `json:"isUp"`
LastSeenSecsAgo int `json:"lastSeenSecsAgo"`
HostName string `json:"hostname"`
}
// Role describes a node's role in the garage cluster, i.e. what storage it is
// providing.
type Role struct {
ID string `json:"id"`
Capacity int `json:"capacity"` // Gb (SI units)
Zone string `json:"zone"`
Tags []string `json:"tags"`
}
// ClusterLayout describes the layout of the cluster as a whole.
type ClusterLayout struct {
Version int `json:"version"`
Roles []Role `json:"roles"`
StagedRoleChanges []Role `json:"stagedRoleChanges"`
}
// ClusterStatus is returned from the Status endpoint, describing the currently
// known state of the cluster.
type ClusterStatus struct {
Nodes []KnownNode `json:"nodes"`
}
// Status returns the current state of the cluster.
func (c *AdminClient) Status(ctx context.Context) (ClusterStatus, error) {
// https://garagehq.deuxfleurs.fr/api/garage-admin-v1.html#tag/Nodes/operation/GetNodes
var clusterStatus ClusterStatus
err := c.do(ctx, &clusterStatus, "GET", "/v1/status", nil)
return clusterStatus, err
}
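As a brief usage sketch (editorial, not part of the commit): assuming an *AdminClient that has already been constructed and can reach a garage instance, the new Status method can be consumed as shown below; the helper name is hypothetical.
```go
// countUpNodes is a hypothetical helper demonstrating the Status method added
// above: it fetches the cluster status and tallies the nodes which garage
// currently reports as up.
func countUpNodes(ctx context.Context, c *AdminClient) (int, error) {
	status, err := c.Status(ctx)
	if err != nil {
		return 0, fmt.Errorf("getting cluster status: %w", err)
	}
	var numUp int
	for _, node := range status.Nodes {
		if node.IsUp {
			numUp++
		}
	}
	return numUp, nil
}
```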
// Wait will block until the instance connected to can see at least
// ReplicationFactor other garage instances. If the context is canceled it
// will return the context error.
func (c *AdminClient) Wait(ctx context.Context) error {
for first := true; ; first = false {
if !first {
-time.Sleep(250 * time.Millisecond)
+select {
+case <-time.After(2 * time.Second):
+case <-ctx.Done():
+return ctx.Err()
+}
}
-// https://garagehq.deuxfleurs.fr/api/garage-admin-v1.html#tag/Nodes/operation/GetNodes
-var clusterStatus struct {
-Nodes []struct {
-IsUp bool `json:"isUp"`
-} `json:"nodes"`
-}
-err := c.do(ctx, &clusterStatus, "GET", "/v1/status", nil)
+c.logger.Debug(ctx, "Getting cluster status")
+clusterStatus, err := c.Status(ctx)
if ctxErr := ctx.Err(); ctxErr != nil {
return ctxErr
} else if err != nil {
-c.logger.Warn(ctx, "waiting for instance to become ready", err)
+ctx := mctx.Annotate(ctx, "errMsg", err.Error())
c.logger.Info(ctx, "Instance is not online yet")
continue
}
var numUp int
for _, node := range clusterStatus.Nodes {
-if node.IsUp {
// There seems to be some kind of step between IsUp becoming true
// and garage actually loading the full state of a node, so we check
// for the HostName as well. We could also use LastSeenSecsAgo, but
// that remains null for the node being queried so it's more
// annoying to use.
if node.IsUp && node.HostName != "" {
numUp++
}
}
@@ -204,7 +248,7 @@ func (c *AdminClient) Wait(ctx context.Context) error {
return nil
}
-c.logger.Debug(ctx, "instance not online yet, will continue waiting")
+c.logger.Info(ctx, "Instance is not joined to the cluster yet")
}
}
@@ -283,20 +327,6 @@ func (c *AdminClient) GrantBucketPermissions(
})
}
-// Role descibes a node's role in the garage cluster, i.e. what storage it is
-// providing.
-type Role struct {
-ID string `json:"id"`
-Capacity int `json:"capacity"` // Gb (SI units)
-Zone string `json:"zone"`
-Tags []string `json:"tags"`
-}
-// ClusterLayout describes the layout of the cluster as a whole.
-type ClusterLayout struct {
-Roles []Role `json:"roles"`
-}
// GetLayout returns the currently applied ClusterLayout.
func (c *AdminClient) GetLayout(ctx context.Context) (ClusterLayout, error) {
// https://garagehq.deuxfleurs.fr/api/garage-admin-v1.html#tag/Layout/operation/GetLayout
@@ -323,13 +353,7 @@ func (c *AdminClient) ApplyLayout(
roles = append(roles, removeRole{ID: id, Remove: true})
}
-// https://garagehq.deuxfleurs.fr/api/garage-admin-v1.html#tag/Layout/operation/GetLayout
-var clusterLayout struct {
-Version int `json:"version"`
-StagedRoleChanges []Role `json:"stagedRoleChanges"`
-}
+var clusterLayout ClusterLayout
// https://garagehq.deuxfleurs.fr/api/garage-admin-v1.html#tag/Layout/operation/ApplyLayout
err := c.do(ctx, &clusterLayout, "POST", "/v1/layout", roles)
if err != nil {
return fmt.Errorf("staging layout changes: %w", err)
@@ -337,7 +361,6 @@
return nil
}
-// https://garagehq.deuxfleurs.fr/api/garage-admin-v1.html#tag/Layout/operation/ApplyLayout
applyClusterLayout := struct {
Version int `json:"version"`
}{