Brian Picciano 2024-11-05 22:31:57 +01:00
parent 53a06af9ba
commit 6032c6d8dc
5 changed files with 269 additions and 49 deletions

View File

@ -184,7 +184,6 @@ func (c *Children) reloadNebula(
return nil
}
// TODO this doesn't handle removing garage nodes
func (c *Children) reloadGarage(
ctx context.Context,
networkConfig daecommon.NetworkConfig,
@ -206,6 +205,8 @@ func (c *Children) reloadGarage(
)
)
// TODO it's possible that the config changed, but only the bootstrap
// peers, in which case we don't need to restart the node.
childConfigPath, changed, err := garageWriteChildConfig(
ctx,
c.logger,

View File

@ -12,13 +12,16 @@ import (
"isle/nebula"
"isle/secrets"
"net"
"net/netip"
"path/filepath"
"slices"
"strconv"
"time"
"dev.mediocregopher.com/mediocre-go-lib.git/mctx"
"dev.mediocregopher.com/mediocre-go-lib.git/mlog"
"github.com/minio/minio-go/v7"
"golang.org/x/exp/maps"
)
// Paths within garage's global bucket.
@ -341,3 +344,67 @@ func garageWaitForAlloc(
return nil
}
}
// garageNodeBuddyPeers returns the "buddy" peers of the given host, based on
// the given garage cluster status. It will return zero values if the host has
// no buddy.
//
// For situations where we want one host to affect the cluster layout of another
// host's peers, we use a simple system to determine a single host which is
// responsible. The goal is not to be 100% race-proof (garage handles that), but
// rather to try to prevent all hosts from modifying the same host's layout at
// the same time.
//
// The system is to order all hosts by their IP, and to make each host
// responsible for the host immediately after its own in that list (that host
// being its "buddy"). The last host in the list is responsible for the first.
func garageNodeBuddyPeers(
status garage.ClusterStatus, host bootstrap.Host,
) (
netip.Addr, []garage.Role,
) {
var (
thisIP = host.IP()
rolesByID = make(
map[string]garage.Role, len(status.Layout.Roles),
)
nodeRolesByIP = map[netip.Addr][]garage.Role{}
)
for _, role := range status.Layout.Roles {
rolesByID[role.ID] = role
}
for _, node := range status.Nodes {
role, ok := rolesByID[node.ID]
if !ok {
continue
}
ip := node.Addr.Addr()
nodeRolesByIP[ip] = append(nodeRolesByIP[ip], role)
}
// If there is only a single host in the cluster (or, somehow, none) then
// that host has no buddy.
if len(nodeRolesByIP) < 2 {
return netip.Addr{}, nil
}
nodeIPs := maps.Keys(nodeRolesByIP)
slices.SortFunc(nodeIPs, netip.Addr.Compare)
for i, nodeIP := range nodeIPs {
var buddyIP netip.Addr
if i == len(nodeIPs)-1 {
buddyIP = nodeIPs[0]
} else if nodeIP == thisIP {
buddyIP = nodeIPs[i+1]
} else {
continue
}
return buddyIP, nodeRolesByIP[buddyIP]
}
panic("Unreachable")
}
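// NOTE (editor's sketch, not part of this commit): to make the wrap-around
// rule concrete, with hypothetical hosts at 10.10.0.1, 10.10.0.2, and
// 10.10.0.3, sorting by IP gives that order, so .1 is responsible for .2,
// .2 for .3, and .3 wraps around to .1. The same selection in isolation
// (helper name is hypothetical; it relies only on netip and slices, which
// this file already imports):
func buddyIPOf(thisIP netip.Addr, nodeIPs []netip.Addr) netip.Addr {
	if len(nodeIPs) < 2 {
		return netip.Addr{} // a lone host has no buddy
	}
	slices.SortFunc(nodeIPs, netip.Addr.Compare)
	for i, ip := range nodeIPs {
		if ip == thisIP {
			// The next IP in sorted order, wrapping back to the first.
			return nodeIPs[(i+1)%len(nodeIPs)]
		}
	}
	return netip.Addr{} // thisIP is not part of the cluster status
}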

View File

@ -419,7 +419,7 @@ func (n *network) initializeDirs(mayExist bool) error {
}
func (n *network) periodically(
logger *mlog.Logger,
label string,
fn func(context.Context) error,
period time.Duration,
) {
@ -427,13 +427,13 @@ func (n *network) periodically(
go func() {
defer n.wg.Done()
ctx := mctx.Annotate(n.workerCtx, "period", period)
ctx := mctx.Annotate(n.workerCtx, "workerLabel", label)
ticker := time.NewTicker(period)
defer ticker.Stop()
logger.Info(ctx, "Starting background job runner")
defer logger.Info(ctx, "Stopping background job runner")
n.logger.Info(ctx, "Starting background job runner")
defer n.logger.Info(ctx, "Stopping background job runner")
for {
select {
@ -441,9 +441,9 @@ func (n *network) periodically(
return
case <-ticker.C:
logger.Info(ctx, "Background job running")
n.logger.Info(ctx, "Background job running")
if err := fn(ctx); err != nil {
logger.Error(ctx, "Background job failed", err)
n.logger.Error(ctx, "Background job failed", err)
}
}
}
@ -510,10 +510,10 @@ func (n *network) initialize(
return fmt.Errorf("Reloading network bootstrap: %w", err)
}
n.periodically("reloadHosts", n.reloadHosts, 3*time.Minute)
n.periodically(
n.logger.WithNamespace("reloadHosts"),
n.reloadHosts,
3*time.Minute,
"removeOrphanGarageNodes", n.removeOrphanGarageNodes, 1*time.Minute,
)
return nil
@ -531,7 +531,7 @@ func (n *network) postChildrenInit(
thisHost := n.currBootstrap.ThisHost()
if len(prevThisHost.Garage.Instances)+len(thisHost.Garage.Instances) > 0 {
if len(thisHost.Garage.Instances) > 0 {
n.logger.Info(ctx, "Applying garage layout")
if err := garageApplyLayout(
ctx,
@ -618,6 +618,77 @@ func (n *network) reloadHosts(ctx context.Context) error {
return nil
}
// In general each host will manage the garage cluster layout of its own storage
// allocations via garageApplyLayout. There are three situations which are
// handled here, rather than garageApplyLayout:
//
// - A host removes all of its allocations via SetConfig.
// - A host removes all of its allocations by calling Load with no allocations
// in the provided daecommon.NetworkConfig.
// - A host is removed from the network by another host.
//
// In all of these cases the host no longer has any garage instances running,
// and so can't call garageApplyLayout on itself. To handle this, every host
// which does have garage instances running periodically checks that no garage
// nodes have been orphaned in the cluster layout, and removes any which have.
func (n *network) removeOrphanGarageNodes(ctx context.Context) error {
n.l.RLock()
defer n.l.RUnlock()
thisHost := n.currBootstrap.ThisHost()
if len(thisHost.Garage.Instances) == 0 {
n.logger.Info(ctx, "No local garage instances, cannot remove orphans")
return nil
}
adminClient := newGarageAdminClient(
n.logger, n.networkConfig, n.opts.GarageAdminToken, thisHost,
)
defer adminClient.Close()
clusterStatus, err := adminClient.Status(ctx)
if err != nil {
return fmt.Errorf("retrieving garage cluster status: %w", err)
}
buddyIP, buddyNodes := garageNodeBuddyPeers(clusterStatus, thisHost)
if len(buddyNodes) == 0 {
return nil
}
ctx = mctx.Annotate(ctx, "buddyIP", buddyIP)
for _, host := range n.currBootstrap.Hosts {
if host.IP() != buddyIP {
continue
} else if len(host.Garage.Instances) > 0 {
n.logger.Info(ctx, "Buddy instance has garage nodes configured in its bootstrap, doing nothing")
return nil
}
break
}
// Either the host is no longer in the network, or it no longer has any
// garage instances set on it. Either way, remove its nodes from the cluster
// layout.
buddyNodeIDs := make([]string, len(buddyNodes))
for i, buddyNode := range buddyNodes {
buddyNodeIDs[i] = buddyNode.ID
}
n.logger.Info(ctx, "Applying garage layout to remove orphaned garage nodes")
if err := adminClient.ApplyLayout(ctx, nil, buddyNodeIDs); err != nil {
return fmt.Errorf(
"applying garage cluster layout, removing nodes %+v: %w",
buddyNodes,
err,
)
}
return nil
}
// returns the bootstrap prior to the reload being applied.
func (n *network) reload(
ctx context.Context,

View File

@ -54,19 +54,44 @@ func TestLoad(t *testing.T) {
}
func TestJoin(t *testing.T) {
var (
h = newIntegrationHarness(t)
primus = h.createNetwork(t, "primus", nil)
secondus = h.joinNetwork(t, primus, "secondus", nil)
)
t.Run("simple", func(t *testing.T) {
var (
h = newIntegrationHarness(t)
primus = h.createNetwork(t, "primus", nil)
secondus = h.joinNetwork(t, primus, "secondus", nil)
)
primusHosts, err := primus.GetHosts(h.ctx)
assert.NoError(t, err)
primusHosts, err := primus.GetHosts(h.ctx)
assert.NoError(t, err)
secondusHosts, err := secondus.GetHosts(h.ctx)
assert.NoError(t, err)
secondusHosts, err := secondus.GetHosts(h.ctx)
assert.NoError(t, err)
assert.Equal(t, primusHosts, secondusHosts)
assert.Equal(t, primusHosts, secondusHosts)
})
t.Run("with alloc", func(t *testing.T) {
var (
h = newIntegrationHarness(t)
primus = h.createNetwork(t, "primus", nil)
secondus = h.joinNetwork(t, primus, "secondus", &joinNetworkOpts{
networkConfigOpts: &networkConfigOpts{
numStorageAllocs: 1,
},
})
)
t.Log("reloading primus' hosts")
assert.NoError(t, primus.Network.(*network).reloadHosts(h.ctx))
primusHosts, err := primus.GetHosts(h.ctx)
assert.NoError(t, err)
secondusHosts, err := secondus.GetHosts(h.ctx)
assert.NoError(t, err)
assert.Equal(t, primusHosts, secondusHosts)
})
}
func TestNetwork_GetConfig(t *testing.T) {
@ -189,5 +214,33 @@ func TestNetwork_SetConfig(t *testing.T) {
assert.ElementsMatch(t, expRoles, layout.Roles)
})
// TODO a host having allocs but removing all of them
t.Run("remove all storage allocs", func(t *testing.T) {
var (
h = newIntegrationHarness(t)
primus = h.createNetwork(t, "primus", nil)
secondus = h.joinNetwork(t, primus, "secondus", &joinNetworkOpts{
networkConfigOpts: &networkConfigOpts{
numStorageAllocs: 1,
},
})
networkConfig = secondus.getConfig(t)
prevHost = secondus.getHostsByName(t)[secondus.hostName]
//removedAlloc = networkConfig.Storage.Allocations[0]
removedRole = allocsToRoles(
secondus.hostName, prevHost.Garage.Instances,
)[0]
//removedGarageInst = daecommon.BootstrapGarageHostForAlloc(
// prevHost, removedAlloc,
//)
)
networkConfig.Storage.Allocations = nil
assert.NoError(t, secondus.SetConfig(h.ctx, networkConfig))
t.Log("Checking that garage layout still contains the old allocation")
layout, err := secondus.garageAdminClient(t).GetLayout(h.ctx)
assert.NoError(t, err)
assert.Contains(t, layout.Roles, removedRole)
})
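	// NOTE (editor's sketch, not part of this commit): a natural follow-up
	// would drive the new removeOrphanGarageNodes job directly and assert that
	// the orphaned role actually gets pruned. This assumes primus keeps the
	// default storage allocations from createNetwork, and that it has picked
	// up secondus' updated bootstrap (hence the reloadHosts call).
	t.Run("remove all storage allocs, then prune orphans", func(t *testing.T) {
		var (
			h        = newIntegrationHarness(t)
			primus   = h.createNetwork(t, "primus", nil)
			secondus = h.joinNetwork(t, primus, "secondus", &joinNetworkOpts{
				networkConfigOpts: &networkConfigOpts{
					numStorageAllocs: 1,
				},
			})

			networkConfig = secondus.getConfig(t)
			prevHost      = secondus.getHostsByName(t)[secondus.hostName]

			removedRole = allocsToRoles(
				secondus.hostName, prevHost.Garage.Instances,
			)[0]
		)

		networkConfig.Storage.Allocations = nil
		assert.NoError(t, secondus.SetConfig(h.ctx, networkConfig))

		t.Log("reloading primus' hosts")
		assert.NoError(t, primus.Network.(*network).reloadHosts(h.ctx))

		t.Log("removing orphaned garage nodes from primus")
		assert.NoError(
			t, primus.Network.(*network).removeOrphanGarageNodes(h.ctx),
		)

		t.Log("Checking that the orphaned role was removed from the layout")
		layout, err := primus.garageAdminClient(t).GetLayout(h.ctx)
		assert.NoError(t, err)
		assert.NotContains(t, layout.Roles, removedRole)
	})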
}

View File

@ -8,6 +8,7 @@ import (
"io"
"net/http"
"net/http/httputil"
"net/netip"
"time"
"dev.mediocregopher.com/mediocre-go-lib.git/mctx"
@ -158,6 +159,45 @@ func (c *AdminClient) do(
return nil
}
// KnownNode describes the fields of a known node in the cluster, as returned
// as part of [ClusterStatus].
type KnownNode struct {
ID string `json:"id"`
Addr netip.AddrPort `json:"addr"`
IsUp bool `json:"isUp"`
LastSeenSecsAgo int `json:"lastSeenSecsAgo"`
HostName string `json:"hostname"`
}
// Role describes a node's role in the garage cluster, i.e. what storage it is
// providing.
type Role struct {
ID string `json:"id"`
Capacity int `json:"capacity"` // Gb (SI units)
Zone string `json:"zone"`
Tags []string `json:"tags"`
}
// ClusterLayout describes the layout of the cluster as a whole.
type ClusterLayout struct {
Roles []Role `json:"roles"`
}
// ClusterStatus is returned from the Status endpoint, describing the currently
// known state of the cluster.
type ClusterStatus struct {
Nodes []KnownNode `json:"nodes"`
Layout ClusterLayout `json:"layout"`
}
// Status returns the current state of the cluster.
func (c *AdminClient) Status(ctx context.Context) (ClusterStatus, error) {
// https://garagehq.deuxfleurs.fr/api/garage-admin-v1.html#tag/Nodes/operation/GetNodes
var clusterStatus ClusterStatus
err := c.do(ctx, &clusterStatus, "GET", "/v1/status", nil)
return clusterStatus, err
}
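// NOTE (editor's sketch, not part of this commit): a minimal consumer of
// Status, e.g. for logging or health checks, might only care how many known
// nodes are currently up. The method name is hypothetical.
func (c *AdminClient) numNodesUp(ctx context.Context) (int, error) {
	status, err := c.Status(ctx)
	if err != nil {
		return 0, err
	}

	// Count the nodes which report themselves as up.
	var n int
	for _, node := range status.Nodes {
		if node.IsUp {
			n++
		}
	}
	return n, nil
}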
// Wait will block until the instance connected to can see at least
// ReplicationFactor other garage instances. If the context is canceled it
// will return the context error.
@ -166,30 +206,32 @@ func (c *AdminClient) Wait(ctx context.Context) error {
for first := true; ; first = false {
if !first {
time.Sleep(250 * time.Millisecond)
select {
case <-time.After(500 * time.Millisecond):
case <-ctx.Done():
return ctx.Err()
}
}
// https://garagehq.deuxfleurs.fr/api/garage-admin-v1.html#tag/Nodes/operation/GetNodes
var clusterStatus struct {
Nodes []struct {
IsUp bool `json:"isUp"`
} `json:"nodes"`
}
err := c.do(ctx, &clusterStatus, "GET", "/v1/status", nil)
c.logger.Debug(ctx, "Getting cluster status")
clusterStatus, err := c.Status(ctx)
if ctxErr := ctx.Err(); ctxErr != nil {
return ctxErr
} else if err != nil {
c.logger.Warn(ctx, "waiting for instance to become ready", err)
c.logger.Warn(ctx, "Instance is not yet ready", err)
continue
}
var numUp int
for _, node := range clusterStatus.Nodes {
if node.IsUp {
// There seems to be a delay between IsUp becoming true and garage
// actually loading the full state of a node, so we also check that
// HostName is populated. We could use LastSeenSecsAgo instead, but
// that remains null for the node being queried, which makes it more
// awkward to use.
if node.IsUp && node.HostName != "" {
numUp++
}
}
@ -204,7 +246,7 @@ func (c *AdminClient) Wait(ctx context.Context) error {
return nil
}
c.logger.Debug(ctx, "instance not online yet, will continue waiting")
c.logger.Debug(ctx, "Instance not online yet, will continue waiting")
}
}
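// NOTE (editor's sketch, not part of this commit): because Wait now selects on
// ctx.Done() between polls instead of sleeping unconditionally, a caller can
// bound it with a deadline. The timeout value here is illustrative only:
//
//	waitCtx, cancel := context.WithTimeout(ctx, 2*time.Minute)
//	defer cancel()
//	if err := adminClient.Wait(waitCtx); err != nil {
//		// err is context.DeadlineExceeded if the instance never became ready
//		return err
//	}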
@ -283,20 +325,6 @@ func (c *AdminClient) GrantBucketPermissions(
})
}
// Role describes a node's role in the garage cluster, i.e. what storage it is
// providing.
type Role struct {
ID string `json:"id"`
Capacity int `json:"capacity"` // Gb (SI units)
Zone string `json:"zone"`
Tags []string `json:"tags"`
}
// ClusterLayout describes the layout of the cluster as a whole.
type ClusterLayout struct {
Roles []Role `json:"roles"`
}
// GetLayout returns the currently applied ClusterLayout.
func (c *AdminClient) GetLayout(ctx context.Context) (ClusterLayout, error) {
// https://garagehq.deuxfleurs.fr/api/garage-admin-v1.html#tag/Layout/operation/GetLayout