Implement removal of nodes from the garage layout

This commit is contained in:
Brian Picciano 2024-10-31 13:04:19 +01:00
parent 8dab458291
commit 2cdec586b2
6 changed files with 210 additions and 87 deletions

View File

@ -6,6 +6,7 @@ import (
"isle/daemon/daecommon"
"isle/garage/garagesrv"
"isle/jsonutil"
"maps"
"os"
"path/filepath"
)
@ -67,6 +68,7 @@ func coalesceNetworkConfigAndBootstrap(
}
}
hostBootstrap.Hosts = maps.Clone(hostBootstrap.Hosts)
hostBootstrap.Hosts[host.Name] = host
return hostBootstrap, nil

View File

@ -61,15 +61,12 @@ func newGarageAdminClient(
logger *mlog.Logger,
networkConfig daecommon.NetworkConfig,
adminToken string,
hostBootstrap bootstrap.Bootstrap,
host bootstrap.Host,
) *garage.AdminClient {
thisHost := hostBootstrap.ThisHost()
return garage.NewAdminClient(
garageAdminClientLogger(logger),
net.JoinHostPort(
thisHost.IP().String(),
host.IP().String(),
strconv.Itoa(networkConfig.Storage.Allocations[0].AdminPort),
),
adminToken,
@ -81,24 +78,26 @@ func garageApplyLayout(
logger *mlog.Logger,
networkConfig daecommon.NetworkConfig,
adminToken string,
hostBootstrap bootstrap.Bootstrap,
prevHost, currHost bootstrap.Host,
) error {
var (
adminClient = newGarageAdminClient(
logger, networkConfig, adminToken, hostBootstrap,
logger, networkConfig, adminToken, currHost,
)
thisHost = hostBootstrap.ThisHost()
hostName = thisHost.Name
hostName = currHost.Name
allocs = networkConfig.Storage.Allocations
peers = make([]garage.PeerLayout, len(allocs))
peerIDs = map[string]struct{}{}
idsToRemove = make([]string, 0, len(prevHost.Garage.Instances))
)
defer adminClient.Close()
for i, alloc := range allocs {
id := daecommon.BootstrapGarageHostForAlloc(thisHost, alloc).ID
id := daecommon.BootstrapGarageHostForAlloc(currHost, alloc).ID
peerIDs[id] = struct{}{}
zone := string(hostName)
if alloc.Zone != "" {
@ -113,7 +112,13 @@ func garageApplyLayout(
}
}
return adminClient.ApplyLayout(ctx, peers)
for _, prevInst := range prevHost.Garage.Instances {
if _, ok := peerIDs[prevInst.ID]; !ok {
idsToRemove = append(idsToRemove, prevInst.ID)
}
}
return adminClient.ApplyLayout(ctx, peers, idsToRemove)
}
func garageInitializeGlobalBucket(
@ -121,13 +126,11 @@ func garageInitializeGlobalBucket(
logger *mlog.Logger,
networkConfig daecommon.NetworkConfig,
adminToken string,
hostBootstrap bootstrap.Bootstrap,
host bootstrap.Host,
) (
garage.S3APICredentials, error,
) {
adminClient := newGarageAdminClient(
logger, networkConfig, adminToken, hostBootstrap,
)
adminClient := newGarageAdminClient(logger, networkConfig, adminToken, host)
defer adminClient.Close()
creds, err := adminClient.CreateS3APICredentials(

View File

@ -335,7 +335,7 @@ func Create(
stateDir toolkit.Dir,
runtimeDir toolkit.Dir,
creationParams bootstrap.CreationParams,
ipNet nebula.IPNet, // TODO should this go in CreationParams?
ipNet nebula.IPNet,
hostName nebula.HostName,
opts *Opts,
) (
@ -408,14 +408,16 @@ func (n *network) initializeDirs(mayExist bool) error {
}
func (n *network) initialize(
ctx context.Context, currBootstrap bootstrap.Bootstrap,
ctx context.Context, prevBootstrap bootstrap.Bootstrap,
) error {
prevThisHost := prevBootstrap.ThisHost()
// we update this Host's data using whatever configuration has been provided
// by the daemon config. This way the network has the most up-to-date
// possible bootstrap. This updated bootstrap will later get updated in
// garage as a background task, so other hosts will see it as well.
currBootstrap, err := coalesceNetworkConfigAndBootstrap(
n.networkConfig, currBootstrap,
n.networkConfig, prevBootstrap,
)
if err != nil {
return fmt.Errorf("combining configuration into bootstrap: %w", err)
@ -447,7 +449,7 @@ func (n *network) initialize(
n.logger.Info(ctx, "Child processes created")
if err := n.postInit(ctx); err != nil {
if err := n.postInit(ctx, prevThisHost); err != nil {
n.logger.Error(ctx, "Post-initialization failed, stopping child processes", err)
n.children.Shutdown()
return fmt.Errorf("performing post-initialization: %w", err)
@ -480,15 +482,22 @@ func (n *network) initialize(
return nil
}
func (n *network) postInit(ctx context.Context) error {
if len(n.networkConfig.Storage.Allocations) > 0 {
func (n *network) postInit(
ctx context.Context, prevThisHost bootstrap.Host,
) error {
n.l.RLock()
defer n.l.RUnlock()
thisHost := n.currBootstrap.ThisHost()
if len(prevThisHost.Garage.Instances)+len(thisHost.Garage.Instances) > 0 {
n.logger.Info(ctx, "Applying garage layout")
if err := garageApplyLayout(
ctx,
n.logger,
n.networkConfig,
n.opts.GarageAdminToken,
n.currBootstrap,
prevThisHost, thisHost,
); err != nil {
return fmt.Errorf("applying garage layout: %w", err)
}
@ -509,7 +518,7 @@ func (n *network) postInit(ctx context.Context) error {
n.logger,
n.networkConfig,
n.opts.GarageAdminToken,
n.currBootstrap,
thisHost,
)
if err != nil {
return fmt.Errorf("initializing global bucket: %w", err)
@ -559,8 +568,7 @@ func (n *network) reloadHosts(ctx context.Context) error {
thisHost := currBootstrap.ThisHost()
newBootstrap.Hosts[thisHost.Name] = thisHost
err = n.reload(ctx, nil, &newBootstrap)
if err != nil {
if _, err = n.reload(ctx, nil, &newBootstrap); err != nil {
return fmt.Errorf("reloading with new host data: %w", err)
}
@ -586,14 +594,19 @@ func (n *network) reloadLoop(ctx context.Context) {
}
}
// returns the bootstrap prior to the reload being applied.
func (n *network) reload(
ctx context.Context,
newNetworkConfig *daecommon.NetworkConfig,
newBootstrap *bootstrap.Bootstrap,
) error {
) (
bootstrap.Bootstrap, error,
) {
n.l.Lock()
defer n.l.Unlock()
prevBootstrap := n.currBootstrap
if newBootstrap != nil {
n.currBootstrap = *newBootstrap
}
@ -606,22 +619,28 @@ func (n *network) reload(
if n.currBootstrap, err = coalesceNetworkConfigAndBootstrap(
n.networkConfig, n.currBootstrap,
); err != nil {
return fmt.Errorf("combining configuration into bootstrap: %w", err)
return bootstrap.Bootstrap{}, fmt.Errorf(
"combining configuration into bootstrap: %w", err,
)
}
n.logger.Info(ctx, "Writing updated bootstrap to state dir")
err = writeBootstrapToStateDir(n.stateDir.Path, n.currBootstrap)
if err != nil {
return fmt.Errorf("writing bootstrap to state dir: %w", err)
return bootstrap.Bootstrap{}, fmt.Errorf(
"writing bootstrap to state dir: %w", err,
)
}
n.logger.Info(ctx, "Reloading child processes")
err = n.children.Reload(ctx, n.networkConfig, n.currBootstrap)
if err != nil {
return fmt.Errorf("reloading child processes: %w", err)
return bootstrap.Bootstrap{}, fmt.Errorf(
"reloading child processes: %w", err,
)
}
return nil
return prevBootstrap, nil
}
func withCurrBootstrap[Res any](
@ -873,8 +892,7 @@ func (n *network) CreateHost(
newBootstrap.Hosts = joiningBootstrap.Bootstrap.Hosts
n.logger.Info(ctx, "Reloading local state with new host")
err = n.reload(ctx, nil, &newBootstrap)
if err != nil {
if _, err = n.reload(ctx, nil, &newBootstrap); err != nil {
return JoiningBootstrap{}, fmt.Errorf("reloading child processes: %w", err)
}
@ -921,11 +939,12 @@ func (n *network) GetConfig(context.Context) (daecommon.NetworkConfig, error) {
func (n *network) SetConfig(
ctx context.Context, config daecommon.NetworkConfig,
) error {
if err := n.reload(ctx, &config, nil); err != nil {
prevBootstrap, err := n.reload(ctx, &config, nil)
if err != nil {
return fmt.Errorf("reloading config: %w", err)
}
if err := n.postInit(ctx); err != nil {
if err := n.postInit(ctx, prevBootstrap.ThisHost()); err != nil {
return fmt.Errorf("performing post-initialization: %w", err)
}

View File

@ -19,7 +19,9 @@ func TestCreate(t *testing.T) {
gotCreationParams, err := LoadCreationParams(network.stateDir)
assert.NoError(t, err)
assert.Equal(t, gotCreationParams, network.creationParams)
assert.Equal(
t, gotCreationParams, network.getBootstrap(t).NetworkCreationParams,
)
}
func TestLoad(t *testing.T) {
@ -37,7 +39,7 @@ func TestLoad(t *testing.T) {
loadedNetwork, err := Load(
h.ctx,
h.logger.WithNamespace("loadedNetwork"),
network.networkConfig,
network.getConfig(t),
getEnvBinDirPath(),
network.stateDir,
h.mkDir(t, "runtime"),
@ -76,18 +78,34 @@ func TestNetwork_GetConfig(t *testing.T) {
config, err := network.GetConfig(h.ctx)
assert.NoError(t, err)
assert.Equal(t, config, network.networkConfig)
assert.Equal(t, config, network.getConfig(t))
}
func TestNetwork_SetConfig(t *testing.T) {
t.Run("adding storage alloc", func(t *testing.T) {
allocsToPeerLayouts := func(
hostName nebula.HostName, allocs []bootstrap.GarageHostInstance,
) []garage.PeerLayout {
peers := make([]garage.PeerLayout, len(allocs))
for i := range allocs {
peers[i] = garage.PeerLayout{
ID: allocs[i].ID,
Capacity: 1_000_000_000,
Zone: string(hostName),
Tags: []string{},
}
}
return peers
}
t.Run("add storage alloc", func(t *testing.T) {
var (
h = newIntegrationHarness(t)
network = h.createNetwork(t, "primus", nil)
h = newIntegrationHarness(t)
network = h.createNetwork(t, "primus", nil)
networkConfig = network.getConfig(t)
)
network.networkConfig.Storage.Allocations = append(
network.networkConfig.Storage.Allocations,
networkConfig.Storage.Allocations = append(
networkConfig.Storage.Allocations,
daecommon.ConfigStorageAllocation{
DataPath: h.mkDir(t, "data").Path,
MetaPath: h.mkDir(t, "meta").Path,
@ -98,17 +116,10 @@ func TestNetwork_SetConfig(t *testing.T) {
},
)
assert.NoError(t, network.SetConfig(h.ctx, network.networkConfig))
// Check that the Host information was updated
newHosts, err := network.GetHosts(h.ctx)
assert.NoError(t, err)
newHostsByName := map[nebula.HostName]bootstrap.Host{}
for _, h := range newHosts {
newHostsByName[h.Name] = h
}
assert.NoError(t, network.SetConfig(h.ctx, networkConfig))
t.Log("Checking that the Host information was updated")
newHostsByName := network.getHostsByName(t)
newHost, ok := newHostsByName[network.hostName]
assert.True(t, ok)
@ -123,34 +134,60 @@ func TestNetwork_SetConfig(t *testing.T) {
RPCPort: 4901,
}, newAlloc)
// Check that the bootstrap file was written with the new host config
t.Log("Checking that the bootstrap file was written with the new host config")
var storedBootstrap bootstrap.Bootstrap
assert.NoError(t, jsonutil.LoadFile(
&storedBootstrap, bootstrap.StateDirPath(network.stateDir.Path),
))
assert.Equal(t, newHostsByName, storedBootstrap.Hosts)
// Check that garage layout contains the new allocation
garageAdminClient := newGarageAdminClient(
h.logger,
network.networkConfig,
network.opts.GarageAdminToken,
storedBootstrap,
)
layout, err := garageAdminClient.GetLayout(h.ctx)
t.Log("Checking that garage layout contains the new allocation")
expPeers := allocsToPeerLayouts(network.hostName, allocs)
layout, err := network.garageAdminClient(t).GetLayout(h.ctx)
assert.NoError(t, err)
expPeers := make([]garage.PeerLayout, len(allocs))
for i := range allocs {
expPeers[i] = garage.PeerLayout{
ID: allocs[i].ID,
Capacity: 1_000_000_000,
Zone: string(network.hostName),
Tags: []string{},
}
}
assert.ElementsMatch(t, expPeers, layout.Peers)
})
t.Run("remove storage alloc", func(t *testing.T) {
var (
h = newIntegrationHarness(t)
network = h.createNetwork(t, "primus", &createNetworkOpts{
numStorageAllocs: 4,
})
networkConfig = network.getConfig(t)
prevHost = network.getHostsByName(t)[network.hostName]
removedAlloc = networkConfig.Storage.Allocations[3]
removedGarageInst = daecommon.BootstrapGarageHostForAlloc(
prevHost, removedAlloc,
)
)
networkConfig.Storage.Allocations = networkConfig.Storage.Allocations[:3]
assert.NoError(t, network.SetConfig(h.ctx, networkConfig))
t.Log("Checking that the Host information was updated")
newHostsByName := network.getHostsByName(t)
newHost, ok := newHostsByName[network.hostName]
assert.True(t, ok)
allocs := newHost.HostConfigured.Garage.Instances
assert.Len(t, allocs, 3)
assert.NotContains(t, allocs, removedGarageInst)
t.Log("Checking that the bootstrap file was written with the new host config")
var storedBootstrap bootstrap.Bootstrap
assert.NoError(t, jsonutil.LoadFile(
&storedBootstrap, bootstrap.StateDirPath(network.stateDir.Path),
))
assert.Equal(t, newHostsByName, storedBootstrap.Hosts)
t.Log("Checking that garage layout contains the new allocation")
expPeers := allocsToPeerLayouts(network.hostName, allocs)
layout, err := network.garageAdminClient(t).GetLayout(h.ctx)
assert.NoError(t, err)
assert.ElementsMatch(t, expPeers, layout.Peers)
})
// TODO a host having allocs but removing all of them
}

View File

@ -6,6 +6,7 @@ import (
"fmt"
"isle/bootstrap"
"isle/daemon/daecommon"
"isle/garage"
"isle/nebula"
"isle/toolkit"
"os"
@ -167,8 +168,9 @@ func (h *integrationHarness) mkNetworkConfig(
}
type createNetworkOpts struct {
creationParams bootstrap.CreationParams
manualShutdown bool
creationParams bootstrap.CreationParams
manualShutdown bool
numStorageAllocs int
}
func (o *createNetworkOpts) withDefaults() *createNetworkOpts {
@ -180,15 +182,19 @@ func (o *createNetworkOpts) withDefaults() *createNetworkOpts {
o.creationParams = bootstrap.NewCreationParams("test", "test.localnet")
}
if o.numStorageAllocs == 0 {
o.numStorageAllocs = 3
}
return o
}
type integrationHarnessNetwork struct {
Network
ctx context.Context
logger *mlog.Logger
hostName nebula.HostName
creationParams bootstrap.CreationParams
networkConfig daecommon.NetworkConfig
stateDir, runtimeDir toolkit.Dir
opts *Opts
}
@ -202,9 +208,10 @@ func (h *integrationHarness) createNetwork(
opts = opts.withDefaults()
var (
logger = h.logger.WithNamespace("network").WithNamespace(hostNameStr)
networkConfig = h.mkNetworkConfig(t, &networkConfigOpts{
hasPublicAddr: true,
numStorageAllocs: 3,
numStorageAllocs: opts.numStorageAllocs,
})
stateDir = h.mkDir(t, "state")
@ -220,7 +227,7 @@ func (h *integrationHarness) createNetwork(
network, err := Create(
h.ctx,
h.logger.WithNamespace("network").WithNamespace(hostNameStr),
logger,
networkConfig,
getEnvBinDirPath(),
stateDir,
@ -245,9 +252,9 @@ func (h *integrationHarness) createNetwork(
return integrationHarnessNetwork{
network,
h.ctx,
logger,
hostName,
opts.creationParams,
networkConfig,
stateDir,
runtimeDir,
networkOpts,
@ -288,6 +295,7 @@ func (h *integrationHarness) joinNetwork(
}
var (
logger = h.logger.WithNamespace("network").WithNamespace(hostNameStr)
networkConfig = h.mkNetworkConfig(t, opts.networkConfigOpts)
stateDir = h.mkDir(t, "state")
runtimeDir = h.mkDir(t, "runtime")
@ -299,7 +307,7 @@ func (h *integrationHarness) joinNetwork(
t.Logf("Joining as %q", hostNameStr)
joinedNetwork, err := Join(
h.ctx,
h.logger.WithNamespace("network").WithNamespace(hostNameStr),
logger,
networkConfig,
joiningBootstrap,
getEnvBinDirPath(),
@ -322,11 +330,52 @@ func (h *integrationHarness) joinNetwork(
return integrationHarnessNetwork{
joinedNetwork,
h.ctx,
logger,
hostName,
network.creationParams,
networkConfig,
stateDir,
runtimeDir,
networkOpts,
}
}
func (nh *integrationHarnessNetwork) getConfig(t *testing.T) daecommon.NetworkConfig {
networkConfig, err := nh.Network.GetConfig(nh.ctx)
require.NoError(t, err)
return networkConfig
}
func (nh *integrationHarnessNetwork) getBootstrap(
t *testing.T,
) bootstrap.Bootstrap {
currBootstrap, err := nh.Network.(*network).getBootstrap()
require.NoError(t, err)
return currBootstrap
}
func (nh *integrationHarnessNetwork) garageAdminClient(
t *testing.T,
) *garage.AdminClient {
c := newGarageAdminClient(
nh.logger,
nh.getConfig(t),
nh.opts.GarageAdminToken,
nh.getBootstrap(t).ThisHost(),
)
t.Cleanup(func() { assert.NoError(t, c.Close()) })
return c
}
func (nh *integrationHarnessNetwork) getHostsByName(
t *testing.T,
) map[nebula.HostName]bootstrap.Host {
hosts, err := nh.Network.GetHosts(nh.ctx)
require.NoError(t, err)
hostsByName := map[nebula.HostName]bootstrap.Host{}
for _, h := range hosts {
hostsByName[h.Name] = h
}
return hostsByName
}

View File

@ -293,10 +293,23 @@ func (c *AdminClient) GetLayout(ctx context.Context) (ClusterLayout, error) {
}
// ApplyLayout modifies the layout of the garage cluster. Only layout of the
// given peers will be modified/created, other peers are not affected.
// given peers will be modified/created/removed, other peers are not affected.
func (c *AdminClient) ApplyLayout(
ctx context.Context, peers []PeerLayout,
ctx context.Context, addModifyPeers []PeerLayout, removePeerIDs []string,
) error {
type removePeer struct {
ID string `json:"id"`
Remove bool `json:"remove"`
}
peers := make([]any, 0, len(addModifyPeers)+len(removePeerIDs))
for _, p := range addModifyPeers {
peers = append(peers, p)
}
for _, id := range removePeerIDs {
peers = append(peers, removePeer{ID: id, Remove: true})
}
{
// https://garagehq.deuxfleurs.fr/api/garage-admin-v1.html#tag/Layout/operation/ApplyLayout
err := c.do(ctx, nil, "POST", "/v1/layout", peers)