Have hosts update garage cluster layout to remove other nodes if necessary
This commit is contained in:
parent
53a06af9ba
commit
032bdb9e43
@ -1,8 +1,8 @@
|
|||||||
# Contributing Storage
|
# Contributing Storage
|
||||||
|
|
||||||
If your host machine can be reasonably sure of being online most, if not all, of
|
This document is for you if your host machine can be reliably be online at all
|
||||||
the time, and has 1GB or more of unused drive space you'd like to contribute to
|
times and has 1GB or more of unused drive space you'd like to contribute to the
|
||||||
the network, then this document is for you.
|
network.
|
||||||
|
|
||||||
## Edit `daemon.yml`
|
## Edit `daemon.yml`
|
||||||
|
|
||||||
@ -16,7 +16,7 @@ one allocation listed.
|
|||||||
The comments in the file should be self-explanatory, but ask your admin if you
|
The comments in the file should be self-explanatory, but ask your admin if you
|
||||||
need any clarification.
|
need any clarification.
|
||||||
|
|
||||||
Here are an example set of allocations for a host which is contributing space
|
Here is an example set of allocations for a host which is contributing space
|
||||||
from two separate drives:
|
from two separate drives:
|
||||||
|
|
||||||
```
|
```
|
||||||
@ -47,6 +47,16 @@ process.
|
|||||||
The `isle daemon` will automatically allow the ports used for your
|
The `isle daemon` will automatically allow the ports used for your
|
||||||
storage allocations in the vpn firewall.
|
storage allocations in the vpn firewall.
|
||||||
|
|
||||||
|
## Removing Allocations
|
||||||
|
|
||||||
|
If you later decide to no longer provide storage simply remove the
|
||||||
|
`storage.allocations` item from your `/etc/isle/daemon.yml` file and restart the
|
||||||
|
`isle daemon` process.
|
||||||
|
|
||||||
|
Once removed, it is advisable to wait some time before removing storage
|
||||||
|
allocations from other hosts. This ensures that all data which was previously
|
||||||
|
on this host has had a chance to fully replicate to multiple other hosts.
|
||||||
|
|
||||||
## Further Reading
|
## Further Reading
|
||||||
|
|
||||||
Isle uses the [garage][garage] project for its storage system. See the
|
Isle uses the [garage][garage] project for its storage system. See the
|
||||||
|
@ -184,7 +184,6 @@ func (c *Children) reloadNebula(
|
|||||||
return nil
|
return nil
|
||||||
}
|
}
|
||||||
|
|
||||||
// TODO this doesn't handle removing garage nodes
|
|
||||||
func (c *Children) reloadGarage(
|
func (c *Children) reloadGarage(
|
||||||
ctx context.Context,
|
ctx context.Context,
|
||||||
networkConfig daecommon.NetworkConfig,
|
networkConfig daecommon.NetworkConfig,
|
||||||
@ -206,6 +205,8 @@ func (c *Children) reloadGarage(
|
|||||||
)
|
)
|
||||||
)
|
)
|
||||||
|
|
||||||
|
// TODO it's possible that the config changed, but only the bootstrap
|
||||||
|
// peers, in which case we don't need to restart the node.
|
||||||
childConfigPath, changed, err := garageWriteChildConfig(
|
childConfigPath, changed, err := garageWriteChildConfig(
|
||||||
ctx,
|
ctx,
|
||||||
c.logger,
|
c.logger,
|
||||||
|
@ -12,13 +12,16 @@ import (
|
|||||||
"isle/nebula"
|
"isle/nebula"
|
||||||
"isle/secrets"
|
"isle/secrets"
|
||||||
"net"
|
"net"
|
||||||
|
"net/netip"
|
||||||
"path/filepath"
|
"path/filepath"
|
||||||
|
"slices"
|
||||||
"strconv"
|
"strconv"
|
||||||
"time"
|
"time"
|
||||||
|
|
||||||
"dev.mediocregopher.com/mediocre-go-lib.git/mctx"
|
"dev.mediocregopher.com/mediocre-go-lib.git/mctx"
|
||||||
"dev.mediocregopher.com/mediocre-go-lib.git/mlog"
|
"dev.mediocregopher.com/mediocre-go-lib.git/mlog"
|
||||||
"github.com/minio/minio-go/v7"
|
"github.com/minio/minio-go/v7"
|
||||||
|
"golang.org/x/exp/maps"
|
||||||
)
|
)
|
||||||
|
|
||||||
// Paths within garage's global bucket.
|
// Paths within garage's global bucket.
|
||||||
@ -341,3 +344,59 @@ func garageWaitForAlloc(
|
|||||||
return nil
|
return nil
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// garageNodeBuddyPeers returns the "buddy" peers of the given host, based on
|
||||||
|
// the given garage cluster status. It will return zero values if the host has
|
||||||
|
// no buddy.
|
||||||
|
//
|
||||||
|
// For situations where we want one host to affect the cluster layout of another
|
||||||
|
// host's peers, we use a simple system to determine a single host which is
|
||||||
|
// responsible. The goal is not to be 100% race-proof (garage handles that), but
|
||||||
|
// rather to try to prevent all hosts from modifying the same host's layout at
|
||||||
|
// the same time.
|
||||||
|
//
|
||||||
|
// The system is to order all hosts by their IP, and say that each host is
|
||||||
|
// responsible for (aka the "buddy" of) the host immediately after their own in
|
||||||
|
// that list. The last host in that list is responsible for the first.
|
||||||
|
func garageNodeBuddyPeers(
|
||||||
|
status garage.ClusterStatus, host bootstrap.Host,
|
||||||
|
) (
|
||||||
|
netip.Addr, []garage.Role,
|
||||||
|
) {
|
||||||
|
var (
|
||||||
|
thisIP = host.IP()
|
||||||
|
nodeRolesByIP = map[netip.Addr][]garage.Role{}
|
||||||
|
)
|
||||||
|
|
||||||
|
for _, node := range status.Nodes {
|
||||||
|
if node.Role == nil {
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
|
||||||
|
ip := node.Addr.Addr()
|
||||||
|
nodeRolesByIP[ip] = append(nodeRolesByIP[ip], *node.Role)
|
||||||
|
}
|
||||||
|
|
||||||
|
// If there is only a single host in the cluster (or, somehow, none) then
|
||||||
|
// that host has no buddy.
|
||||||
|
if len(nodeRolesByIP) < 2 {
|
||||||
|
return netip.Addr{}, nil
|
||||||
|
}
|
||||||
|
|
||||||
|
nodeIPs := maps.Keys(nodeRolesByIP)
|
||||||
|
slices.SortFunc(nodeIPs, netip.Addr.Compare)
|
||||||
|
|
||||||
|
for i, nodeIP := range nodeIPs {
|
||||||
|
var buddyIP netip.Addr
|
||||||
|
if i == len(nodeIPs)-1 {
|
||||||
|
buddyIP = nodeIPs[0]
|
||||||
|
} else if nodeIP == thisIP {
|
||||||
|
buddyIP = nodeIPs[i+1]
|
||||||
|
} else {
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
return buddyIP, nodeRolesByIP[buddyIP]
|
||||||
|
}
|
||||||
|
|
||||||
|
panic("Unreachable")
|
||||||
|
}
|
||||||
|
@ -419,7 +419,7 @@ func (n *network) initializeDirs(mayExist bool) error {
|
|||||||
}
|
}
|
||||||
|
|
||||||
func (n *network) periodically(
|
func (n *network) periodically(
|
||||||
logger *mlog.Logger,
|
label string,
|
||||||
fn func(context.Context) error,
|
fn func(context.Context) error,
|
||||||
period time.Duration,
|
period time.Duration,
|
||||||
) {
|
) {
|
||||||
@ -427,13 +427,13 @@ func (n *network) periodically(
|
|||||||
go func() {
|
go func() {
|
||||||
defer n.wg.Done()
|
defer n.wg.Done()
|
||||||
|
|
||||||
ctx := mctx.Annotate(n.workerCtx, "period", period)
|
ctx := mctx.Annotate(n.workerCtx, "workerLabel", label)
|
||||||
|
|
||||||
ticker := time.NewTicker(period)
|
ticker := time.NewTicker(period)
|
||||||
defer ticker.Stop()
|
defer ticker.Stop()
|
||||||
|
|
||||||
logger.Info(ctx, "Starting background job runner")
|
n.logger.Info(ctx, "Starting background job runner")
|
||||||
defer logger.Info(ctx, "Stopping background job runner")
|
defer n.logger.Info(ctx, "Stopping background job runner")
|
||||||
|
|
||||||
for {
|
for {
|
||||||
select {
|
select {
|
||||||
@ -441,9 +441,9 @@ func (n *network) periodically(
|
|||||||
return
|
return
|
||||||
|
|
||||||
case <-ticker.C:
|
case <-ticker.C:
|
||||||
logger.Info(ctx, "Background job running")
|
n.logger.Info(ctx, "Background job running")
|
||||||
if err := fn(ctx); err != nil {
|
if err := fn(ctx); err != nil {
|
||||||
logger.Error(ctx, "Background job failed", err)
|
n.logger.Error(ctx, "Background job failed", err)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -510,10 +510,10 @@ func (n *network) initialize(
|
|||||||
return fmt.Errorf("Reloading network bootstrap: %w", err)
|
return fmt.Errorf("Reloading network bootstrap: %w", err)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
n.periodically("reloadHosts", n.reloadHosts, 3*time.Minute)
|
||||||
|
|
||||||
n.periodically(
|
n.periodically(
|
||||||
n.logger.WithNamespace("reloadHosts"),
|
"removeOrphanGarageNodes", n.removeOrphanGarageNodes, 1*time.Minute,
|
||||||
n.reloadHosts,
|
|
||||||
3*time.Minute,
|
|
||||||
)
|
)
|
||||||
|
|
||||||
return nil
|
return nil
|
||||||
@ -531,7 +531,7 @@ func (n *network) postChildrenInit(
|
|||||||
|
|
||||||
thisHost := n.currBootstrap.ThisHost()
|
thisHost := n.currBootstrap.ThisHost()
|
||||||
|
|
||||||
if len(prevThisHost.Garage.Instances)+len(thisHost.Garage.Instances) > 0 {
|
if len(thisHost.Garage.Instances) > 0 {
|
||||||
n.logger.Info(ctx, "Applying garage layout")
|
n.logger.Info(ctx, "Applying garage layout")
|
||||||
if err := garageApplyLayout(
|
if err := garageApplyLayout(
|
||||||
ctx,
|
ctx,
|
||||||
@ -618,6 +618,77 @@ func (n *network) reloadHosts(ctx context.Context) error {
|
|||||||
return nil
|
return nil
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// In general each host will manage the garage cluster layout of its own storage
|
||||||
|
// allocations via garageApplyLayout. There are three situations which are
|
||||||
|
// handled here, rather than garageApplyLayout:
|
||||||
|
//
|
||||||
|
// - A host removes all of its allocations via SetConfig.
|
||||||
|
// - A host removes all of its allocations by calling Load with no allocations
|
||||||
|
// in the provided daecommon.NetworkConfig.
|
||||||
|
// - A host is removed from the network by another host.
|
||||||
|
//
|
||||||
|
// In all of these cases the host no longer has any garage instances running,
|
||||||
|
// and so can't call garageApplyLayout on itself. To combat this we have all
|
||||||
|
// hosts which do have garage instances running periodically check that there's
|
||||||
|
// not some garage nodes orphaned in the cluster layout, and remove them if so.
|
||||||
|
func (n *network) removeOrphanGarageNodes(ctx context.Context) error {
|
||||||
|
n.l.RLock()
|
||||||
|
defer n.l.RUnlock()
|
||||||
|
|
||||||
|
thisHost := n.currBootstrap.ThisHost()
|
||||||
|
if len(thisHost.Garage.Instances) == 0 {
|
||||||
|
n.logger.Info(ctx, "No local garage instances, cannot remove orphans")
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
|
||||||
|
adminClient := newGarageAdminClient(
|
||||||
|
n.logger, n.networkConfig, n.opts.GarageAdminToken, thisHost,
|
||||||
|
)
|
||||||
|
defer adminClient.Close()
|
||||||
|
|
||||||
|
clusterStatus, err := adminClient.Status(ctx)
|
||||||
|
if err != nil {
|
||||||
|
return fmt.Errorf("retrieving garage cluster status: %w", err)
|
||||||
|
}
|
||||||
|
|
||||||
|
buddyIP, buddyNodes := garageNodeBuddyPeers(clusterStatus, thisHost)
|
||||||
|
if len(buddyNodes) == 0 {
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
|
||||||
|
ctx = mctx.Annotate(ctx, "buddyIP", buddyIP)
|
||||||
|
|
||||||
|
for _, host := range n.currBootstrap.Hosts {
|
||||||
|
if host.IP() != buddyIP {
|
||||||
|
continue
|
||||||
|
} else if len(host.Garage.Instances) > 0 {
|
||||||
|
n.logger.Info(ctx, "Buddy instance has garage nodes configured in its bootstrap, doing nothing")
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
break
|
||||||
|
}
|
||||||
|
|
||||||
|
// Either the host is no longer in the network, or it no longer has any
|
||||||
|
// garage instances set on it. Either way, remove its nodes from the cluster
|
||||||
|
// layout.
|
||||||
|
|
||||||
|
buddyNodeIDs := make([]string, len(buddyNodes))
|
||||||
|
for i, buddyNode := range buddyNodes {
|
||||||
|
buddyNodeIDs[i] = buddyNode.ID
|
||||||
|
}
|
||||||
|
|
||||||
|
n.logger.Info(ctx, "Applying garage layout to remove orphaned garage nodes")
|
||||||
|
if err := adminClient.ApplyLayout(ctx, nil, buddyNodeIDs); err != nil {
|
||||||
|
return fmt.Errorf(
|
||||||
|
"applying garage cluster layout, removing nodes %+v: %w",
|
||||||
|
buddyNodes,
|
||||||
|
err,
|
||||||
|
)
|
||||||
|
}
|
||||||
|
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
|
||||||
// returns the bootstrap prior to the reload being applied.
|
// returns the bootstrap prior to the reload being applied.
|
||||||
func (n *network) reload(
|
func (n *network) reload(
|
||||||
ctx context.Context,
|
ctx context.Context,
|
||||||
|
@ -54,6 +54,7 @@ func TestLoad(t *testing.T) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
func TestJoin(t *testing.T) {
|
func TestJoin(t *testing.T) {
|
||||||
|
t.Run("simple", func(t *testing.T) {
|
||||||
var (
|
var (
|
||||||
h = newIntegrationHarness(t)
|
h = newIntegrationHarness(t)
|
||||||
primus = h.createNetwork(t, "primus", nil)
|
primus = h.createNetwork(t, "primus", nil)
|
||||||
@ -67,6 +68,30 @@ func TestJoin(t *testing.T) {
|
|||||||
assert.NoError(t, err)
|
assert.NoError(t, err)
|
||||||
|
|
||||||
assert.Equal(t, primusHosts, secondusHosts)
|
assert.Equal(t, primusHosts, secondusHosts)
|
||||||
|
})
|
||||||
|
|
||||||
|
t.Run("with alloc", func(t *testing.T) {
|
||||||
|
var (
|
||||||
|
h = newIntegrationHarness(t)
|
||||||
|
primus = h.createNetwork(t, "primus", nil)
|
||||||
|
secondus = h.joinNetwork(t, primus, "secondus", &joinNetworkOpts{
|
||||||
|
networkConfigOpts: &networkConfigOpts{
|
||||||
|
numStorageAllocs: 1,
|
||||||
|
},
|
||||||
|
})
|
||||||
|
)
|
||||||
|
|
||||||
|
t.Log("reloading primus' hosts")
|
||||||
|
assert.NoError(t, primus.Network.(*network).reloadHosts(h.ctx))
|
||||||
|
|
||||||
|
primusHosts, err := primus.GetHosts(h.ctx)
|
||||||
|
assert.NoError(t, err)
|
||||||
|
|
||||||
|
secondusHosts, err := secondus.GetHosts(h.ctx)
|
||||||
|
assert.NoError(t, err)
|
||||||
|
|
||||||
|
assert.Equal(t, primusHosts, secondusHosts)
|
||||||
|
})
|
||||||
}
|
}
|
||||||
|
|
||||||
func TestNetwork_GetConfig(t *testing.T) {
|
func TestNetwork_GetConfig(t *testing.T) {
|
||||||
@ -189,5 +214,54 @@ func TestNetwork_SetConfig(t *testing.T) {
|
|||||||
assert.ElementsMatch(t, expRoles, layout.Roles)
|
assert.ElementsMatch(t, expRoles, layout.Roles)
|
||||||
})
|
})
|
||||||
|
|
||||||
// TODO a host having allocs but removing all of them
|
t.Run("remove all storage allocs", func(t *testing.T) {
|
||||||
|
var (
|
||||||
|
h = newIntegrationHarness(t)
|
||||||
|
primus = h.createNetwork(t, "primus", nil)
|
||||||
|
secondus = h.joinNetwork(t, primus, "secondus", &joinNetworkOpts{
|
||||||
|
networkConfigOpts: &networkConfigOpts{
|
||||||
|
numStorageAllocs: 1,
|
||||||
|
},
|
||||||
|
})
|
||||||
|
networkConfig = secondus.getConfig(t)
|
||||||
|
|
||||||
|
prevHost = secondus.getHostsByName(t)[secondus.hostName]
|
||||||
|
removedAlloc = networkConfig.Storage.Allocations[0]
|
||||||
|
removedRole = allocsToRoles(
|
||||||
|
secondus.hostName, prevHost.Garage.Instances,
|
||||||
|
)[0]
|
||||||
|
removedGarageInst = daecommon.BootstrapGarageHostForAlloc(
|
||||||
|
prevHost, removedAlloc,
|
||||||
|
)
|
||||||
|
|
||||||
|
primusGarageAdminClient = primus.garageAdminClient(t)
|
||||||
|
)
|
||||||
|
|
||||||
|
networkConfig.Storage.Allocations = nil
|
||||||
|
assert.NoError(t, secondus.SetConfig(h.ctx, networkConfig))
|
||||||
|
|
||||||
|
t.Log("Checking that the Host information was updated")
|
||||||
|
newHostsByName := primus.getHostsByName(t)
|
||||||
|
newHost, ok := newHostsByName[secondus.hostName]
|
||||||
|
assert.True(t, ok)
|
||||||
|
|
||||||
|
allocs := newHost.HostConfigured.Garage.Instances
|
||||||
|
assert.Len(t, allocs, 3)
|
||||||
|
assert.NotContains(t, allocs, removedGarageInst)
|
||||||
|
|
||||||
|
t.Log("Checking that garage layout still contains the old allocation")
|
||||||
|
layout, err := primusGarageAdminClient.GetLayout(h.ctx)
|
||||||
|
assert.NoError(t, err)
|
||||||
|
assert.Contains(t, layout.Roles, removedRole)
|
||||||
|
|
||||||
|
t.Log("Removing orphan garage nodes with primus")
|
||||||
|
assert.NoError(
|
||||||
|
t, primus.Network.(*network).removeOrphanGarageNodes(h.ctx),
|
||||||
|
)
|
||||||
|
|
||||||
|
t.Log("Checking that garage layout no longer contains the old allocation")
|
||||||
|
layout, err = primusGarageAdminClient.GetLayout(h.ctx)
|
||||||
|
assert.NoError(t, err)
|
||||||
|
assert.NotContains(t, layout.Roles, removedRole)
|
||||||
|
})
|
||||||
}
|
}
|
||||||
|
@ -8,6 +8,7 @@ import (
|
|||||||
"io"
|
"io"
|
||||||
"net/http"
|
"net/http"
|
||||||
"net/http/httputil"
|
"net/http/httputil"
|
||||||
|
"net/netip"
|
||||||
"time"
|
"time"
|
||||||
|
|
||||||
"dev.mediocregopher.com/mediocre-go-lib.git/mctx"
|
"dev.mediocregopher.com/mediocre-go-lib.git/mctx"
|
||||||
@ -158,38 +159,81 @@ func (c *AdminClient) do(
|
|||||||
return nil
|
return nil
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// KnownNode describes the fields of a known node in the cluster, as returned
|
||||||
|
// as part of [ClusterStatus].
|
||||||
|
type KnownNode struct {
|
||||||
|
ID string `json:"id"`
|
||||||
|
Role *Role `json:"role"`
|
||||||
|
Addr netip.AddrPort `json:"addr"`
|
||||||
|
IsUp bool `json:"isUp"`
|
||||||
|
LastSeenSecsAgo int `json:"lastSeenSecsAgo"`
|
||||||
|
HostName string `json:"hostname"`
|
||||||
|
}
|
||||||
|
|
||||||
|
// Role descibes a node's role in the garage cluster, i.e. what storage it is
|
||||||
|
// providing.
|
||||||
|
type Role struct {
|
||||||
|
ID string `json:"id"`
|
||||||
|
Capacity int `json:"capacity"` // Gb (SI units)
|
||||||
|
Zone string `json:"zone"`
|
||||||
|
Tags []string `json:"tags"`
|
||||||
|
}
|
||||||
|
|
||||||
|
// ClusterLayout describes the layout of the cluster as a whole.
|
||||||
|
type ClusterLayout struct {
|
||||||
|
Version int `json:"version"`
|
||||||
|
Roles []Role `json:"roles"`
|
||||||
|
StagedRoleChanges []Role `json:"stagedRoleChanges"`
|
||||||
|
}
|
||||||
|
|
||||||
|
// ClusterStatus is returned from the Status endpoint, describing the currently
|
||||||
|
// known state of the cluster.
|
||||||
|
type ClusterStatus struct {
|
||||||
|
Nodes []KnownNode `json:"nodes"`
|
||||||
|
}
|
||||||
|
|
||||||
|
// Status returns the current state of the cluster.
|
||||||
|
func (c *AdminClient) Status(ctx context.Context) (ClusterStatus, error) {
|
||||||
|
// https://garagehq.deuxfleurs.fr/api/garage-admin-v1.html#tag/Nodes/operation/GetNodes
|
||||||
|
var clusterStatus ClusterStatus
|
||||||
|
err := c.do(ctx, &clusterStatus, "GET", "/v1/status", nil)
|
||||||
|
return clusterStatus, err
|
||||||
|
}
|
||||||
|
|
||||||
// Wait will block until the instance connected to can see at least
|
// Wait will block until the instance connected to can see at least
|
||||||
// ReplicationFactor other garage instances. If the context is canceled it
|
// ReplicationFactor other garage instances. If the context is canceled it
|
||||||
// will return the context error.
|
// will return the context error.
|
||||||
func (c *AdminClient) Wait(ctx context.Context) error {
|
func (c *AdminClient) Wait(ctx context.Context) error {
|
||||||
|
|
||||||
for first := true; ; first = false {
|
for first := true; ; first = false {
|
||||||
|
|
||||||
if !first {
|
if !first {
|
||||||
time.Sleep(250 * time.Millisecond)
|
select {
|
||||||
|
case <-time.After(2 * time.Second):
|
||||||
|
case <-ctx.Done():
|
||||||
|
return ctx.Err()
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// https://garagehq.deuxfleurs.fr/api/garage-admin-v1.html#tag/Nodes/operation/GetNodes
|
c.logger.Debug(ctx, "Getting cluster status")
|
||||||
var clusterStatus struct {
|
clusterStatus, err := c.Status(ctx)
|
||||||
Nodes []struct {
|
|
||||||
IsUp bool `json:"isUp"`
|
|
||||||
} `json:"nodes"`
|
|
||||||
}
|
|
||||||
|
|
||||||
err := c.do(ctx, &clusterStatus, "GET", "/v1/status", nil)
|
|
||||||
|
|
||||||
if ctxErr := ctx.Err(); ctxErr != nil {
|
if ctxErr := ctx.Err(); ctxErr != nil {
|
||||||
return ctxErr
|
return ctxErr
|
||||||
|
|
||||||
} else if err != nil {
|
} else if err != nil {
|
||||||
c.logger.Warn(ctx, "waiting for instance to become ready", err)
|
ctx := mctx.Annotate(ctx, "errMsg", err.Error())
|
||||||
|
c.logger.Info(ctx, "Instance is not online yet")
|
||||||
continue
|
continue
|
||||||
}
|
}
|
||||||
|
|
||||||
var numUp int
|
var numUp int
|
||||||
|
|
||||||
for _, node := range clusterStatus.Nodes {
|
for _, node := range clusterStatus.Nodes {
|
||||||
if node.IsUp {
|
// There seems to be some kind of step between IsUp becoming true
|
||||||
|
// and garage actually loading the full state of a node, so we check
|
||||||
|
// for the HostName as well. We could also use LastSeenSecsAgo, but
|
||||||
|
// that remains null for the node being queried so it's more
|
||||||
|
// annoying to use.
|
||||||
|
if node.IsUp && node.HostName != "" {
|
||||||
numUp++
|
numUp++
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -204,7 +248,7 @@ func (c *AdminClient) Wait(ctx context.Context) error {
|
|||||||
return nil
|
return nil
|
||||||
}
|
}
|
||||||
|
|
||||||
c.logger.Debug(ctx, "instance not online yet, will continue waiting")
|
c.logger.Info(ctx, "Instance is not joined to the cluster yet")
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -283,20 +327,6 @@ func (c *AdminClient) GrantBucketPermissions(
|
|||||||
})
|
})
|
||||||
}
|
}
|
||||||
|
|
||||||
// Role descibes a node's role in the garage cluster, i.e. what storage it is
|
|
||||||
// providing.
|
|
||||||
type Role struct {
|
|
||||||
ID string `json:"id"`
|
|
||||||
Capacity int `json:"capacity"` // Gb (SI units)
|
|
||||||
Zone string `json:"zone"`
|
|
||||||
Tags []string `json:"tags"`
|
|
||||||
}
|
|
||||||
|
|
||||||
// ClusterLayout describes the layout of the cluster as a whole.
|
|
||||||
type ClusterLayout struct {
|
|
||||||
Roles []Role `json:"roles"`
|
|
||||||
}
|
|
||||||
|
|
||||||
// GetLayout returns the currently applied ClusterLayout.
|
// GetLayout returns the currently applied ClusterLayout.
|
||||||
func (c *AdminClient) GetLayout(ctx context.Context) (ClusterLayout, error) {
|
func (c *AdminClient) GetLayout(ctx context.Context) (ClusterLayout, error) {
|
||||||
// https://garagehq.deuxfleurs.fr/api/garage-admin-v1.html#tag/Layout/operation/GetLayout
|
// https://garagehq.deuxfleurs.fr/api/garage-admin-v1.html#tag/Layout/operation/GetLayout
|
||||||
@ -323,13 +353,7 @@ func (c *AdminClient) ApplyLayout(
|
|||||||
roles = append(roles, removeRole{ID: id, Remove: true})
|
roles = append(roles, removeRole{ID: id, Remove: true})
|
||||||
}
|
}
|
||||||
|
|
||||||
// https://garagehq.deuxfleurs.fr/api/garage-admin-v1.html#tag/Layout/operation/GetLayout
|
var clusterLayout ClusterLayout
|
||||||
var clusterLayout struct {
|
|
||||||
Version int `json:"version"`
|
|
||||||
StagedRoleChanges []Role `json:"stagedRoleChanges"`
|
|
||||||
}
|
|
||||||
|
|
||||||
// https://garagehq.deuxfleurs.fr/api/garage-admin-v1.html#tag/Layout/operation/ApplyLayout
|
|
||||||
err := c.do(ctx, &clusterLayout, "POST", "/v1/layout", roles)
|
err := c.do(ctx, &clusterLayout, "POST", "/v1/layout", roles)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return fmt.Errorf("staging layout changes: %w", err)
|
return fmt.Errorf("staging layout changes: %w", err)
|
||||||
@ -337,7 +361,6 @@ func (c *AdminClient) ApplyLayout(
|
|||||||
return nil
|
return nil
|
||||||
}
|
}
|
||||||
|
|
||||||
// https://garagehq.deuxfleurs.fr/api/garage-admin-v1.html#tag/Layout/operation/ApplyLayout
|
|
||||||
applyClusterLayout := struct {
|
applyClusterLayout := struct {
|
||||||
Version int `json:"version"`
|
Version int `json:"version"`
|
||||||
}{
|
}{
|
||||||
|
Loading…
Reference in New Issue
Block a user