Get rid of orphan removal after investigation into race conditions, new solution pending
parent e3d4fc5a8e
commit 9e508ef4e2
@@ -13,16 +13,13 @@ import (
 	"isle/secrets"
 	"isle/toolkit"
 	"net"
-	"net/netip"
 	"path/filepath"
-	"slices"
 	"strconv"
 	"time"
 
 	"dev.mediocregopher.com/mediocre-go-lib.git/mctx"
 	"dev.mediocregopher.com/mediocre-go-lib.git/mlog"
 	"github.com/minio/minio-go/v7"
-	"golang.org/x/exp/maps"
 )
 
 // Paths within garage's global bucket.
@@ -218,6 +215,7 @@ func getGarageBootstrapHosts(
 		host, err := authedHost.Unwrap(currBootstrap.CAPublicCredentials)
 		if err != nil {
 			logger.Warn(ctx, "Host could not be authenticated", err)
+			continue
 		}
 
 		hosts[host.Name] = host
@@ -363,59 +361,3 @@ func garageWaitForAlloc(
 
 	return nil
 }
-
-// garageNodeBuddyPeers returns the "buddy" peers of the given host, based on
-// the given garage cluster status. It will return zero values if the host has
-// no buddy.
-//
-// For situations where we want one host to affect the cluster layout of another
-// host's peers, we use a simple system to determine a single host which is
-// responsible. The goal is not to be 100% race-proof (garage handles that), but
-// rather to try to prevent all hosts from modifying the same host's layout at
-// the same time.
-//
-// The system is to order all hosts by their IP, and say that each host is
-// responsible for (aka the "buddy" of) the host immediately after their own in
-// that list. The last host in that list is responsible for the first.
-func garageNodeBuddyPeers(
-	status garage.ClusterStatus, host bootstrap.Host,
-) (
-	netip.Addr, []garage.Role,
-) {
-	var (
-		thisIP        = host.IP()
-		nodeRolesByIP = map[netip.Addr][]garage.Role{}
-	)
-
-	for _, node := range status.Nodes {
-		if node.Role == nil {
-			continue
-		}
-
-		ip := node.Addr.Addr()
-		nodeRolesByIP[ip] = append(nodeRolesByIP[ip], *node.Role)
-	}
-
-	// If there is only a single host in the cluster (or, somehow, none) then
-	// that host has no buddy.
-	if len(nodeRolesByIP) < 2 {
-		return netip.Addr{}, nil
-	}
-
-	nodeIPs := maps.Keys(nodeRolesByIP)
-	slices.SortFunc(nodeIPs, netip.Addr.Compare)
-
-	for i, nodeIP := range nodeIPs {
-		var buddyIP netip.Addr
-		if i == len(nodeIPs)-1 {
-			buddyIP = nodeIPs[0]
-		} else if nodeIP == thisIP {
-			buddyIP = nodeIPs[i+1]
-		} else {
-			continue
-		}
-		return buddyIP, nodeRolesByIP[buddyIP]
-	}
-
-	panic("Unreachable")
-}
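
For reference, the buddy-selection scheme described by the removed comment can be illustrated with a tiny standalone program. The IPs below are made up for the example, and the selection loop simply mirrors the removed code:

package main

import (
	"fmt"
	"net/netip"
	"slices"
)

func main() {
	// Example: three hosts in the cluster, and "this" host is 10.0.0.3.
	thisIP := netip.MustParseAddr("10.0.0.3")
	nodeIPs := []netip.Addr{
		netip.MustParseAddr("10.0.0.2"),
		netip.MustParseAddr("10.0.0.3"),
		netip.MustParseAddr("10.0.0.1"),
	}

	// Hosts are ordered by IP; each host is the "buddy" of the next one in
	// the sorted list, and the last host wraps around to the first.
	slices.SortFunc(nodeIPs, netip.Addr.Compare)

	for i, nodeIP := range nodeIPs {
		var buddyIP netip.Addr
		if i == len(nodeIPs)-1 {
			buddyIP = nodeIPs[0]
		} else if nodeIP == thisIP {
			buddyIP = nodeIPs[i+1]
		} else {
			continue
		}
		fmt.Printf("buddy of %v is %v\n", thisIP, buddyIP) // buddy of 10.0.0.3 is 10.0.0.1
		return
	}
}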
@@ -160,6 +160,9 @@ type Opts struct {
 	// used, either that which it was most recently initialized with or which
 	// was passed to [SetConfig].
 	Config *daecommon.NetworkConfig
+
+	// testBlocker is used by tests to set blockpoints.
+	testBlocker *toolkit.TestBlocker
 }
 
 func (o *Opts) withDefaults() *Opts {
@@ -508,10 +511,6 @@ func (n *network) initialize(
 
 	n.periodically("reloadHosts", n.reloadHosts, 3*time.Minute)
 
-	n.periodically(
-		"removeOrphanGarageNodes", n.removeOrphanGarageNodes, 1*time.Minute,
-	)
-
 	return nil
 }
 
@@ -603,77 +602,6 @@ func (n *network) reloadHosts(ctx context.Context) error {
 	return nil
 }
 
-// In general each host will manage the garage cluster layout of its own storage
-// allocations via garageApplyLayout. There are three situations which are
-// handled here, rather than garageApplyLayout:
-//
-// - A host removes all of its allocations via SetConfig.
-// - A host removes all of its allocations by calling Load with no allocations
-//   in the provided daecommon.NetworkConfig.
-// - A host is removed from the network by another host.
-//
-// In all of these cases the host no longer has any garage instances running,
-// and so can't call garageApplyLayout on itself. To combat this we have all
-// hosts which do have garage instances running periodically check that there's
-// not some garage nodes orphaned in the cluster layout, and remove them if so.
-func (n *network) removeOrphanGarageNodes(ctx context.Context) error {
-	n.l.RLock()
-	defer n.l.RUnlock()
-
-	thisHost := n.currBootstrap.ThisHost()
-	if len(thisHost.Garage.Instances) == 0 {
-		n.logger.Info(ctx, "No local garage instances, cannot remove orphans")
-		return nil
-	}
-
-	adminClient := newGarageAdminClient(
-		n.logger, n.networkConfig, n.opts.GarageAdminToken, thisHost,
-	)
-	defer adminClient.Close()
-
-	clusterStatus, err := adminClient.Status(ctx)
-	if err != nil {
-		return fmt.Errorf("retrieving garage cluster status: %w", err)
-	}
-
-	buddyIP, buddyNodes := garageNodeBuddyPeers(clusterStatus, thisHost)
-	if len(buddyNodes) == 0 {
-		return nil
-	}
-
-	ctx = mctx.Annotate(ctx, "buddyIP", buddyIP)
-
-	for _, host := range n.currBootstrap.Hosts {
-		if host.IP() != buddyIP {
-			continue
-		} else if len(host.Garage.Instances) > 0 {
-			n.logger.Info(ctx, "Buddy instance has garage nodes configured in its bootstrap, doing nothing")
-			return nil
-		}
-		break
-	}
-
-	// Either the host is no longer in the network, or it no longer has any
-	// garage instances set on it. Either way, remove its nodes from the cluster
-	// layout.
-
-	buddyNodeIDs := make([]string, len(buddyNodes))
-	for i, buddyNode := range buddyNodes {
-		buddyNodeIDs[i] = buddyNode.ID
-	}
-
-	n.logger.Info(ctx, "Applying garage layout to remove orphaned garage nodes")
-	if err := adminClient.ApplyLayout(ctx, nil, buddyNodeIDs); err != nil {
-		return fmt.Errorf(
-			"applying garage cluster layout, removing nodes %+v: %w",
-			buddyNodes,
-			err,
-		)
-	}
-
-	return nil
-}
-
 // returns the bootstrap prior to the reload being applied.
 func (n *network) reload(
 	ctx context.Context,
@@ -1,15 +1,18 @@
 package network
 
 import (
+	"fmt"
 	"isle/bootstrap"
 	"isle/daemon/daecommon"
 	"isle/garage"
 	"isle/garage/garagesrv"
 	"isle/jsonutil"
 	"isle/nebula"
+	"isle/toolkit"
 	"os"
 	"path/filepath"
 	"testing"
+	"time"
 
 	"github.com/stretchr/testify/assert"
 	"github.com/stretchr/testify/require"
@@ -71,7 +74,7 @@ func TestJoin(t *testing.T) {
 		assert.Equal(t, primus.getHostsByName(t), secondus.getHostsByName(t))
 	})
 
-	t.Run("with alloc", func(t *testing.T) {
+	t.Run("with alloc/simple", func(t *testing.T) {
 		var (
 			h      = newIntegrationHarness(t)
 			primus = h.createNetwork(t, "primus", nil)
@@ -86,6 +89,61 @@ func TestJoin(t *testing.T) {
 		assert.NoError(t, primus.Network.(*network).reloadHosts(h.ctx))
 
 		assert.Equal(t, primus.getHostsByName(t), secondus.getHostsByName(t))
+		assertGarageLayout(t, map[*integrationHarnessNetwork]int{
+			primus:   3,
+			secondus: 1,
+		})
+	})
+
+	// Assert that if primus runs the orphan remover at the same moment that
+	// secondus is joining that the layout applied by secondus doesn't get
+	// overwritten.
+	t.Run("with alloc/remove orphans after garage layout applied", func(t *testing.T) {
+		t.Skip("This is currently expected to fail. Orphan removal is going to be reworked accordingly")
+
+		var (
+			h                 = newIntegrationHarness(t)
+			primus            = h.createNetwork(t, "primus", nil)
+			primusAdminClient = primus.garageAdminClient(t)
+			secondusBlocker   = toolkit.NewTestBlocker(t)
+		)
+
+		secondusBlocker.ExpectBlockpoint("garageLayoutApplied").On(
+			t, h.ctx, func() {
+				h.logger.Info(h.ctx, "Waiting for new layout to propagate to primus")
+				err := toolkit.UntilTrue(
+					h.ctx, h.logger, 1*time.Second, func() (bool, error) {
+						layout, err := primusAdminClient.GetLayout(h.ctx)
+						if err != nil {
+							return false, fmt.Errorf("getting layout: %w", err)
+						}
+
+						return len(layout.Roles) == 4, nil
+					},
+				)
+
+				if !assert.NoError(t, err) {
+					return
+				}
+
+				//h.logger.Info(h.ctx, "Calling removeOrphanGarageNodes")
+				//assert.NoError(
+				//	t, primus.Network.(*network).removeOrphanGarageNodes(h.ctx),
+				//)
+			},
+		)
+
+		secondus := h.joinNetwork(t, primus, "secondus", &joinNetworkOpts{
+			networkConfigOpts: &networkConfigOpts{
+				numStorageAllocs: 1,
+			},
+			blocker: secondusBlocker,
+		})
+
+		assertGarageLayout(t, map[*integrationHarnessNetwork]int{
+			primus:   3,
+			secondus: 1,
+		})
 	})
 }
 
@@ -285,6 +343,8 @@ func TestNetwork_SetConfig(t *testing.T) {
 	})
 
 	t.Run("remove all storage allocs", func(t *testing.T) {
+		t.Skip("This is currently expected to fail. Orphan removal is going to be reworked accordingly")
+
 		var (
 			h      = newIntegrationHarness(t)
 			primus = h.createNetwork(t, "primus", nil)
@@ -319,10 +379,10 @@
 		assert.NoError(t, err)
 		assert.Contains(t, layout.Roles, removedRole)
 
-		t.Log("Removing orphan garage nodes with primus")
-		assert.NoError(
-			t, primus.Network.(*network).removeOrphanGarageNodes(h.ctx),
-		)
+		//t.Log("Removing orphan garage nodes with primus")
+		//assert.NoError(
+		//	t, primus.Network.(*network).removeOrphanGarageNodes(h.ctx),
+		//)
 
 		t.Log("Checking that garage layout no longer contains the old allocation")
 		layout, err = primusGarageAdminClient.GetLayout(h.ctx)
@@ -1,6 +1,7 @@
 package network
 
 import (
+	"cmp"
 	"context"
 	"fmt"
 	"isle/bootstrap"
@@ -11,6 +12,7 @@ import (
 	"isle/toolkit"
 	"os"
 	"path/filepath"
+	"slices"
 	"sync"
 	"sync/atomic"
 	"testing"
@@ -251,6 +253,7 @@ type joinNetworkOpts struct {
 	*networkConfigOpts
 	canCreateHosts bool
 	manualShutdown bool
+	blocker        *toolkit.TestBlocker
 }
 
 func (o *joinNetworkOpts) withDefaults() *joinNetworkOpts {
@@ -288,6 +291,7 @@ func (h *integrationHarness) joinNetwork(
 		networkOpts = &Opts{
 			GarageAdminToken: "admin_token",
 			Config:           &networkConfig,
+			testBlocker:      opts.blocker,
 		}
 	)
 
@@ -382,3 +386,56 @@ func (nh *integrationHarnessNetwork) getHostsByName(
 	require.NoError(t, err)
 	return currBootstrap.Hosts
 }
+
+func assertGarageLayout(
+	t *testing.T,
+	wantLayout map[*integrationHarnessNetwork]int, // network -> num allocs
+) {
+	wantLayoutSimple := map[string]int{}
+	for nh, wantAllocs := range wantLayout {
+		wantLayoutSimple[string(nh.hostName)] = wantAllocs
+	}
+
+	normalizeLayout := func(layout *garage.ClusterLayout) {
+		slices.SortFunc(layout.Roles, func(a, b garage.Role) int {
+			return cmp.Compare(a.ID, b.ID)
+		})
+	}
+
+	assertSingle := func(
+		nh *integrationHarnessNetwork, layout garage.ClusterLayout,
+	) {
+		gotLayoutSimple := map[string]int{}
+		for _, role := range layout.Roles {
+			gotLayoutSimple[role.Zone]++
+		}
+		assert.Equal(t, wantLayoutSimple, gotLayoutSimple, "layout from %q", nh.hostName)
+	}
+
+	var (
+		lastLayoutHostName nebula.HostName
+		lastLayout         garage.ClusterLayout
+	)
+
+	for nh := range wantLayout {
+		layout, err := nh.garageAdminClient(t).GetLayout(nh.ctx)
+		assert.NoError(t, err)
+
+		normalizeLayout(&layout)
+		assertSingle(nh, layout)
+
+		if lastLayoutHostName != "" {
+			assert.Equal(
+				t,
+				lastLayout,
+				layout,
+				"layout of %q not equal to layout of %q",
+				lastLayoutHostName,
+				nh.hostName,
+			)
+		}
+
+		lastLayoutHostName = nh.hostName
+		lastLayout = layout
+	}
+}
go/toolkit/testutils_blocker.go (new file, 147 lines)
@@ -0,0 +1,147 @@
+package toolkit
+
+import (
+	"context"
+	"sync"
+	"testing"
+
+	"github.com/stretchr/testify/assert"
+)
+
+// ExpectedBlockpoint represents the expectation that Blockpoint will be called
+// on a TestBlocker. It is possible to both wait for the Blockpoint call to
+// occur and to unblock it once it has occurred.
+type ExpectedBlockpoint struct {
+	waitCh    chan struct{}
+	unblockCh chan struct{}
+}
+
+// Wait will block until the blockpoint has been hit and is itself blocking, or
+// will return the context error.
+func (eb ExpectedBlockpoint) Wait(ctx context.Context) error {
+	select {
+	case <-ctx.Done():
+		return ctx.Err()
+	case <-eb.waitCh:
+		return nil
+	}
+}
+
+// Unblock unblocks the Blockpoint call which is/was expected. Unblock can be
+// called prior to Wait being called (and therefore prior to the Blockpoint
+// being hit).
+func (eb ExpectedBlockpoint) Unblock() {
+	close(eb.unblockCh)
+}
+
+// On is a helper which will spawn a go-routine, call Wait on the
+// ExpectedBlockpoint, call the given callback, and then Unblock the
+// ExpectedBlockpoint.
+//
+// If Wait returns an error (due to context cancellation) then this fails the
+// test and returns without calling the callback.
+func (eb ExpectedBlockpoint) On(t *testing.T, ctx context.Context, cb func()) {
+	go func() {
+		defer eb.Unblock()
+		if !assert.NoError(t, eb.Wait(ctx)) {
+			return
+		}
+		cb()
+	}()
+}
+
+// TestBlocker is used as an injected dependency into components, so that tests
+// can cause those components to block at specific execution points internally.
+// This is useful for testing race conditions between multiple components.
+//
+// A TestBlocker is initialized using `new`. A nil TestBlocker will never block.
+type TestBlocker struct {
+	l               sync.Mutex
+	blockpointsByID map[string][]ExpectedBlockpoint
+	blocksByID      map[string]int
+}
+
+// NewTestBlocker initializes a TestBlocker and registers a Cleanup callback on
+// the T which will call AssertExpectations.
+func NewTestBlocker(t *testing.T) *TestBlocker {
+	b := new(TestBlocker)
+	t.Cleanup(func() { b.AssertExpectations(t) })
+	return b
+}
+
+// Blockpoint will block if and only if TestBlocker is non-nil and
+// ExpectBlockpoint has been called with the same ID previously. If the context
+// is canceled while blocking then this call will return.
+func (b *TestBlocker) Blockpoint(ctx context.Context, id string) {
+	if b == nil {
+		return
+	}
+
+	b.l.Lock()
+
+	blockpoints := b.blockpointsByID[id]
+	if len(blockpoints) == 0 {
+		b.l.Unlock()
+		return
+	}
+
+	blockpoint, blockpoints := blockpoints[0], blockpoints[1:]
+	b.blockpointsByID[id] = blockpoints
+	b.blocksByID[id]++
+
+	b.l.Unlock()
+
+	close(blockpoint.waitCh)
+
+	select {
+	case <-ctx.Done():
+	case <-blockpoint.unblockCh:
+	}
+}
+
+// ExpectBlockpoint will cause the TestBlocker to block upon the next call to
+// Blockpoint using the same id. The returned ExpectedBlockpoint can be used to
+// wait until Blockpoint is called, as well as to unblock it.
+func (b *TestBlocker) ExpectBlockpoint(id string) ExpectedBlockpoint {
+	b.l.Lock()
+	defer b.l.Unlock()
+
+	if b.blockpointsByID == nil {
+		b.blockpointsByID = map[string][]ExpectedBlockpoint{}
+	}
+
+	if b.blocksByID == nil {
+		b.blocksByID = map[string]int{}
+	}
+
+	blockpoint := ExpectedBlockpoint{
+		waitCh:    make(chan struct{}),
+		unblockCh: make(chan struct{}),
+	}
+
+	b.blockpointsByID[id] = append(b.blockpointsByID[id], blockpoint)
+
+	return blockpoint
+}
+
+// AssertExpectations will fail the test and return false if any calls to
+// ExpectBlockpoint have not had a corresponding Blockpoint call made.
+func (b *TestBlocker) AssertExpectations(t *testing.T) bool {
+	b.l.Lock()
+	defer b.l.Unlock()
+
+	var failed bool
+	for id, blockpoints := range b.blockpointsByID {
+		if len(blockpoints) == 0 {
+			continue
+		}
+
+		failed = true
+		t.Errorf(
+			"Blockpoint(%q) called %d times, expected %d more",
+			id, b.blocksByID[id], len(blockpoints),
+		)
+	}
+
+	return !failed
+}
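
As a rough sketch of how the new TestBlocker API is meant to be driven (this test is illustrative only and not part of the commit; the blockpoint ID "step1" and the test name are made up):

package toolkit

import (
	"context"
	"testing"
)

func TestBlockerExample(t *testing.T) {
	ctx := context.Background()

	// AssertExpectations is registered as a cleanup callback, so a blockpoint
	// that is expected but never hit fails the test.
	blocker := NewTestBlocker(t)

	// When the component hits "step1", run the callback, then unblock it.
	blocker.ExpectBlockpoint("step1").On(t, ctx, func() {
		t.Log("component is parked at step1; safe to assert on intermediate state")
	})

	// Stand-in for the component under test. In the real code the blocker is
	// injected via an options struct (e.g. the new Opts.testBlocker field) and
	// is nil outside of tests, in which case Blockpoint is a no-op.
	done := make(chan struct{})
	go func() {
		defer close(done)
		blocker.Blockpoint(ctx, "step1") // blocks until the On callback returns
	}()

	<-done
}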
@@ -1,14 +0,0 @@
----
-type: task
----
-
-When removing a storage allocation the new layout should be applied _before_ the
-impacted garage instance is shut down. Isle should ideally also wait for the
-impacted instance to no longer be in the "draining" state prior to shutting it
-down, if possible.
-
-Some care needs to be taken in the case of the `daemon.yml` file being used for
-configuration. The daemon should probably initially load using the old
-configuration, and only then apply the new configuration as if it were applied
-using `SetConfig`. This way the garage instance being removed can be brought
-back up, drained, then shut down again.
tasks/bugs/garage-layout-management.md (new file, 84 lines)
@@ -0,0 +1,84 @@
+---
+type: task
+---
+
+## Problem
+
+There are high-level but extremely problematic issues with how garage layout
+management is being done.
+
+In general the strategy around layout management is that each host only modifies
+the cluster layout related to itself, and never touches applied roles of other
+hosts. This works great for all except one case: a host removing one or more of
+its allocations.
+
+There are two separate issues which must be dealt with, each related partially
+to the other.
+
+## Draining of garage data
+
+When a garage node is removed from the cluster it first goes into the "draining"
+state, so that other nodes in the cluster can ensure that the replication factor
+for each piece of data is met prior to the node being decommissioned.
+
+While the node is in draining state it cannot be used for S3 API calls, as the
+bucket credentials are no longer present on it.
+
+## Configuration change on restart
+
+For hosts whose configuration is managed by `daemon.yml` it is not necessarily
+known that a garage node used to exist at all upon restart. The host can't
+investigate the cluster layout because it won't have a garage instance running,
+and even if it could it wouldn't be able to bring up a garage node to properly
+drain the old allocations.
+
+# Invalid Solutions
+
+One solution which is tempting but ultimately NOT viable is to make all hosts
+run at least one garage instance, and if they have no storage allocations to
+make that instance be a "gateway" instance. This won't work though, because
+it would require all hosts to open up the RPC port on their firewall, and
+firewall management requires extra user involvement.
+
+Another previous solution was to use an "orphan remover" process on each host,
+where the host would compare the garage cluster layout to the expected layout
+based on the bootstrap data in the common bucket, and remove any hosts from the
+layout which shouldn't be there and don't have a garage instance to remove
+themselves with. This had a bunch of unresolvable race conditions, and it
+didn't account for draining besides.
+
+# Possible Solution
+
+The solution seems to be that the host must maintain two views of its garage
+allocations: the last known allocation state, and the desired allocation state.
+
+The last known state needs to contain both what state the allocation was in
+(healthy or draining), along with its directories and capacity. This should get
+updated anytime the host performs an action which changes it (modifying the
+cluster layout to add a new instance or move an existing one to draining, or
+actually removing an instance which is done draining).
+
+The desired state is essentially the network configuration as it is now. This
+will be used along with the last known state to take actions.
+
+There are a few details to note with this solution:
+
+- There will need to be a worker which periodically checks the last known state
+  for any nodes which were draining, and if they are done draining then remove
+  them.
+
+- When the host starts up it should _always_ use the last known state, and only
+  once started up should it go to apply the desired configuration.
+
+- When choosing an admin endpoint to use, the last known state should be used,
+  even though it might result in unexpected behavior from the user's perspective
+  (since the user only knows about the desired state). This applies for RPC
+  endpoints as well.
+
+- The last/desired states need to be checked for conflicts, and an error emitted
+  in the event that there is one (either returned from SetConfig or Load). This
+  includes a new allocation using the same directory as an old one (based on RPC
+  port), or two allocations using the same RPC port.
+
+- The nebula firewall must base its opened ports on the last known state rather
+  than desired state.
@@ -1,8 +0,0 @@
----
-type: task
----
-
-There is a race condition in the garage orphan removal worker. Basically when
-host A is adding its first allocation, it's possible that it hasn't yet updated
-the host info in the common bucket, and so host B removes it from the layout
-prior to that host info being updated.
@@ -1,14 +0,0 @@
----
-type: task
-after:
-- ./garage-apply-layout-before-stopping-instance.md
----
-
-I think there is currently a bug related to re-adding a storage allocation on an
-RPC port of a garage instance which was previously used:
-
-- Step 1) Remove a storage allocation using RPC port N
-- Step 2) Add a new allocation using RPC port N, but with a new data/meta dir
-
-I believe in this case garage will go back to using the old data/meta dir, and
-possibly even re-use the old pubkey.
@@ -1,7 +1,7 @@
 ---
 type: task
 after:
-- ./garage-apply-layout-before-stopping-instance.md
+- ./garage-layout-management.md
 ---
 
 When SetConfig is called, but ends up erroring, the new config should not end up