From 03f187396b9a6db12a36bae42772ab88046bc30a Mon Sep 17 00:00:00 2001 From: Brian Picciano Date: Wed, 1 Jan 2025 12:38:16 +0100 Subject: [PATCH] Improve logic around waiting for garage to be considered ready --- go/cmd/entrypoint/garage.go | 9 +- go/daemon/children/children.go | 32 ++++- go/daemon/children/garage.go | 3 +- go/daemon/network/garage.go | 54 ++++++--- go/daemon/network/network.go | 3 + go/garage/admin_client.go | 109 +++++++++++------- go/toolkit/toolkit.go | 39 ++++++- ...e-apply-layout-before-stopping-instance.md | 2 +- .../bugs/garage-credential-propagation-fix.md | 13 --- .../garage-orphan-remover-race-condition.md | 8 ++ .../misc/network-integration-tests-sigint.md | 11 ++ tasks/misc/test-message-logger-ts.md | 6 + tasks/remote-debugging/daemon-rpc.md | 11 ++ tasks/remote-debugging/logs.md | 9 ++ tasks/remote-debugging/restart-command.md | 5 + 15 files changed, 241 insertions(+), 73 deletions(-) delete mode 100644 tasks/bugs/garage-credential-propagation-fix.md create mode 100644 tasks/bugs/garage-orphan-remover-race-condition.md create mode 100644 tasks/misc/network-integration-tests-sigint.md create mode 100644 tasks/misc/test-message-logger-ts.md create mode 100644 tasks/remote-debugging/daemon-rpc.md create mode 100644 tasks/remote-debugging/logs.md create mode 100644 tasks/remote-debugging/restart-command.md diff --git a/go/cmd/entrypoint/garage.go b/go/cmd/entrypoint/garage.go index 7d6c3f9..4ce9705 100644 --- a/go/cmd/entrypoint/garage.go +++ b/go/cmd/entrypoint/garage.go @@ -123,6 +123,8 @@ var subCmdGarageCLI = subCmd{ name: "cli", descr: "Runs the garage binary, automatically configured to point to the garage sub-process of a running isle daemon", do: func(ctx subCmdCtx) error { + rpcNodeAddr := ctx.flags.String("rpc-node-addr", "", "RPC address of the garage instance to hit in the form 'id@host:port'. Will default to a local garage instance if not given, or some random garage instance in the cluster if no local instance is configured.") + ctx, err := ctx.withParsedFlags(&withParsedFlagsOpts{ passthroughArgs: true, }) @@ -145,12 +147,17 @@ var subCmdGarageCLI = subCmd{ return errors.New("this host does not have the garage RPC secret") } + chosenRPCNodeAddr := clientParams.Node.RPCNodeAddr() + if *rpcNodeAddr != "" { + chosenRPCNodeAddr = *rpcNodeAddr + } + var ( binPath = binPath("garage") args = append([]string{"garage"}, ctx.opts.args...) cliEnv = append( os.Environ(), - "GARAGE_RPC_HOST="+clientParams.Node.RPCNodeAddr(), + "GARAGE_RPC_HOST="+chosenRPCNodeAddr, "GARAGE_RPC_SECRET="+clientParams.RPCSecret, ) ) diff --git a/go/daemon/children/children.go b/go/daemon/children/children.go index 0f850d2..2d567a4 100644 --- a/go/daemon/children/children.go +++ b/go/daemon/children/children.go @@ -17,6 +17,21 @@ import ( "isle/toolkit" ) +// Opts are optional fields which can be passed into New. A nil value is +// equivalent to a zero value. +type Opts struct { + // GarageNewCluster should be true if the garage instances being started + // are the first instances in a cluster which is being created. + GarageNewCluster bool +} + +func (o *Opts) withDefaults() *Opts { + if o == nil { + o = new(Opts) + } + return o +} + // Children manages all child processes of a network. Child processes are // comprised of: // - nebula @@ -48,9 +63,12 @@ func New( garageAdminToken string, nebulaDeviceNamer *NebulaDeviceNamer, hostBootstrap bootstrap.Bootstrap, + opts *Opts, ) ( *Children, error, ) { + opts = opts.withDefaults() + logger.Info(ctx, "Loading secrets") garageRPCSecret, err := daecommon.GetGarageRPCSecret(ctx, secretsStore) if err != nil && !errors.Is(err, secrets.ErrNotFound) { @@ -113,7 +131,12 @@ func New( } if err := waitForGarage( - ctx, c.logger, networkConfig, garageAdminToken, hostBootstrap, + ctx, + c.logger, + networkConfig, + garageAdminToken, + hostBootstrap, + opts.GarageNewCluster, ); err != nil { logger.Warn(ctx, "Failed waiting for garage processes to initialize, shutting down child processes", err) c.Shutdown() @@ -225,7 +248,12 @@ func (c *Children) reloadGarage( if anyChanged { if err := waitForGarage( - ctx, c.logger, networkConfig, c.garageAdminToken, hostBootstrap, + ctx, + c.logger, + networkConfig, + c.garageAdminToken, + hostBootstrap, + false, ); err != nil { return fmt.Errorf("waiting for garage to start: %w", err) } diff --git a/go/daemon/children/garage.go b/go/daemon/children/garage.go index 4c83120..cc29598 100644 --- a/go/daemon/children/garage.go +++ b/go/daemon/children/garage.go @@ -27,6 +27,7 @@ func waitForGarage( networkConfig daecommon.NetworkConfig, adminToken string, hostBootstrap bootstrap.Bootstrap, + newCluster bool, ) error { allocs := networkConfig.Storage.Allocations @@ -52,7 +53,7 @@ func waitForGarage( ) logger.Info(ctx, "Waiting for garage instance to be healthy") - if err := adminClient.Wait(ctx); err != nil { + if err := adminClient.Wait(ctx, !newCluster); err != nil { return fmt.Errorf("waiting for garage instance %q to start up: %w", adminAddr, err) } diff --git a/go/daemon/network/garage.go b/go/daemon/network/garage.go index cb6458c..0456764 100644 --- a/go/daemon/network/garage.go +++ b/go/daemon/network/garage.go @@ -11,6 +11,7 @@ import ( "isle/garage" "isle/nebula" "isle/secrets" + "isle/toolkit" "net" "net/netip" "path/filepath" @@ -84,7 +85,6 @@ func garageApplyLayout( adminToken string, prevHost, currHost bootstrap.Host, ) error { - var ( adminClient = newGarageAdminClient( logger, networkConfig, adminToken, currHost, @@ -321,23 +321,47 @@ func garageWaitForAlloc( defer adminClient.Close() ctx = mctx.WithAnnotator(ctx, alloc) - for { - logger.Info(ctx, "Checking if node has synced bucket list") - buckets, err := adminClient.ListBuckets(ctx) - if err != nil { - return fmt.Errorf("listing buckets: %w", err) - } else if len(buckets) == 0 { - logger.WarnString(ctx, "No buckets found, will wait a bit and try again") - select { - case <-time.After(1 * time.Second): - continue - case <-ctx.Done(): - return ctx.Err() + logger.Info(ctx, "Checking if garage instance has synced bucket list") + if err := toolkit.UntilTrue( + ctx, + logger, + 2*time.Second, + func() (bool, error) { + buckets, err := adminClient.ListBuckets(ctx) + if err != nil { + return false, fmt.Errorf("listing buckets: %w", err) + } else if len(buckets) == 0 { + logger.Info(ctx, "Bucket list not yet synced") + return false, nil } - } - return nil + return true, nil + }, + ); err != nil { + return fmt.Errorf("waiting for bucket list to sync: %w", err) } + + logger.Info(ctx, "Checking if garage instance has synced credentials") + if err := toolkit.UntilTrue( + ctx, + logger, + 2*time.Second, + func() (bool, error) { + credentials, err := adminClient.ListS3APICredentials(ctx) + if err != nil { + return false, fmt.Errorf("listing credentials: %w", err) + } else if len(credentials) == 0 { + logger.Info(ctx, "Credentials not yet synced") + return false, nil + } + + return true, nil + }, + ); err != nil { + return fmt.Errorf("waiting for credentials list to sync: %w", err) + } + + return nil } // garageNodeBuddyPeers returns the "buddy" peers of the given host, based on diff --git a/go/daemon/network/network.go b/go/daemon/network/network.go index 6dad009..3a84107 100644 --- a/go/daemon/network/network.go +++ b/go/daemon/network/network.go @@ -479,6 +479,9 @@ func (n *network) initialize( n.opts.GarageAdminToken, n.nebulaDeviceNamer, n.currBootstrap, + &children.Opts{ + GarageNewCluster: isCreate, + }, ) if err != nil { return fmt.Errorf("creating child processes: %w", err) diff --git a/go/garage/admin_client.go b/go/garage/admin_client.go index 84cb77c..005c9f9 100644 --- a/go/garage/admin_client.go +++ b/go/garage/admin_client.go @@ -163,7 +163,8 @@ type ClusterLayout struct { // ClusterStatus is returned from the Status endpoint, describing the currently // known state of the cluster. type ClusterStatus struct { - Nodes []KnownNode `json:"nodes"` + LayoutVersion int `json:"layoutVersion"` + Nodes []KnownNode `json:"nodes"` } // Status returns the current state of the cluster. @@ -177,53 +178,63 @@ func (c *AdminClient) Status(ctx context.Context) (ClusterStatus, error) { // Wait will block until the instance connected to can see at least // ReplicationFactor other garage instances. If the context is canceled it // will return the context error. -func (c *AdminClient) Wait(ctx context.Context) error { +func (c *AdminClient) Wait( + ctx context.Context, clusterAlreadyInitialized bool, +) error { + c.logger.Info(ctx, "Waiting for instance to connect to other instances") + err := toolkit.UntilTrue( + ctx, + c.logger, + 2*time.Second, + func() (bool, error) { + clusterStatus, err := c.Status(ctx) + if ctxErr := ctx.Err(); ctxErr != nil { + return false, ctxErr - for first := true; ; first = false { - if !first { - select { - case <-time.After(2 * time.Second): - case <-ctx.Done(): - return ctx.Err() + } else if err != nil { + ctx := mctx.Annotate(ctx, "errMsg", err.Error()) + c.logger.Info(ctx, "Instance is not online yet") + return false, nil } - } - c.logger.Debug(ctx, "Getting cluster status") - clusterStatus, err := c.Status(ctx) - if ctxErr := ctx.Err(); ctxErr != nil { - return ctxErr + var numUp int - } else if err != nil { - ctx := mctx.Annotate(ctx, "errMsg", err.Error()) - c.logger.Info(ctx, "Instance is not online yet") - continue - } - - var numUp int - - for _, node := range clusterStatus.Nodes { - // There seems to be some kind of step between IsUp becoming true - // and garage actually loading the full state of a node, so we check - // for the HostName as well. We could also use LastSeenSecsAgo, but - // that remains null for the node being queried so it's more - // annoying to use. - if node.IsUp && node.HostName != "" { - numUp++ + for _, node := range clusterStatus.Nodes { + // There seems to be some kind of step between IsUp becoming + // true and garage actually loading the full state of a node, so + // we check for the HostName as well. We could also use + // LastSeenSecsAgo, but that remains null for the node being + // queried so it's more annoying to use. + if node.IsUp && node.HostName != "" { + numUp++ + } } - } - ctx := mctx.Annotate(ctx, - "numNodes", len(clusterStatus.Nodes), - "numUp", numUp, - ) + ctx := mctx.Annotate(ctx, + "layoutVersion", clusterStatus.LayoutVersion, + "numNodes", len(clusterStatus.Nodes), + "numUp", numUp, + ) - if numUp >= ReplicationFactor { - c.logger.Debug(ctx, "instance appears to be online") - return nil - } + if clusterAlreadyInitialized && clusterStatus.LayoutVersion == 0 { + c.logger.Info(ctx, "Cluster layout has not yet propagated") + return false, nil + } - c.logger.Info(ctx, "Instance is not joined to the cluster yet") + if numUp < ReplicationFactor { + c.logger.Info(ctx, "Not enough other instances in the cluster are seen as 'up'") + return false, nil + } + + return true, nil + }, + ) + + if err != nil { + return fmt.Errorf("waiting for cluster status: %w", err) } + + return err } // CreateS3APICredentials creates an S3APICredentials with the given name. The @@ -252,6 +263,26 @@ func (c *AdminClient) CreateS3APICredentials( }, err } +// S3APICredentialsPublic are the publicly available fields of an +// S3APICredentials. These are visibile to all nodes in the cluster. +type S3APICredentialsPublic struct { + ID string + Name string +} + +// ListS3APICredentials returns all S3APICredentials in the cluster, without +// returning their +func (c *AdminClient) ListS3APICredentials( + ctx context.Context, +) ( + []S3APICredentialsPublic, error, +) { + // https://garagehq.deuxfleurs.fr/api/garage-admin-v1.html#tag/Key/operation/ListKeys + var res []S3APICredentialsPublic + err := c.do(ctx, &res, "GET", "/v1/key?list", nil) + return res, err +} + // CreateBucket creates a bucket with the given global alias, returning its ID. func (c *AdminClient) CreateBucket( ctx context.Context, globalAlias string, diff --git a/go/toolkit/toolkit.go b/go/toolkit/toolkit.go index 9fe9184..29049ad 100644 --- a/go/toolkit/toolkit.go +++ b/go/toolkit/toolkit.go @@ -2,7 +2,14 @@ // specific part of isle. package toolkit -import "reflect" +import ( + "context" + "reflect" + "time" + + "dev.mediocregopher.com/mediocre-go-lib.git/mctx" + "dev.mediocregopher.com/mediocre-go-lib.git/mlog" +) // IsZero returns true if the value is equal to its zero value according to // reflect.DeepEqual. @@ -15,3 +22,33 @@ func IsZero[T any](v T) bool { func Ptr[T any](v T) *T { return &v } + +// UntilTrue will repeatedly call the given callback, waiting the given duration +// between calls until: +// - The context is canceled, returning the context's error +// - The callback returns an error, returning that error +// - The callback returns true, returning nil +func UntilTrue( + ctx context.Context, + logger *mlog.Logger, + period time.Duration, + fn func() (bool, error), +) error { + ctx = mctx.Annotate(ctx, "retryAfter", period) + + for { + if ok, err := fn(); err != nil { + return err + } else if ok { + return nil + } + + logger.Info(ctx, "Waiting before retrying") + + select { + case <-time.After(period): + case <-ctx.Done(): + return ctx.Err() + } + } +} diff --git a/tasks/bugs/garage-apply-layout-before-stopping-instance.md b/tasks/bugs/garage-apply-layout-before-stopping-instance.md index 4759a31..dc4fb34 100644 --- a/tasks/bugs/garage-apply-layout-before-stopping-instance.md +++ b/tasks/bugs/garage-apply-layout-before-stopping-instance.md @@ -1,5 +1,5 @@ --- -type: host +type: task --- When removing a storage allocation the new layout should be applied _before_ the diff --git a/tasks/bugs/garage-credential-propagation-fix.md b/tasks/bugs/garage-credential-propagation-fix.md deleted file mode 100644 index 4f72606..0000000 --- a/tasks/bugs/garage-credential-propagation-fix.md +++ /dev/null @@ -1,13 +0,0 @@ ---- -type: task ---- - -It seems in real world testing that this error came up after `isle storage -allocation add` had already completed: - -``` -ERROR [daemon/networks/micropelago.net] Background job failed errMsg="getting hosts from garage: listing objects: Forbidden: No such key: XXXXXXXXXXXXXXXXXXXXXXXXXX" workerLabel=reloadHosts -``` - -Isle should wait until bucket credentials are propagated to the new garage -instance before considering it to be online. diff --git a/tasks/bugs/garage-orphan-remover-race-condition.md b/tasks/bugs/garage-orphan-remover-race-condition.md new file mode 100644 index 0000000..7322dc2 --- /dev/null +++ b/tasks/bugs/garage-orphan-remover-race-condition.md @@ -0,0 +1,8 @@ +--- +type: task +--- + +There is a race condition in the garage orphan removal worker. Basically when +host A is adding its first allocation, it's possible that it hasn't yet updated +the host info in the common bucket, and so host B removes it from the layout +prior to that host info being updated. diff --git a/tasks/misc/network-integration-tests-sigint.md b/tasks/misc/network-integration-tests-sigint.md new file mode 100644 index 0000000..c568abb --- /dev/null +++ b/tasks/misc/network-integration-tests-sigint.md @@ -0,0 +1,11 @@ +--- +type: task +--- + +When SIGINT (ctrl-c) is sent to the integration tests as they are running, they +should cancel themselves and clean up all sub-processes as best as they can. A +second SIGINT can then be used to immediately kill, if for some reason cleanup +fails to occur cleanly. + +Alternatively, maybe the `integration_test.sh` script can do some kind of +process cleanup. diff --git a/tasks/misc/test-message-logger-ts.md b/tasks/misc/test-message-logger-ts.md new file mode 100644 index 0000000..369e5d3 --- /dev/null +++ b/tasks/misc/test-message-logger-ts.md @@ -0,0 +1,6 @@ +--- +type: task +--- + +On each message which is logged the test message logger (toolkit.NewTestLogger) +should include the duration since then the test started. diff --git a/tasks/remote-debugging/daemon-rpc.md b/tasks/remote-debugging/daemon-rpc.md new file mode 100644 index 0000000..0c81afd --- /dev/null +++ b/tasks/remote-debugging/daemon-rpc.md @@ -0,0 +1,11 @@ +--- +type: task +status: draft +--- + +It should be possible for a host to enable access to its daemon's RPC socket +_but only for a specific other host_. + +At the same time, the `isle` tool would need to be able to specify a remote host +to operate against. How this interacts with local state (like staged firewall) +needs to be dealt with as well. diff --git a/tasks/remote-debugging/logs.md b/tasks/remote-debugging/logs.md new file mode 100644 index 0000000..a0396c5 --- /dev/null +++ b/tasks/remote-debugging/logs.md @@ -0,0 +1,9 @@ +--- +type: task +status: draft +--- + +It would be very convenient for debugging hosts remotely if we could see the +logs of another host. This would include CHILD logs and down (even if the daemon +is set to output a lower log level to stdout). Being able to tail the logs +remotely would be especially nice. diff --git a/tasks/remote-debugging/restart-command.md b/tasks/remote-debugging/restart-command.md new file mode 100644 index 0000000..cfa4ba9 --- /dev/null +++ b/tasks/remote-debugging/restart-command.md @@ -0,0 +1,5 @@ +--- +type: task +--- + +It should be possible to restart the daemon via its RPC socket.