Fix startup sequence for daemon

Putting the bootstrap host data into garage and applying the garage
layout diff no longer happen simultaneously in the background. Running
them concurrently was causing some non-determinism in the startup which
wasn't really breaking anything, but made the logs harder to follow
when debugging.

This also potentially fixes `waitForGarageAndNebula`, which was
neglecting to wait for nebula if there were allocations defined.
This commit is contained in:
Brian Picciano 2022-11-13 14:55:25 +01:00
parent 838c548706
commit 90a30bef5e
3 changed files with 27 additions and 40 deletions

View File

@ -116,8 +116,6 @@ func runDaemonPmuxOnce(
), ),
} }
doneCh := ctx.Done()
var wg sync.WaitGroup var wg sync.WaitGroup
defer wg.Wait() defer wg.Wait()
@ -130,16 +128,11 @@ func runDaemonPmuxOnce(
pmuxlib.Run(ctx, os.Stdout, os.Stderr, pmuxConfig) pmuxlib.Run(ctx, os.Stdout, os.Stderr, pmuxConfig)
}() }()
wg.Add(1)
go func() {
defer wg.Done()
if err := waitForGarageAndNebula(ctx, hostBootstrap, daemonConfig); err != nil { if err := waitForGarageAndNebula(ctx, hostBootstrap, daemonConfig); err != nil {
fmt.Fprintf(os.Stderr, "aborted waiting for garage instances to be accessible: %v\n", err) return bootstrap.Bootstrap{}, fmt.Errorf("waiting for nebula/garage to start up: %w", err)
return
} }
err := doOnce(ctx, func(ctx context.Context) error { err = doOnce(ctx, func(ctx context.Context) error {
if err := hostBootstrap.PutGarageBoostrapHost(ctx); err != nil { if err := hostBootstrap.PutGarageBoostrapHost(ctx); err != nil {
fmt.Fprintf(os.Stderr, "updating host info in garage: %v\n", err) fmt.Fprintf(os.Stderr, "updating host info in garage: %v\n", err)
return err return err
@ -149,19 +142,10 @@ func runDaemonPmuxOnce(
}) })
if err != nil { if err != nil {
fmt.Fprintf(os.Stderr, "aborted updating host info in garage: %v\n", err) return bootstrap.Bootstrap{}, fmt.Errorf("updating host info in garage: %w", err)
} }
}()
if len(daemonConfig.Storage.Allocations) > 0 { if len(daemonConfig.Storage.Allocations) > 0 {
wg.Add(1)
go func() {
defer wg.Done()
if err := waitForGarageAndNebula(ctx, hostBootstrap, daemonConfig); err != nil {
fmt.Fprintf(os.Stderr, "aborted waiting for garage instances to be accessible: %v\n", err)
return
}
err := doOnce(ctx, func(ctx context.Context) error { err := doOnce(ctx, func(ctx context.Context) error {
if err := garageApplyLayout(ctx, hostBootstrap, daemonConfig); err != nil { if err := garageApplyLayout(ctx, hostBootstrap, daemonConfig); err != nil {
@ -173,9 +157,8 @@ func runDaemonPmuxOnce(
}) })
if err != nil { if err != nil {
fmt.Fprintf(os.Stderr, "aborted applying garage layout: %v\n", err) return bootstrap.Bootstrap{}, fmt.Errorf("applying garage layout: %w", err)
} }
}()
} }
ticker := time.NewTicker(3 * time.Minute) ticker := time.NewTicker(3 * time.Minute)
@ -184,7 +167,7 @@ func runDaemonPmuxOnce(
for { for {
select { select {
case <-doneCh: case <-ctx.Done():
return bootstrap.Bootstrap{}, ctx.Err() return bootstrap.Bootstrap{}, ctx.Err()
case <-ticker.C: case <-ticker.C:

View File

@ -58,6 +58,6 @@ func doOnce(ctx context.Context, fn func(context.Context) error) error {
return ctxErr return ctxErr
} }
time.Sleep(250 * time.Millisecond) time.Sleep(1 * time.Second)
} }
} }

View File

@ -36,12 +36,16 @@ func waitForGarageAndNebula(
daemonConfig daemon.Config, daemonConfig daemon.Config,
) error { ) error {
if err := waitForNebula(ctx, hostBootstrap); err != nil {
return fmt.Errorf("waiting for nebula to start: %w", err)
}
allocs := daemonConfig.Storage.Allocations allocs := daemonConfig.Storage.Allocations
// if this host doesn't have any allocations specified then fall back to // if this host doesn't have any allocations specified then fall back to
// waiting for nebula // waiting for nebula
if len(allocs) == 0 { if len(allocs) == 0 {
return waitForNebula(ctx, hostBootstrap) return nil
} }
for _, alloc := range allocs { for _, alloc := range allocs {
@ -57,7 +61,7 @@ func waitForGarageAndNebula(
) )
if err := adminClient.Wait(ctx); err != nil { if err := adminClient.Wait(ctx); err != nil {
return fmt.Errorf("waiting for instance %q to start up: %w", adminAddr, err) return fmt.Errorf("waiting for garage instance %q to start up: %w", adminAddr, err)
} }
} }