Fix startup sequence for daemon

Putting the bootstrap host data into garage and applying the garage
layout diff no longer happen simultaneously in the background; they now
run one after the other during startup. Running them concurrently was
causing some weird non-determinism in the startup which wasn't really
breaking anything, but made the logs harder to debug.

This also potentially fixes `waitForGarageAndNebula`, which was
neglecting to wait for nebula if there were allocations defined.
This commit is contained in:
Brian Picciano 2022-11-13 14:55:25 +01:00
parent 838c548706
commit 90a30bef5e
3 changed files with 27 additions and 40 deletions

View File

@ -116,8 +116,6 @@ func runDaemonPmuxOnce(
),
}
doneCh := ctx.Done()
var wg sync.WaitGroup
defer wg.Wait()
@ -130,18 +128,28 @@ func runDaemonPmuxOnce(
pmuxlib.Run(ctx, os.Stdout, os.Stderr, pmuxConfig)
}()
wg.Add(1)
go func() {
defer wg.Done()
if err := waitForGarageAndNebula(ctx, hostBootstrap, daemonConfig); err != nil {
return bootstrap.Bootstrap{}, fmt.Errorf("waiting for nebula/garage to start up: %w", err)
}
if err := waitForGarageAndNebula(ctx, hostBootstrap, daemonConfig); err != nil {
fmt.Fprintf(os.Stderr, "aborted waiting for garage instances to be accessible: %v\n", err)
return
err = doOnce(ctx, func(ctx context.Context) error {
if err := hostBootstrap.PutGarageBoostrapHost(ctx); err != nil {
fmt.Fprintf(os.Stderr, "updating host info in garage: %v\n", err)
return err
}
return nil
})
if err != nil {
return bootstrap.Bootstrap{}, fmt.Errorf("updating host info in garage: %w", err)
}
if len(daemonConfig.Storage.Allocations) > 0 {
err := doOnce(ctx, func(ctx context.Context) error {
if err := hostBootstrap.PutGarageBoostrapHost(ctx); err != nil {
fmt.Fprintf(os.Stderr, "updating host info in garage: %v\n", err)
if err := garageApplyLayout(ctx, hostBootstrap, daemonConfig); err != nil {
fmt.Fprintf(os.Stderr, "applying garage layout: %v\n", err)
return err
}
@ -149,33 +157,8 @@ func runDaemonPmuxOnce(
})
if err != nil {
fmt.Fprintf(os.Stderr, "aborted updating host info in garage: %v\n", err)
return bootstrap.Bootstrap{}, fmt.Errorf("applying garage layout: %w", err)
}
}()
if len(daemonConfig.Storage.Allocations) > 0 {
wg.Add(1)
go func() {
defer wg.Done()
if err := waitForGarageAndNebula(ctx, hostBootstrap, daemonConfig); err != nil {
fmt.Fprintf(os.Stderr, "aborted waiting for garage instances to be accessible: %v\n", err)
return
}
err := doOnce(ctx, func(ctx context.Context) error {
if err := garageApplyLayout(ctx, hostBootstrap, daemonConfig); err != nil {
fmt.Fprintf(os.Stderr, "applying garage layout: %v\n", err)
return err
}
return nil
})
if err != nil {
fmt.Fprintf(os.Stderr, "aborted applying garage layout: %v\n", err)
}
}()
}
ticker := time.NewTicker(3 * time.Minute)
@ -184,7 +167,7 @@ func runDaemonPmuxOnce(
for {
select {
case <-doneCh:
case <-ctx.Done():
return bootstrap.Bootstrap{}, ctx.Err()
case <-ticker.C:

View File

@ -58,6 +58,6 @@ func doOnce(ctx context.Context, fn func(context.Context) error) error {
return ctxErr
}
time.Sleep(250 * time.Millisecond)
time.Sleep(1 * time.Second)
}
}

View File

@ -36,12 +36,16 @@ func waitForGarageAndNebula(
daemonConfig daemon.Config,
) error {
if err := waitForNebula(ctx, hostBootstrap); err != nil {
return fmt.Errorf("waiting for nebula to start: %w", err)
}
allocs := daemonConfig.Storage.Allocations
// if this host doesn't have any allocations specified then fall back to
// waiting for nebula
if len(allocs) == 0 {
return waitForNebula(ctx, hostBootstrap)
return nil
}
for _, alloc := range allocs {
@ -57,7 +61,7 @@ func waitForGarageAndNebula(
)
if err := adminClient.Wait(ctx); err != nil {
return fmt.Errorf("waiting for instance %q to start up: %w", adminAddr, err)
return fmt.Errorf("waiting for garage instance %q to start up: %w", adminAddr, err)
}
}