263 lines
6.2 KiB
Go
263 lines
6.2 KiB
Go
package daemon
|
|
|
|
import (
|
|
"bytes"
|
|
"context"
|
|
"errors"
|
|
"fmt"
|
|
"isle/bootstrap"
|
|
"sync"
|
|
"time"
|
|
|
|
"dev.mediocregopher.com/mediocre-go-lib.git/mlog"
|
|
)
|
|
|
|
const (
|
|
daemonRestarterStateInitializing = iota
|
|
daemonRestarterStateOk
|
|
daemonRestarterStateRestarting
|
|
daemonRestarterStateShutdown
|
|
)
|
|
|
|
type daemonRestarter struct {
|
|
logger *mlog.Logger
|
|
daemonConfig Config
|
|
envBinDirPath string
|
|
opts *Opts
|
|
|
|
l sync.Mutex
|
|
state int
|
|
inner Daemon
|
|
currBootstrap bootstrap.Bootstrap
|
|
|
|
cancelFn context.CancelFunc
|
|
stoppedCh chan struct{}
|
|
}
|
|
|
|
// NewDaemonRestarter will wrap a Daemon such that it will be automatically
|
|
// shutdown and re-created whenever there's changes in the cluster which require
|
|
// the configuration to be changed (e.g. a new nebula lighthouse).
|
|
//
|
|
// While still starting up the daemon for the first time all methods will return
|
|
// ErrInitializing, except Shutdown which will block until initialization is
|
|
// canceled.
|
|
//
|
|
// During a restart all methods will return ErrRestarting, except Shutdown which
|
|
// will block until the currently executing restart is finished and then
|
|
// shutdown cleanly from there.
|
|
//
|
|
// TODO make daemonRestarter smarter, it currently restarts on _any_ change, but
|
|
// it should restart itself only when there's something actually requiring a
|
|
// restart.
|
|
func NewDaemonRestarter(
|
|
logger *mlog.Logger,
|
|
daemonConfig Config,
|
|
envBinDirPath string,
|
|
currBootstrap bootstrap.Bootstrap,
|
|
opts *Opts,
|
|
) Daemon {
|
|
ctx, cancelFn := context.WithCancel(context.Background())
|
|
|
|
dr := &daemonRestarter{
|
|
logger: logger,
|
|
daemonConfig: daemonConfig,
|
|
envBinDirPath: envBinDirPath,
|
|
opts: opts.withDefaults(),
|
|
currBootstrap: currBootstrap,
|
|
cancelFn: cancelFn,
|
|
stoppedCh: make(chan struct{}),
|
|
}
|
|
|
|
go func() {
|
|
dr.restartLoop(ctx)
|
|
dr.logger.Debug(ctx, "DaemonRestarter stopped")
|
|
close(dr.stoppedCh)
|
|
}()
|
|
|
|
return dr
|
|
}
|
|
|
|
func withInnerDaemon[Res any](
|
|
dr *daemonRestarter, fn func(Daemon) (Res, error),
|
|
) (Res, error) {
|
|
var zero Res
|
|
dr.l.Lock()
|
|
inner, state := dr.inner, dr.state
|
|
dr.l.Unlock()
|
|
|
|
switch state {
|
|
case daemonRestarterStateInitializing:
|
|
return zero, ErrInitializing
|
|
case daemonRestarterStateOk:
|
|
return fn(inner)
|
|
case daemonRestarterStateRestarting:
|
|
return zero, ErrRestarting
|
|
case daemonRestarterStateShutdown:
|
|
return zero, errors.New("already shutdown")
|
|
default:
|
|
panic(fmt.Sprintf("unknown state %d", dr.state))
|
|
}
|
|
}
|
|
|
|
// creates a new bootstrap file using available information from the network. If
|
|
// the new bootstrap file is different than the existing one, the existing one
|
|
// is overwritten and true is returned.
|
|
func (dr *daemonRestarter) checkBootstrap(
|
|
ctx context.Context, hostBootstrap bootstrap.Bootstrap,
|
|
) (
|
|
bootstrap.Bootstrap, bool, error,
|
|
) {
|
|
|
|
thisHost := hostBootstrap.ThisHost()
|
|
|
|
newHosts, err := dr.inner.GetGarageBootstrapHosts(ctx)
|
|
if err != nil {
|
|
return bootstrap.Bootstrap{}, false, fmt.Errorf("getting hosts from garage: %w", err)
|
|
}
|
|
|
|
// the daemon's view of this host's bootstrap info takes precedence over
|
|
// whatever is in garage
|
|
newHosts[thisHost.Name] = thisHost
|
|
|
|
newHostsHash, err := bootstrap.HostsHash(newHosts)
|
|
if err != nil {
|
|
return bootstrap.Bootstrap{}, false, fmt.Errorf("calculating hash of new hosts: %w", err)
|
|
}
|
|
|
|
currHostsHash, err := bootstrap.HostsHash(hostBootstrap.Hosts)
|
|
if err != nil {
|
|
return bootstrap.Bootstrap{}, false, fmt.Errorf("calculating hash of current hosts: %w", err)
|
|
}
|
|
|
|
if bytes.Equal(newHostsHash, currHostsHash) {
|
|
return hostBootstrap, false, nil
|
|
}
|
|
|
|
hostBootstrap.Hosts = newHosts
|
|
|
|
return hostBootstrap, true, nil
|
|
}
|
|
|
|
// blocks until a new bootstrap is available or context is canceled
|
|
func (dr *daemonRestarter) watchForChanges(ctx context.Context) bootstrap.Bootstrap {
|
|
|
|
ticker := time.NewTicker(3 * time.Minute)
|
|
defer ticker.Stop()
|
|
|
|
for {
|
|
select {
|
|
|
|
case <-ctx.Done():
|
|
return bootstrap.Bootstrap{}
|
|
|
|
case <-ticker.C:
|
|
|
|
dr.logger.Info(ctx, "Checking for changes to bootstrap")
|
|
|
|
newBootstrap, changed, err := dr.checkBootstrap(
|
|
ctx, dr.currBootstrap,
|
|
)
|
|
if err != nil {
|
|
dr.logger.Error(ctx, "Checking bootstrap for changes failed", err)
|
|
continue
|
|
} else if !changed {
|
|
continue
|
|
}
|
|
|
|
err = writeBootstrapToStateDir(dr.opts.EnvVars.StateDirPath, newBootstrap)
|
|
if err != nil {
|
|
dr.logger.Error(ctx, "Writing new bootstrap to disk failed", err)
|
|
continue
|
|
}
|
|
|
|
return newBootstrap
|
|
}
|
|
}
|
|
}
|
|
|
|
func (dr *daemonRestarter) restartLoop(ctx context.Context) {
|
|
defer func() {
|
|
dr.l.Lock()
|
|
dr.state = daemonRestarterStateShutdown
|
|
inner := dr.inner
|
|
dr.l.Unlock()
|
|
|
|
if inner != nil {
|
|
if err := inner.Shutdown(); err != nil {
|
|
dr.logger.Fatal(ctx, "failed to cleanly shutdown daemon, there may be orphaned child processes", err)
|
|
}
|
|
}
|
|
}()
|
|
|
|
wait := func(d time.Duration) bool {
|
|
select {
|
|
case <-ctx.Done():
|
|
return false
|
|
case <-time.After(d):
|
|
return true
|
|
}
|
|
}
|
|
|
|
for {
|
|
if ctx.Err() != nil {
|
|
return
|
|
}
|
|
|
|
dr.logger.Info(ctx, "Creating new daemon")
|
|
daemonInst, err := New(
|
|
ctx,
|
|
dr.logger.WithNamespace("daemon"),
|
|
dr.daemonConfig,
|
|
dr.currBootstrap,
|
|
dr.envBinDirPath,
|
|
dr.opts,
|
|
)
|
|
if errors.Is(err, context.Canceled) {
|
|
return
|
|
} else if err != nil {
|
|
dr.logger.Error(ctx, "failed to initialize daemon", err)
|
|
if !wait(1 * time.Second) {
|
|
return
|
|
}
|
|
continue
|
|
}
|
|
|
|
dr.l.Lock()
|
|
dr.inner = daemonInst
|
|
dr.state = daemonRestarterStateOk
|
|
dr.l.Unlock()
|
|
|
|
newBootstrap := dr.watchForChanges(ctx)
|
|
if ctx.Err() != nil {
|
|
return
|
|
}
|
|
|
|
dr.logger.Info(ctx, "Bootstrap has changed, will restart daemon")
|
|
dr.l.Lock()
|
|
dr.currBootstrap = newBootstrap
|
|
dr.state = daemonRestarterStateRestarting
|
|
dr.l.Unlock()
|
|
|
|
dr.logger.Info(ctx, "Shutting down previous daemon")
|
|
if err := dr.inner.Shutdown(); err != nil {
|
|
dr.logger.Fatal(ctx, "failed to cleanly shutdown daemon, there may be orphaned child processes", err)
|
|
}
|
|
}
|
|
}
|
|
|
|
func (dr *daemonRestarter) GetGarageBootstrapHosts(
|
|
ctx context.Context,
|
|
) (
|
|
map[string]bootstrap.Host, error,
|
|
) {
|
|
return withInnerDaemon(dr, func(inner Daemon) (map[string]bootstrap.Host, error) {
|
|
return inner.GetGarageBootstrapHosts(ctx)
|
|
})
|
|
}
|
|
|
|
func (dr *daemonRestarter) Shutdown() error {
|
|
dr.cancelFn()
|
|
<-dr.stoppedCh
|
|
return nil
|
|
}
|