isle/go/daemon/daemon.go

346 lines
8.0 KiB
Go
Raw Normal View History

// Package daemon implements the isle daemon, which is a long-running service
// managing all isle background tasks and sub-processes for a single network.
package daemon
import (
"bytes"
"context"
"errors"
"fmt"
"io"
"isle/bootstrap"
"os"
"sync"
"time"
2024-06-22 15:49:56 +00:00
"dev.mediocregopher.com/mediocre-go-lib.git/mlog"
)
// Daemon presents all functionality required for client frontends to interact
// with isle, typically via the unix socket.
type Daemon interface {
// GetGarageBootstrapHosts loads (and verifies) the <hostname>.json.signed
// file for all hosts stored in garage.
GetGarageBootstrapHosts(
ctx context.Context,
) (
map[string]bootstrap.Host, error,
)
// Shutdown blocks until all resources held or created by the daemon,
// including child processes it has started, have been cleaned up.
//
// If this returns an error then it's possible that child processes are
// still running and are no longer managed.
Shutdown() error
}
// Opts are optional parameters which can be passed in when initializing a new
// Daemon instance. A nil Opts is equivalent to a zero value.
type Opts struct {
// Stdout and Stderr are what the associated outputs from child processes
// will be directed to.
Stdout, Stderr io.Writer
EnvVars EnvVars // Defaults to that returned by GetEnvVars.
}
func (o *Opts) withDefaults() *Opts {
if o == nil {
o = new(Opts)
}
if o.Stdout == nil {
o.Stdout = os.Stdout
}
if o.Stderr == nil {
o.Stderr = os.Stderr
}
if o.EnvVars == (EnvVars{}) {
o.EnvVars = GetEnvVars()
}
return o
}
const (
daemonStateInitializing = iota
daemonStateOk
daemonStateRestarting
daemonStateShutdown
)
type daemon struct {
logger *mlog.Logger
daemonConfig Config
envBinDirPath string
opts *Opts
l sync.Mutex
state int
children *Children
currBootstrap bootstrap.Bootstrap
cancelFn context.CancelFunc
stoppedCh chan struct{}
}
// NewDaemon initializes and returns a Daemon instance which will manage all
// child processes and state required by the isle service, as well as an HTTP
// socket over which RPC calls will be served.
//
// Inner Children instance(s) will be wrapped such that they will be
// automatically shutdown and re-created whenever there's changes in the network
// which require the configuration to be changed (e.g. a new nebula lighthouse).
// During such an inner restart all methods will return ErrRestarting, except
// Shutdown which will block until the currently executing restart is finished
// and then shutdown cleanly from there.
//
// While still starting up the Daemon for the first time all methods will return
// ErrInitializing, except Shutdown which will block until initialization is
// canceled.
//
// TODO make daemon smarter, it currently restarts on _any_ change, but
// it should restart itself only when there's something actually requiring a
// restart.
func NewDaemon(
logger *mlog.Logger,
daemonConfig Config,
envBinDirPath string,
currBootstrap bootstrap.Bootstrap,
opts *Opts,
) Daemon {
ctx, cancelFn := context.WithCancel(context.Background())
d := &daemon{
logger: logger,
daemonConfig: daemonConfig,
envBinDirPath: envBinDirPath,
opts: opts.withDefaults(),
currBootstrap: currBootstrap,
cancelFn: cancelFn,
stoppedCh: make(chan struct{}),
}
go func() {
d.restartLoop(ctx)
d.logger.Debug(ctx, "DaemonRestarter stopped")
close(d.stoppedCh)
}()
return d
}
func withInnerChildren[Res any](
d *daemon, fn func(*Children) (Res, error),
) (Res, error) {
var zero Res
d.l.Lock()
children, state := d.children, d.state
d.l.Unlock()
switch state {
case daemonStateInitializing:
return zero, ErrInitializing
case daemonStateOk:
return fn(children)
case daemonStateRestarting:
return zero, ErrRestarting
case daemonStateShutdown:
return zero, errors.New("already shutdown")
default:
panic(fmt.Sprintf("unknown state %d", d.state))
}
}
// creates a new bootstrap file using available information from the network. If
// the new bootstrap file is different than the existing one, the existing one
// is overwritten and true is returned.
func (d *daemon) checkBootstrap(
ctx context.Context, hostBootstrap bootstrap.Bootstrap,
) (
bootstrap.Bootstrap, bool, error,
) {
thisHost := hostBootstrap.ThisHost()
newHosts, err := d.getGarageBootstrapHosts(ctx)
if err != nil {
return bootstrap.Bootstrap{}, false, fmt.Errorf("getting hosts from garage: %w", err)
}
// the daemon's view of this host's bootstrap info takes precedence over
// whatever is in garage
newHosts[thisHost.Name] = thisHost
newHostsHash, err := bootstrap.HostsHash(newHosts)
if err != nil {
return bootstrap.Bootstrap{}, false, fmt.Errorf("calculating hash of new hosts: %w", err)
}
currHostsHash, err := bootstrap.HostsHash(hostBootstrap.Hosts)
if err != nil {
return bootstrap.Bootstrap{}, false, fmt.Errorf("calculating hash of current hosts: %w", err)
}
if bytes.Equal(newHostsHash, currHostsHash) {
return hostBootstrap, false, nil
}
hostBootstrap.Hosts = newHosts
return hostBootstrap, true, nil
}
// blocks until a new bootstrap is available or context is canceled
func (d *daemon) watchForChanges(ctx context.Context) bootstrap.Bootstrap {
ticker := time.NewTicker(3 * time.Minute)
defer ticker.Stop()
for {
select {
case <-ctx.Done():
return bootstrap.Bootstrap{}
case <-ticker.C:
d.logger.Info(ctx, "Checking for changes to bootstrap")
newBootstrap, changed, err := d.checkBootstrap(
ctx, d.currBootstrap,
)
if err != nil {
d.logger.Error(ctx, "Checking bootstrap for changes failed", err)
continue
} else if !changed {
continue
}
err = writeBootstrapToStateDir(d.opts.EnvVars.StateDirPath, newBootstrap)
if err != nil {
d.logger.Error(ctx, "Writing new bootstrap to disk failed", err)
continue
}
return newBootstrap
}
}
}
func (d *daemon) restartLoop(ctx context.Context) {
defer func() {
d.l.Lock()
d.state = daemonStateShutdown
children := d.children
d.l.Unlock()
if children != nil {
if err := children.Shutdown(); err != nil {
d.logger.Fatal(ctx, "Failed to cleanly shutdown daemon children, there may be orphaned child processes", err)
}
}
}()
wait := func(d time.Duration) bool {
select {
case <-ctx.Done():
return false
case <-time.After(d):
return true
}
}
for {
if ctx.Err() != nil {
return
}
d.logger.Info(ctx, "Creating new daemon")
children, err := NewChildren(
ctx,
d.logger.WithNamespace("daemon"),
d.daemonConfig,
d.currBootstrap,
d.envBinDirPath,
d.opts,
)
if errors.Is(err, context.Canceled) {
return
} else if err != nil {
d.logger.Error(ctx, "failed to initialize daemon", err)
if !wait(1 * time.Second) {
return
}
continue
}
d.l.Lock()
d.children = children
d.state = daemonStateOk
d.l.Unlock()
if len(d.daemonConfig.Storage.Allocations) > 0 {
if !until(
ctx,
d.logger,
"Applying garage layout",
func(ctx context.Context) error {
return GarageApplyLayout(
ctx, d.logger, d.currBootstrap, d.daemonConfig,
)
},
) {
return
}
}
if !until(
ctx,
d.logger,
"Updating host info in garage",
func(ctx context.Context) error {
return d.putGarageBoostrapHost(ctx)
},
) {
return
}
newBootstrap := d.watchForChanges(ctx)
if ctx.Err() != nil {
return
}
d.logger.Info(ctx, "Bootstrap has changed, will restart daemon")
d.l.Lock()
d.currBootstrap = newBootstrap
d.state = daemonStateRestarting
d.l.Unlock()
d.logger.Info(ctx, "Shutting down previous child processes")
if err := d.children.Shutdown(); err != nil {
d.logger.Fatal(ctx, "Failed to cleanly shutdown children, there may be orphaned child processes", err)
}
}
}
func (d *daemon) GetGarageBootstrapHosts(
ctx context.Context,
) (
map[string]bootstrap.Host, error,
) {
return withInnerChildren(d, func(*Children) (map[string]bootstrap.Host, error) {
return d.getGarageBootstrapHosts(ctx)
})
}
func (d *daemon) Shutdown() error {
d.cancelFn()
<-d.stoppedCh
return nil
}