Move daemon restarting logic into daemon package
This commit is contained in:
parent
c808fa81b9
commit
05e91cd657
@ -199,7 +199,7 @@ var subCmdAdminCreateNetwork = subCmd{
|
|||||||
return fmt.Errorf("initializing garage shared global bucket: %w", err)
|
return fmt.Errorf("initializing garage shared global bucket: %w", err)
|
||||||
}
|
}
|
||||||
|
|
||||||
if err := daemonInst.Shutdown(ctx); err != nil {
|
if err := daemonInst.Shutdown(); err != nil {
|
||||||
return fmt.Errorf("shutting down daemon: %w (this can mean there are zombie children leftover)", err)
|
return fmt.Errorf("shutting down daemon: %w (this can mean there are zombie children leftover)", err)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -1,13 +1,11 @@
|
|||||||
package main
|
package main
|
||||||
|
|
||||||
import (
|
import (
|
||||||
"bytes"
|
|
||||||
"context"
|
"context"
|
||||||
"errors"
|
"errors"
|
||||||
"fmt"
|
"fmt"
|
||||||
"io/fs"
|
"io/fs"
|
||||||
"os"
|
"os"
|
||||||
"time"
|
|
||||||
|
|
||||||
"isle/bootstrap"
|
"isle/bootstrap"
|
||||||
"isle/daemon"
|
"isle/daemon"
|
||||||
@ -16,151 +14,6 @@ import (
|
|||||||
"dev.mediocregopher.com/mediocre-go-lib.git/mlog"
|
"dev.mediocregopher.com/mediocre-go-lib.git/mlog"
|
||||||
)
|
)
|
||||||
|
|
||||||
// The daemon sub-command deals with starting an actual isle daemon
|
|
||||||
// process, which is required to be running for most other Isle
|
|
||||||
// functionality. The sub-command does the following:
|
|
||||||
//
|
|
||||||
// * Creates and locks the runtime directory.
|
|
||||||
//
|
|
||||||
// * Creates the data directory and copies the appdir bootstrap file into there,
|
|
||||||
// if it's not already there.
|
|
||||||
//
|
|
||||||
// * Merges daemon configuration into the bootstrap configuration, and rewrites
|
|
||||||
// the bootstrap file.
|
|
||||||
//
|
|
||||||
// * Sets up environment variables that all other sub-processes then use, based
|
|
||||||
// on the runtime dir.
|
|
||||||
//
|
|
||||||
// * Dynamically creates the root pmux config and runs pmux.
|
|
||||||
//
|
|
||||||
// * (On exit) cleans up the runtime directory.
|
|
||||||
|
|
||||||
// creates a new bootstrap file using available information from the network. If
|
|
||||||
// the new bootstrap file is different than the existing one, the existing one
|
|
||||||
// is overwritten and true is returned.
|
|
||||||
func reloadBootstrap(
|
|
||||||
ctx context.Context,
|
|
||||||
logger *mlog.Logger,
|
|
||||||
daemonInst daemon.Daemon,
|
|
||||||
hostBootstrap bootstrap.Bootstrap,
|
|
||||||
) (
|
|
||||||
bootstrap.Bootstrap, bool, error,
|
|
||||||
) {
|
|
||||||
|
|
||||||
thisHost := hostBootstrap.ThisHost()
|
|
||||||
|
|
||||||
newHosts, err := daemonInst.GetGarageBootstrapHosts(ctx)
|
|
||||||
if err != nil {
|
|
||||||
return bootstrap.Bootstrap{}, false, fmt.Errorf("getting hosts from garage: %w", err)
|
|
||||||
}
|
|
||||||
|
|
||||||
// the daemon's view of this host's bootstrap info takes precedence over
|
|
||||||
// whatever is in garage
|
|
||||||
newHosts[thisHost.Name] = thisHost
|
|
||||||
|
|
||||||
newHostsHash, err := bootstrap.HostsHash(newHosts)
|
|
||||||
if err != nil {
|
|
||||||
return bootstrap.Bootstrap{}, false, fmt.Errorf("calculating hash of new hosts: %w", err)
|
|
||||||
}
|
|
||||||
|
|
||||||
currHostsHash, err := bootstrap.HostsHash(hostBootstrap.Hosts)
|
|
||||||
if err != nil {
|
|
||||||
return bootstrap.Bootstrap{}, false, fmt.Errorf("calculating hash of current hosts: %w", err)
|
|
||||||
}
|
|
||||||
|
|
||||||
if bytes.Equal(newHostsHash, currHostsHash) {
|
|
||||||
return hostBootstrap, false, nil
|
|
||||||
}
|
|
||||||
|
|
||||||
hostBootstrap.Hosts = newHosts
|
|
||||||
|
|
||||||
if err := writeBootstrapToStateDir(hostBootstrap); err != nil {
|
|
||||||
return bootstrap.Bootstrap{}, false, fmt.Errorf("writing new bootstrap to data dir: %w", err)
|
|
||||||
}
|
|
||||||
|
|
||||||
return hostBootstrap, true, nil
|
|
||||||
}
|
|
||||||
|
|
||||||
// runs a single pmux process of daemon, returning only once the env.Context has
|
|
||||||
// been canceled or bootstrap info has been changed. This will always block
|
|
||||||
// until the spawned pmux has returned, and returns a copy of hostBootstrap with
|
|
||||||
// updated boostrap info.
|
|
||||||
func runDaemonPmuxOnce(
|
|
||||||
ctx context.Context,
|
|
||||||
logger *mlog.Logger,
|
|
||||||
hostBootstrap bootstrap.Bootstrap,
|
|
||||||
daemonConfig daemon.Config,
|
|
||||||
) (
|
|
||||||
bootstrap.Bootstrap, error,
|
|
||||||
) {
|
|
||||||
daemonInst, err := daemon.New(
|
|
||||||
ctx,
|
|
||||||
logger.WithNamespace("daemon"),
|
|
||||||
daemonConfig,
|
|
||||||
hostBootstrap,
|
|
||||||
envBinDirPath,
|
|
||||||
nil,
|
|
||||||
)
|
|
||||||
if err != nil {
|
|
||||||
return bootstrap.Bootstrap{}, fmt.Errorf("initializing daemon: %w", err)
|
|
||||||
}
|
|
||||||
defer func() {
|
|
||||||
// context.Background() is deliberate here. At this point the entire
|
|
||||||
// process is shutting down, so whatever owns the process should decide
|
|
||||||
// when it's been too long.
|
|
||||||
if err := daemonInst.Shutdown(context.Background()); err != nil {
|
|
||||||
logger.Error(ctx, "failed to cleanly shutdown daemon", err)
|
|
||||||
}
|
|
||||||
}()
|
|
||||||
|
|
||||||
{
|
|
||||||
logger := logger.WithNamespace("http")
|
|
||||||
httpSrv, err := newHTTPServer(
|
|
||||||
ctx, logger, daemon.NewRPC(daemonInst),
|
|
||||||
)
|
|
||||||
if err != nil {
|
|
||||||
return bootstrap.Bootstrap{}, fmt.Errorf("starting HTTP server: %w", err)
|
|
||||||
}
|
|
||||||
defer func() {
|
|
||||||
// see comment in daemonInst shutdown logic regarding background
|
|
||||||
// context.
|
|
||||||
if err := httpSrv.Shutdown(context.Background()); err != nil {
|
|
||||||
logger.Error(ctx, "Failed to cleanly shutdown http server", err)
|
|
||||||
}
|
|
||||||
}()
|
|
||||||
}
|
|
||||||
|
|
||||||
ticker := time.NewTicker(3 * time.Minute)
|
|
||||||
defer ticker.Stop()
|
|
||||||
|
|
||||||
for {
|
|
||||||
select {
|
|
||||||
|
|
||||||
case <-ctx.Done():
|
|
||||||
return bootstrap.Bootstrap{}, ctx.Err()
|
|
||||||
|
|
||||||
case <-ticker.C:
|
|
||||||
|
|
||||||
logger.Info(ctx, "checking for changes to bootstrap")
|
|
||||||
|
|
||||||
var (
|
|
||||||
changed bool
|
|
||||||
err error
|
|
||||||
)
|
|
||||||
|
|
||||||
if hostBootstrap, changed, err = reloadBootstrap(
|
|
||||||
ctx, logger, daemonInst, hostBootstrap,
|
|
||||||
); err != nil {
|
|
||||||
return bootstrap.Bootstrap{}, fmt.Errorf("reloading bootstrap: %w", err)
|
|
||||||
|
|
||||||
} else if changed {
|
|
||||||
fmt.Fprintln(os.Stderr, "bootstrap info has changed, restarting all processes")
|
|
||||||
return hostBootstrap, nil
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
var subCmdDaemon = subCmd{
|
var subCmdDaemon = subCmd{
|
||||||
name: "daemon",
|
name: "daemon",
|
||||||
descr: "Runs the isle daemon (Default if no sub-command given)",
|
descr: "Runs the isle daemon (Default if no sub-command given)",
|
||||||
@ -274,16 +127,36 @@ var subCmdDaemon = subCmd{
|
|||||||
return fmt.Errorf("merging daemon config into bootstrap data: %w", err)
|
return fmt.Errorf("merging daemon config into bootstrap data: %w", err)
|
||||||
}
|
}
|
||||||
|
|
||||||
for {
|
daemonInst := daemon.NewDaemonRestarter(
|
||||||
|
logger, daemonConfig, envBinDirPath, hostBootstrap, nil,
|
||||||
|
)
|
||||||
|
defer func() {
|
||||||
|
logger.Info(ctx, "Stopping child processes")
|
||||||
|
if err := daemonInst.Shutdown(); err != nil {
|
||||||
|
logger.Error(ctx, "Shutting down daemon cleanly failed, there may be orphaned child processes", err)
|
||||||
|
}
|
||||||
|
logger.Info(ctx, "Child processes successfully stopped")
|
||||||
|
}()
|
||||||
|
|
||||||
hostBootstrap, err = runDaemonPmuxOnce(ctx, logger, hostBootstrap, daemonConfig)
|
{
|
||||||
|
logger := logger.WithNamespace("http")
|
||||||
|
httpSrv, err := newHTTPServer(
|
||||||
|
ctx, logger, daemon.NewRPC(daemonInst),
|
||||||
|
)
|
||||||
|
if err != nil {
|
||||||
|
return fmt.Errorf("starting HTTP server: %w", err)
|
||||||
|
}
|
||||||
|
defer func() {
|
||||||
|
// see comment in daemonInst shutdown logic regarding background
|
||||||
|
// context.
|
||||||
|
logger.Info(ctx, "Shutting down HTTP socket")
|
||||||
|
if err := httpSrv.Shutdown(context.Background()); err != nil {
|
||||||
|
logger.Error(ctx, "Failed to cleanly shutdown http server", err)
|
||||||
|
}
|
||||||
|
}()
|
||||||
|
}
|
||||||
|
|
||||||
if errors.Is(err, context.Canceled) {
|
<-ctx.Done()
|
||||||
return nil
|
return nil
|
||||||
|
|
||||||
} else if err != nil {
|
|
||||||
return fmt.Errorf("running pmux for daemon: %w", err)
|
|
||||||
}
|
|
||||||
}
|
|
||||||
},
|
},
|
||||||
}
|
}
|
||||||
|
50
go/daemon/bootstrap.go
Normal file
50
go/daemon/bootstrap.go
Normal file
@ -0,0 +1,50 @@
|
|||||||
|
package daemon
|
||||||
|
|
||||||
|
import (
|
||||||
|
"errors"
|
||||||
|
"fmt"
|
||||||
|
"io/fs"
|
||||||
|
"os"
|
||||||
|
"path/filepath"
|
||||||
|
|
||||||
|
"isle/bootstrap"
|
||||||
|
)
|
||||||
|
|
||||||
|
func loadHostBootstrap(stateDirPath string) (bootstrap.Bootstrap, error) {
|
||||||
|
path := bootstrap.StateDirPath(stateDirPath)
|
||||||
|
|
||||||
|
hostBootstrap, err := bootstrap.FromFile(path)
|
||||||
|
if errors.Is(err, fs.ErrNotExist) {
|
||||||
|
return bootstrap.Bootstrap{}, fmt.Errorf(
|
||||||
|
"%q not found, has the daemon ever been run?",
|
||||||
|
stateDirPath,
|
||||||
|
)
|
||||||
|
|
||||||
|
} else if err != nil {
|
||||||
|
return bootstrap.Bootstrap{}, fmt.Errorf("loading %q: %w", stateDirPath, err)
|
||||||
|
}
|
||||||
|
|
||||||
|
return hostBootstrap, nil
|
||||||
|
}
|
||||||
|
|
||||||
|
func writeBootstrapToStateDir(
|
||||||
|
stateDirPath string, hostBootstrap bootstrap.Bootstrap,
|
||||||
|
) error {
|
||||||
|
var (
|
||||||
|
path = bootstrap.StateDirPath(stateDirPath)
|
||||||
|
dirPath = filepath.Dir(path)
|
||||||
|
)
|
||||||
|
|
||||||
|
if err := os.MkdirAll(dirPath, 0700); err != nil {
|
||||||
|
return fmt.Errorf("creating directory %q: %w", dirPath, err)
|
||||||
|
}
|
||||||
|
|
||||||
|
f, err := os.Create(path)
|
||||||
|
if err != nil {
|
||||||
|
return fmt.Errorf("creating file %q: %w", path, err)
|
||||||
|
}
|
||||||
|
|
||||||
|
defer f.Close()
|
||||||
|
|
||||||
|
return hostBootstrap.WriteTo(f)
|
||||||
|
}
|
95
go/daemon/child_pmux.go
Normal file
95
go/daemon/child_pmux.go
Normal file
@ -0,0 +1,95 @@
|
|||||||
|
package daemon
|
||||||
|
|
||||||
|
import (
|
||||||
|
"context"
|
||||||
|
"fmt"
|
||||||
|
|
||||||
|
"code.betamike.com/micropelago/pmux/pmuxlib"
|
||||||
|
)
|
||||||
|
|
||||||
|
func (d *daemon) newPmuxConfig() (pmuxlib.Config, error) {
|
||||||
|
nebulaPmuxProcConfig, err := nebulaPmuxProcConfig(
|
||||||
|
d.opts.EnvVars.RuntimeDirPath,
|
||||||
|
d.binDirPath,
|
||||||
|
d.hostBootstrap,
|
||||||
|
d.config,
|
||||||
|
)
|
||||||
|
if err != nil {
|
||||||
|
return pmuxlib.Config{}, fmt.Errorf("generating nebula config: %w", err)
|
||||||
|
}
|
||||||
|
|
||||||
|
dnsmasqPmuxProcConfig, err := dnsmasqPmuxProcConfig(
|
||||||
|
d.opts.EnvVars.RuntimeDirPath,
|
||||||
|
d.binDirPath,
|
||||||
|
d.hostBootstrap,
|
||||||
|
d.config,
|
||||||
|
)
|
||||||
|
if err != nil {
|
||||||
|
return pmuxlib.Config{}, fmt.Errorf(
|
||||||
|
"generating dnsmasq config: %w", err,
|
||||||
|
)
|
||||||
|
}
|
||||||
|
|
||||||
|
garagePmuxProcConfigs, err := garagePmuxProcConfigs(
|
||||||
|
d.opts.EnvVars.RuntimeDirPath, d.binDirPath, d.hostBootstrap, d.config,
|
||||||
|
)
|
||||||
|
if err != nil {
|
||||||
|
return pmuxlib.Config{}, fmt.Errorf(
|
||||||
|
"generating garage children configs: %w", err,
|
||||||
|
)
|
||||||
|
}
|
||||||
|
|
||||||
|
return pmuxlib.Config{
|
||||||
|
Processes: append(
|
||||||
|
[]pmuxlib.ProcessConfig{
|
||||||
|
nebulaPmuxProcConfig,
|
||||||
|
dnsmasqPmuxProcConfig,
|
||||||
|
},
|
||||||
|
garagePmuxProcConfigs...,
|
||||||
|
),
|
||||||
|
}, nil
|
||||||
|
}
|
||||||
|
|
||||||
|
func (d *daemon) postPmuxInit(ctx context.Context) error {
|
||||||
|
d.logger.Info(ctx, "waiting for nebula VPN to come online")
|
||||||
|
if err := waitForNebula(ctx, d.hostBootstrap); err != nil {
|
||||||
|
return fmt.Errorf("waiting for nebula to start: %w", err)
|
||||||
|
}
|
||||||
|
|
||||||
|
d.logger.Info(ctx, "waiting for garage instances to come online")
|
||||||
|
if err := d.waitForGarage(ctx); err != nil {
|
||||||
|
return fmt.Errorf("waiting for garage to start: %w", err)
|
||||||
|
}
|
||||||
|
|
||||||
|
if len(d.config.Storage.Allocations) > 0 {
|
||||||
|
|
||||||
|
err := until(ctx, func(ctx context.Context) error {
|
||||||
|
err := garageApplyLayout(ctx, d.logger, d.hostBootstrap, d.config)
|
||||||
|
if err != nil {
|
||||||
|
d.logger.Error(ctx, "applying garage layout", err)
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
|
||||||
|
return nil
|
||||||
|
})
|
||||||
|
|
||||||
|
if err != nil {
|
||||||
|
return fmt.Errorf("applying garage layout: %w", err)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if !d.opts.SkipHostBootstrapPush {
|
||||||
|
if err := until(ctx, func(ctx context.Context) error {
|
||||||
|
if err := d.putGarageBoostrapHost(ctx); err != nil {
|
||||||
|
d.logger.Error(ctx, "updating host info in garage", err)
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
|
||||||
|
return nil
|
||||||
|
}); err != nil {
|
||||||
|
return fmt.Errorf("updating host info in garage: %w", err)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
return nil
|
||||||
|
}
|
@ -8,23 +8,11 @@ import (
|
|||||||
"io"
|
"io"
|
||||||
"isle/bootstrap"
|
"isle/bootstrap"
|
||||||
"os"
|
"os"
|
||||||
"time"
|
|
||||||
|
|
||||||
"code.betamike.com/micropelago/pmux/pmuxlib"
|
"code.betamike.com/micropelago/pmux/pmuxlib"
|
||||||
"dev.mediocregopher.com/mediocre-go-lib.git/mlog"
|
"dev.mediocregopher.com/mediocre-go-lib.git/mlog"
|
||||||
)
|
)
|
||||||
|
|
||||||
type daemon struct {
|
|
||||||
logger *mlog.Logger
|
|
||||||
config Config
|
|
||||||
hostBootstrap bootstrap.Bootstrap
|
|
||||||
binDirPath string
|
|
||||||
opts Opts
|
|
||||||
|
|
||||||
pmuxCancelFn context.CancelFunc
|
|
||||||
pmuxStoppedCh chan struct{}
|
|
||||||
}
|
|
||||||
|
|
||||||
// Daemon presents all functionality required for client frontends to interact
|
// Daemon presents all functionality required for client frontends to interact
|
||||||
// with isle, typically via the unix socket.
|
// with isle, typically via the unix socket.
|
||||||
type Daemon interface {
|
type Daemon interface {
|
||||||
@ -38,12 +26,11 @@ type Daemon interface {
|
|||||||
)
|
)
|
||||||
|
|
||||||
// Shutdown blocks until all resources held or created by the daemon,
|
// Shutdown blocks until all resources held or created by the daemon,
|
||||||
// including child processes it has started, have been cleaned up, or until
|
// including child processes it has started, have been cleaned up.
|
||||||
// the context is canceled.
|
|
||||||
//
|
//
|
||||||
// If this returns an error then it's possible that child processes are
|
// If this returns an error then it's possible that child processes are
|
||||||
// still running and are no longer managed.
|
// still running and are no longer managed.
|
||||||
Shutdown(context.Context) error
|
Shutdown() error
|
||||||
}
|
}
|
||||||
|
|
||||||
// Opts are optional parameters which can be passed in when initializing a new
|
// Opts are optional parameters which can be passed in when initializing a new
|
||||||
@ -80,6 +67,17 @@ func (o *Opts) withDefaults() *Opts {
|
|||||||
return o
|
return o
|
||||||
}
|
}
|
||||||
|
|
||||||
|
type daemon struct {
|
||||||
|
logger *mlog.Logger
|
||||||
|
config Config
|
||||||
|
hostBootstrap bootstrap.Bootstrap
|
||||||
|
binDirPath string
|
||||||
|
opts Opts
|
||||||
|
|
||||||
|
pmuxCancelFn context.CancelFunc
|
||||||
|
pmuxStoppedCh chan struct{}
|
||||||
|
}
|
||||||
|
|
||||||
// New initializes and returns a Daemon. If initialization fails an error is
|
// New initializes and returns a Daemon. If initialization fails an error is
|
||||||
// returned.
|
// returned.
|
||||||
func New(
|
func New(
|
||||||
@ -94,43 +92,6 @@ func New(
|
|||||||
) {
|
) {
|
||||||
opts = opts.withDefaults()
|
opts = opts.withDefaults()
|
||||||
|
|
||||||
nebulaPmuxProcConfig, err := nebulaPmuxProcConfig(
|
|
||||||
opts.EnvVars.RuntimeDirPath,
|
|
||||||
binDirPath,
|
|
||||||
hostBootstrap,
|
|
||||||
config,
|
|
||||||
)
|
|
||||||
if err != nil {
|
|
||||||
return nil, fmt.Errorf("generating nebula config: %w", err)
|
|
||||||
}
|
|
||||||
|
|
||||||
dnsmasqPmuxProcConfig, err := dnsmasqPmuxProcConfig(
|
|
||||||
opts.EnvVars.RuntimeDirPath,
|
|
||||||
binDirPath,
|
|
||||||
hostBootstrap,
|
|
||||||
config,
|
|
||||||
)
|
|
||||||
if err != nil {
|
|
||||||
return nil, fmt.Errorf("generating dnsmasq config: %w", err)
|
|
||||||
}
|
|
||||||
|
|
||||||
garagePmuxProcConfigs, err := garagePmuxProcConfigs(
|
|
||||||
opts.EnvVars.RuntimeDirPath, binDirPath, hostBootstrap, config,
|
|
||||||
)
|
|
||||||
if err != nil {
|
|
||||||
return nil, fmt.Errorf("generating garage children configs: %w", err)
|
|
||||||
}
|
|
||||||
|
|
||||||
pmuxConfig := pmuxlib.Config{
|
|
||||||
Processes: append(
|
|
||||||
[]pmuxlib.ProcessConfig{
|
|
||||||
nebulaPmuxProcConfig,
|
|
||||||
dnsmasqPmuxProcConfig,
|
|
||||||
},
|
|
||||||
garagePmuxProcConfigs...,
|
|
||||||
),
|
|
||||||
}
|
|
||||||
|
|
||||||
pmuxCtx, pmuxCancelFn := context.WithCancel(context.Background())
|
pmuxCtx, pmuxCancelFn := context.WithCancel(context.Background())
|
||||||
|
|
||||||
d := &daemon{
|
d := &daemon{
|
||||||
@ -143,16 +104,20 @@ func New(
|
|||||||
pmuxStoppedCh: make(chan struct{}),
|
pmuxStoppedCh: make(chan struct{}),
|
||||||
}
|
}
|
||||||
|
|
||||||
|
pmuxConfig, err := d.newPmuxConfig()
|
||||||
|
if err != nil {
|
||||||
|
return nil, fmt.Errorf("generating pmux config: %w", err)
|
||||||
|
}
|
||||||
|
|
||||||
go func() {
|
go func() {
|
||||||
defer close(d.pmuxStoppedCh)
|
defer close(d.pmuxStoppedCh)
|
||||||
pmuxlib.Run(pmuxCtx, d.opts.Stdout, d.opts.Stderr, pmuxConfig)
|
pmuxlib.Run(pmuxCtx, d.opts.Stdout, d.opts.Stderr, pmuxConfig)
|
||||||
|
d.logger.Debug(pmuxCtx, "pmux stopped")
|
||||||
}()
|
}()
|
||||||
|
|
||||||
if initErr := d.postPmuxInit(ctx); initErr != nil {
|
if initErr := d.postPmuxInit(ctx); initErr != nil {
|
||||||
logger.Warn(ctx, "failed to initialize daemon, shutting down child processes", err)
|
logger.Warn(ctx, "failed to initialize daemon, shutting down child processes", err)
|
||||||
shutdownCtx, cancel := context.WithTimeout(context.Background(), 1*time.Minute)
|
if err := d.Shutdown(); err != nil {
|
||||||
defer cancel()
|
|
||||||
if err := d.Shutdown(shutdownCtx); err != nil {
|
|
||||||
panic(fmt.Sprintf(
|
panic(fmt.Sprintf(
|
||||||
"failed to shut down child processes after initialization"+
|
"failed to shut down child processes after initialization"+
|
||||||
" error, there may be zombie children leftover."+
|
" error, there may be zombie children leftover."+
|
||||||
@ -167,56 +132,8 @@ func New(
|
|||||||
return d, nil
|
return d, nil
|
||||||
}
|
}
|
||||||
|
|
||||||
func (d *daemon) postPmuxInit(ctx context.Context) error {
|
func (d *daemon) Shutdown() error {
|
||||||
d.logger.Info(ctx, "waiting for nebula VPN to come online")
|
|
||||||
if err := waitForNebula(ctx, d.hostBootstrap); err != nil {
|
|
||||||
return fmt.Errorf("waiting for nebula to start: %w", err)
|
|
||||||
}
|
|
||||||
|
|
||||||
d.logger.Info(ctx, "waiting for garage instances to come online")
|
|
||||||
if err := d.waitForGarage(ctx); err != nil {
|
|
||||||
return fmt.Errorf("waiting for garage to start: %w", err)
|
|
||||||
}
|
|
||||||
|
|
||||||
if len(d.config.Storage.Allocations) > 0 {
|
|
||||||
|
|
||||||
err := until(ctx, func(ctx context.Context) error {
|
|
||||||
err := garageApplyLayout(ctx, d.logger, d.hostBootstrap, d.config)
|
|
||||||
if err != nil {
|
|
||||||
d.logger.Error(ctx, "applying garage layout", err)
|
|
||||||
return err
|
|
||||||
}
|
|
||||||
|
|
||||||
return nil
|
|
||||||
})
|
|
||||||
|
|
||||||
if err != nil {
|
|
||||||
return fmt.Errorf("applying garage layout: %w", err)
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
if !d.opts.SkipHostBootstrapPush {
|
|
||||||
if err := until(ctx, func(ctx context.Context) error {
|
|
||||||
if err := d.putGarageBoostrapHost(ctx); err != nil {
|
|
||||||
d.logger.Error(ctx, "updating host info in garage", err)
|
|
||||||
return err
|
|
||||||
}
|
|
||||||
|
|
||||||
return nil
|
|
||||||
}); err != nil {
|
|
||||||
return fmt.Errorf("updating host info in garage: %w", err)
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
return nil
|
|
||||||
}
|
|
||||||
|
|
||||||
func (d *daemon) Shutdown(ctx context.Context) error {
|
|
||||||
d.pmuxCancelFn()
|
d.pmuxCancelFn()
|
||||||
select {
|
<-d.pmuxStoppedCh
|
||||||
case <-ctx.Done():
|
|
||||||
return ctx.Err()
|
|
||||||
case <-d.pmuxStoppedCh:
|
|
||||||
return nil
|
return nil
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
262
go/daemon/daemon_restarter.go
Normal file
262
go/daemon/daemon_restarter.go
Normal file
@ -0,0 +1,262 @@
|
|||||||
|
package daemon
|
||||||
|
|
||||||
|
import (
|
||||||
|
"bytes"
|
||||||
|
"context"
|
||||||
|
"errors"
|
||||||
|
"fmt"
|
||||||
|
"isle/bootstrap"
|
||||||
|
"sync"
|
||||||
|
"time"
|
||||||
|
|
||||||
|
"dev.mediocregopher.com/mediocre-go-lib.git/mlog"
|
||||||
|
)
|
||||||
|
|
||||||
|
// Lifecycle states of a daemonRestarter.
const (
	// daemonRestarterStateInitializing is the zero value: no inner daemon has
	// been successfully created yet.
	daemonRestarterStateInitializing = iota

	// daemonRestarterStateOk means an inner daemon is available and calls are
	// forwarded to it.
	daemonRestarterStateOk

	// daemonRestarterStateRestarting means a bootstrap change was detected and
	// the inner daemon is being replaced.
	daemonRestarterStateRestarting

	// daemonRestarterStateShutdown means the restart loop has exited.
	daemonRestarterStateShutdown
)
|
||||||
|
|
||||||
|
type daemonRestarter struct {
|
||||||
|
logger *mlog.Logger
|
||||||
|
daemonConfig Config
|
||||||
|
envBinDirPath string
|
||||||
|
opts *Opts
|
||||||
|
|
||||||
|
l sync.Mutex
|
||||||
|
state int
|
||||||
|
inner Daemon
|
||||||
|
currBootstrap bootstrap.Bootstrap
|
||||||
|
|
||||||
|
cancelFn context.CancelFunc
|
||||||
|
stoppedCh chan struct{}
|
||||||
|
}
|
||||||
|
|
||||||
|
// NewDaemonRestarter will wrap a Daemon such that it will be automatically
|
||||||
|
// shutdown and re-created whenever there's changes in the cluster which require
|
||||||
|
// the configuration to be changed (e.g. a new nebula lighthouse).
|
||||||
|
//
|
||||||
|
// While still starting up the daemon for the first time all methods will return
|
||||||
|
// ErrInitializing, except Shutdown which will block until initialization is
|
||||||
|
// canceled.
|
||||||
|
//
|
||||||
|
// During a restart all methods will return ErrRestarting, except Shutdown which
|
||||||
|
// will block until the currently executing restart is finished and then
|
||||||
|
// shutdown cleanly from there.
|
||||||
|
//
|
||||||
|
// TODO make daemonRestarter smarter, it currently restarts on _any_ change, but
|
||||||
|
// it should restart itself only when there's something actually requiring a
|
||||||
|
// restart.
|
||||||
|
func NewDaemonRestarter(
|
||||||
|
logger *mlog.Logger,
|
||||||
|
daemonConfig Config,
|
||||||
|
envBinDirPath string,
|
||||||
|
currBootstrap bootstrap.Bootstrap,
|
||||||
|
opts *Opts,
|
||||||
|
) Daemon {
|
||||||
|
ctx, cancelFn := context.WithCancel(context.Background())
|
||||||
|
|
||||||
|
dr := &daemonRestarter{
|
||||||
|
logger: logger,
|
||||||
|
daemonConfig: daemonConfig,
|
||||||
|
envBinDirPath: envBinDirPath,
|
||||||
|
opts: opts.withDefaults(),
|
||||||
|
currBootstrap: currBootstrap,
|
||||||
|
cancelFn: cancelFn,
|
||||||
|
stoppedCh: make(chan struct{}),
|
||||||
|
}
|
||||||
|
|
||||||
|
go func() {
|
||||||
|
dr.restartLoop(ctx)
|
||||||
|
dr.logger.Debug(ctx, "DaemonRestarter stopped")
|
||||||
|
close(dr.stoppedCh)
|
||||||
|
}()
|
||||||
|
|
||||||
|
return dr
|
||||||
|
}
|
||||||
|
|
||||||
|
func withInnerDaemon[Res any](
|
||||||
|
dr *daemonRestarter, fn func(Daemon) (Res, error),
|
||||||
|
) (Res, error) {
|
||||||
|
var zero Res
|
||||||
|
dr.l.Lock()
|
||||||
|
inner, state := dr.inner, dr.state
|
||||||
|
dr.l.Unlock()
|
||||||
|
|
||||||
|
switch state {
|
||||||
|
case daemonRestarterStateInitializing:
|
||||||
|
return zero, ErrInitializing
|
||||||
|
case daemonRestarterStateOk:
|
||||||
|
return fn(inner)
|
||||||
|
case daemonRestarterStateRestarting:
|
||||||
|
return zero, ErrRestarting
|
||||||
|
case daemonRestarterStateShutdown:
|
||||||
|
return zero, errors.New("already shutdown")
|
||||||
|
default:
|
||||||
|
panic(fmt.Sprintf("unknown state %d", dr.state))
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// creates a new bootstrap file using available information from the network. If
|
||||||
|
// the new bootstrap file is different than the existing one, the existing one
|
||||||
|
// is overwritten and true is returned.
|
||||||
|
func (dr *daemonRestarter) checkBootstrap(
|
||||||
|
ctx context.Context, hostBootstrap bootstrap.Bootstrap,
|
||||||
|
) (
|
||||||
|
bootstrap.Bootstrap, bool, error,
|
||||||
|
) {
|
||||||
|
|
||||||
|
thisHost := hostBootstrap.ThisHost()
|
||||||
|
|
||||||
|
newHosts, err := dr.inner.GetGarageBootstrapHosts(ctx)
|
||||||
|
if err != nil {
|
||||||
|
return bootstrap.Bootstrap{}, false, fmt.Errorf("getting hosts from garage: %w", err)
|
||||||
|
}
|
||||||
|
|
||||||
|
// the daemon's view of this host's bootstrap info takes precedence over
|
||||||
|
// whatever is in garage
|
||||||
|
newHosts[thisHost.Name] = thisHost
|
||||||
|
|
||||||
|
newHostsHash, err := bootstrap.HostsHash(newHosts)
|
||||||
|
if err != nil {
|
||||||
|
return bootstrap.Bootstrap{}, false, fmt.Errorf("calculating hash of new hosts: %w", err)
|
||||||
|
}
|
||||||
|
|
||||||
|
currHostsHash, err := bootstrap.HostsHash(hostBootstrap.Hosts)
|
||||||
|
if err != nil {
|
||||||
|
return bootstrap.Bootstrap{}, false, fmt.Errorf("calculating hash of current hosts: %w", err)
|
||||||
|
}
|
||||||
|
|
||||||
|
if bytes.Equal(newHostsHash, currHostsHash) {
|
||||||
|
return hostBootstrap, false, nil
|
||||||
|
}
|
||||||
|
|
||||||
|
hostBootstrap.Hosts = newHosts
|
||||||
|
|
||||||
|
return hostBootstrap, true, nil
|
||||||
|
}
|
||||||
|
|
||||||
|
// blocks until a new bootstrap is available or context is canceled
|
||||||
|
func (dr *daemonRestarter) watchForChanges(ctx context.Context) bootstrap.Bootstrap {
|
||||||
|
|
||||||
|
ticker := time.NewTicker(3 * time.Minute)
|
||||||
|
defer ticker.Stop()
|
||||||
|
|
||||||
|
for {
|
||||||
|
select {
|
||||||
|
|
||||||
|
case <-ctx.Done():
|
||||||
|
return bootstrap.Bootstrap{}
|
||||||
|
|
||||||
|
case <-ticker.C:
|
||||||
|
|
||||||
|
dr.logger.Info(ctx, "Checking for changes to bootstrap")
|
||||||
|
|
||||||
|
newBootstrap, changed, err := dr.checkBootstrap(
|
||||||
|
ctx, dr.currBootstrap,
|
||||||
|
)
|
||||||
|
if err != nil {
|
||||||
|
dr.logger.Error(ctx, "Checking bootstrap for changes failed", err)
|
||||||
|
continue
|
||||||
|
} else if !changed {
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
|
||||||
|
err = writeBootstrapToStateDir(dr.opts.EnvVars.StateDirPath, newBootstrap)
|
||||||
|
if err != nil {
|
||||||
|
dr.logger.Error(ctx, "Writing new bootstrap to disk failed", err)
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
|
||||||
|
return newBootstrap
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func (dr *daemonRestarter) restartLoop(ctx context.Context) {
|
||||||
|
defer func() {
|
||||||
|
dr.l.Lock()
|
||||||
|
dr.state = daemonRestarterStateShutdown
|
||||||
|
inner := dr.inner
|
||||||
|
dr.l.Unlock()
|
||||||
|
|
||||||
|
if inner != nil {
|
||||||
|
if err := inner.Shutdown(); err != nil {
|
||||||
|
dr.logger.Fatal(ctx, "failed to cleanly shutdown daemon, there may be orphaned child processes", err)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}()
|
||||||
|
|
||||||
|
wait := func(d time.Duration) bool {
|
||||||
|
select {
|
||||||
|
case <-ctx.Done():
|
||||||
|
return false
|
||||||
|
case <-time.After(d):
|
||||||
|
return true
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
for {
|
||||||
|
if ctx.Err() != nil {
|
||||||
|
return
|
||||||
|
}
|
||||||
|
|
||||||
|
dr.logger.Info(ctx, "Creating new daemon")
|
||||||
|
daemonInst, err := New(
|
||||||
|
ctx,
|
||||||
|
dr.logger.WithNamespace("daemon"),
|
||||||
|
dr.daemonConfig,
|
||||||
|
dr.currBootstrap,
|
||||||
|
dr.envBinDirPath,
|
||||||
|
dr.opts,
|
||||||
|
)
|
||||||
|
if errors.Is(err, context.Canceled) {
|
||||||
|
return
|
||||||
|
} else if err != nil {
|
||||||
|
dr.logger.Error(ctx, "failed to initialize daemon", err)
|
||||||
|
if !wait(1 * time.Second) {
|
||||||
|
return
|
||||||
|
}
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
|
||||||
|
dr.l.Lock()
|
||||||
|
dr.inner = daemonInst
|
||||||
|
dr.state = daemonRestarterStateOk
|
||||||
|
dr.l.Unlock()
|
||||||
|
|
||||||
|
newBootstrap := dr.watchForChanges(ctx)
|
||||||
|
if ctx.Err() != nil {
|
||||||
|
return
|
||||||
|
}
|
||||||
|
|
||||||
|
dr.logger.Info(ctx, "Bootstrap has changed, will restart daemon")
|
||||||
|
dr.l.Lock()
|
||||||
|
dr.currBootstrap = newBootstrap
|
||||||
|
dr.state = daemonRestarterStateRestarting
|
||||||
|
dr.l.Unlock()
|
||||||
|
|
||||||
|
dr.logger.Info(ctx, "Shutting down previous daemon")
|
||||||
|
if err := dr.inner.Shutdown(); err != nil {
|
||||||
|
dr.logger.Fatal(ctx, "failed to cleanly shutdown daemon, there may be orphaned child processes", err)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func (dr *daemonRestarter) GetGarageBootstrapHosts(
|
||||||
|
ctx context.Context,
|
||||||
|
) (
|
||||||
|
map[string]bootstrap.Host, error,
|
||||||
|
) {
|
||||||
|
return withInnerDaemon(dr, func(inner Daemon) (map[string]bootstrap.Host, error) {
|
||||||
|
return inner.GetGarageBootstrapHosts(ctx)
|
||||||
|
})
|
||||||
|
}
|
||||||
|
|
||||||
|
func (dr *daemonRestarter) Shutdown() error {
|
||||||
|
dr.cancelFn()
|
||||||
|
<-dr.stoppedCh
|
||||||
|
return nil
|
||||||
|
}
|
13
go/daemon/errors.go
Normal file
13
go/daemon/errors.go
Normal file
@ -0,0 +1,13 @@
|
|||||||
|
package daemon
|
||||||
|
|
||||||
|
import "isle/daemon/jsonrpc2"
|
||||||
|
|
||||||
|
var (
|
||||||
|
// ErrInitializing is returned when a cluster is unavailable due to still
|
||||||
|
// being initialized.
|
||||||
|
ErrInitializing = jsonrpc2.NewError(1, "Cluster is being initialized")
|
||||||
|
|
||||||
|
// ErrRestarting is returned when a cluster is unavailable due to being
|
||||||
|
// restarted.
|
||||||
|
ErrRestarting = jsonrpc2.NewError(2, "Cluster is being restarted")
|
||||||
|
)
|
@ -58,7 +58,7 @@ EOF
|
|||||||
--name "testing" \
|
--name "testing" \
|
||||||
> admin.json
|
> admin.json
|
||||||
|
|
||||||
isle daemon --config-path daemon.yml >daemon.log 2>&1 &
|
isle daemon -l debug --config-path daemon.yml >daemon.log 2>&1 &
|
||||||
pid="$!"
|
pid="$!"
|
||||||
echo "Waiting for primus daemon (process $pid) to initialize"
|
echo "Waiting for primus daemon (process $pid) to initialize"
|
||||||
|
|
||||||
@ -82,7 +82,7 @@ EOF
|
|||||||
device: isle-secondus
|
device: isle-secondus
|
||||||
EOF
|
EOF
|
||||||
|
|
||||||
isle daemon -c daemon.yml -b "$secondus_bootstrap" >daemon.log 2>&1 &
|
isle daemon -l debug -c daemon.yml -b "$secondus_bootstrap" >daemon.log 2>&1 &
|
||||||
pid="$!"
|
pid="$!"
|
||||||
echo "Waiting for secondus daemon (process $!) to initialize"
|
echo "Waiting for secondus daemon (process $!) to initialize"
|
||||||
|
|
||||||
|
Loading…
Reference in New Issue
Block a user