Compare commits

...

5 Commits

17 changed files with 433 additions and 394 deletions

View File

@ -69,7 +69,7 @@ in rec {
'';
};
vendorHash = "sha256-JhcoMLVc6WHRiO3nNDyvGoxZRTtYf7e65nRMaK/EODo=";
vendorHash = "sha256-N80aMMR67DjyEtz9/Yat1bft92nj7JuuF4i2xC9iJ6s=";
subPackages = [
"./cmd/entrypoint"

View File

@ -7,6 +7,7 @@ import (
"encoding/json"
"fmt"
"isle/nebula"
"maps"
"net/netip"
"path/filepath"
"sort"
@ -45,11 +46,11 @@ type Bootstrap struct {
Hosts map[nebula.HostName]Host
}
// New initializes and returns a new Bootstrap file for a new host. This
// function assigns Hosts an empty map.
// New initializes and returns a new Bootstrap file for a new host.
func New(
caCreds nebula.CACredentials,
adminCreationParams CreationParams,
existingHosts map[nebula.HostName]Host,
name nebula.HostName,
ip netip.Addr,
) (
@ -72,13 +73,18 @@ func New(
return Bootstrap{}, fmt.Errorf("signing assigned fields: %w", err)
}
existingHosts = maps.Clone(existingHosts)
existingHosts[name] = Host{
HostAssigned: assigned,
}
return Bootstrap{
NetworkCreationParams: adminCreationParams,
CAPublicCredentials: caCreds.Public,
PrivateCredentials: hostPrivCreds,
HostAssigned: assigned,
SignedHostAssigned: signedAssigned,
Hosts: map[nebula.HostName]Host{},
Hosts: existingHosts,
}, nil
}
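
The updated New above now receives the network's existing hosts and copies them before inserting the new host's entry. A minimal sketch of that clone-then-insert pattern, using hypothetical Host/withHost names rather than the real isle/bootstrap types:

package main

import (
	"fmt"
	"maps"
)

type Host struct{ IP string }

// withHost returns a copy of existing with name added, leaving the
// caller's map untouched (maps.Clone requires Go 1.21+).
func withHost(existing map[string]Host, name string, h Host) map[string]Host {
	existing = maps.Clone(existing)
	if existing == nil { // Clone(nil) returns nil
		existing = map[string]Host{}
	}
	existing[name] = h
	return existing
}

func main() {
	orig := map[string]Host{"primus": {IP: "10.6.9.1"}}
	next := withHost(orig, "secondus", Host{IP: "10.6.9.2"})
	fmt.Println(len(orig), len(next)) // 1 2 — the original map is untouched
}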

View File

@ -49,13 +49,17 @@ var subCmdDaemon = subCmd{
logger := subCmdCtx.logger.WithMaxLevel(logLevel.Int())
// TODO check that daemon is either running as root, or that the
// required linux capabilities are set.
// TODO check that the tun module is loaded (for nebula).
daemonConfig, err := daemon.LoadConfig(envAppDirPath, *daemonConfigPath)
if err != nil {
return fmt.Errorf("loading daemon config: %w", err)
}
daemonInst, err := daemon.NewDaemon(
logger, daemonConfig, envBinDirPath, nil,
ctx, logger, daemonConfig, envBinDirPath, nil,
)
if err != nil {
return fmt.Errorf("starting daemon: %w", err)

View File

@ -8,6 +8,7 @@ import (
"isle/daemon/jsonrpc2"
"net"
"net/http"
"os"
"dev.mediocregopher.com/mediocre-go-lib.git/mctx"
"dev.mediocregopher.com/mediocre-go-lib.git/mlog"
@ -24,7 +25,13 @@ func newHTTPServer(
l, err := net.Listen("unix", socketPath)
if err != nil {
return nil, fmt.Errorf(
"failed to listen on socket %q: %w", socketPath, err,
"listening on socket %q: %w", socketPath, err,
)
}
if err := os.Chmod(socketPath, 0660); err != nil {
return nil, fmt.Errorf(
"setting permissions of %q to 0660: %w", socketPath, err,
)
}
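
The added chmod above relaxes the RPC socket's permissions right after it is created, presumably so that group members (not only root) can reach the daemon. A self-contained sketch of the same listen-then-chmod sequence, assuming a throwaway socket path:

package main

import (
	"fmt"
	"net"
	"os"
)

func listenUnix(socketPath string) (net.Listener, error) {
	l, err := net.Listen("unix", socketPath)
	if err != nil {
		return nil, fmt.Errorf("listening on socket %q: %w", socketPath, err)
	}
	// 0660: owner and group may connect, others may not
	if err := os.Chmod(socketPath, 0660); err != nil {
		l.Close()
		return nil, fmt.Errorf("setting permissions of %q to 0660: %w", socketPath, err)
	}
	return l, nil
}

func main() {
	l, err := listenUnix("/tmp/isle-example.sock")
	if err != nil {
		panic(err)
	}
	defer l.Close()
	fmt.Println("listening on", l.Addr())
}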

View File

@ -1,10 +1,13 @@
package daemon
import (
"bytes"
"encoding/json"
"errors"
"fmt"
"os"
"path/filepath"
"reflect"
"isle/bootstrap"
"isle/garage/garagesrv"
@ -77,3 +80,49 @@ func coalesceDaemonConfigAndBootstrap(
return hostBootstrap, nil
}
type bootstrapDiff struct {
hostsChanged bool
nebulaChanged bool
dnsChanged bool
}
func calcBootstrapDiff(
daemonConfig Config,
prevBootstrap, nextBootstrap bootstrap.Bootstrap,
) (
diff bootstrapDiff, err error,
) {
{
prevHash, prevErr := bootstrap.HostsHash(prevBootstrap.Hosts)
nextHash, nextErr := bootstrap.HostsHash(nextBootstrap.Hosts)
if err = errors.Join(prevErr, nextErr); err != nil {
err = fmt.Errorf("calculating host hashes: %w", err)
return
}
diff.hostsChanged = !bytes.Equal(prevHash, nextHash)
}
{
prevNebulaConfig, prevErr := nebulaConfig(daemonConfig, prevBootstrap)
nextNebulaConfig, nextErr := nebulaConfig(daemonConfig, nextBootstrap)
if err = errors.Join(prevErr, nextErr); err != nil {
err = fmt.Errorf("calculating nebula config: %w", err)
return
}
diff.nebulaChanged = !reflect.DeepEqual(
prevNebulaConfig, nextNebulaConfig,
)
}
{
diff.dnsChanged = !reflect.DeepEqual(
dnsmasqConfig(daemonConfig, prevBootstrap),
dnsmasqConfig(daemonConfig, nextBootstrap),
)
}
return
}
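
calcBootstrapDiff above reduces two bootstraps to three booleans (hosts, nebula config, dnsmasq config), which later drive targeted restarts. A hedged sketch of how such a diff is consumed, mirroring the reload logic further down in this comparison; applyDiff and restart are hypothetical stand-ins, not part of the change:

package main

import (
	"errors"
	"fmt"
)

type bootstrapDiff struct{ hostsChanged, nebulaChanged, dnsChanged bool }

// applyDiff restarts only the services whose inputs changed, collecting
// failures with errors.Join instead of stopping at the first one.
func applyDiff(diff bootstrapDiff, restart func(name string) error) error {
	var errs []error
	if diff.dnsChanged {
		if err := restart("dnsmasq"); err != nil {
			errs = append(errs, fmt.Errorf("restarting dnsmasq: %w", err))
		}
	}
	if diff.nebulaChanged {
		if err := restart("nebula"); err != nil {
			errs = append(errs, fmt.Errorf("restarting nebula: %w", err))
		}
	}
	return errors.Join(errs...)
}

func main() {
	err := applyDiff(
		bootstrapDiff{dnsChanged: true},
		func(name string) error { fmt.Println("restarting", name); return nil },
	)
	fmt.Println("err:", err)
}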

View File

@ -1,6 +1,7 @@
package daemon
import (
"context"
"fmt"
"isle/bootstrap"
"isle/dnsmasq"
@ -8,17 +9,12 @@ import (
"sort"
"code.betamike.com/micropelago/pmux/pmuxlib"
"dev.mediocregopher.com/mediocre-go-lib.git/mlog"
)
func dnsmasqPmuxProcConfig(
runtimeDirPath, binDirPath string,
hostBootstrap bootstrap.Bootstrap,
daemonConfig Config,
) (
pmuxlib.ProcessConfig, error,
) {
confPath := filepath.Join(runtimeDirPath, "dnsmasq.conf")
func dnsmasqConfig(
daemonConfig Config, hostBootstrap bootstrap.Bootstrap,
) dnsmasq.ConfData {
hostsSlice := make([]dnsmasq.ConfDataHost, 0, len(hostBootstrap.Hosts))
for _, host := range hostBootstrap.Hosts {
hostsSlice = append(hostsSlice, dnsmasq.ConfDataHost{
@ -31,20 +27,55 @@ func dnsmasqPmuxProcConfig(
return hostsSlice[i].IP < hostsSlice[j].IP
})
confData := dnsmasq.ConfData{
return dnsmasq.ConfData{
Resolvers: daemonConfig.DNS.Resolvers,
Domain: hostBootstrap.NetworkCreationParams.Domain,
IP: hostBootstrap.ThisHost().IP().String(),
Hosts: hostsSlice,
}
}
func dnsmasqWriteConfig(
runtimeDirPath string,
daemonConfig Config,
hostBootstrap bootstrap.Bootstrap,
) (
string, error,
) {
var (
confPath = filepath.Join(runtimeDirPath, "dnsmasq.conf")
confData = dnsmasqConfig(daemonConfig, hostBootstrap)
)
if err := dnsmasq.WriteConfFile(confPath, confData); err != nil {
return pmuxlib.ProcessConfig{}, fmt.Errorf("writing dnsmasq.conf to %q: %w", confPath, err)
return "", fmt.Errorf("writing dnsmasq.conf to %q: %w", confPath, err)
}
return confPath, nil
}
func dnsmasqPmuxProcConfig(
logger *mlog.Logger,
runtimeDirPath, binDirPath string,
daemonConfig Config,
hostBootstrap bootstrap.Bootstrap,
) (
pmuxlib.ProcessConfig, error,
) {
confPath, err := dnsmasqWriteConfig(
runtimeDirPath, daemonConfig, hostBootstrap,
)
if err != nil {
return pmuxlib.ProcessConfig{}, fmt.Errorf(
"writing dnsmasq config: %w", err,
)
}
return pmuxlib.ProcessConfig{
Name: "dnsmasq",
Cmd: filepath.Join(binDirPath, "dnsmasq"),
Args: []string{"-d", "-C", confPath},
StartAfterFunc: func(ctx context.Context) error {
return waitForNebula(ctx, logger, hostBootstrap)
},
}, nil
}
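
The dnsmasq code above is now split into a pure config builder (dnsmasqConfig), a writer (dnsmasqWriteConfig), and the pmux process entry (dnsmasqPmuxProcConfig); the pure builder is what calcBootstrapDiff compares, and the writer is reused when only dnsmasq needs restarting. A small sketch of that split under assumed names (ConfData, buildConf and writeConf are illustrative, not the real isle/dnsmasq API):

package main

import (
	"fmt"
	"os"
	"reflect"
)

type ConfData struct {
	Domain string
	Hosts  []string
}

// buildConf is pure: call it for two bootstraps and compare the results.
func buildConf(domain string, hosts []string) ConfData {
	return ConfData{Domain: domain, Hosts: hosts}
}

// writeConf persists the config so the process can be (re)started against it.
func writeConf(path string, c ConfData) error {
	return os.WriteFile(path, []byte(fmt.Sprintf("%+v\n", c)), 0600)
}

func main() {
	prev := buildConf("shared.test", []string{"10.6.9.1"})
	next := buildConf("shared.test", []string{"10.6.9.1", "10.6.9.2"})
	fmt.Println("changed:", !reflect.DeepEqual(prev, next))
	if err := writeConf("/tmp/example-dnsmasq.conf", next); err != nil {
		panic(err)
	}
}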

View File

@ -151,10 +151,10 @@ func garagePmuxProcConfigs(
adminToken string,
hostBootstrap bootstrap.Bootstrap,
) (
[]pmuxlib.ProcessConfig, error,
map[string]pmuxlib.ProcessConfig, error,
) {
var (
pmuxProcConfigs []pmuxlib.ProcessConfig
pmuxProcConfigs = map[string]pmuxlib.ProcessConfig{}
allocs = daemonConfig.Storage.Allocations
)
@ -172,14 +172,14 @@ func garagePmuxProcConfigs(
return nil, fmt.Errorf("writing child config file for alloc %+v: %w", alloc, err)
}
pmuxProcConfigs = append(pmuxProcConfigs, pmuxlib.ProcessConfig{
Name: fmt.Sprintf("garage-%d", alloc.RPCPort),
procName := fmt.Sprintf("garage-%d", alloc.RPCPort)
pmuxProcConfigs[procName] = pmuxlib.ProcessConfig{
Cmd: filepath.Join(binDirPath, "garage"),
Args: []string{"-c", childConfigPath, "server"},
StartAfterFunc: func(ctx context.Context) error {
return waitForNebula(ctx, logger, hostBootstrap)
},
})
}
}
return pmuxProcConfigs, nil

View File

@ -46,14 +46,12 @@ func waitForNebula(
return ctx.Err()
}
func nebulaPmuxProcConfig(
runtimeDirPath, binDirPath string,
func nebulaConfig(
daemonConfig Config,
hostBootstrap bootstrap.Bootstrap,
) (
pmuxlib.ProcessConfig, error,
map[string]any, error,
) {
var (
lighthouseHostIPs []string
staticHostMap = map[string][]string{}
@ -72,23 +70,19 @@ func nebulaPmuxProcConfig(
caCertPEM, err := hostBootstrap.CAPublicCredentials.Cert.Unwrap().MarshalToPEM()
if err != nil {
return pmuxlib.ProcessConfig{}, fmt.Errorf(
"marshaling CA cert to PEM: :%w", err,
)
return nil, fmt.Errorf("marshaling CA cert to PEM: :%w", err)
}
hostCertPEM, err := hostBootstrap.PublicCredentials.Cert.Unwrap().MarshalToPEM()
if err != nil {
return pmuxlib.ProcessConfig{}, fmt.Errorf(
"marshaling host cert to PEM: :%w", err,
)
return nil, fmt.Errorf("marshaling host cert to PEM: :%w", err)
}
hostKeyPEM := cert.MarshalX25519PrivateKey(
hostBootstrap.PrivateCredentials.EncryptingPrivateKey.Bytes(),
)
config := map[string]interface{}{
config := map[string]any{
"pki": map[string]string{
"ca": string(caCertPEM),
"cert": string(hostCertPEM),
@ -99,7 +93,7 @@ func nebulaPmuxProcConfig(
"punch": true,
"respond": true,
},
"tun": map[string]interface{}{
"tun": map[string]any{
"dev": daemonConfig.VPN.Tun.Device,
},
"firewall": daemonConfig.VPN.Firewall,
@ -112,7 +106,7 @@ func nebulaPmuxProcConfig(
"port": "0",
}
config["lighthouse"] = map[string]interface{}{
config["lighthouse"] = map[string]any{
"hosts": lighthouseHostIPs,
}
@ -121,7 +115,9 @@ func nebulaPmuxProcConfig(
_, port, err := net.SplitHostPort(publicAddr)
if err != nil {
return pmuxlib.ProcessConfig{}, fmt.Errorf("parsing public address %q: %w", publicAddr, err)
return nil, fmt.Errorf(
"parsing public address %q: %w", publicAddr, err,
)
}
config["listen"] = map[string]string{
@ -129,21 +125,60 @@ func nebulaPmuxProcConfig(
"port": port,
}
config["lighthouse"] = map[string]interface{}{
config["lighthouse"] = map[string]any{
"hosts": []string{},
"am_lighthouse": true,
}
}
return config, nil
}
func nebulaWriteConfig(
runtimeDirPath string,
daemonConfig Config,
hostBootstrap bootstrap.Bootstrap,
) (
string, error,
) {
config, err := nebulaConfig(daemonConfig, hostBootstrap)
if err != nil {
return "", fmt.Errorf("creating nebula config: %w", err)
}
nebulaYmlPath := filepath.Join(runtimeDirPath, "nebula.yml")
if err := yamlutil.WriteYamlFile(config, nebulaYmlPath, 0600); err != nil {
return pmuxlib.ProcessConfig{}, fmt.Errorf("writing nebula.yml to %q: %w", nebulaYmlPath, err)
return "", fmt.Errorf("writing nebula.yml to %q: %w", nebulaYmlPath, err)
}
return nebulaYmlPath, nil
}
func nebulaPmuxProcConfig(
runtimeDirPath, binDirPath string,
daemonConfig Config,
hostBootstrap bootstrap.Bootstrap,
) (
pmuxlib.ProcessConfig, error,
) {
config, err := nebulaConfig(daemonConfig, hostBootstrap)
if err != nil {
return pmuxlib.ProcessConfig{}, fmt.Errorf(
"creating nebula config: %w", err,
)
}
nebulaYmlPath := filepath.Join(runtimeDirPath, "nebula.yml")
if err := yamlutil.WriteYamlFile(config, nebulaYmlPath, 0600); err != nil {
return pmuxlib.ProcessConfig{}, fmt.Errorf(
"writing nebula.yml to %q: %w", nebulaYmlPath, err,
)
}
return pmuxlib.ProcessConfig{
Name: "nebula",
Cmd: filepath.Join(binDirPath, "nebula"),
Args: []string{"-config", nebulaYmlPath},
Group: -1, // Make sure nebula is shut down last.
}, nil
}

View File

@@ -28,10 +28,11 @@ func (c *Children) newPmuxConfig(
}
dnsmasqPmuxProcConfig, err := dnsmasqPmuxProcConfig(
c.logger,
c.opts.EnvVars.RuntimeDirPath,
binDirPath,
hostBootstrap,
daemonConfig,
hostBootstrap,
)
if err != nil {
return pmuxlib.Config{}, fmt.Errorf(
@ -55,14 +56,12 @@ func (c *Children) newPmuxConfig(
)
}
pmuxProcConfigs := garagePmuxProcConfigs
pmuxProcConfigs["nebula"] = nebulaPmuxProcConfig
pmuxProcConfigs["dnsmasq"] = dnsmasqPmuxProcConfig
return pmuxlib.Config{
Processes: append(
[]pmuxlib.ProcessConfig{
nebulaPmuxProcConfig,
dnsmasqPmuxProcConfig,
},
garagePmuxProcConfigs...,
),
Processes: pmuxProcConfigs,
}, nil
}

View File

@ -18,10 +18,10 @@ import (
// - garage (0 or more, depending on configured storage allocations)
type Children struct {
logger *mlog.Logger
daemonConfig Config
opts Opts
pmuxCancelFn context.CancelFunc
pmuxStoppedCh chan struct{}
pmux *pmuxlib.Pmux
}
// NewChildren initializes and returns a Children instance. If initialization
@ -46,13 +46,10 @@ func NewChildren(
return nil, fmt.Errorf("loading garage RPC secret: %w", err)
}
pmuxCtx, pmuxCancelFn := context.WithCancel(context.Background())
c := &Children{
logger: logger,
daemonConfig: daemonConfig,
opts: *opts,
pmuxCancelFn: pmuxCancelFn,
pmuxStoppedCh: make(chan struct{}),
}
pmuxConfig, err := c.newPmuxConfig(
@ -67,39 +64,48 @@ func NewChildren(
return nil, fmt.Errorf("generating pmux config: %w", err)
}
go func() {
defer close(c.pmuxStoppedCh)
pmuxlib.Run(pmuxCtx, c.opts.Stdout, c.opts.Stderr, pmuxConfig)
c.logger.Debug(pmuxCtx, "pmux stopped")
}()
c.pmux = pmuxlib.NewPmux(pmuxConfig, c.opts.Stdout, c.opts.Stderr)
initErr := c.postPmuxInit(
ctx, daemonConfig, garageAdminToken, hostBootstrap,
)
if initErr != nil {
logger.Warn(ctx, "failed to initialize Children, shutting down child processes", err)
if err := c.Shutdown(); err != nil {
panic(fmt.Sprintf(
"failed to shut down child processes after initialization"+
" error, there may be zombie children leftover."+
" Original error: %v",
initErr,
))
}
c.Shutdown()
return nil, initErr
}
return c, nil
}
// Shutdown blocks until all child processes have gracefully shut themselves
// down.
//
// If this returns an error then it's possible that child processes are
// still running and are no longer managed.
func (c *Children) Shutdown() error {
c.pmuxCancelFn()
<-c.pmuxStoppedCh
// RestartDNSMasq rewrites the dnsmasq config and restarts the process.
func (c *Children) RestartDNSMasq(hostBootstrap bootstrap.Bootstrap) error {
_, err := dnsmasqWriteConfig(
c.opts.EnvVars.RuntimeDirPath, c.daemonConfig, hostBootstrap,
)
if err != nil {
return fmt.Errorf("writing new dnsmasq config: %w", err)
}
c.pmux.Restart("dnsmasq")
return nil
}
// RestartNebula rewrites the nebula config and restarts the process.
func (c *Children) RestartNebula(hostBootstrap bootstrap.Bootstrap) error {
_, err := nebulaWriteConfig(
c.opts.EnvVars.RuntimeDirPath, c.daemonConfig, hostBootstrap,
)
if err != nil {
return fmt.Errorf("writing a new nebula config: %w", err)
}
c.pmux.Restart("nebula")
return nil
}
// Shutdown blocks until all child processes have gracefully shut themselves
// down.
func (c *Children) Shutdown() {
c.pmux.Stop()
}
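
Children now drives pmux directly: NewPmux builds the supervisor from named process configs, Restart bounces a single process after its config file has been rewritten, and Stop replaces the old cancel-and-wait shutdown. A hedged sketch of the rewrite-config-then-restart-by-name flow behind RestartDNSMasq/RestartNebula, against a hypothetical process-manager interface rather than the real pmuxlib API:

package main

import "fmt"

type procManager interface {
	Restart(name string)
}

type fakeManager struct{}

func (fakeManager) Restart(name string) { fmt.Println("restarted", name) }

// restartWithNewConfig only bounces the process once its new config is
// safely on disk, so a failed write leaves the old process untouched.
func restartWithNewConfig(
	pm procManager, name string, writeConfig func() (string, error),
) error {
	if _, err := writeConfig(); err != nil {
		return fmt.Errorf("writing new %s config: %w", name, err)
	}
	pm.Restart(name)
	return nil
}

func main() {
	err := restartWithNewConfig(fakeManager{}, "dnsmasq", func() (string, error) {
		return "/tmp/dnsmasq.conf", nil
	})
	fmt.Println("err:", err)
}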

View File

@ -3,7 +3,6 @@
package daemon
import (
"bytes"
"context"
"errors"
"fmt"
@ -146,7 +145,6 @@ const (
daemonStateNoNetwork = iota
daemonStateInitializing
daemonStateOk
daemonStateRestarting
daemonStateShutdown
)
@ -182,12 +180,12 @@ type daemon struct {
// While still starting up the Daemon for the first time all methods will return
// ErrInitializing, except Shutdown which will block until initialization is
// canceled.
//
// TODO make daemon smarter, it currently restarts on _any_ change, but
// it should restart itself only when there's something actually requiring a
// restart.
func NewDaemon(
logger *mlog.Logger, daemonConfig Config, envBinDirPath string, opts *Opts,
ctx context.Context,
logger *mlog.Logger,
daemonConfig Config,
envBinDirPath string,
opts *Opts,
) (
Daemon, error,
) {
@ -226,21 +224,20 @@ func NewDaemon(
return nil, fmt.Errorf(
"loading bootstrap from %q: %w", bootstrapFilePath, err,
)
} else if err := d.initialize(currBootstrap, nil); err != nil {
} else if err := d.initialize(ctx, currBootstrap); err != nil {
return nil, fmt.Errorf("initializing with bootstrap: %w", err)
}
return d, nil
}
// initialize must be called with d.l write lock held, _but_ the lock should be
// released just after initialize returns.
//
// readyCh will be written to everytime daemon changes state to daemonStateOk,
// unless it is nil or blocks.
// initialize must be called with the d.l write lock held; it will unlock the
// lock itself before returning.
func (d *daemon) initialize(
currBootstrap bootstrap.Bootstrap, readyCh chan<- struct{},
ctx context.Context, currBootstrap bootstrap.Bootstrap,
) error {
defer d.l.Unlock()
// we update this Host's data using whatever configuration has been provided
// by the daemon config. This way the daemon has the most up-to-date
// possible bootstrap. This updated bootstrap will later get updated in
@ -260,6 +257,31 @@ func (d *daemon) initialize(
d.currBootstrap = currBootstrap
d.state = daemonStateInitializing
d.logger.Info(ctx, "Creating child processes")
d.children, err = NewChildren(
ctx,
d.logger.WithNamespace("children"),
d.envBinDirPath,
d.secretsStore,
d.daemonConfig,
d.garageAdminToken,
currBootstrap,
d.opts,
)
if err != nil {
return fmt.Errorf("creating child processes: %w", err)
}
d.logger.Info(ctx, "Child processes created")
if err := d.postInit(ctx); err != nil {
d.logger.Error(ctx, "Post-initialization failed, stopping child processes", err)
d.children.Shutdown()
return fmt.Errorf("performing post-initialization: %w", err)
}
d.state = daemonStateOk
ctx, cancel := context.WithCancel(context.Background())
d.wg.Add(1)
go func() {
@ -271,7 +293,7 @@ func (d *daemon) initialize(
d.wg.Add(1)
go func() {
defer d.wg.Done()
d.restartLoop(ctx, readyCh)
d.reloadLoop(ctx)
d.logger.Debug(ctx, "Daemon restart loop stopped")
}()
@ -294,8 +316,6 @@ func withCurrBootstrap[Res any](
return zero, ErrInitializing
case daemonStateOk:
return fn(currBootstrap)
case daemonStateRestarting:
return zero, ErrRestarting
case daemonStateShutdown:
return zero, errors.New("already shutdown")
default:
@ -303,99 +323,70 @@ func withCurrBootstrap[Res any](
}
}
// creates a new bootstrap file using available information from the network. If
// the new bootstrap file is different than the existing one, the existing one
// is overwritten and true is returned.
func (d *daemon) checkBootstrap(
ctx context.Context, hostBootstrap bootstrap.Bootstrap,
) (
bootstrap.Bootstrap, bool, error,
) {
// reload checks the hosts data in currBootstrap against a potentially
// updated set of hosts data and, if there are any differences, performs
// whatever changes are necessary.
func (d *daemon) reload(
ctx context.Context,
currBootstrap bootstrap.Bootstrap,
newHosts map[nebula.HostName]bootstrap.Host,
) error {
var (
newBootstrap = currBootstrap
thisHost = currBootstrap.ThisHost()
)
thisHost := hostBootstrap.ThisHost()
newHosts, err := d.getGarageBootstrapHosts(ctx, d.logger, hostBootstrap)
if err != nil {
return bootstrap.Bootstrap{}, false, fmt.Errorf("getting hosts from garage: %w", err)
}
newBootstrap.Hosts = newHosts
// the daemon's view of this host's bootstrap info takes precedence over
// whatever is in garage
newHosts[thisHost.Name] = thisHost
newBootstrap.Hosts[thisHost.Name] = thisHost
newHostsHash, err := bootstrap.HostsHash(newHosts)
diff, err := calcBootstrapDiff(d.daemonConfig, currBootstrap, newBootstrap)
if err != nil {
return bootstrap.Bootstrap{}, false, fmt.Errorf("calculating hash of new hosts: %w", err)
return fmt.Errorf("calculating diff between bootstraps: %w", err)
} else if diff == (bootstrapDiff{}) {
d.logger.Info(ctx, "No changes to bootstrap detected")
return nil
}
currHostsHash, err := bootstrap.HostsHash(hostBootstrap.Hosts)
if err != nil {
return bootstrap.Bootstrap{}, false, fmt.Errorf("calculating hash of current hosts: %w", err)
}
d.logger.Info(ctx, "Bootstrap has changed, storing new bootstrap")
d.l.Lock()
d.currBootstrap = newBootstrap
d.l.Unlock()
if bytes.Equal(newHostsHash, currHostsHash) {
return hostBootstrap, false, nil
}
var errs []error
hostBootstrap.Hosts = newHosts
// TODO each of these changed cases should block until its respective
// service is confirmed to have come back online.
return hostBootstrap, true, nil
}
// TODO it's possible that reload could be called concurrently, and one call
// would override the reloading done by the other.
// blocks until a new bootstrap is available or context is canceled
func (d *daemon) watchForChanges(ctx context.Context) bootstrap.Bootstrap {
ticker := time.NewTicker(3 * time.Minute)
defer ticker.Stop()
for {
select {
case <-ctx.Done():
return bootstrap.Bootstrap{}
case <-ticker.C:
d.logger.Info(ctx, "Checking for changes to bootstrap")
newBootstrap, changed, err := d.checkBootstrap(
ctx, d.currBootstrap,
)
if err != nil {
d.logger.Error(ctx, "Checking bootstrap for changes failed", err)
continue
} else if !changed {
continue
}
err = writeBootstrapToStateDir(d.opts.EnvVars.StateDirPath, newBootstrap)
if err != nil {
d.logger.Error(ctx, "Writing new bootstrap to disk failed", err)
continue
}
return newBootstrap
if diff.dnsChanged {
d.logger.Info(ctx, "Restarting dnsmasq to account for bootstrap changes")
if err := d.children.RestartDNSMasq(newBootstrap); err != nil {
errs = append(errs, fmt.Errorf("restarting dnsmasq: %w", err))
}
}
if diff.nebulaChanged {
d.logger.Info(ctx, "Restarting nebula to account for bootstrap changes")
if err := d.children.RestartNebula(newBootstrap); err != nil {
errs = append(errs, fmt.Errorf("restarting nebula: %w", err))
}
}
func (d *daemon) postInit(ctx context.Context) bool {
return errors.Join(errs...)
}
func (d *daemon) postInit(ctx context.Context) error {
if len(d.daemonConfig.Storage.Allocations) > 0 {
if !until(
ctx,
d.logger,
"Applying garage layout",
func(ctx context.Context) error {
return garageApplyLayout(
ctx,
d.logger,
d.daemonConfig,
d.garageAdminToken,
d.currBootstrap,
)
},
) {
return false
d.logger.Info(ctx, "Applying garage layout")
if err := garageApplyLayout(
ctx, d.logger, d.daemonConfig, d.garageAdminToken, d.currBootstrap,
); err != nil {
return fmt.Errorf("applying garage layout: %w", err)
}
}
@ -406,11 +397,7 @@ func (d *daemon) postInit(ctx context.Context) bool {
// manage it at the moment.
_, err := getGarageS3APIGlobalBucketCredentials(ctx, d.secretsStore)
if errors.Is(err, secrets.ErrNotFound) {
if !until(
ctx,
d.logger,
"Initializing garage shared global bucket",
func(ctx context.Context) error {
d.logger.Info(ctx, "Initializing garage shared global bucket")
garageGlobalBucketCreds, err := garageInitializeGlobalBucket(
ctx,
d.logger,
@ -428,107 +415,48 @@ func (d *daemon) postInit(ctx context.Context) bool {
if err != nil {
return fmt.Errorf("storing global bucket creds: %w", err)
}
}
d.logger.Info(ctx, "Updating host info in garage")
err = d.putGarageBoostrapHost(ctx, d.currBootstrap)
if err != nil {
return fmt.Errorf("updating host info in garage: %w", err)
}
return nil
},
) {
return false
}
}
if !until(
ctx,
d.logger,
"Updating host info in garage",
func(ctx context.Context) error {
return d.putGarageBoostrapHost(ctx, d.logger, d.currBootstrap)
},
) {
return false
}
return true
}
func (d *daemon) restartLoop(ctx context.Context, readyCh chan<- struct{}) {
wait := func(d time.Duration) bool {
select {
case <-ctx.Done():
return false
case <-time.After(d):
return true
}
}
ready := func() {
select {
case readyCh <- struct{}{}:
default:
}
}
func (d *daemon) reloadLoop(ctx context.Context) {
ticker := time.NewTicker(3 * time.Minute)
defer ticker.Stop()
for {
if ctx.Err() != nil {
select {
case <-ctx.Done():
return
}
d.logger.Info(ctx, "Creating child processes")
children, err := NewChildren(
ctx,
d.logger.WithNamespace("children"),
d.envBinDirPath,
d.secretsStore,
d.daemonConfig,
d.garageAdminToken,
d.currBootstrap,
d.opts,
)
if errors.Is(err, context.Canceled) {
return
} else if err != nil {
d.logger.Error(ctx, "failed to initialize child processes", err)
if !wait(1 * time.Second) {
return
}
case <-ticker.C:
d.l.RLock()
currBootstrap := d.currBootstrap
d.l.RUnlock()
d.logger.Info(ctx, "Checking for bootstrap changes")
newHosts, err := d.getGarageBootstrapHosts(ctx, currBootstrap)
if err != nil {
d.logger.Error(ctx, "Failed to get hosts from garage", err)
continue
}
d.logger.Info(ctx, "Child processes created")
// TODO there are some potential race conditions here, where
// CreateHost could be called at this point, write the new host to
// garage and the bootstrap, but then this reload call removes the
// host from this bootstrap/children until the next reload.
d.l.Lock()
d.children = children
d.l.Unlock()
if !d.postInit(ctx) {
return
if err := d.reload(ctx, currBootstrap, newHosts); err != nil {
d.logger.Error(ctx, "Reloading with new host data failed", err)
continue
}
d.l.Lock()
d.state = daemonStateOk
d.l.Unlock()
ready()
newBootstrap := d.watchForChanges(ctx)
if ctx.Err() != nil {
return
}
d.logger.Info(ctx, "Bootstrap has changed, will restart daemon")
d.l.Lock()
d.currBootstrap = newBootstrap
d.state = daemonStateRestarting
d.l.Unlock()
d.logger.Info(ctx, "Shutting down previous child processes")
if err := d.children.Shutdown(); err != nil {
d.logger.Fatal(ctx, "Failed to cleanly shutdown children, there may be orphaned child processes", err)
}
// in case context was canceled while shutting the Children down, we
// don't want the Shutdown method to re-attempt calling Shutdown on
// it.
d.children = nil
}
}
@ -564,6 +492,7 @@ func (d *daemon) CreateNetwork(
hostBootstrap, err := bootstrap.New(
nebulaCACreds,
creationParams,
map[nebula.HostName]bootstrap.Host{},
hostName,
ipNet.FirstAddr(),
)
@ -585,20 +514,12 @@ func (d *daemon) CreateNetwork(
)
}
readyCh := make(chan struct{}, 1)
err = d.initialize(hostBootstrap, readyCh)
d.l.Unlock()
// initialize will unlock d.l
err = d.initialize(ctx, hostBootstrap)
if err != nil {
return fmt.Errorf("initializing daemon: %w", err)
}
select {
case <-readyCh:
case <-ctx.Done():
return ctx.Err()
}
return nil
}
@ -612,26 +533,19 @@ func (d *daemon) JoinNetwork(
return ErrAlreadyJoined
}
readyCh := make(chan struct{}, 1)
err := secrets.Import(ctx, d.secretsStore, newBootstrap.Secrets)
if err != nil {
d.l.Unlock()
return fmt.Errorf("importing secrets: %w", err)
}
err = d.initialize(newBootstrap.Bootstrap, readyCh)
d.l.Unlock()
// initialize will unlock d.l
err = d.initialize(ctx, newBootstrap.Bootstrap)
if err != nil {
return fmt.Errorf("initializing daemon: %w", err)
}
select {
case <-readyCh:
return nil
case <-ctx.Done():
return ctx.Err()
}
}
func (d *daemon) GetBootstrap(ctx context.Context) (bootstrap.Bootstrap, error) {
@ -695,11 +609,10 @@ func (d *daemon) CreateHost(
) (
JoiningBootstrap, error,
) {
return withCurrBootstrap(d, func(
currBootstrap bootstrap.Bootstrap,
) (
JoiningBootstrap, error,
) {
d.l.RLock()
currBootstrap := d.currBootstrap
d.l.RUnlock()
caSigningPrivateKey, err := getNebulaCASigningPrivateKey(
ctx, d.secretsStore,
)
@ -711,6 +624,7 @@ func (d *daemon) CreateHost(
joiningBootstrap.Bootstrap, err = bootstrap.New(
makeCACreds(currBootstrap, caSigningPrivateKey),
currBootstrap.NetworkCreationParams,
currBootstrap.Hosts,
hostName,
ip,
)
@ -720,8 +634,6 @@ func (d *daemon) CreateHost(
)
}
joiningBootstrap.Bootstrap.Hosts = currBootstrap.Hosts
secretsIDs := []secrets.ID{
garageRPCSecretSecretID,
garageS3APIGlobalBucketCredentialsSecretID,
@ -737,11 +649,22 @@ func (d *daemon) CreateHost(
return JoiningBootstrap{}, fmt.Errorf("exporting secrets: %w", err)
}
// TODO persist new bootstrap to garage. Requires making the daemon
// config change watching logic smarter, so only dnsmasq gets restarted.
d.logger.Info(ctx, "Putting new host in garage")
err = d.putGarageBoostrapHost(ctx, joiningBootstrap.Bootstrap)
if err != nil {
return JoiningBootstrap{}, fmt.Errorf("putting new host in garage: %w", err)
}
// the new bootstrap will have been initialized with both the existing hosts
// (based on currBootstrap) and the host being created.
newHosts := joiningBootstrap.Bootstrap.Hosts
d.logger.Info(ctx, "Reloading local state with new host")
if err := d.reload(ctx, currBootstrap, newHosts); err != nil {
return JoiningBootstrap{}, fmt.Errorf("reloading child processes: %w", err)
}
return joiningBootstrap, nil
})
}
func (d *daemon) CreateNebulaCertificate(
@ -788,9 +711,7 @@ func (d *daemon) Shutdown() error {
d.state = daemonStateShutdown
if d.children != nil {
if err := d.children.Shutdown(); err != nil {
return fmt.Errorf("shutting down children: %w", err)
}
d.children.Shutdown()
}
return nil
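
With this change the daemon no longer tears its children down and rebuilds them on every bootstrap change; instead reloadLoop polls garage every three minutes, merges in this host's own entry, and hands any difference to reload, which restarts only the affected services. A minimal sketch of that ticker-driven loop shape, with fetchHosts and reload as placeholders for the daemon's garage lookup and diff-and-restart logic:

package main

import (
	"context"
	"fmt"
	"time"
)

func reloadLoop(
	ctx context.Context,
	interval time.Duration,
	fetchHosts func(context.Context) ([]string, error),
	reload func(context.Context, []string) error,
) {
	ticker := time.NewTicker(interval)
	defer ticker.Stop()
	for {
		select {
		case <-ctx.Done():
			return
		case <-ticker.C:
			hosts, err := fetchHosts(ctx)
			if err != nil {
				fmt.Println("fetching hosts failed:", err)
				continue // try again on the next tick
			}
			if err := reload(ctx, hosts); err != nil {
				fmt.Println("reload failed:", err)
			}
		}
	}
}

func main() {
	ctx, cancel := context.WithTimeout(context.Background(), 250*time.Millisecond)
	defer cancel()
	reloadLoop(ctx, 100*time.Millisecond,
		func(context.Context) ([]string, error) { return []string{"primus", "secondus"}, nil },
		func(_ context.Context, hosts []string) error {
			fmt.Println("reloaded with hosts:", hosts)
			return nil
		},
	)
}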

View File

@ -11,10 +11,6 @@ var (
// being initialized.
ErrInitializing = jsonrpc2.NewError(2, "Network is being initialized")
// ErrRestarting is returned when a network is unavailable due to being
// restarted.
ErrRestarting = jsonrpc2.NewError(3, "Network is being restarted")
// ErrAlreadyJoined is returned when the daemon is instructed to create or
// join a new network, but it is already joined to a network.
ErrAlreadyJoined = jsonrpc2.NewError(4, "Already joined to a network")

View File

@ -64,7 +64,7 @@ func garageInitializeGlobalBucket(
// into garage so that other hosts are able to see relevant configuration for
// it.
func (d *daemon) putGarageBoostrapHost(
ctx context.Context, logger *mlog.Logger, currBootstrap bootstrap.Bootstrap,
ctx context.Context, currBootstrap bootstrap.Bootstrap,
) error {
garageClientParams, err := d.getGarageClientParams(ctx, currBootstrap)
if err != nil {
@ -113,7 +113,7 @@ func (d *daemon) putGarageBoostrapHost(
}
func (d *daemon) getGarageBootstrapHosts(
ctx context.Context, logger *mlog.Logger, currBootstrap bootstrap.Bootstrap,
ctx context.Context, currBootstrap bootstrap.Bootstrap,
) (
map[nebula.HostName]bootstrap.Host, error,
) {
@ -157,13 +157,13 @@ func (d *daemon) getGarageBootstrapHosts(
obj.Close()
if err != nil {
logger.Warn(ctx, "Object contains invalid json", err)
d.logger.Warn(ctx, "Object contains invalid json", err)
continue
}
host, err := authedHost.Unwrap(currBootstrap.CAPublicCredentials)
if err != nil {
logger.Warn(ctx, "Host could not be authenticated", err)
d.logger.Warn(ctx, "Host could not be authenticated", err)
}
hosts[host.Name] = host

View File

@ -3,13 +3,12 @@ module isle
go 1.22
require (
code.betamike.com/micropelago/pmux v0.0.0-20230706154656-fde8c2be7540
code.betamike.com/micropelago/pmux v0.0.0-20240719134913-f5fce902e8c4
dev.mediocregopher.com/mediocre-go-lib.git v0.0.0-20240511135822-4ab1176672d7
github.com/adrg/xdg v0.4.0
github.com/imdario/mergo v0.3.12
github.com/jxskiss/base62 v1.1.0
github.com/minio/minio-go/v7 v7.0.28
github.com/shirou/gopsutil v3.21.11+incompatible
github.com/slackhq/nebula v1.6.1
github.com/spf13/pflag v1.0.5
github.com/tv42/httpunix v0.0.0-20191220191345-2ba4b9c3382c
@ -19,7 +18,6 @@ require (
require (
github.com/dustin/go-humanize v1.0.0 // indirect
github.com/go-ole/go-ole v1.2.6 // indirect
github.com/google/uuid v1.1.1 // indirect
github.com/gopherjs/gopherjs v1.17.2 // indirect
github.com/json-iterator/go v1.1.12 // indirect
@ -34,9 +32,6 @@ require (
github.com/rs/xid v1.2.1 // indirect
github.com/sirupsen/logrus v1.8.1 // indirect
github.com/smartystreets/assertions v1.13.0 // indirect
github.com/tklauser/go-sysconf v0.3.10 // indirect
github.com/tklauser/numcpus v0.4.0 // indirect
github.com/yusufpapurcu/wmi v1.2.2 // indirect
golang.org/x/crypto v0.0.0-20220331220935-ae2d96664a29 // indirect
golang.org/x/net v0.0.0-20220403103023-749bd193bc2b // indirect
golang.org/x/sys v0.0.0-20220406155245-289d7a0edf71 // indirect

View File

@ -1,5 +1,5 @@
code.betamike.com/micropelago/pmux v0.0.0-20230706154656-fde8c2be7540 h1:ycD1mEkCbrx3Apr71Q2PKgyycRu8wvwX9J4ZSvjyTtQ=
code.betamike.com/micropelago/pmux v0.0.0-20230706154656-fde8c2be7540/go.mod h1:WlEWacLREVfIQl1IlBjKzuDgL56DFRvyl7YiL5gGP4w=
code.betamike.com/micropelago/pmux v0.0.0-20240719134913-f5fce902e8c4 h1:n4pGP12kgdH5kCTF4TZYpOjWuAR/zZLpM9j3neeVMEk=
code.betamike.com/micropelago/pmux v0.0.0-20240719134913-f5fce902e8c4/go.mod h1:WlEWacLREVfIQl1IlBjKzuDgL56DFRvyl7YiL5gGP4w=
dev.mediocregopher.com/mediocre-go-lib.git v0.0.0-20240511135822-4ab1176672d7 h1:wKQ3bXzG+KQDtRAN/xaRZ4aQtJe1pccleG6V43MvFxw=
dev.mediocregopher.com/mediocre-go-lib.git v0.0.0-20240511135822-4ab1176672d7/go.mod h1:nP+AtQWrc3k5qq5y3ABiBLkOfUPlk/FO9fpTFpF+jgs=
github.com/adrg/xdg v0.4.0 h1:RzRqFcjH4nE5C6oTAxhBtoE2IRyjBSa62SCbyPidvls=
@ -9,8 +9,6 @@ github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c
github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
github.com/dustin/go-humanize v1.0.0 h1:VSnTsYCnlFHaM2/igO1h6X3HA71jcobQuxemgkq4zYo=
github.com/dustin/go-humanize v1.0.0/go.mod h1:HtrtbFcZ19U5GC7JDqmcUSB87Iq5E25KnS6fMYU6eOk=
github.com/go-ole/go-ole v1.2.6 h1:/Fpf6oFPoeFik9ty7siob0G6Ke8QvQEuVcuChpwXzpY=
github.com/go-ole/go-ole v1.2.6/go.mod h1:pprOEPIfldk/42T2oK7lQ4v4JSDwmV0As9GaiUsvbm0=
github.com/golang/protobuf v1.5.0/go.mod h1:FsONVRAS9T7sI+LIUmWTfcYkHO4aIWwzhcaSAoJOfIk=
github.com/google/go-cmp v0.5.5/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/gNBxE=
github.com/google/go-cmp v0.6.0 h1:ofyhxvXcZhMsU5ulbFiLKl/XBFqE1GSq7atu8tAmTRI=
@ -55,8 +53,6 @@ github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZb
github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4=
github.com/rs/xid v1.2.1 h1:mhH9Nq+C1fY2l1XIpgxIiUOfNpRBYH1kKcr+qfKgjRc=
github.com/rs/xid v1.2.1/go.mod h1:+uKXf+4Djp6Md1KODXJxgGQPKngRmWyn10oCKFzNHOQ=
github.com/shirou/gopsutil v3.21.11+incompatible h1:+1+c1VGhc88SSonWP6foOcLhvnKlUeu/erjjvaPEYiI=
github.com/shirou/gopsutil v3.21.11+incompatible/go.mod h1:5b4v6he4MtMOwMlS0TUMTu2PcXUg8+E1lC7eC3UO/RA=
github.com/sirupsen/logrus v1.8.1 h1:dJKuHgqk1NNQlqoA6BTlM1Wf9DOH3NBjQyu0h9+AZZE=
github.com/sirupsen/logrus v1.8.1/go.mod h1:yWOB1SBYBC5VeMP7gHvWumXLIWorT60ONWic61uBYv0=
github.com/slackhq/nebula v1.6.1 h1:/OCTR3abj0Sbf2nGoLUrdDXImrCv0ZVFpVPP5qa0DsM=
@ -73,24 +69,16 @@ github.com/stretchr/testify v1.3.0/go.mod h1:M5WIy9Dh21IEIfnGCwXGc5bZfKNJtfHm1UV
github.com/stretchr/testify v1.7.0/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg=
github.com/stretchr/testify v1.7.1 h1:5TQK59W5E3v0r2duFAb7P95B6hEeOyEnHRa8MjYSMTY=
github.com/stretchr/testify v1.7.1/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg=
github.com/tklauser/go-sysconf v0.3.10 h1:IJ1AZGZRWbY8T5Vfk04D9WOA5WSejdflXxP03OUqALw=
github.com/tklauser/go-sysconf v0.3.10/go.mod h1:C8XykCvCb+Gn0oNCWPIlcb0RuglQTYaQ2hGm7jmxEFk=
github.com/tklauser/numcpus v0.4.0 h1:E53Dm1HjH1/R2/aoCtXtPgzmElmn51aOkhCFSuZq//o=
github.com/tklauser/numcpus v0.4.0/go.mod h1:1+UI3pD8NW14VMwdgJNJ1ESk2UnwhAnz5hMwiKKqXCQ=
github.com/tv42/httpunix v0.0.0-20191220191345-2ba4b9c3382c h1:u6SKchux2yDvFQnDHS3lPnIRmfVJ5Sxy3ao2SIdysLQ=
github.com/tv42/httpunix v0.0.0-20191220191345-2ba4b9c3382c/go.mod h1:hzIxponao9Kjc7aWznkXaL4U4TWaDSs8zcsY4Ka08nM=
github.com/yusufpapurcu/wmi v1.2.2 h1:KBNDSne4vP5mbSWnJbO+51IMOXJB67QiYCSBrubbPRg=
github.com/yusufpapurcu/wmi v1.2.2/go.mod h1:SBZ9tNy3G9/m5Oi98Zks0QjeHVDvuK0qfxQmPyzfmi0=
golang.org/x/crypto v0.0.0-20220331220935-ae2d96664a29 h1:tkVvjkPTB7pnW3jnid7kNyAMPVWllTNOf/qKDze4p9o=
golang.org/x/crypto v0.0.0-20220331220935-ae2d96664a29/go.mod h1:IxCIyHEi3zRg3s0A5j5BB6A9Jmi73HwBIUl50j+osU4=
golang.org/x/exp v0.0.0-20240613232115-7f521ea00fb8 h1:yixxcjnhBmY0nkL253HFVIm0JsFHwrHdT3Yh6szTnfY=
golang.org/x/exp v0.0.0-20240613232115-7f521ea00fb8/go.mod h1:jj3sYF3dwk5D+ghuXyeI3r5MFf+NT2An6/9dOA95KSI=
golang.org/x/net v0.0.0-20220403103023-749bd193bc2b h1:vI32FkLJNAWtGD4BwkThwEy6XS7ZLLMHkSkYfF8M0W0=
golang.org/x/net v0.0.0-20220403103023-749bd193bc2b/go.mod h1:CfG3xpIq0wQ8r1q4Su4UZFWDARRcnwPjda9FqA0JpMk=
golang.org/x/sys v0.0.0-20190916202348-b4ddaad3f8a3/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
golang.org/x/sys v0.0.0-20191026070338-33540a1f6037/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
golang.org/x/sys v0.0.0-20211025201205-69cdffdb9359/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
golang.org/x/sys v0.0.0-20220128215802-99c3d69c2c27/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
golang.org/x/sys v0.0.0-20220406155245-289d7a0edf71 h1:PRD0hj6tTuUnCFD08vkvjkYFbQg/9lV8KIxe1y4/cvU=
golang.org/x/sys v0.0.0-20220406155245-289d7a0edf71/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
golang.org/x/text v0.3.8-0.20211105212822-18b340fc7af2 h1:GLw7MR8AfAG2GmGcmVgObFOHXYypgGjnGno25RDwn3Y=

View File

@ -10,10 +10,7 @@ function assert_a {
as_primus
assert_a "$primus_ip" primus.hosts.shared.test
# TODO This doesn't work at present, there would need to be some mechanism to
# block the test until secondus' bootstrap info can propagate to primus.
#assert_a "$secondus_ip" secondus.hosts.shared.test
assert_a "$secondus_ip" secondus.hosts.shared.test
as_secondus
assert_a "$primus_ip" primus.hosts.shared.test

View File

@ -1,9 +1,7 @@
# shellcheck source=../../utils/with-1-data-1-empty-node-network.sh
source "$UTILS"/with-1-data-1-empty-node-network.sh
# TODO when primus creates secondus' bootstrap it should write the new host to
# its own bootstrap, as well as to garage.
as_secondus
function do_tests {
hosts="$(isle hosts list)"
[ "$(echo "$hosts" | jq -r '.[0].Name')" = "primus" ]
@ -13,3 +11,10 @@ hosts="$(isle hosts list)"
[ "$(echo "$hosts" | jq -r '.[1].Name')" = "secondus" ]
[ "$(echo "$hosts" | jq -r '.[1].VPN.IP')" = "10.6.9.2" ]
[ "$(echo "$hosts" | jq -r '.[1].Storage.Instances|length')" = "0" ]
}
as_primus
do_tests
as_secondus
do_tests