diff --git a/go/bootstrap/bootstrap.go b/go/bootstrap/bootstrap.go index a41c188..9b61aba 100644 --- a/go/bootstrap/bootstrap.go +++ b/go/bootstrap/bootstrap.go @@ -7,10 +7,13 @@ import ( "encoding/json" "fmt" "isle/nebula" + "isle/toolkit" "maps" "net/netip" "path/filepath" "sort" + + "dev.mediocregopher.com/mediocre-go-lib.git/mctx" ) // StateDirPath returns the path within the user's state directory where the @@ -33,6 +36,22 @@ type CreationParams struct { Domain string } +// NewCreationParams instantiates and returns a CreationParams. +func NewCreationParams(name, domain string) CreationParams { + return CreationParams{ + ID: toolkit.RandStr(32), + Name: name, + Domain: domain, + } +} + +// Annotate implements the mctx.Annotator interface. +func (p CreationParams) Annotate(aa mctx.Annotations) { + aa["networkID"] = p.ID + aa["networkName"] = p.Name + aa["networkDomain"] = p.Domain +} + // Bootstrap contains all information which is needed by a host daemon to join a // network on boot. type Bootstrap struct { diff --git a/go/cmd/entrypoint/daemon.go b/go/cmd/entrypoint/daemon.go index 2649974..7f35fe0 100644 --- a/go/cmd/entrypoint/daemon.go +++ b/go/cmd/entrypoint/daemon.go @@ -61,7 +61,7 @@ var subCmdDaemon = subCmd{ return fmt.Errorf("loading daemon config: %w", err) } - daemonInst, err := daemon.NewDaemon( + daemonInst, err := daemon.New( ctx, logger, daemonConfig, envBinDirPath, nil, ) if err != nil { diff --git a/go/cmd/entrypoint/garage.go b/go/cmd/entrypoint/garage.go index 6e78170..a6fac18 100644 --- a/go/cmd/entrypoint/garage.go +++ b/go/cmd/entrypoint/garage.go @@ -3,6 +3,7 @@ package main import ( "errors" "fmt" + "isle/daemon/daecommon" "os" "path/filepath" "syscall" @@ -12,9 +13,9 @@ import ( // information which may or may not be useful. Unfortunately when it initializes // this directory it likes to print some annoying logs, so we pre-initialize in // order to prevent it from doing so. -func initMCConfigDir() (string, error) { +func initMCConfigDir(envVars daecommon.EnvVars) (string, error) { var ( - path = filepath.Join(daemonEnvVars.StateDirPath, "mc") + path = filepath.Join(envVars.StateDir.Path, "mc") sharePath = filepath.Join(path, "share") configJSONPath = filepath.Join(path, "config.json") ) @@ -72,7 +73,9 @@ var subCmdGarageMC = subCmd{ args = args[i:] } - configDir, err := initMCConfigDir() + envVars := daecommon.GetEnvVars() + + configDir, err := initMCConfigDir(envVars) if err != nil { return fmt.Errorf("initializing minio-client config directory: %w", err) } diff --git a/go/cmd/entrypoint/host.go b/go/cmd/entrypoint/host.go index b3aa681..6ade1c2 100644 --- a/go/cmd/entrypoint/host.go +++ b/go/cmd/entrypoint/host.go @@ -5,7 +5,7 @@ import ( "errors" "fmt" "isle/bootstrap" - "isle/daemon" + "isle/daemon/network" "isle/jsonutil" "os" "sort" @@ -44,11 +44,11 @@ var subCmdHostCreate = subCmd{ } var ( - res daemon.JoiningBootstrap + res network.JoiningBootstrap err error ) res, err = ctx.daemonRPC.CreateHost( - ctx, hostName.V, daemon.CreateHostOpts{ + ctx, hostName.V, network.CreateHostOpts{ IP: ip.V, CanCreateHosts: *canCreateHosts, }, diff --git a/go/cmd/entrypoint/main.go b/go/cmd/entrypoint/main.go index 6ef40ea..89b06ef 100644 --- a/go/cmd/entrypoint/main.go +++ b/go/cmd/entrypoint/main.go @@ -7,8 +7,6 @@ import ( "path/filepath" "syscall" - "isle/daemon/daecommon" - "dev.mediocregopher.com/mediocre-go-lib.git/mctx" "dev.mediocregopher.com/mediocre-go-lib.git/mlog" ) @@ -22,7 +20,6 @@ func getAppDirPath() string { } var ( - daemonEnvVars = daecommon.GetEnvVars() envAppDirPath = getAppDirPath() envBinDirPath = filepath.Join(envAppDirPath, "bin") ) diff --git a/go/cmd/entrypoint/network.go b/go/cmd/entrypoint/network.go index 80ad8a9..37e072e 100644 --- a/go/cmd/entrypoint/network.go +++ b/go/cmd/entrypoint/network.go @@ -3,7 +3,7 @@ package main import ( "errors" "fmt" - "isle/daemon" + "isle/daemon/network" "isle/jsonutil" ) @@ -81,7 +81,7 @@ var subCmdNetworkJoin = subCmd{ return errors.New("--bootstrap-path is required") } - var newBootstrap daemon.JoiningBootstrap + var newBootstrap network.JoiningBootstrap if err := jsonutil.LoadFile(&newBootstrap, *bootstrapPath); err != nil { return fmt.Errorf( "loading bootstrap from %q: %w", *bootstrapPath, err, diff --git a/go/daemon/children/children.go b/go/daemon/children/children.go index 0e4a7e2..2090868 100644 --- a/go/daemon/children/children.go +++ b/go/daemon/children/children.go @@ -15,6 +15,7 @@ import ( "isle/bootstrap" "isle/daemon/daecommon" "isle/secrets" + "isle/toolkit" ) // Opts are optional parameters which can be passed in when initializing a new @@ -47,10 +48,10 @@ func (o *Opts) withDefaults() *Opts { // - dnsmasq // - garage (0 or more, depending on configured storage allocations) type Children struct { - logger *mlog.Logger - daemonConfig daecommon.Config - runtimeDirPath string - opts Opts + logger *mlog.Logger + daemonConfig daecommon.Config + runtimeDir toolkit.Dir + opts Opts pmux *pmuxlib.Pmux } @@ -63,7 +64,7 @@ func New( binDirPath string, secretsStore secrets.Store, daemonConfig daecommon.Config, - runtimeDirPath string, + runtimeDir toolkit.Dir, garageAdminToken string, hostBootstrap bootstrap.Bootstrap, opts *Opts, @@ -79,10 +80,10 @@ func New( } c := &Children{ - logger: logger, - daemonConfig: daemonConfig, - runtimeDirPath: runtimeDirPath, - opts: *opts, + logger: logger, + daemonConfig: daemonConfig, + runtimeDir: runtimeDir, + opts: *opts, } pmuxConfig, err := c.newPmuxConfig( @@ -117,7 +118,7 @@ func New( // successfully. func (c *Children) RestartDNSMasq(hostBootstrap bootstrap.Bootstrap) error { _, err := dnsmasqWriteConfig( - c.runtimeDirPath, c.daemonConfig, hostBootstrap, + c.runtimeDir.Path, c.daemonConfig, hostBootstrap, ) if err != nil { return fmt.Errorf("writing new dnsmasq config: %w", err) @@ -133,7 +134,7 @@ func (c *Children) RestartDNSMasq(hostBootstrap bootstrap.Bootstrap) error { // successfully. func (c *Children) RestartNebula(hostBootstrap bootstrap.Bootstrap) error { _, err := nebulaWriteConfig( - c.runtimeDirPath, c.daemonConfig, hostBootstrap, + c.runtimeDir.Path, c.daemonConfig, hostBootstrap, ) if err != nil { return fmt.Errorf("writing a new nebula config: %w", err) diff --git a/go/daemon/children/pmux.go b/go/daemon/children/pmux.go index 0e98fc1..ec0fb3f 100644 --- a/go/daemon/children/pmux.go +++ b/go/daemon/children/pmux.go @@ -19,7 +19,7 @@ func (c *Children) newPmuxConfig( pmuxlib.Config, error, ) { nebulaPmuxProcConfig, err := nebulaPmuxProcConfig( - c.runtimeDirPath, + c.runtimeDir.Path, binDirPath, daemonConfig, hostBootstrap, @@ -30,7 +30,7 @@ func (c *Children) newPmuxConfig( dnsmasqPmuxProcConfig, err := dnsmasqPmuxProcConfig( c.logger, - c.runtimeDirPath, + c.runtimeDir.Path, binDirPath, daemonConfig, hostBootstrap, @@ -45,7 +45,7 @@ func (c *Children) newPmuxConfig( ctx, c.logger, garageRPCSecret, - c.runtimeDirPath, + c.runtimeDir.Path, binDirPath, daemonConfig, garageAdminToken, diff --git a/go/daemon/client.go b/go/daemon/client.go index c7a2400..77dda78 100644 --- a/go/daemon/client.go +++ b/go/daemon/client.go @@ -10,6 +10,7 @@ import ( "context" "isle/bootstrap" "isle/daemon/jsonrpc2" + "isle/daemon/network" "isle/nebula" ) @@ -23,7 +24,7 @@ func RPCFromClient(client jsonrpc2.Client) RPC { return &rpcClient{client} } -func (c *rpcClient) CreateHost(ctx context.Context, h1 nebula.HostName, c2 CreateHostOpts) (j1 JoiningBootstrap, err error) { +func (c *rpcClient) CreateHost(ctx context.Context, h1 nebula.HostName, c2 network.CreateHostOpts) (j1 network.JoiningBootstrap, err error) { err = c.client.Call( ctx, &j1, @@ -58,7 +59,7 @@ func (c *rpcClient) CreateNetwork(ctx context.Context, name string, domain strin return } -func (c *rpcClient) GetGarageClientParams(ctx context.Context) (g1 GarageClientParams, err error) { +func (c *rpcClient) GetGarageClientParams(ctx context.Context) (g1 network.GarageClientParams, err error) { err = c.client.Call( ctx, &g1, @@ -85,7 +86,7 @@ func (c *rpcClient) GetNebulaCAPublicCredentials(ctx context.Context) (c2 nebula return } -func (c *rpcClient) JoinNetwork(ctx context.Context, j1 JoiningBootstrap) (err error) { +func (c *rpcClient) JoinNetwork(ctx context.Context, j1 network.JoiningBootstrap) (err error) { err = c.client.Call( ctx, nil, diff --git a/go/daemon/daecommon/env.go b/go/daemon/daecommon/env.go index d1ee66e..ec5bdaf 100644 --- a/go/daemon/daecommon/env.go +++ b/go/daemon/daecommon/env.go @@ -1,6 +1,7 @@ package daecommon import ( + "isle/toolkit" "os" "path/filepath" "sync" @@ -11,27 +12,39 @@ import ( // EnvVars are variables which are derived based on the environment which the // process is running in. type EnvVars struct { - RuntimeDirPath string - StateDirPath string + StateDir toolkit.Dir + RuntimeDir toolkit.Dir } // GetEnvVars will return the EnvVars of the current processes, as determined by // the process's environment. -var GetEnvVars = sync.OnceValue(func() (v EnvVars) { +var GetEnvVars = sync.OnceValue(func() EnvVars { // RUNTIME_DIRECTORY/STATE_DIRECTORY are used by the systemd service in // conjunction with the RuntimeDirectory/StateDirectory directives. - v.RuntimeDirPath = envOr( - "RUNTIME_DIRECTORY", - func() string { return filepath.Join(xdg.RuntimeDir, "isle") }, + var ( + res EnvVars + + stateDirPath = envOr( + "STATE_DIRECTORY", + func() string { return filepath.Join(xdg.StateHome, "isle") }, + ) + + runtimeDirPath = envOr( + "RUNTIME_DIRECTORY", + func() string { return filepath.Join(xdg.RuntimeDir, "isle") }, + ) ) - v.StateDirPath = envOr( - "STATE_DIRECTORY", - func() string { return filepath.Join(xdg.StateHome, "isle") }, - ) + h := new(toolkit.MkDirHelper) + res.StateDir, _ = h.Maybe(toolkit.MkDir(stateDirPath, true)) + res.RuntimeDir, _ = h.Maybe(toolkit.MkDir(runtimeDirPath, true)) - return + if err := h.Err(); err != nil { + panic(err) + } + + return res }) //////////////////////////////////////////////////////////////////////////////// diff --git a/go/daemon/daecommon/errors.go b/go/daemon/daecommon/errors.go new file mode 100644 index 0000000..03a1177 --- /dev/null +++ b/go/daemon/daecommon/errors.go @@ -0,0 +1,8 @@ +package daecommon + +// Registrationg of error code ranges for different namespaces (ie packages) +// within daemon. Each range covers 1000 available error codes. +const ( + ErrorCodeRangeDaemon = 0 // daemon package + ErrorCodeRangeNetwork = 1000 // daemon/network package +) diff --git a/go/daemon/daemon.go b/go/daemon/daemon.go index 0a6027e..d820ee0 100644 --- a/go/daemon/daemon.go +++ b/go/daemon/daemon.go @@ -3,27 +3,18 @@ package daemon import ( - "bytes" - "cmp" "context" - "crypto/rand" - "errors" "fmt" - "io/fs" "isle/bootstrap" "isle/daemon/children" "isle/daemon/daecommon" - "isle/jsonutil" + "isle/daemon/network" "isle/nebula" - "isle/secrets" - "net/netip" - "path/filepath" - "slices" + "isle/toolkit" "sync" - "time" + "dev.mediocregopher.com/mediocre-go-lib.git/mctx" "dev.mediocregopher.com/mediocre-go-lib.git/mlog" - "golang.org/x/exp/maps" ) // Opts are optional parameters which can be passed in when initializing a new @@ -46,13 +37,6 @@ func (o *Opts) withDefaults() *Opts { return o } -const ( - daemonStateNoNetwork = iota - daemonStateInitializing - daemonStateOk - daemonStateShutdown -) - var _ RPC = (*Daemon)(nil) // Daemon implements all methods of the Daemon interface, plus others used @@ -77,20 +61,15 @@ type Daemon struct { envBinDirPath string opts *Opts - secretsStore secrets.Store - garageAdminToken string + networksStateDir toolkit.Dir + networksRuntimeDir toolkit.Dir - l sync.RWMutex - state int - children *children.Children - currBootstrap bootstrap.Bootstrap - - shutdownCh chan struct{} - wg sync.WaitGroup + l sync.RWMutex + network network.Network } -// NewDaemon initializes and returns a Daemon. -func NewDaemon( +// New initializes and returns a Daemon. +func New( ctx context.Context, logger *mlog.Logger, daemonConfig daecommon.Config, @@ -99,282 +78,70 @@ func NewDaemon( ) ( *Daemon, error, ) { - var ( - d = &Daemon{ - logger: logger, - daemonConfig: daemonConfig, - envBinDirPath: envBinDirPath, - opts: opts.withDefaults(), - garageAdminToken: randStr(32), - shutdownCh: make(chan struct{}), + d := &Daemon{ + logger: logger, + daemonConfig: daemonConfig, + envBinDirPath: envBinDirPath, + opts: opts.withDefaults(), + } + + { + h := new(toolkit.MkDirHelper) + d.networksStateDir, _ = h.Maybe( + d.opts.EnvVars.StateDir.MkChildDir("networks", true), + ) + + d.networksRuntimeDir, _ = h.Maybe( + d.opts.EnvVars.RuntimeDir.MkChildDir("networks", true), + ) + + if err := h.Err(); err != nil { + return nil, fmt.Errorf("creating networks sub-directories: %w", err) } - bootstrapFilePath = bootstrap.StateDirPath(d.opts.EnvVars.StateDirPath) - ) - - if err := createDirs(d.opts.EnvVars); err != nil { - return nil, fmt.Errorf("initializing daemon directories: %w", err) } - var ( - secretsPath = filepath.Join(d.opts.EnvVars.StateDirPath, "secrets") - err error - ) - - if d.secretsStore, err = secrets.NewFSStore(secretsPath); err != nil { - return nil, fmt.Errorf( - "initializing secrets store at %q: %w", secretsPath, err, - ) + loadableNetworks, err := LoadableNetworks(d.networksStateDir) + if err != nil { + return nil, fmt.Errorf("listing loadable networks: %w", err) } - var currBootstrap bootstrap.Bootstrap - err = jsonutil.LoadFile(&currBootstrap, bootstrapFilePath) - if errors.Is(err, fs.ErrNotExist) { - // daemon has never had a network created or joined - } else if err != nil { + if len(loadableNetworks) > 1 { return nil, fmt.Errorf( - "loading bootstrap from %q: %w", bootstrapFilePath, err, + "more then one loadable Network found: %+v", loadableNetworks, ) - } else if err := d.initialize(ctx, currBootstrap); err != nil { - return nil, fmt.Errorf("initializing with bootstrap: %w", err) + } else if len(loadableNetworks) == 1 { + id := loadableNetworks[0].ID + ctx = mctx.WithAnnotator(ctx, loadableNetworks[0]) + + networkStateDir, networkRuntimeDir, err := networkDirs( + d.networksStateDir, d.networksRuntimeDir, id, true, + ) + if err != nil { + return nil, fmt.Errorf( + "creating sub-directories for network %q: %w", id, err, + ) + } + + d.network, err = network.Load( + ctx, + logger.WithNamespace("network"), + id, + d.daemonConfig, + d.envBinDirPath, + networkStateDir, + networkRuntimeDir, + &network.Opts{ + ChildrenOpts: d.opts.ChildrenOpts, + }, + ) + if err != nil { + return nil, fmt.Errorf("loading network %q: %w", id, err) + } } return d, nil } -func createDirs(e daecommon.EnvVars) error { - var errs []error - if err := mkDir(e.RuntimeDirPath); err != nil { - errs = append(errs, fmt.Errorf( - "creating runtime directory %q: %w", - e.RuntimeDirPath, - err, - )) - } - - if err := mkDir(e.StateDirPath); err != nil { - errs = append(errs, fmt.Errorf( - "creating state directory %q: %w", - e.StateDirPath, - err, - )) - } - - return errors.Join(errs...) -} - -// initialize must be called with d.l write lock held. -func (d *Daemon) initialize( - ctx context.Context, currBootstrap bootstrap.Bootstrap, -) error { - // we update this Host's data using whatever configuration has been provided - // by the daemon config. This way the daemon has the most up-to-date - // possible bootstrap. This updated bootstrap will later get updated in - // garage as a background daemon task, so other hosts will see it as well. - currBootstrap, err := coalesceDaemonConfigAndBootstrap( - d.daemonConfig, currBootstrap, - ) - if err != nil { - return fmt.Errorf("combining daemon configuration into bootstrap: %w", err) - } - - err = writeBootstrapToStateDir(d.opts.EnvVars.StateDirPath, currBootstrap) - if err != nil { - return fmt.Errorf("writing bootstrap to state dir: %w", err) - } - - d.currBootstrap = currBootstrap - d.state = daemonStateInitializing - - d.logger.Info(ctx, "Creating child processes") - d.children, err = children.New( - ctx, - d.logger.WithNamespace("children"), - d.envBinDirPath, - d.secretsStore, - d.daemonConfig, - d.opts.EnvVars.RuntimeDirPath, - d.garageAdminToken, - currBootstrap, - d.opts.ChildrenOpts, - ) - if err != nil { - return fmt.Errorf("creating child processes: %w", err) - } - - d.logger.Info(ctx, "Child processes created") - - if err := d.postInit(ctx); err != nil { - d.logger.Error(ctx, "Post-initialization failed, stopping child processes", err) - d.children.Shutdown() - return fmt.Errorf("performing post-initialization: %w", err) - } - - d.state = daemonStateOk - - ctx, cancel := context.WithCancel(context.Background()) - d.wg.Add(1) - go func() { - defer d.wg.Done() - <-d.shutdownCh - cancel() - }() - - d.wg.Add(1) - go func() { - defer d.wg.Done() - d.reloadLoop(ctx) - d.logger.Debug(ctx, "Daemon restart loop stopped") - }() - - return nil -} - -func withCurrBootstrap[Res any]( - d *Daemon, fn func(bootstrap.Bootstrap) (Res, error), -) (Res, error) { - var zero Res - d.l.RLock() - defer d.l.RUnlock() - - currBootstrap, state := d.currBootstrap, d.state - - switch state { - case daemonStateNoNetwork: - return zero, ErrNoNetwork - case daemonStateInitializing: - return zero, ErrInitializing - case daemonStateOk: - return fn(currBootstrap) - case daemonStateShutdown: - return zero, errors.New("already shutdown") - default: - panic(fmt.Sprintf("unknown state %d", d.state)) - } -} - -// reload will check the existing hosts data from currBootstrap against a -// potentially updated set of hosts data, and if there are any differences will -// perform whatever changes are necessary. -func (d *Daemon) reload( - ctx context.Context, - currBootstrap bootstrap.Bootstrap, - newHosts map[nebula.HostName]bootstrap.Host, -) error { - var ( - newBootstrap = currBootstrap - thisHost = currBootstrap.ThisHost() - ) - - newBootstrap.Hosts = newHosts - - // the daemon's view of this host's bootstrap info takes precedence over - // whatever is in garage - newBootstrap.Hosts[thisHost.Name] = thisHost - - diff, err := children.CalculateReloadDiff( - d.daemonConfig, currBootstrap, newBootstrap, - ) - if err != nil { - return fmt.Errorf("calculating diff between bootstraps: %w", err) - } else if diff == (children.ReloadDiff{}) { - d.logger.Info(ctx, "No changes to bootstrap detected") - return nil - } - - d.logger.Info(ctx, "Bootstrap has changed, storing new bootstrap") - d.l.Lock() - d.currBootstrap = newBootstrap - d.l.Unlock() - - if err := d.children.Reload(ctx, newBootstrap, diff); err != nil { - return fmt.Errorf("reloading child processes (diff:%+v): %w", diff, err) - } - - return nil -} - -func (d *Daemon) postInit(ctx context.Context) error { - if len(d.daemonConfig.Storage.Allocations) > 0 { - d.logger.Info(ctx, "Applying garage layout") - if err := garageApplyLayout( - ctx, d.logger, d.daemonConfig, d.garageAdminToken, d.currBootstrap, - ); err != nil { - return fmt.Errorf("applying garage layout: %w", err) - } - } - - // This is only necessary during network creation, otherwise the bootstrap - // should already have these credentials built in. - // - // TODO this is pretty hacky, but there doesn't seem to be a better way to - // manage it at the moment. - _, err := daecommon.GetGarageS3APIGlobalBucketCredentials( - ctx, d.secretsStore, - ) - if errors.Is(err, secrets.ErrNotFound) { - d.logger.Info(ctx, "Initializing garage shared global bucket") - garageGlobalBucketCreds, err := garageInitializeGlobalBucket( - ctx, - d.logger, - d.daemonConfig, - d.garageAdminToken, - d.currBootstrap, - ) - if err != nil { - return fmt.Errorf("initializing global bucket: %w", err) - } - - err = daecommon.SetGarageS3APIGlobalBucketCredentials( - ctx, d.secretsStore, garageGlobalBucketCreds, - ) - if err != nil { - return fmt.Errorf("storing global bucket creds: %w", err) - } - } - - d.logger.Info(ctx, "Updating host info in garage") - err = d.putGarageBoostrapHost(ctx, d.currBootstrap) - if err != nil { - return fmt.Errorf("updating host info in garage: %w", err) - } - - return nil -} - -func (d *Daemon) reloadLoop(ctx context.Context) { - ticker := time.NewTicker(3 * time.Minute) - defer ticker.Stop() - - for { - select { - case <-ctx.Done(): - return - - case <-ticker.C: - d.l.RLock() - currBootstrap := d.currBootstrap - d.l.RUnlock() - - d.logger.Info(ctx, "Checking for bootstrap changes") - newHosts, err := d.getGarageBootstrapHosts(ctx, currBootstrap) - if err != nil { - d.logger.Error(ctx, "Failed to get hosts from garage", err) - continue - } - - // TODO there's some potential race conditions here, where - // CreateHost could be called at this point, write the new host to - // garage and the bootstrap, but then this reload call removes the - // host from this bootstrap/children until the next reload. - - if err := d.reload(ctx, currBootstrap, newHosts); err != nil { - d.logger.Error(ctx, "Reloading with new host data failed", err) - continue - } - } - } -} - // CreateNetwork will initialize a new network using the given parameters. // - name: Human-readable name of the network. // - domain: Primary domain name that network services are served under. @@ -385,66 +152,55 @@ func (d *Daemon) reloadLoop(ctx context.Context) { // // The daemon on which this is called will become the first host in the network, // and will have full administrative privileges. +// +// Errors: +// - network.ErrInvalidConfig func (d *Daemon) CreateNetwork( ctx context.Context, name, domain string, ipNet nebula.IPNet, hostName nebula.HostName, ) error { - nebulaCACreds, err := nebula.NewCACredentials(domain, ipNet) - if err != nil { - return fmt.Errorf("creating nebula CA cert: %w", err) - } - - var ( - creationParams = bootstrap.CreationParams{ - ID: randStr(32), - Name: name, - Domain: domain, - } - - garageRPCSecret = randStr(32) - ) - - err = daecommon.SetGarageRPCSecret(ctx, d.secretsStore, garageRPCSecret) - if err != nil { - return fmt.Errorf("setting garage RPC secret: %w", err) - } - - err = daecommon.SetNebulaCASigningPrivateKey( - ctx, d.secretsStore, nebulaCACreds.SigningPrivateKey, - ) - if err != nil { - return fmt.Errorf("setting nebula CA signing key secret: %w", err) - } - - hostBootstrap, err := bootstrap.New( - nebulaCACreds, - creationParams, - map[nebula.HostName]bootstrap.Host{}, - hostName, - ipNet.FirstAddr(), - ) - if err != nil { - return fmt.Errorf("initializing bootstrap data: %w", err) - } + creationParams := bootstrap.NewCreationParams(name, domain) + ctx = mctx.WithAnnotator(ctx, creationParams) d.l.Lock() defer d.l.Unlock() - if d.state != daemonStateNoNetwork { + if d.network != nil { return ErrAlreadyJoined } - if len(d.daemonConfig.Storage.Allocations) < 3 { - return ErrInvalidConfig.WithData( - "At least three storage allocations are required.", + networkStateDir, networkRuntimeDir, err := networkDirs( + d.networksStateDir, d.networksRuntimeDir, creationParams.ID, false, + ) + if err != nil { + return fmt.Errorf( + "creating sub-directories for network %q: %w", + creationParams.ID, + err, ) } - // initialize will unlock d.l - if err = d.initialize(ctx, hostBootstrap); err != nil { - return fmt.Errorf("initializing daemon: %w", err) + d.logger.Info(ctx, "Creating network") + n, err := network.Create( + ctx, + d.logger.WithNamespace("network"), + d.daemonConfig, + d.envBinDirPath, + networkStateDir, + networkRuntimeDir, + creationParams, + ipNet, + hostName, + &network.Opts{ + ChildrenOpts: d.opts.ChildrenOpts, + }, + ) + if err != nil { + return fmt.Errorf("creating network: %w", err) } + d.logger.Info(ctx, "Network created successfully") + d.network = n return nil } @@ -454,296 +210,157 @@ func (d *Daemon) CreateNetwork( // Errors: // - ErrAlreadyJoined func (d *Daemon) JoinNetwork( - ctx context.Context, newBootstrap JoiningBootstrap, + ctx context.Context, newBootstrap network.JoiningBootstrap, ) error { + networkID := newBootstrap.Bootstrap.NetworkCreationParams.ID + ctx = mctx.WithAnnotator(ctx, newBootstrap.Bootstrap.NetworkCreationParams) + d.l.Lock() defer d.l.Unlock() - if d.state != daemonStateNoNetwork { + if d.network != nil { return ErrAlreadyJoined } - err := secrets.Import(ctx, d.secretsStore, newBootstrap.Secrets) + networkStateDir, networkRuntimeDir, err := networkDirs( + d.networksStateDir, d.networksRuntimeDir, networkID, false, + ) if err != nil { - return fmt.Errorf("importing secrets: %w", err) + return fmt.Errorf( + "creating sub-directories for network %q: %w", networkID, err, + ) } - // initialize will unlock d.l - if err = d.initialize(ctx, newBootstrap.Bootstrap); err != nil { - return fmt.Errorf("initializing daemon: %w", err) + d.logger.Info(ctx, "Joining network") + n, err := network.Join( + ctx, + d.logger.WithNamespace("network"), + d.daemonConfig, + newBootstrap, + d.envBinDirPath, + networkStateDir, + networkRuntimeDir, + &network.Opts{ + ChildrenOpts: d.opts.ChildrenOpts, + }, + ) + if err != nil { + return fmt.Errorf( + "joining network %q: %w", networkID, err, + ) } + d.logger.Info(ctx, "Network joined successfully") + d.network = n return nil } -func (d *Daemon) getBootstrap(ctx context.Context) (bootstrap.Bootstrap, error) { - return withCurrBootstrap(d, func( - currBootstrap bootstrap.Bootstrap, - ) ( - bootstrap.Bootstrap, error, - ) { - return currBootstrap, nil - }) -} +func withNetwork[Res any]( + ctx context.Context, + d *Daemon, + fn func(context.Context, network.Network) (Res, error), +) ( + Res, error, +) { + d.l.RLock() + defer d.l.RUnlock() -// GetHosts returns all hosts known to the network, sorted by their name. -func (d *Daemon) GetHosts(ctx context.Context) ([]bootstrap.Host, error) { - b, err := d.getBootstrap(ctx) - if err != nil { - return nil, fmt.Errorf("retrieving bootstrap: %w", err) + if d.network == nil { + var zero Res + return zero, ErrNoNetwork } - hosts := maps.Values(b.Hosts) - slices.SortFunc(hosts, func(a, b bootstrap.Host) int { - return cmp.Compare(a.Name, b.Name) - }) - - return hosts, nil + return fn(ctx, d.network) } -// GetGarageClientParams returns a GarageClientParams for the current network -// state. +// GetHost implements the method for the network.RPC interface. +func (d *Daemon) GetHosts(ctx context.Context) ([]bootstrap.Host, error) { + return withNetwork( + ctx, + d, + func(ctx context.Context, n network.Network) ([]bootstrap.Host, error) { + return n.GetHosts(ctx) + }, + ) +} + +// GetGarageClientParams implements the method for the network.RPC interface. func (d *Daemon) GetGarageClientParams( ctx context.Context, ) ( - GarageClientParams, error, + network.GarageClientParams, error, ) { - return withCurrBootstrap(d, func( - currBootstrap bootstrap.Bootstrap, - ) ( - GarageClientParams, error, - ) { - return d.getGarageClientParams(ctx, currBootstrap) - }) + return withNetwork( + ctx, + d, + func( + ctx context.Context, n network.Network, + ) ( + network.GarageClientParams, error, + ) { + return n.GetGarageClientParams(ctx) + }, + ) } -// GetNebulaCAPublicCredentials returns the CAPublicCredentials for the network. +// GetNebulaCAPublicCredentials implements the method for the network.RPC +// interface. func (d *Daemon) GetNebulaCAPublicCredentials( ctx context.Context, ) ( nebula.CAPublicCredentials, error, ) { - b, err := d.getBootstrap(ctx) - if err != nil { - return nebula.CAPublicCredentials{}, fmt.Errorf( - "retrieving bootstrap: %w", err, - ) - } - - return b.CAPublicCredentials, nil + return withNetwork( + ctx, + d, + func( + ctx context.Context, n network.Network, + ) ( + nebula.CAPublicCredentials, error, + ) { + return n.GetNebulaCAPublicCredentials(ctx) + }, + ) } -// RemoveHost removes the host of the given name from the network. +// RemoveHost implements the method for the network.RPC interface. func (d *Daemon) RemoveHost(ctx context.Context, hostName nebula.HostName) error { - // TODO RemoveHost should publish a certificate revocation for the host - // being removed. - _, err := withCurrBootstrap(d, func( - currBootstrap bootstrap.Bootstrap, - ) ( - struct{}, error, - ) { - garageClientParams, err := d.getGarageClientParams(ctx, currBootstrap) - if err != nil { - return struct{}{}, fmt.Errorf("get garage client params: %w", err) - } - - client := garageClientParams.GlobalBucketS3APIClient() - return struct{}{}, removeGarageBootstrapHost(ctx, client, hostName) - }) + _, err := withNetwork( + ctx, + d, + func( + ctx context.Context, n network.Network, + ) ( + struct{}, error, + ) { + return struct{}{}, n.RemoveHost(ctx, hostName) + }, + ) return err } -func makeCACreds( - currBootstrap bootstrap.Bootstrap, - caSigningPrivateKey nebula.SigningPrivateKey, -) nebula.CACredentials { - return nebula.CACredentials{ - Public: currBootstrap.CAPublicCredentials, - SigningPrivateKey: caSigningPrivateKey, - } -} - -func chooseAvailableIP(b bootstrap.Bootstrap) (netip.Addr, error) { - var ( - cidrIPNet = b.CAPublicCredentials.Cert.Unwrap().Details.Subnets[0] - cidrMask = cidrIPNet.Mask - cidrIPB = cidrIPNet.IP - - cidr = netip.MustParsePrefix(cidrIPNet.String()) - cidrIP = cidr.Addr() - cidrSuffixBits = cidrIP.BitLen() - cidr.Bits() - - inUseIPs = make(map[netip.Addr]struct{}, len(b.Hosts)) - ) - - for _, host := range b.Hosts { - inUseIPs[host.IP()] = struct{}{} - } - - // first check that there are any addresses at all. We can determine the - // number of possible addresses using the network CIDR. The first IP in a - // subnet is the network identifier, and is reserved. The last IP is the - // broadcast IP, and is also reserved. Hence, the -2. - usableIPs := (1 << cidrSuffixBits) - 2 - if len(inUseIPs) >= usableIPs { - return netip.Addr{}, errors.New("no available IPs") - } - - // We need to know the subnet broadcast address, so we don't accidentally - // produce it. - cidrBCastIPB := bytes.Clone(cidrIPB) - for i := range cidrBCastIPB { - cidrBCastIPB[i] |= ^cidrMask[i] - } - cidrBCastIP, ok := netip.AddrFromSlice(cidrBCastIPB) - if !ok { - panic(fmt.Sprintf("invalid broadcast ip calculated: %x", cidrBCastIP)) - } - - // Try a handful of times to pick an IP at random. This is preferred, as it - // leaves less room for two different CreateHost calls to choose the same - // IP. - for range 20 { - b := make([]byte, len(cidrIPB)) - if _, err := rand.Read(b); err != nil { - return netip.Addr{}, fmt.Errorf("reading random bytes: %w", err) - } - - for i := range b { - b[i] = cidrIPB[i] | (b[i] & ^cidrMask[i]) - } - - ip, ok := netip.AddrFromSlice(b) - if !ok { - panic(fmt.Sprintf("generated invalid IP: %x", b)) - } else if !cidr.Contains(ip) { - panic(fmt.Sprintf( - "generated IP %v which is not in cidr %v", ip, cidr, - )) - } - - if ip == cidrIP || ip == cidrBCastIP { - continue - } - - if _, inUse := inUseIPs[ip]; !inUse { - return ip, nil - } - } - - // If randomly picking fails then just go through IPs one by one until the - // free one is found. - for ip := cidrIP.Next(); ip != cidrBCastIP; ip = ip.Next() { - if _, inUse := inUseIPs[ip]; !inUse { - return ip, nil - } - } - - panic("All ips are in-use, but somehow that wasn't determined earlier") -} - -// CreateHostOpts are optional parameters to the CreateHost method. -type CreateHostOpts struct { - // IP address of the new host. An IP address will be randomly chosen if one - // is not given here. - IP netip.Addr - - // CanCreateHosts indicates that the bootstrap produced by CreateHost should - // give the new host the ability to create new hosts as well. - CanCreateHosts bool - - // TODO add nebula cert tags -} - -// CreateHost creates a bootstrap for a new host with the given name and IP -// address. +// CreateHost implements the method for the network.RPC interface. func (d *Daemon) CreateHost( ctx context.Context, hostName nebula.HostName, - opts CreateHostOpts, + opts network.CreateHostOpts, ) ( - JoiningBootstrap, error, + network.JoiningBootstrap, error, ) { - d.l.RLock() - currBootstrap := d.currBootstrap - d.l.RUnlock() - - ip := opts.IP - if ip == (netip.Addr{}) { - var err error - if ip, err = chooseAvailableIP(currBootstrap); err != nil { - return JoiningBootstrap{}, fmt.Errorf( - "choosing available IP: %w", err, - ) - } - } - // TODO if the ip is given, check that it's not already in use. - - caSigningPrivateKey, err := daecommon.GetNebulaCASigningPrivateKey( - ctx, d.secretsStore, + return withNetwork( + ctx, + d, + func( + ctx context.Context, n network.Network, + ) ( + network.JoiningBootstrap, error, + ) { + return n.CreateHost(ctx, hostName, opts) + }, ) - if err != nil { - return JoiningBootstrap{}, fmt.Errorf("getting CA signing key: %w", err) - } - - var joiningBootstrap JoiningBootstrap - joiningBootstrap.Bootstrap, err = bootstrap.New( - makeCACreds(currBootstrap, caSigningPrivateKey), - currBootstrap.NetworkCreationParams, - currBootstrap.Hosts, - hostName, - ip, - ) - if err != nil { - return JoiningBootstrap{}, fmt.Errorf( - "initializing bootstrap data: %w", err, - ) - } - - secretsIDs := []secrets.ID{ - daecommon.GarageRPCSecretSecretID, - daecommon.GarageS3APIGlobalBucketCredentialsSecretID, - } - - if opts.CanCreateHosts { - secretsIDs = append( - secretsIDs, daecommon.NebulaCASigningPrivateKeySecretID, - ) - } - - if joiningBootstrap.Secrets, err = secrets.Export( - ctx, d.secretsStore, secretsIDs, - ); err != nil { - return JoiningBootstrap{}, fmt.Errorf("exporting secrets: %w", err) - } - - d.logger.Info(ctx, "Putting new host in garage") - err = d.putGarageBoostrapHost(ctx, joiningBootstrap.Bootstrap) - if err != nil { - return JoiningBootstrap{}, fmt.Errorf("putting new host in garage: %w", err) - } - - // the new bootstrap will have been initialized with both all existing hosts - // (based on currBootstrap) and the host being created. - newHosts := joiningBootstrap.Bootstrap.Hosts - - d.logger.Info(ctx, "Reloading local state with new host") - if err := d.reload(ctx, currBootstrap, newHosts); err != nil { - return JoiningBootstrap{}, fmt.Errorf("reloading child processes: %w", err) - } - - return joiningBootstrap, nil } -// CreateNebulaCertificate creates and signs a new nebula certficate for an -// existing host, given the public key for that host. This is currently mostly -// useful for creating certs for mobile devices. -// -// TODO replace this with CreateHostBootstrap, and the CreateNebulaCertificate -// RPC method can just pull cert out of that. -// -// Errors: -// - ErrHostNotFound +// CreateNebulaCertificate implements the method for the network.RPC interface. func (d *Daemon) CreateNebulaCertificate( ctx context.Context, hostName nebula.HostName, @@ -751,28 +368,17 @@ func (d *Daemon) CreateNebulaCertificate( ) ( nebula.Certificate, error, ) { - return withCurrBootstrap(d, func( - currBootstrap bootstrap.Bootstrap, - ) ( - nebula.Certificate, error, - ) { - host, ok := currBootstrap.Hosts[hostName] - if !ok { - return nebula.Certificate{}, ErrHostNotFound - } - ip := host.IP() - - caSigningPrivateKey, err := daecommon.GetNebulaCASigningPrivateKey( - ctx, d.secretsStore, - ) - if err != nil { - return nebula.Certificate{}, fmt.Errorf("getting CA signing key: %w", err) - } - - caCreds := makeCACreds(currBootstrap, caSigningPrivateKey) - - return nebula.NewHostCert(caCreds, hostPubKey, hostName, ip) - }) + return withNetwork( + ctx, + d, + func( + ctx context.Context, n network.Network, + ) ( + nebula.Certificate, error, + ) { + return n.CreateNebulaCertificate(ctx, hostName, hostPubKey) + }, + ) } // Shutdown blocks until all resources held or created by the daemon, @@ -784,13 +390,32 @@ func (d *Daemon) Shutdown() error { d.l.Lock() defer d.l.Unlock() - close(d.shutdownCh) - d.wg.Wait() - d.state = daemonStateShutdown - - if d.children != nil { - d.children.Shutdown() + if d.network != nil { + return d.network.Shutdown() } - return nil + + //var ( + // errCh = make(chan error, len(d.networks)) + // errs []error + //) + + //for id := range d.networks { + // id := id + // n := d.networks[id] + // go func() { + // if err := n.Shutdown(); err != nil { + // errCh <- fmt.Errorf("shutting down network %q: %w", id, err) + // } + // errCh <- nil + // }() + //} + + //for range cap(errCh) { + // if err := <-errCh; err != nil { + // errs = append(errs, err) + // } + //} + + //return errors.Join(errs...) } diff --git a/go/daemon/errors.go b/go/daemon/errors.go index 6e27e5e..aa13083 100644 --- a/go/daemon/errors.go +++ b/go/daemon/errors.go @@ -1,27 +1,21 @@ package daemon -import "isle/daemon/jsonrpc2" +import ( + "isle/daemon/daecommon" + "isle/daemon/jsonrpc2" +) + +const ( + errCodeNoNetwork = daecommon.ErrorCodeRangeDaemon + iota + errCodeAlreadyJoined +) var ( // ErrNoNetwork is returned when the daemon has never been configured with a // network. - ErrNoNetwork = jsonrpc2.NewError(1, "No network configured") - - // ErrInitializing is returned when a network is unavailable due to still - // being initialized. - ErrInitializing = jsonrpc2.NewError(2, "Network is being initialized") + ErrNoNetwork = jsonrpc2.NewError(errCodeNoNetwork, "No network configured") // ErrAlreadyJoined is returned when the daemon is instructed to create or // join a new network, but it is already joined to a network. - ErrAlreadyJoined = jsonrpc2.NewError(4, "Already joined to a network") - - // ErrInvalidConfig is returned when the daemon's configuration is invalid - // for an operation being attempted. - // - // The Data field will be a string containing further details. - ErrInvalidConfig = jsonrpc2.NewError(5, "Invalid daemon config") - - // ErrHostNotFound is returned when performing an operation which expected a - // host to exist in the network, but that host wasn't found. - ErrHostNotFound = jsonrpc2.NewError(6, "Host not found") + ErrAlreadyJoined = jsonrpc2.NewError(errCodeAlreadyJoined, "Already joined to a network") ) diff --git a/go/daemon/garage.go b/go/daemon/garage.go deleted file mode 100644 index 2a3177c..0000000 --- a/go/daemon/garage.go +++ /dev/null @@ -1,76 +0,0 @@ -package daemon - -import ( - "context" - "isle/bootstrap" - "isle/daemon/daecommon" - "net" - "strconv" - - "isle/garage" - - "dev.mediocregopher.com/mediocre-go-lib.git/mlog" -) - -func garageAdminClientLogger(logger *mlog.Logger) *mlog.Logger { - return logger.WithNamespace("garageAdminClient") -} - -// newGarageAdminClient will return an AdminClient for a local garage instance, -// or it will _panic_ if there is no local instance configured. -func newGarageAdminClient( - logger *mlog.Logger, - daemonConfig daecommon.Config, - adminToken string, - hostBootstrap bootstrap.Bootstrap, -) *garage.AdminClient { - - thisHost := hostBootstrap.ThisHost() - - return garage.NewAdminClient( - garageAdminClientLogger(logger), - net.JoinHostPort( - thisHost.IP().String(), - strconv.Itoa(daemonConfig.Storage.Allocations[0].AdminPort), - ), - adminToken, - ) -} - -func garageApplyLayout( - ctx context.Context, - logger *mlog.Logger, - daemonConfig daecommon.Config, - adminToken string, - hostBootstrap bootstrap.Bootstrap, -) error { - - var ( - adminClient = newGarageAdminClient( - logger, daemonConfig, adminToken, hostBootstrap, - ) - thisHost = hostBootstrap.ThisHost() - hostName = thisHost.Name - allocs = daemonConfig.Storage.Allocations - peers = make([]garage.PeerLayout, len(allocs)) - ) - - for i, alloc := range allocs { - - id := daecommon.BootstrapGarageHostForAlloc(thisHost, alloc).ID - - zone := string(hostName) - if alloc.Zone != "" { - zone = alloc.Zone - } - - peers[i] = garage.PeerLayout{ - ID: id, - Capacity: alloc.Capacity * 1_000_000_000, - Zone: zone, - Tags: []string{}, - } - } - - return adminClient.ApplyLayout(ctx, peers) -} diff --git a/go/daemon/garage_client_params.go b/go/daemon/garage_client_params.go deleted file mode 100644 index 416f7d1..0000000 --- a/go/daemon/garage_client_params.go +++ /dev/null @@ -1,55 +0,0 @@ -package daemon - -import ( - "context" - "errors" - "fmt" - "isle/bootstrap" - "isle/daemon/daecommon" - "isle/garage" - "isle/secrets" -) - -// GarageClientParams contains all the data needed to instantiate garage -// clients. -type GarageClientParams struct { - Peer garage.RemotePeer - GlobalBucketS3APICredentials garage.S3APICredentials - - // RPCSecret may be empty, if the secret is not available on the host. - RPCSecret string -} - -func (d *Daemon) getGarageClientParams( - ctx context.Context, currBootstrap bootstrap.Bootstrap, -) ( - GarageClientParams, error, -) { - creds, err := daecommon.GetGarageS3APIGlobalBucketCredentials( - ctx, d.secretsStore, - ) - if err != nil { - return GarageClientParams{}, fmt.Errorf("getting garage global bucket creds: %w", err) - } - - rpcSecret, err := daecommon.GetGarageRPCSecret(ctx, d.secretsStore) - if err != nil && !errors.Is(err, secrets.ErrNotFound) { - return GarageClientParams{}, fmt.Errorf("getting garage rpc secret: %w", err) - } - - return GarageClientParams{ - Peer: currBootstrap.ChooseGaragePeer(), - GlobalBucketS3APICredentials: creds, - RPCSecret: rpcSecret, - }, nil -} - -// GlobalBucketS3APIClient returns an S3 client pre-configured with access to -// the global bucket. -func (p GarageClientParams) GlobalBucketS3APIClient() garage.S3APIClient { - var ( - addr = p.Peer.S3APIAddr() - creds = p.GlobalBucketS3APICredentials - ) - return garage.NewS3APIClient(addr, creds) -} diff --git a/go/daemon/jigs.go b/go/daemon/jigs.go deleted file mode 100644 index 6192c95..0000000 --- a/go/daemon/jigs.go +++ /dev/null @@ -1,50 +0,0 @@ -package daemon - -import ( - "crypto/rand" - "encoding/hex" - "errors" - "fmt" - "io/fs" - "os" - "path/filepath" -) - -func randStr(l int) string { - b := make([]byte, l) - if _, err := rand.Read(b); err != nil { - panic(err) - } - return hex.EncodeToString(b) -} - -// mkDir is like os.Mkdir but it returns better error messages. If the directory -// already exists then nil is returned. -func mkDir(path string) error { - { - parentPath := filepath.Dir(path) - parentInfo, err := os.Stat(parentPath) - if err != nil { - return fmt.Errorf("checking fs node of parent %q: %w", parentPath, err) - } else if !parentInfo.IsDir() { - return fmt.Errorf("%q is not a directory", parentPath) - } - } - - info, err := os.Stat(path) - if errors.Is(err, fs.ErrNotExist) { - // fine - } else if err != nil { - return fmt.Errorf("checking fs node: %w", err) - } else if !info.IsDir() { - return fmt.Errorf("exists but is not a directory") - } else { - return nil - } - - if err := os.Mkdir(path, 0700); err != nil { - return fmt.Errorf("creating directory: %w", err) - } - - return nil -} diff --git a/go/daemon/network.go b/go/daemon/network.go new file mode 100644 index 0000000..648f73b --- /dev/null +++ b/go/daemon/network.go @@ -0,0 +1,73 @@ +package daemon + +import ( + "fmt" + "isle/bootstrap" + "isle/daemon/network" + "isle/toolkit" +) + +func networkStateDir( + networksStateDir toolkit.Dir, networkID string, mayExist bool, +) ( + toolkit.Dir, error, +) { + return networksStateDir.MkChildDir(networkID, mayExist) +} + +func networkRuntimeDir( + networksRuntimeDir toolkit.Dir, networkID string, mayExist bool, +) ( + toolkit.Dir, error, +) { + return networksRuntimeDir.MkChildDir(networkID, mayExist) +} + +func networkDirs( + networksStateDir, networksRuntimeDir toolkit.Dir, + networkID string, + mayExist bool, +) ( + stateDir, runtimeDir toolkit.Dir, err error, +) { + h := new(toolkit.MkDirHelper) + stateDir, _ = h.Maybe( + networkStateDir(networksStateDir, networkID, mayExist), + ) + runtimeDir, _ = h.Maybe( + networkRuntimeDir(networksRuntimeDir, networkID, mayExist), + ) + err = h.Err() + return +} + +// LoadableNetworks returns the CreationParams for each Network which is able to +// be loaded. +func LoadableNetworks( + networksStateDir toolkit.Dir, +) ( + []bootstrap.CreationParams, error, +) { + networkStateDirs, err := networksStateDir.ChildDirs() + if err != nil { + return nil, fmt.Errorf( + "listing children of %q: %w", networksStateDir.Path, err, + ) + } + + creationParams := make([]bootstrap.CreationParams, 0, len(networkStateDirs)) + + for _, networkStateDir := range networkStateDirs { + thisCreationParams, err := network.LoadCreationParams(networkStateDir) + if err != nil { + return nil, fmt.Errorf( + "loading creation params from %q: %w", + networkStateDir.Path, + err, + ) + } + creationParams = append(creationParams, thisCreationParams) + } + + return creationParams, nil +} diff --git a/go/daemon/bootstrap.go b/go/daemon/network/bootstrap.go similarity index 84% rename from go/daemon/bootstrap.go rename to go/daemon/network/bootstrap.go index 583d4e3..f54cd53 100644 --- a/go/daemon/bootstrap.go +++ b/go/daemon/network/bootstrap.go @@ -1,25 +1,15 @@ -package daemon +package network import ( - "encoding/json" "fmt" - "os" - "path/filepath" - "isle/bootstrap" "isle/daemon/daecommon" "isle/garage/garagesrv" "isle/jsonutil" - "isle/secrets" + "os" + "path/filepath" ) -// JoiningBootstrap wraps a normal Bootstrap to include extra data which a host -// might need while joining a network. -type JoiningBootstrap struct { - Bootstrap bootstrap.Bootstrap - Secrets map[secrets.ID]json.RawMessage -} - func writeBootstrapToStateDir( stateDirPath string, hostBootstrap bootstrap.Bootstrap, ) error { diff --git a/go/daemon/network/errors.go b/go/daemon/network/errors.go new file mode 100644 index 0000000..7d45cbc --- /dev/null +++ b/go/daemon/network/errors.go @@ -0,0 +1,28 @@ +package network + +import ( + "isle/daemon/daecommon" + "isle/daemon/jsonrpc2" +) + +const ( + errCodeInitializing = daecommon.ErrorCodeRangeNetwork + iota + errCodeInvalidConfig + errCodeHostNotFound +) + +var ( + // ErrInitializing is returned when a network is unavailable due to still + // being initialized. + ErrInitializing = jsonrpc2.NewError(errCodeInitializing, "Network is being initialized") + + // ErrInvalidConfig is returned when the daemon's configuration is invalid + // for an operation being attempted. + // + // The Data field will be a string containing further details. + ErrInvalidConfig = jsonrpc2.NewError(errCodeInvalidConfig, "Invalid daemon config") + + // ErrHostNotFound is returned when performing an operation which expected a + // host to exist in the network, but that host wasn't found. + ErrHostNotFound = jsonrpc2.NewError(errCodeHostNotFound, "Host not found") +) diff --git a/go/daemon/global_bucket.go b/go/daemon/network/garage.go similarity index 60% rename from go/daemon/global_bucket.go rename to go/daemon/network/garage.go index 53914e3..80dc392 100644 --- a/go/daemon/global_bucket.go +++ b/go/daemon/network/garage.go @@ -1,15 +1,19 @@ -package daemon +package network import ( "bytes" "context" "encoding/json" + "errors" "fmt" "isle/bootstrap" "isle/daemon/daecommon" "isle/garage" "isle/nebula" + "isle/secrets" + "net" "path/filepath" + "strconv" "dev.mediocregopher.com/mediocre-go-lib.git/mctx" "dev.mediocregopher.com/mediocre-go-lib.git/mlog" @@ -21,6 +25,93 @@ const ( garageGlobalBucketBootstrapHostsDirPath = "bootstrap/hosts" ) +func (n *network) getGarageClientParams( + ctx context.Context, currBootstrap bootstrap.Bootstrap, +) ( + GarageClientParams, error, +) { + creds, err := daecommon.GetGarageS3APIGlobalBucketCredentials( + ctx, n.secretsStore, + ) + if err != nil { + return GarageClientParams{}, fmt.Errorf("getting garage global bucket creds: %w", err) + } + + rpcSecret, err := daecommon.GetGarageRPCSecret(ctx, n.secretsStore) + if err != nil && !errors.Is(err, secrets.ErrNotFound) { + return GarageClientParams{}, fmt.Errorf("getting garage rpc secret: %w", err) + } + + return GarageClientParams{ + Peer: currBootstrap.ChooseGaragePeer(), + GlobalBucketS3APICredentials: creds, + RPCSecret: rpcSecret, + }, nil +} + +func garageAdminClientLogger(logger *mlog.Logger) *mlog.Logger { + return logger.WithNamespace("garageAdminClient") +} + +// newGarageAdminClient will return an AdminClient for a local garage instance, +// or it will _panic_ if there is no local instance configured. +func newGarageAdminClient( + logger *mlog.Logger, + daemonConfig daecommon.Config, + adminToken string, + hostBootstrap bootstrap.Bootstrap, +) *garage.AdminClient { + + thisHost := hostBootstrap.ThisHost() + + return garage.NewAdminClient( + garageAdminClientLogger(logger), + net.JoinHostPort( + thisHost.IP().String(), + strconv.Itoa(daemonConfig.Storage.Allocations[0].AdminPort), + ), + adminToken, + ) +} + +func garageApplyLayout( + ctx context.Context, + logger *mlog.Logger, + daemonConfig daecommon.Config, + adminToken string, + hostBootstrap bootstrap.Bootstrap, +) error { + + var ( + adminClient = newGarageAdminClient( + logger, daemonConfig, adminToken, hostBootstrap, + ) + thisHost = hostBootstrap.ThisHost() + hostName = thisHost.Name + allocs = daemonConfig.Storage.Allocations + peers = make([]garage.PeerLayout, len(allocs)) + ) + + for i, alloc := range allocs { + + id := daecommon.BootstrapGarageHostForAlloc(thisHost, alloc).ID + + zone := string(hostName) + if alloc.Zone != "" { + zone = alloc.Zone + } + + peers[i] = garage.PeerLayout{ + ID: id, + Capacity: alloc.Capacity * 1_000_000_000, + Zone: zone, + Tags: []string{}, + } + } + + return adminClient.ApplyLayout(ctx, peers) +} + func garageInitializeGlobalBucket( ctx context.Context, logger *mlog.Logger, @@ -61,13 +152,73 @@ func garageInitializeGlobalBucket( return creds, nil } +func (n *network) getGarageBootstrapHosts( + ctx context.Context, currBootstrap bootstrap.Bootstrap, +) ( + map[nebula.HostName]bootstrap.Host, error, +) { + garageClientParams, err := n.getGarageClientParams(ctx, currBootstrap) + if err != nil { + return nil, fmt.Errorf("getting garage client params: %w", err) + } + + var ( + client = garageClientParams.GlobalBucketS3APIClient() + hosts = map[nebula.HostName]bootstrap.Host{} + + objInfoCh = client.ListObjects( + ctx, garage.GlobalBucket, + minio.ListObjectsOptions{ + Prefix: garageGlobalBucketBootstrapHostsDirPath, + Recursive: true, + }, + ) + ) + + for objInfo := range objInfoCh { + + ctx := mctx.Annotate(ctx, "objectKey", objInfo.Key) + + if objInfo.Err != nil { + return nil, fmt.Errorf("listing objects: %w", objInfo.Err) + } + + obj, err := client.GetObject( + ctx, garage.GlobalBucket, objInfo.Key, minio.GetObjectOptions{}, + ) + + if err != nil { + return nil, fmt.Errorf("retrieving object %q: %w", objInfo.Key, err) + } + + var authedHost bootstrap.AuthenticatedHost + + err = json.NewDecoder(obj).Decode(&authedHost) + obj.Close() + + if err != nil { + n.logger.Warn(ctx, "Object contains invalid json", err) + continue + } + + host, err := authedHost.Unwrap(currBootstrap.CAPublicCredentials) + if err != nil { + n.logger.Warn(ctx, "Host could not be authenticated", err) + } + + hosts[host.Name] = host + } + + return hosts, nil +} + // putGarageBoostrapHost places the .json.signed file for this host // into garage so that other hosts are able to see relevant configuration for // it. -func (d *Daemon) putGarageBoostrapHost( +func (n *network) putGarageBoostrapHost( ctx context.Context, currBootstrap bootstrap.Bootstrap, ) error { - garageClientParams, err := d.getGarageClientParams(ctx, currBootstrap) + garageClientParams, err := n.getGarageClientParams(ctx, currBootstrap) if err != nil { return fmt.Errorf("getting garage client params: %w", err) } @@ -113,66 +264,6 @@ func (d *Daemon) putGarageBoostrapHost( return nil } -func (d *Daemon) getGarageBootstrapHosts( - ctx context.Context, currBootstrap bootstrap.Bootstrap, -) ( - map[nebula.HostName]bootstrap.Host, error, -) { - garageClientParams, err := d.getGarageClientParams(ctx, currBootstrap) - if err != nil { - return nil, fmt.Errorf("getting garage client params: %w", err) - } - - var ( - client = garageClientParams.GlobalBucketS3APIClient() - hosts = map[nebula.HostName]bootstrap.Host{} - - objInfoCh = client.ListObjects( - ctx, garage.GlobalBucket, - minio.ListObjectsOptions{ - Prefix: garageGlobalBucketBootstrapHostsDirPath, - Recursive: true, - }, - ) - ) - - for objInfo := range objInfoCh { - - ctx := mctx.Annotate(ctx, "objectKey", objInfo.Key) - - if objInfo.Err != nil { - return nil, fmt.Errorf("listing objects: %w", objInfo.Err) - } - - obj, err := client.GetObject( - ctx, garage.GlobalBucket, objInfo.Key, minio.GetObjectOptions{}, - ) - - if err != nil { - return nil, fmt.Errorf("retrieving object %q: %w", objInfo.Key, err) - } - - var authedHost bootstrap.AuthenticatedHost - - err = json.NewDecoder(obj).Decode(&authedHost) - obj.Close() - - if err != nil { - d.logger.Warn(ctx, "Object contains invalid json", err) - continue - } - - host, err := authedHost.Unwrap(currBootstrap.CAPublicCredentials) - if err != nil { - d.logger.Warn(ctx, "Host could not be authenticated", err) - } - - hosts[host.Name] = host - } - - return hosts, nil -} - func removeGarageBootstrapHost( ctx context.Context, client garage.S3APIClient, hostName nebula.HostName, ) error { diff --git a/go/daemon/network/jigs.go b/go/daemon/network/jigs.go new file mode 100644 index 0000000..254a595 --- /dev/null +++ b/go/daemon/network/jigs.go @@ -0,0 +1,14 @@ +package network + +import ( + "crypto/rand" + "encoding/hex" +) + +func randStr(l int) string { + b := make([]byte, l) + if _, err := rand.Read(b); err != nil { + panic(err) + } + return hex.EncodeToString(b) +} diff --git a/go/daemon/network/network.go b/go/daemon/network/network.go new file mode 100644 index 0000000..a8dfd8c --- /dev/null +++ b/go/daemon/network/network.go @@ -0,0 +1,900 @@ +// Package network implements the Network type, which manages the daemon's +// membership in a single network. +package network + +import ( + "bytes" + "cmp" + "context" + "crypto/rand" + "encoding/json" + "errors" + "fmt" + "isle/bootstrap" + "isle/daemon/children" + "isle/daemon/daecommon" + "isle/garage" + "isle/jsonutil" + "isle/nebula" + "isle/secrets" + "isle/toolkit" + "log" + "net/netip" + "slices" + "sync" + "time" + + "dev.mediocregopher.com/mediocre-go-lib.git/mlog" + "golang.org/x/exp/maps" +) + +// GarageClientParams contains all the data needed to instantiate garage +// clients. +type GarageClientParams struct { + Peer garage.RemotePeer + GlobalBucketS3APICredentials garage.S3APICredentials + + // RPCSecret may be empty, if the secret is not available on the host. + RPCSecret string +} + +// GlobalBucketS3APIClient returns an S3 client pre-configured with access to +// the global bucket. +func (p GarageClientParams) GlobalBucketS3APIClient() garage.S3APIClient { + var ( + addr = p.Peer.S3APIAddr() + creds = p.GlobalBucketS3APICredentials + ) + return garage.NewS3APIClient(addr, creds) +} + +// CreateHostOpts are optional parameters to the CreateHost method. +type CreateHostOpts struct { + // IP address of the new host. An IP address will be randomly chosen if one + // is not given here. + IP netip.Addr + + // CanCreateHosts indicates that the bootstrap produced by CreateHost should + // give the new host the ability to create new hosts as well. + CanCreateHosts bool + + // TODO add nebula cert tags +} + +// JoiningBootstrap wraps a normal Bootstrap to include extra data which a host +// might need while joining a network. +type JoiningBootstrap struct { + Bootstrap bootstrap.Bootstrap + Secrets map[secrets.ID]json.RawMessage +} + +// RPC defines the methods related to a single network which are available over +// the daemon's RPC interface. +type RPC interface { + // GetHosts returns all hosts known to the network, sorted by their name. + GetHosts(context.Context) ([]bootstrap.Host, error) + + // GetGarageClientParams returns a GarageClientParams for the current + // network state. + GetGarageClientParams(context.Context) (GarageClientParams, error) + + // GetNebulaCAPublicCredentials returns the CAPublicCredentials for the + // network. + GetNebulaCAPublicCredentials( + context.Context, + ) ( + nebula.CAPublicCredentials, error, + ) + + // RemoveHost removes the host of the given name from the network. + RemoveHost(ctx context.Context, hostName nebula.HostName) error + + // CreateHost creates a bootstrap for a new host with the given name and IP + // address. + CreateHost( + context.Context, nebula.HostName, CreateHostOpts, + ) ( + JoiningBootstrap, error, + ) + + // CreateNebulaCertificate creates and signs a new nebula certficate for an + // existing host, given the public key for that host. This is currently + // mostly useful for creating certs for mobile devices. + // + // TODO replace this with CreateHostBootstrap, and the + // CreateNebulaCertificate RPC method can just pull cert out of that. + // + // Errors: + // - ErrHostNotFound + CreateNebulaCertificate( + context.Context, nebula.HostName, nebula.EncryptingPublicKey, + ) ( + nebula.Certificate, error, + ) +} + +// Network manages membership in a single micropelago network. Each Network +// is comprised of a unique IP subnet, hosts connected together on that subnet +// via a VPN, an S3 storage layer only accessible to those hosts, plus other +// services built on this foundation. +// +// A single daemon (isle server) can manage multiple networks. Each network is +// expected to be independent of the others, ie they should not share any +// resources. +type Network interface { + RPC + + // GetNetworkCreationParams returns the CreationParams that the Network was + // originally created with. + GetNetworkCreationParams(context.Context) (bootstrap.CreationParams, error) + + // Shutdown blocks until all resources held or created by the Network, + // including child processes it has started, have been cleaned up. + // + // If this returns an error then it's possible that child processes are + // still running and are no longer managed. + Shutdown() error +} + +//////////////////////////////////////////////////////////////////////////////// +// Network implementation + +// Opts are optional parameters which can be passed in when initializing a new +// Network instance. A nil Opts is equivalent to a zero value. +type Opts struct { + ChildrenOpts *children.Opts +} + +func (o *Opts) withDefaults() *Opts { + if o == nil { + o = new(Opts) + } + return o +} + +type network struct { + logger *mlog.Logger + daemonConfig daecommon.Config + + envBinDirPath string + stateDir toolkit.Dir + runtimeDir toolkit.Dir + + opts *Opts + + secretsStore secrets.Store + garageAdminToken string + + l sync.RWMutex + children *children.Children + currBootstrap bootstrap.Bootstrap + + shutdownCh chan struct{} + wg sync.WaitGroup +} + +// instatiateNetwork returns an instantiated *network instance which has not yet +// been initialized. +func instatiateNetwork( + logger *mlog.Logger, + networkID string, + daemonConfig daecommon.Config, + envBinDirPath string, + stateDir toolkit.Dir, + runtimeDir toolkit.Dir, + opts *Opts, +) *network { + log.Printf("DEBUG: network stateDir:%+v runtimeDir:%+v", stateDir, runtimeDir) + return &network{ + logger: logger, + daemonConfig: daemonConfig, + envBinDirPath: envBinDirPath, + stateDir: stateDir, + runtimeDir: runtimeDir, + opts: opts.withDefaults(), + garageAdminToken: randStr(32), + shutdownCh: make(chan struct{}), + } +} + +// LoadCreationParams returns the CreationParams of a Network which was +// Created/Joined with the given state directory. +func LoadCreationParams( + stateDir toolkit.Dir, +) ( + bootstrap.CreationParams, error, +) { + var ( + // TODO store/load the creation params separately from the rest of + // the bootstrap, since the bootstrap contains potentially the + // entire host list of a network, which could be pretty bulky. + bootstrapFilePath = bootstrap.StateDirPath(stateDir.Path) + bs bootstrap.Bootstrap + ) + + if err := jsonutil.LoadFile(&bs, bootstrapFilePath); err != nil { + return bootstrap.CreationParams{}, fmt.Errorf( + "loading bootstrap from %q: %w", bootstrapFilePath, err, + ) + } + + return bs.NetworkCreationParams, nil +} + +// Load initializes and returns a Network instance for a network which was +// previously joined or created, and which has the given ID. +func Load( + ctx context.Context, + logger *mlog.Logger, + networkID string, + daemonConfig daecommon.Config, + envBinDirPath string, + stateDir toolkit.Dir, + runtimeDir toolkit.Dir, + opts *Opts, +) ( + Network, error, +) { + n := instatiateNetwork( + logger, + networkID, + daemonConfig, + envBinDirPath, + stateDir, + runtimeDir, + opts, + ) + + if err := n.initializeDirs(true); err != nil { + return nil, fmt.Errorf("initializing directories: %w", err) + } + + var ( + currBootstrap bootstrap.Bootstrap + bootstrapFilePath = bootstrap.StateDirPath(n.stateDir.Path) + ) + + if err := jsonutil.LoadFile(&currBootstrap, bootstrapFilePath); err != nil { + return nil, fmt.Errorf( + "loading bootstrap from %q: %w", bootstrapFilePath, err, + ) + } else if err := n.initialize(ctx, currBootstrap); err != nil { + return nil, fmt.Errorf("initializing with bootstrap: %w", err) + } + + return n, nil +} + +// Join initializes and returns a Network instance for an existing network which +// was not previously joined to on this host. Once Join has been called for a +// particular network it will error on subsequent calls for that same network, +// Load should be used instead. +func Join( + ctx context.Context, + logger *mlog.Logger, + daemonConfig daecommon.Config, + joiningBootstrap JoiningBootstrap, + envBinDirPath string, + stateDir toolkit.Dir, + runtimeDir toolkit.Dir, + opts *Opts, +) ( + Network, error, +) { + n := instatiateNetwork( + logger, + joiningBootstrap.Bootstrap.NetworkCreationParams.ID, + daemonConfig, + envBinDirPath, + stateDir, + runtimeDir, + opts, + ) + + if err := n.initializeDirs(false); err != nil { + return nil, fmt.Errorf("initializing directories: %w", err) + } + + if err := secrets.Import( + ctx, n.secretsStore, joiningBootstrap.Secrets, + ); err != nil { + return nil, fmt.Errorf("importing secrets: %w", err) + } + + if err := n.initialize(ctx, joiningBootstrap.Bootstrap); err != nil { + return nil, fmt.Errorf("initializing with bootstrap: %w", err) + } + + return n, nil +} + +// Create initializes and returns a Network for a brand new network which uses +// the given creation parameters. +// +// - name: Human-readable name of the network. +// - domain: Primary domain name that network services are served under. +// - ipNet: An IP subnet, in CIDR form, which will be the overall range of +// possible IPs in the network. The first IP in this network range will +// become this first host's IP. +// - hostName: The name of this first host in the network. +// +// Errors: +// - ErrInvalidConfig - if daemonConfig doesn't have 3 storage allocations +// configured. +func Create( + ctx context.Context, + logger *mlog.Logger, + daemonConfig daecommon.Config, + envBinDirPath string, + stateDir toolkit.Dir, + runtimeDir toolkit.Dir, + creationParams bootstrap.CreationParams, + ipNet nebula.IPNet, // TODO should this go in CreationParams? + hostName nebula.HostName, + opts *Opts, +) ( + Network, error, +) { + if len(daemonConfig.Storage.Allocations) < 3 { + return nil, ErrInvalidConfig.WithData( + "At least three storage allocations are required.", + ) + } + + nebulaCACreds, err := nebula.NewCACredentials(creationParams.Domain, ipNet) + if err != nil { + return nil, fmt.Errorf("creating nebula CA cert: %w", err) + } + + garageRPCSecret := randStr(32) + + n := instatiateNetwork( + logger, + creationParams.ID, + daemonConfig, + envBinDirPath, + stateDir, + runtimeDir, + opts, + ) + + if err := n.initializeDirs(false); err != nil { + return nil, fmt.Errorf("initializing directories: %w", err) + } + + err = daecommon.SetGarageRPCSecret(ctx, n.secretsStore, garageRPCSecret) + if err != nil { + return nil, fmt.Errorf("setting garage RPC secret: %w", err) + } + + err = daecommon.SetNebulaCASigningPrivateKey( + ctx, n.secretsStore, nebulaCACreds.SigningPrivateKey, + ) + if err != nil { + return nil, fmt.Errorf("setting nebula CA signing key secret: %w", err) + } + + hostBootstrap, err := bootstrap.New( + nebulaCACreds, + creationParams, + map[nebula.HostName]bootstrap.Host{}, + hostName, + ipNet.FirstAddr(), + ) + if err != nil { + return nil, fmt.Errorf("initializing bootstrap data: %w", err) + } + + if err := n.initialize(ctx, hostBootstrap); err != nil { + return nil, fmt.Errorf("initializing with bootstrap: %w", err) + } + + return n, nil +} + +func (n *network) initializeDirs(mayExist bool) error { + secretsDir, err := n.stateDir.MkChildDir("secrets", mayExist) + if err != nil { + return fmt.Errorf("creating secrets dir: %w", err) + } + + n.secretsStore = secrets.NewFSStore(secretsDir.Path) + return nil +} + +func (n *network) initialize( + ctx context.Context, currBootstrap bootstrap.Bootstrap, +) error { + // we update this Host's data using whatever configuration has been provided + // by the daemon config. This way the network has the most up-to-date + // possible bootstrap. This updated bootstrap will later get updated in + // garage as a background task, so other hosts will see it as well. + currBootstrap, err := coalesceDaemonConfigAndBootstrap( + n.daemonConfig, currBootstrap, + ) + if err != nil { + return fmt.Errorf("combining configuration into bootstrap: %w", err) + } + + err = writeBootstrapToStateDir(n.stateDir.Path, currBootstrap) + if err != nil { + return fmt.Errorf("writing bootstrap to state dir: %w", err) + } + + n.currBootstrap = currBootstrap + + n.logger.Info(ctx, "Creating child processes") + n.children, err = children.New( + ctx, + n.logger.WithNamespace("children"), + n.envBinDirPath, + n.secretsStore, + n.daemonConfig, + n.runtimeDir, + n.garageAdminToken, + currBootstrap, + n.opts.ChildrenOpts, + ) + if err != nil { + return fmt.Errorf("creating child processes: %w", err) + } + + n.logger.Info(ctx, "Child processes created") + + if err := n.postInit(ctx); err != nil { + n.logger.Error(ctx, "Post-initialization failed, stopping child processes", err) + n.children.Shutdown() + return fmt.Errorf("performing post-initialization: %w", err) + } + + // TODO annotate this context with creation params + ctx, cancel := context.WithCancel(context.Background()) + n.wg.Add(1) + go func() { + defer n.wg.Done() + <-n.shutdownCh + cancel() + }() + + n.wg.Add(1) + go func() { + defer n.wg.Done() + n.reloadLoop(ctx) + n.logger.Debug(ctx, "Daemon restart loop stopped") + }() + + return nil +} + +func (n *network) postInit(ctx context.Context) error { + if len(n.daemonConfig.Storage.Allocations) > 0 { + n.logger.Info(ctx, "Applying garage layout") + if err := garageApplyLayout( + ctx, n.logger, n.daemonConfig, n.garageAdminToken, n.currBootstrap, + ); err != nil { + return fmt.Errorf("applying garage layout: %w", err) + } + } + + // This is only necessary during network creation, otherwise the bootstrap + // should already have these credentials built in. + // + // TODO this is pretty hacky, but there doesn't seem to be a better way to + // manage it at the moment. + _, err := daecommon.GetGarageS3APIGlobalBucketCredentials( + ctx, n.secretsStore, + ) + if errors.Is(err, secrets.ErrNotFound) { + n.logger.Info(ctx, "Initializing garage shared global bucket") + garageGlobalBucketCreds, err := garageInitializeGlobalBucket( + ctx, + n.logger, + n.daemonConfig, + n.garageAdminToken, + n.currBootstrap, + ) + if err != nil { + return fmt.Errorf("initializing global bucket: %w", err) + } + + err = daecommon.SetGarageS3APIGlobalBucketCredentials( + ctx, n.secretsStore, garageGlobalBucketCreds, + ) + if err != nil { + return fmt.Errorf("storing global bucket creds: %w", err) + } + } + + n.logger.Info(ctx, "Updating host info in garage") + err = n.putGarageBoostrapHost(ctx, n.currBootstrap) + if err != nil { + return fmt.Errorf("updating host info in garage: %w", err) + } + + return nil +} + +func (n *network) reloadLoop(ctx context.Context) { + ticker := time.NewTicker(3 * time.Minute) + defer ticker.Stop() + + for { + select { + case <-ctx.Done(): + return + + case <-ticker.C: + n.l.RLock() + currBootstrap := n.currBootstrap + n.l.RUnlock() + + n.logger.Info(ctx, "Checking for bootstrap changes") + newHosts, err := n.getGarageBootstrapHosts(ctx, currBootstrap) + if err != nil { + n.logger.Error(ctx, "Failed to get hosts from garage", err) + continue + } + + // TODO there's some potential race conditions here, where + // CreateHost could be called at this point, write the new host to + // garage and the bootstrap, but then this reload call removes the + // host from this bootstrap/children until the next reload. + + if err := n.reload(ctx, currBootstrap, newHosts); err != nil { + n.logger.Error(ctx, "Reloading with new host data failed", err) + continue + } + } + } +} + +// reload will check the existing hosts data from currBootstrap against a +// potentially updated set of hosts data, and if there are any differences will +// perform whatever changes are necessary. +func (n *network) reload( + ctx context.Context, + currBootstrap bootstrap.Bootstrap, + newHosts map[nebula.HostName]bootstrap.Host, +) error { + var ( + newBootstrap = currBootstrap + thisHost = currBootstrap.ThisHost() + ) + + newBootstrap.Hosts = newHosts + + // the daemon's view of this host's bootstrap info takes precedence over + // whatever is in garage + newBootstrap.Hosts[thisHost.Name] = thisHost + + diff, err := children.CalculateReloadDiff( + n.daemonConfig, currBootstrap, newBootstrap, + ) + if err != nil { + return fmt.Errorf("calculating diff between bootstraps: %w", err) + } else if diff == (children.ReloadDiff{}) { + n.logger.Info(ctx, "No changes to bootstrap detected") + return nil + } + + n.logger.Info(ctx, "Bootstrap has changed, storing new bootstrap") + n.l.Lock() + n.currBootstrap = newBootstrap + n.l.Unlock() + + if err := n.children.Reload(ctx, newBootstrap, diff); err != nil { + return fmt.Errorf("reloading child processes (diff:%+v): %w", diff, err) + } + + return nil +} + +func withCurrBootstrap[Res any]( + n *network, fn func(bootstrap.Bootstrap) (Res, error), +) (Res, error) { + n.l.RLock() + defer n.l.RUnlock() + + currBootstrap := n.currBootstrap + return fn(currBootstrap) +} + +func (n *network) getBootstrap( + ctx context.Context, +) ( + bootstrap.Bootstrap, error, +) { + return withCurrBootstrap(n, func( + currBootstrap bootstrap.Bootstrap, + ) ( + bootstrap.Bootstrap, error, + ) { + return currBootstrap, nil + }) +} + +func (n *network) GetHosts(ctx context.Context) ([]bootstrap.Host, error) { + b, err := n.getBootstrap(ctx) + if err != nil { + return nil, fmt.Errorf("retrieving bootstrap: %w", err) + } + + hosts := maps.Values(b.Hosts) + slices.SortFunc(hosts, func(a, b bootstrap.Host) int { + return cmp.Compare(a.Name, b.Name) + }) + + return hosts, nil +} + +func (n *network) GetGarageClientParams( + ctx context.Context, +) ( + GarageClientParams, error, +) { + return withCurrBootstrap(n, func( + currBootstrap bootstrap.Bootstrap, + ) ( + GarageClientParams, error, + ) { + return n.getGarageClientParams(ctx, currBootstrap) + }) +} + +func (n *network) GetNebulaCAPublicCredentials( + ctx context.Context, +) ( + nebula.CAPublicCredentials, error, +) { + b, err := n.getBootstrap(ctx) + if err != nil { + return nebula.CAPublicCredentials{}, fmt.Errorf( + "retrieving bootstrap: %w", err, + ) + } + + return b.CAPublicCredentials, nil +} + +func (n *network) RemoveHost(ctx context.Context, hostName nebula.HostName) error { + // TODO RemoveHost should publish a certificate revocation for the host + // being removed. + _, err := withCurrBootstrap(n, func( + currBootstrap bootstrap.Bootstrap, + ) ( + struct{}, error, + ) { + garageClientParams, err := n.getGarageClientParams(ctx, currBootstrap) + if err != nil { + return struct{}{}, fmt.Errorf("get garage client params: %w", err) + } + + client := garageClientParams.GlobalBucketS3APIClient() + return struct{}{}, removeGarageBootstrapHost(ctx, client, hostName) + }) + return err +} + +func makeCACreds( + currBootstrap bootstrap.Bootstrap, + caSigningPrivateKey nebula.SigningPrivateKey, +) nebula.CACredentials { + return nebula.CACredentials{ + Public: currBootstrap.CAPublicCredentials, + SigningPrivateKey: caSigningPrivateKey, + } +} + +func chooseAvailableIP(b bootstrap.Bootstrap) (netip.Addr, error) { + var ( + cidrIPNet = b.CAPublicCredentials.Cert.Unwrap().Details.Subnets[0] + cidrMask = cidrIPNet.Mask + cidrIPB = cidrIPNet.IP + + cidr = netip.MustParsePrefix(cidrIPNet.String()) + cidrIP = cidr.Addr() + cidrSuffixBits = cidrIP.BitLen() - cidr.Bits() + + inUseIPs = make(map[netip.Addr]struct{}, len(b.Hosts)) + ) + + for _, host := range b.Hosts { + inUseIPs[host.IP()] = struct{}{} + } + + // first check that there are any addresses at all. We can determine the + // number of possible addresses using the network CIDR. The first IP in a + // subnet is the network identifier, and is reserved. The last IP is the + // broadcast IP, and is also reserved. Hence, the -2. + usableIPs := (1 << cidrSuffixBits) - 2 + if len(inUseIPs) >= usableIPs { + return netip.Addr{}, errors.New("no available IPs") + } + + // We need to know the subnet broadcast address, so we don't accidentally + // produce it. + cidrBCastIPB := bytes.Clone(cidrIPB) + for i := range cidrBCastIPB { + cidrBCastIPB[i] |= ^cidrMask[i] + } + cidrBCastIP, ok := netip.AddrFromSlice(cidrBCastIPB) + if !ok { + panic(fmt.Sprintf("invalid broadcast ip calculated: %x", cidrBCastIP)) + } + + // Try a handful of times to pick an IP at random. This is preferred, as it + // leaves less room for two different CreateHost calls to choose the same + // IP. + for range 20 { + b := make([]byte, len(cidrIPB)) + if _, err := rand.Read(b); err != nil { + return netip.Addr{}, fmt.Errorf("reading random bytes: %w", err) + } + + for i := range b { + b[i] = cidrIPB[i] | (b[i] & ^cidrMask[i]) + } + + ip, ok := netip.AddrFromSlice(b) + if !ok { + panic(fmt.Sprintf("generated invalid IP: %x", b)) + } else if !cidr.Contains(ip) { + panic(fmt.Sprintf( + "generated IP %v which is not in cidr %v", ip, cidr, + )) + } + + if ip == cidrIP || ip == cidrBCastIP { + continue + } + + if _, inUse := inUseIPs[ip]; !inUse { + return ip, nil + } + } + + // If randomly picking fails then just go through IPs one by one until the + // free one is found. + for ip := cidrIP.Next(); ip != cidrBCastIP; ip = ip.Next() { + if _, inUse := inUseIPs[ip]; !inUse { + return ip, nil + } + } + + panic("All ips are in-use, but somehow that wasn't determined earlier") +} + +func (n *network) CreateHost( + ctx context.Context, + hostName nebula.HostName, + opts CreateHostOpts, +) ( + JoiningBootstrap, error, +) { + n.l.RLock() + currBootstrap := n.currBootstrap + n.l.RUnlock() + + ip := opts.IP + if ip == (netip.Addr{}) { + var err error + if ip, err = chooseAvailableIP(currBootstrap); err != nil { + return JoiningBootstrap{}, fmt.Errorf( + "choosing available IP: %w", err, + ) + } + } + // TODO if the ip is given, check that it's not already in use. + + caSigningPrivateKey, err := daecommon.GetNebulaCASigningPrivateKey( + ctx, n.secretsStore, + ) + if err != nil { + return JoiningBootstrap{}, fmt.Errorf("getting CA signing key: %w", err) + } + + var joiningBootstrap JoiningBootstrap + joiningBootstrap.Bootstrap, err = bootstrap.New( + makeCACreds(currBootstrap, caSigningPrivateKey), + currBootstrap.NetworkCreationParams, + currBootstrap.Hosts, + hostName, + ip, + ) + if err != nil { + return JoiningBootstrap{}, fmt.Errorf( + "initializing bootstrap data: %w", err, + ) + } + + secretsIDs := []secrets.ID{ + daecommon.GarageRPCSecretSecretID, + daecommon.GarageS3APIGlobalBucketCredentialsSecretID, + } + + if opts.CanCreateHosts { + secretsIDs = append( + secretsIDs, daecommon.NebulaCASigningPrivateKeySecretID, + ) + } + + if joiningBootstrap.Secrets, err = secrets.Export( + ctx, n.secretsStore, secretsIDs, + ); err != nil { + return JoiningBootstrap{}, fmt.Errorf("exporting secrets: %w", err) + } + + n.logger.Info(ctx, "Putting new host in garage") + err = n.putGarageBoostrapHost(ctx, joiningBootstrap.Bootstrap) + if err != nil { + return JoiningBootstrap{}, fmt.Errorf("putting new host in garage: %w", err) + } + + // the new bootstrap will have been initialized with both all existing hosts + // (based on currBootstrap) and the host being created. + newHosts := joiningBootstrap.Bootstrap.Hosts + + n.logger.Info(ctx, "Reloading local state with new host") + if err := n.reload(ctx, currBootstrap, newHosts); err != nil { + return JoiningBootstrap{}, fmt.Errorf("reloading child processes: %w", err) + } + + return joiningBootstrap, nil +} + +func (n *network) CreateNebulaCertificate( + ctx context.Context, + hostName nebula.HostName, + hostPubKey nebula.EncryptingPublicKey, +) ( + nebula.Certificate, error, +) { + return withCurrBootstrap(n, func( + currBootstrap bootstrap.Bootstrap, + ) ( + nebula.Certificate, error, + ) { + host, ok := currBootstrap.Hosts[hostName] + if !ok { + return nebula.Certificate{}, ErrHostNotFound + } + ip := host.IP() + + caSigningPrivateKey, err := daecommon.GetNebulaCASigningPrivateKey( + ctx, n.secretsStore, + ) + if err != nil { + return nebula.Certificate{}, fmt.Errorf("getting CA signing key: %w", err) + } + + caCreds := makeCACreds(currBootstrap, caSigningPrivateKey) + + return nebula.NewHostCert(caCreds, hostPubKey, hostName, ip) + }) +} + +func (n *network) GetNetworkCreationParams( + ctx context.Context, +) ( + bootstrap.CreationParams, error, +) { + + return withCurrBootstrap(n, func( + currBootstrap bootstrap.Bootstrap, + ) ( + bootstrap.CreationParams, error, + ) { + return currBootstrap.NetworkCreationParams, nil + }) +} + +func (n *network) Shutdown() error { + close(n.shutdownCh) + n.wg.Wait() + + if n.children != nil { + n.children.Shutdown() + } + + return nil +} diff --git a/go/daemon/rpc.go b/go/daemon/rpc.go index a845295..1dfa14a 100644 --- a/go/daemon/rpc.go +++ b/go/daemon/rpc.go @@ -2,8 +2,8 @@ package daemon import ( "context" - "isle/bootstrap" "isle/daemon/jsonrpc2" + "isle/daemon/network" "isle/nebula" "net/http" ) @@ -20,31 +20,12 @@ type RPC interface { hostName nebula.HostName, ) error - JoinNetwork(context.Context, JoiningBootstrap) error + JoinNetwork(context.Context, network.JoiningBootstrap) error - GetHosts(context.Context) ([]bootstrap.Host, error) - - GetGarageClientParams(context.Context) (GarageClientParams, error) - - GetNebulaCAPublicCredentials( - context.Context, - ) ( - nebula.CAPublicCredentials, error, - ) - - RemoveHost(ctx context.Context, hostName nebula.HostName) error - - CreateHost( - context.Context, nebula.HostName, CreateHostOpts, - ) ( - JoiningBootstrap, error, - ) - - CreateNebulaCertificate( - context.Context, nebula.HostName, nebula.EncryptingPublicKey, - ) ( - nebula.Certificate, error, - ) + // All network.RPC methods are automatically implemented by Daemon using the + // currently joined network. If no network is joined then any call to these + // methods will return ErrNoNetwork. + network.RPC } // RPCHandler returns a jsonrpc2.Handler which will use the Daemon to serve all diff --git a/go/secrets/store_fs.go b/go/secrets/store_fs.go index 4571c42..936bec8 100644 --- a/go/secrets/store_fs.go +++ b/go/secrets/store_fs.go @@ -20,12 +20,9 @@ type fsStorePayload[Body any] struct { } // NewFSStore returns a Store which will store secrets to the given directory. -func NewFSStore(dirPath string) (Store, error) { - err := os.Mkdir(dirPath, 0700) - if err != nil && !errors.Is(err, fs.ErrExist) { - return nil, fmt.Errorf("making directory: %w", err) - } - return &fsStore{dirPath}, nil +// The directory must already exist. +func NewFSStore(dirPath string) Store { + return &fsStore{dirPath} } func (s *fsStore) path(id ID) string { diff --git a/go/secrets/store_fs_test.go b/go/secrets/store_fs_test.go index cae9876..8ab4906 100644 --- a/go/secrets/store_fs_test.go +++ b/go/secrets/store_fs_test.go @@ -12,16 +12,11 @@ func Test_fsStore(t *testing.T) { } var ( - ctx = context.Background() - dir = t.TempDir() - id = NewID("testing", "a") + ctx = context.Background() + store = NewFSStore(t.TempDir()) + id = NewID("testing", "a") ) - store, err := NewFSStore(dir) - if err != nil { - t.Fatal(err) - } - var got payload if err := store.Get(ctx, &got, id); !errors.Is(err, ErrNotFound) { t.Fatalf("expected %v, got: %v", ErrNotFound, err) diff --git a/go/toolkit/dir.go b/go/toolkit/dir.go new file mode 100644 index 0000000..389c8cc --- /dev/null +++ b/go/toolkit/dir.go @@ -0,0 +1,121 @@ +package toolkit + +import ( + "errors" + "fmt" + "io/fs" + "os" + "path/filepath" +) + +// Dir is a type which makes it possible to statically assert that a directory +// has already been created. +type Dir struct { + Path string +} + +// MkDir creates a Dir at the given path. +// +// If the directory already exists, and mayExist is true, then Dir is returned +// successfully, otherwise an error is returned. +func MkDir(path string, mayExist bool) (Dir, error) { + if path == "" { + panic("Empty path passed to MkDir") + } + + { + parentPath := filepath.Dir(path) + parentInfo, err := os.Stat(parentPath) + if err != nil { + return Dir{}, fmt.Errorf( + "checking fs node of parent %q: %w", parentPath, err, + ) + } else if !parentInfo.IsDir() { + return Dir{}, fmt.Errorf( + "parent %q is not a directory", parentPath, + ) + } + } + + info, err := os.Stat(path) + if errors.Is(err, fs.ErrNotExist) { + // fine + } else if err != nil { + return Dir{}, fmt.Errorf("checking fs node: %w", err) + } else if !info.IsDir() { + return Dir{}, fmt.Errorf("exists but is not a directory") + } else { + if !mayExist { + return Dir{}, errors.New("directory already exists") + } + return Dir{path}, nil + } + + if err := os.Mkdir(path, 0700); err != nil { + return Dir{}, fmt.Errorf("creating directory: %w", err) + } + + return Dir{path}, nil +} + +// MkChildDir is a helper for joining Dir's path to the given name and calling +// MkDir with the result. +func (d Dir) MkChildDir(name string, mayExist bool) (Dir, error) { + childPath := filepath.Join(d.Path, name) + d, err := MkDir(childPath, mayExist) + if err != nil { + return Dir{}, fmt.Errorf( + "creating child directory %q: %w", childPath, err, + ) + } + return d, nil +} + +// ChildDirs returns a Dir for every child directory found under this Dir. +func (d Dir) ChildDirs() ([]Dir, error) { + entries, err := os.ReadDir(d.Path) + if errors.Is(err, fs.ErrNotExist) { + return nil, nil + } else if err != nil { + return nil, fmt.Errorf("listing contents: %w", err) + } + + dirs := make([]Dir, 0, len(entries)) + for _, entry := range entries { + if !entry.IsDir() { + continue + } + dirs = append(dirs, Dir{Path: filepath.Join(d.Path, entry.Name())}) + } + + return dirs, nil +} + +//////////////////////////////////////////////////////////////////////////////// + +// MkDirHelper is a helper type for creating a set of directories. It will +// collect errors as they occur, allowing error handling to be defered. +type MkDirHelper struct { + errs []error +} + +// Maybe returns the given Dir and true if err == nil. If err != nil then false +// is returned, and the error is collected internally such that it will be +// returned as part of the Err() output. +// +// This method is designed to be used in conjunction with a MkDir +// function/method, e.g: +// +// d, ok := h.Maybe(MkDir("some/path", true)) +func (m *MkDirHelper) Maybe(d Dir, err error) (Dir, bool) { + if err != nil { + m.errs = append(m.errs, err) + return Dir{}, false + } + return d, true +} + +// Err returns a join of all errors which have occurred during its lifetime. +func (m *MkDirHelper) Err() error { + return errors.Join(m.errs...) +} diff --git a/go/toolkit/rand.go b/go/toolkit/rand.go new file mode 100644 index 0000000..46372f3 --- /dev/null +++ b/go/toolkit/rand.go @@ -0,0 +1,15 @@ +package toolkit + +import ( + "crypto/rand" + "encoding/hex" +) + +// RandStr returns a random string with the given entropy of bytes. +func RandStr(l int) string { + b := make([]byte, l) + if _, err := rand.Read(b); err != nil { + panic(err) + } + return hex.EncodeToString(b) +} diff --git a/go/toolkit/toolkit.go b/go/toolkit/toolkit.go new file mode 100644 index 0000000..2f88a53 --- /dev/null +++ b/go/toolkit/toolkit.go @@ -0,0 +1,3 @@ +// Package toolkit contains useful utilities which are not specific to any +// specific part of isle. +package toolkit diff --git a/tests/cases/hosts/01-create.sh b/tests/cases/hosts/01-create.sh index df08785..fbcea29 100644 --- a/tests/cases/hosts/01-create.sh +++ b/tests/cases/hosts/01-create.sh @@ -1,7 +1,6 @@ # shellcheck source=../../utils/with-1-data-1-empty-node-network.sh source "$UTILS"/with-1-data-1-empty-node-network.sh -adminBS="$XDG_STATE_HOME"/isle/bootstrap.json bs="$secondus_bootstrap" # set in with-1-data-1-empty-node-network.sh [ "$(jq -r <"$bs" '.Bootstrap.NetworkCreationParams.Domain')" = "shared.test" ] @@ -9,7 +8,7 @@ bs="$secondus_bootstrap" # set in with-1-data-1-empty-node-network.sh [ "$(jq -r <"$bs" '.Bootstrap.SignedHostAssigned.Body.Name')" = "secondus" ] [ "$(jq -r <"$bs" '.Bootstrap.Hosts.primus.PublicCredentials')" \ -= "$(jq -r <"$adminBS" '.SignedHostAssigned.Body.PublicCredentials')" ] += "$(jq -r <"$BOOTSTRAP_FILE" '.SignedHostAssigned.Body.PublicCredentials')" ] [ "$(jq <"$bs" '.Bootstrap.Hosts.primus.Garage.Instances|length')" = "3" ] diff --git a/tests/cases/nebula/01-create-cert.sh b/tests/cases/nebula/01-create-cert.sh index c53029c..c90c3ee 100644 --- a/tests/cases/nebula/01-create-cert.sh +++ b/tests/cases/nebula/01-create-cert.sh @@ -9,7 +9,7 @@ cat pubkey --hostname non-esiste \ --public-key-path pubkey \ 2>&1 || true \ -) | grep '\[6\] Host not found' +) | grep '\[1002\] Host not found' isle nebula create-cert \ --hostname primus \ diff --git a/tests/utils/shared-daemon-env.sh b/tests/utils/shared-daemon-env.sh index 926b906..6bbb0ba 100644 --- a/tests/utils/shared-daemon-env.sh +++ b/tests/utils/shared-daemon-env.sh @@ -13,6 +13,6 @@ export TMPDIR="$TMPDIR" export XDG_RUNTIME_DIR="$XDG_RUNTIME_DIR" export XDG_STATE_HOME="$XDG_STATE_HOME" export ISLE_DAEMON_HTTP_SOCKET_PATH="$ROOT_TMPDIR/$base-daemon.sock" -BOOTSTRAP_FILE="$XDG_STATE_HOME/isle/bootstrap.json" +BOOTSTRAP_FILE="$XDG_STATE_HOME/isle/networks/$NETWORK_ID/bootstrap.json" cd "$TMPDIR" EOF diff --git a/tests/utils/with-1-data-1-empty-node-network.sh b/tests/utils/with-1-data-1-empty-node-network.sh index ff61e2b..506b321 100644 --- a/tests/utils/with-1-data-1-empty-node-network.sh +++ b/tests/utils/with-1-data-1-empty-node-network.sh @@ -97,3 +97,9 @@ secondus_ip="$( | cut -d/ -f1 )" +NETWORK_ID="$(jq '.Bootstrap.NetworkCreationParams.ID' "$secondus_bootstrap")" +export NETWORK_ID + +# shared-daemon-env.sh depends on NETWORK_ID, so we re-call as_primus in order +# to fully populate the envvars we need. +as_primus