// Package children manages the creation, lifetime, and shutdown of child // processes created by the daemon. package children import ( "cmp" "context" "errors" "fmt" "slices" "code.betamike.com/micropelago/pmux/pmuxlib" "dev.mediocregopher.com/mediocre-go-lib.git/mctx" "dev.mediocregopher.com/mediocre-go-lib.git/mlog" "golang.org/x/exp/maps" "isle/bootstrap" "isle/daemon/daecommon" "isle/garage" "isle/garage/garagesrv" "isle/secrets" "isle/toolkit" ) type garageProc struct { *pmuxlib.Process alloc daecommon.ConfigStorageAllocation adminAddr string } // Opts are optional fields which can be passed into New. A nil value is // equivalent to a zero value. type Opts struct { // GarageNewCluster should be true if the garage instances being started // are the first instances in a cluster which is being created. GarageNewCluster bool // GarageBootstrapPeers will be used as the set of peers each garage // instance should use to find the rest of the garage cluster. // // Defaults to peer information contained in the bootstrap hosts. GarageBootstrapPeers []garage.RemoteNode // DEPRECATED can be used to manually set the db engine used by garage for // new allocations. If not given then garagesrv.DBEngineSqlite will be used // for new allocations. GarageDefaultDBEngine garagesrv.DBEngine // TestBlocker is used by tests to set blockpoints. TestBlocker *toolkit.TestBlocker } func (o *Opts) withDefaults() *Opts { if o == nil { o = new(Opts) } if o.GarageDefaultDBEngine == "" { o.GarageDefaultDBEngine = garageDefaultDBEngine } return o } // Children manages all child processes of a network. Child processes are // comprised of: // - nebula // - dnsmasq // - garage (0 or more, depending on configured storage allocations) type Children struct { logger *mlog.Logger binDirPath string runtimeDir toolkit.Dir garageAdminToken string nebulaDeviceNamer *NebulaDeviceNamer opts *Opts garageRPCSecret string nebulaProc *pmuxlib.Process dnsmasqProc *pmuxlib.Process garageProcs map[string]garageProc } // New initializes and returns a Children instance. If initialization fails an // error is returned. func New( ctx context.Context, logger *mlog.Logger, binDirPath string, secretsStore secrets.Store, networkConfig daecommon.NetworkConfig, runtimeDir toolkit.Dir, garageAdminToken string, nebulaDeviceNamer *NebulaDeviceNamer, hostBootstrap bootstrap.Bootstrap, opts *Opts, ) ( *Children, error, ) { opts = opts.withDefaults() logger.Info(ctx, "Loading secrets") garageRPCSecret, err := daecommon.GetGarageRPCSecret(ctx, secretsStore) if err != nil && !errors.Is(err, secrets.ErrNotFound) { return nil, fmt.Errorf("loading garage RPC secret: %w", err) } c := &Children{ logger: logger, binDirPath: binDirPath, runtimeDir: runtimeDir, garageAdminToken: garageAdminToken, nebulaDeviceNamer: nebulaDeviceNamer, opts: opts, garageRPCSecret: garageRPCSecret, } if c.nebulaProc, err = nebulaPmuxProc( ctx, c.logger, c.runtimeDir.Path, c.binDirPath, c.nebulaDeviceNamer, networkConfig, hostBootstrap, ); err != nil { return nil, fmt.Errorf("starting nebula: %w", err) } if err := waitForNebula(ctx, c.logger, hostBootstrap); err != nil { logger.Warn(ctx, "Failed waiting for nebula to initialize, shutting down child processes", err) c.Shutdown() return nil, fmt.Errorf("waiting for nebula to start: %w", err) } if c.dnsmasqProc, err = dnsmasqPmuxProc( ctx, c.logger, c.runtimeDir.Path, c.binDirPath, networkConfig, hostBootstrap, ); err != nil { logger.Warn(ctx, "Failed to start dnsmasq, shutting down child processes", err) c.Shutdown() return nil, fmt.Errorf("starting dnsmasq: %w", err) } garageBootstrapPeers := opts.GarageBootstrapPeers if garageBootstrapPeers == nil { garageBootstrapPeers = hostBootstrap.GarageNodes() } if c.garageProcs, err = garagePmuxProcs( ctx, c.logger, garageRPCSecret, c.runtimeDir.Path, c.binDirPath, networkConfig, garageAdminToken, hostBootstrap, garageBootstrapPeers, c.opts.GarageDefaultDBEngine, ); err != nil { logger.Warn(ctx, "Failed to start garage processes, shutting down child processes", err) c.Shutdown() return nil, fmt.Errorf("starting garage processes: %w", err) } if err := waitForGarage( ctx, c.logger, garageAdminToken, c.garageProcs, opts.GarageNewCluster, ); err != nil { logger.Warn(ctx, "Failed waiting for garage processes to initialize, shutting down child processes", err) c.Shutdown() return nil, fmt.Errorf("waiting for garage processes to initialize: %w", err) } return c, nil } func (c *Children) reloadDNSMasq( ctx context.Context, networkConfig daecommon.NetworkConfig, hostBootstrap bootstrap.Bootstrap, ) error { if _, changed, err := dnsmasqWriteConfig( ctx, c.logger, c.runtimeDir.Path, networkConfig, hostBootstrap, ); err != nil { return fmt.Errorf("writing new dnsmasq config: %w", err) } else if !changed { c.logger.Info(ctx, "No changes to dnsmasq config file") return nil } c.logger.Info(ctx, "dnsmasq config file has changed, restarting process") c.dnsmasqProc.Restart() return nil } func (c *Children) reloadNebula( ctx context.Context, networkConfig daecommon.NetworkConfig, hostBootstrap bootstrap.Bootstrap, ) error { if _, changed, err := nebulaWriteConfig( ctx, c.logger, c.runtimeDir.Path, c.nebulaDeviceNamer, networkConfig, hostBootstrap, ); err != nil { return fmt.Errorf("writing a new nebula config: %w", err) } else if !changed { c.logger.Info(ctx, "No changes to nebula config file") return nil } c.logger.Info(ctx, "nebula config file has changed, restarting process") c.nebulaProc.Restart() if err := waitForNebula(ctx, c.logger, hostBootstrap); err != nil { return fmt.Errorf("waiting for nebula to start: %w", err) } return nil } func (c *Children) reloadGarage( ctx context.Context, networkConfig daecommon.NetworkConfig, hostBootstrap bootstrap.Bootstrap, ) error { var ( allocs = networkConfig.Storage.Allocations thisHost = hostBootstrap.ThisHost() anyStarted bool allocsM = map[daecommon.ConfigStorageAllocation]struct{}{} ) for _, alloc := range allocs { allocsM[alloc] = struct{}{} var ( procName = garagePmuxProcName(alloc) ctx = mctx.Annotate( mctx.WithAnnotator(ctx, alloc), "garageProcName", procName, ) ) // Rewrite the child config always, even if we don't always restart it. // If nothing else this will capture any changes to the bootstrap nodes, // which will be useful if garage gets restarted for any reason. childConfigPath, err := garageWriteChildConfig( ctx, c.logger, c.garageRPCSecret, c.runtimeDir.Path, c.garageAdminToken, hostBootstrap, hostBootstrap.GarageNodes(), alloc, garageDefaultDBEngine, ) if err != nil { return fmt.Errorf("writing child config file for alloc %+v: %w", alloc, err) } if proc, ok := c.garageProcs[procName]; ok { if proc.alloc == alloc { c.logger.Info(ctx, "No changes to storage allocation, leaving garage process as-is") continue } c.logger.Info(ctx, "Storage allocation modified, restarting garage process") proc.Restart() anyStarted = true proc.alloc = alloc c.garageProcs[procName] = proc continue } c.logger.Info(ctx, "New storage allocation, creating garage process") c.garageProcs[procName] = garageProc{ Process: garagePmuxProc( ctx, c.logger, c.binDirPath, procName, childConfigPath, ), alloc: alloc, adminAddr: garageAllocAdminAddr(thisHost, alloc), } anyStarted = true } for procName, proc := range c.garageProcs { if _, ok := allocsM[proc.alloc]; ok { continue } ctx := mctx.Annotate( mctx.WithAnnotator(ctx, proc.alloc), "garageProcName", procName, ) c.logger.Info(ctx, "Storage allocation removed, stopping garage process") proc.Stop() delete(c.garageProcs, procName) } if anyStarted { if err := waitForGarage( ctx, c.logger, c.garageAdminToken, c.garageProcs, false, ); err != nil { return fmt.Errorf("waiting for garage to start: %w", err) } } return nil } // Reload applies a ReloadDiff to the Children, using the given bootstrap which // must be the same one which was passed to CalculateReloadDiff. func (c *Children) Reload( ctx context.Context, newNetworkConfig daecommon.NetworkConfig, newBootstrap bootstrap.Bootstrap, ) error { if err := c.reloadNebula(ctx, newNetworkConfig, newBootstrap); err != nil { return fmt.Errorf("reloading nebula: %w", err) } c.opts.TestBlocker.Blockpoint(ctx, "Children.Reload.postReloadNebula") var errs []error if err := c.reloadDNSMasq(ctx, newNetworkConfig, newBootstrap); err != nil { errs = append(errs, fmt.Errorf("reloading dnsmasq: %w", err)) } if err := c.reloadGarage(ctx, newNetworkConfig, newBootstrap); err != nil { errs = append(errs, fmt.Errorf("reloading garage: %w", err)) } return errors.Join(errs...) } // ActiveStorageAllocations returns the storage allocations which currently have // active garage instances. func (c *Children) ActiveStorageAllocations() []daecommon.ConfigStorageAllocation { allocs := make([]daecommon.ConfigStorageAllocation, 0, len(c.garageProcs)) for _, proc := range c.garageProcs { allocs = append(allocs, proc.alloc) } slices.SortFunc(allocs, func(a, b daecommon.ConfigStorageAllocation) int { return cmp.Compare(a.RPCPort, b.RPCPort) }) return allocs } // GarageAdminClient returns an admin client for an active local garage process, // or false if there are no garage processes. func (c *Children) GarageAdminClient() (*garage.AdminClient, bool) { if len(c.garageProcs) == 0 { return nil, false } procsSlice := maps.Values(c.garageProcs) slices.SortFunc(procsSlice, func(a, b garageProc) int { return cmp.Compare(a.alloc.RPCPort, b.alloc.RPCPort) }) return garage.NewAdminClient( garageAdminClientLogger(c.logger), procsSlice[0].adminAddr, c.garageAdminToken, ), true } // GarageAdminClientForAlloc returns an admin client for a particular allocation // which has a currently running garage instance, or false if there the // allocation has no currently running instance. func (c *Children) GarageAdminClientForAlloc( alloc daecommon.ConfigStorageAllocation, ) ( *garage.AdminClient, bool, ) { procName := garagePmuxProcName(alloc) proc, ok := c.garageProcs[procName] if !ok { return nil, false } return garage.NewAdminClient( garageAdminClientLogger(c.logger), proc.adminAddr, c.garageAdminToken, ), true } // Shutdown blocks until all child processes have gracefully shut themselves // down. func (c *Children) Shutdown() { for _, proc := range c.garageProcs { proc.Stop() } if c.dnsmasqProc != nil { c.dnsmasqProc.Stop() } if c.nebulaProc != nil { c.nebulaProc.Stop() } }