1127 lines
29 KiB
Go
1127 lines
29 KiB
Go
//go:generate mockery --name Network --inpackage --filename network_mock.go
|
|
|
|
// Package network implements the Network type, which manages the daemon's
|
|
// membership in a single network.
|
|
package network
|
|
|
|
import (
|
|
"bytes"
|
|
"context"
|
|
"crypto/rand"
|
|
"encoding/json"
|
|
"errors"
|
|
"fmt"
|
|
"isle/bootstrap"
|
|
"isle/daemon/children"
|
|
"isle/daemon/daecommon"
|
|
"isle/daemon/network/glm"
|
|
"isle/garage"
|
|
"isle/garage/garagesrv"
|
|
"isle/jsonutil"
|
|
"isle/nebula"
|
|
"isle/secrets"
|
|
"isle/toolkit"
|
|
"net/netip"
|
|
"sync"
|
|
"time"
|
|
|
|
"dev.mediocregopher.com/mediocre-go-lib.git/mctx"
|
|
"dev.mediocregopher.com/mediocre-go-lib.git/mlog"
|
|
)
|
|
|
|
// GarageClientParams contains all the data needed to instantiate garage
|
|
// clients.
|
|
type GarageClientParams struct {
|
|
Node garage.RemoteNode
|
|
GlobalBucketS3APICredentials garage.S3APICredentials
|
|
|
|
// RPCSecret may be empty, if the secret is not available on the host.
|
|
RPCSecret string
|
|
}
|
|
|
|
// GlobalBucketS3APIClient returns an S3 client pre-configured with access to
|
|
// the global bucket.
|
|
func (p GarageClientParams) GlobalBucketS3APIClient() *garage.S3APIClient {
|
|
var (
|
|
addr = p.Node.S3APIAddr()
|
|
creds = p.GlobalBucketS3APICredentials
|
|
)
|
|
return garage.NewS3APIClient(addr, creds)
|
|
}
|
|
|
|
// CreateHostOpts holds the optional parameters of the CreateHost method.
type CreateHostOpts struct {
	// IP is the address to assign to the new host. When left as the zero
	// value an available IP is chosen at random.
	IP netip.Addr

	// CanCreateHosts indicates that the bootstrap produced by CreateHost
	// should give the new host the ability to create new hosts as well.
	CanCreateHosts bool
}
|
|
|
|
// JoiningBootstrap wraps a normal Bootstrap to include extra data which a host
|
|
// might need while joining a network.
|
|
type JoiningBootstrap struct {
|
|
Bootstrap bootstrap.Bootstrap
|
|
Secrets map[secrets.ID]json.RawMessage
|
|
}
|
|
|
|
// RPC defines the methods related to a single network which are available over
|
|
// the daemon's RPC interface.
|
|
type RPC interface {
|
|
// GetBootstrap returns the currently active Bootstrap for the Network. The
|
|
// PrivateCredentials field will be zero'd out before being returned.
|
|
GetBootstrap(context.Context) (bootstrap.Bootstrap, error)
|
|
|
|
// GetGarageClientParams returns a GarageClientParams for the current
|
|
// network state.
|
|
GetGarageClientParams(context.Context) (GarageClientParams, error)
|
|
|
|
// RemoveHost removes the host of the given name from the network.
|
|
RemoveHost(ctx context.Context, hostName nebula.HostName) error
|
|
|
|
// CreateHost creates a bootstrap for a new host with the given name and IP
|
|
// address.
|
|
//
|
|
// Errors:
|
|
// - ErrIPInUse - if IP field of CreateHostOpts is given, and already in use
|
|
// by another host in the network.
|
|
// - ErrSecretNotFound - This Network doesn't have the CA signing key.
|
|
CreateHost(
|
|
context.Context, nebula.HostName, CreateHostOpts,
|
|
) (
|
|
JoiningBootstrap, error,
|
|
)
|
|
|
|
// CreateNebulaCertificate creates and signs a new nebula certficate for an
|
|
// existing host, given the public key for that host. This is currently
|
|
// mostly useful for creating certs for mobile devices.
|
|
//
|
|
// Errors:
|
|
// - ErrHostNotFound
|
|
// - ErrSecretNotFound - This Network doesn't have the CA signing key.
|
|
CreateNebulaCertificate(
|
|
context.Context, nebula.HostName, nebula.EncryptingPublicKey,
|
|
) (
|
|
nebula.Certificate, error,
|
|
)
|
|
|
|
// GetConfig returns the configuration currently in use.
|
|
GetConfig(context.Context) (daecommon.NetworkConfig, error)
|
|
|
|
// SetConfig overrides the current config with the given one, adjusting any
|
|
// running child processes as needed.
|
|
//
|
|
// Errors:
|
|
// - ErrInvalidConfig
|
|
SetConfig(context.Context, daecommon.NetworkConfig) error
|
|
}
|
|
|
|
// Network manages membership in a single micropelago network. Each Network
|
|
// is comprised of a unique IP subnet, hosts connected together on that subnet
|
|
// via a VPN, an S3 storage layer only accessible to those hosts, plus other
|
|
// services built on this foundation.
|
|
//
|
|
// A single daemon (isle server) can manage multiple networks. Each network is
|
|
// expected to be independent of the others, ie they should not share any
|
|
// resources.
|
|
type Network interface {
|
|
RPC
|
|
|
|
// GetNetworkCreationParams returns the CreationParams that the Network was
|
|
// originally created with.
|
|
GetNetworkCreationParams(context.Context) (bootstrap.CreationParams, error)
|
|
|
|
// Shutdown blocks until all resources held or created by the Network,
|
|
// including child processes it has started, have been cleaned up.
|
|
//
|
|
// If this returns an error then it's possible that child processes are
|
|
// still running and are no longer managed.
|
|
Shutdown() error
|
|
}
|
|
|
|
// constructorsImpl implements the constructors interface. Its methods (load,
// join, create) are defined alongside the rest of the Network implementation.
type constructorsImpl struct{}
|
|
|
|
// newConstructors returns a constructorsImpl as a constructors value.
func newConstructors() constructors {
	return constructorsImpl{}
}
|
|
|
|
////////////////////////////////////////////////////////////////////////////////
|
|
// Network implementation
|
|
|
|
// Opts are optional parameters which can be passed in when initializing a new
|
|
// Network instance. A nil Opts is equivalent to a zero value.
|
|
type Opts struct {
|
|
GarageAdminToken string // Will be randomly generated if left unset.
|
|
|
|
// Config will be used as the configuration of the Network from its
|
|
// initialization onwards.
|
|
//
|
|
// If not given then the most recent NetworkConfig for the network will be
|
|
// used, either that which it was most recently initialized with or which
|
|
// was passed to [SetConfig].
|
|
Config *daecommon.NetworkConfig
|
|
|
|
// testBlocker is used by tests to set blockpoints.
|
|
testBlocker *toolkit.TestBlocker
|
|
|
|
// DEPRECATED See corresponding field in [children.Opts]
|
|
garageDefaultDBEngine garagesrv.DBEngine
|
|
}
|
|
|
|
func (o *Opts) withDefaults() *Opts {
|
|
if o == nil {
|
|
o = new(Opts)
|
|
}
|
|
|
|
if o.GarageAdminToken == "" {
|
|
o.GarageAdminToken = toolkit.RandStr(32)
|
|
}
|
|
|
|
return o
|
|
}
|
|
|
|
type network struct {
|
|
logger *mlog.Logger
|
|
|
|
envBinDirPath string
|
|
nebulaDeviceNamer *children.NebulaDeviceNamer
|
|
stateDir toolkit.Dir
|
|
runtimeDir toolkit.Dir
|
|
|
|
opts *Opts
|
|
|
|
secretsStore secrets.Store
|
|
garageLayoutMgr glm.GarageLayoutManager
|
|
|
|
l sync.RWMutex
|
|
children *children.Children
|
|
networkConfig daecommon.NetworkConfig
|
|
currBootstrap bootstrap.Bootstrap
|
|
|
|
workerCtx context.Context
|
|
workerCancel context.CancelFunc
|
|
wg sync.WaitGroup
|
|
}
|
|
|
|
// newNetwork returns an instantiated *network instance. All initialization
|
|
// steps which are common to all *network creation methods (load, join, create)
|
|
// are included here as well.
|
|
func newNetwork(
|
|
ctx context.Context,
|
|
logger *mlog.Logger,
|
|
envBinDirPath string,
|
|
nebulaDeviceNamer *children.NebulaDeviceNamer,
|
|
stateDir toolkit.Dir,
|
|
runtimeDir toolkit.Dir,
|
|
dirsMayExist bool,
|
|
currBootstrap bootstrap.Bootstrap,
|
|
opts *Opts,
|
|
) (
|
|
*network, error,
|
|
) {
|
|
ctx, cancel := context.WithCancel(context.WithoutCancel(ctx))
|
|
|
|
var (
|
|
ip = currBootstrap.ThisHost().IP()
|
|
n = &network{
|
|
logger: logger,
|
|
envBinDirPath: envBinDirPath,
|
|
nebulaDeviceNamer: nebulaDeviceNamer,
|
|
stateDir: stateDir,
|
|
runtimeDir: runtimeDir,
|
|
opts: opts.withDefaults(),
|
|
garageLayoutMgr: glm.NewGarageLayoutManager(stateDir, ip),
|
|
currBootstrap: currBootstrap,
|
|
workerCtx: ctx,
|
|
workerCancel: cancel,
|
|
}
|
|
err error
|
|
)
|
|
|
|
n.networkConfig, err = loadStoreConfig(n.stateDir, n.opts.Config)
|
|
if err != nil {
|
|
return nil, fmt.Errorf("resolving network config: %w", err)
|
|
}
|
|
|
|
if err := n.garageLayoutMgr.Validate(
|
|
ctx, n.networkConfig.Storage.Allocations,
|
|
); err != nil {
|
|
return nil, ErrInvalidConfig.WithData(err.Error())
|
|
}
|
|
|
|
secretsDir, err := n.stateDir.MkChildDir("secrets", dirsMayExist)
|
|
if err != nil {
|
|
return nil, fmt.Errorf("creating secrets dir: %w", err)
|
|
}
|
|
|
|
n.secretsStore = secrets.NewFSStore(secretsDir.Path)
|
|
|
|
return n, nil
|
|
}
|
|
|
|
// loadCreationParams returns the CreationParams of a Network which was
|
|
// Created/Joined with the given state directory.
|
|
func loadCreationParams(
|
|
stateDir toolkit.Dir,
|
|
) (
|
|
bootstrap.CreationParams, error,
|
|
) {
|
|
var (
|
|
bootstrapFilePath = bootstrap.StateDirPath(stateDir.Path)
|
|
bs bootstrap.Bootstrap
|
|
)
|
|
|
|
if err := jsonutil.LoadFile(&bs, bootstrapFilePath); err != nil {
|
|
return bootstrap.CreationParams{}, fmt.Errorf(
|
|
"loading bootstrap from %q: %w", bootstrapFilePath, err,
|
|
)
|
|
}
|
|
|
|
return bs.NetworkCreationParams, nil
|
|
}
|
|
|
|
func (constructorsImpl) load(
|
|
ctx context.Context,
|
|
logger *mlog.Logger,
|
|
envBinDirPath string,
|
|
nebulaDeviceNamer *children.NebulaDeviceNamer,
|
|
stateDir toolkit.Dir,
|
|
runtimeDir toolkit.Dir,
|
|
opts *Opts,
|
|
) (
|
|
Network, error,
|
|
) {
|
|
currBootstrap, err := loadBootstrapFromStateDir(stateDir.Path)
|
|
if err != nil {
|
|
return nil, fmt.Errorf("loading bootstrap from state dir: %w", err)
|
|
}
|
|
|
|
n, err := newNetwork(
|
|
ctx,
|
|
logger,
|
|
envBinDirPath,
|
|
nebulaDeviceNamer,
|
|
stateDir,
|
|
runtimeDir,
|
|
true,
|
|
currBootstrap,
|
|
opts,
|
|
)
|
|
if err != nil {
|
|
return nil, fmt.Errorf("instantiating Network: %w", err)
|
|
}
|
|
|
|
if err := n.initialize(ctx, false); err != nil {
|
|
return nil, fmt.Errorf("initializing with bootstrap: %w", err)
|
|
}
|
|
|
|
return n, nil
|
|
}
|
|
|
|
func (constructorsImpl) join(
|
|
ctx context.Context,
|
|
logger *mlog.Logger,
|
|
envBinDirPath string,
|
|
nebulaDeviceNamer *children.NebulaDeviceNamer,
|
|
joiningBootstrap JoiningBootstrap,
|
|
stateDir toolkit.Dir,
|
|
runtimeDir toolkit.Dir,
|
|
opts *Opts,
|
|
) (
|
|
Network, error,
|
|
) {
|
|
n, err := newNetwork(
|
|
ctx,
|
|
logger,
|
|
envBinDirPath,
|
|
nebulaDeviceNamer,
|
|
stateDir,
|
|
runtimeDir,
|
|
false,
|
|
joiningBootstrap.Bootstrap,
|
|
opts,
|
|
)
|
|
if err != nil {
|
|
return nil, fmt.Errorf("instantiating Network: %w", err)
|
|
}
|
|
|
|
if err := secrets.Import(
|
|
ctx, n.secretsStore, joiningBootstrap.Secrets,
|
|
); err != nil {
|
|
return nil, fmt.Errorf("importing secrets: %w", err)
|
|
}
|
|
|
|
if err := n.initialize(ctx, false); err != nil {
|
|
return nil, fmt.Errorf("initializing with bootstrap: %w", err)
|
|
}
|
|
|
|
return n, nil
|
|
}
|
|
|
|
func (constructorsImpl) create(
|
|
ctx context.Context,
|
|
logger *mlog.Logger,
|
|
envBinDirPath string,
|
|
nebulaDeviceNamer *children.NebulaDeviceNamer,
|
|
stateDir toolkit.Dir,
|
|
runtimeDir toolkit.Dir,
|
|
creationParams bootstrap.CreationParams,
|
|
ipNet nebula.IPNet,
|
|
hostName nebula.HostName,
|
|
opts *Opts,
|
|
) (
|
|
Network, error,
|
|
) {
|
|
nebulaCACreds, err := nebula.NewCACredentials(creationParams.Domain, ipNet)
|
|
if err != nil {
|
|
return nil, fmt.Errorf("creating nebula CA cert: %w", err)
|
|
}
|
|
|
|
garageRPCSecret := toolkit.RandStr(32)
|
|
|
|
hostBootstrap, err := bootstrap.New(
|
|
nebulaCACreds,
|
|
creationParams,
|
|
map[nebula.HostName]bootstrap.Host{},
|
|
hostName,
|
|
ipNet.FirstAddr(),
|
|
)
|
|
if err != nil {
|
|
return nil, fmt.Errorf("initializing bootstrap data: %w", err)
|
|
}
|
|
|
|
n, err := newNetwork(
|
|
ctx,
|
|
logger,
|
|
envBinDirPath,
|
|
nebulaDeviceNamer,
|
|
stateDir,
|
|
runtimeDir,
|
|
false,
|
|
hostBootstrap,
|
|
opts,
|
|
)
|
|
if err != nil {
|
|
return nil, fmt.Errorf("instantiating Network: %w", err)
|
|
}
|
|
|
|
if len(n.networkConfig.Storage.Allocations) < 3 {
|
|
return nil, ErrInvalidConfig.WithData(
|
|
"At least three storage allocations are required.",
|
|
)
|
|
}
|
|
|
|
err = n.garageLayoutMgr.Validate(ctx, n.networkConfig.Storage.Allocations)
|
|
if err != nil {
|
|
return nil, ErrInvalidConfig.WithData(err.Error())
|
|
}
|
|
|
|
err = daecommon.SetGarageRPCSecret(ctx, n.secretsStore, garageRPCSecret)
|
|
if err != nil {
|
|
return nil, fmt.Errorf("setting garage RPC secret: %w", err)
|
|
}
|
|
|
|
err = daecommon.SetNebulaCASigningPrivateKey(
|
|
ctx, n.secretsStore, nebulaCACreds.SigningPrivateKey,
|
|
)
|
|
if err != nil {
|
|
return nil, fmt.Errorf("setting nebula CA signing key secret: %w", err)
|
|
}
|
|
|
|
if err = n.garageLayoutMgr.SetActiveAllocations(
|
|
ctx, n.networkConfig.Storage.Allocations,
|
|
); err != nil {
|
|
return nil, fmt.Errorf("initializing GLM active allocations: %w", err)
|
|
}
|
|
|
|
if err := n.initialize(ctx, true); err != nil {
|
|
return nil, fmt.Errorf("initializing with bootstrap: %w", err)
|
|
}
|
|
|
|
return n, nil
|
|
}
|
|
|
|
// updateBootstrapUnsafe updates both the locally saved bootstrap as well as
|
|
// this host's bootstrap host info in garage, first applying the network config
|
|
// to the bootstrap host info.
|
|
//
|
|
// Must be called with the lock held.
|
|
func (n *network) updateBootstrapUnsafe(ctx context.Context) error {
|
|
var err error
|
|
if n.currBootstrap, err = applyNetworkConfigToBootstrap(
|
|
n.networkConfig, n.currBootstrap,
|
|
); err != nil {
|
|
return fmt.Errorf("combining configuration into bootstrap: %w", err)
|
|
}
|
|
|
|
n.logger.Info(ctx, "Writing updated bootstrap to state dir")
|
|
if err = writeBootstrapToStateDir(
|
|
n.stateDir.Path, n.currBootstrap,
|
|
); err != nil {
|
|
return fmt.Errorf("writing bootstrap to state dir: %w", err)
|
|
}
|
|
|
|
n.logger.Info(ctx, "Updating host info in garage")
|
|
if err := putGarageBoostrapHost(
|
|
ctx, n.secretsStore, n.currBootstrap,
|
|
); err != nil {
|
|
return fmt.Errorf("updating host info in garage: %w", err)
|
|
}
|
|
|
|
return nil
|
|
}
|
|
|
|
// see comment on garageWaitForAlloc
|
|
func (n *network) garageWaitForAllocs(
|
|
ctx context.Context, allocs []daecommon.ConfigStorageAllocation,
|
|
) error {
|
|
var errs []error
|
|
for _, alloc := range allocs {
|
|
garageAdminClient, ok := n.children.GarageAdminClientForAlloc(alloc)
|
|
if !ok {
|
|
return fmt.Errorf("no garage instance created for %+v", alloc)
|
|
}
|
|
defer garageAdminClient.Close()
|
|
|
|
ctx := mctx.WithAnnotator(ctx, alloc)
|
|
|
|
if err := garageWaitForAlloc(
|
|
ctx, n.logger, garageAdminClient,
|
|
); err != nil {
|
|
errs = append(errs, fmt.Errorf(
|
|
"waiting for alloc %+v to initialize: %w", alloc, err,
|
|
))
|
|
}
|
|
}
|
|
|
|
return errors.Join(errs...)
|
|
}
|
|
|
|
// must hold lock to call this
|
|
func (n *network) glmStateTransitionUnsafe(ctx context.Context) error {
|
|
var knownNodes []garage.KnownNode
|
|
|
|
if adminClient, ok := n.children.GarageAdminClient(); ok {
|
|
defer adminClient.Close()
|
|
|
|
n.logger.Info(ctx, "Getting garage cluster status")
|
|
status, err := adminClient.Status(ctx)
|
|
if err != nil {
|
|
return fmt.Errorf("getting garage cluster state: %w", err)
|
|
}
|
|
|
|
knownNodes = status.Nodes
|
|
}
|
|
|
|
n.logger.Info(ctx, "Calculating garage layout state transition")
|
|
stateTx, err := n.garageLayoutMgr.CalculateStateTransition(
|
|
ctx, knownNodes, n.networkConfig.Storage.Allocations,
|
|
)
|
|
if err != nil {
|
|
return fmt.Errorf("getting next state tx: %w", err)
|
|
}
|
|
|
|
childrenNetworkConfig := n.networkConfig
|
|
childrenNetworkConfig.Storage.Allocations = stateTx.ActiveAllocations()
|
|
|
|
n.logger.Info(ctx, "Reloading children with updated storage allocations")
|
|
err = n.children.Reload(ctx, childrenNetworkConfig, n.currBootstrap)
|
|
if err != nil {
|
|
return fmt.Errorf("reloading children: %w", err)
|
|
}
|
|
|
|
if adminClient, ok := n.children.GarageAdminClient(); ok {
|
|
defer adminClient.Close()
|
|
|
|
var (
|
|
host = n.currBootstrap.ThisHost()
|
|
// From garage's perspective a node is "removed" from the cluster by
|
|
// removing its role, which puts it into the draining state.
|
|
removeIDs = stateTx.DrainAllocationIDs()
|
|
)
|
|
|
|
addModifyRoles, err := garageAllocsToRoles(
|
|
host, stateTx.AddModifyAllocations,
|
|
)
|
|
if err != nil {
|
|
return fmt.Errorf("converting allocs to roles: %w", err)
|
|
}
|
|
|
|
n.logger.Info(ctx, "Applying state transition to garage layout")
|
|
if err := adminClient.ApplyLayout(
|
|
ctx, addModifyRoles, removeIDs,
|
|
); err != nil {
|
|
return fmt.Errorf("applying state tx to layout: %w", err)
|
|
}
|
|
} else {
|
|
n.logger.Info(ctx, "No garage instances running, no layout changes to make")
|
|
}
|
|
|
|
if err := n.garageWaitForAllocs(
|
|
ctx, n.networkConfig.Storage.Allocations,
|
|
); err != nil {
|
|
return fmt.Errorf(
|
|
"waiting for garage allocations to fully initialize: %w", err,
|
|
)
|
|
}
|
|
|
|
n.logger.Info(ctx, "Committing state transition")
|
|
if err = n.garageLayoutMgr.CommitStateTransition(
|
|
ctx, stateTx,
|
|
); err != nil {
|
|
return fmt.Errorf("commiting state tx: %w", err)
|
|
}
|
|
|
|
return nil
|
|
}
|
|
|
|
func (n *network) glmStateTransition(ctx context.Context) error {
|
|
n.l.Lock()
|
|
defer n.l.Unlock()
|
|
return n.glmStateTransitionUnsafe(ctx)
|
|
}
|
|
|
|
func (n *network) reloadHosts(ctx context.Context) error {
|
|
n.l.RLock()
|
|
currBootstrap := n.currBootstrap
|
|
n.l.RUnlock()
|
|
|
|
n.logger.Info(ctx, "Checking for bootstrap changes")
|
|
newHosts, err := getGarageBootstrapHosts(
|
|
ctx, n.logger, n.secretsStore, currBootstrap,
|
|
)
|
|
if err != nil {
|
|
return fmt.Errorf("getting hosts from garage: %w", err)
|
|
}
|
|
|
|
n.l.Lock()
|
|
defer n.l.Unlock()
|
|
|
|
n.currBootstrap.Hosts = newHosts
|
|
|
|
n.logger.Info(ctx, "Writing updated bootstrap to state dir")
|
|
err = writeBootstrapToStateDir(n.stateDir.Path, n.currBootstrap)
|
|
if err != nil {
|
|
return fmt.Errorf("writing bootstrap to state dir: %w", err)
|
|
}
|
|
|
|
childrenNetworkConfig, err := n.getChildrenNetworkConfig(ctx)
|
|
if err != nil {
|
|
return fmt.Errorf("getting network config for children: %w", err)
|
|
}
|
|
|
|
n.logger.Info(ctx, "Reloading child processes")
|
|
err = n.children.Reload(ctx, childrenNetworkConfig, n.currBootstrap)
|
|
if err != nil {
|
|
return fmt.Errorf(
|
|
"reloading child processes: %w", err,
|
|
)
|
|
}
|
|
|
|
return nil
|
|
}
|
|
|
|
func (n *network) periodically(
|
|
label string,
|
|
fn func(context.Context) error,
|
|
period time.Duration,
|
|
) {
|
|
n.wg.Add(1)
|
|
go func() {
|
|
defer n.wg.Done()
|
|
|
|
ctx := mctx.Annotate(n.workerCtx, "workerLabel", label)
|
|
|
|
ticker := time.NewTicker(period)
|
|
defer ticker.Stop()
|
|
|
|
n.logger.Info(ctx, "Starting background job worker")
|
|
defer n.logger.Info(ctx, "Stopping background job worker")
|
|
|
|
for {
|
|
select {
|
|
case <-ctx.Done():
|
|
return
|
|
|
|
case <-ticker.C:
|
|
n.logger.Info(ctx, "Background job worker")
|
|
if err := fn(ctx); err != nil {
|
|
n.logger.Error(ctx, "Background job failed", err)
|
|
}
|
|
}
|
|
}
|
|
}()
|
|
}
|
|
|
|
func (n *network) getChildrenNetworkConfig(
|
|
ctx context.Context,
|
|
) (
|
|
daecommon.NetworkConfig, error,
|
|
) {
|
|
childrenNetworkConfig := n.networkConfig
|
|
|
|
activeStorageAllocs, err := n.garageLayoutMgr.GetActiveAllocations(ctx)
|
|
if err != nil {
|
|
return daecommon.NetworkConfig{}, fmt.Errorf(
|
|
"getting active storage allocations: %w", err,
|
|
)
|
|
}
|
|
childrenNetworkConfig.Storage.Allocations = activeStorageAllocs
|
|
|
|
return childrenNetworkConfig, nil
|
|
}
|
|
|
|
func (n *network) initializePostChildren(
|
|
ctx context.Context, isCreate bool,
|
|
) error {
|
|
if !isCreate {
|
|
n.logger.Info(ctx, "Making any necessary changes to garage layout")
|
|
if err := n.glmStateTransitionUnsafe(ctx); err != nil {
|
|
return fmt.Errorf("performing garage layout transition: %w", err)
|
|
}
|
|
} else {
|
|
roles, err := garageAllocsToRoles(
|
|
n.currBootstrap.ThisHost(), n.networkConfig.Storage.Allocations,
|
|
)
|
|
if err != nil {
|
|
return fmt.Errorf("converting allocs to roles: %w", err)
|
|
}
|
|
|
|
garageAdminClient, _ := n.children.GarageAdminClient()
|
|
defer garageAdminClient.Close()
|
|
|
|
n.logger.Info(ctx, "Applying initial garage layout")
|
|
if err := garageAdminClient.ApplyLayout(ctx, roles, nil); err != nil {
|
|
return fmt.Errorf("applying initial garage layout: %w", err)
|
|
}
|
|
|
|
n.logger.Info(ctx, "Initializing garage shared global bucket")
|
|
garageGlobalBucketCreds, err := garageInitializeGlobalBucket(
|
|
ctx, garageAdminClient,
|
|
)
|
|
if err != nil {
|
|
return fmt.Errorf("initializing global bucket: %w", err)
|
|
}
|
|
|
|
if err = daecommon.SetGarageS3APIGlobalBucketCredentials(
|
|
ctx, n.secretsStore, garageGlobalBucketCreds,
|
|
); err != nil {
|
|
return fmt.Errorf("storing global bucket creds: %w", err)
|
|
}
|
|
|
|
n.logger.Info(ctx, "Waiting for garage instances to finish initializing")
|
|
if err := n.garageWaitForAllocs(
|
|
ctx, n.networkConfig.Storage.Allocations,
|
|
); err != nil {
|
|
return fmt.Errorf(
|
|
"waiting for garage allocations to fully initialize: %w", err,
|
|
)
|
|
}
|
|
}
|
|
|
|
if err := n.updateBootstrapUnsafe(ctx); err != nil {
|
|
return fmt.Errorf("updating bootstrap: %w", err)
|
|
}
|
|
|
|
// Do this now so that everything is stable before returning. This also
|
|
// serves a dual-purpose, as it makes sure that the above bootstrap update
|
|
// has propagated from the local garage instance, if any.
|
|
n.logger.Info(ctx, "Reloading hosts from network storage")
|
|
if err := n.reloadHosts(ctx); err != nil {
|
|
return fmt.Errorf("Reloading network bootstrap: %w", err)
|
|
}
|
|
|
|
return nil
|
|
}
|
|
|
|
func (n *network) initialize(ctx context.Context, isCreate bool) error {
|
|
childrenNetworkConfig, err := n.getChildrenNetworkConfig(ctx)
|
|
if err != nil {
|
|
return fmt.Errorf("getting network config for children: %w", err)
|
|
}
|
|
|
|
var garageBootstrapPeers []garage.RemoteNode
|
|
if isCreate {
|
|
garageBootstrapPeers, err = garageInitAllocs(
|
|
n.currBootstrap.ThisHost().IP(),
|
|
n.networkConfig.Storage.Allocations,
|
|
)
|
|
if err != nil {
|
|
return fmt.Errorf("initializing storage allocations: %w", err)
|
|
}
|
|
}
|
|
|
|
n.logger.Info(ctx, "Creating child processes")
|
|
n.children, err = children.New(
|
|
ctx,
|
|
n.logger.WithNamespace("children"),
|
|
n.envBinDirPath,
|
|
n.secretsStore,
|
|
childrenNetworkConfig,
|
|
n.runtimeDir,
|
|
n.opts.GarageAdminToken,
|
|
n.nebulaDeviceNamer,
|
|
n.currBootstrap,
|
|
&children.Opts{
|
|
GarageNewCluster: isCreate,
|
|
GarageBootstrapPeers: garageBootstrapPeers,
|
|
GarageDefaultDBEngine: n.opts.garageDefaultDBEngine,
|
|
},
|
|
)
|
|
if err != nil {
|
|
return fmt.Errorf("creating child processes: %w", err)
|
|
}
|
|
|
|
if err := n.initializePostChildren(ctx, isCreate); err != nil {
|
|
n.logger.Error(ctx, "Failed to initialize Network, shutting down children", err)
|
|
n.children.Shutdown()
|
|
return err
|
|
}
|
|
|
|
n.periodically("reloadHosts", n.reloadHosts, 3*time.Minute)
|
|
n.periodically("glmStateTransition", n.glmStateTransition, 10*time.Minute)
|
|
|
|
return nil
|
|
}
|
|
|
|
func withCurrBootstrap[Res any](
|
|
n *network, fn func(bootstrap.Bootstrap) (Res, error),
|
|
) (Res, error) {
|
|
n.l.RLock()
|
|
defer n.l.RUnlock()
|
|
|
|
currBootstrap := n.currBootstrap
|
|
return fn(currBootstrap)
|
|
}
|
|
|
|
func (n *network) getBootstrap() (
|
|
bootstrap.Bootstrap, error,
|
|
) {
|
|
return withCurrBootstrap(n, func(
|
|
currBootstrap bootstrap.Bootstrap,
|
|
) (
|
|
bootstrap.Bootstrap, error,
|
|
) {
|
|
return currBootstrap, nil
|
|
})
|
|
}
|
|
|
|
func (n *network) GetBootstrap(
|
|
ctx context.Context,
|
|
) (
|
|
bootstrap.Bootstrap, error,
|
|
) {
|
|
return withCurrBootstrap(n, func(
|
|
currBootstrap bootstrap.Bootstrap,
|
|
) (
|
|
bootstrap.Bootstrap, error,
|
|
) {
|
|
currBootstrap.PrivateCredentials = nebula.HostPrivateCredentials{}
|
|
return currBootstrap, nil
|
|
})
|
|
}
|
|
|
|
func (n *network) GetGarageClientParams(
|
|
ctx context.Context,
|
|
) (
|
|
GarageClientParams, error,
|
|
) {
|
|
return withCurrBootstrap(n, func(
|
|
currBootstrap bootstrap.Bootstrap,
|
|
) (
|
|
GarageClientParams, error,
|
|
) {
|
|
return getGarageClientParams(ctx, n.secretsStore, currBootstrap)
|
|
})
|
|
}
|
|
|
|
func (n *network) RemoveHost(ctx context.Context, hostName nebula.HostName) error {
|
|
_, err := withCurrBootstrap(n, func(
|
|
currBootstrap bootstrap.Bootstrap,
|
|
) (
|
|
struct{}, error,
|
|
) {
|
|
garageClientParams, err := getGarageClientParams(
|
|
ctx, n.secretsStore, currBootstrap,
|
|
)
|
|
if err != nil {
|
|
return struct{}{}, fmt.Errorf("get garage client params: %w", err)
|
|
}
|
|
|
|
client := garageClientParams.GlobalBucketS3APIClient()
|
|
defer client.Close()
|
|
|
|
return struct{}{}, removeGarageBootstrapHost(ctx, client, hostName)
|
|
})
|
|
return err
|
|
}
|
|
|
|
func makeCACreds(
|
|
currBootstrap bootstrap.Bootstrap,
|
|
caSigningPrivateKey nebula.SigningPrivateKey,
|
|
) nebula.CACredentials {
|
|
return nebula.CACredentials{
|
|
Public: currBootstrap.CAPublicCredentials,
|
|
SigningPrivateKey: caSigningPrivateKey,
|
|
}
|
|
}
|
|
|
|
func chooseAvailableIP(b bootstrap.Bootstrap) (netip.Addr, error) {
|
|
var (
|
|
cidrIPNet = b.CAPublicCredentials.Cert.Unwrap().Details.Subnets[0]
|
|
cidrMask = cidrIPNet.Mask
|
|
cidrIPB = cidrIPNet.IP
|
|
|
|
cidr = netip.MustParsePrefix(cidrIPNet.String())
|
|
cidrIP = cidr.Addr()
|
|
cidrSuffixBits = cidrIP.BitLen() - cidr.Bits()
|
|
|
|
inUseIPs = make(map[netip.Addr]struct{}, len(b.Hosts))
|
|
)
|
|
|
|
for _, host := range b.Hosts {
|
|
inUseIPs[host.IP()] = struct{}{}
|
|
}
|
|
|
|
// first check that there are any addresses at all. We can determine the
|
|
// number of possible addresses using the network CIDR. The first IP in a
|
|
// subnet is the network identifier, and is reserved. The last IP is the
|
|
// broadcast IP, and is also reserved. Hence, the -2.
|
|
usableIPs := (1 << cidrSuffixBits) - 2
|
|
if len(inUseIPs) >= usableIPs {
|
|
return netip.Addr{}, errors.New("no available IPs")
|
|
}
|
|
|
|
// We need to know the subnet broadcast address, so we don't accidentally
|
|
// produce it.
|
|
cidrBCastIPB := bytes.Clone(cidrIPB)
|
|
for i := range cidrBCastIPB {
|
|
cidrBCastIPB[i] |= ^cidrMask[i]
|
|
}
|
|
cidrBCastIP, ok := netip.AddrFromSlice(cidrBCastIPB)
|
|
if !ok {
|
|
panic(fmt.Sprintf("invalid broadcast ip calculated: %x", cidrBCastIP))
|
|
}
|
|
|
|
// Try a handful of times to pick an IP at random. This is preferred, as it
|
|
// leaves less room for two different CreateHost calls to choose the same
|
|
// IP.
|
|
for range 20 {
|
|
b := make([]byte, len(cidrIPB))
|
|
if _, err := rand.Read(b); err != nil {
|
|
return netip.Addr{}, fmt.Errorf("reading random bytes: %w", err)
|
|
}
|
|
|
|
for i := range b {
|
|
b[i] = cidrIPB[i] | (b[i] & ^cidrMask[i])
|
|
}
|
|
|
|
ip, ok := netip.AddrFromSlice(b)
|
|
if !ok {
|
|
panic(fmt.Sprintf("generated invalid IP: %x", b))
|
|
} else if !cidr.Contains(ip) {
|
|
panic(fmt.Sprintf(
|
|
"generated IP %v which is not in cidr %v", ip, cidr,
|
|
))
|
|
}
|
|
|
|
if ip == cidrIP || ip == cidrBCastIP {
|
|
continue
|
|
}
|
|
|
|
if _, inUse := inUseIPs[ip]; !inUse {
|
|
return ip, nil
|
|
}
|
|
}
|
|
|
|
// If randomly picking fails then just go through IPs one by one until the
|
|
// free one is found.
|
|
for ip := cidrIP.Next(); ip != cidrBCastIP; ip = ip.Next() {
|
|
if _, inUse := inUseIPs[ip]; !inUse {
|
|
return ip, nil
|
|
}
|
|
}
|
|
|
|
panic("All ips are in-use, but somehow that wasn't determined earlier")
|
|
}
|
|
|
|
func (n *network) CreateHost(
|
|
ctx context.Context,
|
|
hostName nebula.HostName,
|
|
opts CreateHostOpts,
|
|
) (
|
|
JoiningBootstrap, error,
|
|
) {
|
|
n.l.RLock()
|
|
currBootstrap := n.currBootstrap
|
|
n.l.RUnlock()
|
|
|
|
ip := opts.IP
|
|
if ip == (netip.Addr{}) {
|
|
var err error
|
|
if ip, err = chooseAvailableIP(currBootstrap); err != nil {
|
|
return JoiningBootstrap{}, fmt.Errorf(
|
|
"choosing available IP: %w", err,
|
|
)
|
|
}
|
|
} else {
|
|
for _, host := range currBootstrap.Hosts {
|
|
if host.IP() == ip {
|
|
return JoiningBootstrap{}, ErrIPInUse
|
|
}
|
|
}
|
|
}
|
|
|
|
caSigningPrivateKey, err := daecommon.GetNebulaCASigningPrivateKey(
|
|
ctx, n.secretsStore,
|
|
)
|
|
if err != nil {
|
|
if errors.Is(err, secrets.ErrNotFound) {
|
|
err = ErrSecretNotFound
|
|
}
|
|
return JoiningBootstrap{}, fmt.Errorf("getting CA signing key: %w", err)
|
|
}
|
|
|
|
var joiningBootstrap JoiningBootstrap
|
|
joiningBootstrap.Bootstrap, err = bootstrap.New(
|
|
makeCACreds(currBootstrap, caSigningPrivateKey),
|
|
currBootstrap.NetworkCreationParams,
|
|
currBootstrap.Hosts,
|
|
hostName,
|
|
ip,
|
|
)
|
|
if err != nil {
|
|
return JoiningBootstrap{}, fmt.Errorf(
|
|
"initializing bootstrap data: %w", err,
|
|
)
|
|
}
|
|
|
|
secretsIDs := []secrets.ID{
|
|
daecommon.GarageRPCSecretSecretID,
|
|
daecommon.GarageS3APIGlobalBucketCredentialsSecretID,
|
|
}
|
|
|
|
if opts.CanCreateHosts {
|
|
secretsIDs = append(
|
|
secretsIDs, daecommon.NebulaCASigningPrivateKeySecretID,
|
|
)
|
|
}
|
|
|
|
if joiningBootstrap.Secrets, err = secrets.Export(
|
|
ctx, n.secretsStore, secretsIDs,
|
|
); err != nil {
|
|
return JoiningBootstrap{}, fmt.Errorf("exporting secrets: %w", err)
|
|
}
|
|
|
|
n.logger.Info(ctx, "Putting new host in garage")
|
|
err = putGarageBoostrapHost(ctx, n.secretsStore, joiningBootstrap.Bootstrap)
|
|
if err != nil {
|
|
return JoiningBootstrap{}, fmt.Errorf("putting new host in garage: %w", err)
|
|
}
|
|
|
|
n.logger.Info(ctx, "Reloading local state with new host")
|
|
if err = n.reloadHosts(ctx); err != nil {
|
|
return JoiningBootstrap{}, fmt.Errorf("reloading child processes: %w", err)
|
|
}
|
|
|
|
return joiningBootstrap, nil
|
|
}
|
|
|
|
func (n *network) CreateNebulaCertificate(
|
|
ctx context.Context,
|
|
hostName nebula.HostName,
|
|
hostPubKey nebula.EncryptingPublicKey,
|
|
) (
|
|
nebula.Certificate, error,
|
|
) {
|
|
return withCurrBootstrap(n, func(
|
|
currBootstrap bootstrap.Bootstrap,
|
|
) (
|
|
nebula.Certificate, error,
|
|
) {
|
|
host, ok := currBootstrap.Hosts[hostName]
|
|
if !ok {
|
|
return nebula.Certificate{}, ErrHostNotFound
|
|
}
|
|
ip := host.IP()
|
|
|
|
caSigningPrivateKey, err := daecommon.GetNebulaCASigningPrivateKey(
|
|
ctx, n.secretsStore,
|
|
)
|
|
if err != nil {
|
|
if errors.Is(err, secrets.ErrNotFound) {
|
|
err = ErrSecretNotFound
|
|
}
|
|
return nebula.Certificate{}, fmt.Errorf("getting CA signing key: %w", err)
|
|
}
|
|
|
|
caCreds := makeCACreds(currBootstrap, caSigningPrivateKey)
|
|
|
|
return nebula.NewHostCert(caCreds, hostPubKey, hostName, ip)
|
|
})
|
|
}
|
|
|
|
func (n *network) GetConfig(context.Context) (daecommon.NetworkConfig, error) {
|
|
n.l.RLock()
|
|
defer n.l.RUnlock()
|
|
return n.networkConfig, nil
|
|
}
|
|
|
|
func (n *network) SetConfig(
|
|
ctx context.Context, config daecommon.NetworkConfig,
|
|
) error {
|
|
n.l.Lock()
|
|
defer n.l.Unlock()
|
|
|
|
if err := n.garageLayoutMgr.Validate(
|
|
ctx, config.Storage.Allocations,
|
|
); err != nil {
|
|
return ErrInvalidConfig.WithData(err.Error())
|
|
}
|
|
|
|
if _, err := loadStoreConfig(n.stateDir, &config); err != nil {
|
|
return fmt.Errorf("storing new config: %w", err)
|
|
}
|
|
|
|
n.networkConfig = config
|
|
|
|
n.logger.Info(ctx, "Making any necessary changes to garage layout")
|
|
if err := n.glmStateTransitionUnsafe(ctx); err != nil {
|
|
return fmt.Errorf("performing garage layout transition: %w", err)
|
|
}
|
|
|
|
if err := n.updateBootstrapUnsafe(ctx); err != nil {
|
|
return fmt.Errorf("updating bootstrap: %w", err)
|
|
}
|
|
|
|
return nil
|
|
}
|
|
|
|
func (n *network) GetNetworkCreationParams(
|
|
ctx context.Context,
|
|
) (
|
|
bootstrap.CreationParams, error,
|
|
) {
|
|
return withCurrBootstrap(n, func(
|
|
currBootstrap bootstrap.Bootstrap,
|
|
) (
|
|
bootstrap.CreationParams, error,
|
|
) {
|
|
return currBootstrap.NetworkCreationParams, nil
|
|
})
|
|
}
|
|
|
|
func (n *network) Shutdown() error {
|
|
n.workerCancel()
|
|
n.wg.Wait()
|
|
|
|
if n.children != nil {
|
|
n.logger.Info(context.Background(), "Shutting down children")
|
|
n.children.Shutdown()
|
|
}
|
|
|
|
return nil
|
|
}
|