isle/go/daemon/network/network.go

1076 lines
28 KiB
Go
Raw Normal View History

2024-11-09 22:17:10 +00:00
//go:generate mockery --name Network --inpackage --filename network_mock.go
// Package network implements the Network type, which manages the daemon's
// membership in a single network.
package network
import (
"bytes"
"cmp"
"context"
"crypto/rand"
"encoding/json"
"errors"
"fmt"
"isle/bootstrap"
"isle/daemon/children"
"isle/daemon/daecommon"
"isle/garage"
"isle/jsonutil"
"isle/nebula"
"isle/secrets"
"isle/toolkit"
"net/netip"
"slices"
"sync"
"time"
"dev.mediocregopher.com/mediocre-go-lib.git/mctx"
"dev.mediocregopher.com/mediocre-go-lib.git/mlog"
"golang.org/x/exp/maps"
)
// GarageClientParams bundles the data required to construct garage clients.
type GarageClientParams struct {
	// Node supplies the S3 API address used by GlobalBucketS3APIClient.
	Node garage.RemoteNode

	// GlobalBucketS3APICredentials are the credentials handed to the client
	// returned by GlobalBucketS3APIClient.
	GlobalBucketS3APICredentials garage.S3APICredentials

	// RPCSecret may be empty, if the secret is not available on the host.
	//
	// TODO this shouldn't really be here I don't think, remove it?
	RPCSecret string
}
// GlobalBucketS3APIClient constructs an S3 client which targets the S3 API
// address of p.Node and uses the global bucket credentials held in p.
func (p GarageClientParams) GlobalBucketS3APIClient() *garage.S3APIClient {
	return garage.NewS3APIClient(
		p.Node.S3APIAddr(), p.GlobalBucketS3APICredentials,
	)
}
// CreateHostOpts holds the optional parameters accepted by CreateHost.
type CreateHostOpts struct {
	// IP is the address to assign to the new host. When left as the zero
	// value an available address is chosen at random.
	IP netip.Addr

	// CanCreateHosts, when set, causes the produced bootstrap to also carry
	// what the new host needs in order to create further hosts itself.
	CanCreateHosts bool

	// TODO add nebula cert tags
}
// JoiningBootstrap pairs a Bootstrap with the additional data a host may need
// in order to join a network.
type JoiningBootstrap struct {
	// Bootstrap is the bootstrap generated for the joining host.
	Bootstrap bootstrap.Bootstrap

	// Secrets are exported secrets, keyed by ID, which get imported into the
	// joining host's secrets store by Join.
	Secrets map[secrets.ID]json.RawMessage
}
// RPC defines the methods related to a single network which are available over
// the daemon's RPC interface.
type RPC interface {
// GetHosts returns all hosts known to the network, sorted by their name.
GetHosts(context.Context) ([]bootstrap.Host, error)
// GetGarageClientParams returns a GarageClientParams for the current
// network state.
GetGarageClientParams(context.Context) (GarageClientParams, error)
// GetNebulaCAPublicCredentials returns the CAPublicCredentials for the
// network.
GetNebulaCAPublicCredentials(
context.Context,
) (
nebula.CAPublicCredentials, error,
)
// RemoveHost removes the host of the given name from the network.
RemoveHost(ctx context.Context, hostName nebula.HostName) error
// CreateHost creates a bootstrap for a new host with the given name and IP
// address.
CreateHost(
context.Context, nebula.HostName, CreateHostOpts,
) (
JoiningBootstrap, error,
)
// CreateNebulaCertificate creates and signs a new nebula certficate for an
// existing host, given the public key for that host. This is currently
// mostly useful for creating certs for mobile devices.
//
// TODO replace this with CreateHostBootstrap, and the
// CreateNebulaCertificate RPC method can just pull cert out of that.
//
// Errors:
// - ErrHostNotFound
CreateNebulaCertificate(
context.Context, nebula.HostName, nebula.EncryptingPublicKey,
) (
nebula.Certificate, error,
)
2024-10-23 18:18:11 +00:00
// GetConfig returns the configuration currently in use.
GetConfig(context.Context) (daecommon.NetworkConfig, error)
// SetConfig overrides the current config with the given one, adjusting any
// running child processes as needed.
SetConfig(context.Context, daecommon.NetworkConfig) error
}
// Network manages this host's membership in one micropelago network. A network
// consists of a unique IP subnet, the hosts joined together on that subnet by
// a VPN, an S3 storage layer which only those hosts can reach, and further
// services layered on top.
//
// One daemon (isle server) may manage several networks at once. Networks are
// expected to be independent of each other, ie they should not share any
// resources.
type Network interface {
	RPC

	// GetNetworkCreationParams returns the CreationParams which the Network
	// was originally created with.
	GetNetworkCreationParams(context.Context) (bootstrap.CreationParams, error)

	// Shutdown blocks until every resource held or created by the Network,
	// child processes included, has been cleaned up.
	//
	// If an error is returned then child processes may still be running and
	// are no longer being managed.
	Shutdown() error
}
////////////////////////////////////////////////////////////////////////////////
// Network implementation
// Opts holds the optional parameters accepted when initializing a new Network
// instance. Passing a nil Opts is the same as passing a zero value.
type Opts struct {
	// ChildrenOpts is handed through as-is when child processes are created.
	ChildrenOpts *children.Opts

	GarageAdminToken string // Will be randomly generated if left unset.
}
// withDefaults returns a copy of o in which every unset field has been given
// its default value. A nil receiver is treated as a zero-value Opts.
//
// The receiver itself is never modified. This matters when a caller re-uses a
// single Opts for several networks: writing the generated GarageAdminToken
// back into the shared value would cause all of those networks to end up with
// the same token.
func (o *Opts) withDefaults() *Opts {
	var res Opts
	if o != nil {
		// Shallow copy; pointer fields such as ChildrenOpts are only read.
		res = *o
	}

	if res.GarageAdminToken == "" {
		res.GarageAdminToken = toolkit.RandStr(32)
	}

	return &res
}
type network struct {
2024-10-24 20:14:13 +00:00
logger *mlog.Logger
envBinDirPath string
stateDir toolkit.Dir
runtimeDir toolkit.Dir
opts *Opts
secretsStore secrets.Store
l sync.RWMutex
children *children.Children
2024-10-24 20:14:13 +00:00
networkConfig daecommon.NetworkConfig
currBootstrap bootstrap.Bootstrap
workerCtx context.Context
workerCancel context.CancelFunc
wg sync.WaitGroup
}
// instatiateNetwork constructs a *network from the given arguments without
// initializing it.
//
// The worker context stored on the result is derived from ctx, but is detached
// from ctx's cancellation; it is only canceled via the stored workerCancel.
func instatiateNetwork(
	ctx context.Context,
	logger *mlog.Logger,
	networkConfig daecommon.NetworkConfig,
	envBinDirPath string,
	stateDir toolkit.Dir,
	runtimeDir toolkit.Dir,
	opts *Opts,
) *network {
	workerCtx, workerCancel := context.WithCancel(context.WithoutCancel(ctx))

	n := new(network)
	n.logger = logger
	n.networkConfig = networkConfig
	n.envBinDirPath = envBinDirPath
	n.stateDir = stateDir
	n.runtimeDir = runtimeDir
	n.opts = opts.withDefaults()
	n.workerCtx = workerCtx
	n.workerCancel = workerCancel
	return n
}
// LoadCreationParams reads the bootstrap file stored within the given state
// directory, as written by a previously Created/Joined Network, and returns
// the CreationParams contained in it.
func LoadCreationParams(
	stateDir toolkit.Dir,
) (
	bootstrap.CreationParams, error,
) {
	// TODO store/load the creation params separately from the rest of
	// the bootstrap, since the bootstrap contains potentially the
	// entire host list of a network, which could be pretty bulky.
	path := bootstrap.StateDirPath(stateDir.Path)

	var loaded bootstrap.Bootstrap
	err := jsonutil.LoadFile(&loaded, path)
	if err != nil {
		return bootstrap.CreationParams{}, fmt.Errorf(
			"loading bootstrap from %q: %w", path, err,
		)
	}

	return loaded.NetworkCreationParams, nil
}
// Load initializes and returns a Network instance for a network which was
// previously joined or created, and whose bootstrap is stored in the given
// state directory.
func Load(
	ctx context.Context,
	logger *mlog.Logger,
	networkConfig daecommon.NetworkConfig,
	envBinDirPath string,
	stateDir toolkit.Dir,
	runtimeDir toolkit.Dir,
	opts *Opts,
) (
	Network, error,
) {
	n := instatiateNetwork(
		ctx, logger, networkConfig, envBinDirPath, stateDir, runtimeDir, opts,
	)

	// The directories are allowed to exist already, since the network was
	// set up on this host before.
	if err := n.initializeDirs(true); err != nil {
		return nil, fmt.Errorf("initializing directories: %w", err)
	}

	path := bootstrap.StateDirPath(n.stateDir.Path)

	var stored bootstrap.Bootstrap
	if err := jsonutil.LoadFile(&stored, path); err != nil {
		return nil, fmt.Errorf("loading bootstrap from %q: %w", path, err)
	}

	if err := n.initialize(ctx, stored, false); err != nil {
		return nil, fmt.Errorf("initializing with bootstrap: %w", err)
	}

	return n, nil
}
// Join initializes and returns a Network instance for an existing network
// which this host has not joined before. The secrets carried by the given
// JoiningBootstrap are imported into the network's secrets store before
// initialization.
//
// Once Join has been called for a particular network it will error on
// subsequent calls for that same network; Load should be used instead.
func Join(
	ctx context.Context,
	logger *mlog.Logger,
	networkConfig daecommon.NetworkConfig,
	joiningBootstrap JoiningBootstrap,
	envBinDirPath string,
	stateDir toolkit.Dir,
	runtimeDir toolkit.Dir,
	opts *Opts,
) (
	Network, error,
) {
	n := instatiateNetwork(
		ctx, logger, networkConfig, envBinDirPath, stateDir, runtimeDir, opts,
	)

	var err error

	if err = n.initializeDirs(false); err != nil {
		return nil, fmt.Errorf("initializing directories: %w", err)
	}

	err = secrets.Import(ctx, n.secretsStore, joiningBootstrap.Secrets)
	if err != nil {
		return nil, fmt.Errorf("importing secrets: %w", err)
	}

	err = n.initialize(ctx, joiningBootstrap.Bootstrap, false)
	if err != nil {
		return nil, fmt.Errorf("initializing with bootstrap: %w", err)
	}

	return n, nil
}
// Create initializes and returns a Network for a brand new network.
//
//   - creationParams: The parameters the network is created with. Its Domain
//     is used when generating the nebula CA credentials.
//   - ipNet: An IP subnet, in CIDR form, which will be the overall range of
//     possible IPs in the network. ipNet.FirstAddr() becomes this first
//     host's IP.
//   - hostName: The name of this first host in the network.
//
// Errors:
//   - ErrInvalidConfig - if networkConfig has fewer than 3 storage
//     allocations configured.
func Create(
	ctx context.Context,
	logger *mlog.Logger,
	networkConfig daecommon.NetworkConfig,
	envBinDirPath string,
	stateDir toolkit.Dir,
	runtimeDir toolkit.Dir,
	creationParams bootstrap.CreationParams,
	ipNet nebula.IPNet,
	hostName nebula.HostName,
	opts *Opts,
) (
	Network, error,
) {
	if numAllocs := len(networkConfig.Storage.Allocations); numAllocs < 3 {
		return nil, ErrInvalidConfig.WithData(
			"At least three storage allocations are required.",
		)
	}

	caCreds, err := nebula.NewCACredentials(creationParams.Domain, ipNet)
	if err != nil {
		return nil, fmt.Errorf("creating nebula CA cert: %w", err)
	}

	rpcSecret := toolkit.RandStr(32)

	n := instatiateNetwork(
		ctx, logger, networkConfig, envBinDirPath, stateDir, runtimeDir, opts,
	)

	if err := n.initializeDirs(false); err != nil {
		return nil, fmt.Errorf("initializing directories: %w", err)
	}

	// Persist the secrets which were generated for the new network.
	if err := daecommon.SetGarageRPCSecret(
		ctx, n.secretsStore, rpcSecret,
	); err != nil {
		return nil, fmt.Errorf("setting garage RPC secret: %w", err)
	}

	if err := daecommon.SetNebulaCASigningPrivateKey(
		ctx, n.secretsStore, caCreds.SigningPrivateKey,
	); err != nil {
		return nil, fmt.Errorf("setting nebula CA signing key secret: %w", err)
	}

	// The new network starts out with no known hosts besides this one.
	firstBootstrap, err := bootstrap.New(
		caCreds,
		creationParams,
		map[nebula.HostName]bootstrap.Host{},
		hostName,
		ipNet.FirstAddr(),
	)
	if err != nil {
		return nil, fmt.Errorf("initializing bootstrap data: %w", err)
	}

	if err := n.initialize(ctx, firstBootstrap, true); err != nil {
		return nil, fmt.Errorf("initializing with bootstrap: %w", err)
	}

	return n, nil
}
// initializeDirs creates the "secrets" child directory of the state directory
// and sets n.secretsStore to a filesystem store rooted there. mayExist is
// passed through to MkChildDir.
func (n *network) initializeDirs(mayExist bool) error {
	dir, err := n.stateDir.MkChildDir("secrets", mayExist)
	if err == nil {
		n.secretsStore = secrets.NewFSStore(dir.Path)
		return nil
	}
	return fmt.Errorf("creating secrets dir: %w", err)
}
// periodically starts a background goroutine which calls fn once per period
// until n.workerCtx is canceled. Errors returned from fn are logged and do not
// stop the job. The goroutine is tracked by n.wg.
func (n *network) periodically(
	label string,
	fn func(context.Context) error,
	period time.Duration,
) {
	worker := func() {
		defer n.wg.Done()

		ctx := mctx.Annotate(n.workerCtx, "workerLabel", label)

		ticker := time.NewTicker(period)
		defer ticker.Stop()

		n.logger.Info(ctx, "Starting background job runner")
		defer n.logger.Info(ctx, "Stopping background job runner")

		for {
			// Block until either the next tick or cancellation.
			select {
			case <-ctx.Done():
				return
			case <-ticker.C:
			}

			n.logger.Info(ctx, "Background job running")
			if err := fn(ctx); err != nil {
				n.logger.Error(ctx, "Background job failed", err)
			}
		}
	}

	n.wg.Add(1)
	go worker()
}
func (n *network) initialize(
ctx context.Context,
prevBootstrap bootstrap.Bootstrap,
isCreate bool,
) error {
prevThisHost := prevBootstrap.ThisHost()
// we update this Host's data using whatever configuration has been provided
// by the daemon config. This way the network has the most up-to-date
// possible bootstrap. This updated bootstrap will later get updated in
// garage as a background task, so other hosts will see it as well.
currBootstrap, err := coalesceNetworkConfigAndBootstrap(
n.networkConfig, prevBootstrap,
)
if err != nil {
return fmt.Errorf("combining configuration into bootstrap: %w", err)
}
n.logger.Info(ctx, "Writing updated bootstrap to state dir")
err = writeBootstrapToStateDir(n.stateDir.Path, currBootstrap)
if err != nil {
return fmt.Errorf("writing bootstrap to state dir: %w", err)
}
n.currBootstrap = currBootstrap
n.logger.Info(ctx, "Creating child processes")
n.children, err = children.New(
ctx,
n.logger.WithNamespace("children"),
n.envBinDirPath,
n.secretsStore,
n.networkConfig,
n.runtimeDir,
n.opts.GarageAdminToken,
n.currBootstrap,
n.opts.ChildrenOpts,
)
if err != nil {
return fmt.Errorf("creating child processes: %w", err)
}
n.logger.Info(ctx, "Child processes created")
createGarageGlobalBucket := isCreate
err = n.postChildrenInit(ctx, prevThisHost, createGarageGlobalBucket)
if err != nil {
n.logger.Error(ctx, "Post-initialization failed, stopping child processes", err)
n.children.Shutdown()
return fmt.Errorf("performing post-initialization: %w", err)
}
// Do this now so that everything is stable before returning. This also
// serves a dual-purpose, as it makes sure that the PUT from the postChildrenInit
// above has propagated from the local garage instance, if there is one.
2024-10-24 20:14:13 +00:00
n.logger.Info(ctx, "Reloading hosts from network storage")
if err = n.reloadHosts(ctx); err != nil {
return fmt.Errorf("Reloading network bootstrap: %w", err)
}
n.periodically("reloadHosts", n.reloadHosts, 3*time.Minute)
n.periodically(
"removeOrphanGarageNodes", n.removeOrphanGarageNodes, 1*time.Minute,
)
return nil
}
// postChildrenInit performs steps which are required after children have been
// initialized.
func (n *network) postChildrenInit(
ctx context.Context,
prevThisHost bootstrap.Host,
createGarageGlobalBucket bool,
) error {
n.l.RLock()
defer n.l.RUnlock()
thisHost := n.currBootstrap.ThisHost()
if len(thisHost.Garage.Instances) > 0 {
n.logger.Info(ctx, "Applying garage layout")
if err := garageApplyLayout(
ctx,
n.logger,
n.networkConfig,
n.opts.GarageAdminToken,
prevThisHost, thisHost,
); err != nil {
return fmt.Errorf("applying garage layout: %w", err)
}
}
if createGarageGlobalBucket {
n.logger.Info(ctx, "Initializing garage shared global bucket")
garageGlobalBucketCreds, err := garageInitializeGlobalBucket(
ctx,
n.logger,
n.networkConfig,
n.opts.GarageAdminToken,
thisHost,
)
if err != nil {
return fmt.Errorf("initializing global bucket: %w", err)
}
err = daecommon.SetGarageS3APIGlobalBucketCredentials(
ctx, n.secretsStore, garageGlobalBucketCreds,
)
if err != nil {
return fmt.Errorf("storing global bucket creds: %w", err)
}
}
for _, alloc := range n.networkConfig.Storage.Allocations {
if err := garageWaitForAlloc(
ctx, n.logger, alloc, n.opts.GarageAdminToken, thisHost,
); err != nil {
return fmt.Errorf(
"waiting for alloc %+v to initialize: %w", alloc, err,
)
}
}
n.logger.Info(ctx, "Updating host info in garage")
err := putGarageBoostrapHost(ctx, n.secretsStore, n.currBootstrap)
2024-10-24 20:14:13 +00:00
if err != nil {
return fmt.Errorf("updating host info in garage: %w", err)
}
return nil
}
2024-10-24 20:14:13 +00:00
func (n *network) reloadHosts(ctx context.Context) error {
n.l.RLock()
currBootstrap := n.currBootstrap
n.l.RUnlock()
n.logger.Info(ctx, "Checking for bootstrap changes")
2024-10-24 20:14:13 +00:00
newHosts, err := getGarageBootstrapHosts(
ctx, n.logger, n.secretsStore, currBootstrap,
)
if err != nil {
return fmt.Errorf("getting hosts from garage: %w", err)
}
// TODO there's some potential race conditions here, where
// CreateHost could be called at this point, write the new host to
// garage and the bootstrap, but then this reload call removes the
// host from this bootstrap/children until the next reload.
2024-10-24 20:14:13 +00:00
newBootstrap := currBootstrap
newBootstrap.Hosts = newHosts
// the daemon's view of this host's bootstrap info takes precedence over
// whatever is in garage. The garage version lacks the private credentials
// which must be stored locally.
thisHost := currBootstrap.ThisHost()
newBootstrap.Hosts[thisHost.Name] = thisHost
if _, err = n.reload(ctx, nil, &newBootstrap); err != nil {
return fmt.Errorf("reloading with new host data: %w", err)
}
return nil
}
// removeOrphanGarageNodes removes from the garage cluster layout the nodes of
// this host's "buddy" host, if the buddy is either no longer part of the
// network or no longer has any garage instances configured.
//
// Normally every host manages the cluster layout of its own storage
// allocations via garageApplyLayout. That is not possible in these cases:
//
//   - A host removes all of its allocations via SetConfig.
//   - A host removes all of its allocations by calling Load with no
//     allocations in the provided daecommon.NetworkConfig.
//   - A host is removed from the network by another host.
//
// In each of them the host has no garage instances running anymore, and so
// cannot call garageApplyLayout on itself. Instead, every host which does have
// garage instances running periodically checks for such orphaned nodes and
// removes them from the layout.
func (n *network) removeOrphanGarageNodes(ctx context.Context) error {
	n.l.RLock()
	defer n.l.RUnlock()

	thisHost := n.currBootstrap.ThisHost()
	if len(thisHost.Garage.Instances) == 0 {
		n.logger.Info(ctx, "No local garage instances, cannot remove orphans")
		return nil
	}

	client := newGarageAdminClient(
		n.logger, n.networkConfig, n.opts.GarageAdminToken, thisHost,
	)
	defer client.Close()

	status, err := client.Status(ctx)
	if err != nil {
		return fmt.Errorf("retrieving garage cluster status: %w", err)
	}

	buddyIP, buddyNodes := garageNodeBuddyPeers(status, thisHost)
	if len(buddyNodes) == 0 {
		return nil
	}

	ctx = mctx.Annotate(ctx, "buddyIP", buddyIP)

	// Find the buddy in the bootstrap. Only the first host carrying the
	// buddy's IP is considered.
	for _, host := range n.currBootstrap.Hosts {
		if host.IP() != buddyIP {
			continue
		}

		if len(host.Garage.Instances) > 0 {
			n.logger.Info(ctx, "Buddy instance has garage nodes configured in its bootstrap, doing nothing")
			return nil
		}

		break
	}

	// At this point the buddy is either gone from the network or has no
	// garage instances set on it anymore. In both cases its nodes get removed
	// from the cluster layout.
	orphanIDs := make([]string, 0, len(buddyNodes))
	for _, node := range buddyNodes {
		orphanIDs = append(orphanIDs, node.ID)
	}

	n.logger.Info(ctx, "Applying garage layout to remove orphaned garage nodes")
	err = client.ApplyLayout(ctx, nil, orphanIDs)
	if err != nil {
		return fmt.Errorf(
			"applying garage cluster layout, removing nodes %+v: %w",
			buddyNodes,
			err,
		)
	}

	return nil
}
// returns the bootstrap prior to the reload being applied.
2024-10-24 20:14:13 +00:00
func (n *network) reload(
ctx context.Context,
newNetworkConfig *daecommon.NetworkConfig,
newBootstrap *bootstrap.Bootstrap,
) (
bootstrap.Bootstrap, error,
) {
2024-10-24 20:14:13 +00:00
n.l.Lock()
defer n.l.Unlock()
prevBootstrap := n.currBootstrap
if newBootstrap != nil {
n.currBootstrap = *newBootstrap
}
if newNetworkConfig != nil {
n.networkConfig = *newNetworkConfig
}
var err error
if n.currBootstrap, err = coalesceNetworkConfigAndBootstrap(
n.networkConfig, n.currBootstrap,
); err != nil {
return bootstrap.Bootstrap{}, fmt.Errorf(
"combining configuration into bootstrap: %w", err,
)
}
2024-10-24 20:14:13 +00:00
n.logger.Info(ctx, "Writing updated bootstrap to state dir")
err = writeBootstrapToStateDir(n.stateDir.Path, n.currBootstrap)
2024-10-24 20:14:13 +00:00
if err != nil {
return bootstrap.Bootstrap{}, fmt.Errorf(
"writing bootstrap to state dir: %w", err,
)
2024-10-24 20:14:13 +00:00
}
n.logger.Info(ctx, "Reloading child processes")
err = n.children.Reload(ctx, n.networkConfig, n.currBootstrap)
if err != nil {
return bootstrap.Bootstrap{}, fmt.Errorf(
"reloading child processes: %w", err,
)
}
return prevBootstrap, nil
}
// withCurrBootstrap calls fn with the network's current bootstrap and returns
// whatever fn returns. The network's read lock is held for the full duration
// of the call to fn.
func withCurrBootstrap[Res any](
	n *network, fn func(bootstrap.Bootstrap) (Res, error),
) (Res, error) {
	n.l.RLock()
	defer n.l.RUnlock()
	return fn(n.currBootstrap)
}
2024-10-06 15:15:40 +00:00
func (n *network) getBootstrap() (
bootstrap.Bootstrap, error,
) {
return withCurrBootstrap(n, func(
currBootstrap bootstrap.Bootstrap,
) (
bootstrap.Bootstrap, error,
) {
return currBootstrap, nil
})
}
func (n *network) GetHosts(ctx context.Context) ([]bootstrap.Host, error) {
2024-10-24 20:14:13 +00:00
return withCurrBootstrap(n, func(
currBootstrap bootstrap.Bootstrap,
) (
[]bootstrap.Host, error,
) {
hosts := maps.Values(currBootstrap.Hosts)
slices.SortFunc(hosts, func(a, b bootstrap.Host) int {
return cmp.Compare(a.Name, b.Name)
})
return hosts, nil
})
}
func (n *network) GetGarageClientParams(
ctx context.Context,
) (
GarageClientParams, error,
) {
return withCurrBootstrap(n, func(
currBootstrap bootstrap.Bootstrap,
) (
GarageClientParams, error,
) {
2024-10-24 20:14:13 +00:00
return getGarageClientParams(ctx, n.secretsStore, currBootstrap)
})
}
func (n *network) GetNebulaCAPublicCredentials(
ctx context.Context,
) (
nebula.CAPublicCredentials, error,
) {
2024-10-06 15:15:40 +00:00
b, err := n.getBootstrap()
if err != nil {
return nebula.CAPublicCredentials{}, fmt.Errorf(
"retrieving bootstrap: %w", err,
)
}
return b.CAPublicCredentials, nil
}
func (n *network) RemoveHost(ctx context.Context, hostName nebula.HostName) error {
// TODO RemoveHost should publish a certificate revocation for the host
// being removed.
_, err := withCurrBootstrap(n, func(
currBootstrap bootstrap.Bootstrap,
) (
struct{}, error,
) {
2024-10-24 20:14:13 +00:00
garageClientParams, err := getGarageClientParams(
ctx, n.secretsStore, currBootstrap,
)
if err != nil {
return struct{}{}, fmt.Errorf("get garage client params: %w", err)
}
client := garageClientParams.GlobalBucketS3APIClient()
defer client.Close()
return struct{}{}, removeGarageBootstrapHost(ctx, client, hostName)
})
return err
}
// makeCACreds assembles a full set of CA credentials from the public
// credentials stored in the given bootstrap and the given signing key.
func makeCACreds(
	currBootstrap bootstrap.Bootstrap,
	caSigningPrivateKey nebula.SigningPrivateKey,
) nebula.CACredentials {
	var creds nebula.CACredentials
	creds.Public = currBootstrap.CAPublicCredentials
	creds.SigningPrivateKey = caSigningPrivateKey
	return creds
}
// chooseAvailableIP returns an IP from the network's subnet which is not used
// by any host in the given bootstrap. The subnet is taken from the first
// subnet of the bootstrap's CA certificate. The subnet's own address and its
// broadcast address are never returned.
//
// An error is returned if no IP is available, or if random bytes could not be
// read.
//
// NOTE(review): this assumes the subnet's IP and Mask byte slices have the
// same length; confirm against how the CA cert's subnets are constructed.
func chooseAvailableIP(b bootstrap.Bootstrap) (netip.Addr, error) {
	var (
		cidrIPNet      = b.CAPublicCredentials.Cert.Unwrap().Details.Subnets[0]
		cidrMask       = cidrIPNet.Mask
		cidrIPB        = cidrIPNet.IP
		cidr           = netip.MustParsePrefix(cidrIPNet.String())
		cidrIP         = cidr.Addr()
		cidrSuffixBits = cidrIP.BitLen() - cidr.Bits()
		inUseIPs       = make(map[netip.Addr]struct{}, len(b.Hosts))
	)

	for _, host := range b.Hosts {
		inUseIPs[host.IP()] = struct{}{}
	}

	// first check that there are any addresses at all. We can determine the
	// number of possible addresses using the network CIDR. The first IP in a
	// subnet is the network identifier, and is reserved. The last IP is the
	// broadcast IP, and is also reserved. Hence, the -2.
	//
	// The count is only computed when it is guaranteed to fit in an int. For
	// wider subnets the shift would overflow (a 64-bit suffix would produce a
	// count of -2, making every call fail), and such a subnet cannot be
	// exhausted by an in-memory set of hosts anyway.
	const maxCountedSuffixBits = 30
	if cidrSuffixBits <= maxCountedSuffixBits {
		usableIPs := (1 << cidrSuffixBits) - 2
		if len(inUseIPs) >= usableIPs {
			return netip.Addr{}, errors.New("no available IPs")
		}
	}

	// We need to know the subnet broadcast address, so we don't accidentally
	// produce it.
	cidrBCastIPB := bytes.Clone(cidrIPB)
	for i := range cidrBCastIPB {
		cidrBCastIPB[i] |= ^cidrMask[i]
	}

	cidrBCastIP, ok := netip.AddrFromSlice(cidrBCastIPB)
	if !ok {
		panic(fmt.Sprintf("invalid broadcast ip calculated: %x", cidrBCastIPB))
	}

	// Try a handful of times to pick an IP at random. This is preferred, as it
	// leaves less room for two different CreateHost calls to choose the same
	// IP.
	for range 20 {
		ipB := make([]byte, len(cidrIPB))
		if _, err := rand.Read(ipB); err != nil {
			return netip.Addr{}, fmt.Errorf("reading random bytes: %w", err)
		}

		// Keep the subnet's bits, and take the remaining bits from the random
		// bytes.
		for i := range ipB {
			ipB[i] = cidrIPB[i] | (ipB[i] & ^cidrMask[i])
		}

		ip, ok := netip.AddrFromSlice(ipB)
		if !ok {
			panic(fmt.Sprintf("generated invalid IP: %x", ipB))
		} else if !cidr.Contains(ip) {
			panic(fmt.Sprintf(
				"generated IP %v which is not in cidr %v", ip, cidr,
			))
		}

		if ip == cidrIP || ip == cidrBCastIP {
			continue
		}

		if _, inUse := inUseIPs[ip]; !inUse {
			return ip, nil
		}
	}

	// If randomly picking fails then just go through IPs one by one until the
	// free one is found.
	for ip := cidrIP.Next(); ip != cidrBCastIP; ip = ip.Next() {
		if _, inUse := inUseIPs[ip]; !inUse {
			return ip, nil
		}
	}

	// The count-based check above can be passed even though every usable IP
	// is taken, eg if a host has been given one of the reserved addresses or
	// an address outside of the subnet. Report that as an ordinary error
	// rather than crashing the daemon.
	return netip.Addr{}, errors.New("no available IPs")
}
func (n *network) CreateHost(
ctx context.Context,
hostName nebula.HostName,
opts CreateHostOpts,
) (
JoiningBootstrap, error,
) {
n.l.RLock()
currBootstrap := n.currBootstrap
n.l.RUnlock()
ip := opts.IP
if ip == (netip.Addr{}) {
var err error
if ip, err = chooseAvailableIP(currBootstrap); err != nil {
return JoiningBootstrap{}, fmt.Errorf(
"choosing available IP: %w", err,
)
}
}
// TODO if the ip is given, check that it's not already in use.
caSigningPrivateKey, err := daecommon.GetNebulaCASigningPrivateKey(
ctx, n.secretsStore,
)
if err != nil {
return JoiningBootstrap{}, fmt.Errorf("getting CA signing key: %w", err)
}
var joiningBootstrap JoiningBootstrap
joiningBootstrap.Bootstrap, err = bootstrap.New(
makeCACreds(currBootstrap, caSigningPrivateKey),
currBootstrap.NetworkCreationParams,
currBootstrap.Hosts,
hostName,
ip,
)
if err != nil {
return JoiningBootstrap{}, fmt.Errorf(
"initializing bootstrap data: %w", err,
)
}
secretsIDs := []secrets.ID{
daecommon.GarageRPCSecretSecretID,
daecommon.GarageS3APIGlobalBucketCredentialsSecretID,
}
if opts.CanCreateHosts {
secretsIDs = append(
secretsIDs, daecommon.NebulaCASigningPrivateKeySecretID,
)
}
if joiningBootstrap.Secrets, err = secrets.Export(
ctx, n.secretsStore, secretsIDs,
); err != nil {
return JoiningBootstrap{}, fmt.Errorf("exporting secrets: %w", err)
}
n.logger.Info(ctx, "Putting new host in garage")
2024-10-24 20:14:13 +00:00
err = putGarageBoostrapHost(ctx, n.secretsStore, joiningBootstrap.Bootstrap)
if err != nil {
return JoiningBootstrap{}, fmt.Errorf("putting new host in garage: %w", err)
}
// the new bootstrap will have been initialized with both all existing hosts
// (based on currBootstrap) and the host being created.
2024-10-24 20:14:13 +00:00
newBootstrap := currBootstrap
newBootstrap.Hosts = joiningBootstrap.Bootstrap.Hosts
n.logger.Info(ctx, "Reloading local state with new host")
if _, err = n.reload(ctx, nil, &newBootstrap); err != nil {
return JoiningBootstrap{}, fmt.Errorf("reloading child processes: %w", err)
}
return joiningBootstrap, nil
}
// CreateNebulaCertificate implements the method for the RPC interface. The
// certificate is issued for the IP of the named host as found in the current
// bootstrap, and is signed using the CA signing key from the secrets store.
// The read lock is held for the duration of the call.
//
// Errors:
// - ErrHostNotFound
func (n *network) CreateNebulaCertificate(
	ctx context.Context,
	hostName nebula.HostName,
	hostPubKey nebula.EncryptingPublicKey,
) (
	nebula.Certificate, error,
) {
	n.l.RLock()
	defer n.l.RUnlock()

	currBootstrap := n.currBootstrap

	host, found := currBootstrap.Hosts[hostName]
	if !found {
		return nebula.Certificate{}, ErrHostNotFound
	}
	hostIP := host.IP()

	signingKey, err := daecommon.GetNebulaCASigningPrivateKey(
		ctx, n.secretsStore,
	)
	if err != nil {
		return nebula.Certificate{}, fmt.Errorf("getting CA signing key: %w", err)
	}

	return nebula.NewHostCert(
		makeCACreds(currBootstrap, signingKey), hostPubKey, hostName, hostIP,
	)
}
2024-10-23 18:18:11 +00:00
func (n *network) GetConfig(context.Context) (daecommon.NetworkConfig, error) {
2024-10-24 20:14:13 +00:00
n.l.RLock()
defer n.l.RUnlock()
2024-10-23 18:18:11 +00:00
return n.networkConfig, nil
}
// SetConfig implements the method for the RPC interface. The network is
// reloaded using the given config, after which the post-children-init steps
// are re-run (without creating the garage global bucket). This host's info
// from prior to the reload is passed to those steps.
func (n *network) SetConfig(
	ctx context.Context, config daecommon.NetworkConfig,
) error {
	prevBootstrap, err := n.reload(ctx, &config, nil)
	if err != nil {
		return fmt.Errorf("reloading config: %w", err)
	}

	prevThisHost := prevBootstrap.ThisHost()

	err = n.postChildrenInit(ctx, prevThisHost, false)
	if err != nil {
		return fmt.Errorf("performing post-initialization: %w", err)
	}

	return nil
}
// GetNetworkCreationParams implements the method for the Network interface. It
// returns the creation params stored in the current bootstrap, read while
// holding the read lock. The returned error is always nil.
func (n *network) GetNetworkCreationParams(
	ctx context.Context,
) (
	bootstrap.CreationParams, error,
) {
	n.l.RLock()
	defer n.l.RUnlock()
	return n.currBootstrap.NetworkCreationParams, nil
}
// Shutdown implements the method for the Network interface. Background jobs
// are canceled and waited on first, after which the child processes (if any
// were created) are shut down. The returned error is always nil.
func (n *network) Shutdown() error {
	n.workerCancel()
	n.wg.Wait()

	if n.children == nil {
		return nil
	}

	n.logger.Info(context.Background(), "Shutting down children")
	n.children.Shutdown()
	return nil
}