diff --git a/go/bootstrap/bootstrap.go b/go/bootstrap/bootstrap.go index 83375f7..c4eeed8 100644 --- a/go/bootstrap/bootstrap.go +++ b/go/bootstrap/bootstrap.go @@ -7,6 +7,7 @@ import ( "encoding/json" "fmt" "isle/nebula" + "maps" "net/netip" "path/filepath" "sort" @@ -45,11 +46,11 @@ type Bootstrap struct { Hosts map[nebula.HostName]Host } -// New initializes and returns a new Bootstrap file for a new host. This -// function assigns Hosts an empty map. +// New initializes and returns a new Bootstrap file for a new host. func New( caCreds nebula.CACredentials, adminCreationParams CreationParams, + existingHosts map[nebula.HostName]Host, name nebula.HostName, ip netip.Addr, ) ( @@ -72,13 +73,18 @@ func New( return Bootstrap{}, fmt.Errorf("signing assigned fields: %w", err) } + existingHosts = maps.Clone(existingHosts) + existingHosts[name] = Host{ + HostAssigned: assigned, + } + return Bootstrap{ NetworkCreationParams: adminCreationParams, CAPublicCredentials: caCreds.Public, PrivateCredentials: hostPrivCreds, HostAssigned: assigned, SignedHostAssigned: signedAssigned, - Hosts: map[nebula.HostName]Host{}, + Hosts: existingHosts, }, nil } diff --git a/go/daemon/bootstrap.go b/go/daemon/bootstrap.go index 2871a38..d8e373a 100644 --- a/go/daemon/bootstrap.go +++ b/go/daemon/bootstrap.go @@ -101,7 +101,7 @@ func calcBootstrapDiff( return } - diff.hostsChanged = bytes.Equal(prevHash, nextHash) + diff.hostsChanged = !bytes.Equal(prevHash, nextHash) } { @@ -118,7 +118,7 @@ func calcBootstrapDiff( } { - diff.dnsChanged = reflect.DeepEqual( + diff.dnsChanged = !reflect.DeepEqual( dnsmasqConfig(daemonConfig, prevBootstrap), dnsmasqConfig(daemonConfig, nextBootstrap), ) diff --git a/go/daemon/daemon.go b/go/daemon/daemon.go index 2bc3427..01a34d7 100644 --- a/go/daemon/daemon.go +++ b/go/daemon/daemon.go @@ -232,8 +232,7 @@ func NewDaemon( } // initialize must be called with d.l write lock held, but it will unlock the -// lock itself. Once initialize returns successfully the d.l write lock should -// not be held again, except by the reloadLoop. +// lock itself. func (d *daemon) initialize( ctx context.Context, currBootstrap bootstrap.Bootstrap, ) error { @@ -324,12 +323,17 @@ func withCurrBootstrap[Res any]( } } +// reload will check the existing hosts data from currBootstrap against a +// potentially updated set of hosts data, and if there are any differences will +// perform whatever changes are necessary. func (d *daemon) reload( - ctx context.Context, newHosts map[nebula.HostName]bootstrap.Host, + ctx context.Context, + currBootstrap bootstrap.Bootstrap, + newHosts map[nebula.HostName]bootstrap.Host, ) error { var ( - newBootstrap = d.currBootstrap - thisHost = d.currBootstrap.ThisHost() + newBootstrap = currBootstrap + thisHost = currBootstrap.ThisHost() ) newBootstrap.Hosts = newHosts @@ -338,10 +342,11 @@ func (d *daemon) reload( // whatever is in garage newBootstrap.Hosts[thisHost.Name] = thisHost - diff, err := calcBootstrapDiff(d.daemonConfig, d.currBootstrap, newBootstrap) + diff, err := calcBootstrapDiff(d.daemonConfig, currBootstrap, newBootstrap) if err != nil { return fmt.Errorf("calculating diff between bootstraps: %w", err) } else if diff == (bootstrapDiff{}) { + d.logger.Info(ctx, "No changes to bootstrap detected") return nil } @@ -352,6 +357,12 @@ func (d *daemon) reload( var errs []error + // TODO each of these changed cases should block until its respective + // service is confirmed to have come back online. + + // TODO it's possible that reload could be called concurrently, and one call + // would override the reloading done by the other. + if diff.dnsChanged { d.logger.Info(ctx, "Restarting dnsmasq to account for bootstrap changes") if err := d.children.RestartDNSMasq(newBootstrap); err != nil { @@ -407,7 +418,7 @@ func (d *daemon) postInit(ctx context.Context) error { } d.logger.Info(ctx, "Updating host info in garage") - err = d.putGarageBoostrapHost(ctx, d.logger, d.currBootstrap) + err = d.putGarageBoostrapHost(ctx, d.currBootstrap) if err != nil { return fmt.Errorf("updating host info in garage: %w", err) } @@ -425,15 +436,23 @@ func (d *daemon) reloadLoop(ctx context.Context) { return case <-ticker.C: + d.l.RLock() + currBootstrap := d.currBootstrap + d.l.RUnlock() d.logger.Info(ctx, "Checking for bootstrap changes") - newHosts, err := d.getGarageBootstrapHosts(ctx, d.logger, d.currBootstrap) + newHosts, err := d.getGarageBootstrapHosts(ctx, currBootstrap) if err != nil { d.logger.Error(ctx, "Failed to get hosts from garage", err) continue } - if err := d.reload(ctx, newHosts); err != nil { + // TODO there's some potential race conditions here, where + // CreateHost could be called at this point, write the new host to + // garage and the bootstrap, but then this reload call removes the + // host from this bootstrap/children until the next reload. + + if err := d.reload(ctx, currBootstrap, newHosts); err != nil { d.logger.Error(ctx, "Reloading with new host data failed", err) continue } @@ -473,6 +492,7 @@ func (d *daemon) CreateNetwork( hostBootstrap, err := bootstrap.New( nebulaCACreds, creationParams, + map[nebula.HostName]bootstrap.Host{}, hostName, ipNet.FirstAddr(), ) @@ -589,53 +609,62 @@ func (d *daemon) CreateHost( ) ( JoiningBootstrap, error, ) { - return withCurrBootstrap(d, func( - currBootstrap bootstrap.Bootstrap, - ) ( - JoiningBootstrap, error, - ) { - caSigningPrivateKey, err := getNebulaCASigningPrivateKey( - ctx, d.secretsStore, + d.l.RLock() + currBootstrap := d.currBootstrap + d.l.RUnlock() + + caSigningPrivateKey, err := getNebulaCASigningPrivateKey( + ctx, d.secretsStore, + ) + if err != nil { + return JoiningBootstrap{}, fmt.Errorf("getting CA signing key: %w", err) + } + + var joiningBootstrap JoiningBootstrap + joiningBootstrap.Bootstrap, err = bootstrap.New( + makeCACreds(currBootstrap, caSigningPrivateKey), + currBootstrap.NetworkCreationParams, + currBootstrap.Hosts, + hostName, + ip, + ) + if err != nil { + return JoiningBootstrap{}, fmt.Errorf( + "initializing bootstrap data: %w", err, ) - if err != nil { - return JoiningBootstrap{}, fmt.Errorf("getting CA signing key: %w", err) - } + } - var joiningBootstrap JoiningBootstrap - joiningBootstrap.Bootstrap, err = bootstrap.New( - makeCACreds(currBootstrap, caSigningPrivateKey), - currBootstrap.NetworkCreationParams, - hostName, - ip, - ) - if err != nil { - return JoiningBootstrap{}, fmt.Errorf( - "initializing bootstrap data: %w", err, - ) - } + secretsIDs := []secrets.ID{ + garageRPCSecretSecretID, + garageS3APIGlobalBucketCredentialsSecretID, + } - joiningBootstrap.Bootstrap.Hosts = currBootstrap.Hosts + if opts.CanCreateHosts { + secretsIDs = append(secretsIDs, nebulaCASigningPrivateKeySecretID) + } - secretsIDs := []secrets.ID{ - garageRPCSecretSecretID, - garageS3APIGlobalBucketCredentialsSecretID, - } + if joiningBootstrap.Secrets, err = secrets.Export( + ctx, d.secretsStore, secretsIDs, + ); err != nil { + return JoiningBootstrap{}, fmt.Errorf("exporting secrets: %w", err) + } - if opts.CanCreateHosts { - secretsIDs = append(secretsIDs, nebulaCASigningPrivateKeySecretID) - } + d.logger.Info(ctx, "Putting new host in garage") + err = d.putGarageBoostrapHost(ctx, joiningBootstrap.Bootstrap) + if err != nil { + return JoiningBootstrap{}, fmt.Errorf("putting new host in garage: %w", err) + } - if joiningBootstrap.Secrets, err = secrets.Export( - ctx, d.secretsStore, secretsIDs, - ); err != nil { - return JoiningBootstrap{}, fmt.Errorf("exporting secrets: %w", err) - } + // the new bootstrap will have been initialized with both all existing hosts + // (based on currBootstrap) and the host being created. + newHosts := joiningBootstrap.Bootstrap.Hosts - // TODO persist new bootstrap to garage. Requires making the daemon - // config change watching logic smarter, so only dnsmasq gets restarted. + d.logger.Info(ctx, "Reloading local state with new host") + if err := d.reload(ctx, currBootstrap, newHosts); err != nil { + return JoiningBootstrap{}, fmt.Errorf("reloading child processes: %w", err) + } - return joiningBootstrap, nil - }) + return joiningBootstrap, nil } func (d *daemon) CreateNebulaCertificate( diff --git a/go/daemon/global_bucket.go b/go/daemon/global_bucket.go index d12f413..396f978 100644 --- a/go/daemon/global_bucket.go +++ b/go/daemon/global_bucket.go @@ -64,7 +64,7 @@ func garageInitializeGlobalBucket( // into garage so that other hosts are able to see relevant configuration for // it. func (d *daemon) putGarageBoostrapHost( - ctx context.Context, logger *mlog.Logger, currBootstrap bootstrap.Bootstrap, + ctx context.Context, currBootstrap bootstrap.Bootstrap, ) error { garageClientParams, err := d.getGarageClientParams(ctx, currBootstrap) if err != nil { @@ -113,7 +113,7 @@ func (d *daemon) putGarageBoostrapHost( } func (d *daemon) getGarageBootstrapHosts( - ctx context.Context, logger *mlog.Logger, currBootstrap bootstrap.Bootstrap, + ctx context.Context, currBootstrap bootstrap.Bootstrap, ) ( map[nebula.HostName]bootstrap.Host, error, ) { @@ -157,13 +157,13 @@ func (d *daemon) getGarageBootstrapHosts( obj.Close() if err != nil { - logger.Warn(ctx, "Object contains invalid json", err) + d.logger.Warn(ctx, "Object contains invalid json", err) continue } host, err := authedHost.Unwrap(currBootstrap.CAPublicCredentials) if err != nil { - logger.Warn(ctx, "Host could not be authenticated", err) + d.logger.Warn(ctx, "Host could not be authenticated", err) } hosts[host.Name] = host diff --git a/tests/cases/dnsmasq/00-hosts.sh b/tests/cases/dnsmasq/00-hosts.sh index 9a91537..be7027f 100644 --- a/tests/cases/dnsmasq/00-hosts.sh +++ b/tests/cases/dnsmasq/00-hosts.sh @@ -10,10 +10,7 @@ function assert_a { as_primus assert_a "$primus_ip" primus.hosts.shared.test - -# TODO This doesn't work at present, there would need to be some mechanism to -# block the test until secondus' bootstrap info can propagate to primus. -#assert_a "$secondus_ip" secondus.hosts.shared.test +assert_a "$secondus_ip" secondus.hosts.shared.test as_secondus assert_a "$primus_ip" primus.hosts.shared.test diff --git a/tests/cases/hosts/00-list.sh b/tests/cases/hosts/00-list.sh index 9b9459f..f2cbe1e 100644 --- a/tests/cases/hosts/00-list.sh +++ b/tests/cases/hosts/00-list.sh @@ -1,15 +1,20 @@ # shellcheck source=../../utils/with-1-data-1-empty-node-network.sh source "$UTILS"/with-1-data-1-empty-node-network.sh -# TODO when primus creates secondus' bootstrap it should write the new host to -# its own bootstrap, as well as to garage. +function do_tests { + hosts="$(isle hosts list)" + + [ "$(echo "$hosts" | jq -r '.[0].Name')" = "primus" ] + [ "$(echo "$hosts" | jq -r '.[0].VPN.IP')" = "10.6.9.1" ] + [ "$(echo "$hosts" | jq -r '.[0].Storage.Instances|length')" = "3" ] + + [ "$(echo "$hosts" | jq -r '.[1].Name')" = "secondus" ] + [ "$(echo "$hosts" | jq -r '.[1].VPN.IP')" = "10.6.9.2" ] + [ "$(echo "$hosts" | jq -r '.[1].Storage.Instances|length')" = "0" ] +} + +as_primus +do_tests + as_secondus -hosts="$(isle hosts list)" - -[ "$(echo "$hosts" | jq -r '.[0].Name')" = "primus" ] -[ "$(echo "$hosts" | jq -r '.[0].VPN.IP')" = "10.6.9.1" ] -[ "$(echo "$hosts" | jq -r '.[0].Storage.Instances|length')" = "3" ] - -[ "$(echo "$hosts" | jq -r '.[1].Name')" = "secondus" ] -[ "$(echo "$hosts" | jq -r '.[1].VPN.IP')" = "10.6.9.2" ] -[ "$(echo "$hosts" | jq -r '.[1].Storage.Instances|length')" = "0" ] +do_tests