diff --git a/go/daemon/children/children.go b/go/daemon/children/children.go index 36d716e..f76a8b6 100644 --- a/go/daemon/children/children.go +++ b/go/daemon/children/children.go @@ -45,6 +45,9 @@ type Opts struct { // new allocations. If not given then garagesrv.DBEngineSqlite will be used // for new allocations. GarageDefaultDBEngine garagesrv.DBEngine + + // TestBlocker is used by tests to set blockpoints. + TestBlocker *toolkit.TestBlocker } func (o *Opts) withDefaults() *Opts { @@ -341,6 +344,8 @@ func (c *Children) Reload( return fmt.Errorf("reloading nebula: %w", err) } + c.opts.TestBlocker.Blockpoint(ctx, "Children.Reload.postReloadNebula") + var errs []error if err := c.reloadDNSMasq(ctx, newNetworkConfig, newBootstrap); err != nil { diff --git a/go/daemon/network/network.go b/go/daemon/network/network.go index 57c51a5..c5ed40d 100644 --- a/go/daemon/network/network.go +++ b/go/daemon/network/network.go @@ -532,6 +532,8 @@ func (n *network) glmStateTransitionUnsafe(ctx context.Context) error { return fmt.Errorf("reloading children: %w", err) } + n.opts.testBlocker.Blockpoint(ctx, "glmStateTransition.postChildrenReload") + if adminClient, ok := n.children.GarageAdminClient(); ok { defer adminClient.Close() @@ -768,6 +770,7 @@ func (n *network) initialize(ctx context.Context, isCreate bool) error { GarageNewCluster: isCreate, GarageBootstrapPeers: garageBootstrapPeers, GarageDefaultDBEngine: n.opts.garageDefaultDBEngine, + TestBlocker: n.opts.testBlocker, }, ) if err != nil { diff --git a/go/daemon/network/network_it_test.go b/go/daemon/network/network_it_test.go index a43c2a9..e365f43 100644 --- a/go/daemon/network/network_it_test.go +++ b/go/daemon/network/network_it_test.go @@ -109,17 +109,35 @@ func TestJoin(t *testing.T) { assert.Equal(t, primus.getHostsByName(t), secondus.getHostsByName(t)) }) - t.Run("with alloc/simple", func(t *testing.T) { + t.Run("with alloc", func(t *testing.T) { var ( - h = newIntegrationHarness(t) - primus = h.createNetwork(t, 
"primus", nil) - secondus = h.joinNetwork(t, primus, "secondus", &joinNetworkOpts{ - networkConfigOpts: &networkConfigOpts{ - numStorageAllocs: 1, - }, - }) + h = newIntegrationHarness(t) + primus = h.createNetwork(t, "primus", nil) + secondusBlocker = toolkit.NewTestBlocker(t) + secondusRuntimeDir = h.mkDir(t, "runtime") ) + secondusBlocker.Expect("Children.Reload.postReloadNebula").Then( + t, h.ctx, func() { + h.logger.Info(h.ctx, "Checking that firewall was updated with new alloc") + assertFirewallInboundEquals( + t, secondusRuntimeDir, []daecommon.ConfigFirewallRule{ + {Port: "any", Proto: "icmp", Host: "any"}, + {Port: "3900", Proto: "tcp", Host: "any"}, + {Port: "3901", Proto: "tcp", Host: "any"}, + }, + ) + }, + ) + + secondus := h.joinNetwork(t, primus, "secondus", &joinNetworkOpts{ + networkConfigOpts: &networkConfigOpts{ + numStorageAllocs: 1, + }, + blocker: secondusBlocker, + runtimeDir: secondusRuntimeDir, + }) + h.logger.Info(h.ctx, "reloading primus' hosts") assert.NoError(t, primus.Network.(*network).reloadHosts(h.ctx)) @@ -184,8 +202,11 @@ func TestNetwork_SetConfig(t *testing.T) { t.Run("add storage alloc/simple", func(t *testing.T) { var ( - h = newIntegrationHarness(t) - network = h.createNetwork(t, "primus", nil) + h = newIntegrationHarness(t) + blocker = toolkit.NewTestBlocker(t) + network = h.createNetwork(t, "primus", &createNetworkOpts{ + blocker: blocker, + }) networkConfig = network.getConfig(t) metaPath = h.mkDir(t, "meta").Path ) @@ -196,12 +217,31 @@ func TestNetwork_SetConfig(t *testing.T) { DataPath: h.mkDir(t, "data").Path, MetaPath: metaPath, Capacity: 1, - S3APIPort: 4900, - RPCPort: 4901, + S3APIPort: 4901, + RPCPort: 4900, AdminPort: 4902, }, ) + blocker.Expect("Children.Reload.postReloadNebula").Then( + t, h.ctx, func() { + h.logger.Info(h.ctx, "Checking that firewall was updated with new alloc") + assertFirewallInboundEquals( + t, network.runtimeDir, []daecommon.ConfigFirewallRule{ + {Port: "any", Proto: "icmp", Host: "any"}, 
+ {Port: "3900", Proto: "tcp", Host: "any"}, + {Port: "3901", Proto: "tcp", Host: "any"}, + {Port: "3910", Proto: "tcp", Host: "any"}, + {Port: "3911", Proto: "tcp", Host: "any"}, + {Port: "3920", Proto: "tcp", Host: "any"}, + {Port: "3921", Proto: "tcp", Host: "any"}, + {Port: "4900", Proto: "tcp", Host: "any"}, + {Port: "4901", Proto: "tcp", Host: "any"}, + }, + ) + }, + ) + assert.NoError(t, network.SetConfig(h.ctx, networkConfig)) h.logger.Info(h.ctx, "Checking that the Host information was updated") @@ -216,8 +256,8 @@ func TestNetwork_SetConfig(t *testing.T) { assert.NotEmpty(t, newAlloc.ID) newAlloc.ID = "" assert.Equal(t, bootstrap.GarageHostInstance{ - S3APIPort: 4900, - RPCPort: 4901, + S3APIPort: 4901, + RPCPort: 4900, }, newAlloc) h.logger.Info(h.ctx, "Checking that the bootstrap file was written with the new host config") @@ -235,7 +275,7 @@ func TestNetwork_SetConfig(t *testing.T) { h.logger.Info(h.ctx, "Checking that garage is using the expected db engine") garageConfig, err := os.ReadFile( - filepath.Join(network.runtimeDir.Path, "garage-4901.toml"), + filepath.Join(network.runtimeDir.Path, "garage-4900.toml"), ) assert.NoError(t, err) assert.Contains(t, @@ -245,6 +285,50 @@ func TestNetwork_SetConfig(t *testing.T) { assert.FileExists(t, filepath.Join(metaPath, "db.sqlite")) }) + t.Run("add storage alloc/on second host", func(t *testing.T) { + var ( + h = newIntegrationHarness(t) + primus = h.createNetwork(t, "primus", nil) + secondusBlocker = toolkit.NewTestBlocker(t) + secondus = h.joinNetwork(t, primus, "secondus", &joinNetworkOpts{ + blocker: secondusBlocker, + }) + secondusNetworkConfig = secondus.getConfig(t) + ) + + secondusBlocker.Expect("Children.Reload.postReloadNebula").Then( + t, h.ctx, func() { + h.logger.Info(h.ctx, "Checking that firewall was updated with new alloc") + assertFirewallInboundEquals( + t, secondus.runtimeDir, []daecommon.ConfigFirewallRule{ + {Port: "any", Proto: "icmp", Host: "any"}, + {Port: "3900", Proto: "tcp", Host: 
"any"}, + {Port: "3901", Proto: "tcp", Host: "any"}, + }, + ) + }, + ) + + secondusNetworkConfig.Storage.Allocations = append( + secondusNetworkConfig.Storage.Allocations, + daecommon.ConfigStorageAllocation{ + DataPath: h.mkDir(t, "data").Path, + MetaPath: h.mkDir(t, "meta").Path, + Capacity: 1, + S3APIPort: 3901, + RPCPort: 3900, + AdminPort: 3902, + }, + ) + + assert.NoError(t, secondus.SetConfig(h.ctx, secondusNetworkConfig)) + + assertGarageLayout(t, map[*integrationHarnessNetwork]int{ + primus: 3, + secondus: 1, + }) + }) + t.Run("remove storage alloc", func(t *testing.T) { var ( h = newIntegrationHarness(t) diff --git a/go/daemon/network/network_it_util_test.go b/go/daemon/network/network_it_util_test.go index 62b4789..bc3be4a 100644 --- a/go/daemon/network/network_it_util_test.go +++ b/go/daemon/network/network_it_util_test.go @@ -11,6 +11,7 @@ import ( "isle/garage/garagesrv" "isle/nebula" "isle/toolkit" + "isle/yamlutil" "os" "path/filepath" "slices" @@ -91,7 +92,7 @@ func newIntegrationHarness(t *testing.T) *integrationHarness { t.Cleanup(func() { if t.Failed() { - logger.Info(ctx, "Test failed, temporarty test root directory NOT deleted") + logger.Info(ctx, "Test failed, temporary test root directory NOT deleted") return } @@ -166,6 +167,7 @@ type createNetworkOpts struct { manualShutdown bool numStorageAllocs int garageDefaultDBEngine garagesrv.DBEngine + blocker *toolkit.TestBlocker } func (o *createNetworkOpts) withDefaults() *createNetworkOpts { @@ -219,6 +221,7 @@ func (h *integrationHarness) createNetwork( networkOpts = &Opts{ GarageAdminToken: "admin_token", Config: &networkConfig, + testBlocker: opts.blocker, garageDefaultDBEngine: opts.garageDefaultDBEngine, } ) @@ -269,6 +272,7 @@ type joinNetworkOpts struct { canCreateHosts bool manualShutdown bool blocker *toolkit.TestBlocker + runtimeDir toolkit.Dir } func (o *joinNetworkOpts) withDefaults() *joinNetworkOpts { @@ -308,7 +312,7 @@ func (h *integrationHarness) joinNetwork( logger = 
h.logger.WithNamespace("networks").WithNamespace(hostNameStr) networkConfig = h.mkNetworkConfig(t, opts.networkConfigOpts) stateDir = h.mkDir(t, "state") - runtimeDir = h.mkDir(t, "runtime") + runtimeDir = opts.runtimeDir networkOpts = &Opts{ GarageAdminToken: "admin_token", Config: &networkConfig, @@ -316,6 +320,10 @@ func (h *integrationHarness) joinNetwork( } ) + if runtimeDir == (toolkit.Dir{}) { + runtimeDir = h.mkDir(t, "runtime") + } + logger.Info(h.ctx, "Joining") joinedNetwork, err := h.constructors.join( h.ctx, @@ -456,3 +464,32 @@ func assertGarageLayout( lastLayout = layout } } + +func assertFirewallInboundEquals( + t *testing.T, + runtimeDir toolkit.Dir, + wantRules []daecommon.ConfigFirewallRule, +) { + var ( + config struct { + Firewall daecommon.ConfigFirewall `yaml:"firewall"` + } + path = filepath.Join(runtimeDir.Path, "nebula.yml") + ) + + normalizeRules := func(rules []daecommon.ConfigFirewallRule) { + slices.SortFunc(rules, func(a, b daecommon.ConfigFirewallRule) int { + return cmp.Or( + cmp.Compare(a.Port, b.Port), + cmp.Compare(a.Code, b.Code), + cmp.Compare(a.Proto, b.Proto), + cmp.Compare(a.Host, b.Host), + ) + }) + } + + require.NoError(t, yamlutil.LoadYamlFile(&config, path)) + normalizeRules(wantRules) + normalizeRules(config.Firewall.Inbound) + assert.Equal(t, wantRules, config.Firewall.Inbound) +} diff --git a/go/toolkit/testutils_blocker.go b/go/toolkit/testutils_blocker.go index 358cb03..9d97033 100644 --- a/go/toolkit/testutils_blocker.go +++ b/go/toolkit/testutils_blocker.go @@ -34,13 +34,13 @@ func (eb ExpectedBlockpoint) Unblock() { close(eb.unblockCh) } -// On is a helper which will spawn a go-routine, call Wait on the +// Then is a helper which will spawn a go-routine, call Wait on the // ExpectedBlockpoint, call the given callback, and then Unblock the // ExpectedBlockpoint. // // If Wait returns an error (due to context cancellation) then this fails the // test and returns without calling the callback. 
-func (eb ExpectedBlockpoint) On(t *testing.T, ctx context.Context, cb func()) { +func (eb ExpectedBlockpoint) Then(t *testing.T, ctx context.Context, cb func()) { go func() { defer eb.Unblock() if !assert.NoError(t, eb.Wait(ctx)) { @@ -69,9 +69,9 @@ func NewTestBlocker(t *testing.T) *TestBlocker { return b } -// Blockpoint will block if and only if TestBlocker is non-nil and -// ExpectBlockpoint has been called with the same ID previously. If the context -// is canceled while blocking then this call will return. +// Blockpoint will block if and only if TestBlocker is non-nil and Expect has +// been called with the same ID previously. If the context is canceled while +// blocking then this call will return. func (b *TestBlocker) Blockpoint(ctx context.Context, id string) { if b == nil { return @@ -99,10 +99,10 @@ func (b *TestBlocker) Blockpoint(ctx context.Context, id string) { } } -// ExpectBlockpoint will cause the TestBlocker to block upon the next call to -// Blockpoint using the same id. The returned ExpectBlockpoint can be used to -// wait until Blockpoint is called, as well as to unblock it. -func (b *TestBlocker) ExpectBlockpoint(id string) ExpectedBlockpoint { +// Expect will cause the TestBlocker to block upon the next call to Blockpoint +// using the same id. The returned ExpectedBlockpoint can be used to wait until +// Blockpoint is called, as well as to unblock it. +func (b *TestBlocker) Expect(id string) ExpectedBlockpoint { b.l.Lock() defer b.l.Unlock() @@ -124,8 +124,8 @@ func (b *TestBlocker) ExpectBlockpoint(id string) ExpectedBlockpoint { return blockpoint } -// AssertExpectations will Fail the test and return false if any calls to -// ExpectBlockpoint have not had a corresponding Blockpoint call made. +// AssertExpectations will Fail the test and return false if any calls to Expect +// have not had a corresponding Blockpoint call made. 
func (b *TestBlocker) AssertExpectations(t *testing.T) bool { b.l.Lock() defer b.l.Unlock() diff --git a/tasks/misc/host-firewall-autoconfig.md b/tasks/misc/host-firewall-autoconfig.md new file mode 100644 index 0000000..e03e4d5 --- /dev/null +++ b/tasks/misc/host-firewall-autoconfig.md @@ -0,0 +1,27 @@ +--- +type: task +--- + +The host's firewall should be auto-configured, by default, to allow all incoming +traffic for a network's CIDR. For Linux this will (probably) mean making +(system?) calls to iptables. + +- There must be a mechanism for the user to disable this behavior if they want, + likely just as part of the `daemon.yml` and not CLI for MVP. + +- The operator "Configuring Firewall" documentation must be updated. + +- A network's Shutdown method should clear out all enabled rules. + +- On startup the network needs to properly handle the rules already being + present, either because the user added them manually previously or there was a + previous unclean shutdown. + +- Check if systemd service file needs any updates with respect to capabilities + or `After` directives. + +- Keep in mind that IPv6 overlay networks will need to be supported in the + future, so ip6tables needs to be investigated. + +- Make sure that using alternative firewalls, like ufw, works as expected. Also, + how does nftables fit in here? diff --git a/tasks/misc/open-port-checker.md b/tasks/misc/open-port-checker.md new file mode 100644 index 0000000..00bb7da --- /dev/null +++ b/tasks/misc/open-port-checker.md @@ -0,0 +1,30 @@ +--- +type: task +after: + - /nats/rpc.md +--- + +A mechanism should be developed, using NATS, for a host A to double check that +host B can connect to it on a certain `host:port`. This can be used for a couple +use-cases: + +- Make sure that VPN public address is connectable prior to being configured. + +- Make sure that garage RPC port is connectable prior to being configured. 
+ +- Make sure that HTTP(s) ports are connectable prior to being configured + (future). + +The host which is configuring a port to be opened will always be the one to +initiate the process. It will: + +- Open a dummy HTTP server on the port in question, with a handler which always + returns a randomly generated token. + +- Publish an RPC request on NATS which indicates which `host:port` it wishes to + confirm connectability and the token. + +- Wait for a response to the NATS request indicating either that the connection + was successful, or an error indicating why it wasn't. Errors could include: + - Timeout (probably blocked by firewall) + - Challenge token not returned (something else is listening at that port) diff --git a/tasks/nats/add.md b/tasks/nats/add.md index 84346ca..eca0663 100644 --- a/tasks/nats/add.md +++ b/tasks/nats/add.md @@ -1,5 +1,7 @@ --- type: task +after: + - /misc/host-firewall-autoconfig.md --- Introduce [NATS][nats] as a new service run by Isle. All hosts should join the diff --git a/tasks/nats/rpc.md b/tasks/nats/rpc.md index ea7033d..52642d5 100644 --- a/tasks/nats/rpc.md +++ b/tasks/nats/rpc.md @@ -5,9 +5,10 @@ after: --- A general RPC mechanism should be developed which allows one group of hosts to -handle RPC calls made by other hosts. Each RPC request should be signed by the -host which is making it, and the response should be signed and encrypted by the -responder. +handle RPC calls made by other hosts. -The JSONRPC2 framework already developed for communication between CLI and -daemon can be re-used here. +- Each RPC request should be signed by the host which is making it, and the + response should be signed and encrypted by the responder. + +- The JSONRPC2 framework already developed for communication between CLI and + daemon can be re-used here.