package network

import (
	"bytes"
	"context"
	"encoding/json"
	"errors"
	"fmt"
	"isle/bootstrap"
	"isle/daemon/daecommon"
	"isle/garage"
	"isle/nebula"
	"isle/secrets"
	"net"
	"net/netip"
	"path/filepath"
	"slices"
	"strconv"
	"time"

	"dev.mediocregopher.com/mediocre-go-lib.git/mctx"
	"dev.mediocregopher.com/mediocre-go-lib.git/mlog"
	"github.com/minio/minio-go/v7"
	"golang.org/x/exp/maps"
)

// Paths within garage's global bucket.
const (
	garageGlobalBucketBootstrapHostsDirPath = "bootstrap/hosts"
)

func getGarageClientParams(
	ctx context.Context,
	secretsStore secrets.Store,
	currBootstrap bootstrap.Bootstrap,
) (
	GarageClientParams, error,
) {
	creds, err := daecommon.GetGarageS3APIGlobalBucketCredentials(
		ctx, secretsStore,
	)
	if err != nil {
		return GarageClientParams{}, fmt.Errorf(
			"getting garage global bucket creds: %w", err,
		)
	}

	rpcSecret, err := daecommon.GetGarageRPCSecret(ctx, secretsStore)
	if err != nil && !errors.Is(err, secrets.ErrNotFound) {
		return GarageClientParams{}, fmt.Errorf(
			"getting garage rpc secret: %w", err,
		)
	}

	return GarageClientParams{
		Node:                         currBootstrap.ChooseGarageNode(),
		GlobalBucketS3APICredentials: creds,
		RPCSecret:                    rpcSecret,
	}, nil
}

func garageAdminClientLogger(logger *mlog.Logger) *mlog.Logger {
	return logger.WithNamespace("garageAdminClient")
}

// newGarageAdminClient will return an AdminClient for a local garage instance,
// or it will _panic_ if there is no local instance configured.
func newGarageAdminClient(
	logger *mlog.Logger,
	networkConfig daecommon.NetworkConfig,
	adminToken string,
	host bootstrap.Host,
) *garage.AdminClient {
	return garage.NewAdminClient(
		garageAdminClientLogger(logger),
		net.JoinHostPort(
			host.IP().String(),
			strconv.Itoa(networkConfig.Storage.Allocations[0].AdminPort),
		),
		adminToken,
	)
}

func garageApplyLayout(
	ctx context.Context,
	logger *mlog.Logger,
	networkConfig daecommon.NetworkConfig,
	adminToken string,
	prevHost, currHost bootstrap.Host,
) error {
	var (
		adminClient = newGarageAdminClient(
			logger, networkConfig, adminToken, currHost,
		)

		hostName = currHost.Name
		allocs   = networkConfig.Storage.Allocations
		roles    = make([]garage.Role, len(allocs))
		roleIDs  = map[string]struct{}{}

		idsToRemove = make([]string, 0, len(prevHost.Garage.Instances))
	)
	defer adminClient.Close()

	for i, alloc := range allocs {
		id := daecommon.BootstrapGarageHostForAlloc(currHost, alloc).ID
		roleIDs[id] = struct{}{}

		zone := string(hostName)
		if alloc.Zone != "" {
			zone = alloc.Zone
		}

		roles[i] = garage.Role{
			ID: id,
			// The allocation's capacity is configured in gigabytes; garage
			// expects the role's capacity in bytes.
			Capacity: alloc.Capacity * 1_000_000_000,
			Zone:     zone,
			Tags:     []string{},
		}
	}

	for _, prevInst := range prevHost.Garage.Instances {
		if _, ok := roleIDs[prevInst.ID]; !ok {
			idsToRemove = append(idsToRemove, prevInst.ID)
		}
	}

	return adminClient.ApplyLayout(ctx, roles, idsToRemove)
}

func garageInitializeGlobalBucket(
	ctx context.Context,
	logger *mlog.Logger,
	networkConfig daecommon.NetworkConfig,
	adminToken string,
	host bootstrap.Host,
) (
	garage.S3APICredentials, error,
) {
	adminClient := newGarageAdminClient(logger, networkConfig, adminToken, host)
	defer adminClient.Close()

	creds, err := adminClient.CreateS3APICredentials(
		ctx, garage.GlobalBucketS3APICredentialsName,
	)
	if err != nil {
		return creds, fmt.Errorf("creating global bucket credentials: %w", err)
	}

	bucketID, err := adminClient.CreateBucket(ctx, garage.GlobalBucket)
	if err != nil {
		return creds, fmt.Errorf("creating global bucket: %w", err)
	}

	if err := adminClient.GrantBucketPermissions(
		ctx,
		bucketID,
		creds.ID,
		garage.BucketPermissionRead,
		garage.BucketPermissionWrite,
	); err != nil {
		return creds, fmt.Errorf(
			"granting permissions to shared global bucket key: %w", err,
		)
	}

	return creds, nil
}
func getGarageBootstrapHosts(
	ctx context.Context,
	logger *mlog.Logger,
	secretsStore secrets.Store,
	currBootstrap bootstrap.Bootstrap,
) (
	map[nebula.HostName]bootstrap.Host, error,
) {
	garageClientParams, err := getGarageClientParams(
		ctx, secretsStore, currBootstrap,
	)
	if err != nil {
		return nil, fmt.Errorf("getting garage client params: %w", err)
	}

	var (
		client = garageClientParams.GlobalBucketS3APIClient()
		hosts  = map[nebula.HostName]bootstrap.Host{}

		objInfoCh = client.ListObjects(
			ctx, garage.GlobalBucket,
			minio.ListObjectsOptions{
				Prefix:    garageGlobalBucketBootstrapHostsDirPath,
				Recursive: true,
			},
		)
	)
	defer client.Close()

	for objInfo := range objInfoCh {
		ctx := mctx.Annotate(ctx, "objectKey", objInfo.Key)

		if objInfo.Err != nil {
			return nil, fmt.Errorf("listing objects: %w", objInfo.Err)
		}

		obj, err := client.GetObject(
			ctx, garage.GlobalBucket, objInfo.Key, minio.GetObjectOptions{},
		)
		if err != nil {
			return nil, fmt.Errorf("retrieving object %q: %w", objInfo.Key, err)
		}

		var authedHost bootstrap.AuthenticatedHost
		err = json.NewDecoder(obj).Decode(&authedHost)
		obj.Close()

		if err != nil {
			logger.Warn(ctx, "Object contains invalid json", err)
			continue
		}

		host, err := authedHost.Unwrap(currBootstrap.CAPublicCredentials)
		if err != nil {
			logger.Warn(ctx, "Host could not be authenticated", err)
			continue
		}

		hosts[host.Name] = host
	}

	return hosts, nil
}

// putGarageBoostrapHost places the <hostname>.json.signed file for this host
// into garage so that other hosts are able to see relevant configuration for
// it.
func putGarageBoostrapHost(
	ctx context.Context,
	secretsStore secrets.Store,
	currBootstrap bootstrap.Bootstrap,
) error {
	garageClientParams, err := getGarageClientParams(
		ctx, secretsStore, currBootstrap,
	)
	if err != nil {
		return fmt.Errorf("getting garage client params: %w", err)
	}

	var (
		host   = currBootstrap.ThisHost()
		client = garageClientParams.GlobalBucketS3APIClient()
	)
	defer client.Close()

	configured, err := nebula.Sign(
		host.HostConfigured, currBootstrap.PrivateCredentials.SigningPrivateKey,
	)
	if err != nil {
		return fmt.Errorf("signing host configured data: %w", err)
	}

	hostB, err := json.Marshal(bootstrap.AuthenticatedHost{
		Assigned:   currBootstrap.SignedHostAssigned,
		Configured: configured,
	})
	if err != nil {
		return fmt.Errorf("encoding host data: %w", err)
	}

	filePath := filepath.Join(
		garageGlobalBucketBootstrapHostsDirPath,
		string(host.Name)+".json.signed",
	)

	_, err = client.PutObject(
		ctx, garage.GlobalBucket, filePath,
		bytes.NewReader(hostB), int64(len(hostB)),
		minio.PutObjectOptions{},
	)
	if err != nil {
		return fmt.Errorf("writing to %q in global bucket: %w", filePath, err)
	}

	return nil
}

func removeGarageBootstrapHost(
	ctx context.Context, client *garage.S3APIClient, hostName nebula.HostName,
) error {
	filePath := filepath.Join(
		garageGlobalBucketBootstrapHostsDirPath,
		string(hostName)+".json.signed",
	)

	return client.RemoveObject(
		ctx, garage.GlobalBucket, filePath, minio.RemoveObjectOptions{},
	)
}

// We can wait for the garage instance to appear healthy, but there are cases
// where it still hasn't fully synced the list of buckets and bucket
// credentials. For those cases it's necessary to do this as an additional
// check.
func garageWaitForAlloc(
	ctx context.Context,
	logger *mlog.Logger,
	alloc daecommon.ConfigStorageAllocation,
	adminToken string,
	host bootstrap.Host,
) error {
	var (
		hostIP      = host.IP().String()
		adminClient = garage.NewAdminClient(
			garageAdminClientLogger(logger),
			net.JoinHostPort(hostIP, strconv.Itoa(alloc.AdminPort)),
			adminToken,
		)
	)
	defer adminClient.Close()

	ctx = mctx.WithAnnotator(ctx, alloc)

	for {
		logger.Info(ctx, "Checking if node has synced bucket list")

		buckets, err := adminClient.ListBuckets(ctx)
		if err != nil {
			return fmt.Errorf("listing buckets: %w", err)
		} else if len(buckets) == 0 {
			logger.WarnString(
				ctx, "No buckets found, will wait a bit and try again",
			)
			select {
			case <-time.After(1 * time.Second):
				continue
			case <-ctx.Done():
				return ctx.Err()
			}
		}

		return nil
	}
}

// garageNodeBuddyPeers returns the "buddy" peers of the given host, based on
// the given garage cluster status. It will return zero values if the host has
// no buddy.
//
// For situations where we want one host to affect the cluster layout of
// another host's peers, we use a simple system to determine a single host
// which is responsible. The goal is not to be 100% race-proof (garage handles
// that), but rather to try to prevent all hosts from modifying the same host's
// layout at the same time.
//
// The system is to order all hosts by their IP, and say that each host is
// responsible for (aka the "buddy" of) the host immediately after their own in
// that list. The last host in that list is responsible for the first.
func garageNodeBuddyPeers(
	status garage.ClusterStatus, host bootstrap.Host,
) (
	netip.Addr, []garage.Role,
) {
	var (
		thisIP        = host.IP()
		nodeRolesByIP = map[netip.Addr][]garage.Role{}
	)

	for _, node := range status.Nodes {
		if node.Role == nil {
			continue
		}

		ip := node.Addr.Addr()
		nodeRolesByIP[ip] = append(nodeRolesByIP[ip], *node.Role)
	}

	// If there is only a single host in the cluster (or, somehow, none) then
	// that host has no buddy.
	if len(nodeRolesByIP) < 2 {
		return netip.Addr{}, nil
	}

	nodeIPs := maps.Keys(nodeRolesByIP)
	slices.SortFunc(nodeIPs, netip.Addr.Compare)

	for i, nodeIP := range nodeIPs {
		var buddyIP netip.Addr
		if i == len(nodeIPs)-1 {
			buddyIP = nodeIPs[0]
		} else if nodeIP == thisIP {
			buddyIP = nodeIPs[i+1]
		} else {
			continue
		}

		return buddyIP, nodeRolesByIP[buddyIP]
	}

	panic("Unreachable")
}
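// The function below is an illustrative sketch only, and is not referenced
// elsewhere in this package: it demonstrates the buddy-ordering rule described
// above on a set of hypothetical IPs. Hosts are sorted by IP, each host is the
// buddy of the host after it in the sorted list, and the last host wraps
// around to the first.
func exampleBuddyOrdering() map[netip.Addr]netip.Addr {
	ips := []netip.Addr{
		netip.MustParseAddr("10.10.0.3"),
		netip.MustParseAddr("10.10.0.1"),
		netip.MustParseAddr("10.10.0.2"),
	}
	slices.SortFunc(ips, netip.Addr.Compare)

	// buddies[x] is the host which x is responsible for:
	//   10.10.0.1 -> 10.10.0.2, 10.10.0.2 -> 10.10.0.3, 10.10.0.3 -> 10.10.0.1
	buddies := map[netip.Addr]netip.Addr{}
	for i, ip := range ips {
		buddies[ip] = ips[(i+1)%len(ips)]
	}

	return buddies
}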