Compare commits
40 Commits
Author | SHA1 | Date | |
---|---|---|---|
|
92dd2bbe15 | ||
|
18e5811159 | ||
|
5b1f50be65 | ||
|
9df7fa0bcd | ||
|
fd85010a40 | ||
|
cfbfa09d24 | ||
|
db921cc05f | ||
|
4fa2646a75 | ||
|
d7ab2c639e | ||
|
d13bde5e26 | ||
|
d2c365767b | ||
|
fb6c9a1243 | ||
|
9030c1eef8 | ||
|
654775308e | ||
|
f5b0972781 | ||
|
d148b83d4f | ||
|
4b93ce179a | ||
|
4ba18ce9cc | ||
|
ef662822c9 | ||
|
da8b170748 | ||
|
74e50edddd | ||
|
b3bf16ee27 | ||
|
ddd3de7fce | ||
|
84d43501ce | ||
|
012ade5d4b | ||
|
ef5ca86dfc | ||
|
9ec4cca334 | ||
|
18ee8efb5f | ||
|
55eb4e87c4 | ||
|
0bb1577ae1 | ||
|
6eb26be548 | ||
|
eb86eaa6d2 | ||
|
80d7b7d858 | ||
|
93a7132b4c | ||
|
dc5245ce65 | ||
|
70c1d3db46 | ||
|
bc11701999 | ||
|
ca4cc7e44f | ||
|
17ebb65273 | ||
|
7011b71fbd |
3
.cargo/config.toml
Normal file
3
.cargo/config.toml
Normal file
@ -0,0 +1,3 @@
|
||||
[target.x86_64-unknown-linux-gnu]
|
||||
linker = "clang"
|
||||
rustflags = ["-C", "link-arg=-fuse-ld=mold"]
|
4
.gitignore
vendored
4
.gitignore
vendored
@ -3,6 +3,4 @@
|
||||
/pki
|
||||
**/*.rs.bk
|
||||
*.swp
|
||||
/.direnv
|
||||
/.cargo
|
||||
/result
|
||||
/.direnv
|
15
Cargo.lock
generated
15
Cargo.lock
generated
@ -1271,6 +1271,7 @@ dependencies = [
|
||||
"http-range",
|
||||
"httpdate",
|
||||
"hyper",
|
||||
"hyperlocal",
|
||||
"idna",
|
||||
"md-5",
|
||||
"multer",
|
||||
@ -1464,6 +1465,7 @@ dependencies = [
|
||||
"garage_util",
|
||||
"http",
|
||||
"hyper",
|
||||
"hyperlocal",
|
||||
"opentelemetry",
|
||||
"percent-encoding",
|
||||
"tokio",
|
||||
@ -1776,6 +1778,19 @@ dependencies = [
|
||||
"tokio-io-timeout",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "hyperlocal"
|
||||
version = "0.8.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "0fafdf7b2b2de7c9784f76e02c0935e65a8117ec3b768644379983ab333ac98c"
|
||||
dependencies = [
|
||||
"futures-util",
|
||||
"hex",
|
||||
"hyper",
|
||||
"pin-project",
|
||||
"tokio",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "iana-time-zone"
|
||||
version = "0.1.57"
|
||||
|
210
default.nix
210
default.nix
@ -1,166 +1,56 @@
|
||||
{
|
||||
buildSystem ? builtins.currentSystem,
|
||||
targetSystem ? buildSystem,
|
||||
gitVersion ? null,
|
||||
release ? false,
|
||||
features ? null,
|
||||
}:
|
||||
{ system ? builtins.currentSystem, git_version ? null, }:
|
||||
|
||||
with import ./nix/common.nix;
|
||||
|
||||
let
|
||||
pkgsSrc = import ./nix/pkgs.nix;
|
||||
newBuildTarget = {
|
||||
nixPkgsSystem,
|
||||
rustTarget ? nixPkgsSystem,
|
||||
nativeBuildInputs ? pkgsCross: [],
|
||||
rustFlags ? pkgsCross: [],
|
||||
}: {
|
||||
inherit nixPkgsSystem rustTarget nativeBuildInputs rustFlags;
|
||||
pkgs = import pkgsSrc { };
|
||||
compile = import ./nix/compile.nix;
|
||||
|
||||
build_debug_and_release = (target: {
|
||||
debug = (compile {
|
||||
inherit system target git_version pkgsSrc cargo2nixOverlay;
|
||||
release = false;
|
||||
}).workspace.garage { compileMode = "build"; };
|
||||
|
||||
release = (compile {
|
||||
inherit system target git_version pkgsSrc cargo2nixOverlay;
|
||||
release = true;
|
||||
}).workspace.garage { compileMode = "build"; };
|
||||
});
|
||||
|
||||
test = (rustPkgs:
|
||||
pkgs.symlinkJoin {
|
||||
name = "garage-tests";
|
||||
paths =
|
||||
builtins.map (key: rustPkgs.workspace.${key} { compileMode = "test"; })
|
||||
(builtins.attrNames rustPkgs.workspace);
|
||||
});
|
||||
|
||||
in {
|
||||
pkgs = {
|
||||
amd64 = build_debug_and_release "x86_64-unknown-linux-musl";
|
||||
i386 = build_debug_and_release "i686-unknown-linux-musl";
|
||||
arm64 = build_debug_and_release "aarch64-unknown-linux-musl";
|
||||
arm = build_debug_and_release "armv6l-unknown-linux-musleabihf";
|
||||
};
|
||||
|
||||
# centralize per-target configuration in a single place.
|
||||
buildTargets = {
|
||||
"x86_64-linux" = newBuildTarget {
|
||||
nixPkgsSystem = "x86_64-unknown-linux-musl";
|
||||
};
|
||||
|
||||
"i686-linux" = newBuildTarget {
|
||||
nixPkgsSystem = "i686-unknown-linux-musl";
|
||||
};
|
||||
|
||||
"aarch64-linux" = newBuildTarget {
|
||||
nixPkgsSystem = "aarch64-unknown-linux-musl";
|
||||
};
|
||||
|
||||
# Old Raspberry Pi's (not currently supported due to linking errors with
|
||||
# libsqlite3 and libsodium)
|
||||
#"armv6l-linux" = newBuildTarget {
|
||||
# nixPkgsSystem = "armv6l-unknown-linux-musleabihf";
|
||||
# rustTarget = "arm-unknown-linux-musleabihf";
|
||||
#};
|
||||
|
||||
"x86_64-windows" = newBuildTarget {
|
||||
nixPkgsSystem = "x86_64-w64-mingw32";
|
||||
rustTarget = "x86_64-pc-windows-gnu";
|
||||
nativeBuildInputs = pkgsCross: [ pkgsCross.windows.pthreads ];
|
||||
rustFlags = pkgsCross: [
|
||||
"-C" "link-arg=-L${pkgsCross.windows.pthreads}/lib"
|
||||
];
|
||||
};
|
||||
};
|
||||
|
||||
buildTarget = buildTargets.${targetSystem};
|
||||
|
||||
pkgs = import pkgsSrc { system = buildSystem; };
|
||||
pkgsCross = import pkgsSrc {
|
||||
system = buildSystem;
|
||||
crossSystem.config = buildTarget.nixPkgsSystem;
|
||||
};
|
||||
|
||||
rustTarget = buildTarget.rustTarget;
|
||||
|
||||
toolchain = let
|
||||
fenix = import (pkgs.fetchFromGitHub {
|
||||
owner = "nix-community";
|
||||
repo = "fenix";
|
||||
rev = "81ab0b4f7ae9ebb57daa0edf119c4891806e4d3a";
|
||||
hash = "sha256-bZmI7ytPAYLpyFNgj5xirDkKuAniOkj1xHdv5aIJ5GM=";
|
||||
}) {
|
||||
system = buildSystem;
|
||||
};
|
||||
|
||||
mkToolchain = fenixTarget: fenixTarget.toolchainOf {
|
||||
channel = "1.68.2";
|
||||
sha256 = "sha256-4vetmUhTUsew5FODnjlnQYInzyLNyDwocGa4IvMk3DM=";
|
||||
};
|
||||
in
|
||||
fenix.combine [
|
||||
(mkToolchain fenix).rustc
|
||||
(mkToolchain fenix).rustfmt
|
||||
(mkToolchain fenix).cargo
|
||||
(mkToolchain fenix).clippy
|
||||
(mkToolchain fenix.targets.${rustTarget}).rust-std
|
||||
];
|
||||
|
||||
naersk = let
|
||||
naerskSrc = pkgs.fetchFromGitHub {
|
||||
owner = "nix-community";
|
||||
repo = "naersk";
|
||||
rev = "d9a33d69a9c421d64c8d925428864e93be895dcc";
|
||||
hash = "sha256-e136hTT7LqQ2QjOTZQMW+jnsevWwBpMj78u6FRUsH9I=";
|
||||
};
|
||||
in
|
||||
pkgs.callPackages naerskSrc {
|
||||
cargo = toolchain;
|
||||
rustc = toolchain;
|
||||
};
|
||||
|
||||
builtFeatures = if features != null then
|
||||
features
|
||||
else (
|
||||
[ "garage/bundled-libs" "garage/sled" "garage/lmdb" "garage/k2v" ] ++ (
|
||||
if release then [
|
||||
"garage/consul-discovery"
|
||||
"garage/kubernetes-discovery"
|
||||
"garage/metrics"
|
||||
"garage/telemetry-otlp"
|
||||
test = {
|
||||
amd64 = test (compile {
|
||||
inherit system git_version pkgsSrc cargo2nixOverlay;
|
||||
target = "x86_64-unknown-linux-musl";
|
||||
features = [
|
||||
"garage/bundled-libs"
|
||||
"garage/k2v"
|
||||
"garage/sled"
|
||||
"garage/lmdb"
|
||||
"garage/sqlite"
|
||||
] else [ ]
|
||||
)
|
||||
);
|
||||
|
||||
# For some reason the pkgsCross.pkgsStatic build of libsodium doesn't contain
|
||||
# a `.a` file when compiled to a windows target, but rather contains
|
||||
# a `.dll.a` file which libsodium-sys doesn't pick up on. Copying the one to
|
||||
# be the other seems to work.
|
||||
libsodium = pkgs.runCommand "libsodium-wrapped" {
|
||||
libsodium = pkgsCross.pkgsStatic.libsodium;
|
||||
} ''
|
||||
cp -rL "$libsodium" "$out"
|
||||
chmod -R +w "$out"
|
||||
if [ ! -e "$out"/lib/libsodium.a ] && [ -f "$out"/lib/libsodium.dll.a ]; then
|
||||
cp "$out"/lib/libsodium.dll.a "$out"/lib/libsodium.a
|
||||
fi
|
||||
'';
|
||||
|
||||
in rec {
|
||||
inherit pkgs pkgsCross;
|
||||
|
||||
# Exported separately so it can be used from shell.nix
|
||||
buildEnv = rec {
|
||||
nativeBuildInputs = (buildTarget.nativeBuildInputs pkgsCross) ++ [
|
||||
toolchain
|
||||
pkgs.protobuf
|
||||
|
||||
# Required for shell because of rust dependency build scripts which must
|
||||
# run on the build system.
|
||||
pkgs.stdenv.cc
|
||||
];
|
||||
|
||||
SODIUM_LIB_DIR = "${libsodium}/lib";
|
||||
|
||||
# Required because ring crate is special. This also seems to have
|
||||
# fixed some issues with the x86_64-windows cross-compile :shrug:
|
||||
TARGET_CC = "${pkgsCross.stdenv.cc}/bin/${pkgsCross.stdenv.cc.targetPrefix}cc";
|
||||
|
||||
CARGO_BUILD_TARGET = rustTarget;
|
||||
CARGO_BUILD_RUSTFLAGS = [
|
||||
"-C" "target-feature=+crt-static"
|
||||
"-C" "link-arg=-static"
|
||||
|
||||
# https://github.com/rust-lang/cargo/issues/4133
|
||||
"-C" "linker=${TARGET_CC}"
|
||||
] ++ (buildTarget.rustFlags pkgsCross);
|
||||
];
|
||||
});
|
||||
};
|
||||
clippy = {
|
||||
amd64 = (compile {
|
||||
inherit system git_version pkgsSrc cargo2nixOverlay;
|
||||
target = "x86_64-unknown-linux-musl";
|
||||
compiler = "clippy";
|
||||
}).workspace.garage { compileMode = "build"; };
|
||||
};
|
||||
|
||||
build = naersk.buildPackage (rec {
|
||||
inherit release;
|
||||
|
||||
src = ./.;
|
||||
strictDeps = true;
|
||||
doCheck = false;
|
||||
|
||||
cargoBuildOptions = prev: prev++[
|
||||
"--features=${builtins.concatStringsSep "," builtFeatures}"
|
||||
];
|
||||
} // buildEnv);
|
||||
}
|
||||
|
@ -1,24 +0,0 @@
|
||||
<!DOCTYPE html>
|
||||
<html>
|
||||
<head>
|
||||
<title>Garage Administration API v0</title>
|
||||
<!-- needed for adaptive design -->
|
||||
<meta charset="utf-8"/>
|
||||
<meta name="viewport" content="width=device-width, initial-scale=1">
|
||||
<link href="./css/redoc.css" rel="stylesheet">
|
||||
|
||||
<!--
|
||||
Redoc doesn't change outer page styles
|
||||
-->
|
||||
<style>
|
||||
body {
|
||||
margin: 0;
|
||||
padding: 0;
|
||||
}
|
||||
</style>
|
||||
</head>
|
||||
<body>
|
||||
<redoc spec-url='./garage-admin-v1.yml'></redoc>
|
||||
<script src="./redoc.standalone.js"> </script>
|
||||
</body>
|
||||
</html>
|
File diff suppressed because it is too large
Load Diff
@ -37,84 +37,30 @@ import (
|
||||
"context"
|
||||
"fmt"
|
||||
"os"
|
||||
"strings"
|
||||
garage "git.deuxfleurs.fr/garage-sdk/garage-admin-sdk-golang"
|
||||
)
|
||||
|
||||
func main() {
|
||||
// Initialization
|
||||
// Set Host and other parameters
|
||||
configuration := garage.NewConfiguration()
|
||||
configuration.Host = "127.0.0.1:3903"
|
||||
|
||||
|
||||
// We can now generate a client
|
||||
client := garage.NewAPIClient(configuration)
|
||||
|
||||
// Authentication is handled through the context pattern
|
||||
ctx := context.WithValue(context.Background(), garage.ContextAccessToken, "s3cr3t")
|
||||
|
||||
// Nodes
|
||||
fmt.Println("--- nodes ---")
|
||||
nodes, _, _ := client.NodesApi.GetNodes(ctx).Execute()
|
||||
fmt.Fprintf(os.Stdout, "First hostname: %v\n", nodes.KnownNodes[0].Hostname)
|
||||
capa := int64(1000000000)
|
||||
change := []garage.NodeRoleChange{
|
||||
garage.NodeRoleChange{NodeRoleUpdate: &garage.NodeRoleUpdate {
|
||||
Id: *nodes.KnownNodes[0].Id,
|
||||
Zone: "dc1",
|
||||
Capacity: *garage.NewNullableInt64(&capa),
|
||||
Tags: []string{ "fast", "amd64" },
|
||||
}},
|
||||
// Send a request
|
||||
resp, r, err := client.NodesApi.GetNodes(ctx).Execute()
|
||||
if err != nil {
|
||||
fmt.Fprintf(os.Stderr, "Error when calling `NodesApi.GetNodes``: %v\n", err)
|
||||
fmt.Fprintf(os.Stderr, "Full HTTP response: %v\n", r)
|
||||
}
|
||||
staged, _, _ := client.LayoutApi.AddLayout(ctx).NodeRoleChange(change).Execute()
|
||||
msg, _, _ := client.LayoutApi.ApplyLayout(ctx).LayoutVersion(*garage.NewLayoutVersion(staged.Version + 1)).Execute()
|
||||
fmt.Printf(strings.Join(msg.Message, "\n")) // Layout configured
|
||||
|
||||
health, _, _ := client.NodesApi.GetHealth(ctx).Execute()
|
||||
fmt.Printf("Status: %s, nodes: %v/%v, storage: %v/%v, partitions: %v/%v\n", health.Status, health.ConnectedNodes, health.KnownNodes, health.StorageNodesOk, health.StorageNodes, health.PartitionsAllOk, health.Partitions)
|
||||
|
||||
// Key
|
||||
fmt.Println("\n--- key ---")
|
||||
key := "openapi-key"
|
||||
keyInfo, _, _ := client.KeyApi.AddKey(ctx).AddKeyRequest(garage.AddKeyRequest{Name: *garage.NewNullableString(&key) }).Execute()
|
||||
defer client.KeyApi.DeleteKey(ctx).Id(*keyInfo.AccessKeyId).Execute()
|
||||
fmt.Printf("AWS_ACCESS_KEY_ID=%s\nAWS_SECRET_ACCESS_KEY=%s\n", *keyInfo.AccessKeyId, *keyInfo.SecretAccessKey.Get())
|
||||
|
||||
id := *keyInfo.AccessKeyId
|
||||
canCreateBucket := true
|
||||
updateKeyRequest := *garage.NewUpdateKeyRequest()
|
||||
updateKeyRequest.SetName("openapi-key-updated")
|
||||
updateKeyRequest.SetAllow(garage.UpdateKeyRequestAllow { CreateBucket: &canCreateBucket })
|
||||
update, _, _ := client.KeyApi.UpdateKey(ctx).Id(id).UpdateKeyRequest(updateKeyRequest).Execute()
|
||||
fmt.Printf("Updated %v with key name %v\n", *update.AccessKeyId, *update.Name)
|
||||
|
||||
keyList, _, _ := client.KeyApi.ListKeys(ctx).Execute()
|
||||
fmt.Printf("Keys count: %v\n", len(keyList))
|
||||
|
||||
// Bucket
|
||||
fmt.Println("\n--- bucket ---")
|
||||
global_name := "global-ns-openapi-bucket"
|
||||
local_name := "local-ns-openapi-bucket"
|
||||
bucketInfo, _, _ := client.BucketApi.CreateBucket(ctx).CreateBucketRequest(garage.CreateBucketRequest{
|
||||
GlobalAlias: &global_name,
|
||||
LocalAlias: &garage.CreateBucketRequestLocalAlias {
|
||||
AccessKeyId: keyInfo.AccessKeyId,
|
||||
Alias: &local_name,
|
||||
},
|
||||
}).Execute()
|
||||
defer client.BucketApi.DeleteBucket(ctx).Id(*bucketInfo.Id).Execute()
|
||||
fmt.Printf("Bucket id: %s\n", *bucketInfo.Id)
|
||||
|
||||
updateBucketRequest := *garage.NewUpdateBucketRequest()
|
||||
website := garage.NewUpdateBucketRequestWebsiteAccess()
|
||||
website.SetEnabled(true)
|
||||
website.SetIndexDocument("index.html")
|
||||
website.SetErrorDocument("errors/4xx.html")
|
||||
updateBucketRequest.SetWebsiteAccess(*website)
|
||||
quotas := garage.NewUpdateBucketRequestQuotas()
|
||||
quotas.SetMaxSize(1000000000)
|
||||
quotas.SetMaxObjects(999999999)
|
||||
updateBucketRequest.SetQuotas(*quotas)
|
||||
updatedBucket, _, _ := client.BucketApi.UpdateBucket(ctx).Id(*bucketInfo.Id).UpdateBucketRequest(updateBucketRequest).Execute()
|
||||
fmt.Printf("Bucket %v website activation: %v\n", *updatedBucket.Id, *updatedBucket.WebsiteAccess)
|
||||
|
||||
bucketList, _, _ := client.BucketApi.ListBuckets(ctx).Execute()
|
||||
fmt.Printf("Bucket count: %v\n", len(bucketList))
|
||||
// Process the response
|
||||
fmt.Fprintf(os.Stdout, "Target hostname: %v\n", resp.KnownNodes[resp.Node].Hostname)
|
||||
}
|
||||
```
|
||||
|
||||
|
@ -31,9 +31,9 @@ npm install --save git+https://git.deuxfleurs.fr/garage-sdk/garage-admin-sdk-js.
|
||||
A short example:
|
||||
|
||||
```javascript
|
||||
const garage = require('garage_administration_api_v1garage_v0_9_0');
|
||||
const garage = require('garage_administration_api_v0garage_v0_8_0');
|
||||
|
||||
const api = new garage.ApiClient("http://127.0.0.1:3903/v1");
|
||||
const api = new garage.ApiClient("http://127.0.0.1:3903/v0");
|
||||
api.authentications['bearerAuth'].accessToken = "s3cr3t";
|
||||
|
||||
const [node, layout, key, bucket] = [
|
||||
|
@ -80,7 +80,7 @@ from garage_admin_sdk.apis import *
|
||||
from garage_admin_sdk.models import *
|
||||
|
||||
configuration = garage_admin_sdk.Configuration(
|
||||
host = "http://localhost:3903/v1",
|
||||
host = "http://localhost:3903/v0",
|
||||
access_token = "s3cr3t"
|
||||
)
|
||||
|
||||
@ -94,14 +94,13 @@ print(f"running garage {status.garage_version}, node_id {status.node}")
|
||||
|
||||
# Change layout of this node
|
||||
current = layout.get_layout()
|
||||
layout.add_layout([
|
||||
NodeRoleChange(
|
||||
id = status.node,
|
||||
layout.add_layout({
|
||||
status.node: NodeClusterInfo(
|
||||
zone = "dc1",
|
||||
capacity = 1000000000,
|
||||
capacity = 1,
|
||||
tags = [ "dev" ],
|
||||
)
|
||||
])
|
||||
})
|
||||
layout.apply_layout(LayoutVersion(
|
||||
version = current.version + 1
|
||||
))
|
||||
|
@ -38,7 +38,7 @@ Our website serving logic is as follow:
|
||||
|
||||
Now we need to infer the URL of your website through your bucket name.
|
||||
Let's assume:
|
||||
- we set `root_domain = ".web.example.com"` in `garage.toml` ([ref](@/documentation/reference-manual/configuration.md#web_root_domain))
|
||||
- we set `root_domain = ".web.example.com"` in `garage.toml` ([ref](@/documentation/reference-manual/configuration.md#root_domain))
|
||||
- our bucket name is `garagehq.deuxfleurs.fr`.
|
||||
|
||||
Our bucket will be served if the Host field matches one of these 2 values (the port is ignored):
|
||||
|
@ -12,7 +12,7 @@ An introduction to building cluster layouts can be found in the [production depl
|
||||
In Garage, all of the data that can be stored in a given cluster is divided
|
||||
into slices which we call *partitions*. Each partition is stored by
|
||||
one or several nodes in the cluster
|
||||
(see [`replication_mode`](@/documentation/reference-manual/configuration.md#replication_mode)).
|
||||
(see [`replication_mode`](@/documentation/reference-manual/configuration.md#replication-mode)).
|
||||
The layout determines the correspondence between these partitions,
|
||||
which exist on a logical level, and actual storage nodes.
|
||||
|
||||
|
@ -13,11 +13,8 @@ We will bump the version numbers prefixed to each API endpoint at each time the
|
||||
or semantics change, meaning that code that relies on these endpoints will break
|
||||
when changes are introduced.
|
||||
|
||||
Versions:
|
||||
- Before Garage 0.7.2 - no admin API
|
||||
- Garage 0.7.2 - admin APIv0
|
||||
- Garage 0.9.0 - admin APIv1, deprecate admin APIv0
|
||||
|
||||
The Garage administration API was introduced in version 0.7.2, this document
|
||||
does not apply to older versions of Garage.
|
||||
|
||||
|
||||
## Access control
|
||||
@ -134,9 +131,7 @@ $ curl -so /dev/null -w "%{http_code}" http://localhost:3903/check?domain=exampl
|
||||
|
||||
### Cluster operations
|
||||
|
||||
These endpoints have a dedicated OpenAPI spec.
|
||||
- APIv1 - [HTML spec](https://garagehq.deuxfleurs.fr/api/garage-admin-v1.html) - [OpenAPI YAML](https://garagehq.deuxfleurs.fr/api/garage-admin-v1.yml)
|
||||
- APIv0 (deprecated) - [HTML spec](https://garagehq.deuxfleurs.fr/api/garage-admin-v0.html) - [OpenAPI YAML](https://garagehq.deuxfleurs.fr/api/garage-admin-v0.yml)
|
||||
These endpoints are defined on a dedicated [Redocly page](https://garagehq.deuxfleurs.fr/api/garage-admin-v0.html). You can also download its [OpenAPI specification](https://garagehq.deuxfleurs.fr/api/garage-admin-v0.yml).
|
||||
|
||||
Requesting the API from the command line can be as simple as running:
|
||||
|
||||
|
@ -8,8 +8,6 @@ weight = 20
|
||||
Here is an example `garage.toml` configuration file that illustrates all of the possible options:
|
||||
|
||||
```toml
|
||||
replication_mode = "3"
|
||||
|
||||
metadata_dir = "/var/lib/garage/meta"
|
||||
data_dir = "/var/lib/garage/data"
|
||||
metadata_fsync = true
|
||||
@ -23,6 +21,8 @@ sled_cache_capacity = "128MiB"
|
||||
sled_flush_every_ms = 2000
|
||||
lmdb_map_size = "1T"
|
||||
|
||||
replication_mode = "3"
|
||||
|
||||
compression_level = 1
|
||||
|
||||
rpc_secret = "4425f5c26c5e11581d3223904324dcb5b5d5dfb14e5e7f35e38c595424f5f1e6"
|
||||
@ -77,64 +77,157 @@ The following gives details about each available configuration option.
|
||||
|
||||
## Available configuration options
|
||||
|
||||
### Index
|
||||
### `metadata_dir`
|
||||
|
||||
Top-level configuration options:
|
||||
[`block_size`](#block_size),
|
||||
[`bootstrap_peers`](#bootstrap_peers),
|
||||
[`compression_level`](#compression_level),
|
||||
[`data_dir`](#metadata_dir),
|
||||
[`data_fsync`](#data_fsync),
|
||||
[`db_engine`](#db_engine),
|
||||
[`lmdb_map_size`](#lmdb_map_size),
|
||||
[`metadata_dir`](#metadata_dir),
|
||||
[`metadata_fsync`](#metadata_fsync),
|
||||
[`replication_mode`](#replication_mode),
|
||||
[`rpc_bind_addr`](#rpc_bind_addr),
|
||||
[`rpc_public_addr`](#rpc_public_addr),
|
||||
[`rpc_secret`](#rpc_secret),
|
||||
[`rpc_secret_file`](#rpc_secret),
|
||||
[`sled_cache_capacity`](#sled_cache_capacity),
|
||||
[`sled_flush_every_ms`](#sled_flush_every_ms).
|
||||
The directory in which Garage will store its metadata. This contains the node identifier,
|
||||
the network configuration and the peer list, the list of buckets and keys as well
|
||||
as the index of all objects, object version and object blocks.
|
||||
|
||||
The `[consul_discovery]` section:
|
||||
[`api`](#consul_api),
|
||||
[`ca_cert`](#consul_ca_cert),
|
||||
[`client_cert`](#consul_client_cert),
|
||||
[`client_key`](#consul_client_cert),
|
||||
[`consul_http_addr`](#consul_http_addr),
|
||||
[`meta`](#consul_tags),
|
||||
[`service_name`](#consul_service_name),
|
||||
[`tags`](#consul_tags),
|
||||
[`tls_skip_verify`](#consul_tls_skip_verify),
|
||||
[`token`](#consul_token).
|
||||
Store this folder on a fast SSD drive if possible to maximize Garage's performance.
|
||||
|
||||
The `[kubernetes_discovery]` section:
|
||||
[`namespace`](#kube_namespace),
|
||||
[`service_name`](#kube_service_name),
|
||||
[`skip_crd`](#kube_skip_crd).
|
||||
### `data_dir`
|
||||
|
||||
The `[s3_api]` section:
|
||||
[`api_bind_addr`](#s3_api_bind_addr),
|
||||
[`root_domain`](#s3_root_domain),
|
||||
[`s3_region`](#s3_region).
|
||||
The directory in which Garage will store the data blocks of objects.
|
||||
This folder can be placed on an HDD. The space available for `data_dir`
|
||||
should be counted to determine a node's capacity
|
||||
when [adding it to the cluster layout](@/documentation/cookbook/real-world.md).
|
||||
|
||||
The `[s3_web]` section:
|
||||
[`bind_addr`](#web_bind_addr),
|
||||
[`root_domain`](#web_root_domain).
|
||||
Since `v0.9.0`, Garage supports multiple data directories with the following syntax:
|
||||
|
||||
The `[admin]` section:
|
||||
[`api_bind_addr`](#admin_api_bind_addr),
|
||||
[`metrics_token`](#admin_metrics_token),
|
||||
[`metrics_token_file`](#admin_metrics_token),
|
||||
[`admin_token`](#admin_token),
|
||||
[`admin_token_file`](#admin_token),
|
||||
[`trace_sink`](#admin_trace_sink),
|
||||
```toml
|
||||
data_dir = [
|
||||
{ path = "/path/to/old_data", read_only = true },
|
||||
{ path = "/path/to/new_hdd1", capacity = "2T" },
|
||||
{ path = "/path/to/new_hdd2", capacity = "4T" },
|
||||
]
|
||||
```
|
||||
|
||||
See [the dedicated documentation page](@/documentation/operations/multi-hdd.md)
|
||||
on how to operate Garage in such a setup.
|
||||
|
||||
### Top-level configuration options
|
||||
### `db_engine` (since `v0.8.0`)
|
||||
|
||||
#### `replication_mode` {#replication_mode}
|
||||
By default, Garage uses the Sled embedded database library
|
||||
to store its metadata on-disk. Since `v0.8.0`, Garage can use alternative storage backends as follows:
|
||||
|
||||
| DB engine | `db_engine` value | Database path |
|
||||
| --------- | ----------------- | ------------- |
|
||||
| [Sled](https://sled.rs) | `"sled"` | `<metadata_dir>/db/` |
|
||||
| [LMDB](https://www.lmdb.tech) | `"lmdb"` | `<metadata_dir>/db.lmdb/` |
|
||||
| [Sqlite](https://sqlite.org) | `"sqlite"` | `<metadata_dir>/db.sqlite` |
|
||||
|
||||
Performance characteristics of the different DB engines are as follows:
|
||||
|
||||
- Sled: the default database engine, which tends to produce
|
||||
large data files and also has performance issues, especially when the metadata folder
|
||||
is on a traditional HDD and not on SSD.
|
||||
- LMDB: the recommended alternative on 64-bit systems,
|
||||
much more space-efficient and slightly faster. Note that the data format of LMDB is not portable
|
||||
between architectures, so for instance the Garage database of an x86-64
|
||||
node cannot be moved to an ARM64 node. Also note that, while LMDB can technically be used on 32-bit systems,
|
||||
this will limit your node to very small database sizes due to how LMDB works; it is therefore not recommended.
|
||||
- Sqlite: Garage supports Sqlite as a storage backend for metadata,
|
||||
however it may have issues and is also very slow in its current implementation,
|
||||
so it is not recommended to be used for now.
|
||||
|
||||
It is possible to convert Garage's metadata directory from one format to another with a small utility named `convert_db`,
|
||||
which can be downloaded at the following locations:
|
||||
[for amd64](https://garagehq.deuxfleurs.fr/_releases/convert_db/amd64/convert_db),
|
||||
[for i386](https://garagehq.deuxfleurs.fr/_releases/convert_db/i386/convert_db),
|
||||
[for arm64](https://garagehq.deuxfleurs.fr/_releases/convert_db/arm64/convert_db),
|
||||
[for arm](https://garagehq.deuxfleurs.fr/_releases/convert_db/arm/convert_db).
|
||||
The `convert_db` utility is used as follows:
|
||||
|
||||
```
|
||||
convert-db -a <input db engine> -i <input db path> \
|
||||
-b <output db engine> -o <output db path>
|
||||
```
|
||||
|
||||
Make sure to specify the full database path as presented in the table above,
|
||||
and not just the path to the metadata directory.
|
||||
|
||||
### `metadata_fsync`
|
||||
|
||||
Whether to enable synchronous mode for the database engine or not.
|
||||
This is disabled (`false`) by default.
|
||||
|
||||
This reduces the risk of metadata corruption in case of power failures,
|
||||
at the cost of a significant drop in write performance,
|
||||
as Garage will have to pause to sync data to disk much more often
|
||||
(several times for API calls such as PutObject).
|
||||
|
||||
Using this option reduces the risk of simultaneous metadata corruption on several
|
||||
cluster nodes, which could lead to data loss.
|
||||
|
||||
If multi-site replication is used, this option is most likely not necessary, as
|
||||
it is extremely unlikely that two nodes in different locations will have a
|
||||
power failure at the exact same time.
|
||||
|
||||
(Metadata corruption on a single node is not an issue, the corrupted data file
|
||||
can always be deleted and reconstructed from the other nodes in the cluster.)
|
||||
|
||||
Here is how this option impacts the different database engines:
|
||||
|
||||
| Database | `metadata_fsync = false` (default) | `metadata_fsync = true` |
|
||||
|----------|------------------------------------|-------------------------------|
|
||||
| Sled | default options | *unsupported* |
|
||||
| Sqlite | `PRAGMA synchronous = OFF` | `PRAGMA synchronous = NORMAL` |
|
||||
| LMDB | `MDB_NOMETASYNC` + `MDB_NOSYNC` | `MDB_NOMETASYNC` |
|
||||
|
||||
Note that the Sqlite database is always run in `WAL` mode (`PRAGMA journal_mode = WAL`).
|
||||
|
||||
### `data_fsync`
|
||||
|
||||
Whether to `fsync` data blocks and their containing directory after they are
|
||||
saved to disk.
|
||||
This is disabled (`false`) by default.
|
||||
|
||||
This might reduce the risk that a data block is lost in rare
|
||||
situations such as several nodes losing power simultaneously,
|
||||
at the cost of a moderate drop in write performance.
|
||||
|
||||
Similarly to `metadata_fsync`, this is likely not necessary
|
||||
if geographical replication is used.
|
||||
|
||||
### `block_size`
|
||||
|
||||
Garage splits stored objects in consecutive chunks of size `block_size`
|
||||
(except the last one which might be smaller). The default size is 1MiB and
|
||||
should work in most cases. We recommend increasing it to e.g. 10MiB if
|
||||
you are using Garage to store large files and have fast network connections
|
||||
between all nodes (e.g. 1gbps).
|
||||
|
||||
If you are interested in tuning this, feel free to do so (and remember to
|
||||
report your findings to us!). When this value is changed for a running Garage
|
||||
installation, only files newly uploaded will be affected. Previously uploaded
|
||||
files will remain available. This however means that chunks from existing files
|
||||
will not be deduplicated with chunks from newly uploaded files, meaning you
|
||||
might use more storage space than is optimally possible.
|
||||
|
||||
### `sled_cache_capacity`
|
||||
|
||||
This parameter can be used to tune the capacity of the cache used by
|
||||
[sled](https://sled.rs), the database Garage uses internally to store metadata.
|
||||
Tune this to fit the RAM you wish to make available to your Garage instance.
|
||||
This value has a conservative default (128MB) so that Garage doesn't use too much
|
||||
RAM by default, but feel free to increase this for higher performance.
|
||||
|
||||
### `sled_flush_every_ms`
|
||||
|
||||
This parameter can be used to tune the flushing interval of sled.
|
||||
Increase this if sled is thrashing your SSD, at the risk of losing more data in case
|
||||
of a power outage (though this should not matter much as data is replicated on other
|
||||
nodes). The default value, 2000ms, should be appropriate for most use cases.
|
||||
|
||||
### `lmdb_map_size`
|
||||
|
||||
This parameter can be used to set the map size used by LMDB,
|
||||
which is the size of the virtual memory region used for mapping the database file.
|
||||
The value of this parameter is the maximum size the metadata database can take.
|
||||
This value is not bound by the physical RAM size of the machine running Garage.
|
||||
If not specified, it defaults to 1GiB on 32-bit machines and 1TiB on 64-bit machines.
|
||||
|
||||
### `replication_mode`
|
||||
|
||||
Garage supports the following replication modes:
|
||||
|
||||
@ -217,160 +310,7 @@ to the cluster while rebalancing is in progress. In theory, no data should be
|
||||
lost as rebalancing is a routine operation for Garage, although we cannot
|
||||
guarantee you that everything will go right in such an extreme scenario.
|
||||
|
||||
#### `metadata_dir` {#metadata_dir}
|
||||
|
||||
The directory in which Garage will store its metadata. This contains the node identifier,
|
||||
the network configuration and the peer list, the list of buckets and keys as well
|
||||
as the index of all objects, object version and object blocks.
|
||||
|
||||
Store this folder on a fast SSD drive if possible to maximize Garage's performance.
|
||||
|
||||
#### `data_dir` {#data_dir}
|
||||
|
||||
The directory in which Garage will store the data blocks of objects.
|
||||
This folder can be placed on an HDD. The space available for `data_dir`
|
||||
should be counted to determine a node's capacity
|
||||
when [adding it to the cluster layout](@/documentation/cookbook/real-world.md).
|
||||
|
||||
Since `v0.9.0`, Garage supports multiple data directories with the following syntax:
|
||||
|
||||
```toml
|
||||
data_dir = [
|
||||
{ path = "/path/to/old_data", read_only = true },
|
||||
{ path = "/path/to/new_hdd1", capacity = "2T" },
|
||||
{ path = "/path/to/new_hdd2", capacity = "4T" },
|
||||
]
|
||||
```
|
||||
|
||||
See [the dedicated documentation page](@/documentation/operations/multi-hdd.md)
|
||||
on how to operate Garage in such a setup.
|
||||
|
||||
#### `db_engine` (since `v0.8.0`) {#db_engine}
|
||||
|
||||
Since `v0.8.0`, Garage can use alternative storage backends as follows:
|
||||
|
||||
| DB engine | `db_engine` value | Database path |
|
||||
| --------- | ----------------- | ------------- |
|
||||
| [LMDB](https://www.lmdb.tech) (default since `v0.9.0`) | `"lmdb"` | `<metadata_dir>/db.lmdb/` |
|
||||
| [Sled](https://sled.rs) (default up to `v0.8.0`) | `"sled"` | `<metadata_dir>/db/` |
|
||||
| [Sqlite](https://sqlite.org) | `"sqlite"` | `<metadata_dir>/db.sqlite` |
|
||||
|
||||
Sled was the only database engine up to Garage v0.7.0. Performance issues and
|
||||
API limitations of Sled prompted the addition of alternative engines in v0.8.0.
|
||||
Since v0.9.0, LMDB is the default engine instead of Sled, and Sled is
|
||||
deprecated. We plan to remove Sled in Garage v1.0.
|
||||
|
||||
Performance characteristics of the different DB engines are as follows:
|
||||
|
||||
- Sled: tends to produce large data files and also has performance issues,
|
||||
especially when the metadata folder is on a traditional HDD and not on SSD.
|
||||
|
||||
- LMDB: the recommended database engine on 64-bit systems, much more
|
||||
space-efficient and slightly faster. Note that the data format of LMDB is not
|
||||
portable between architectures, so for instance the Garage database of an
|
||||
x86-64 node cannot be moved to an ARM64 node. Also note that, while LMDB can
|
||||
technically be used on 32-bit systems, this will limit your node to very
|
||||
small database sizes due to how LMDB works; it is therefore not recommended.
|
||||
|
||||
- Sqlite: Garage supports Sqlite as an alternative storage backend for
|
||||
metadata, and although it has not been tested as much, it is expected to work
|
||||
satisfactorily. Since Garage v0.9.0, performance issues have largely been
|
||||
fixed by allowing for a no-fsync mode (see `metadata_fsync`). Sqlite does not
|
||||
have the database size limitation of LMDB on 32-bit systems.
|
||||
|
||||
It is possible to convert Garage's metadata directory from one format to another
|
||||
using the `garage convert-db` command, which should be used as follows:
|
||||
|
||||
```
|
||||
garage convert-db -a <input db engine> -i <input db path> \
|
||||
-b <output db engine> -o <output db path>
|
||||
```
|
||||
|
||||
Make sure to specify the full database path as presented in the table above
|
||||
(third column), and not just the path to the metadata directory.
|
||||
|
||||
#### `metadata_fsync` {#metadata_fsync}
|
||||
|
||||
Whether to enable synchronous mode for the database engine or not.
|
||||
This is disabled (`false`) by default.
|
||||
|
||||
This reduces the risk of metadata corruption in case of power failures,
|
||||
at the cost of a significant drop in write performance,
|
||||
as Garage will have to pause to sync data to disk much more often
|
||||
(several times for API calls such as PutObject).
|
||||
|
||||
Using this option reduces the risk of simultaneous metadata corruption on several
|
||||
cluster nodes, which could lead to data loss.
|
||||
|
||||
If multi-site replication is used, this option is most likely not necessary, as
|
||||
it is extremely unlikely that two nodes in different locations will have a
|
||||
power failure at the exact same time.
|
||||
|
||||
(Metadata corruption on a single node is not an issue, the corrupted data file
|
||||
can always be deleted and reconstructed from the other nodes in the cluster.)
|
||||
|
||||
Here is how this option impacts the different database engines:
|
||||
|
||||
| Database | `metadata_fsync = false` (default) | `metadata_fsync = true` |
|
||||
|----------|------------------------------------|-------------------------------|
|
||||
| Sled | default options | *unsupported* |
|
||||
| Sqlite | `PRAGMA synchronous = OFF` | `PRAGMA synchronous = NORMAL` |
|
||||
| LMDB | `MDB_NOMETASYNC` + `MDB_NOSYNC` | `MDB_NOMETASYNC` |
|
||||
|
||||
Note that the Sqlite database is always run in `WAL` mode (`PRAGMA journal_mode = WAL`).
|
||||
|
||||
#### `data_fsync` {#data_fsync}
|
||||
|
||||
Whether to `fsync` data blocks and their containing directory after they are
|
||||
saved to disk.
|
||||
This is disabled (`false`) by default.
|
||||
|
||||
This might reduce the risk that a data block is lost in rare
|
||||
situations such as several nodes losing power simultaneously,
|
||||
at the cost of a moderate drop in write performance.
|
||||
|
||||
Similarly to `metadata_fsync`, this is likely not necessary
|
||||
if geographical replication is used.
|
||||
|
||||
#### `block_size` {#block_size}
|
||||
|
||||
Garage splits stored objects in consecutive chunks of size `block_size`
|
||||
(except the last one which might be smaller). The default size is 1MiB and
|
||||
should work in most cases. We recommend increasing it to e.g. 10MiB if
|
||||
you are using Garage to store large files and have fast network connections
|
||||
between all nodes (e.g. 1gbps).
|
||||
|
||||
If you are interested in tuning this, feel free to do so (and remember to
|
||||
report your findings to us!). When this value is changed for a running Garage
|
||||
installation, only files newly uploaded will be affected. Previously uploaded
|
||||
files will remain available. This however means that chunks from existing files
|
||||
will not be deduplicated with chunks from newly uploaded files, meaning you
|
||||
might use more storage space than is optimally possible.
|
||||
|
||||
#### `sled_cache_capacity` {#sled_cache_capacity}
|
||||
|
||||
This parameter can be used to tune the capacity of the cache used by
|
||||
[sled](https://sled.rs), the database Garage uses internally to store metadata.
|
||||
Tune this to fit the RAM you wish to make available to your Garage instance.
|
||||
This value has a conservative default (128MB) so that Garage doesn't use too much
|
||||
RAM by default, but feel free to increase this for higher performance.
|
||||
|
||||
#### `sled_flush_every_ms` {#sled_flush_every_ms}
|
||||
|
||||
This parameter can be used to tune the flushing interval of sled.
|
||||
Increase this if sled is thrashing your SSD, at the risk of losing more data in case
|
||||
of a power outage (though this should not matter much as data is replicated on other
|
||||
nodes). The default value, 2000ms, should be appropriate for most use cases.
|
||||
|
||||
#### `lmdb_map_size` {#lmdb_map_size}
|
||||
|
||||
This parameter can be used to set the map size used by LMDB,
|
||||
which is the size of the virtual memory region used for mapping the database file.
|
||||
The value of this parameter is the maximum size the metadata database can take.
|
||||
This value is not bound by the physical RAM size of the machine running Garage.
|
||||
If not specified, it defaults to 1GiB on 32-bit machines and 1TiB on 64-bit machines.
|
||||
|
||||
#### `compression_level` {#compression_level}
|
||||
### `compression_level`
|
||||
|
||||
Zstd compression level to use for storing blocks.
|
||||
|
||||
@ -394,7 +334,7 @@ Compression is done synchronously, setting a value too high will add latency to
|
||||
This value can be different between nodes; compression is done by the node which receives the
|
||||
API call.
|
||||
|
||||
#### `rpc_secret`, `rpc_secret_file` or `GARAGE_RPC_SECRET` (env) {#rpc_secret}
|
||||
### `rpc_secret`, `rpc_secret_file` or `GARAGE_RPC_SECRET` (env)
|
||||
|
||||
Garage uses a secret key, called an RPC secret, that is shared between all
|
||||
nodes of the cluster in order to identify these nodes and allow them to
|
||||
@ -406,7 +346,7 @@ Since Garage `v0.8.2`, the RPC secret can also be stored in a file whose path is
|
||||
given in the configuration variable `rpc_secret_file`, or specified as an
|
||||
environment variable `GARAGE_RPC_SECRET`.
|
||||
|
||||
#### `rpc_bind_addr` {#rpc_bind_addr}
|
||||
### `rpc_bind_addr`
|
||||
|
||||
The address and port on which to bind for inter-cluster communications
|
||||
(referred to as RPC for remote procedure calls).
|
||||
@ -415,14 +355,14 @@ the node, even in the case of a NAT: the NAT should be configured to forward the
|
||||
port number to the same internal port number. This means that if you have several nodes running
|
||||
behind a NAT, they should each use a different RPC port number.
|
||||
|
||||
#### `rpc_public_addr` {#rpc_public_addr}
|
||||
### `rpc_public_addr`
|
||||
|
||||
The address and port that other nodes need to use to contact this node for
|
||||
RPC calls. **This parameter is optional but recommended.** In case you have
|
||||
a NAT that binds the RPC port to a port that is different on your public IP,
|
||||
this field might help making it work.
|
||||
|
||||
#### `bootstrap_peers` {#bootstrap_peers}
|
||||
### `bootstrap_peers`
|
||||
|
||||
A list of peer identifiers on which to contact other Garage peers of this cluster.
|
||||
These peer identifiers have the following syntax:
|
||||
@ -439,42 +379,42 @@ key will be returned by `garage node id` and you will have to add the IP
|
||||
yourself.
|
||||
|
||||
|
||||
### The `[consul_discovery]` section
|
||||
## The `[consul_discovery]` section
|
||||
|
||||
Garage supports discovering other nodes of the cluster using Consul. For this
|
||||
to work correctly, nodes need to know their IP address by which they can be
|
||||
reached by other nodes of the cluster, which should be set in `rpc_public_addr`.
|
||||
|
||||
#### `consul_http_addr` {#consul_http_addr}
|
||||
### `consul_http_addr` and `service_name`
|
||||
|
||||
The `consul_http_addr` parameter should be set to the full HTTP(S) address of the Consul server.
|
||||
|
||||
#### `api` {#consul_api}
|
||||
### `api`
|
||||
|
||||
Two APIs for service registration are supported: `catalog` and `agent`. `catalog`, the default, will register a service using
|
||||
the `/v1/catalog` endpoints, enabling mTLS if `client_cert` and `client_key` are provided. The `agent` API uses the
|
||||
`v1/agent` endpoints instead, where an optional `token` may be provided.
|
||||
|
||||
#### `service_name` {#consul_service_name}
|
||||
### `service_name`
|
||||
|
||||
`service_name` should be set to the service name under which Garage's
|
||||
RPC ports are announced.
|
||||
|
||||
#### `client_cert`, `client_key` {#consul_client_cert}
|
||||
### `client_cert`, `client_key`
|
||||
|
||||
TLS client certificate and client key to use when communicating with Consul over TLS. Both are mandatory when doing so.
|
||||
Only available when `api = "catalog"`.
|
||||
|
||||
#### `ca_cert` {#consul_ca_cert}
|
||||
### `ca_cert`
|
||||
|
||||
TLS CA certificate to use when communicating with Consul over TLS.
|
||||
|
||||
#### `tls_skip_verify` {#consul_tls_skip_verify}
|
||||
### `tls_skip_verify`
|
||||
|
||||
Skip server hostname verification in TLS handshake.
|
||||
`ca_cert` is ignored when this is set.
|
||||
|
||||
#### `token` {#consul_token}
|
||||
### `token`
|
||||
|
||||
Uses the provided token for communication with Consul. Only available when `api = "agent"`.
|
||||
The policy assigned to this token should at least have these rules:
|
||||
@ -494,49 +434,49 @@ node_prefix "" {
|
||||
}
|
||||
```
|
||||
|
||||
#### `tags` and `meta` {#consul_tags}
|
||||
### `tags` and `meta`
|
||||
|
||||
Additional list of tags and map of service meta to add during service registration.
|
||||
|
||||
### The `[kubernetes_discovery]` section
|
||||
## The `[kubernetes_discovery]` section
|
||||
|
||||
Garage supports discovering other nodes of the cluster using kubernetes custom
|
||||
resources. For this to work, a `[kubernetes_discovery]` section must be present
|
||||
with at least the `namespace` and `service_name` parameters.
|
||||
|
||||
#### `namespace` {#kube_namespace}
|
||||
### `namespace`
|
||||
|
||||
`namespace` sets the namespace in which the custom resources are
|
||||
configured.
|
||||
|
||||
#### `service_name` {#kube_service_name}
|
||||
### `service_name`
|
||||
|
||||
`service_name` is added as a label to the advertised resources to
|
||||
filter them, to allow for multiple deployments in a single namespace.
|
||||
|
||||
#### `skip_crd` {#kube_skip_crd}
|
||||
### `skip_crd`
|
||||
|
||||
`skip_crd` can be set to true to disable the automatic creation and
|
||||
patching of the `garagenodes.deuxfleurs.fr` CRD. You will need to create the CRD
|
||||
manually.
|
||||
|
||||
|
||||
### The `[s3_api]` section
|
||||
## The `[s3_api]` section
|
||||
|
||||
#### `api_bind_addr` {#s3_api_bind_addr}
|
||||
### `api_bind_addr`
|
||||
|
||||
The IP and port on which to bind for accepting S3 API calls.
|
||||
This endpoint does not support TLS: a reverse proxy should be used to provide it.
|
||||
|
||||
Alternatively, since `v0.8.5`, a path can be used to create a unix socket with 0222 mode.
|
||||
|
||||
#### `s3_region` {#s3_region}
|
||||
### `s3_region`
|
||||
|
||||
Garage will accept S3 API calls that are targeted to the S3 region defined here.
|
||||
API calls targeted to other regions will fail with an AuthorizationHeaderMalformed error
|
||||
message that redirects the client to the correct region.
|
||||
|
||||
#### `root_domain` {#s3_root_domain}
|
||||
### `root_domain` {#root_domain}
|
||||
|
||||
The optional suffix used to access buckets with vhost-style requests, in addition to path-style requests.
|
||||
Note path-style requests are always enabled, whether or not vhost-style is configured.
|
||||
@ -548,12 +488,12 @@ using the hostname `my-bucket.s3.garage.eu`.
|
||||
|
||||
|
||||
|
||||
### The `[s3_web]` section
|
||||
## The `[s3_web]` section
|
||||
|
||||
Garage allows publishing the content of buckets as websites. This section configures the
|
||||
behaviour of this module.
|
||||
|
||||
#### `bind_addr` {#web_bind_addr}
|
||||
### `bind_addr`
|
||||
|
||||
The IP and port on which to bind for accepting HTTP requests to buckets configured
|
||||
for website access.
|
||||
@ -561,7 +501,7 @@ This endpoint does not suport TLS: a reverse proxy should be used to provide it.
|
||||
|
||||
Alternatively, since `v0.8.5`, a path can be used to create a unix socket with 0222 mode.
|
||||
|
||||
#### `root_domain` {#web_root_domain}
|
||||
### `root_domain`
|
||||
|
||||
The optional suffix appended to bucket names for the corresponding HTTP Host.
|
||||
|
||||
@ -570,11 +510,11 @@ will be accessible either with hostname `deuxfleurs.fr.web.garage.eu`
|
||||
or with hostname `deuxfleurs.fr`.
|
||||
|
||||
|
||||
### The `[admin]` section
|
||||
## The `[admin]` section
|
||||
|
||||
Garage has a few administration capabilities, in particular to allow remote monitoring. These features are detailed below.
|
||||
|
||||
#### `api_bind_addr` {#admin_api_bind_addr}
|
||||
### `api_bind_addr`
|
||||
|
||||
If specified, Garage will bind an HTTP server to this port and address, on
|
||||
which it will listen to requests for administration features.
|
||||
@ -583,7 +523,7 @@ See [administration API reference](@/documentation/reference-manual/admin-api.md
|
||||
Alternatively, since `v0.8.5`, a path can be used to create a unix socket. Note that for security reasons,
|
||||
the socket will have 0220 mode. Make sure to set user and group permissions accordingly.
|
||||
|
||||
#### `metrics_token`, `metrics_token_file` or `GARAGE_METRICS_TOKEN` (env) {#admin_metrics_token}
|
||||
### `metrics_token`, `metrics_token_file` or `GARAGE_METRICS_TOKEN` (env)
|
||||
|
||||
The token for accessing the Metrics endpoint. If this token is not set, the
|
||||
Metrics endpoint can be accessed without access control.
|
||||
@ -594,7 +534,7 @@ You can use any random string for this value. We recommend generating a random t
|
||||
`metrics_token_file` and the `GARAGE_METRICS_TOKEN` environment variable are supported since Garage `v0.8.2`.
|
||||
|
||||
|
||||
#### `admin_token`, `admin_token_file` or `GARAGE_ADMIN_TOKEN` (env) {#admin_token}
|
||||
### `admin_token`, `admin_token_file` or `GARAGE_ADMIN_TOKEN` (env)
|
||||
|
||||
The token for accessing all of the other administration endpoints. If this
|
||||
token is not set, access to these endpoints is disabled entirely.
|
||||
@ -605,7 +545,7 @@ You can use any random string for this value. We recommend generating a random t
|
||||
`admin_token_file` and the `GARAGE_ADMIN_TOKEN` environment variable are supported since Garage `v0.8.2`.
|
||||
|
||||
|
||||
#### `trace_sink` {#admin_trace_sink}
|
||||
### `trace_sink`
|
||||
|
||||
Optionally, the address of an OpenTelemetry collector. If specified,
|
||||
Garage will send traces in the OpenTelemetry format to this endpoint. These
|
||||
|
@ -52,7 +52,7 @@ This is particularly usefull when nodes are far from one another and talk to one
|
||||
|
||||
Garage supports a variety of replication modes, with 1 copy, 2 copies or 3 copies of your data,
|
||||
and with various levels of consistency, in order to adapt to a variety of usage scenarios.
|
||||
Read our reference page on [supported replication modes](@/documentation/reference-manual/configuration.md#replication_mode)
|
||||
Read our reference page on [supported replication modes](@/documentation/reference-manual/configuration.md#replication-mode)
|
||||
to select the replication mode best suited to your use case (hint: in most cases, `replication_mode = "3"` is what you want).
|
||||
|
||||
### Web server for static websites
|
||||
|
111
flake.lock
111
flake.lock
@ -1,5 +1,31 @@
|
||||
{
|
||||
"nodes": {
|
||||
"cargo2nix": {
|
||||
"inputs": {
|
||||
"flake-compat": [
|
||||
"flake-compat"
|
||||
],
|
||||
"flake-utils": "flake-utils",
|
||||
"nixpkgs": [
|
||||
"nixpkgs"
|
||||
],
|
||||
"rust-overlay": "rust-overlay"
|
||||
},
|
||||
"locked": {
|
||||
"lastModified": 1666087781,
|
||||
"narHash": "sha256-trKVdjMZ8mNkGfLcY5LsJJGtdV3xJDZnMVrkFjErlcs=",
|
||||
"owner": "Alexis211",
|
||||
"repo": "cargo2nix",
|
||||
"rev": "a7a61179b66054904ef6a195d8da736eaaa06c36",
|
||||
"type": "github"
|
||||
},
|
||||
"original": {
|
||||
"owner": "Alexis211",
|
||||
"repo": "cargo2nix",
|
||||
"rev": "a7a61179b66054904ef6a195d8da736eaaa06c36",
|
||||
"type": "github"
|
||||
}
|
||||
},
|
||||
"flake-compat": {
|
||||
"locked": {
|
||||
"lastModified": 1688025799,
|
||||
@ -20,19 +46,54 @@
|
||||
"systems": "systems"
|
||||
},
|
||||
"locked": {
|
||||
"lastModified": 1701680307,
|
||||
"narHash": "sha256-kAuep2h5ajznlPMD9rnQyffWG8EM/C73lejGofXvdM8=",
|
||||
"lastModified": 1681202837,
|
||||
"narHash": "sha256-H+Rh19JDwRtpVPAWp64F+rlEtxUWBAQW28eAi3SRSzg=",
|
||||
"owner": "numtide",
|
||||
"repo": "flake-utils",
|
||||
"rev": "4022d587cbbfd70fe950c1e2083a02621806a725",
|
||||
"rev": "cfacdce06f30d2b68473a46042957675eebb3401",
|
||||
"type": "github"
|
||||
},
|
||||
"original": {
|
||||
"id": "flake-utils",
|
||||
"type": "indirect"
|
||||
"owner": "numtide",
|
||||
"repo": "flake-utils",
|
||||
"type": "github"
|
||||
}
|
||||
},
|
||||
"flake-utils_2": {
|
||||
"inputs": {
|
||||
"systems": "systems_2"
|
||||
},
|
||||
"locked": {
|
||||
"lastModified": 1681202837,
|
||||
"narHash": "sha256-H+Rh19JDwRtpVPAWp64F+rlEtxUWBAQW28eAi3SRSzg=",
|
||||
"owner": "numtide",
|
||||
"repo": "flake-utils",
|
||||
"rev": "cfacdce06f30d2b68473a46042957675eebb3401",
|
||||
"type": "github"
|
||||
},
|
||||
"original": {
|
||||
"owner": "numtide",
|
||||
"repo": "flake-utils",
|
||||
"type": "github"
|
||||
}
|
||||
},
|
||||
"nixpkgs": {
|
||||
"locked": {
|
||||
"lastModified": 1682109806,
|
||||
"narHash": "sha256-d9g7RKNShMLboTWwukM+RObDWWpHKaqTYXB48clBWXI=",
|
||||
"owner": "NixOS",
|
||||
"repo": "nixpkgs",
|
||||
"rev": "2362848adf8def2866fabbffc50462e929d7fffb",
|
||||
"type": "github"
|
||||
},
|
||||
"original": {
|
||||
"owner": "NixOS",
|
||||
"ref": "nixpkgs-unstable",
|
||||
"repo": "nixpkgs",
|
||||
"type": "github"
|
||||
}
|
||||
},
|
||||
"nixpkgs_2": {
|
||||
"locked": {
|
||||
"lastModified": 1682423271,
|
||||
"narHash": "sha256-WHhl1GiOij1ob4cTLL+yhqr+vFOUH8E5wAX8Ir8fvjE=",
|
||||
@ -50,9 +111,32 @@
|
||||
},
|
||||
"root": {
|
||||
"inputs": {
|
||||
"cargo2nix": "cargo2nix",
|
||||
"flake-compat": "flake-compat",
|
||||
"flake-utils": "flake-utils",
|
||||
"flake-utils": [
|
||||
"cargo2nix",
|
||||
"flake-utils"
|
||||
],
|
||||
"nixpkgs": "nixpkgs_2"
|
||||
}
|
||||
},
|
||||
"rust-overlay": {
|
||||
"inputs": {
|
||||
"flake-utils": "flake-utils_2",
|
||||
"nixpkgs": "nixpkgs"
|
||||
},
|
||||
"locked": {
|
||||
"lastModified": 1682389182,
|
||||
"narHash": "sha256-8t2nmFnH+8V48+IJsf8AK51ebXNlVbOSVYOpiqJKvJE=",
|
||||
"owner": "oxalica",
|
||||
"repo": "rust-overlay",
|
||||
"rev": "74f1a64dd28faeeb85ef081f32cad2989850322c",
|
||||
"type": "github"
|
||||
},
|
||||
"original": {
|
||||
"owner": "oxalica",
|
||||
"repo": "rust-overlay",
|
||||
"type": "github"
|
||||
}
|
||||
},
|
||||
"systems": {
|
||||
@ -69,6 +153,21 @@
|
||||
"repo": "default",
|
||||
"type": "github"
|
||||
}
|
||||
},
|
||||
"systems_2": {
|
||||
"locked": {
|
||||
"lastModified": 1681028828,
|
||||
"narHash": "sha256-Vy1rq5AaRuLzOxct8nz4T6wlgyUR7zLU309k9mBC768=",
|
||||
"owner": "nix-systems",
|
||||
"repo": "default",
|
||||
"rev": "da67096a3b9bf56a91d16901293e51ba5b49a27e",
|
||||
"type": "github"
|
||||
},
|
||||
"original": {
|
||||
"owner": "nix-systems",
|
||||
"repo": "default",
|
||||
"type": "github"
|
||||
}
|
||||
}
|
||||
},
|
||||
"root": "root",
|
||||
|
44
flake.nix
44
flake.nix
@ -8,23 +8,49 @@
|
||||
|
||||
inputs.flake-compat.url = "github:nix-community/flake-compat";
|
||||
|
||||
outputs = { self, nixpkgs, flake-utils, ... }:
|
||||
inputs.cargo2nix = {
|
||||
# As of 2022-10-18: two small patches over unstable branch, one for clippy and one to fix feature detection
|
||||
url = "github:Alexis211/cargo2nix/a7a61179b66054904ef6a195d8da736eaaa06c36";
|
||||
|
||||
# As of 2023-04-25:
|
||||
# - my two patches were merged into unstable (one for clippy and one to "fix" feature detection)
|
||||
# - rustc v1.66
|
||||
# url = "github:cargo2nix/cargo2nix/8fb57a670f7993bfc24099c33eb9c5abb51f29a2";
|
||||
|
||||
# Rust overlay as of 2023-04-25
|
||||
inputs.rust-overlay.url =
|
||||
"github:oxalica/rust-overlay/74f1a64dd28faeeb85ef081f32cad2989850322c";
|
||||
|
||||
inputs.nixpkgs.follows = "nixpkgs";
|
||||
inputs.flake-compat.follows = "flake-compat";
|
||||
};
|
||||
|
||||
inputs.flake-utils.follows = "cargo2nix/flake-utils";
|
||||
|
||||
outputs = { self, nixpkgs, cargo2nix, flake-utils, ... }:
|
||||
let
|
||||
gitVersion = self.lastModifiedDate;
|
||||
git_version = self.lastModifiedDate;
|
||||
compile = import ./nix/compile.nix;
|
||||
in
|
||||
flake-utils.lib.eachDefaultSystem (system:
|
||||
let pkgs = nixpkgs.legacyPackages.${system};
|
||||
in {
|
||||
packages = {
|
||||
default = (import ./default.nix {
|
||||
inherit gitVersion;
|
||||
buildSystem = system;
|
||||
default = (compile {
|
||||
inherit system git_version;
|
||||
pkgsSrc = nixpkgs;
|
||||
cargo2nixOverlay = cargo2nix.overlays.default;
|
||||
release = true;
|
||||
}).build;
|
||||
}).workspace.garage { compileMode = "build"; };
|
||||
};
|
||||
devShell = (import ./shell.nix {
|
||||
buildSystem = system;
|
||||
}).rust;
|
||||
devShell = (compile {
|
||||
inherit system git_version;
|
||||
pkgsSrc = nixpkgs;
|
||||
cargo2nixOverlay = cargo2nix.overlays.default;
|
||||
release = false;
|
||||
}).workspaceShell { packages = with pkgs; [
|
||||
rustfmt
|
||||
mold
|
||||
]; };
|
||||
});
|
||||
}
|
||||
|
@ -1,6 +1,6 @@
|
||||
{ path ? "/../aws-list.txt", }:
|
||||
|
||||
with import ./pkgs.nix;
|
||||
with import ./common.nix;
|
||||
let
|
||||
pkgs = import pkgsSrc { };
|
||||
lib = pkgs.lib;
|
||||
|
17
nix/common.nix
Normal file
17
nix/common.nix
Normal file
@ -0,0 +1,17 @@
|
||||
let
|
||||
lock = builtins.fromJSON (builtins.readFile ../flake.lock);
|
||||
|
||||
inherit (lock.nodes.flake-compat.locked) owner repo rev narHash;
|
||||
|
||||
flake-compat = fetchTarball {
|
||||
url = "https://github.com/${owner}/${repo}/archive/${rev}.tar.gz";
|
||||
sha256 = narHash;
|
||||
};
|
||||
|
||||
flake = (import flake-compat { system = builtins.currentSystem; src = ../.; });
|
||||
in
|
||||
rec {
|
||||
pkgsSrc = flake.defaultNix.inputs.nixpkgs;
|
||||
cargo2nix = flake.defaultNix.inputs.cargo2nix;
|
||||
cargo2nixOverlay = cargo2nix.overlays.default;
|
||||
}
|
@ -1,8 +0,0 @@
|
||||
let
|
||||
lock = builtins.fromJSON (builtins.readFile ../flake.lock);
|
||||
inherit (lock.nodes.nixpkgs.locked) owner repo rev narHash;
|
||||
in
|
||||
fetchTarball {
|
||||
url = "https://github.com/${owner}/${repo}/archive/${rev}.tar.gz";
|
||||
sha256 = narHash;
|
||||
}
|
1
script/jepsen.garage/.envrc
Normal file
1
script/jepsen.garage/.envrc
Normal file
@ -0,0 +1 @@
|
||||
use nix
|
16
script/jepsen.garage/.gitignore
vendored
Normal file
16
script/jepsen.garage/.gitignore
vendored
Normal file
@ -0,0 +1,16 @@
|
||||
/target
|
||||
/classes
|
||||
/checkouts
|
||||
profiles.clj
|
||||
pom.xml
|
||||
pom.xml.asc
|
||||
*.jar
|
||||
*.class
|
||||
/.lein-*
|
||||
/.nrepl-port
|
||||
/.prepl-port
|
||||
.hgignore
|
||||
.hg/
|
||||
.direnv
|
||||
/store
|
||||
.vagrant
|
157
script/jepsen.garage/README.md
Normal file
157
script/jepsen.garage/README.md
Normal file
@ -0,0 +1,157 @@
|
||||
# jepsen.garage
|
||||
|
||||
Jepsen checking of Garage consistency properties.
|
||||
|
||||
## Usage
|
||||
|
||||
Requirements:
|
||||
|
||||
- vagrant
|
||||
- VirtualBox, configured so that nodes can take an IP in a private network `192.168.56.0/24`
|
||||
- a user that can create VirtualBox VMs
|
||||
- leiningen
|
||||
- gnuplot
|
||||
|
||||
Set up VMs:
|
||||
|
||||
```
|
||||
vagrant up
|
||||
```
|
||||
|
||||
Run tests (this one should fail):
|
||||
|
||||
```
|
||||
lein run test --nodes-file nodes.vagrant --time-limit 64 --concurrency 50 --rate 50 --workload reg
|
||||
```
|
||||
|
||||
These ones are working:
|
||||
|
||||
```
|
||||
lein run test --nodes-file nodes.vagrant --time-limit 64 --rate 50 --concurrency 50 --workload set1
|
||||
lein run test --nodes-file nodes.vagrant --time-limit 64 --rate 50 --concurrency 50 --workload set2
|
||||
```
|
||||
|
||||
## Results
|
||||
|
||||
### Register linear, without timestamp patch
|
||||
|
||||
Command: `lein run test --nodes-file nodes.vagrant --time-limit 60 --rate 100 --concurrency 20 --workload reg1 --ops-per-key 100`
|
||||
|
||||
Results without timestamp patch:
|
||||
|
||||
- Fails with a simple clock-scramble nemesis (`--scenario c`).
|
||||
Explanation: without the timestamp patch, nodes will create objects using their
|
||||
local clock only as a timestamp, so the ordering will be all over the place if
|
||||
clocks are scrambled.
|
||||
|
||||
Results with timestamp patch (`--patch tsfix2`):
|
||||
|
||||
- No failure with clock-scramble nemesis
|
||||
|
||||
- Fails with clock-scramble nemesis + partition nemesis (`--scenario cp`).
|
||||
|
||||
**This test is expected to fail.**
|
||||
Indeed, S3 objects are not meant to behave like linearizable registers.
|
||||
TODO explain using a counter-example
|
||||
|
||||
|
||||
### Read-after-write CRDT register model
|
||||
|
||||
Command: `lein run test --nodes-file nodes.vagrant --time-limit 60 --rate 100 --concurrency 100 --workload reg2 --ops-per-key 100`
|
||||
|
||||
Results without timestamp patch:
|
||||
|
||||
- Fails with a simple clock-scramble nemesis (`--scenario c`).
|
||||
Explanation: old values are not overwritten correctly when their timestamps are in the future.
|
||||
|
||||
Results with timestamp patch (`--patch tsfix2`):
|
||||
|
||||
- No failures with clock-scramble nemesis + partition nemesis (`--scenario cp`).
|
||||
This proves that `tsfix2` (PR#543) does improve consistency.
|
||||
|
||||
- **Fails with layout reconfiguration nemesis** (`--scenario r`).
|
||||
Example of a failed run: `garage reg2/20231024T120806.899+0200`.
|
||||
This is the failure mode we are looking for and trying to fix for NLnet task 3.
|
||||
|
||||
- Changes brought by NLnet task 3 code (commit 707442f5de):
|
||||
no failures with `--scenario r` (0 of 10 runs), `--scenario pr` (0 of 10 runs),
|
||||
`--scenario cpr` (0 of 10 runs) and `--scenario dpr` (0 of 10 runs).
|
||||
|
||||
|
||||
### Set, basic test (write some items, then read)
|
||||
|
||||
Command: `lein run test --nodes-file nodes.vagrant --time-limit 60 --rate 200 --concurrency 200 --workload set1 --ops-per-key 100 --patch tsfix2`
|
||||
|
||||
Results:
|
||||
|
||||
- For now, no failures with clock-scramble nemesis + partition nemesis -> TODO long test run
|
||||
|
||||
- Does not seem to fail with only the layout reconfiguration nemesis (<10 runs), although theoretically it could
|
||||
|
||||
- **Fails with the partition + layout reconfiguration nemesis** (`--scenario pr`).
|
||||
Example of a failed run: `garage set1/20231024T172214.488+0200` (1 failure in 4 runs).
|
||||
TODO: investigate.
|
||||
This is the failure mode we are looking for and trying to fix for NLnet task 3.
|
||||
|
||||
|
||||
### Set, continuous test (interspersed reads and writes)
|
||||
|
||||
Command: `lein run test --nodes-file nodes.vagrant --time-limit 60 --rate 100 --concurrency 100 --workload set2 --ops-per-key 100 --patch tsfix2`
|
||||
|
||||
Results:
|
||||
|
||||
- No failures with clock-scramble nemesis + db nemesis + partition nemesis (`--scenario cdp`) (0 failures in 10 runs).
|
||||
|
||||
- **Fails with just layout reconfiguration nemesis** (`--scenario r`).
|
||||
Example of a failed run: `garage set2/20231025T141940.198+0200` (10 failures in 10 runs).
|
||||
This is the failure mode we are looking for and trying to fix for NLnet task 3.
|
||||
|
||||
- Changes brought by NLnet task 3 code (commit 707442f5de):
|
||||
no failures with `--scenario r` (0 of 10 runs), `--scenario pr` (0 of 10 runs),
|
||||
`--scenario cpr` (0 of 10 runs) and `--scenario dpr` (0 of 10 runs).
|
||||
|
||||
|
||||
## Investigating (and fixing) errors
|
||||
|
||||
### Segfaults
|
||||
|
||||
They are due to the download being interrupted in the middle (^C during first launch on clean VMs), which leaves the `garage` binary truncated.
|
||||
Add `:force?` to the `cached-wget!` call in `daemon.clj` to re-download the binary.
|
||||
|
||||
### In `jepsen.garage`: prefix weirdness
|
||||
|
||||
In `store/garage set1/20231019T163358.615+0200`:
|
||||
|
||||
```
|
||||
INFO [2023-10-19 16:35:20,977] clojure-agent-send-off-pool-207 - jepsen.garage.set list results for prefix set20/ : (set13/0 set13/1 set13/10 set13/11 set13/12 set13/13 set13/14 set13/15 set13/16 set13/17 set13/18 set13/19 set13/2 set13/20 set13/21 set13/22 set13/23 set13/24 set13/25 set13/26 set13/27 set13/28 set13/29 set13/3 set13/30 set13/31 set13/32 set13/33 set13/34 set13/35 set13/36 set13/37 set13/38 set13/39 set13/4 set13/40 set13/41 set13/42 set13/43 set13/44 set13/45 set13/46 set13/47 set13/48 set13/49 set13/5 set13/50 set13/51 set13/52 set13/53 set13/54 set13/55 set13/56 set13/57 set13/58 set13/59 set13/6 set13/60 set13/61 set13/62 set13/63 set13/64 set13/65 set13/66 set13/67 set13/68 set13/69 set13/7 set13/70 set13/71 set13/72 set13/73 set13/74 set13/75 set13/76 set13/77 set13/78 set13/79 set13/8 set13/80 set13/81 set13/82 set13/83 set13/84 set13/85 set13/86 set13/87 set13/88 set13/89 set13/9 set13/90 set13/91 set13/92 set13/93 set13/94 set13/95 set13/96 set13/97 set13/98 set13/99) (node: http://192.168.56.25:3900 )
|
||||
```
|
||||
|
||||
After inspecting, the actual S3 call made was with prefix "set13/", so at least this is not an error in Garage itself but in the jepsen code.
|
||||
|
||||
Finally found out that this was due to closures not correctly capturing their context in the list function in s3api.clj (wtf clojure?)
|
||||
Not sure exactly where it came from but it seems to have been fixed by making list-inner a separate function and not a sub-function,
|
||||
and passing all values that were previously in the context (creds and prefix) as additional arguments.
|
||||
|
||||
### `reg2` test inconsistency, even with timestamp fix
|
||||
|
||||
The reg2 test is our custom checker for CRDT read-after-write on individual object keys, acting as registers which can be updated.
|
||||
The test fails without the timestamp fix, which is expected as the clock scrambler will prevent nodes from having a correct ordering of objects.
|
||||
|
||||
With the timestamp fix (`--patch tsfix1`), the happened-before relationship should at least be respected, meaning that when a PutObject call starts
|
||||
after another PutObject call has ended, the second call should overwrite the value of the first call, and that value should not be
|
||||
readable by future GetObject calls.
|
||||
However, we observed inconsistencies even with the timestamp fix.
|
||||
|
||||
The inconsistencies seemed to always happen after writing a nil value, which translates to a DeleteObject call
|
||||
instead of a PutObject. By removing the possibility of writing nil values, therefore only doing
|
||||
PutObject calls, the issue disappears. There is therefore an issue to fix in DeleteObject.
|
||||
|
||||
The issue in DeleteObject seems to have been fixed by commit `c82d91c6bccf307186332b6c5c6fc0b128b1b2b1`, which can be used using `--patch tsfix2`.
|
||||
|
||||
|
||||
## License
|
||||
|
||||
Copyright © 2023 Alex Auvolat
|
||||
|
||||
This program and the accompanying materials are made available under the
|
||||
terms of the GNU Affero General Public License v3.0.
|
32
script/jepsen.garage/Vagrantfile
vendored
Normal file
32
script/jepsen.garage/Vagrantfile
vendored
Normal file
@ -0,0 +1,32 @@
|
||||
# -*- mode: ruby -*-
|
||||
# vi: set ft=ruby :
|
||||
#
|
||||
|
||||
def vm(config, hostname, ip)
|
||||
config.vm.hostname = hostname
|
||||
config.vm.network "private_network", ip: ip
|
||||
end
|
||||
|
||||
Vagrant.configure("2") do |config|
|
||||
config.vm.box = "generic/debian10"
|
||||
|
||||
config.vm.provider "virtualbox" do |vb|
|
||||
vb.gui = false
|
||||
vb.memory = "512"
|
||||
vb.customize ["modifyvm", :id, "--vram=12"]
|
||||
end
|
||||
|
||||
config.vm.provision "shell", inline: <<-SHELL
|
||||
echo "root:root" | chpasswd
|
||||
mkdir -p /root/.ssh
|
||||
echo "ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAIJpaBZdYxHqMxhv2RExAOa7nkKhPBOHupMP3mYaZ73w9 lx@lindy" >> /root/.ssh/authorized_keys
|
||||
SHELL
|
||||
|
||||
config.vm.define "n1" do |config| vm(config, "n1", "192.168.56.21") end
|
||||
config.vm.define "n2" do |config| vm(config, "n2", "192.168.56.22") end
|
||||
config.vm.define "n3" do |config| vm(config, "n3", "192.168.56.23") end
|
||||
config.vm.define "n4" do |config| vm(config, "n4", "192.168.56.24") end
|
||||
config.vm.define "n5" do |config| vm(config, "n5", "192.168.56.25") end
|
||||
config.vm.define "n6" do |config| vm(config, "n6", "192.168.56.26") end
|
||||
config.vm.define "n7" do |config| vm(config, "n7", "192.168.56.27") end
|
||||
end
|
13
script/jepsen.garage/jaeger.sh
Normal file
13
script/jepsen.garage/jaeger.sh
Normal file
@ -0,0 +1,13 @@
|
||||
docker stop jaeger
|
||||
docker rm jaeger
|
||||
|
||||
# UI is on localhost:16686
|
||||
# otel-grpc collector is on localhost:4317
|
||||
# otel-http collector is on localhost:4318
|
||||
|
||||
docker run -d --name jaeger \
|
||||
-e COLLECTOR_OTLP_ENABLED=true \
|
||||
-p 4317:4317 \
|
||||
-p 4318:4318 \
|
||||
-p 16686:16686 \
|
||||
jaegertracing/all-in-one:1.50
|
7
script/jepsen.garage/nodes.vagrant
Normal file
7
script/jepsen.garage/nodes.vagrant
Normal file
@ -0,0 +1,7 @@
|
||||
192.168.56.21
|
||||
192.168.56.22
|
||||
192.168.56.23
|
||||
192.168.56.24
|
||||
192.168.56.25
|
||||
192.168.56.26
|
||||
192.168.56.27
|
10
script/jepsen.garage/project.clj
Normal file
10
script/jepsen.garage/project.clj
Normal file
@ -0,0 +1,10 @@
|
||||
(defproject jepsen.garage "0.1.0-SNAPSHOT"
|
||||
:description "Jepsen testing for Garage"
|
||||
:url "https://git.deuxfleurs.fr/Deuxfleurs/garage"
|
||||
:license {:name "AGPLv3"
|
||||
:url "https://www.gnu.org/licenses/agpl-3.0.en.html"}
|
||||
:main jepsen.garage
|
||||
:dependencies [[org.clojure/clojure "1.11.1"]
|
||||
[jepsen "0.3.3-SNAPSHOT"]
|
||||
[amazonica "0.3.163"]]
|
||||
:repl-options {:init-ns jepsen.garage})
|
18
script/jepsen.garage/shell.nix
Normal file
18
script/jepsen.garage/shell.nix
Normal file
@ -0,0 +1,18 @@
|
||||
{ pkgs ? import <nixpkgs> {
|
||||
overlays = [
|
||||
(self: super: {
|
||||
jdk = super.jdk11;
|
||||
jre = super.jre11;
|
||||
})
|
||||
];
|
||||
} }:
|
||||
pkgs.mkShell {
|
||||
nativeBuildInputs = with pkgs; [
|
||||
leiningen
|
||||
jdk
|
||||
jna
|
||||
vagrant
|
||||
gnuplot
|
||||
graphviz
|
||||
];
|
||||
}
|
101
script/jepsen.garage/src/jepsen/garage.clj
Normal file
101
script/jepsen.garage/src/jepsen/garage.clj
Normal file
@ -0,0 +1,101 @@
|
||||
(ns jepsen.garage
|
||||
(:require
|
||||
[clojure.string :as str]
|
||||
[jepsen
|
||||
[checker :as checker]
|
||||
[cli :as cli]
|
||||
[generator :as gen]
|
||||
[nemesis :as nemesis]
|
||||
[tests :as tests]]
|
||||
[jepsen.os.debian :as debian]
|
||||
[jepsen.garage
|
||||
[daemon :as grg]
|
||||
[nemesis :as grgNemesis]
|
||||
[reg :as reg]
|
||||
[set :as set]]))
|
||||
|
||||
(def workloads
|
||||
"A map of workload names to functions that construct workloads, given opts."
|
||||
{"reg1" reg/workload1
|
||||
"reg2" reg/workload2
|
||||
"set1" set/workload1
|
||||
"set2" set/workload2})
|
||||
|
||||
(def scenari
|
||||
"A map of scenari to the associated nemesis"
|
||||
{"c" grgNemesis/scenario-c
|
||||
"cp" grgNemesis/scenario-cp
|
||||
"r" grgNemesis/scenario-r
|
||||
"pr" grgNemesis/scenario-pr
|
||||
"cpr" grgNemesis/scenario-cpr
|
||||
"cdp" grgNemesis/scenario-cdp
|
||||
"dpr" grgNemesis/scenario-dpr})
|
||||
|
||||
(def patches
|
||||
"A map of patch names to Garage builds"
|
||||
{"default" "v0.9.0"
|
||||
"tsfix1" "d146cdd5b66ca1d3ed65ce93ca42c6db22defc09"
|
||||
"tsfix2" "c82d91c6bccf307186332b6c5c6fc0b128b1b2b1"
|
||||
"task3a" "707442f5de416fdbed4681a33b739f0a787b7834"})
|
||||
|
||||
(def cli-opts
|
||||
"Additional command line options."
|
||||
[["-p" "--patch NAME" "Garage patch to use"
|
||||
:default "default"
|
||||
:validate [patches (cli/one-of patches)]]
|
||||
["-s" "--scenario NAME" "Nemesis scenario to run"
|
||||
:default "cp"
|
||||
:validate [scenari (cli/one-of scenari)]]
|
||||
["-r" "--rate HZ" "Approximate number of requests per second, per thread."
|
||||
:default 10
|
||||
:parse-fn read-string
|
||||
:validate [#(and (number? %) (pos? %)) "Must be a positive number"]]
|
||||
[nil "--ops-per-key NUM" "Maximum number of operations on any given key."
|
||||
:default 100
|
||||
:parse-fn parse-long
|
||||
:validate [pos? "Must be a positive integer."]]
|
||||
["-w" "--workload NAME" "Workload of test to run"
|
||||
:default "reg1"
|
||||
:validate [workloads (cli/one-of workloads)]]])
|
||||
|
||||
(defn garage-test
|
||||
"Given an options map from the command line runner (e.g. :nodes, :ssh,
|
||||
:concurrency, ...), constructs a test map."
|
||||
[opts]
|
||||
(let [garage-version (get patches (:patch opts))
|
||||
db (grg/db garage-version)
|
||||
workload ((get workloads (:workload opts)) opts)
|
||||
scenario ((get scenari (:scenario opts)) (assoc opts :db db))]
|
||||
(merge tests/noop-test
|
||||
opts
|
||||
{:pure-generators true
|
||||
:name (str "garage " (name (:workload opts)) " " (name (:scenario opts)) " " (name (:patch opts)))
|
||||
:os debian/os
|
||||
:db db
|
||||
:client (:client workload)
|
||||
:generator (gen/phases
|
||||
(->>
|
||||
(:generator workload)
|
||||
(gen/stagger (/ (:rate opts)))
|
||||
(gen/nemesis (:generator scenario))
|
||||
(gen/time-limit (:time-limit opts)))
|
||||
(gen/log "Healing cluster")
|
||||
(gen/nemesis (:final-generator scenario))
|
||||
(gen/log "Waiting for recovery")
|
||||
(gen/sleep 10)
|
||||
(gen/clients (:final-generator workload)))
|
||||
:nemesis (:nemesis scenario)
|
||||
:checker (checker/compose
|
||||
{:perf (checker/perf (:perf scenario))
|
||||
:workload (:checker workload)})
|
||||
})))
|
||||
|
||||
|
||||
(defn -main
|
||||
"Handles command line arguments. Can either run a test, or a web server for
|
||||
browsing results."
|
||||
[& args]
|
||||
(cli/run! (merge (cli/single-test-cmd {:test-fn garage-test
|
||||
:opt-spec cli-opts})
|
||||
(cli/serve-cmd))
|
||||
args))
|
152
script/jepsen.garage/src/jepsen/garage/daemon.clj
Normal file
152
script/jepsen.garage/src/jepsen/garage/daemon.clj
Normal file
@ -0,0 +1,152 @@
|
||||
(ns jepsen.garage.daemon
|
||||
(:require [clojure.tools.logging :refer :all]
|
||||
[jepsen [control :as c]
|
||||
[core :as jepsen]
|
||||
[db :as db]]
|
||||
[jepsen.control.util :as cu]))
|
||||
|
||||
; CONSTANTS -- HOW GARAGE IS SET UP
|
||||
|
||||
(def base-dir "/opt/garage")
|
||||
(def data-dir (str base-dir "/data"))
|
||||
(def meta-dir (str base-dir "/meta"))
|
||||
(def binary (str base-dir "/garage"))
|
||||
(def logfile (str base-dir "/garage.log"))
|
||||
(def pidfile (str base-dir "/garage.pid"))
|
||||
|
||||
(def admin-token "icanhazadmin")
|
||||
(def access-key-id "GK8bfb6a51286071c6c9cd8bc3")
|
||||
(def secret-access-key "b0be95f71c1c6f16858a9edf395078b75c12ecb6b1c03385c4ae92076e4994a3")
|
||||
(def bucket-name "jepsen")
|
||||
|
||||
; THE GARAGE DB
|
||||
|
||||
(defn install!
|
||||
"Download and install Garage"
|
||||
[node version]
|
||||
(c/su
|
||||
(c/trace
|
||||
(info node "installing garage" version)
|
||||
(c/exec :mkdir :-p base-dir)
|
||||
(let [url (str "https://garagehq.deuxfleurs.fr/_releases/" version "/x86_64-unknown-linux-musl/garage")
|
||||
cache (cu/cached-wget! url)]
|
||||
(c/exec :cp cache binary))
|
||||
(c/exec :chmod :+x binary))))
|
||||
|
||||
(defn configure!
|
||||
"Configure Garage"
|
||||
[node]
|
||||
(c/su
|
||||
(c/trace
|
||||
(cu/write-file!
|
||||
(str "rpc_secret = \"0fffabe52542c2b89a56b2efb7dfd477e9dafb285c9025cbdf1de7ca21a6b372\"\n"
|
||||
"rpc_bind_addr = \"0.0.0.0:3901\"\n"
|
||||
"rpc_public_addr = \"" node ":3901\"\n"
|
||||
"db_engine = \"lmdb\"\n"
|
||||
"replication_mode = \"2\"\n"
|
||||
"data_dir = \"" data-dir "\"\n"
|
||||
"metadata_dir = \"" meta-dir "\"\n"
|
||||
"[s3_api]\n"
|
||||
"s3_region = \"us-east-1\"\n"
|
||||
"api_bind_addr = \"0.0.0.0:3900\"\n"
|
||||
"[k2v_api]\n"
|
||||
"api_bind_addr = \"0.0.0.0:3902\"\n"
|
||||
"[admin]\n"
|
||||
"api_bind_addr = \"0.0.0.0:3903\"\n"
|
||||
"admin_token = \"" admin-token "\"\n"
|
||||
"trace_sink = \"http://192.168.56.1:4317\"\n")
|
||||
"/etc/garage.toml"))))
|
||||
|
||||
(defn connect-node!
|
||||
"Connect a Garage node to the rest of the cluster"
|
||||
[test node]
|
||||
(c/trace
|
||||
(let [node-id (c/exec binary :node :id :-q)]
|
||||
(info node "node id:" node-id)
|
||||
(c/on-many (:nodes test)
|
||||
(c/exec binary :node :connect node-id)))))
|
||||
|
||||
(defn configure-node!
|
||||
"Configure a Garage node to be part of a cluster layout"
|
||||
[test node]
|
||||
(c/trace
|
||||
(let [node-id (c/exec binary :node :id :-q)]
|
||||
(c/on (jepsen/primary test)
|
||||
(c/exec binary :layout :assign (subs node-id 0 16) :-c :1G :-z :dc1 :-t node)))))
|
||||
|
||||
(defn finalize-config!
|
||||
"Apply the layout and create a key/bucket pair in the cluster"
|
||||
[node]
|
||||
(c/trace
|
||||
(c/exec binary :layout :apply :--version 1)
|
||||
(info node "garage status:" (c/exec binary :status))
|
||||
(c/exec binary :key :import access-key-id secret-access-key :--yes)
|
||||
(c/exec binary :bucket :create bucket-name)
|
||||
(c/exec binary :bucket :allow :--read :--write bucket-name :--key access-key-id)
|
||||
(info node "key info: " (c/exec binary :key :info access-key-id))))
|
||||
|
||||
(defn db
|
||||
"Garage DB for a particular version"
|
||||
[version]
|
||||
(reify db/DB
|
||||
(setup! [_ test node]
|
||||
(install! node version)
|
||||
(configure! node)
|
||||
(cu/start-daemon!
|
||||
{:logfile logfile
|
||||
:pidfile pidfile
|
||||
:chdir base-dir
|
||||
:env {:RUST_LOG "garage=debug,garage_api=trace"}}
|
||||
binary
|
||||
:server)
|
||||
(c/exec :sleep 3)
|
||||
|
||||
(jepsen/synchronize test)
|
||||
(connect-node! test node)
|
||||
|
||||
(jepsen/synchronize test)
|
||||
(configure-node! test node)
|
||||
|
||||
(jepsen/synchronize test)
|
||||
(when (= node (jepsen/primary test))
|
||||
(finalize-config! node)))
|
||||
|
||||
(teardown! [_ test node]
|
||||
(info node "tearing down garage" version)
|
||||
(c/su
|
||||
(cu/stop-daemon! binary pidfile)
|
||||
(c/exec :rm :-rf logfile)
|
||||
(c/exec :rm :-rf data-dir)
|
||||
(c/exec :rm :-rf meta-dir)))
|
||||
|
||||
db/Pause
|
||||
(pause! [_ test node]
|
||||
(cu/grepkill! :stop binary))
|
||||
(resume! [_ test node]
|
||||
(cu/grepkill! :cont binary))
|
||||
|
||||
db/Kill
|
||||
(kill! [_ test node]
|
||||
(cu/stop-daemon! binary pidfile))
|
||||
(start! [_ test node]
|
||||
(cu/start-daemon!
|
||||
{:logfile logfile
|
||||
:pidfile pidfile
|
||||
:chdir base-dir
|
||||
:env {:RUST_LOG "garage=debug,garage_api=trace"}}
|
||||
binary
|
||||
:server))
|
||||
|
||||
db/LogFiles
|
||||
(log-files [_ test node]
|
||||
[logfile])))
|
||||
|
||||
(defn creds
|
||||
"Obtain Garage credentials for node"
|
||||
[node]
|
||||
{:access-key access-key-id
|
||||
:secret-key secret-access-key
|
||||
:endpoint (str "http://" node ":3900")
|
||||
:bucket bucket-name
|
||||
:client-config {:path-style-access-enabled true}})
|
||||
|
142
script/jepsen.garage/src/jepsen/garage/nemesis.clj
Normal file
142
script/jepsen.garage/src/jepsen/garage/nemesis.clj
Normal file
@ -0,0 +1,142 @@
|
||||
(ns jepsen.garage.nemesis
|
||||
(:require [clojure.tools.logging :refer :all]
|
||||
[jepsen [control :as c]
|
||||
[core :as jepsen]
|
||||
[generator :as gen]
|
||||
[nemesis :as nemesis]]
|
||||
[jepsen.nemesis.combined :as combined]
|
||||
[jepsen.garage.daemon :as grg]
|
||||
[jepsen.control.util :as cu]))
|
||||
|
||||
; ---- reconfiguration nemesis ----
|
||||
|
||||
(defn configure-present!
|
||||
"Configure node to be active in new cluster layout"
|
||||
[test nodes]
|
||||
(info "configure-present!" nodes)
|
||||
(let [node-ids (c/on-many nodes (c/exec grg/binary :node :id :-q))
|
||||
node-id-strs (map (fn [[_ v]] (subs v 0 16)) node-ids)]
|
||||
(c/on
|
||||
(jepsen/primary test)
|
||||
(apply c/exec (concat [grg/binary :layout :assign :-c :1G] node-id-strs)))))
|
||||
|
||||
(defn configure-absent!
  "Take `nodes` out of the storage set of the new cluster layout.

  Looks up the Garage node id of each of `nodes`, then, from the primary
  node, stages a single `garage layout assign -g` covering all of them
  (`-g` presumably marks the nodes as gateways, i.e. storing no data --
  confirm against the Garage CLI reference). The staged change only takes
  effect once the layout is applied, see `finalize-config!`."
  [test nodes]
  (info "configure-absent!" nodes)
  (let [node-ids     (c/on-many nodes (c/exec grg/binary :node :id :-q))
        ; only the first 16 characters of each node id are passed to the CLI
        node-id-strs (map (fn [[_ v]] (subs v 0 16)) node-ids)]
    (c/on
      (jepsen/primary test)
      (apply c/exec (concat [grg/binary :layout :assign :-g] node-id-strs)))))
|
||||
|
||||
(defn finalize-config!
|
||||
"Apply the proposed cluster layout"
|
||||
[test]
|
||||
(let [layout-show (c/on (jepsen/primary test) (c/exec grg/binary :layout :show))
|
||||
[_ layout-next-version] (re-find #"apply --version (\d+)\n" layout-show)]
|
||||
(if layout-next-version
|
||||
(do
|
||||
(info "layout show: " layout-show "; next-version: " layout-next-version)
|
||||
(c/on (jepsen/primary test)
|
||||
(c/exec grg/binary :layout :apply :--version layout-next-version)))
|
||||
(info "no layout changes to apply"))))
|
||||
|
||||
(defn reconfigure-subset
  "Nemesis that reconfigures the cluster layout around a subset of nodes.

  On `:start`, shuffles the test's nodes, keeps the first `cnt` of them
  (`configure-present!`), hands the others to `configure-absent!`, and
  applies the resulting layout. On `:stop`, assigns all nodes again and
  applies the layout. The returned op carries the kept nodes as `:value`."
  [cnt]
  (reify nemesis/Nemesis
    (setup! [this test] this)

    (invoke! [this test op]
      (case (:f op)
        :start
        (let [[keep-nodes remove-nodes]
              (->> (:nodes test)
                   shuffle
                   (split-at cnt))]
          (info "layout split: keep " keep-nodes ", remove " remove-nodes)
          (configure-present! test keep-nodes)
          (configure-absent! test remove-nodes)
          (finalize-config! test)
          (assoc op :value keep-nodes))
        :stop
        (do
          (info "layout un-split: all nodes=" (:nodes test))
          (configure-present! test (:nodes test))
          (finalize-config! test)
          (assoc op :value (:nodes test)))))

    (teardown! [this test] this)))
|
||||
|
||||
; ---- nemesis scenari ----
|
||||
|
||||
(defn nemesis-op
|
||||
"A generator for a single nemesis operation"
|
||||
[op]
|
||||
(fn [_ _] {:type :info, :f op}))
|
||||
|
||||
(defn reconfiguration-package
  "Cluster reconfiguration nemesis package.

  Returns a map with the keys `:generator`, `:final-generator`, `:nemesis`
  and `:perf`. `opts` may contain `:interval`, the delay handed to
  `gen/stagger` between nemesis operations (defaults to 5)."
  [opts]
  {:generator (->>
                (gen/mix [(nemesis-op :reconfigure-start)
                          (nemesis-op :reconfigure-stop)])
                (gen/stagger (:interval opts 5)))
   ; always end the test with the full layout restored
   :final-generator {:type :info, :f :reconfigure-stop}
   ; translate the package-level :f names to the :start/:stop ops
   ; understood by reconfigure-subset
   :nemesis (nemesis/compose
              {{:reconfigure-start :start
                :reconfigure-stop :stop} (reconfigure-subset 3)})
   :perf #{{:name "reconfigure"
            :start #{:reconfigure-start}
            ; must name the same :f as the stop op emitted by the generators
            :stop #{:reconfigure-stop}
            :color "#A197E9"}}})
|
||||
|
||||
(defn scenario-c
|
||||
"Clock modifying scenario"
|
||||
[opts]
|
||||
(combined/clock-package {:db (:db opts), :interval 1, :faults #{:clock}}))
|
||||
|
||||
(defn scenario-cp
|
||||
"Clock modifying + partition scenario"
|
||||
[opts]
|
||||
(combined/compose-packages
|
||||
[(combined/clock-package {:db (:db opts), :interval 1, :faults #{:clock}})
|
||||
(combined/partition-package {:db (:db opts), :interval 1, :faults #{:partition}})]))
|
||||
|
||||
(defn scenario-r
|
||||
"Cluster reconfiguration scenario"
|
||||
[opts]
|
||||
(reconfiguration-package {:interval 1}))
|
||||
|
||||
(defn scenario-pr
|
||||
"Partition + cluster reconfiguration scenario"
|
||||
[opts]
|
||||
(combined/compose-packages
|
||||
[(combined/partition-package {:db (:db opts), :interval 1, :faults #{:partition}})
|
||||
(reconfiguration-package {:interval 1})]))
|
||||
|
||||
(defn scenario-cpr
|
||||
"Clock scramble + partition + cluster reconfiguration scenario"
|
||||
[opts]
|
||||
(combined/compose-packages
|
||||
[(combined/clock-package {:db (:db opts), :interval 1, :faults #{:clock}})
|
||||
(combined/partition-package {:db (:db opts), :interval 1, :faults #{:partition}})
|
||||
(reconfiguration-package {:interval 1})]))
|
||||
|
||||
(defn scenario-cdp
|
||||
"Clock modifying + db + partition scenario"
|
||||
[opts]
|
||||
(combined/compose-packages
|
||||
[(combined/clock-package {:db (:db opts), :interval 1, :faults #{:clock}})
|
||||
(combined/db-package {:db (:db opts), :interval 1, :faults #{:db :pause :kill}})
|
||||
(combined/partition-package {:db (:db opts), :interval 1, :faults #{:partition}})]))
|
||||
|
||||
(defn scenario-dpr
|
||||
"Db + partition + cluster reconfiguration scenario"
|
||||
[opts]
|
||||
(combined/compose-packages
|
||||
[(combined/db-package {:db (:db opts), :interval 1, :faults #{:db :pause :kill}})
|
||||
(combined/partition-package {:db (:db opts), :interval 1, :faults #{:partition}})
|
||||
(reconfiguration-package {:interval 1})]))
|
||||
|
143
script/jepsen.garage/src/jepsen/garage/reg.clj
Normal file
143
script/jepsen.garage/src/jepsen/garage/reg.clj
Normal file
@ -0,0 +1,143 @@
|
||||
(ns jepsen.garage.reg
|
||||
(:require [clojure.tools.logging :refer :all]
|
||||
[clojure.string :as str]
|
||||
[clojure.set :as set]
|
||||
[jepsen [checker :as checker]
|
||||
[cli :as cli]
|
||||
[client :as client]
|
||||
[control :as c]
|
||||
[db :as db]
|
||||
[generator :as gen]
|
||||
[independent :as independent]
|
||||
[nemesis :as nemesis]
|
||||
[util :as util]
|
||||
[tests :as tests]]
|
||||
[jepsen.checker.timeline :as timeline]
|
||||
[jepsen.control.util :as cu]
|
||||
[jepsen.os.debian :as debian]
|
||||
[jepsen.garage.daemon :as grg]
|
||||
[jepsen.garage.s3api :as s3]
|
||||
[knossos.model :as model]
|
||||
[slingshot.slingshot :refer [try+]]))
|
||||
|
||||
(defn op-get [_ _] {:type :invoke, :f :read, :value nil})
|
||||
(defn op-put [_ _] {:type :invoke, :f :write, :value (str (rand-int 99))})
|
||||
(defn op-del [_ _] {:type :invoke, :f :write, :value nil})
|
||||
|
||||
(defrecord RegClient [creds]
|
||||
client/Client
|
||||
(open! [this test node]
|
||||
(assoc this :creds (grg/creds node)))
|
||||
(setup! [this test])
|
||||
(invoke! [this test op]
|
||||
(try+
|
||||
(let [[k v] (:value op)]
|
||||
(case (:f op)
|
||||
:read
|
||||
(util/timeout
|
||||
10000
|
||||
(assoc op :type :fail, :error ::timeout)
|
||||
(let [value (s3/get (:creds this) k)]
|
||||
(assoc op :type :ok, :value (independent/tuple k value))))
|
||||
:write
|
||||
(util/timeout
|
||||
10000
|
||||
(assoc op :type :info, :error ::timeout)
|
||||
(do
|
||||
(s3/put (:creds this) k v)
|
||||
(assoc op :type :ok)))))
|
||||
(catch (re-find #"Unavailable" (.getMessage %)) ex
|
||||
(assoc op :type :info, :error ::unavailable))
|
||||
(catch (re-find #"Broken pipe" (.getMessage %)) ex
|
||||
(assoc op :type :info, :error ::broken-pipe))
|
||||
(catch (re-find #"Connection refused" (.getMessage %)) ex
|
||||
(assoc op :type :info, :error ::connection-refused))))
|
||||
(teardown! [this test])
|
||||
(close! [this test]))
|
||||
|
||||
(defn reg-read-after-write
|
||||
"Read-after-Write checker for register operations"
|
||||
[]
|
||||
(reify checker/Checker
|
||||
(check [this test history opts]
|
||||
(let [init {:put-values {-1 nil}
|
||||
:put-done #{-1}
|
||||
:put-in-progress {}
|
||||
:read-can-contain {}
|
||||
:bad-reads #{}}
|
||||
final (reduce
|
||||
(fn [state op]
|
||||
(let [current-values (set/union
|
||||
(set (map (fn [idx] (get (:put-values state) idx)) (:put-done state)))
|
||||
(set (map (fn [[_ [idx _]]] (get (:put-values state) idx)) (:put-in-progress state))))
|
||||
read-can-contain (reduce
|
||||
(fn [rcc [idx v]] (assoc rcc idx (set/union current-values v)))
|
||||
{} (:read-can-contain state))]
|
||||
(info "--------")
|
||||
(info "state: " state)
|
||||
(info "current-values: " current-values)
|
||||
(info "read-can-contain: " read-can-contain)
|
||||
(info "op: " op)
|
||||
(case [(:type op) (:f op)]
|
||||
([:invoke :write])
|
||||
(assoc state
|
||||
:read-can-contain read-can-contain
|
||||
:put-values (assoc (:put-values state) (:index op) (:value op))
|
||||
:put-in-progress (assoc (:put-in-progress state) (:process op) [(:index op) (:put-done state)]))
|
||||
([:ok :write])
|
||||
(let [[index overwrites] (get (:put-in-progress state) (:process op))]
|
||||
(assoc state
|
||||
:read-can-contain read-can-contain
|
||||
:put-in-progress (dissoc (:put-in-progress state) (:process op))
|
||||
:put-done
|
||||
(conj
|
||||
(set/difference (:put-done state) overwrites)
|
||||
index)))
|
||||
([:invoke :read])
|
||||
(assoc state
|
||||
:read-can-contain (assoc read-can-contain (:process op) current-values))
|
||||
([:ok :read])
|
||||
(let [this-read-can-contain (get read-can-contain (:process op))
|
||||
bad-reads (if (contains? this-read-can-contain (:value op))
|
||||
(:bad-reads state)
|
||||
(conj (:bad-reads state) [(:process op) (:index op) (:value op) this-read-can-contain]))]
|
||||
(info "this-read-can-contain: " this-read-can-contain)
|
||||
(assoc state
|
||||
:read-can-contain (dissoc read-can-contain (:process op))
|
||||
:bad-reads bad-reads))
|
||||
state)))
|
||||
init history)
|
||||
valid? (empty? (:bad-reads final))]
|
||||
(assoc final :valid? valid?)))))
|
||||
|
||||
(defn workload-common
|
||||
"Common parts of workload"
|
||||
[opts]
|
||||
{:client (RegClient. nil)
|
||||
:generator (independent/concurrent-generator
|
||||
10
|
||||
(range)
|
||||
(fn [k]
|
||||
(->>
|
||||
(gen/mix [op-get op-put op-del])
|
||||
(gen/limit (:ops-per-key opts)))))})
|
||||
|
||||
(defn workload1
|
||||
"Tests linearizable reads and writes"
|
||||
[opts]
|
||||
(assoc (workload-common opts)
|
||||
:checker (independent/checker
|
||||
(checker/compose
|
||||
{:linear (checker/linearizable
|
||||
{:model (model/register)
|
||||
:algorithm :linear})
|
||||
:timeline (timeline/html)}))))
|
||||
|
||||
(defn workload2
|
||||
"Tests CRDT reads and writes"
|
||||
[opts]
|
||||
(assoc (workload-common opts)
|
||||
:checker (independent/checker
|
||||
(checker/compose
|
||||
{:reg-read-after-write (reg-read-after-write)
|
||||
:timeline (timeline/html)}))))
|
48
script/jepsen.garage/src/jepsen/garage/s3api.clj
Normal file
48
script/jepsen.garage/src/jepsen/garage/s3api.clj
Normal file
@ -0,0 +1,48 @@
|
||||
(ns jepsen.garage.s3api
|
||||
(:require [clojure.tools.logging :refer :all]
|
||||
[jepsen [control :as c]]
|
||||
[amazonica.aws.s3 :as s3]
|
||||
[slingshot.slingshot :refer [try+]]))
|
||||
|
||||
; GARAGE S3 HELPER FUNCTIONS
|
||||
|
||||
(defn get
|
||||
"Helper for GetObject"
|
||||
[creds k]
|
||||
(try+
|
||||
(-> (s3/get-object creds (:bucket creds) k)
|
||||
:input-stream
|
||||
slurp)
|
||||
(catch (re-find #"Key not found" (.getMessage %)) ex
|
||||
nil)))
|
||||
|
||||
(defn put
|
||||
"Helper for PutObject or DeleteObject (is a delete if value is nil)"
|
||||
[creds k v]
|
||||
(if (= v nil)
|
||||
(s3/delete-object creds
|
||||
:bucket-name (:bucket creds)
|
||||
:key k)
|
||||
(let [some-bytes (.getBytes v "UTF-8")
|
||||
bytes-stream (java.io.ByteArrayInputStream. some-bytes)]
|
||||
(s3/put-object creds
|
||||
:bucket-name (:bucket creds)
|
||||
:key k
|
||||
:input-stream bytes-stream
|
||||
:metadata {:content-length (count some-bytes)}))))
|
||||
|
||||
(defn list-inner
  "Pages through ListObjectsV2 results for `prefix` in the bucket named by
  `(:bucket creds)`, starting at continuation token `ct` (nil for the first
  page). Returns the object keys found, each page being prepended to
  `accum`."
  [creds prefix ct accum]
  ; loop/recur rather than a self-call: Clojure does not eliminate tail
  ; calls, so a recursive call would consume one stack frame per page.
  (loop [ct ct
         accum accum]
    (let [list-result (s3/list-objects-v2 creds
                                          {:bucket-name (:bucket creds)
                                           :prefix prefix
                                           :continuation-token ct})
          new-objects (map :key (:object-summaries list-result))
          objects (concat new-objects accum)]
      (if (:truncated? list-result)
        (recur (:next-continuation-token list-result) objects)
        objects))))
|
||||
(defn list
|
||||
"Helper for ListObjects -- just lists everything in the bucket"
|
||||
[creds prefix]
|
||||
(list-inner creds prefix nil []))
|
133
script/jepsen.garage/src/jepsen/garage/set.clj
Normal file
133
script/jepsen.garage/src/jepsen/garage/set.clj
Normal file
@ -0,0 +1,133 @@
|
||||
(ns jepsen.garage.set
|
||||
(:require [clojure.tools.logging :refer :all]
|
||||
[clojure.string :as str]
|
||||
[clojure.set :as set]
|
||||
[jepsen [checker :as checker]
|
||||
[cli :as cli]
|
||||
[client :as client]
|
||||
[control :as c]
|
||||
[checker :as checker]
|
||||
[db :as db]
|
||||
[generator :as gen]
|
||||
[independent :as independent]
|
||||
[nemesis :as nemesis]
|
||||
[util :as util]
|
||||
[tests :as tests]]
|
||||
[jepsen.checker.timeline :as timeline]
|
||||
[jepsen.control.util :as cu]
|
||||
[jepsen.os.debian :as debian]
|
||||
[jepsen.garage.daemon :as grg]
|
||||
[jepsen.garage.s3api :as s3]
|
||||
[knossos.model :as model]
|
||||
[slingshot.slingshot :refer [try+]]))
|
||||
|
||||
(defn op-add-rand100 [_ _] {:type :invoke, :f :add, :value (rand-int 100)})
|
||||
(defn op-read [_ _] {:type :invoke, :f :read, :value nil})
|
||||
|
||||
(defrecord SetClient [creds]
|
||||
client/Client
|
||||
(open! [this test node]
|
||||
(assoc this :creds (grg/creds node)))
|
||||
(setup! [this test])
|
||||
(invoke! [this test op]
|
||||
(try+
|
||||
(let [[k v] (:value op)
|
||||
prefix (str "set" k "/")]
|
||||
(case (:f op)
|
||||
:add
|
||||
(util/timeout
|
||||
10000
|
||||
(assoc op :type :info, :error ::timeout)
|
||||
(do
|
||||
(s3/put (:creds this) (str prefix v) "present")
|
||||
(assoc op :type :ok)))
|
||||
:read
|
||||
(util/timeout
|
||||
10000
|
||||
(assoc op :type :fail, :error ::timeout)
|
||||
(do
|
||||
(let [items (s3/list (:creds this) prefix)]
|
||||
(let [items-stripped (map (fn [o]
|
||||
(assert (str/starts-with? o prefix))
|
||||
(str/replace-first o prefix "")) items)
|
||||
items-set (set (map parse-long items-stripped))]
|
||||
(assoc op :type :ok, :value (independent/tuple k items-set))))))))
|
||||
(catch (re-find #"Unavailable" (.getMessage %)) ex
|
||||
(assoc op :type :info, :error ::unavailable))
|
||||
(catch (re-find #"Broken pipe" (.getMessage %)) ex
|
||||
(assoc op :type :info, :error ::broken-pipe))
|
||||
(catch (re-find #"Connection refused" (.getMessage %)) ex
|
||||
(assoc op :type :info, :error ::connection-refused))))
|
||||
(teardown! [this test])
|
||||
(close! [this test]))
|
||||
|
||||
(defn set-read-after-write
|
||||
"Read-after-Write checker for set operations"
|
||||
[]
|
||||
(reify checker/Checker
|
||||
(check [this test history opts]
|
||||
(let [init {:add-started #{}
|
||||
:add-done #{}
|
||||
:read-must-contain {}
|
||||
:missed #{}
|
||||
:unexpected #{}}
|
||||
final (reduce
|
||||
(fn [state op]
|
||||
(case [(:type op) (:f op)]
|
||||
([:invoke :add])
|
||||
(assoc state :add-started (conj (:add-started state) (:value op)))
|
||||
([:ok :add])
|
||||
(assoc state :add-done (conj (:add-done state) (:value op)))
|
||||
([:invoke :read])
|
||||
(assoc-in state [:read-must-contain (:process op)] (:add-done state))
|
||||
([:ok :read])
|
||||
(let [read-must-contain (get (:read-must-contain state) (:process op))
|
||||
new-missed (set/difference read-must-contain (:value op))
|
||||
new-unexpected (set/difference (:value op) (:add-started state))]
|
||||
(assoc state
|
||||
:read-must-contain (dissoc (:read-must-contain state) (:process op))
|
||||
:missed (set/union (:missed state) new-missed),
|
||||
:unexpected (set/union (:unexpected state) new-unexpected)))
|
||||
state))
|
||||
init history)
|
||||
valid? (and (empty? (:missed final)) (empty? (:unexpected final)))]
|
||||
(assoc final :valid? valid?)))))
|
||||
|
||||
(defn workload1
  "Tests insertions followed by a final read.

  For each of the keys 0..99 (10 threads per key), issues up to
  `(:ops-per-key opts)` `:add` operations with increasing values. The
  final generator then reads every key once, and the history is checked
  with `checker/set`."
  [opts]
  {:client (SetClient. nil)
   :checker (independent/checker
              (checker/compose
                {:set (checker/set)
                 :timeline (timeline/html)}))
   :generator (independent/concurrent-generator
                10
                (range 100)
                (fn [k]
                  (->> (range)
                       (map (fn [x] {:type :invoke, :f :add, :value x}))
                       (gen/limit (:ops-per-key opts)))))
   :final-generator (gen/phases
                      (independent/sequential-generator
                        (range 100)
                        (fn [k] (gen/once op-read)))
                      (gen/sleep 5))})
|
||||
|
||||
(defn workload2
  "Tests interleaved insertions and reads.

  For every key (unbounded key range, 10 threads per key), issues up to
  `(:ops-per-key opts)` operations drawn from a mix of `:add` of a random
  integer below 100 and `:read`. The history is checked with
  `set-read-after-write`. This workload has no `:final-generator`."
  [opts]
  {:client (SetClient. nil)
   :checker (independent/checker
              (checker/compose
                {:set-read-after-write (set-read-after-write)
                 ; :set-full (checker/set-full {:linearizable? false})
                 :timeline (timeline/html)}))
   :generator (independent/concurrent-generator
                10
                (range)
                (fn [k]
                  (->> (gen/mix [op-add-rand100 op-read])
                       (gen/limit (:ops-per-key opts)))))})
|
||||
|
||||
|
7
script/jepsen.garage/test/jepsen/garage_test.clj
Normal file
7
script/jepsen.garage/test/jepsen/garage_test.clj
Normal file
@ -0,0 +1,7 @@
|
||||
(ns jepsen.garage-test
|
||||
(:require [clojure.test :refer :all]
|
||||
[jepsen.garage :refer :all]))
|
||||
|
||||
; Replaces the always-failing project-template placeholder with a sanity
; check: the default values declared in `cli-opts` must be valid keys of
; the corresponding lookup maps, otherwise a run without flags would fail.
(deftest a-test
  (testing "default CLI option values name existing entries"
    (is (contains? patches "default"))
    (is (contains? scenari "cp"))
    (is (contains? workloads "reg1"))))
|
32
shell.nix
32
shell.nix
@ -1,12 +1,12 @@
|
||||
{
|
||||
buildSystem ? builtins.currentSystem,
|
||||
targetSystem ? buildSystem,
|
||||
}:
|
||||
{ system ? builtins.currentSystem, }:
|
||||
|
||||
with import ./nix/pkgs.nix;
|
||||
with import ./nix/common.nix;
|
||||
|
||||
let
|
||||
inherit (import ./default.nix { inherit buildSystem targetSystem; }) pkgs pkgsCross buildEnv;
|
||||
pkgs = import pkgsSrc {
|
||||
inherit system;
|
||||
overlays = [ cargo2nixOverlay ];
|
||||
};
|
||||
kaniko = (import ./nix/kaniko.nix) pkgs;
|
||||
manifest-tool = (import ./nix/manifest-tool.nix) pkgs;
|
||||
winscp = (import ./nix/winscp.nix) pkgs;
|
||||
@ -14,13 +14,21 @@ let
|
||||
in {
|
||||
# --- Rust Shell ---
|
||||
# Use it to compile Garage
|
||||
rust = pkgsCross.mkShell (buildEnv // {
|
||||
inputsFrom = [
|
||||
kaniko
|
||||
manifest-tool
|
||||
winscp
|
||||
rust = pkgs.mkShell {
|
||||
nativeBuildInputs = with pkgs; [
|
||||
#rustPlatform.rust.rustc
|
||||
rustPlatform.rust.cargo
|
||||
mold
|
||||
#clippy
|
||||
rustfmt
|
||||
#perl
|
||||
#protobuf
|
||||
#pkg-config
|
||||
#openssl
|
||||
file
|
||||
#cargo2nix.packages.x86_64-linux.cargo2nix
|
||||
];
|
||||
});
|
||||
};
|
||||
|
||||
# --- Integration shell ---
|
||||
# Use it to test Garage with common S3 clients
|
||||
|
@ -45,7 +45,7 @@ http = "0.2"
|
||||
httpdate = "1.0"
|
||||
http-range = "0.1"
|
||||
hyper = { version = "0.14", features = ["server", "http1", "runtime", "tcp", "stream"] }
|
||||
#hyperlocal = { version = "0.8.0", default-features = false, features = ["server"] }
|
||||
hyperlocal = { version = "0.8.0", default-features = false, features = ["server"] }
|
||||
multer = "2.0"
|
||||
percent-encoding = "2.1.0"
|
||||
roxmltree = "0.18"
|
||||
|
@ -182,7 +182,7 @@ impl AdminApiServer {
|
||||
),
|
||||
};
|
||||
let status_str = format!(
|
||||
"{}\nConsult the full health check API endpoint at /v1/health for more details\n",
|
||||
"{}\nConsult the full health check API endpoint at /v0/health for more details\n",
|
||||
status_str
|
||||
);
|
||||
|
||||
|
@ -1,4 +1,5 @@
|
||||
//use std::fs::{self, Permissions};
|
||||
use std::fs::{self, Permissions};
|
||||
use std::os::unix::fs::PermissionsExt;
|
||||
use std::sync::Arc;
|
||||
|
||||
use async_trait::async_trait;
|
||||
@ -11,9 +12,9 @@ use hyper::service::{make_service_fn, service_fn};
|
||||
use hyper::{Body, Request, Response, Server};
|
||||
use hyper::{HeaderMap, StatusCode};
|
||||
|
||||
//use hyperlocal::UnixServerExt;
|
||||
use hyperlocal::UnixServerExt;
|
||||
|
||||
//use tokio::net::UnixStream;
|
||||
use tokio::net::UnixStream;
|
||||
|
||||
use opentelemetry::{
|
||||
global,
|
||||
@ -113,18 +114,18 @@ impl<A: ApiHandler> ApiServer<A> {
|
||||
}
|
||||
});
|
||||
|
||||
//let unix_service = make_service_fn(|_: &UnixStream| {
|
||||
// let this = self.clone();
|
||||
let unix_service = make_service_fn(|_: &UnixStream| {
|
||||
let this = self.clone();
|
||||
|
||||
// let path = bind_addr.to_string();
|
||||
// async move {
|
||||
// Ok::<_, GarageError>(service_fn(move |req: Request<Body>| {
|
||||
// let this = this.clone();
|
||||
let path = bind_addr.to_string();
|
||||
async move {
|
||||
Ok::<_, GarageError>(service_fn(move |req: Request<Body>| {
|
||||
let this = this.clone();
|
||||
|
||||
// this.handler(req, path.clone())
|
||||
// }))
|
||||
// }
|
||||
//});
|
||||
this.handler(req, path.clone())
|
||||
}))
|
||||
}
|
||||
});
|
||||
|
||||
info!(
|
||||
"{} API server listening on {}",
|
||||
@ -139,24 +140,23 @@ impl<A: ApiHandler> ApiServer<A> {
|
||||
.with_graceful_shutdown(shutdown_signal)
|
||||
.await?
|
||||
}
|
||||
UnixOrTCPSocketAddress::UnixSocket(_path) => {
|
||||
panic!("Unix sockets are not supported in this fork") // TODO(mediocregopher)
|
||||
} //UnixOrTCPSocketAddress::UnixSocket(ref path) => {
|
||||
// use std::os::unix::fs::PermissionsExt;
|
||||
// remove_unix_socket_if_present(path).await?;
|
||||
UnixOrTCPSocketAddress::UnixSocket(ref path) => {
|
||||
if path.exists() {
|
||||
fs::remove_file(path)?
|
||||
}
|
||||
|
||||
// let bound = Server::bind_unix(path)?;
|
||||
let bound = Server::bind_unix(path)?;
|
||||
|
||||
// fs::set_permissions(
|
||||
// path,
|
||||
// Permissions::from_mode(unix_bind_addr_mode.unwrap_or(0o222)),
|
||||
// )?;
|
||||
fs::set_permissions(
|
||||
path,
|
||||
Permissions::from_mode(unix_bind_addr_mode.unwrap_or(0o222)),
|
||||
)?;
|
||||
|
||||
// bound
|
||||
// .serve(unix_service)
|
||||
// .with_graceful_shutdown(shutdown_signal)
|
||||
// .await?;
|
||||
//}
|
||||
bound
|
||||
.serve(unix_service)
|
||||
.with_graceful_shutdown(shutdown_signal)
|
||||
.await?;
|
||||
}
|
||||
};
|
||||
|
||||
Ok(())
|
||||
|
@ -344,7 +344,7 @@ impl ApiHandler for S3ApiServer {
|
||||
bucket_id,
|
||||
key,
|
||||
upload_id,
|
||||
part_number_marker: part_number_marker.map(|p| p.min(10000)),
|
||||
part_number_marker: part_number_marker.map(|p| p.clamp(1, 10000)),
|
||||
max_parts: max_parts.unwrap_or(1000).clamp(1, 1000),
|
||||
},
|
||||
)
|
||||
|
@ -3,12 +3,12 @@ use std::sync::Arc;
|
||||
use hyper::{Body, Request, Response, StatusCode};
|
||||
|
||||
use garage_util::data::*;
|
||||
use garage_util::time::*;
|
||||
|
||||
use garage_model::garage::Garage;
|
||||
use garage_model::s3::object_table::*;
|
||||
|
||||
use crate::s3::error::*;
|
||||
use crate::s3::put::next_timestamp;
|
||||
use crate::s3::xml as s3_xml;
|
||||
use crate::signature::verify_signed_content;
|
||||
|
||||
@ -23,36 +23,40 @@ async fn handle_delete_internal(
|
||||
.await?
|
||||
.ok_or(Error::NoSuchKey)?; // No need to delete
|
||||
|
||||
let del_timestamp = next_timestamp(Some(&object));
|
||||
let del_uuid = gen_uuid();
|
||||
let interesting_versions = object.versions().iter().filter(|v| {
|
||||
!matches!(
|
||||
v.state,
|
||||
ObjectVersionState::Aborted
|
||||
| ObjectVersionState::Complete(ObjectVersionData::DeleteMarker)
|
||||
)
|
||||
});
|
||||
|
||||
let deleted_version = object
|
||||
.versions()
|
||||
.iter()
|
||||
.rev()
|
||||
.find(|v| !matches!(&v.state, ObjectVersionState::Aborted))
|
||||
.or_else(|| object.versions().iter().rev().next());
|
||||
let deleted_version = match deleted_version {
|
||||
Some(dv) => dv.uuid,
|
||||
None => {
|
||||
warn!("Object has no versions: {:?}", object);
|
||||
Uuid::from([0u8; 32])
|
||||
let mut version_to_delete = None;
|
||||
let mut timestamp = now_msec();
|
||||
for v in interesting_versions {
|
||||
if v.timestamp + 1 > timestamp || version_to_delete.is_none() {
|
||||
version_to_delete = Some(v.uuid);
|
||||
}
|
||||
};
|
||||
timestamp = std::cmp::max(timestamp, v.timestamp + 1);
|
||||
}
|
||||
|
||||
let deleted_version = version_to_delete.ok_or(Error::NoSuchKey)?;
|
||||
|
||||
let version_uuid = gen_uuid();
|
||||
|
||||
let object = Object::new(
|
||||
bucket_id,
|
||||
key.into(),
|
||||
vec![ObjectVersion {
|
||||
uuid: del_uuid,
|
||||
timestamp: del_timestamp,
|
||||
uuid: version_uuid,
|
||||
timestamp,
|
||||
state: ObjectVersionState::Complete(ObjectVersionData::DeleteMarker),
|
||||
}],
|
||||
);
|
||||
|
||||
garage.object_table.insert(&object).await?;
|
||||
|
||||
Ok((deleted_version, del_uuid))
|
||||
Ok((deleted_version, version_uuid))
|
||||
}
|
||||
|
||||
pub async fn handle_delete(
|
||||
|
@ -426,10 +426,8 @@ where
|
||||
// Drop the first key if needed
|
||||
// Only AfterKey requires it according to the S3 spec and our implem.
|
||||
match (&cursor, iter.peek()) {
|
||||
(RangeBegin::AfterKey { key }, Some(object)) if &object.key == key => {
|
||||
iter.next();
|
||||
}
|
||||
_ => (),
|
||||
(RangeBegin::AfterKey { key }, Some(object)) if &object.key == key => iter.next(),
|
||||
(_, _) => None,
|
||||
};
|
||||
|
||||
while let Some(object) = iter.peek() {
|
||||
@ -438,22 +436,16 @@ where
|
||||
return Ok(None);
|
||||
}
|
||||
|
||||
match acc.extract(query, &cursor, &mut iter) {
|
||||
ExtractionResult::Extracted { key } => {
|
||||
cursor = RangeBegin::AfterKey { key };
|
||||
}
|
||||
cursor = match acc.extract(query, &cursor, &mut iter) {
|
||||
ExtractionResult::Extracted { key } => RangeBegin::AfterKey { key },
|
||||
ExtractionResult::SkipTo { key, fallback_key } => {
|
||||
cursor = RangeBegin::IncludingKey { key, fallback_key };
|
||||
RangeBegin::IncludingKey { key, fallback_key }
|
||||
}
|
||||
ExtractionResult::FilledAtUpload { key, upload } => {
|
||||
return Ok(Some(RangeBegin::AfterUpload { key, upload }));
|
||||
}
|
||||
ExtractionResult::Filled => {
|
||||
return Ok(Some(cursor));
|
||||
}
|
||||
ExtractionResult::NoMore => {
|
||||
return Ok(None);
|
||||
return Ok(Some(RangeBegin::AfterUpload { key, upload }))
|
||||
}
|
||||
ExtractionResult::Filled => return Ok(Some(cursor)),
|
||||
ExtractionResult::NoMore => return Ok(None),
|
||||
};
|
||||
}
|
||||
|
||||
@ -527,8 +519,8 @@ fn fetch_part_info<'a>(
|
||||
/// This key can be the prefix in the base case, or intermediate
|
||||
/// points in the dataset if we are continuing a previous listing.
|
||||
impl ListObjectsQuery {
|
||||
fn build_accumulator(&self) -> ObjectAccumulator {
|
||||
ObjectAccumulator::new(self.common.page_size)
|
||||
fn build_accumulator(&self) -> Accumulator<String, ObjectInfo> {
|
||||
Accumulator::<String, ObjectInfo>::new(self.common.page_size)
|
||||
}
|
||||
|
||||
fn begin(&self) -> Result<RangeBegin, Error> {
|
||||
@ -537,10 +529,9 @@ impl ListObjectsQuery {
|
||||
// In V2 mode, the continuation token is defined as an opaque
|
||||
// string in the spec, so we can do whatever we want with it.
|
||||
// In our case, it is defined as either [ or ] (for include
|
||||
// or exclude), followed by a base64-encoded string
|
||||
// representing the key to start with.
|
||||
(Some(token), _) => match &token.get(..1) {
|
||||
Some("[") => Ok(RangeBegin::IncludingKey {
|
||||
(Some(token), _) => match &token[..1] {
|
||||
"[" => Ok(RangeBegin::IncludingKey {
|
||||
key: String::from_utf8(
|
||||
BASE64_STANDARD
|
||||
.decode(token[1..].as_bytes())
|
||||
@ -548,7 +539,7 @@ impl ListObjectsQuery {
|
||||
)?,
|
||||
fallback_key: None,
|
||||
}),
|
||||
Some("]") => Ok(RangeBegin::AfterKey {
|
||||
"]" => Ok(RangeBegin::AfterKey {
|
||||
key: String::from_utf8(
|
||||
BASE64_STANDARD
|
||||
.decode(token[1..].as_bytes())
|
||||
@ -589,8 +580,8 @@ impl ListObjectsQuery {
|
||||
}
|
||||
|
||||
impl ListMultipartUploadsQuery {
|
||||
fn build_accumulator(&self) -> UploadAccumulator {
|
||||
UploadAccumulator::new(self.common.page_size)
|
||||
fn build_accumulator(&self) -> Accumulator<Uuid, UploadInfo> {
|
||||
Accumulator::<Uuid, UploadInfo>::new(self.common.page_size)
|
||||
}
|
||||
|
||||
fn begin(&self) -> Result<RangeBegin, Error> {
|
||||
@ -674,7 +665,6 @@ impl<K: std::cmp::Ord, V> Accumulator<K, V> {
|
||||
Some(p) => p,
|
||||
None => return None,
|
||||
};
|
||||
assert!(pfx.starts_with(&query.prefix));
|
||||
|
||||
// Try to register this prefix
|
||||
// If not possible, we can return early
|
||||
@ -685,11 +675,8 @@ impl<K: std::cmp::Ord, V> Accumulator<K, V> {
|
||||
// We consume the whole common prefix from the iterator
|
||||
let mut last_pfx_key = &object.key;
|
||||
loop {
|
||||
match objects.peek() {
|
||||
Some(o) if o.key.starts_with(pfx) => {
|
||||
last_pfx_key = &o.key;
|
||||
objects.next();
|
||||
}
|
||||
last_pfx_key = match objects.peek() {
|
||||
Some(o) if o.key.starts_with(pfx) => &o.key,
|
||||
Some(_) => {
|
||||
return Some(ExtractionResult::Extracted {
|
||||
key: last_pfx_key.to_owned(),
|
||||
@ -705,6 +692,8 @@ impl<K: std::cmp::Ord, V> Accumulator<K, V> {
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
objects.next();
|
||||
}
|
||||
}
|
||||
|
||||
@ -719,11 +708,12 @@ impl<K: std::cmp::Ord, V> Accumulator<K, V> {
|
||||
}
|
||||
|
||||
// Otherwise, we need to check if we can add it
|
||||
if self.is_full() {
|
||||
false
|
||||
} else {
|
||||
self.common_prefixes.insert(key);
|
||||
true
|
||||
match self.is_full() {
|
||||
true => false,
|
||||
false => {
|
||||
self.common_prefixes.insert(key);
|
||||
true
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@ -731,11 +721,12 @@ impl<K: std::cmp::Ord, V> Accumulator<K, V> {
|
||||
// It is impossible to add twice a key, this is an error
|
||||
assert!(!self.keys.contains_key(&key));
|
||||
|
||||
if self.is_full() {
|
||||
false
|
||||
} else {
|
||||
self.keys.insert(key, value);
|
||||
true
|
||||
match self.is_full() {
|
||||
true => false,
|
||||
false => {
|
||||
self.keys.insert(key, value);
|
||||
true
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
@ -752,7 +743,6 @@ impl ExtractAccumulator for ObjectAccumulator {
|
||||
}
|
||||
|
||||
let object = objects.next().expect("This iterator can not be empty as it is checked earlier in the code. This is a logic bug, please report it.");
|
||||
assert!(object.key.starts_with(&query.prefix));
|
||||
|
||||
let version = match object.versions().iter().find(|x| x.is_data()) {
|
||||
Some(v) => v,
|
||||
|
@ -9,6 +9,7 @@ use md5::{Digest as Md5Digest, Md5};
|
||||
use garage_table::*;
|
||||
use garage_util::async_hash::*;
|
||||
use garage_util::data::*;
|
||||
use garage_util::time::*;
|
||||
|
||||
use garage_model::bucket_table::Bucket;
|
||||
use garage_model::garage::Garage;
|
||||
@ -29,13 +30,10 @@ pub async fn handle_create_multipart_upload(
|
||||
req: &Request<Body>,
|
||||
bucket_name: &str,
|
||||
bucket_id: Uuid,
|
||||
key: &String,
|
||||
key: &str,
|
||||
) -> Result<Response<Body>, Error> {
|
||||
let existing_object = garage.object_table.get(&bucket_id, &key).await?;
|
||||
|
||||
let upload_id = gen_uuid();
|
||||
let timestamp = next_timestamp(existing_object.as_ref());
|
||||
|
||||
let timestamp = now_msec();
|
||||
let headers = get_headers(req.headers())?;
|
||||
|
||||
// Create object in object table
|
||||
@ -235,8 +233,7 @@ pub async fn handle_complete_multipart_upload(
|
||||
|
||||
// Get object and multipart upload
|
||||
let key = key.to_string();
|
||||
let (object, mut object_version, mpu) =
|
||||
get_upload(&garage, &bucket.id, &key, &upload_id).await?;
|
||||
let (_, mut object_version, mpu) = get_upload(&garage, &bucket.id, &key, &upload_id).await?;
|
||||
|
||||
if mpu.parts.is_empty() {
|
||||
return Err(Error::bad_request("No data was uploaded"));
|
||||
@ -334,7 +331,7 @@ pub async fn handle_complete_multipart_upload(
|
||||
// Calculate total size of final object
|
||||
let total_size = parts.iter().map(|x| x.size.unwrap()).sum();
|
||||
|
||||
if let Err(e) = check_quotas(&garage, bucket, total_size, Some(&object)).await {
|
||||
if let Err(e) = check_quotas(&garage, bucket, &key, total_size).await {
|
||||
object_version.state = ObjectVersionState::Aborted;
|
||||
let final_object = Object::new(bucket.id, key.clone(), vec![object_version]);
|
||||
garage.object_table.insert(&final_object).await?;
|
||||
|
@ -3,7 +3,6 @@ use std::sync::Arc;
|
||||
|
||||
use base64::prelude::*;
|
||||
use futures::prelude::*;
|
||||
use futures::try_join;
|
||||
use hyper::body::{Body, Bytes};
|
||||
use hyper::header::{HeaderMap, HeaderValue};
|
||||
use hyper::{Request, Response};
|
||||
@ -36,7 +35,7 @@ pub async fn handle_put(
|
||||
garage: Arc<Garage>,
|
||||
req: Request<Body>,
|
||||
bucket: &Bucket,
|
||||
key: &String,
|
||||
key: &str,
|
||||
content_sha256: Option<Hash>,
|
||||
) -> Result<Response<Body>, Error> {
|
||||
// Retrieve interesting headers from request
|
||||
@ -69,24 +68,16 @@ pub(crate) async fn save_stream<S: Stream<Item = Result<Bytes, Error>> + Unpin>(
|
||||
headers: ObjectVersionHeaders,
|
||||
body: S,
|
||||
bucket: &Bucket,
|
||||
key: &String,
|
||||
key: &str,
|
||||
content_md5: Option<String>,
|
||||
content_sha256: Option<FixedBytes32>,
|
||||
) -> Result<(Uuid, String), Error> {
|
||||
let mut chunker = StreamChunker::new(body, garage.config.block_size);
|
||||
let (first_block_opt, existing_object) = try_join!(
|
||||
chunker.next(),
|
||||
garage
|
||||
.object_table
|
||||
.get(&bucket.id, key)
|
||||
.map_err(Error::from),
|
||||
)?;
|
||||
|
||||
let first_block = first_block_opt.unwrap_or_default();
|
||||
|
||||
// Generate identity of new version
|
||||
let version_uuid = gen_uuid();
|
||||
let version_timestamp = next_timestamp(existing_object.as_ref());
|
||||
let version_timestamp = now_msec();
|
||||
|
||||
let mut chunker = StreamChunker::new(body, garage.config.block_size);
|
||||
let first_block = chunker.next().await?.unwrap_or_default();
|
||||
|
||||
// If body is small enough, store it directly in the object table
|
||||
// as "inline data". We can then return immediately.
|
||||
@ -106,7 +97,7 @@ pub(crate) async fn save_stream<S: Stream<Item = Result<Bytes, Error>> + Unpin>(
|
||||
content_sha256,
|
||||
)?;
|
||||
|
||||
check_quotas(&garage, bucket, size, existing_object.as_ref()).await?;
|
||||
check_quotas(&garage, bucket, key, size).await?;
|
||||
|
||||
let object_version = ObjectVersion {
|
||||
uuid: version_uuid,
|
||||
@ -185,7 +176,7 @@ pub(crate) async fn save_stream<S: Stream<Item = Result<Bytes, Error>> + Unpin>(
|
||||
content_sha256,
|
||||
)?;
|
||||
|
||||
check_quotas(&garage, bucket, total_size, existing_object.as_ref()).await?;
|
||||
check_quotas(&garage, bucket, key, total_size).await?;
|
||||
|
||||
// Save final object state, marked as Complete
|
||||
let md5sum_hex = hex::encode(data_md5sum);
|
||||
@ -238,19 +229,19 @@ pub(crate) fn ensure_checksum_matches(
|
||||
pub(crate) async fn check_quotas(
|
||||
garage: &Arc<Garage>,
|
||||
bucket: &Bucket,
|
||||
key: &str,
|
||||
size: u64,
|
||||
prev_object: Option<&Object>,
|
||||
) -> Result<(), Error> {
|
||||
let quotas = bucket.state.as_option().unwrap().quotas.get();
|
||||
if quotas.max_objects.is_none() && quotas.max_size.is_none() {
|
||||
return Ok(());
|
||||
};
|
||||
|
||||
let counters = garage
|
||||
.object_counter_table
|
||||
.table
|
||||
.get(&bucket.id, &EmptyKey)
|
||||
.await?;
|
||||
let key = key.to_string();
|
||||
let (prev_object, counters) = futures::try_join!(
|
||||
garage.object_table.get(&bucket.id, &key),
|
||||
garage.object_counter_table.table.get(&bucket.id, &EmptyKey),
|
||||
)?;
|
||||
|
||||
let counters = counters
|
||||
.map(|x| x.filtered_values(&garage.system.ring.borrow()))
|
||||
@ -284,7 +275,7 @@ pub(crate) async fn check_quotas(
|
||||
if cnt_size_diff > 0 && current_size + cnt_size_diff > ms as i64 {
|
||||
return Err(Error::forbidden(format!(
|
||||
"Bucket size quota is reached, maximum total size of objects for this bucket: {}. The bucket is already {} bytes, and this object would add {} bytes.",
|
||||
ms, current_size, cnt_size_diff
|
||||
ms, current_size, size
|
||||
)));
|
||||
}
|
||||
}
|
||||
@ -528,11 +519,3 @@ pub(crate) fn get_headers(headers: &HeaderMap<HeaderValue>) -> Result<ObjectVers
|
||||
other,
|
||||
})
|
||||
}
|
||||
|
||||
pub(crate) fn next_timestamp(existing_object: Option<&Object>) -> u64 {
|
||||
existing_object
|
||||
.as_ref()
|
||||
.and_then(|obj| obj.versions().iter().map(|v| v.timestamp).max())
|
||||
.map(|t| std::cmp::max(t + 1, now_msec()))
|
||||
.unwrap_or_else(now_msec)
|
||||
}
|
||||
|
@ -771,7 +771,11 @@ impl BlockManagerLocked {
|
||||
// Now, we do an fsync on the containing directory, to ensure that the rename
|
||||
// is persisted properly. See:
|
||||
// http://thedjbway.b0llix.net/qmail/syncdir.html
|
||||
let dir = fs::OpenOptions::new().read(true).open(directory).await?;
|
||||
let dir = fs::OpenOptions::new()
|
||||
.read(true)
|
||||
.mode(0)
|
||||
.open(directory)
|
||||
.await?;
|
||||
dir.sync_all().await?;
|
||||
drop(dir);
|
||||
}
|
||||
|
@ -3,6 +3,7 @@ use std::collections::HashMap;
|
||||
use std::io::{Read, Write};
|
||||
use std::net::{IpAddr, SocketAddr};
|
||||
use std::path::{Path, PathBuf};
|
||||
use std::sync::atomic::Ordering;
|
||||
use std::sync::{Arc, RwLock};
|
||||
use std::time::{Duration, Instant};
|
||||
|
||||
@ -197,20 +198,6 @@ pub fn read_node_id(metadata_dir: &Path) -> Result<NodeID, Error> {
|
||||
Ok(NodeID::from_slice(&key[..]).unwrap())
|
||||
}
|
||||
|
||||
#[cfg(not(windows))]
|
||||
fn set_private_key_perms(path: &Path) -> Result<(), Error> {
|
||||
use std::os::unix::fs::PermissionsExt;
|
||||
let perm = std::fs::Permissions::from_mode(0o600);
|
||||
std::fs::set_permissions(path, perm)?;
|
||||
Ok(())
|
||||
}
|
||||
|
||||
#[cfg(windows)]
|
||||
fn set_private_key_perms(_path: &Path) -> Result<(), Error> {
|
||||
// TODO(mediocregopher) figure out how to do this, but it's not strictly necessary
|
||||
Ok(())
|
||||
}
|
||||
|
||||
pub fn gen_node_key(metadata_dir: &Path) -> Result<NodeKey, Error> {
|
||||
let mut key_file = metadata_dir.to_path_buf();
|
||||
key_file.push("node_key");
|
||||
@ -235,8 +222,11 @@ pub fn gen_node_key(metadata_dir: &Path) -> Result<NodeKey, Error> {
|
||||
let (pubkey, key) = ed25519::gen_keypair();
|
||||
|
||||
{
|
||||
use std::os::unix::fs::PermissionsExt;
|
||||
let mut f = std::fs::File::create(key_file.as_path())?;
|
||||
set_private_key_perms(key_file.as_path())?;
|
||||
let mut perm = f.metadata()?.permissions();
|
||||
perm.set_mode(0o600);
|
||||
std::fs::set_permissions(key_file.as_path(), perm)?;
|
||||
f.write_all(&key[..])?;
|
||||
}
|
||||
|
||||
@ -900,18 +890,6 @@ impl NodeStatus {
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(windows)]
|
||||
fn update_disk_usage(
|
||||
&mut self,
|
||||
_meta_dir: &Path,
|
||||
_data_dir: &DataDirEnum,
|
||||
_metrics: &SystemMetrics,
|
||||
) {
|
||||
// TODO(mediocregopher) it'd be nice to have this for windows too, but it seems to only be
|
||||
// used for OpenTelemetry so it's not a real requirement.
|
||||
}
|
||||
|
||||
#[cfg(not(windows))]
|
||||
fn update_disk_usage(
|
||||
&mut self,
|
||||
meta_dir: &Path,
|
||||
@ -919,7 +897,6 @@ impl NodeStatus {
|
||||
metrics: &SystemMetrics,
|
||||
) {
|
||||
use nix::sys::statvfs::statvfs;
|
||||
use std::sync::atomic::Ordering;
|
||||
let mount_avail = |path: &Path| match statvfs(path) {
|
||||
Ok(x) => {
|
||||
let avail = x.blocks_available() as u64 * x.fragment_size() as u64;
|
||||
@ -978,7 +955,6 @@ impl NodeStatus {
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(not(windows))]
|
||||
fn get_default_ip() -> Option<IpAddr> {
|
||||
pnet_datalink::interfaces()
|
||||
.iter()
|
||||
@ -987,11 +963,6 @@ fn get_default_ip() -> Option<IpAddr> {
|
||||
.map(|a| a.ip())
|
||||
}
|
||||
|
||||
#[cfg(windows)]
|
||||
fn get_default_ip() -> Option<IpAddr> {
|
||||
None
|
||||
}
|
||||
|
||||
async fn resolve_peers(peers: &[String]) -> Vec<(NodeID, SocketAddr)> {
|
||||
let mut ret = vec![];
|
||||
|
||||
|
@ -27,7 +27,7 @@ futures = "0.3"
|
||||
|
||||
http = "0.2"
|
||||
hyper = { version = "0.14", features = ["server", "http1", "runtime", "tcp", "stream"] }
|
||||
#hyperlocal = { version = "0.8.0", default-features = false, features = ["server"] }
|
||||
hyperlocal = { version = "0.8.0", default-features = false, features = ["server"] }
|
||||
|
||||
tokio = { version = "1.0", default-features = false, features = ["net"] }
|
||||
|
||||
|
@ -1,5 +1,5 @@
|
||||
//use std::fs::{self, Permissions};
|
||||
//use std::os::unix::prelude::PermissionsExt;
|
||||
use std::fs::{self, Permissions};
|
||||
use std::os::unix::prelude::PermissionsExt;
|
||||
use std::{convert::Infallible, sync::Arc};
|
||||
|
||||
use futures::future::Future;
|
||||
@ -11,9 +11,9 @@ use hyper::{
|
||||
Body, Method, Request, Response, Server, StatusCode,
|
||||
};
|
||||
|
||||
//use hyperlocal::UnixServerExt;
|
||||
use hyperlocal::UnixServerExt;
|
||||
|
||||
//use tokio::net::UnixStream;
|
||||
use tokio::net::UnixStream;
|
||||
|
||||
use opentelemetry::{
|
||||
global,
|
||||
@ -100,18 +100,18 @@ impl WebServer {
|
||||
}
|
||||
});
|
||||
|
||||
//let unix_service = make_service_fn(|_: &UnixStream| {
|
||||
// let web_server = web_server.clone();
|
||||
let unix_service = make_service_fn(|_: &UnixStream| {
|
||||
let web_server = web_server.clone();
|
||||
|
||||
// let path = addr.to_string();
|
||||
// async move {
|
||||
// remove_unix_socket_if_present(&path).await.expect("could not remove existing unix socket");
|
||||
// Ok::<_, Error>(service_fn(move |req: Request<Body>| {
|
||||
// let web_server = web_server.clone();
|
||||
// web_server.handle_request(req, path.clone())
|
||||
// }))
|
||||
// }
|
||||
//});
|
||||
let path = addr.to_string();
|
||||
async move {
|
||||
Ok::<_, Error>(service_fn(move |req: Request<Body>| {
|
||||
let web_server = web_server.clone();
|
||||
|
||||
web_server.handle_request(req, path.clone())
|
||||
}))
|
||||
}
|
||||
});
|
||||
|
||||
info!("Web server listening on {}", addr);
|
||||
|
||||
@ -122,22 +122,20 @@ impl WebServer {
|
||||
.with_graceful_shutdown(shutdown_signal)
|
||||
.await?
|
||||
}
|
||||
UnixOrTCPSocketAddress::UnixSocket(_path) => {
|
||||
panic!("Unix sockets are not supported in this fork") // TODO(mediocregopher)
|
||||
} //UnixOrTCPSocketAddress::UnixSocket(ref path) => {
|
||||
// if path.exists() {
|
||||
// fs::remove_file(path)?
|
||||
// }
|
||||
UnixOrTCPSocketAddress::UnixSocket(ref path) => {
|
||||
if path.exists() {
|
||||
fs::remove_file(path)?
|
||||
}
|
||||
|
||||
// let bound = Server::bind_unix(path)?;
|
||||
let bound = Server::bind_unix(path)?;
|
||||
|
||||
// fs::set_permissions(path, Permissions::from_mode(0o222))?;
|
||||
fs::set_permissions(path, Permissions::from_mode(0o222))?;
|
||||
|
||||
// bound
|
||||
// .serve(unix_service)
|
||||
// .with_graceful_shutdown(shutdown_signal)
|
||||
// .await?;
|
||||
//}
|
||||
bound
|
||||
.serve(unix_service)
|
||||
.with_graceful_shutdown(shutdown_signal)
|
||||
.await?;
|
||||
}
|
||||
};
|
||||
|
||||
Ok(())
|
||||
|
Loading…
Reference in New Issue
Block a user