First port of Garage to Netapp
This commit is contained in:
parent
dc017a0cab
commit
4067797d01
705
Cargo.lock
generated
705
Cargo.lock
generated
File diff suppressed because it is too large
Load Diff
@ -1,6 +1,6 @@
|
||||
[package]
|
||||
name = "garage_api"
|
||||
version = "0.3.0"
|
||||
version = "0.4.0"
|
||||
authors = ["Alex Auvolat <alex@adnab.me>"]
|
||||
edition = "2018"
|
||||
license = "AGPL-3.0"
|
||||
@ -13,9 +13,9 @@ path = "lib.rs"
|
||||
# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
|
||||
|
||||
[dependencies]
|
||||
garage_model = { version = "0.3.0", path = "../model" }
|
||||
garage_table = { version = "0.3.0", path = "../table" }
|
||||
garage_util = { version = "0.3.0", path = "../util" }
|
||||
garage_model = { version = "0.4.0", path = "../model" }
|
||||
garage_table = { version = "0.4.0", path = "../table" }
|
||||
garage_util = { version = "0.4.0", path = "../util" }
|
||||
|
||||
base64 = "0.13"
|
||||
bytes = "1.0"
|
||||
|
@ -1,6 +1,6 @@
|
||||
[package]
|
||||
name = "garage"
|
||||
version = "0.3.0"
|
||||
version = "0.4.0"
|
||||
authors = ["Alex Auvolat <alex@adnab.me>"]
|
||||
edition = "2018"
|
||||
license = "AGPL-3.0"
|
||||
@ -14,12 +14,12 @@ path = "main.rs"
|
||||
# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
|
||||
|
||||
[dependencies]
|
||||
garage_api = { version = "0.3.0", path = "../api" }
|
||||
garage_model = { version = "0.3.0", path = "../model" }
|
||||
garage_rpc = { version = "0.3.0", path = "../rpc" }
|
||||
garage_table = { version = "0.3.0", path = "../table" }
|
||||
garage_util = { version = "0.3.0", path = "../util" }
|
||||
garage_web = { version = "0.3.0", path = "../web" }
|
||||
garage_api = { version = "0.4.0", path = "../api" }
|
||||
garage_model = { version = "0.4.0", path = "../model" }
|
||||
garage_rpc = { version = "0.4.0", path = "../rpc" }
|
||||
garage_table = { version = "0.4.0", path = "../table" }
|
||||
garage_util = { version = "0.4.0", path = "../util" }
|
||||
garage_web = { version = "0.4.0", path = "../web" }
|
||||
|
||||
bytes = "1.0"
|
||||
git-version = "0.3.4"
|
||||
@ -27,6 +27,8 @@ hex = "0.4"
|
||||
log = "0.4"
|
||||
pretty_env_logger = "0.4"
|
||||
rand = "0.8"
|
||||
async-trait = "0.1.7"
|
||||
sodiumoxide = { version = "0.2.5-0", package = "kuska-sodiumoxide" }
|
||||
|
||||
sled = "0.34"
|
||||
|
||||
@ -38,3 +40,5 @@ toml = "0.5"
|
||||
futures = "0.3"
|
||||
futures-util = "0.3"
|
||||
tokio = { version = "1.0", default-features = false, features = ["rt", "rt-multi-thread", "io-util", "net", "time", "macros", "sync", "signal", "fs"] }
|
||||
|
||||
netapp = { version = "0.3.0", git = "https://git.deuxfleurs.fr/lx/netapp" }
|
||||
|
@ -2,6 +2,7 @@ use std::collections::HashMap;
|
||||
use std::fmt::Write;
|
||||
use std::sync::Arc;
|
||||
|
||||
use async_trait::async_trait;
|
||||
use serde::{Deserialize, Serialize};
|
||||
|
||||
use garage_util::error::Error;
|
||||
@ -10,8 +11,7 @@ use garage_table::crdt::Crdt;
|
||||
use garage_table::replication::*;
|
||||
use garage_table::*;
|
||||
|
||||
use garage_rpc::rpc_client::*;
|
||||
use garage_rpc::rpc_server::*;
|
||||
use garage_rpc::*;
|
||||
|
||||
use garage_model::bucket_table::*;
|
||||
use garage_model::garage::Garage;
|
||||
@ -19,10 +19,8 @@ use garage_model::key_table::*;
|
||||
|
||||
use crate::cli::*;
|
||||
use crate::repair::Repair;
|
||||
use crate::*;
|
||||
|
||||
pub const ADMIN_RPC_TIMEOUT: Duration = Duration::from_secs(30);
|
||||
pub const ADMIN_RPC_PATH: &str = "_admin";
|
||||
pub const ADMIN_RPC_PATH: &str = "garage/admin_rpc.rs/Rpc";
|
||||
|
||||
#[derive(Debug, Serialize, Deserialize)]
|
||||
pub enum AdminRpc {
|
||||
@ -33,41 +31,31 @@ pub enum AdminRpc {
|
||||
|
||||
// Replies
|
||||
Ok(String),
|
||||
Error(String),
|
||||
BucketList(Vec<String>),
|
||||
BucketInfo(Bucket),
|
||||
KeyList(Vec<(String, String)>),
|
||||
KeyInfo(Key),
|
||||
}
|
||||
|
||||
impl RpcMessage for AdminRpc {}
|
||||
impl Message for AdminRpc {
|
||||
type Response = AdminRpc;
|
||||
}
|
||||
|
||||
pub struct AdminRpcHandler {
|
||||
garage: Arc<Garage>,
|
||||
rpc_client: Arc<RpcClient<AdminRpc>>,
|
||||
endpoint: Arc<Endpoint<AdminRpc, Self>>,
|
||||
}
|
||||
|
||||
impl AdminRpcHandler {
|
||||
pub fn new(garage: Arc<Garage>) -> Arc<Self> {
|
||||
let rpc_client = garage.system.clone().rpc_client::<AdminRpc>(ADMIN_RPC_PATH);
|
||||
Arc::new(Self { garage, rpc_client })
|
||||
let endpoint = garage.system.netapp.endpoint(ADMIN_RPC_PATH.into());
|
||||
let admin = Arc::new(Self { garage, endpoint });
|
||||
admin.endpoint.set_handler(admin.clone());
|
||||
admin
|
||||
}
|
||||
|
||||
pub fn register_handler(self: Arc<Self>, rpc_server: &mut RpcServer) {
|
||||
rpc_server.add_handler::<AdminRpc, _, _>(ADMIN_RPC_PATH.to_string(), move |msg, _addr| {
|
||||
let self2 = self.clone();
|
||||
async move {
|
||||
match msg {
|
||||
AdminRpc::BucketOperation(bo) => self2.handle_bucket_cmd(bo).await,
|
||||
AdminRpc::KeyOperation(ko) => self2.handle_key_cmd(ko).await,
|
||||
AdminRpc::LaunchRepair(opt) => self2.handle_launch_repair(opt).await,
|
||||
AdminRpc::Stats(opt) => self2.handle_stats(opt).await,
|
||||
_ => Err(Error::BadRpc("Invalid RPC".to_string())),
|
||||
}
|
||||
}
|
||||
});
|
||||
}
|
||||
|
||||
async fn handle_bucket_cmd(&self, cmd: BucketOperation) -> Result<AdminRpc, Error> {
|
||||
async fn handle_bucket_cmd(&self, cmd: &BucketOperation) -> Result<AdminRpc, Error> {
|
||||
match cmd {
|
||||
BucketOperation::List => {
|
||||
let bucket_names = self
|
||||
@ -187,7 +175,7 @@ impl AdminRpcHandler {
|
||||
}
|
||||
}
|
||||
|
||||
async fn handle_key_cmd(&self, cmd: KeyOperation) -> Result<AdminRpc, Error> {
|
||||
async fn handle_key_cmd(&self, cmd: &KeyOperation) -> Result<AdminRpc, Error> {
|
||||
match cmd {
|
||||
KeyOperation::List => {
|
||||
let key_ids = self
|
||||
@ -210,13 +198,13 @@ impl AdminRpcHandler {
|
||||
Ok(AdminRpc::KeyInfo(key))
|
||||
}
|
||||
KeyOperation::New(query) => {
|
||||
let key = Key::new(query.name);
|
||||
let key = Key::new(query.name.clone());
|
||||
self.garage.key_table.insert(&key).await?;
|
||||
Ok(AdminRpc::KeyInfo(key))
|
||||
}
|
||||
KeyOperation::Rename(query) => {
|
||||
let mut key = self.get_existing_key(&query.key_pattern).await?;
|
||||
key.name.update(query.new_name);
|
||||
key.name.update(query.new_name.clone());
|
||||
self.garage.key_table.insert(&key).await?;
|
||||
Ok(AdminRpc::KeyInfo(key))
|
||||
}
|
||||
@ -353,17 +341,18 @@ impl AdminRpcHandler {
|
||||
let mut failures = vec![];
|
||||
let ring = self.garage.system.ring.borrow().clone();
|
||||
for node in ring.config.members.keys() {
|
||||
let node = NodeID::from_slice(node.as_slice()).unwrap();
|
||||
if self
|
||||
.rpc_client
|
||||
.endpoint
|
||||
.call(
|
||||
*node,
|
||||
AdminRpc::LaunchRepair(opt_to_send.clone()),
|
||||
ADMIN_RPC_TIMEOUT,
|
||||
&node,
|
||||
&AdminRpc::LaunchRepair(opt_to_send.clone()),
|
||||
PRIO_NORMAL,
|
||||
)
|
||||
.await
|
||||
.is_err()
|
||||
{
|
||||
failures.push(*node);
|
||||
failures.push(node);
|
||||
}
|
||||
}
|
||||
if failures.is_empty() {
|
||||
@ -397,14 +386,16 @@ impl AdminRpcHandler {
|
||||
let ring = self.garage.system.ring.borrow().clone();
|
||||
|
||||
for node in ring.config.members.keys() {
|
||||
let node = NodeID::from_slice(node.as_slice()).unwrap();
|
||||
|
||||
let mut opt = opt.clone();
|
||||
opt.all_nodes = false;
|
||||
|
||||
writeln!(&mut ret, "\n======================").unwrap();
|
||||
writeln!(&mut ret, "Stats for node {:?}:", node).unwrap();
|
||||
match self
|
||||
.rpc_client
|
||||
.call(*node, AdminRpc::Stats(opt), ADMIN_RPC_TIMEOUT)
|
||||
.endpoint
|
||||
.call(&node, &AdminRpc::Stats(opt), PRIO_NORMAL)
|
||||
.await
|
||||
{
|
||||
Ok(AdminRpc::Ok(s)) => writeln!(&mut ret, "{}", s).unwrap(),
|
||||
@ -495,4 +486,23 @@ impl AdminRpcHandler {
|
||||
.unwrap();
|
||||
writeln!(to, " GC todo queue length: {}", t.data.gc_todo_len()).unwrap();
|
||||
}
|
||||
|
||||
/// Dispatch an incoming admin RPC request to the matching handler.
///
/// Bucket and key operations are passed on by reference; repair and stats
/// options are cloned before being handed to their handlers. Any other
/// `AdminRpc` variant is rejected with `Error::BadRpc("Invalid RPC")`.
async fn handle_rpc(self: &Arc<Self>, msg: &AdminRpc) -> Result<AdminRpc, Error> {
|
||||
match msg {
|
||||
AdminRpc::BucketOperation(bo) => self.handle_bucket_cmd(bo).await,
|
||||
AdminRpc::KeyOperation(ko) => self.handle_key_cmd(ko).await,
|
||||
AdminRpc::LaunchRepair(opt) => self.handle_launch_repair(opt.clone()).await,
|
||||
AdminRpc::Stats(opt) => self.handle_stats(opt.clone()).await,
|
||||
_ => Err(Error::BadRpc("Invalid RPC".to_string())),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#[async_trait]
|
||||
impl EndpointHandler<AdminRpc> for AdminRpcHandler {
|
||||
/// Endpoint entry point: runs the request through `handle_rpc` and always
/// produces an `AdminRpc` reply. A failure is not propagated as an `Err`;
/// it is turned into `AdminRpc::Error` carrying the error's `Display` text.
/// The sender's node id (`_from`) is not used.
async fn handle(self: &Arc<Self>, message: &AdminRpc, _from: NodeID) -> AdminRpc {
|
||||
self.handle_rpc(message)
|
||||
.await
|
||||
.unwrap_or_else(|e| AdminRpc::Error(format!("{}", e)))
|
||||
}
|
||||
}
|
||||
|
@ -1,6 +1,5 @@
|
||||
use std::cmp::max;
|
||||
use std::collections::HashSet;
|
||||
use std::net::SocketAddr;
|
||||
//use std::cmp::max;
|
||||
//use std::collections::HashSet;
|
||||
use std::path::PathBuf;
|
||||
|
||||
use serde::{Deserialize, Serialize};
|
||||
@ -8,11 +7,11 @@ use structopt::StructOpt;
|
||||
|
||||
use garage_util::data::Uuid;
|
||||
use garage_util::error::Error;
|
||||
use garage_util::time::*;
|
||||
//use garage_util::time::*;
|
||||
|
||||
use garage_rpc::membership::*;
|
||||
use garage_rpc::ring::*;
|
||||
use garage_rpc::rpc_client::*;
|
||||
use garage_rpc::system::*;
|
||||
use garage_rpc::*;
|
||||
|
||||
use garage_model::bucket_table::*;
|
||||
use garage_model::key_table::*;
|
||||
@ -298,54 +297,65 @@ pub struct StatsOpt {
|
||||
|
||||
pub async fn cli_cmd(
|
||||
cmd: Command,
|
||||
membership_rpc_cli: RpcAddrClient<Message>,
|
||||
admin_rpc_cli: RpcAddrClient<AdminRpc>,
|
||||
rpc_host: SocketAddr,
|
||||
system_rpc_endpoint: &Endpoint<SystemRpc, ()>,
|
||||
admin_rpc_endpoint: &Endpoint<AdminRpc, ()>,
|
||||
rpc_host: NodeID,
|
||||
) -> Result<(), Error> {
|
||||
match cmd {
|
||||
Command::Status => cmd_status(membership_rpc_cli, rpc_host).await,
|
||||
Command::Status => cmd_status(system_rpc_endpoint, rpc_host).await,
|
||||
Command::Node(NodeOperation::Configure(configure_opt)) => {
|
||||
cmd_configure(membership_rpc_cli, rpc_host, configure_opt).await
|
||||
cmd_configure(system_rpc_endpoint, rpc_host, configure_opt).await
|
||||
}
|
||||
Command::Node(NodeOperation::Remove(remove_opt)) => {
|
||||
cmd_remove(membership_rpc_cli, rpc_host, remove_opt).await
|
||||
cmd_remove(system_rpc_endpoint, rpc_host, remove_opt).await
|
||||
}
|
||||
Command::Bucket(bo) => {
|
||||
cmd_admin(admin_rpc_cli, rpc_host, AdminRpc::BucketOperation(bo)).await
|
||||
cmd_admin(admin_rpc_endpoint, rpc_host, AdminRpc::BucketOperation(bo)).await
|
||||
}
|
||||
Command::Key(ko) => cmd_admin(admin_rpc_cli, rpc_host, AdminRpc::KeyOperation(ko)).await,
|
||||
Command::Repair(ro) => cmd_admin(admin_rpc_cli, rpc_host, AdminRpc::LaunchRepair(ro)).await,
|
||||
Command::Stats(so) => cmd_admin(admin_rpc_cli, rpc_host, AdminRpc::Stats(so)).await,
|
||||
Command::Key(ko) => {
|
||||
cmd_admin(admin_rpc_endpoint, rpc_host, AdminRpc::KeyOperation(ko)).await
|
||||
}
|
||||
Command::Repair(ro) => {
|
||||
cmd_admin(admin_rpc_endpoint, rpc_host, AdminRpc::LaunchRepair(ro)).await
|
||||
}
|
||||
Command::Stats(so) => cmd_admin(admin_rpc_endpoint, rpc_host, AdminRpc::Stats(so)).await,
|
||||
_ => unreachable!(),
|
||||
}
|
||||
}
|
||||
|
||||
pub async fn cmd_status(
|
||||
rpc_cli: RpcAddrClient<Message>,
|
||||
rpc_host: SocketAddr,
|
||||
) -> Result<(), Error> {
|
||||
pub async fn cmd_status(rpc_cli: &Endpoint<SystemRpc, ()>, rpc_host: NodeID) -> Result<(), Error> {
|
||||
let status = match rpc_cli
|
||||
.call(&rpc_host, &Message::PullStatus, ADMIN_RPC_TIMEOUT)
|
||||
.await??
|
||||
.call(&rpc_host, &SystemRpc::GetKnownNodes, PRIO_NORMAL)
|
||||
.await?
|
||||
{
|
||||
Message::AdvertiseNodesUp(nodes) => nodes,
|
||||
SystemRpc::ReturnKnownNodes(nodes) => nodes,
|
||||
resp => return Err(Error::Message(format!("Invalid RPC response: {:?}", resp))),
|
||||
};
|
||||
let config = match rpc_cli
|
||||
.call(&rpc_host, &Message::PullConfig, ADMIN_RPC_TIMEOUT)
|
||||
.await??
|
||||
.call(&rpc_host, &SystemRpc::PullConfig, PRIO_NORMAL)
|
||||
.await?
|
||||
{
|
||||
Message::AdvertiseConfig(cfg) => cfg,
|
||||
SystemRpc::AdvertiseConfig(cfg) => cfg,
|
||||
resp => return Err(Error::Message(format!("Invalid RPC response: {:?}", resp))),
|
||||
};
|
||||
|
||||
println!("STATUS:");
|
||||
for node in status {
|
||||
println!("{:?}", node);
|
||||
}
|
||||
println!("CONFIG: (v{})", config.version);
|
||||
for (id, node) in config.members {
|
||||
println!("{} {:?}", hex::encode(id.as_slice()), node);
|
||||
}
|
||||
|
||||
/* TODO
|
||||
let (hostname_len, addr_len, tag_len, zone_len) = status
|
||||
.iter()
|
||||
.map(|adv| (adv, config.members.get(&adv.id)))
|
||||
.map(|(adv, cfg)| {
|
||||
.map(|(id, addr, _)| (addr, config.members.get(&adv.id)))
|
||||
.map(|(addr, cfg)| {
|
||||
(
|
||||
adv.state_info.hostname.len(),
|
||||
adv.addr.to_string().len(),
|
||||
8,
|
||||
addr.to_string().len(),
|
||||
cfg.map(|c| c.tag.len()).unwrap_or(0),
|
||||
cfg.map(|c| c.zone.len()).unwrap_or(0),
|
||||
)
|
||||
@ -355,13 +365,13 @@ pub async fn cmd_status(
|
||||
});
|
||||
|
||||
println!("Healthy nodes:");
|
||||
for adv in status.iter().filter(|x| x.is_up) {
|
||||
for (id, addr, _) in status.iter().filter(|(id, addr, is_up)| is_up) {
|
||||
if let Some(cfg) = config.members.get(&adv.id) {
|
||||
println!(
|
||||
"{id:?}\t{host}{h_pad}\t{addr}{a_pad}\t[{tag}]{t_pad}\t{zone}{z_pad}\t{capacity}",
|
||||
id = adv.id,
|
||||
host = adv.state_info.hostname,
|
||||
addr = adv.addr,
|
||||
id = id,
|
||||
host = "",
|
||||
addr = addr,
|
||||
tag = cfg.tag,
|
||||
zone = cfg.zone,
|
||||
capacity = cfg.capacity_string(),
|
||||
@ -373,36 +383,36 @@ pub async fn cmd_status(
|
||||
} else {
|
||||
println!(
|
||||
"{id:?}\t{h}{h_pad}\t{addr}{a_pad}\tUNCONFIGURED/REMOVED",
|
||||
id = adv.id,
|
||||
h = adv.state_info.hostname,
|
||||
addr = adv.addr,
|
||||
h_pad = " ".repeat(hostname_len - adv.state_info.hostname.len()),
|
||||
a_pad = " ".repeat(addr_len - adv.addr.to_string().len()),
|
||||
id = id,
|
||||
h = "",
|
||||
addr = addr,
|
||||
h_pad = " ".repeat(hostname_len - "".len()),
|
||||
a_pad = " ".repeat(addr_len - addr.to_string().len()),
|
||||
);
|
||||
}
|
||||
}
|
||||
|
||||
let status_keys = status.iter().map(|x| x.id).collect::<HashSet<_>>();
|
||||
let failure_case_1 = status.iter().any(|x| !x.is_up);
|
||||
let status_keys = status.iter().map(|(id, _, _)| id).collect::<HashSet<_>>();
|
||||
let failure_case_1 = status.iter().any(|(_, _, is_up)| !is_up);
|
||||
let failure_case_2 = config
|
||||
.members
|
||||
.iter()
|
||||
.any(|(id, _)| !status_keys.contains(id));
|
||||
if failure_case_1 || failure_case_2 {
|
||||
println!("\nFailed nodes:");
|
||||
for adv in status.iter().filter(|x| !x.is_up) {
|
||||
if let Some(cfg) = config.members.get(&adv.id) {
|
||||
for (id, addr) in status.iter().filter(|(_, _, is_up)| !is_up) {
|
||||
if let Some(cfg) = config.members.get(&id) {
|
||||
println!(
|
||||
"{id:?}\t{host}{h_pad}\t{addr}{a_pad}\t[{tag}]{t_pad}\t{zone}{z_pad}\t{capacity}\tlast seen: {last_seen}s ago",
|
||||
id=adv.id,
|
||||
host=adv.state_info.hostname,
|
||||
addr=adv.addr,
|
||||
id=id,
|
||||
host="",
|
||||
addr=addr,
|
||||
tag=cfg.tag,
|
||||
zone=cfg.zone,
|
||||
capacity=cfg.capacity_string(),
|
||||
last_seen=(now_msec() - adv.last_seen) / 1000,
|
||||
h_pad=" ".repeat(hostname_len - adv.state_info.hostname.len()),
|
||||
a_pad=" ".repeat(addr_len - adv.addr.to_string().len()),
|
||||
last_seen=(now_msec() - 0) / 1000,
|
||||
h_pad=" ".repeat(hostname_len - "".len()),
|
||||
a_pad=" ".repeat(addr_len - addr.to_string().len()),
|
||||
t_pad=" ".repeat(tag_len - cfg.tag.len()),
|
||||
z_pad=" ".repeat(zone_len - cfg.zone.len()),
|
||||
);
|
||||
@ -411,12 +421,12 @@ pub async fn cmd_status(
|
||||
let (tag_len, zone_len) = config
|
||||
.members
|
||||
.iter()
|
||||
.filter(|(&id, _)| !status.iter().any(|x| x.id == id))
|
||||
.filter(|(&id, _)| !status.iter().any(|(xid, _, _)| xid == id))
|
||||
.map(|(_, cfg)| (cfg.tag.len(), cfg.zone.len()))
|
||||
.fold((0, 0), |(t, z), (mt, mz)| (max(t, mt), max(z, mz)));
|
||||
|
||||
for (id, cfg) in config.members.iter() {
|
||||
if !status.iter().any(|x| x.id == *id) {
|
||||
if !status.iter().any(|(xid, _, _)| xid == *id) {
|
||||
println!(
|
||||
"{id:?}\t{tag}{t_pad}\t{zone}{z_pad}\t{capacity}\tnever seen",
|
||||
id = id,
|
||||
@ -429,6 +439,7 @@ pub async fn cmd_status(
|
||||
}
|
||||
}
|
||||
}
|
||||
*/
|
||||
|
||||
Ok(())
|
||||
}
|
||||
@ -455,25 +466,30 @@ pub fn find_matching_node(
|
||||
}
|
||||
|
||||
pub async fn cmd_configure(
|
||||
rpc_cli: RpcAddrClient<Message>,
|
||||
rpc_host: SocketAddr,
|
||||
rpc_cli: &Endpoint<SystemRpc, ()>,
|
||||
rpc_host: NodeID,
|
||||
args: ConfigureNodeOpt,
|
||||
) -> Result<(), Error> {
|
||||
let status = match rpc_cli
|
||||
.call(&rpc_host, &Message::PullStatus, ADMIN_RPC_TIMEOUT)
|
||||
.await??
|
||||
.call(&rpc_host, &SystemRpc::GetKnownNodes, PRIO_NORMAL)
|
||||
.await?
|
||||
{
|
||||
Message::AdvertiseNodesUp(nodes) => nodes,
|
||||
SystemRpc::ReturnKnownNodes(nodes) => nodes,
|
||||
resp => return Err(Error::Message(format!("Invalid RPC response: {:?}", resp))),
|
||||
};
|
||||
|
||||
let added_node = find_matching_node(status.iter().map(|x| x.id), &args.node_id)?;
|
||||
let added_node = find_matching_node(
|
||||
status
|
||||
.iter()
|
||||
.map(|(id, _, _)| Uuid::try_from(id.as_ref()).unwrap()),
|
||||
&args.node_id,
|
||||
)?;
|
||||
|
||||
let mut config = match rpc_cli
|
||||
.call(&rpc_host, &Message::PullConfig, ADMIN_RPC_TIMEOUT)
|
||||
.await??
|
||||
.call(&rpc_host, &SystemRpc::PullConfig, PRIO_NORMAL)
|
||||
.await?
|
||||
{
|
||||
Message::AdvertiseConfig(cfg) => cfg,
|
||||
SystemRpc::AdvertiseConfig(cfg) => cfg,
|
||||
resp => return Err(Error::Message(format!("Invalid RPC response: {:?}", resp))),
|
||||
};
|
||||
|
||||
@ -527,25 +543,21 @@ pub async fn cmd_configure(
|
||||
config.version += 1;
|
||||
|
||||
rpc_cli
|
||||
.call(
|
||||
&rpc_host,
|
||||
&Message::AdvertiseConfig(config),
|
||||
ADMIN_RPC_TIMEOUT,
|
||||
)
|
||||
.await??;
|
||||
.call(&rpc_host, &SystemRpc::AdvertiseConfig(config), PRIO_NORMAL)
|
||||
.await?;
|
||||
Ok(())
|
||||
}
|
||||
|
||||
pub async fn cmd_remove(
|
||||
rpc_cli: RpcAddrClient<Message>,
|
||||
rpc_host: SocketAddr,
|
||||
rpc_cli: &Endpoint<SystemRpc, ()>,
|
||||
rpc_host: NodeID,
|
||||
args: RemoveNodeOpt,
|
||||
) -> Result<(), Error> {
|
||||
let mut config = match rpc_cli
|
||||
.call(&rpc_host, &Message::PullConfig, ADMIN_RPC_TIMEOUT)
|
||||
.await??
|
||||
.call(&rpc_host, &SystemRpc::PullConfig, PRIO_NORMAL)
|
||||
.await?
|
||||
{
|
||||
Message::AdvertiseConfig(cfg) => cfg,
|
||||
SystemRpc::AdvertiseConfig(cfg) => cfg,
|
||||
resp => return Err(Error::Message(format!("Invalid RPC response: {:?}", resp))),
|
||||
};
|
||||
|
||||
@ -562,21 +574,17 @@ pub async fn cmd_remove(
|
||||
config.version += 1;
|
||||
|
||||
rpc_cli
|
||||
.call(
|
||||
&rpc_host,
|
||||
&Message::AdvertiseConfig(config),
|
||||
ADMIN_RPC_TIMEOUT,
|
||||
)
|
||||
.await??;
|
||||
.call(&rpc_host, &SystemRpc::AdvertiseConfig(config), PRIO_NORMAL)
|
||||
.await?;
|
||||
Ok(())
|
||||
}
|
||||
|
||||
pub async fn cmd_admin(
|
||||
rpc_cli: RpcAddrClient<AdminRpc>,
|
||||
rpc_host: SocketAddr,
|
||||
rpc_cli: &Endpoint<AdminRpc, ()>,
|
||||
rpc_host: NodeID,
|
||||
args: AdminRpc,
|
||||
) -> Result<(), Error> {
|
||||
match rpc_cli.call(&rpc_host, args, ADMIN_RPC_TIMEOUT).await?? {
|
||||
match rpc_cli.call(&rpc_host, &args, PRIO_NORMAL).await? {
|
||||
AdminRpc::Ok(msg) => {
|
||||
println!("{}", msg);
|
||||
}
|
||||
|
@ -10,16 +10,16 @@ mod repair;
|
||||
mod server;
|
||||
|
||||
use std::net::SocketAddr;
|
||||
use std::sync::Arc;
|
||||
use std::time::Duration;
|
||||
|
||||
use structopt::StructOpt;
|
||||
|
||||
use garage_util::config::TlsConfig;
|
||||
use netapp::util::parse_peer_addr;
|
||||
use netapp::NetworkKey;
|
||||
|
||||
use garage_util::error::Error;
|
||||
|
||||
use garage_rpc::membership::*;
|
||||
use garage_rpc::rpc_client::*;
|
||||
use garage_rpc::system::*;
|
||||
use garage_rpc::*;
|
||||
|
||||
use admin_rpc::*;
|
||||
use cli::*;
|
||||
@ -27,16 +27,14 @@ use cli::*;
|
||||
#[derive(StructOpt, Debug)]
|
||||
#[structopt(name = "garage")]
|
||||
struct Opt {
|
||||
/// RPC connect to this host to execute client operations
|
||||
#[structopt(short = "h", long = "rpc-host", default_value = "127.0.0.1:3901", parse(try_from_str = parse_address))]
|
||||
pub rpc_host: SocketAddr,
|
||||
/// Host to connect to for admin operations, in the format:
|
||||
/// <public-key>@<ip>:<port>
|
||||
#[structopt(short = "h", long = "rpc-host")]
|
||||
pub rpc_host: Option<String>,
|
||||
|
||||
#[structopt(long = "ca-cert")]
|
||||
pub ca_cert: Option<String>,
|
||||
#[structopt(long = "client-cert")]
|
||||
pub client_cert: Option<String>,
|
||||
#[structopt(long = "client-key")]
|
||||
pub client_key: Option<String>,
|
||||
/// RPC secret network key for admin operations
|
||||
#[structopt(short = "s", long = "rpc-secret")]
|
||||
pub rpc_secret: Option<String>,
|
||||
|
||||
#[structopt(subcommand)]
|
||||
cmd: Command,
|
||||
@ -66,33 +64,20 @@ async fn main() {
|
||||
}
|
||||
|
||||
async fn cli_command(opt: Opt) -> Result<(), Error> {
|
||||
let tls_config = match (opt.ca_cert, opt.client_cert, opt.client_key) {
|
||||
(Some(ca_cert), Some(client_cert), Some(client_key)) => Some(TlsConfig {
|
||||
ca_cert,
|
||||
node_cert: client_cert,
|
||||
node_key: client_key,
|
||||
}),
|
||||
(None, None, None) => None,
|
||||
_ => {
|
||||
warn!("Missing one of: --ca-cert, --node-cert, --node-key. Not using TLS.");
|
||||
None
|
||||
}
|
||||
};
|
||||
let net_key_hex_str = &opt.rpc_secret.expect("No RPC secret provided");
|
||||
let network_key = NetworkKey::from_slice(
|
||||
&hex::decode(net_key_hex_str).expect("Invalid RPC secret key (bad hex)")[..],
|
||||
)
|
||||
.expect("Invalid RPC secret provided (wrong length)");
|
||||
let (_pk, sk) = sodiumoxide::crypto::sign::ed25519::gen_keypair();
|
||||
|
||||
let rpc_http_cli =
|
||||
Arc::new(RpcHttpClient::new(8, &tls_config).expect("Could not create RPC client"));
|
||||
let membership_rpc_cli =
|
||||
RpcAddrClient::new(rpc_http_cli.clone(), MEMBERSHIP_RPC_PATH.to_string());
|
||||
let admin_rpc_cli = RpcAddrClient::new(rpc_http_cli.clone(), ADMIN_RPC_PATH.to_string());
|
||||
let netapp = NetApp::new(network_key, sk);
|
||||
let (id, addr) =
|
||||
parse_peer_addr(&opt.rpc_host.expect("No RPC host provided")).expect("Invalid RPC host");
|
||||
netapp.clone().try_connect(addr, id).await?;
|
||||
|
||||
cli_cmd(opt.cmd, membership_rpc_cli, admin_rpc_cli, opt.rpc_host).await
|
||||
}
|
||||
|
||||
fn parse_address(address: &str) -> Result<SocketAddr, String> {
|
||||
use std::net::ToSocketAddrs;
|
||||
address
|
||||
.to_socket_addrs()
|
||||
.map_err(|_| format!("Could not resolve {}", address))?
|
||||
.next()
|
||||
.ok_or_else(|| format!("Could not resolve {}", address))
|
||||
let system_rpc_endpoint = netapp.endpoint::<SystemRpc, ()>(SYSTEM_RPC_PATH.into());
|
||||
let admin_rpc_endpoint = netapp.endpoint::<AdminRpc, ()>(ADMIN_RPC_PATH.into());
|
||||
|
||||
cli_cmd(opt.cmd, &system_rpc_endpoint, &admin_rpc_endpoint, id).await
|
||||
}
|
||||
|
@ -1,7 +1,5 @@
|
||||
use std::path::PathBuf;
|
||||
use std::sync::Arc;
|
||||
|
||||
use futures_util::future::*;
|
||||
use tokio::sync::watch;
|
||||
|
||||
use garage_util::background::*;
|
||||
@ -10,21 +8,10 @@ use garage_util::error::Error;
|
||||
|
||||
use garage_api::run_api_server;
|
||||
use garage_model::garage::Garage;
|
||||
use garage_rpc::rpc_server::RpcServer;
|
||||
use garage_web::run_web_server;
|
||||
|
||||
use crate::admin_rpc::*;
|
||||
|
||||
async fn shutdown_signal(send_cancel: watch::Sender<bool>) -> Result<(), Error> {
|
||||
// Wait for the CTRL+C signal
|
||||
tokio::signal::ctrl_c()
|
||||
.await
|
||||
.expect("failed to install CTRL+C signal handler");
|
||||
info!("Received CTRL+C, shutting down.");
|
||||
send_cancel.send(true)?;
|
||||
Ok(())
|
||||
}
|
||||
|
||||
async fn wait_from(mut chan: watch::Receiver<bool>) {
|
||||
while !*chan.borrow() {
|
||||
if chan.changed().await.is_err() {
|
||||
@ -47,52 +34,46 @@ pub async fn run_server(config_file: PathBuf) -> Result<(), Error> {
|
||||
.open()
|
||||
.expect("Unable to open sled DB");
|
||||
|
||||
info!("Initialize RPC server...");
|
||||
let mut rpc_server = RpcServer::new(config.rpc_bind_addr, config.rpc_tls.clone());
|
||||
|
||||
info!("Initializing background runner...");
|
||||
let (send_cancel, watch_cancel) = watch::channel(false);
|
||||
let watch_cancel = netapp::util::watch_ctrl_c();
|
||||
let (background, await_background_done) = BackgroundRunner::new(16, watch_cancel.clone());
|
||||
|
||||
info!("Initializing Garage main data store...");
|
||||
let garage = Garage::new(config.clone(), db, background, &mut rpc_server);
|
||||
let bootstrap = garage.system.clone().bootstrap(
|
||||
config.bootstrap_peers,
|
||||
config.consul_host,
|
||||
config.consul_service_name,
|
||||
);
|
||||
let garage = Garage::new(config.clone(), db, background);
|
||||
|
||||
let run_system = tokio::spawn(garage.system.clone().run(watch_cancel.clone()));
|
||||
|
||||
info!("Crate admin RPC handler...");
|
||||
AdminRpcHandler::new(garage.clone()).register_handler(&mut rpc_server);
|
||||
AdminRpcHandler::new(garage.clone());
|
||||
|
||||
info!("Initializing RPC and API servers...");
|
||||
let run_rpc_server = Arc::new(rpc_server).run(wait_from(watch_cancel.clone()));
|
||||
let api_server = run_api_server(garage.clone(), wait_from(watch_cancel.clone()));
|
||||
let web_server = run_web_server(garage, wait_from(watch_cancel.clone()));
|
||||
info!("Initializing API server...");
|
||||
let api_server = tokio::spawn(run_api_server(
|
||||
garage.clone(),
|
||||
wait_from(watch_cancel.clone()),
|
||||
));
|
||||
|
||||
futures::try_join!(
|
||||
bootstrap.map(|()| {
|
||||
info!("Bootstrap done");
|
||||
Ok(())
|
||||
}),
|
||||
run_rpc_server.map(|rv| {
|
||||
info!("RPC server exited");
|
||||
rv
|
||||
}),
|
||||
api_server.map(|rv| {
|
||||
info!("API server exited");
|
||||
rv
|
||||
}),
|
||||
web_server.map(|rv| {
|
||||
info!("Web server exited");
|
||||
rv
|
||||
}),
|
||||
await_background_done.map(|rv| {
|
||||
info!("Background runner exited: {:?}", rv);
|
||||
Ok(())
|
||||
}),
|
||||
shutdown_signal(send_cancel),
|
||||
)?;
|
||||
info!("Initializing web server...");
|
||||
let web_server = tokio::spawn(run_web_server(
|
||||
garage.clone(),
|
||||
wait_from(watch_cancel.clone()),
|
||||
));
|
||||
|
||||
// Stuff runs
|
||||
|
||||
// When a cancel signal is sent, stuff stops
|
||||
if let Err(e) = api_server.await? {
|
||||
warn!("API server exited with error: {}", e);
|
||||
}
|
||||
if let Err(e) = web_server.await? {
|
||||
warn!("Web server exited with error: {}", e);
|
||||
}
|
||||
|
||||
// Remove RPC handlers for system to break reference cycles
|
||||
garage.system.netapp.drop_all_handlers();
|
||||
|
||||
// Wait for the last parts to end
|
||||
run_system.await?;
|
||||
await_background_done.await?;
|
||||
|
||||
info!("Cleaning up...");
|
||||
|
||||
|
@ -1,6 +1,6 @@
|
||||
[package]
|
||||
name = "garage_model"
|
||||
version = "0.3.0"
|
||||
version = "0.4.0"
|
||||
authors = ["Alex Auvolat <alex@adnab.me>"]
|
||||
edition = "2018"
|
||||
license = "AGPL-3.0"
|
||||
@ -13,10 +13,11 @@ path = "lib.rs"
|
||||
# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
|
||||
|
||||
[dependencies]
|
||||
garage_rpc = { version = "0.3.0", path = "../rpc" }
|
||||
garage_table = { version = "0.3.0", path = "../table" }
|
||||
garage_util = { version = "0.3.0", path = "../util" }
|
||||
garage_rpc = { version = "0.4.0", path = "../rpc" }
|
||||
garage_table = { version = "0.4.0", path = "../table" }
|
||||
garage_util = { version = "0.4.0", path = "../util" }
|
||||
|
||||
async-trait = "0.1.7"
|
||||
arc-swap = "1.0"
|
||||
hex = "0.4"
|
||||
log = "0.4"
|
||||
@ -31,3 +32,5 @@ serde_bytes = "0.11"
|
||||
futures = "0.3"
|
||||
futures-util = "0.3"
|
||||
tokio = { version = "1.0", default-features = false, features = ["rt", "rt-multi-thread", "io-util", "net", "time", "macros", "sync", "signal", "fs"] }
|
||||
|
||||
netapp = { version = "0.3.0", git = "https://git.deuxfleurs.fr/lx/netapp" }
|
||||
|
@ -3,6 +3,7 @@ use std::sync::Arc;
|
||||
use std::time::Duration;
|
||||
|
||||
use arc_swap::ArcSwapOption;
|
||||
use async_trait::async_trait;
|
||||
use futures::future::*;
|
||||
use futures::select;
|
||||
use serde::{Deserialize, Serialize};
|
||||
@ -14,9 +15,8 @@ use garage_util::data::*;
|
||||
use garage_util::error::Error;
|
||||
use garage_util::time::*;
|
||||
|
||||
use garage_rpc::membership::System;
|
||||
use garage_rpc::rpc_client::*;
|
||||
use garage_rpc::rpc_server::*;
|
||||
use garage_rpc::system::System;
|
||||
use garage_rpc::*;
|
||||
|
||||
use garage_table::replication::{TableReplication, TableShardedReplication};
|
||||
|
||||
@ -36,8 +36,9 @@ const RESYNC_RETRY_TIMEOUT: Duration = Duration::from_secs(10);
|
||||
|
||||
/// RPC messages used to share blocks of data between nodes
|
||||
#[derive(Debug, Serialize, Deserialize)]
|
||||
pub enum Message {
|
||||
pub enum BlockRpc {
|
||||
Ok,
|
||||
Error(String),
|
||||
/// Message to ask for a block of data, by hash
|
||||
GetBlock(Hash),
|
||||
/// Message to send a block of data, either because requested, or for first delivery of new
|
||||
@ -60,7 +61,9 @@ pub struct PutBlockMessage {
|
||||
pub data: Vec<u8>,
|
||||
}
|
||||
|
||||
impl RpcMessage for Message {}
|
||||
impl Message for BlockRpc {
|
||||
type Response = BlockRpc;
|
||||
}
|
||||
|
||||
/// The block manager, handling block exchange between nodes, and block storage on local node
|
||||
pub struct BlockManager {
|
||||
@ -77,7 +80,7 @@ pub struct BlockManager {
|
||||
resync_notify: Notify,
|
||||
|
||||
system: Arc<System>,
|
||||
rpc_client: Arc<RpcClient<Message>>,
|
||||
endpoint: Arc<Endpoint<BlockRpc, Self>>,
|
||||
pub(crate) garage: ArcSwapOption<Garage>,
|
||||
}
|
||||
|
||||
@ -87,7 +90,6 @@ impl BlockManager {
|
||||
data_dir: PathBuf,
|
||||
replication: TableShardedReplication,
|
||||
system: Arc<System>,
|
||||
rpc_server: &mut RpcServer,
|
||||
) -> Arc<Self> {
|
||||
let rc = db
|
||||
.open_tree("block_local_rc")
|
||||
@ -97,8 +99,7 @@ impl BlockManager {
|
||||
.open_tree("block_local_resync_queue")
|
||||
.expect("Unable to open block_local_resync_queue tree");
|
||||
|
||||
let rpc_path = "block_manager";
|
||||
let rpc_client = system.rpc_client::<Message>(rpc_path);
|
||||
let endpoint = system.netapp.endpoint(format!("garage_model/block.rs/Rpc"));
|
||||
|
||||
let block_manager = Arc::new(Self {
|
||||
replication,
|
||||
@ -108,35 +109,19 @@ impl BlockManager {
|
||||
resync_queue,
|
||||
resync_notify: Notify::new(),
|
||||
system,
|
||||
rpc_client,
|
||||
endpoint,
|
||||
garage: ArcSwapOption::from(None),
|
||||
});
|
||||
block_manager
|
||||
.clone()
|
||||
.register_handler(rpc_server, rpc_path.into());
|
||||
block_manager.endpoint.set_handler(block_manager.clone());
|
||||
|
||||
block_manager
|
||||
}
|
||||
|
||||
fn register_handler(self: Arc<Self>, rpc_server: &mut RpcServer, path: String) {
|
||||
let self2 = self.clone();
|
||||
rpc_server.add_handler::<Message, _, _>(path, move |msg, _addr| {
|
||||
let self2 = self2.clone();
|
||||
async move { self2.handle(&msg).await }
|
||||
});
|
||||
|
||||
let self2 = self.clone();
|
||||
self.rpc_client
|
||||
.set_local_handler(self.system.id, move |msg| {
|
||||
let self2 = self2.clone();
|
||||
async move { self2.handle(&msg).await }
|
||||
});
|
||||
}
|
||||
|
||||
async fn handle(self: Arc<Self>, msg: &Message) -> Result<Message, Error> {
|
||||
async fn handle_rpc(self: Arc<Self>, msg: &BlockRpc) -> Result<BlockRpc, Error> {
|
||||
match msg {
|
||||
Message::PutBlock(m) => self.write_block(&m.hash, &m.data).await,
|
||||
Message::GetBlock(h) => self.read_block(h).await,
|
||||
Message::NeedBlockQuery(h) => self.need_block(h).await.map(Message::NeedBlockReply),
|
||||
BlockRpc::PutBlock(m) => self.write_block(&m.hash, &m.data).await,
|
||||
BlockRpc::GetBlock(h) => self.read_block(h).await,
|
||||
BlockRpc::NeedBlockQuery(h) => self.need_block(h).await.map(BlockRpc::NeedBlockReply),
|
||||
_ => Err(Error::BadRpc("Unexpected RPC message".to_string())),
|
||||
}
|
||||
}
|
||||
@ -157,7 +142,7 @@ impl BlockManager {
|
||||
}
|
||||
|
||||
/// Write a block to disk
|
||||
async fn write_block(&self, hash: &Hash, data: &[u8]) -> Result<Message, Error> {
|
||||
async fn write_block(&self, hash: &Hash, data: &[u8]) -> Result<BlockRpc, Error> {
|
||||
let _lock = self.data_dir_lock.lock().await;
|
||||
|
||||
let mut path = self.block_dir(hash);
|
||||
@ -165,18 +150,18 @@ impl BlockManager {
|
||||
|
||||
path.push(hex::encode(hash));
|
||||
if fs::metadata(&path).await.is_ok() {
|
||||
return Ok(Message::Ok);
|
||||
return Ok(BlockRpc::Ok);
|
||||
}
|
||||
|
||||
let mut f = fs::File::create(path).await?;
|
||||
f.write_all(data).await?;
|
||||
drop(f);
|
||||
|
||||
Ok(Message::Ok)
|
||||
Ok(BlockRpc::Ok)
|
||||
}
|
||||
|
||||
/// Read block from disk, verifying its integrity
|
||||
async fn read_block(&self, hash: &Hash) -> Result<Message, Error> {
|
||||
async fn read_block(&self, hash: &Hash) -> Result<BlockRpc, Error> {
|
||||
let path = self.block_path(hash);
|
||||
|
||||
let mut f = match fs::File::open(&path).await {
|
||||
@ -204,7 +189,7 @@ impl BlockManager {
|
||||
return Err(Error::CorruptData(*hash));
|
||||
}
|
||||
|
||||
Ok(Message::PutBlock(PutBlockMessage { hash: *hash, data }))
|
||||
Ok(BlockRpc::PutBlock(PutBlockMessage { hash: *hash, data }))
|
||||
}
|
||||
|
||||
/// Check if this node should have a block, but doesn't actually have it
|
||||
@ -346,17 +331,22 @@ impl BlockManager {
|
||||
}
|
||||
who.retain(|id| *id != self.system.id);
|
||||
|
||||
let msg = Arc::new(Message::NeedBlockQuery(*hash));
|
||||
let msg = Arc::new(BlockRpc::NeedBlockQuery(*hash));
|
||||
let who_needs_fut = who.iter().map(|to| {
|
||||
self.rpc_client
|
||||
.call_arc(*to, msg.clone(), NEED_BLOCK_QUERY_TIMEOUT)
|
||||
self.system.rpc.call_arc(
|
||||
&self.endpoint,
|
||||
*to,
|
||||
msg.clone(),
|
||||
RequestStrategy::with_priority(PRIO_NORMAL)
|
||||
.with_timeout(NEED_BLOCK_QUERY_TIMEOUT),
|
||||
)
|
||||
});
|
||||
let who_needs_resps = join_all(who_needs_fut).await;
|
||||
|
||||
let mut need_nodes = vec![];
|
||||
for (node, needed) in who.iter().zip(who_needs_resps.into_iter()) {
|
||||
match needed? {
|
||||
Message::NeedBlockReply(needed) => {
|
||||
BlockRpc::NeedBlockReply(needed) => {
|
||||
if needed {
|
||||
need_nodes.push(*node);
|
||||
}
|
||||
@ -377,11 +367,14 @@ impl BlockManager {
|
||||
);
|
||||
|
||||
let put_block_message = self.read_block(hash).await?;
|
||||
self.rpc_client
|
||||
self.system
|
||||
.rpc
|
||||
.try_call_many(
|
||||
&self.endpoint,
|
||||
&need_nodes[..],
|
||||
put_block_message,
|
||||
RequestStrategy::with_quorum(need_nodes.len())
|
||||
RequestStrategy::with_priority(PRIO_NORMAL)
|
||||
.with_quorum(need_nodes.len())
|
||||
.with_timeout(BLOCK_RW_TIMEOUT),
|
||||
)
|
||||
.await?;
|
||||
@ -413,18 +406,21 @@ impl BlockManager {
|
||||
pub async fn rpc_get_block(&self, hash: &Hash) -> Result<Vec<u8>, Error> {
|
||||
let who = self.replication.read_nodes(&hash);
|
||||
let resps = self
|
||||
.rpc_client
|
||||
.system
|
||||
.rpc
|
||||
.try_call_many(
|
||||
&self.endpoint,
|
||||
&who[..],
|
||||
Message::GetBlock(*hash),
|
||||
RequestStrategy::with_quorum(1)
|
||||
BlockRpc::GetBlock(*hash),
|
||||
RequestStrategy::with_priority(PRIO_NORMAL)
|
||||
.with_quorum(1)
|
||||
.with_timeout(BLOCK_RW_TIMEOUT)
|
||||
.interrupt_after_quorum(true),
|
||||
)
|
||||
.await?;
|
||||
|
||||
for resp in resps {
|
||||
if let Message::PutBlock(msg) = resp {
|
||||
if let BlockRpc::PutBlock(msg) = resp {
|
||||
return Ok(msg.data);
|
||||
}
|
||||
}
|
||||
@ -437,11 +433,14 @@ impl BlockManager {
|
||||
/// Send block to nodes that should have it
|
||||
pub async fn rpc_put_block(&self, hash: Hash, data: Vec<u8>) -> Result<(), Error> {
|
||||
let who = self.replication.write_nodes(&hash);
|
||||
self.rpc_client
|
||||
self.system
|
||||
.rpc
|
||||
.try_call_many(
|
||||
&self.endpoint,
|
||||
&who[..],
|
||||
Message::PutBlock(PutBlockMessage { hash, data }),
|
||||
RequestStrategy::with_quorum(self.replication.write_quorum())
|
||||
BlockRpc::PutBlock(PutBlockMessage { hash, data }),
|
||||
RequestStrategy::with_priority(PRIO_NORMAL)
|
||||
.with_quorum(self.replication.write_quorum())
|
||||
.with_timeout(BLOCK_RW_TIMEOUT),
|
||||
)
|
||||
.await?;
|
||||
@ -531,6 +530,16 @@ impl BlockManager {
|
||||
}
|
||||
}
|
||||
|
||||
#[async_trait]
|
||||
impl EndpointHandler<BlockRpc> for BlockManager {
|
||||
async fn handle(self: &Arc<Self>, message: &BlockRpc, _from: NodeID) -> BlockRpc {
|
||||
self.clone()
|
||||
.handle_rpc(message)
|
||||
.await
|
||||
.unwrap_or_else(|e| BlockRpc::Error(format!("{}", e)))
|
||||
}
|
||||
}
|
||||
|
||||
fn u64_from_be_bytes<T: AsRef<[u8]>>(bytes: T) -> u64 {
|
||||
assert!(bytes.as_ref().len() == 8);
|
||||
let mut x8 = [0u8; 8];
|
||||
|
@ -1,11 +1,11 @@
|
||||
use std::sync::Arc;
|
||||
|
||||
use netapp::NetworkKey;
|
||||
|
||||
use garage_util::background::*;
|
||||
use garage_util::config::*;
|
||||
|
||||
use garage_rpc::membership::System;
|
||||
use garage_rpc::rpc_client::RpcHttpClient;
|
||||
use garage_rpc::rpc_server::RpcServer;
|
||||
use garage_rpc::system::System;
|
||||
|
||||
use garage_table::replication::ReplicationMode;
|
||||
use garage_table::replication::TableFullReplication;
|
||||
@ -45,26 +45,25 @@ pub struct Garage {
|
||||
|
||||
impl Garage {
|
||||
/// Create and run garage
|
||||
pub fn new(
|
||||
config: Config,
|
||||
db: sled::Db,
|
||||
background: Arc<BackgroundRunner>,
|
||||
rpc_server: &mut RpcServer,
|
||||
) -> Arc<Self> {
|
||||
pub fn new(config: Config, db: sled::Db, background: Arc<BackgroundRunner>) -> Arc<Self> {
|
||||
let network_key = NetworkKey::from_slice(
|
||||
&hex::decode(&config.rpc_secret).expect("Invalid RPC secret key")[..],
|
||||
)
|
||||
.expect("Invalid RPC secret key");
|
||||
|
||||
let replication_mode = ReplicationMode::parse(&config.replication_mode)
|
||||
.expect("Invalid replication_mode in config file.");
|
||||
|
||||
info!("Initialize membership management system...");
|
||||
let rpc_http_client = Arc::new(
|
||||
RpcHttpClient::new(config.max_concurrent_rpc_requests, &config.rpc_tls)
|
||||
.expect("Could not create RPC client"),
|
||||
);
|
||||
let system = System::new(
|
||||
network_key,
|
||||
config.metadata_dir.clone(),
|
||||
rpc_http_client,
|
||||
background.clone(),
|
||||
rpc_server,
|
||||
replication_mode.replication_factor(),
|
||||
config.rpc_bind_addr,
|
||||
config.bootstrap_peers.clone(),
|
||||
config.consul_host.clone(),
|
||||
config.consul_service_name.clone(),
|
||||
);
|
||||
|
||||
let data_rep_param = TableShardedReplication {
|
||||
@ -87,13 +86,8 @@ impl Garage {
|
||||
};
|
||||
|
||||
info!("Initialize block manager...");
|
||||
let block_manager = BlockManager::new(
|
||||
&db,
|
||||
config.data_dir.clone(),
|
||||
data_rep_param,
|
||||
system.clone(),
|
||||
rpc_server,
|
||||
);
|
||||
let block_manager =
|
||||
BlockManager::new(&db, config.data_dir.clone(), data_rep_param, system.clone());
|
||||
|
||||
info!("Initialize block_ref_table...");
|
||||
let block_ref_table = Table::new(
|
||||
@ -104,7 +98,6 @@ impl Garage {
|
||||
system.clone(),
|
||||
&db,
|
||||
"block_ref".to_string(),
|
||||
rpc_server,
|
||||
);
|
||||
|
||||
info!("Initialize version_table...");
|
||||
@ -117,7 +110,6 @@ impl Garage {
|
||||
system.clone(),
|
||||
&db,
|
||||
"version".to_string(),
|
||||
rpc_server,
|
||||
);
|
||||
|
||||
info!("Initialize object_table...");
|
||||
@ -130,7 +122,6 @@ impl Garage {
|
||||
system.clone(),
|
||||
&db,
|
||||
"object".to_string(),
|
||||
rpc_server,
|
||||
);
|
||||
|
||||
info!("Initialize bucket_table...");
|
||||
@ -140,7 +131,6 @@ impl Garage {
|
||||
system.clone(),
|
||||
&db,
|
||||
"bucket".to_string(),
|
||||
rpc_server,
|
||||
);
|
||||
|
||||
info!("Initialize key_table_table...");
|
||||
@ -150,7 +140,6 @@ impl Garage {
|
||||
system.clone(),
|
||||
&db,
|
||||
"key".to_string(),
|
||||
rpc_server,
|
||||
);
|
||||
|
||||
info!("Initialize Garage...");
|
||||
|
@ -1,6 +1,6 @@
|
||||
[package]
|
||||
name = "garage_rpc"
|
||||
version = "0.3.0"
|
||||
version = "0.4.0"
|
||||
authors = ["Alex Auvolat <alex@adnab.me>"]
|
||||
edition = "2018"
|
||||
license = "AGPL-3.0"
|
||||
@ -13,7 +13,7 @@ path = "lib.rs"
|
||||
# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
|
||||
|
||||
[dependencies]
|
||||
garage_util = { version = "0.3.0", path = "../util" }
|
||||
garage_util = { version = "0.4.0", path = "../util" }
|
||||
|
||||
garage_rpc_021 = { package = "garage_rpc", version = "0.2.1" }
|
||||
|
||||
@ -22,7 +22,10 @@ bytes = "1.0"
|
||||
gethostname = "0.2"
|
||||
hex = "0.4"
|
||||
log = "0.4"
|
||||
rand = "0.8"
|
||||
sodiumoxide = { version = "0.2.5-0", package = "kuska-sodiumoxide" }
|
||||
|
||||
async-trait = "0.1.7"
|
||||
rmp-serde = "0.15"
|
||||
serde = { version = "1.0", default-features = false, features = ["derive", "rc"] }
|
||||
serde_json = "1.0"
|
||||
@ -32,11 +35,6 @@ futures-util = "0.3"
|
||||
tokio = { version = "1.0", default-features = false, features = ["rt", "rt-multi-thread", "io-util", "net", "time", "macros", "sync", "signal", "fs"] }
|
||||
tokio-stream = { version = "0.1", features = ["net"] }
|
||||
|
||||
http = "0.2"
|
||||
hyper = { version = "0.14", features = ["full"] }
|
||||
hyper-rustls = { version = "0.22", default-features = false }
|
||||
rustls = "0.19"
|
||||
tokio-rustls = "0.22"
|
||||
webpki = "0.21"
|
||||
|
||||
netapp = { version = "0.3.0", git = "https://git.deuxfleurs.fr/lx/netapp" }
|
||||
hyper = "0.14"
|
||||
|
||||
|
@ -4,10 +4,10 @@
|
||||
extern crate log;
|
||||
|
||||
mod consul;
|
||||
pub(crate) mod tls_util;
|
||||
|
||||
pub mod membership;
|
||||
pub mod ring;
|
||||
pub mod system;
|
||||
|
||||
pub mod rpc_client;
|
||||
pub mod rpc_server;
|
||||
pub mod rpc_helper;
|
||||
|
||||
pub use rpc_helper::*;
|
||||
|
@ -1,722 +0,0 @@
|
||||
//! Module containing structs related to membership management
|
||||
use std::collections::HashMap;
|
||||
use std::fmt::Write as FmtWrite;
|
||||
use std::io::{Read, Write};
|
||||
use std::net::{IpAddr, SocketAddr};
|
||||
use std::path::{Path, PathBuf};
|
||||
use std::sync::atomic::{AtomicUsize, Ordering};
|
||||
use std::sync::Arc;
|
||||
use std::time::Duration;
|
||||
|
||||
use futures::future::join_all;
|
||||
use futures::select;
|
||||
use futures_util::future::*;
|
||||
use serde::{Deserialize, Serialize};
|
||||
use tokio::sync::watch;
|
||||
use tokio::sync::Mutex;
|
||||
|
||||
use garage_util::background::BackgroundRunner;
|
||||
use garage_util::data::*;
|
||||
use garage_util::error::Error;
|
||||
use garage_util::persister::Persister;
|
||||
use garage_util::time::*;
|
||||
|
||||
use crate::consul::get_consul_nodes;
|
||||
use crate::ring::*;
|
||||
use crate::rpc_client::*;
|
||||
use crate::rpc_server::*;
|
||||
|
||||
const PING_INTERVAL: Duration = Duration::from_secs(10);
|
||||
const DISCOVERY_INTERVAL: Duration = Duration::from_secs(60);
|
||||
const PING_TIMEOUT: Duration = Duration::from_secs(2);
|
||||
const MAX_FAILURES_BEFORE_CONSIDERED_DOWN: usize = 5;
|
||||
|
||||
/// RPC endpoint used for calls related to membership
|
||||
pub const MEMBERSHIP_RPC_PATH: &str = "_membership";
|
||||
|
||||
/// RPC messages related to membership
|
||||
#[derive(Debug, Serialize, Deserialize)]
|
||||
pub enum Message {
|
||||
/// Response to successful advertisements
|
||||
Ok,
|
||||
/// Message sent to detect other nodes status
|
||||
Ping(PingMessage),
|
||||
/// Ask other node for the nodes it knows. Answered with AdvertiseNodesUp
|
||||
PullStatus,
|
||||
/// Ask other node its config. Answered with AdvertiseConfig
|
||||
PullConfig,
|
||||
/// Advertisement of the nodes this host knows to be up. Sent spontaneously or in response to PullStatus
|
||||
AdvertiseNodesUp(Vec<AdvertisedNode>),
|
||||
/// Advertisement of the network config. Sent spontaneously or in response to PullConfig
|
||||
AdvertiseConfig(NetworkConfig),
|
||||
}
|
||||
|
||||
impl RpcMessage for Message {}
|
||||
|
||||
/// A ping, containing information about status and config
|
||||
#[derive(Debug, Serialize, Deserialize)]
|
||||
pub struct PingMessage {
|
||||
id: Uuid,
|
||||
rpc_port: u16,
|
||||
|
||||
status_hash: Hash,
|
||||
config_version: u64,
|
||||
|
||||
state_info: StateInfo,
|
||||
}
|
||||
|
||||
/// A node advertisement
|
||||
#[derive(Clone, Debug, Serialize, Deserialize)]
|
||||
pub struct AdvertisedNode {
|
||||
/// Id of the node this advertisement relates to
|
||||
pub id: Uuid,
|
||||
/// IP and port of the node
|
||||
pub addr: SocketAddr,
|
||||
|
||||
/// Is the node considered up
|
||||
pub is_up: bool,
|
||||
/// When was the node last seen up, in milliseconds since UNIX epoch
|
||||
pub last_seen: u64,
|
||||
|
||||
pub state_info: StateInfo,
|
||||
}
|
||||
|
||||
/// This node's membership manager
|
||||
pub struct System {
|
||||
/// The id of this node
|
||||
pub id: Uuid,
|
||||
|
||||
persist_config: Persister<NetworkConfig>,
|
||||
persist_status: Persister<Vec<AdvertisedNode>>,
|
||||
rpc_local_port: u16,
|
||||
|
||||
state_info: StateInfo,
|
||||
|
||||
rpc_http_client: Arc<RpcHttpClient>,
|
||||
rpc_client: Arc<RpcClient<Message>>,
|
||||
|
||||
replication_factor: usize,
|
||||
pub(crate) status: watch::Receiver<Arc<Status>>,
|
||||
/// The ring
|
||||
pub ring: watch::Receiver<Arc<Ring>>,
|
||||
|
||||
update_lock: Mutex<Updaters>,
|
||||
|
||||
/// The job runner of this node
|
||||
pub background: Arc<BackgroundRunner>,
|
||||
}
|
||||
|
||||
struct Updaters {
|
||||
update_status: watch::Sender<Arc<Status>>,
|
||||
update_ring: watch::Sender<Arc<Ring>>,
|
||||
}
|
||||
|
||||
/// The status of each node, as viewed by this node
|
||||
#[derive(Debug, Clone)]
|
||||
pub struct Status {
|
||||
/// Mapping of each node id to its known status
|
||||
pub nodes: HashMap<Uuid, Arc<StatusEntry>>,
|
||||
/// Hash of `nodes`, used to detect when nodes have different views of the cluster
|
||||
pub hash: Hash,
|
||||
}
|
||||
|
||||
/// The status of a single node
|
||||
#[derive(Debug)]
|
||||
pub struct StatusEntry {
|
||||
/// The IP and port used to connect to this node
|
||||
pub addr: SocketAddr,
|
||||
/// Last time this node was seen
|
||||
pub last_seen: u64,
|
||||
/// Number of consecutive pings sent without reply to this node
|
||||
pub num_failures: AtomicUsize,
|
||||
pub state_info: StateInfo,
|
||||
}
|
||||
|
||||
impl StatusEntry {
|
||||
/// Is the node associated with this entry considered up
|
||||
pub fn is_up(&self) -> bool {
|
||||
self.num_failures.load(Ordering::SeqCst) < MAX_FAILURES_BEFORE_CONSIDERED_DOWN
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone, Serialize, Deserialize)]
|
||||
pub struct StateInfo {
|
||||
/// Hostname of the node
|
||||
pub hostname: String,
|
||||
/// Replication factor configured on the node
|
||||
pub replication_factor: Option<usize>, // TODO Option is just for retrocompatibility. It should become a simple usize at some point
|
||||
}
|
||||
|
||||
impl Status {
|
||||
fn handle_ping(&mut self, ip: IpAddr, info: &PingMessage) -> bool {
|
||||
let addr = SocketAddr::new(ip, info.rpc_port);
|
||||
let old_status = self.nodes.insert(
|
||||
info.id,
|
||||
Arc::new(StatusEntry {
|
||||
addr,
|
||||
last_seen: now_msec(),
|
||||
num_failures: AtomicUsize::from(0),
|
||||
state_info: info.state_info.clone(),
|
||||
}),
|
||||
);
|
||||
match old_status {
|
||||
None => {
|
||||
info!("Newly pingable node: {}", hex::encode(&info.id));
|
||||
true
|
||||
}
|
||||
Some(x) => x.addr != addr,
|
||||
}
|
||||
}
|
||||
|
||||
fn recalculate_hash(&mut self) {
|
||||
let mut nodes = self.nodes.iter().collect::<Vec<_>>();
|
||||
nodes.sort_unstable_by_key(|(id, _status)| *id);
|
||||
|
||||
let mut nodes_txt = String::new();
|
||||
debug!("Current set of pingable nodes: --");
|
||||
for (id, status) in nodes {
|
||||
debug!("{} {}", hex::encode(&id), status.addr);
|
||||
writeln!(&mut nodes_txt, "{} {}", hex::encode(&id), status.addr).unwrap();
|
||||
}
|
||||
debug!("END --");
|
||||
self.hash = blake2sum(nodes_txt.as_bytes());
|
||||
}
|
||||
|
||||
fn to_serializable_membership(&self, system: &System) -> Vec<AdvertisedNode> {
|
||||
let mut mem = vec![];
|
||||
for (node, status) in self.nodes.iter() {
|
||||
let state_info = if *node == system.id {
|
||||
system.state_info.clone()
|
||||
} else {
|
||||
status.state_info.clone()
|
||||
};
|
||||
mem.push(AdvertisedNode {
|
||||
id: *node,
|
||||
addr: status.addr,
|
||||
is_up: status.is_up(),
|
||||
last_seen: status.last_seen,
|
||||
state_info,
|
||||
});
|
||||
}
|
||||
mem
|
||||
}
|
||||
}
|
||||
|
||||
fn gen_node_id(metadata_dir: &Path) -> Result<Uuid, Error> {
|
||||
let mut id_file = metadata_dir.to_path_buf();
|
||||
id_file.push("node_id");
|
||||
if id_file.as_path().exists() {
|
||||
let mut f = std::fs::File::open(id_file.as_path())?;
|
||||
let mut d = vec![];
|
||||
f.read_to_end(&mut d)?;
|
||||
if d.len() != 32 {
|
||||
return Err(Error::Message("Corrupt node_id file".to_string()));
|
||||
}
|
||||
|
||||
let mut id = [0u8; 32];
|
||||
id.copy_from_slice(&d[..]);
|
||||
Ok(id.into())
|
||||
} else {
|
||||
let id = gen_uuid();
|
||||
|
||||
let mut f = std::fs::File::create(id_file.as_path())?;
|
||||
f.write_all(id.as_slice())?;
|
||||
Ok(id)
|
||||
}
|
||||
}
|
||||
|
||||
impl System {
|
||||
/// Create this node's membership manager
|
||||
pub fn new(
|
||||
metadata_dir: PathBuf,
|
||||
rpc_http_client: Arc<RpcHttpClient>,
|
||||
background: Arc<BackgroundRunner>,
|
||||
rpc_server: &mut RpcServer,
|
||||
replication_factor: usize,
|
||||
) -> Arc<Self> {
|
||||
let id = gen_node_id(&metadata_dir).expect("Unable to read or generate node ID");
|
||||
info!("Node ID: {}", hex::encode(&id));
|
||||
|
||||
let persist_config = Persister::new(&metadata_dir, "network_config");
|
||||
let persist_status = Persister::new(&metadata_dir, "peer_info");
|
||||
|
||||
let net_config = match persist_config.load() {
|
||||
Ok(x) => x,
|
||||
Err(e) => {
|
||||
match Persister::<garage_rpc_021::ring::NetworkConfig>::new(
|
||||
&metadata_dir,
|
||||
"network_config",
|
||||
)
|
||||
.load()
|
||||
{
|
||||
Ok(old_config) => NetworkConfig::migrate_from_021(old_config),
|
||||
Err(e2) => {
|
||||
info!(
|
||||
"No valid previous network configuration stored ({}, {}), starting fresh.",
|
||||
e, e2
|
||||
);
|
||||
NetworkConfig::new()
|
||||
}
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
let mut status = Status {
|
||||
nodes: HashMap::new(),
|
||||
hash: Hash::default(),
|
||||
};
|
||||
status.recalculate_hash();
|
||||
let (update_status, status) = watch::channel(Arc::new(status));
|
||||
|
||||
let state_info = StateInfo {
|
||||
hostname: gethostname::gethostname()
|
||||
.into_string()
|
||||
.unwrap_or_else(|_| "<invalid utf-8>".to_string()),
|
||||
replication_factor: Some(replication_factor),
|
||||
};
|
||||
|
||||
let ring = Ring::new(net_config, replication_factor);
|
||||
let (update_ring, ring) = watch::channel(Arc::new(ring));
|
||||
|
||||
let rpc_path = MEMBERSHIP_RPC_PATH.to_string();
|
||||
let rpc_client = RpcClient::new(
|
||||
RpcAddrClient::<Message>::new(rpc_http_client.clone(), rpc_path.clone()),
|
||||
background.clone(),
|
||||
status.clone(),
|
||||
);
|
||||
|
||||
let sys = Arc::new(System {
|
||||
id,
|
||||
persist_config,
|
||||
persist_status,
|
||||
rpc_local_port: rpc_server.bind_addr.port(),
|
||||
state_info,
|
||||
rpc_http_client,
|
||||
rpc_client,
|
||||
replication_factor,
|
||||
status,
|
||||
ring,
|
||||
update_lock: Mutex::new(Updaters {
|
||||
update_status,
|
||||
update_ring,
|
||||
}),
|
||||
background,
|
||||
});
|
||||
sys.clone().register_handler(rpc_server, rpc_path);
|
||||
sys
|
||||
}
|
||||
|
||||
fn register_handler(self: Arc<Self>, rpc_server: &mut RpcServer, path: String) {
|
||||
rpc_server.add_handler::<Message, _, _>(path, move |msg, addr| {
|
||||
let self2 = self.clone();
|
||||
async move {
|
||||
match msg {
|
||||
Message::Ping(ping) => self2.handle_ping(&addr, &ping).await,
|
||||
|
||||
Message::PullStatus => Ok(self2.handle_pull_status()),
|
||||
Message::PullConfig => Ok(self2.handle_pull_config()),
|
||||
Message::AdvertiseNodesUp(adv) => self2.handle_advertise_nodes_up(&adv).await,
|
||||
Message::AdvertiseConfig(adv) => self2.handle_advertise_config(&adv).await,
|
||||
|
||||
_ => Err(Error::BadRpc("Unexpected RPC message".to_string())),
|
||||
}
|
||||
}
|
||||
});
|
||||
}
|
||||
|
||||
/// Get an RPC client
|
||||
pub fn rpc_client<M: RpcMessage + 'static>(self: &Arc<Self>, path: &str) -> Arc<RpcClient<M>> {
|
||||
RpcClient::new(
|
||||
RpcAddrClient::new(self.rpc_http_client.clone(), path.to_string()),
|
||||
self.background.clone(),
|
||||
self.status.clone(),
|
||||
)
|
||||
}
|
||||
|
||||
/// Save network configuration to disc
|
||||
async fn save_network_config(self: Arc<Self>) -> Result<(), Error> {
|
||||
let ring = self.ring.borrow().clone();
|
||||
self.persist_config
|
||||
.save_async(&ring.config)
|
||||
.await
|
||||
.expect("Cannot save current cluster configuration");
|
||||
Ok(())
|
||||
}
|
||||
|
||||
fn make_ping(&self) -> Message {
|
||||
let status = self.status.borrow().clone();
|
||||
let ring = self.ring.borrow().clone();
|
||||
Message::Ping(PingMessage {
|
||||
id: self.id,
|
||||
rpc_port: self.rpc_local_port,
|
||||
status_hash: status.hash,
|
||||
config_version: ring.config.version,
|
||||
state_info: self.state_info.clone(),
|
||||
})
|
||||
}
|
||||
|
||||
async fn broadcast(self: Arc<Self>, msg: Message, timeout: Duration) {
|
||||
let status = self.status.borrow().clone();
|
||||
let to = status
|
||||
.nodes
|
||||
.keys()
|
||||
.filter(|x| **x != self.id)
|
||||
.cloned()
|
||||
.collect::<Vec<_>>();
|
||||
self.rpc_client.call_many(&to[..], msg, timeout).await;
|
||||
}
|
||||
|
||||
/// Perform bootstrapping, starting the discovery and ping loops
|
||||
pub async fn bootstrap(
|
||||
self: Arc<Self>,
|
||||
peers: Vec<SocketAddr>,
|
||||
consul_host: Option<String>,
|
||||
consul_service_name: Option<String>,
|
||||
) {
|
||||
let self2 = self.clone();
|
||||
self.background
|
||||
.spawn_worker("discovery loop".to_string(), |stop_signal| {
|
||||
self2.discovery_loop(peers, consul_host, consul_service_name, stop_signal)
|
||||
});
|
||||
|
||||
let self2 = self.clone();
|
||||
self.background
|
||||
.spawn_worker("ping loop".to_string(), |stop_signal| {
|
||||
self2.ping_loop(stop_signal)
|
||||
});
|
||||
}
|
||||
|
||||
async fn ping_nodes(self: Arc<Self>, peers: Vec<(SocketAddr, Option<Uuid>)>) {
|
||||
let ping_msg = self.make_ping();
|
||||
let ping_resps = join_all(peers.iter().map(|(addr, id_option)| {
|
||||
let sys = self.clone();
|
||||
let ping_msg_ref = &ping_msg;
|
||||
async move {
|
||||
(
|
||||
id_option,
|
||||
addr,
|
||||
sys.rpc_client
|
||||
.by_addr()
|
||||
.call(&addr, ping_msg_ref, PING_TIMEOUT)
|
||||
.await,
|
||||
)
|
||||
}
|
||||
}))
|
||||
.await;
|
||||
|
||||
let update_locked = self.update_lock.lock().await;
|
||||
let mut status: Status = self.status.borrow().as_ref().clone();
|
||||
let ring = self.ring.borrow().clone();
|
||||
|
||||
let mut has_changes = false;
|
||||
let mut to_advertise = vec![];
|
||||
|
||||
for (id_option, addr, ping_resp) in ping_resps {
|
||||
if let Ok(Ok(Message::Ping(info))) = ping_resp {
|
||||
let is_new = status.handle_ping(addr.ip(), &info);
|
||||
if is_new {
|
||||
has_changes = true;
|
||||
to_advertise.push(AdvertisedNode {
|
||||
id: info.id,
|
||||
addr: *addr,
|
||||
is_up: true,
|
||||
last_seen: now_msec(),
|
||||
state_info: info.state_info.clone(),
|
||||
});
|
||||
}
|
||||
if is_new || status.hash != info.status_hash {
|
||||
self.background
|
||||
.spawn_cancellable(self.clone().pull_status(info.id).map(Ok));
|
||||
}
|
||||
if is_new || ring.config.version < info.config_version {
|
||||
self.background
|
||||
.spawn_cancellable(self.clone().pull_config(info.id).map(Ok));
|
||||
}
|
||||
} else if let Some(id) = id_option {
|
||||
if let Some(st) = status.nodes.get_mut(id) {
|
||||
// we need to increment failure counter as call was done using by_addr so the
|
||||
// counter was not auto-incremented
|
||||
st.num_failures.fetch_add(1, Ordering::SeqCst);
|
||||
if !st.is_up() {
|
||||
warn!("Node {:?} seems to be down.", id);
|
||||
if !ring.config.members.contains_key(id) {
|
||||
info!("Removing node {:?} from status (not in config and not responding to pings anymore)", id);
|
||||
status.nodes.remove(&id);
|
||||
has_changes = true;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
if has_changes {
|
||||
status.recalculate_hash();
|
||||
}
|
||||
self.update_status(&update_locked, status).await;
|
||||
drop(update_locked);
|
||||
|
||||
if !to_advertise.is_empty() {
|
||||
self.broadcast(Message::AdvertiseNodesUp(to_advertise), PING_TIMEOUT)
|
||||
.await;
|
||||
}
|
||||
}
|
||||
|
||||
async fn handle_ping(
|
||||
self: Arc<Self>,
|
||||
from: &SocketAddr,
|
||||
ping: &PingMessage,
|
||||
) -> Result<Message, Error> {
|
||||
let update_locked = self.update_lock.lock().await;
|
||||
let mut status: Status = self.status.borrow().as_ref().clone();
|
||||
|
||||
let is_new = status.handle_ping(from.ip(), ping);
|
||||
if is_new {
|
||||
status.recalculate_hash();
|
||||
}
|
||||
let status_hash = status.hash;
|
||||
let config_version = self.ring.borrow().config.version;
|
||||
|
||||
self.update_status(&update_locked, status).await;
|
||||
drop(update_locked);
|
||||
|
||||
if is_new || status_hash != ping.status_hash {
|
||||
self.background
|
||||
.spawn_cancellable(self.clone().pull_status(ping.id).map(Ok));
|
||||
}
|
||||
if is_new || config_version < ping.config_version {
|
||||
self.background
|
||||
.spawn_cancellable(self.clone().pull_config(ping.id).map(Ok));
|
||||
}
|
||||
|
||||
Ok(self.make_ping())
|
||||
}
|
||||
|
||||
fn handle_pull_status(&self) -> Message {
|
||||
Message::AdvertiseNodesUp(self.status.borrow().to_serializable_membership(self))
|
||||
}
|
||||
|
||||
fn handle_pull_config(&self) -> Message {
|
||||
let ring = self.ring.borrow().clone();
|
||||
Message::AdvertiseConfig(ring.config.clone())
|
||||
}
|
||||
|
||||
async fn handle_advertise_nodes_up(
|
||||
self: Arc<Self>,
|
||||
adv: &[AdvertisedNode],
|
||||
) -> Result<Message, Error> {
|
||||
let mut to_ping = vec![];
|
||||
|
||||
let update_lock = self.update_lock.lock().await;
|
||||
let mut status: Status = self.status.borrow().as_ref().clone();
|
||||
let mut has_changed = false;
|
||||
let mut max_replication_factor = 0;
|
||||
|
||||
for node in adv.iter() {
|
||||
if node.id == self.id {
|
||||
// learn our own ip address
|
||||
let self_addr = SocketAddr::new(node.addr.ip(), self.rpc_local_port);
|
||||
let old_self = status.nodes.insert(
|
||||
node.id,
|
||||
Arc::new(StatusEntry {
|
||||
addr: self_addr,
|
||||
last_seen: now_msec(),
|
||||
num_failures: AtomicUsize::from(0),
|
||||
state_info: self.state_info.clone(),
|
||||
}),
|
||||
);
|
||||
has_changed = match old_self {
|
||||
None => true,
|
||||
Some(x) => x.addr != self_addr,
|
||||
};
|
||||
} else {
|
||||
let ping_them = match status.nodes.get(&node.id) {
|
||||
// Case 1: new node
|
||||
None => true,
|
||||
// Case 2: the node might have changed address
|
||||
Some(our_node) => node.is_up && !our_node.is_up() && our_node.addr != node.addr,
|
||||
};
|
||||
max_replication_factor = std::cmp::max(
|
||||
max_replication_factor,
|
||||
node.state_info.replication_factor.unwrap_or_default(),
|
||||
);
|
||||
if ping_them {
|
||||
to_ping.push((node.addr, Some(node.id)));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if self.replication_factor < max_replication_factor {
|
||||
error!("Some node have a higher replication factor ({}) than this one ({}). This is not supported and might lead to bugs",
|
||||
max_replication_factor,
|
||||
self.replication_factor);
|
||||
std::process::exit(1);
|
||||
}
|
||||
if has_changed {
|
||||
status.recalculate_hash();
|
||||
}
|
||||
self.update_status(&update_lock, status).await;
|
||||
drop(update_lock);
|
||||
|
||||
if !to_ping.is_empty() {
|
||||
self.background
|
||||
.spawn_cancellable(self.clone().ping_nodes(to_ping).map(Ok));
|
||||
}
|
||||
|
||||
Ok(Message::Ok)
|
||||
}
|
||||
|
||||
async fn handle_advertise_config(
|
||||
self: Arc<Self>,
|
||||
adv: &NetworkConfig,
|
||||
) -> Result<Message, Error> {
|
||||
let update_lock = self.update_lock.lock().await;
|
||||
let ring: Arc<Ring> = self.ring.borrow().clone();
|
||||
|
||||
if adv.version > ring.config.version {
|
||||
let ring = Ring::new(adv.clone(), self.replication_factor);
|
||||
update_lock.update_ring.send(Arc::new(ring))?;
|
||||
drop(update_lock);
|
||||
|
||||
self.background.spawn_cancellable(
|
||||
self.clone()
|
||||
.broadcast(Message::AdvertiseConfig(adv.clone()), PING_TIMEOUT)
|
||||
.map(Ok),
|
||||
);
|
||||
self.background.spawn(self.clone().save_network_config());
|
||||
}
|
||||
|
||||
Ok(Message::Ok)
|
||||
}
|
||||
|
||||
async fn ping_loop(self: Arc<Self>, mut stop_signal: watch::Receiver<bool>) {
|
||||
while !*stop_signal.borrow() {
|
||||
let restart_at = tokio::time::sleep(PING_INTERVAL);
|
||||
|
||||
let status = self.status.borrow().clone();
|
||||
let ping_addrs = status
|
||||
.nodes
|
||||
.iter()
|
||||
.filter(|(id, _)| **id != self.id)
|
||||
.map(|(id, status)| (status.addr, Some(*id)))
|
||||
.collect::<Vec<_>>();
|
||||
|
||||
self.clone().ping_nodes(ping_addrs).await;
|
||||
|
||||
select! {
|
||||
_ = restart_at.fuse() => {},
|
||||
_ = stop_signal.changed().fuse() => {},
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
async fn discovery_loop(
|
||||
self: Arc<Self>,
|
||||
bootstrap_peers: Vec<SocketAddr>,
|
||||
consul_host: Option<String>,
|
||||
consul_service_name: Option<String>,
|
||||
mut stop_signal: watch::Receiver<bool>,
|
||||
) {
|
||||
let consul_config = match (consul_host, consul_service_name) {
|
||||
(Some(ch), Some(csn)) => Some((ch, csn)),
|
||||
_ => None,
|
||||
};
|
||||
|
||||
while !*stop_signal.borrow() {
|
||||
let not_configured = self.ring.borrow().config.members.is_empty();
|
||||
let no_peers = self.status.borrow().nodes.len() < 3;
|
||||
let bad_peers = self
|
||||
.status
|
||||
.borrow()
|
||||
.nodes
|
||||
.iter()
|
||||
.filter(|(_, v)| v.is_up())
|
||||
.count() != self.ring.borrow().config.members.len();
|
||||
|
||||
if not_configured || no_peers || bad_peers {
|
||||
info!("Doing a bootstrap/discovery step (not_configured: {}, no_peers: {}, bad_peers: {})", not_configured, no_peers, bad_peers);
|
||||
|
||||
let mut ping_list = bootstrap_peers
|
||||
.iter()
|
||||
.map(|ip| (*ip, None))
|
||||
.collect::<Vec<_>>();
|
||||
|
||||
if let Ok(peers) = self.persist_status.load_async().await {
|
||||
ping_list.extend(peers.iter().map(|x| (x.addr, Some(x.id))));
|
||||
}
|
||||
|
||||
if let Some((consul_host, consul_service_name)) = &consul_config {
|
||||
match get_consul_nodes(consul_host, consul_service_name).await {
|
||||
Ok(node_list) => {
|
||||
ping_list.extend(node_list.iter().map(|a| (*a, None)));
|
||||
}
|
||||
Err(e) => {
|
||||
warn!("Could not retrieve node list from Consul: {}", e);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
self.clone().ping_nodes(ping_list).await;
|
||||
}
|
||||
|
||||
let restart_at = tokio::time::sleep(DISCOVERY_INTERVAL);
|
||||
select! {
|
||||
_ = restart_at.fuse() => {},
|
||||
_ = stop_signal.changed().fuse() => {},
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// for some reason fixing this is causing compilation error, see https://github.com/rust-lang/rust-clippy/issues/7052
|
||||
#[allow(clippy::manual_async_fn)]
|
||||
fn pull_status(
|
||||
self: Arc<Self>,
|
||||
peer: Uuid,
|
||||
) -> impl futures::future::Future<Output = ()> + Send + 'static {
|
||||
async move {
|
||||
let resp = self
|
||||
.rpc_client
|
||||
.call(peer, Message::PullStatus, PING_TIMEOUT)
|
||||
.await;
|
||||
if let Ok(Message::AdvertiseNodesUp(nodes)) = resp {
|
||||
let _: Result<_, _> = self.handle_advertise_nodes_up(&nodes).await;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
async fn pull_config(self: Arc<Self>, peer: Uuid) {
|
||||
let resp = self
|
||||
.rpc_client
|
||||
.call(peer, Message::PullConfig, PING_TIMEOUT)
|
||||
.await;
|
||||
if let Ok(Message::AdvertiseConfig(config)) = resp {
|
||||
let _: Result<_, _> = self.handle_advertise_config(&config).await;
|
||||
}
|
||||
}
|
||||
|
||||
async fn update_status(self: &Arc<Self>, updaters: &Updaters, status: Status) {
|
||||
if status.hash != self.status.borrow().hash {
|
||||
let mut list = status.to_serializable_membership(&self);
|
||||
|
||||
// Combine with old peer list to make sure no peer is lost
|
||||
if let Ok(old_list) = self.persist_status.load_async().await {
|
||||
for pp in old_list {
|
||||
if !list.iter().any(|np| pp.id == np.id) {
|
||||
list.push(pp);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if !list.is_empty() {
|
||||
info!("Persisting new peer list ({} peers)", list.len());
|
||||
self.persist_status
|
||||
.save_async(&list)
|
||||
.await
|
||||
.expect("Unable to persist peer list");
|
||||
}
|
||||
}
|
||||
|
||||
updaters
|
||||
.update_status
|
||||
.send(Arc::new(status))
|
||||
.expect("Could not update internal membership status");
|
||||
}
|
||||
}
|
@ -3,6 +3,8 @@
|
||||
use std::collections::{HashMap, HashSet};
|
||||
use std::convert::TryInto;
|
||||
|
||||
use netapp::NodeID;
|
||||
|
||||
use serde::{Deserialize, Serialize};
|
||||
|
||||
use garage_util::data::*;
|
||||
@ -98,7 +100,7 @@ pub struct Ring {
|
||||
pub config: NetworkConfig,
|
||||
|
||||
// Internal order of nodes used to make a more compact representation of the ring
|
||||
nodes: Vec<Uuid>,
|
||||
nodes: Vec<NodeID>,
|
||||
|
||||
// The list of entries in the ring
|
||||
ring: Vec<RingEntry>,
|
||||
@ -260,6 +262,11 @@ impl Ring {
|
||||
})
|
||||
.collect::<Vec<_>>();
|
||||
|
||||
let nodes = nodes
|
||||
.iter()
|
||||
.map(|id| NodeID::from_slice(id.as_slice()).unwrap())
|
||||
.collect::<Vec<_>>();
|
||||
|
||||
Self {
|
||||
replication_factor,
|
||||
config,
|
||||
@ -291,7 +298,7 @@ impl Ring {
|
||||
}
|
||||
|
||||
/// Walk the ring to find the n servers in which data should be replicated
|
||||
pub fn get_nodes(&self, position: &Hash, n: usize) -> Vec<Uuid> {
|
||||
pub fn get_nodes(&self, position: &Hash, n: usize) -> Vec<NodeID> {
|
||||
if self.ring.len() != 1 << PARTITION_BITS {
|
||||
warn!("Ring not yet ready, read/writes will be lost!");
|
||||
return vec![];
|
||||
|
@ -1,369 +0,0 @@
|
||||
//! Contain structs related to making RPCs
|
||||
use std::borrow::Borrow;
|
||||
use std::marker::PhantomData;
|
||||
use std::net::SocketAddr;
|
||||
use std::pin::Pin;
|
||||
use std::sync::atomic::Ordering;
|
||||
use std::sync::Arc;
|
||||
use std::time::Duration;
|
||||
|
||||
use arc_swap::ArcSwapOption;
|
||||
use futures::future::Future;
|
||||
use futures::stream::futures_unordered::FuturesUnordered;
|
||||
use futures::stream::StreamExt;
|
||||
use futures_util::future::FutureExt;
|
||||
use hyper::client::{Client, HttpConnector};
|
||||
use hyper::{Body, Method, Request};
|
||||
use tokio::sync::{watch, Semaphore};
|
||||
|
||||
use garage_util::background::BackgroundRunner;
|
||||
use garage_util::config::TlsConfig;
|
||||
use garage_util::data::*;
|
||||
use garage_util::error::{Error, RpcError};
|
||||
|
||||
use crate::membership::Status;
|
||||
use crate::rpc_server::RpcMessage;
|
||||
use crate::tls_util;
|
||||
|
||||
/// Timeout used by `RequestStrategy::with_quorum` when none is set explicitly
const DEFAULT_TIMEOUT: Duration = Duration::from_secs(10);
|
||||
|
||||
/// Parameters controlling how an RPC sent to several nodes is carried out
#[derive(Copy, Clone)]
pub struct RequestStrategy {
    /// Maximum time to wait for a response
    pub rs_timeout: Duration,
    /// Minimum number of successful responses for the request to be considered successful
    pub rs_quorum: usize,
    /// Whether the remaining requests should be dropped once enough responses were received
    pub rs_interrupt_after_quorum: bool,
}
|
||||
|
||||
impl RequestStrategy {
|
||||
/// Create a RequestStrategy with default timeout and not interrupting when quorum reached
|
||||
pub fn with_quorum(quorum: usize) -> Self {
|
||||
RequestStrategy {
|
||||
rs_timeout: DEFAULT_TIMEOUT,
|
||||
rs_quorum: quorum,
|
||||
rs_interrupt_after_quorum: false,
|
||||
}
|
||||
}
|
||||
/// Set timeout of the strategy
|
||||
pub fn with_timeout(mut self, timeout: Duration) -> Self {
|
||||
self.rs_timeout = timeout;
|
||||
self
|
||||
}
|
||||
/// Set if requests can be dropped after quorum has been reached
|
||||
/// In general true for read requests, and false for write
|
||||
pub fn interrupt_after_quorum(mut self, interrupt: bool) -> Self {
|
||||
self.rs_interrupt_after_quorum = interrupt;
|
||||
self
|
||||
}
|
||||
}
|
||||
|
||||
/// Shortcut for a boxed async function taking a message, and resolving to another message or an
|
||||
/// error
|
||||
pub type LocalHandlerFn<M> =
|
||||
Box<dyn Fn(Arc<M>) -> Pin<Box<dyn Future<Output = Result<M, Error>> + Send>> + Send + Sync>;
|
||||
|
||||
/// Client used to send RPC
|
||||
pub struct RpcClient<M: RpcMessage> {
|
||||
status: watch::Receiver<Arc<Status>>,
|
||||
background: Arc<BackgroundRunner>,
|
||||
|
||||
local_handler: ArcSwapOption<(Uuid, LocalHandlerFn<M>)>,
|
||||
|
||||
rpc_addr_client: RpcAddrClient<M>,
|
||||
}
|
||||
|
||||
impl<M: RpcMessage + 'static> RpcClient<M> {
|
||||
/// Create a new RpcClient from an address, a job runner, and the status of all RPC servers
|
||||
pub fn new(
|
||||
rac: RpcAddrClient<M>,
|
||||
background: Arc<BackgroundRunner>,
|
||||
status: watch::Receiver<Arc<Status>>,
|
||||
) -> Arc<Self> {
|
||||
Arc::new(Self {
|
||||
rpc_addr_client: rac,
|
||||
background,
|
||||
status,
|
||||
local_handler: ArcSwapOption::new(None),
|
||||
})
|
||||
}
|
||||
|
||||
/// Set the local handler, to process RPC to this node without network usage
|
||||
pub fn set_local_handler<F, Fut>(&self, my_id: Uuid, handler: F)
|
||||
where
|
||||
F: Fn(Arc<M>) -> Fut + Send + Sync + 'static,
|
||||
Fut: Future<Output = Result<M, Error>> + Send + 'static,
|
||||
{
|
||||
let handler_arc = Arc::new(handler);
|
||||
let handler: LocalHandlerFn<M> = Box::new(move |msg| {
|
||||
let handler_arc2 = handler_arc.clone();
|
||||
Box::pin(async move { handler_arc2(msg).await })
|
||||
});
|
||||
self.local_handler.swap(Some(Arc::new((my_id, handler))));
|
||||
}
|
||||
|
||||
/// Get a RPC client to make calls using node's SocketAddr instead of its ID
|
||||
pub fn by_addr(&self) -> &RpcAddrClient<M> {
|
||||
&self.rpc_addr_client
|
||||
}
|
||||
|
||||
/// Make a RPC call
|
||||
pub async fn call(&self, to: Uuid, msg: M, timeout: Duration) -> Result<M, Error> {
|
||||
self.call_arc(to, Arc::new(msg), timeout).await
|
||||
}
|
||||
|
||||
/// Make a RPC call from a message stored in an Arc
|
||||
pub async fn call_arc(&self, to: Uuid, msg: Arc<M>, timeout: Duration) -> Result<M, Error> {
|
||||
if let Some(lh) = self.local_handler.load_full() {
|
||||
let (my_id, local_handler) = lh.as_ref();
|
||||
if to.borrow() == my_id {
|
||||
return local_handler(msg).await;
|
||||
}
|
||||
}
|
||||
let status = self.status.borrow().clone();
|
||||
let node_status = match status.nodes.get(&to) {
|
||||
Some(node_status) => {
|
||||
if node_status.is_up() {
|
||||
node_status
|
||||
} else {
|
||||
return Err(Error::from(RpcError::NodeDown(to)));
|
||||
}
|
||||
}
|
||||
None => {
|
||||
return Err(Error::Message(format!(
|
||||
"Peer ID not found: {:?}",
|
||||
to.borrow()
|
||||
)))
|
||||
}
|
||||
};
|
||||
match self
|
||||
.rpc_addr_client
|
||||
.call(&node_status.addr, msg, timeout)
|
||||
.await
|
||||
{
|
||||
Err(rpc_error) => {
|
||||
node_status.num_failures.fetch_add(1, Ordering::SeqCst);
|
||||
Err(Error::from(rpc_error))
|
||||
}
|
||||
Ok(x) => x,
|
||||
}
|
||||
}
|
||||
|
||||
/// Make a RPC call to multiple servers, returning a Vec containing each result
|
||||
pub async fn call_many(&self, to: &[Uuid], msg: M, timeout: Duration) -> Vec<Result<M, Error>> {
|
||||
let msg = Arc::new(msg);
|
||||
let mut resp_stream = to
|
||||
.iter()
|
||||
.map(|to| self.call_arc(*to, msg.clone(), timeout))
|
||||
.collect::<FuturesUnordered<_>>();
|
||||
|
||||
let mut results = vec![];
|
||||
while let Some(resp) = resp_stream.next().await {
|
||||
results.push(resp);
|
||||
}
|
||||
results
|
||||
}
|
||||
|
||||
/// Make a RPC call to multiple servers, returning either a Vec of responses, or an error if
|
||||
/// strategy could not be respected due to too many errors
|
||||
pub async fn try_call_many(
|
||||
self: &Arc<Self>,
|
||||
to: &[Uuid],
|
||||
msg: M,
|
||||
strategy: RequestStrategy,
|
||||
) -> Result<Vec<M>, Error> {
|
||||
let timeout = strategy.rs_timeout;
|
||||
|
||||
let msg = Arc::new(msg);
|
||||
let mut resp_stream = to
|
||||
.to_vec()
|
||||
.into_iter()
|
||||
.map(|to| {
|
||||
let self2 = self.clone();
|
||||
let msg = msg.clone();
|
||||
async move { self2.call_arc(to, msg, timeout).await }
|
||||
})
|
||||
.collect::<FuturesUnordered<_>>();
|
||||
|
||||
let mut results = vec![];
|
||||
let mut errors = vec![];
|
||||
|
||||
while let Some(resp) = resp_stream.next().await {
|
||||
match resp {
|
||||
Ok(msg) => {
|
||||
results.push(msg);
|
||||
if results.len() >= strategy.rs_quorum {
|
||||
break;
|
||||
}
|
||||
}
|
||||
Err(e) => {
|
||||
errors.push(e);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if results.len() >= strategy.rs_quorum {
|
||||
// Continue requests in background.
|
||||
// Continue the remaining requests immediately using tokio::spawn
|
||||
// but enqueue a task in the background runner
|
||||
// to ensure that the process won't exit until the requests are done
|
||||
// (if we had just enqueued the resp_stream.collect directly in the background runner,
|
||||
// the requests might have been put on hold in the background runner's queue,
|
||||
// in which case they might timeout or otherwise fail)
|
||||
if !strategy.rs_interrupt_after_quorum {
|
||||
let wait_finished_fut = tokio::spawn(async move {
|
||||
resp_stream.collect::<Vec<_>>().await;
|
||||
});
|
||||
self.background.spawn(wait_finished_fut.map(|_| Ok(())));
|
||||
}
|
||||
|
||||
Ok(results)
|
||||
} else {
|
||||
let errors = errors.iter().map(|e| format!("{}", e)).collect::<Vec<_>>();
|
||||
Err(Error::from(RpcError::TooManyErrors(errors)))
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Thin wrapper arround an `RpcHttpClient` specifying the path of the request
|
||||
pub struct RpcAddrClient<M: RpcMessage> {
|
||||
phantom: PhantomData<M>,
|
||||
|
||||
http_client: Arc<RpcHttpClient>,
|
||||
path: String,
|
||||
}
|
||||
|
||||
impl<M: RpcMessage> RpcAddrClient<M> {
|
||||
/// Create an RpcAddrClient from an HTTP client and the endpoint to reach for RPCs
|
||||
pub fn new(http_client: Arc<RpcHttpClient>, path: String) -> Self {
|
||||
Self {
|
||||
phantom: PhantomData::default(),
|
||||
http_client,
|
||||
path,
|
||||
}
|
||||
}
|
||||
|
||||
/// Make a RPC
|
||||
pub async fn call<MB>(
|
||||
&self,
|
||||
to_addr: &SocketAddr,
|
||||
msg: MB,
|
||||
timeout: Duration,
|
||||
) -> Result<Result<M, Error>, RpcError>
|
||||
where
|
||||
MB: Borrow<M>,
|
||||
{
|
||||
self.http_client
|
||||
.call(&self.path, to_addr, msg, timeout)
|
||||
.await
|
||||
}
|
||||
}
|
||||
|
||||
/// HTTP client used to make RPCs
|
||||
pub struct RpcHttpClient {
|
||||
request_limiter: Semaphore,
|
||||
method: ClientMethod,
|
||||
}
|
||||
|
||||
enum ClientMethod {
|
||||
Http(Client<HttpConnector, hyper::Body>),
|
||||
Https(Client<tls_util::HttpsConnectorFixedDnsname<HttpConnector>, hyper::Body>),
|
||||
}
|
||||
|
||||
impl RpcHttpClient {
|
||||
/// Create a new RpcHttpClient
|
||||
pub fn new(
|
||||
max_concurrent_requests: usize,
|
||||
tls_config: &Option<TlsConfig>,
|
||||
) -> Result<Self, Error> {
|
||||
let method = if let Some(cf) = tls_config {
|
||||
let ca_certs = tls_util::load_certs(&cf.ca_cert).map_err(|e| {
|
||||
Error::Message(format!("Failed to open CA certificate file: {:?}", e))
|
||||
})?;
|
||||
let node_certs = tls_util::load_certs(&cf.node_cert)
|
||||
.map_err(|e| Error::Message(format!("Failed to open certificate file: {:?}", e)))?;
|
||||
let node_key = tls_util::load_private_key(&cf.node_key)
|
||||
.map_err(|e| Error::Message(format!("Failed to open private key file: {:?}", e)))?;
|
||||
|
||||
let mut config = rustls::ClientConfig::new();
|
||||
|
||||
for crt in ca_certs.iter() {
|
||||
config.root_store.add(crt)?;
|
||||
}
|
||||
|
||||
config.set_single_client_cert([&node_certs[..], &ca_certs[..]].concat(), node_key)?;
|
||||
|
||||
let connector =
|
||||
tls_util::HttpsConnectorFixedDnsname::<HttpConnector>::new(config, "garage");
|
||||
|
||||
ClientMethod::Https(Client::builder().build(connector))
|
||||
} else {
|
||||
ClientMethod::Http(Client::new())
|
||||
};
|
||||
Ok(RpcHttpClient {
|
||||
method,
|
||||
request_limiter: Semaphore::new(max_concurrent_requests),
|
||||
})
|
||||
}
|
||||
|
||||
/// Make a RPC
|
||||
async fn call<M, MB>(
|
||||
&self,
|
||||
path: &str,
|
||||
to_addr: &SocketAddr,
|
||||
msg: MB,
|
||||
timeout: Duration,
|
||||
) -> Result<Result<M, Error>, RpcError>
|
||||
where
|
||||
MB: Borrow<M>,
|
||||
M: RpcMessage,
|
||||
{
|
||||
let uri = match self.method {
|
||||
ClientMethod::Http(_) => format!("http://{}/{}", to_addr, path),
|
||||
ClientMethod::Https(_) => format!("https://{}/{}", to_addr, path),
|
||||
};
|
||||
|
||||
let req = Request::builder()
|
||||
.method(Method::POST)
|
||||
.uri(uri)
|
||||
.body(Body::from(rmp_to_vec_all_named(msg.borrow())?))?;
|
||||
|
||||
let resp_fut = match &self.method {
|
||||
ClientMethod::Http(client) => client.request(req).fuse(),
|
||||
ClientMethod::Https(client) => client.request(req).fuse(),
|
||||
};
|
||||
|
||||
trace!("({}) Acquiring request_limiter slot...", path);
|
||||
let slot = self.request_limiter.acquire().await;
|
||||
trace!("({}) Got slot, doing request to {}...", path, to_addr);
|
||||
let resp = tokio::time::timeout(timeout, resp_fut)
|
||||
.await
|
||||
.map_err(|e| {
|
||||
debug!(
|
||||
"RPC timeout to {}: {}",
|
||||
to_addr,
|
||||
debug_serialize(msg.borrow())
|
||||
);
|
||||
e
|
||||
})?
|
||||
.map_err(|e| {
|
||||
warn!(
|
||||
"RPC HTTP client error when connecting to {}: {}",
|
||||
to_addr, e
|
||||
);
|
||||
e
|
||||
})?;
|
||||
|
||||
let status = resp.status();
|
||||
trace!("({}) Request returned, got status {}", path, status);
|
||||
let body = hyper::body::to_bytes(resp.into_body()).await?;
|
||||
drop(slot);
|
||||
|
||||
match rmp_serde::decode::from_read::<_, Result<M, String>>(&body[..])? {
|
||||
Err(e) => Ok(Err(Error::RemoteError(e, status))),
|
||||
Ok(x) => Ok(Ok(x)),
|
||||
}
|
||||
}
|
||||
}
|
206
src/rpc/rpc_helper.rs
Normal file
206
src/rpc/rpc_helper.rs
Normal file
@ -0,0 +1,206 @@
|
||||
//! Contain structs related to making RPCs
|
||||
use std::sync::Arc;
|
||||
use std::time::Duration;
|
||||
|
||||
use futures::future::join_all;
|
||||
use futures::stream::futures_unordered::FuturesUnordered;
|
||||
use futures::stream::StreamExt;
|
||||
use futures_util::future::FutureExt;
|
||||
use tokio::select;
|
||||
|
||||
pub use netapp::endpoint::{Endpoint, EndpointHandler, Message};
|
||||
use netapp::peering::fullmesh::FullMeshPeeringStrategy;
|
||||
pub use netapp::proto::*;
|
||||
pub use netapp::{NetApp, NodeID};
|
||||
|
||||
use garage_util::background::BackgroundRunner;
|
||||
use garage_util::error::{Error, RpcError};
|
||||
|
||||
/// Timeout used by `RequestStrategy::with_priority` when none is set explicitly
const DEFAULT_TIMEOUT: Duration = Duration::from_secs(10);
|
||||
|
||||
/// Strategy to apply when making RPC
|
||||
#[derive(Copy, Clone)]
|
||||
pub struct RequestStrategy {
|
||||
/// Max time to wait for reponse
|
||||
pub rs_timeout: Duration,
|
||||
/// Min number of response to consider the request successful
|
||||
pub rs_quorum: Option<usize>,
|
||||
/// Should requests be dropped after enough response are received
|
||||
pub rs_interrupt_after_quorum: bool,
|
||||
/// Request priority
|
||||
pub rs_priority: RequestPriority,
|
||||
}
|
||||
|
||||
impl RequestStrategy {
|
||||
/// Create a RequestStrategy with default timeout and not interrupting when quorum reached
|
||||
pub fn with_priority(prio: RequestPriority) -> Self {
|
||||
RequestStrategy {
|
||||
rs_timeout: DEFAULT_TIMEOUT,
|
||||
rs_quorum: None,
|
||||
rs_interrupt_after_quorum: false,
|
||||
rs_priority: prio,
|
||||
}
|
||||
}
|
||||
/// Set quorum to be reached for request
|
||||
pub fn with_quorum(mut self, quorum: usize) -> Self {
|
||||
self.rs_quorum = Some(quorum);
|
||||
self
|
||||
}
|
||||
/// Set timeout of the strategy
|
||||
pub fn with_timeout(mut self, timeout: Duration) -> Self {
|
||||
self.rs_timeout = timeout;
|
||||
self
|
||||
}
|
||||
/// Set if requests can be dropped after quorum has been reached
|
||||
/// In general true for read requests, and false for write
|
||||
pub fn interrupt_after_quorum(mut self, interrupt: bool) -> Self {
|
||||
self.rs_interrupt_after_quorum = interrupt;
|
||||
self
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Clone)]
|
||||
pub struct RpcHelper {
|
||||
pub(crate) fullmesh: Arc<FullMeshPeeringStrategy>,
|
||||
pub(crate) background: Arc<BackgroundRunner>,
|
||||
}
|
||||
|
||||
impl RpcHelper {
|
||||
pub async fn call<M, H>(
|
||||
&self,
|
||||
endpoint: &Endpoint<M, H>,
|
||||
to: NodeID,
|
||||
msg: M,
|
||||
strat: RequestStrategy,
|
||||
) -> Result<M::Response, Error>
|
||||
where
|
||||
M: Message,
|
||||
H: EndpointHandler<M>,
|
||||
{
|
||||
self.call_arc(endpoint, to, Arc::new(msg), strat).await
|
||||
}
|
||||
|
||||
pub async fn call_arc<M, H>(
|
||||
&self,
|
||||
endpoint: &Endpoint<M, H>,
|
||||
to: NodeID,
|
||||
msg: Arc<M>,
|
||||
strat: RequestStrategy,
|
||||
) -> Result<M::Response, Error>
|
||||
where
|
||||
M: Message,
|
||||
H: EndpointHandler<M>,
|
||||
{
|
||||
select! {
|
||||
res = endpoint.call(&to, &msg, strat.rs_priority) => Ok(res?),
|
||||
_ = tokio::time::sleep(strat.rs_timeout) => Err(Error::Rpc(RpcError::Timeout)),
|
||||
}
|
||||
}
|
||||
|
||||
pub async fn call_many<M, H>(
|
||||
&self,
|
||||
endpoint: &Endpoint<M, H>,
|
||||
to: &[NodeID],
|
||||
msg: M,
|
||||
strat: RequestStrategy,
|
||||
) -> Vec<(NodeID, Result<M::Response, Error>)>
|
||||
where
|
||||
M: Message,
|
||||
H: EndpointHandler<M>,
|
||||
{
|
||||
let msg = Arc::new(msg);
|
||||
let resps = join_all(
|
||||
to.iter()
|
||||
.map(|to| self.call_arc(endpoint, *to, msg.clone(), strat)),
|
||||
)
|
||||
.await;
|
||||
to.iter()
|
||||
.cloned()
|
||||
.zip(resps.into_iter())
|
||||
.collect::<Vec<_>>()
|
||||
}
|
||||
|
||||
pub async fn broadcast<M, H>(
|
||||
&self,
|
||||
endpoint: &Endpoint<M, H>,
|
||||
msg: M,
|
||||
strat: RequestStrategy,
|
||||
) -> Vec<(NodeID, Result<M::Response, Error>)>
|
||||
where
|
||||
M: Message,
|
||||
H: EndpointHandler<M>,
|
||||
{
|
||||
let to = self
|
||||
.fullmesh
|
||||
.get_peer_list()
|
||||
.iter()
|
||||
.map(|p| p.id)
|
||||
.collect::<Vec<_>>();
|
||||
self.call_many(endpoint, &to[..], msg, strat).await
|
||||
}
|
||||
|
||||
/// Make a RPC call to multiple servers, returning either a Vec of responses, or an error if
|
||||
/// strategy could not be respected due to too many errors
|
||||
pub async fn try_call_many<M, H>(
|
||||
&self,
|
||||
endpoint: &Arc<Endpoint<M, H>>,
|
||||
to: &[NodeID],
|
||||
msg: M,
|
||||
strategy: RequestStrategy,
|
||||
) -> Result<Vec<M::Response>, Error>
|
||||
where
|
||||
M: Message + 'static,
|
||||
H: EndpointHandler<M> + 'static,
|
||||
{
|
||||
let msg = Arc::new(msg);
|
||||
let mut resp_stream = to
|
||||
.to_vec()
|
||||
.into_iter()
|
||||
.map(|to| {
|
||||
let self2 = self.clone();
|
||||
let msg = msg.clone();
|
||||
let endpoint2 = endpoint.clone();
|
||||
async move { self2.call_arc(&endpoint2, to, msg, strategy).await }
|
||||
})
|
||||
.collect::<FuturesUnordered<_>>();
|
||||
|
||||
let mut results = vec![];
|
||||
let mut errors = vec![];
|
||||
let quorum = strategy.rs_quorum.unwrap_or(to.len());
|
||||
|
||||
while let Some(resp) = resp_stream.next().await {
|
||||
match resp {
|
||||
Ok(msg) => {
|
||||
results.push(msg);
|
||||
if results.len() >= quorum {
|
||||
break;
|
||||
}
|
||||
}
|
||||
Err(e) => {
|
||||
errors.push(e);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if results.len() >= quorum {
|
||||
// Continue requests in background.
|
||||
// Continue the remaining requests immediately using tokio::spawn
|
||||
// but enqueue a task in the background runner
|
||||
// to ensure that the process won't exit until the requests are done
|
||||
// (if we had just enqueued the resp_stream.collect directly in the background runner,
|
||||
// the requests might have been put on hold in the background runner's queue,
|
||||
// in which case they might timeout or otherwise fail)
|
||||
if !strategy.rs_interrupt_after_quorum {
|
||||
let wait_finished_fut = tokio::spawn(async move {
|
||||
resp_stream.collect::<Vec<_>>().await;
|
||||
});
|
||||
self.background.spawn(wait_finished_fut.map(|_| Ok(())));
|
||||
}
|
||||
|
||||
Ok(results)
|
||||
} else {
|
||||
let errors = errors.iter().map(|e| format!("{}", e)).collect::<Vec<_>>();
|
||||
Err(Error::from(RpcError::TooManyErrors(errors)))
|
||||
}
|
||||
}
|
||||
}
|
@ -1,247 +0,0 @@
|
||||
//! Contains structs related to receiving RPCs
|
||||
use std::collections::HashMap;
|
||||
use std::net::SocketAddr;
|
||||
use std::pin::Pin;
|
||||
use std::sync::Arc;
|
||||
use std::time::Instant;
|
||||
|
||||
use futures::future::Future;
|
||||
use futures_util::future::*;
|
||||
use futures_util::stream::*;
|
||||
use hyper::server::conn::AddrStream;
|
||||
use hyper::service::{make_service_fn, service_fn};
|
||||
use hyper::{Body, Method, Request, Response, Server, StatusCode};
|
||||
use serde::{Deserialize, Serialize};
|
||||
use tokio::net::{TcpListener, TcpStream};
|
||||
use tokio_rustls::server::TlsStream;
|
||||
use tokio_rustls::TlsAcceptor;
|
||||
use tokio_stream::wrappers::TcpListenerStream;
|
||||
|
||||
use garage_util::config::TlsConfig;
|
||||
use garage_util::data::*;
|
||||
use garage_util::error::Error;
|
||||
|
||||
use crate::tls_util;
|
||||
|
||||
/// Trait for messages that can be sent as RPC
|
||||
pub trait RpcMessage: Serialize + for<'de> Deserialize<'de> + Send + Sync {}
|
||||
|
||||
type ResponseFuture = Pin<Box<dyn Future<Output = Result<Response<Body>, Error>> + Send>>;
|
||||
type Handler = Box<dyn Fn(Request<Body>, SocketAddr) -> ResponseFuture + Send + Sync>;
|
||||
|
||||
/// Structure handling RPCs
|
||||
pub struct RpcServer {
|
||||
/// The address the RpcServer will bind
|
||||
pub bind_addr: SocketAddr,
|
||||
/// The tls configuration used for RPC
|
||||
pub tls_config: Option<TlsConfig>,
|
||||
|
||||
handlers: HashMap<String, Handler>,
|
||||
}
|
||||
|
||||
async fn handle_func<M, F, Fut>(
|
||||
handler: Arc<F>,
|
||||
req: Request<Body>,
|
||||
sockaddr: SocketAddr,
|
||||
name: Arc<String>,
|
||||
) -> Result<Response<Body>, Error>
|
||||
where
|
||||
M: RpcMessage + 'static,
|
||||
F: Fn(M, SocketAddr) -> Fut + Send + Sync + 'static,
|
||||
Fut: Future<Output = Result<M, Error>> + Send + 'static,
|
||||
{
|
||||
let begin_time = Instant::now();
|
||||
let whole_body = hyper::body::to_bytes(req.into_body()).await?;
|
||||
let msg = rmp_serde::decode::from_read::<_, M>(&whole_body[..])?;
|
||||
|
||||
trace!(
|
||||
"Request message: {}",
|
||||
serde_json::to_string(&msg)
|
||||
.unwrap_or_else(|_| "<json error>".into())
|
||||
.chars()
|
||||
.take(100)
|
||||
.collect::<String>()
|
||||
);
|
||||
|
||||
match handler(msg, sockaddr).await {
|
||||
Ok(resp) => {
|
||||
let resp_bytes = rmp_to_vec_all_named::<Result<M, String>>(&Ok(resp))?;
|
||||
let rpc_duration = (Instant::now() - begin_time).as_millis();
|
||||
if rpc_duration > 100 {
|
||||
debug!("RPC {} ok, took long: {} ms", name, rpc_duration,);
|
||||
}
|
||||
Ok(Response::new(Body::from(resp_bytes)))
|
||||
}
|
||||
Err(e) => {
|
||||
let err_str = format!("{}", e);
|
||||
let rep_bytes = rmp_to_vec_all_named::<Result<M, String>>(&Err(err_str))?;
|
||||
let mut err_response = Response::new(Body::from(rep_bytes));
|
||||
*err_response.status_mut() = match e {
|
||||
Error::BadRpc(_) => StatusCode::BAD_REQUEST,
|
||||
_ => StatusCode::INTERNAL_SERVER_ERROR,
|
||||
};
|
||||
warn!(
|
||||
"RPC error ({}): {} ({} ms)",
|
||||
name,
|
||||
e,
|
||||
(Instant::now() - begin_time).as_millis(),
|
||||
);
|
||||
Ok(err_response)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl RpcServer {
|
||||
/// Create a new RpcServer
|
||||
pub fn new(bind_addr: SocketAddr, tls_config: Option<TlsConfig>) -> Self {
|
||||
Self {
|
||||
bind_addr,
|
||||
tls_config,
|
||||
handlers: HashMap::new(),
|
||||
}
|
||||
}
|
||||
|
||||
/// Add handler handling request made to `name`
|
||||
pub fn add_handler<M, F, Fut>(&mut self, name: String, handler: F)
|
||||
where
|
||||
M: RpcMessage + 'static,
|
||||
F: Fn(M, SocketAddr) -> Fut + Send + Sync + 'static,
|
||||
Fut: Future<Output = Result<M, Error>> + Send + 'static,
|
||||
{
|
||||
let name2 = Arc::new(name.clone());
|
||||
let handler_arc = Arc::new(handler);
|
||||
let handler = Box::new(move |req: Request<Body>, sockaddr: SocketAddr| {
|
||||
let handler2 = handler_arc.clone();
|
||||
let b: ResponseFuture = Box::pin(handle_func(handler2, req, sockaddr, name2.clone()));
|
||||
b
|
||||
});
|
||||
self.handlers.insert(name, handler);
|
||||
}
|
||||
|
||||
async fn handler(
|
||||
self: Arc<Self>,
|
||||
req: Request<Body>,
|
||||
addr: SocketAddr,
|
||||
) -> Result<Response<Body>, Error> {
|
||||
if req.method() != Method::POST {
|
||||
let mut bad_request = Response::default();
|
||||
*bad_request.status_mut() = StatusCode::BAD_REQUEST;
|
||||
return Ok(bad_request);
|
||||
}
|
||||
|
||||
let path = &req.uri().path()[1..].to_string();
|
||||
|
||||
let handler = match self.handlers.get(path) {
|
||||
Some(h) => h,
|
||||
None => {
|
||||
let mut not_found = Response::default();
|
||||
*not_found.status_mut() = StatusCode::NOT_FOUND;
|
||||
return Ok(not_found);
|
||||
}
|
||||
};
|
||||
|
||||
trace!("({}) Handling request", path);
|
||||
|
||||
let resp_waiter = tokio::spawn(handler(req, addr));
|
||||
match resp_waiter.await {
|
||||
Err(err) => {
|
||||
warn!("Handler await error: {}", err);
|
||||
let mut ise = Response::default();
|
||||
*ise.status_mut() = StatusCode::INTERNAL_SERVER_ERROR;
|
||||
Ok(ise)
|
||||
}
|
||||
Ok(Err(err)) => {
|
||||
trace!("({}) Request handler failed: {}", path, err);
|
||||
let mut bad_request = Response::new(Body::from(format!("{}", err)));
|
||||
*bad_request.status_mut() = StatusCode::BAD_REQUEST;
|
||||
Ok(bad_request)
|
||||
}
|
||||
Ok(Ok(resp)) => {
|
||||
trace!("({}) Request handler succeeded", path);
|
||||
Ok(resp)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Run the RpcServer
|
||||
pub async fn run(
|
||||
self: Arc<Self>,
|
||||
shutdown_signal: impl Future<Output = ()>,
|
||||
) -> Result<(), Error> {
|
||||
if let Some(tls_config) = self.tls_config.as_ref() {
|
||||
let ca_certs = tls_util::load_certs(&tls_config.ca_cert)?;
|
||||
let node_certs = tls_util::load_certs(&tls_config.node_cert)?;
|
||||
let node_key = tls_util::load_private_key(&tls_config.node_key)?;
|
||||
|
||||
let mut ca_store = rustls::RootCertStore::empty();
|
||||
for crt in ca_certs.iter() {
|
||||
ca_store.add(crt)?;
|
||||
}
|
||||
|
||||
let mut config =
|
||||
rustls::ServerConfig::new(rustls::AllowAnyAuthenticatedClient::new(ca_store));
|
||||
config.set_single_cert([&node_certs[..], &ca_certs[..]].concat(), node_key)?;
|
||||
let tls_acceptor = Arc::new(TlsAcceptor::from(Arc::new(config)));
|
||||
|
||||
let listener = TcpListener::bind(&self.bind_addr).await?;
|
||||
let incoming = TcpListenerStream::new(listener).filter_map(|socket| async {
|
||||
match socket {
|
||||
Ok(stream) => match tls_acceptor.clone().accept(stream).await {
|
||||
Ok(x) => Some(Ok::<_, hyper::Error>(x)),
|
||||
Err(_e) => None,
|
||||
},
|
||||
Err(_) => None,
|
||||
}
|
||||
});
|
||||
let incoming = hyper::server::accept::from_stream(incoming);
|
||||
|
||||
let self_arc = self.clone();
|
||||
let service = make_service_fn(|conn: &TlsStream<TcpStream>| {
|
||||
let client_addr = conn
|
||||
.get_ref()
|
||||
.0
|
||||
.peer_addr()
|
||||
.unwrap_or_else(|_| ([0, 0, 0, 0], 0).into());
|
||||
let self_arc = self_arc.clone();
|
||||
async move {
|
||||
Ok::<_, Error>(service_fn(move |req: Request<Body>| {
|
||||
self_arc.clone().handler(req, client_addr).map_err(|e| {
|
||||
warn!("RPC handler error: {}", e);
|
||||
e
|
||||
})
|
||||
}))
|
||||
}
|
||||
});
|
||||
|
||||
let server = Server::builder(incoming).serve(service);
|
||||
|
||||
let graceful = server.with_graceful_shutdown(shutdown_signal);
|
||||
info!("RPC server listening on http://{}", self.bind_addr);
|
||||
|
||||
graceful.await?;
|
||||
} else {
|
||||
let self_arc = self.clone();
|
||||
let service = make_service_fn(move |conn: &AddrStream| {
|
||||
let client_addr = conn.remote_addr();
|
||||
let self_arc = self_arc.clone();
|
||||
async move {
|
||||
Ok::<_, Error>(service_fn(move |req: Request<Body>| {
|
||||
self_arc.clone().handler(req, client_addr).map_err(|e| {
|
||||
warn!("RPC handler error: {}", e);
|
||||
e
|
||||
})
|
||||
}))
|
||||
}
|
||||
});
|
||||
|
||||
let server = Server::bind(&self.bind_addr).serve(service);
|
||||
|
||||
let graceful = server.with_graceful_shutdown(shutdown_signal);
|
||||
info!("RPC server listening on http://{}", self.bind_addr);
|
||||
|
||||
graceful.await?;
|
||||
}
|
||||
|
||||
Ok(())
|
||||
}
|
||||
}
|
363
src/rpc/system.rs
Normal file
363
src/rpc/system.rs
Normal file
@ -0,0 +1,363 @@
|
||||
//! Module containing structs related to membership management
|
||||
use std::io::{Read, Write};
|
||||
use std::net::SocketAddr;
|
||||
use std::path::{Path, PathBuf};
|
||||
use std::sync::Arc;
|
||||
use std::time::Duration;
|
||||
|
||||
use arc_swap::ArcSwap;
|
||||
use async_trait::async_trait;
|
||||
use futures::{join, select};
|
||||
use futures_util::future::*;
|
||||
use serde::{Deserialize, Serialize};
|
||||
use sodiumoxide::crypto::sign::ed25519;
|
||||
use tokio::sync::watch;
|
||||
use tokio::sync::Mutex;
|
||||
|
||||
use netapp::endpoint::{Endpoint, EndpointHandler, Message};
|
||||
use netapp::peering::fullmesh::FullMeshPeeringStrategy;
|
||||
use netapp::proto::*;
|
||||
use netapp::{NetApp, NetworkKey, NodeID, NodeKey};
|
||||
|
||||
use garage_util::background::BackgroundRunner;
|
||||
use garage_util::error::Error;
|
||||
use garage_util::persister::Persister;
|
||||
//use garage_util::time::*;
|
||||
|
||||
//use crate::consul::get_consul_nodes;
|
||||
use crate::ring::*;
|
||||
use crate::rpc_helper::{RequestStrategy, RpcHelper};
|
||||
|
||||
/// Interval between two discovery rounds
const DISCOVERY_INTERVAL: Duration = Duration::from_secs(60);
/// Timeout applied to pings
const PING_TIMEOUT: Duration = Duration::from_secs(2);

/// Path of the RPC endpoint used for calls related to membership
pub const SYSTEM_RPC_PATH: &str = "garage_rpc/membership.rs/SystemRpc";
|
||||
|
||||
/// RPC messages related to membership
#[derive(Debug, Serialize, Deserialize, Clone)]
pub enum SystemRpc {
    /// Response to successful advertisements
    Ok,
    /// Error response, carrying a textual description of the error
    Error(String),
    /// Ask other node its config. Answered with AdvertiseConfig
    PullConfig,
    /// Advertise Garage status. Answered with another AdvertiseStatus.
    /// Exchanged with every node on a regular basis.
    // NOTE(review): the `EndpointHandler` implementation for `System` in this
    // file has no arm for this variant and answers it with `Error` -- confirm
    // whether the status exchange is still to be ported.
    AdvertiseStatus(StateInfo),
    /// Advertisement of nodes config. Sent spontaneously or in response to PullConfig
    AdvertiseConfig(NetworkConfig),
    /// Get known nodes states
    GetKnownNodes,
    /// Return known nodes: one `(node id, address, is up)` entry per known peer
    ReturnKnownNodes(Vec<(NodeID, SocketAddr, bool)>),
}
|
||||
|
||||
/// A `SystemRpc` request is answered with another `SystemRpc` value
impl Message for SystemRpc {
    type Response = SystemRpc;
}
|
||||
|
||||
/// This node's membership manager
pub struct System {
    /// The id of this node
    pub id: NodeID,

    /// Persister for the network configuration (file `network_config` in the metadata directory)
    persist_config: Persister<NetworkConfig>,

    /// Current status information of this node, replaced as a whole when updated
    state_info: ArcSwap<StateInfo>,

    /// The netapp instance this node uses for RPC
    pub netapp: Arc<NetApp>,
    /// Full-mesh peering strategy, seeded with the bootstrap peers
    fullmesh: Arc<FullMeshPeeringStrategy>,
    /// Helper used to send RPC calls (single calls and broadcasts)
    pub rpc: RpcHelper,

    /// Endpoint registered under `SYSTEM_RPC_PATH`, handled by this `System`
    system_endpoint: Arc<Endpoint<SystemRpc, System>>,

    /// Address on which netapp listens for incoming connections
    rpc_listen_addr: SocketAddr,
    /// Peers to which a connection is attempted during bootstrap/discovery steps
    bootstrap_peers: Vec<(NodeID, SocketAddr)>,
    /// Consul host; currently only referenced from commented-out discovery code (TODO)
    consul_host: Option<String>,
    /// Consul service name; currently only referenced from commented-out discovery code (TODO)
    consul_service_name: Option<String>,
    /// Replication factor used when building a `Ring` from a network configuration
    replication_factor: usize,

    /// The ring
    pub ring: watch::Receiver<Arc<Ring>>,
    /// Sending side of the `ring` watch channel, behind a mutex taken while applying a new configuration
    update_ring: Mutex<watch::Sender<Arc<Ring>>>,

    /// The job runner of this node
    pub background: Arc<BackgroundRunner>,
}
|
||||
|
||||
/// Status information about a node, as carried by `SystemRpc::AdvertiseStatus`
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct StateInfo {
    /// Hostname of the node
    pub hostname: String,
    /// Replication factor configured on the node
    pub replication_factor: usize,
    /// Configuration version
    pub config_version: u64,
}
|
||||
|
||||
fn gen_node_key(metadata_dir: &Path) -> Result<NodeKey, Error> {
|
||||
let mut id_file = metadata_dir.to_path_buf();
|
||||
id_file.push("node_id");
|
||||
if id_file.as_path().exists() {
|
||||
let mut f = std::fs::File::open(id_file.as_path())?;
|
||||
let mut d = vec![];
|
||||
f.read_to_end(&mut d)?;
|
||||
if d.len() != 64 {
|
||||
return Err(Error::Message("Corrupt node_id file".to_string()));
|
||||
}
|
||||
|
||||
let mut key = [0u8; 64];
|
||||
key.copy_from_slice(&d[..]);
|
||||
Ok(NodeKey::from_slice(&key[..]).unwrap())
|
||||
} else {
|
||||
let (key, _) = ed25519::gen_keypair();
|
||||
|
||||
let mut f = std::fs::File::create(id_file.as_path())?;
|
||||
f.write_all(&key[..])?;
|
||||
Ok(NodeKey::from_slice(&key[..]).unwrap())
|
||||
}
|
||||
}
|
||||
|
||||
impl System {
|
||||
/// Create this node's membership manager
|
||||
pub fn new(
|
||||
network_key: NetworkKey,
|
||||
metadata_dir: PathBuf,
|
||||
background: Arc<BackgroundRunner>,
|
||||
replication_factor: usize,
|
||||
rpc_listen_addr: SocketAddr,
|
||||
bootstrap_peers: Vec<(NodeID, SocketAddr)>,
|
||||
consul_host: Option<String>,
|
||||
consul_service_name: Option<String>,
|
||||
) -> Arc<Self> {
|
||||
let node_key = gen_node_key(&metadata_dir).expect("Unable to read or generate node ID");
|
||||
info!("Node public key: {}", hex::encode(&node_key.public_key()));
|
||||
|
||||
let persist_config = Persister::new(&metadata_dir, "network_config");
|
||||
|
||||
let net_config = match persist_config.load() {
|
||||
Ok(x) => x,
|
||||
Err(e) => {
|
||||
match Persister::<garage_rpc_021::ring::NetworkConfig>::new(
|
||||
&metadata_dir,
|
||||
"network_config",
|
||||
)
|
||||
.load()
|
||||
{
|
||||
Ok(old_config) => NetworkConfig::migrate_from_021(old_config),
|
||||
Err(e2) => {
|
||||
info!(
|
||||
"No valid previous network configuration stored ({}, {}), starting fresh.",
|
||||
e, e2
|
||||
);
|
||||
NetworkConfig::new()
|
||||
}
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
let state_info = StateInfo {
|
||||
hostname: gethostname::gethostname()
|
||||
.into_string()
|
||||
.unwrap_or_else(|_| "<invalid utf-8>".to_string()),
|
||||
replication_factor: replication_factor,
|
||||
config_version: net_config.version,
|
||||
};
|
||||
|
||||
let ring = Ring::new(net_config, replication_factor);
|
||||
let (update_ring, ring) = watch::channel(Arc::new(ring));
|
||||
|
||||
let netapp = NetApp::new(network_key, node_key);
|
||||
let fullmesh = FullMeshPeeringStrategy::new(netapp.clone(), bootstrap_peers.clone());
|
||||
|
||||
let system_endpoint = netapp.endpoint(SYSTEM_RPC_PATH.into());
|
||||
|
||||
let sys = Arc::new(System {
|
||||
id: netapp.id.clone(),
|
||||
persist_config,
|
||||
state_info: ArcSwap::new(Arc::new(state_info)),
|
||||
netapp: netapp.clone(),
|
||||
fullmesh: fullmesh.clone(),
|
||||
rpc: RpcHelper {
|
||||
fullmesh: fullmesh.clone(),
|
||||
background: background.clone(),
|
||||
},
|
||||
system_endpoint,
|
||||
replication_factor,
|
||||
rpc_listen_addr,
|
||||
bootstrap_peers,
|
||||
consul_host,
|
||||
consul_service_name,
|
||||
ring,
|
||||
update_ring: Mutex::new(update_ring),
|
||||
background: background.clone(),
|
||||
});
|
||||
sys.system_endpoint.set_handler(sys.clone());
|
||||
sys
|
||||
}
|
||||
|
||||
/// Perform bootstraping, starting the ping loop
|
||||
pub async fn run(self: Arc<Self>, must_exit: watch::Receiver<bool>) {
|
||||
join!(
|
||||
self.netapp
|
||||
.clone()
|
||||
.listen(self.rpc_listen_addr, None, must_exit.clone()),
|
||||
self.fullmesh.clone().run(must_exit.clone()),
|
||||
self.discovery_loop(must_exit.clone()),
|
||||
);
|
||||
}
|
||||
|
||||
// ---- INTERNALS ----
|
||||
|
||||
/// Save network configuration to disc
|
||||
async fn save_network_config(self: Arc<Self>) -> Result<(), Error> {
|
||||
let ring: Arc<Ring> = self.ring.borrow().clone();
|
||||
self.persist_config
|
||||
.save_async(&ring.config)
|
||||
.await
|
||||
.expect("Cannot save current cluster configuration");
|
||||
Ok(())
|
||||
}
|
||||
|
||||
fn update_state_info(&self) {
|
||||
let mut new_si: StateInfo = self.state_info.load().as_ref().clone();
|
||||
|
||||
let ring = self.ring.borrow();
|
||||
new_si.config_version = ring.config.version;
|
||||
self.state_info.swap(Arc::new(new_si));
|
||||
}
|
||||
|
||||
fn handle_pull_config(&self) -> SystemRpc {
|
||||
let ring = self.ring.borrow().clone();
|
||||
SystemRpc::AdvertiseConfig(ring.config.clone())
|
||||
}
|
||||
|
||||
async fn handle_advertise_config(
|
||||
self: Arc<Self>,
|
||||
adv: &NetworkConfig,
|
||||
) -> Result<SystemRpc, Error> {
|
||||
let update_ring = self.update_ring.lock().await;
|
||||
let ring: Arc<Ring> = self.ring.borrow().clone();
|
||||
|
||||
if adv.version > ring.config.version {
|
||||
let ring = Ring::new(adv.clone(), self.replication_factor);
|
||||
update_ring.send(Arc::new(ring))?;
|
||||
drop(update_ring);
|
||||
|
||||
let self2 = self.clone();
|
||||
let adv2 = adv.clone();
|
||||
self.background.spawn_cancellable(async move {
|
||||
self2
|
||||
.rpc
|
||||
.broadcast(
|
||||
&self2.system_endpoint,
|
||||
SystemRpc::AdvertiseConfig(adv2),
|
||||
RequestStrategy::with_priority(PRIO_NORMAL),
|
||||
)
|
||||
.await;
|
||||
Ok(())
|
||||
});
|
||||
self.background.spawn(self.clone().save_network_config());
|
||||
}
|
||||
|
||||
Ok(SystemRpc::Ok)
|
||||
}
|
||||
|
||||
async fn discovery_loop(&self, mut stop_signal: watch::Receiver<bool>) {
|
||||
/* TODO
|
||||
let consul_config = match (&self.consul_host, &self.consul_service_name) {
|
||||
(Some(ch), Some(csn)) => Some((ch.clone(), csn.clone())),
|
||||
_ => None,
|
||||
};
|
||||
*/
|
||||
|
||||
while !*stop_signal.borrow() {
|
||||
let not_configured = self.ring.borrow().config.members.is_empty();
|
||||
let no_peers = self.fullmesh.get_peer_list().len() < self.replication_factor;
|
||||
let bad_peers = self
|
||||
.fullmesh
|
||||
.get_peer_list()
|
||||
.iter()
|
||||
.filter(|p| p.is_up())
|
||||
.count() != self.ring.borrow().config.members.len();
|
||||
|
||||
if not_configured || no_peers || bad_peers {
|
||||
info!("Doing a bootstrap/discovery step (not_configured: {}, no_peers: {}, bad_peers: {})", not_configured, no_peers, bad_peers);
|
||||
|
||||
let ping_list = self.bootstrap_peers.clone();
|
||||
|
||||
/*
|
||||
*TODO bring this back: persisted list of peers
|
||||
if let Ok(peers) = self.persist_status.load_async().await {
|
||||
ping_list.extend(peers.iter().map(|x| (x.addr, Some(x.id))));
|
||||
}
|
||||
*/
|
||||
|
||||
/*
|
||||
* TODO bring this back: get peers from consul
|
||||
if let Some((consul_host, consul_service_name)) = &consul_config {
|
||||
match get_consul_nodes(consul_host, consul_service_name).await {
|
||||
Ok(node_list) => {
|
||||
ping_list.extend(node_list.iter().map(|a| (*a, None)));
|
||||
}
|
||||
Err(e) => {
|
||||
warn!("Could not retrieve node list from Consul: {}", e);
|
||||
}
|
||||
}
|
||||
}
|
||||
*/
|
||||
|
||||
for (node_id, node_addr) in ping_list {
|
||||
tokio::spawn(self.netapp.clone().try_connect(node_addr, node_id));
|
||||
}
|
||||
}
|
||||
|
||||
let restart_at = tokio::time::sleep(DISCOVERY_INTERVAL);
|
||||
select! {
|
||||
_ = restart_at.fuse() => {},
|
||||
_ = stop_signal.changed().fuse() => {},
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
async fn pull_config(self: Arc<Self>, peer: NodeID) {
|
||||
let resp = self
|
||||
.rpc
|
||||
.call(
|
||||
&self.system_endpoint,
|
||||
peer,
|
||||
SystemRpc::PullConfig,
|
||||
RequestStrategy::with_priority(PRIO_HIGH).with_timeout(PING_TIMEOUT),
|
||||
)
|
||||
.await;
|
||||
if let Ok(SystemRpc::AdvertiseConfig(config)) = resp {
|
||||
let _: Result<_, _> = self.handle_advertise_config(&config).await;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#[async_trait]
|
||||
impl EndpointHandler<SystemRpc> for System {
|
||||
async fn handle(self: &Arc<Self>, msg: &SystemRpc, _from: NodeID) -> SystemRpc {
|
||||
let resp = match msg {
|
||||
SystemRpc::PullConfig => Ok(self.handle_pull_config()),
|
||||
SystemRpc::AdvertiseConfig(adv) => self.clone().handle_advertise_config(&adv).await,
|
||||
SystemRpc::GetKnownNodes => {
|
||||
let known_nodes = self
|
||||
.fullmesh
|
||||
.get_peer_list()
|
||||
.iter()
|
||||
.map(|n| (n.id, n.addr, n.is_up()))
|
||||
.collect::<Vec<_>>();
|
||||
Ok(SystemRpc::ReturnKnownNodes(known_nodes))
|
||||
}
|
||||
_ => Err(Error::BadRpc("Unexpected RPC message".to_string())),
|
||||
};
|
||||
match resp {
|
||||
Ok(r) => r,
|
||||
Err(e) => SystemRpc::Error(format!("{}", e)),
|
||||
}
|
||||
}
|
||||
}
|
@ -1,140 +0,0 @@
|
||||
use core::future::Future;
|
||||
use core::task::{Context, Poll};
|
||||
use std::pin::Pin;
|
||||
use std::sync::Arc;
|
||||
use std::{fs, io};
|
||||
|
||||
use futures_util::future::*;
|
||||
use hyper::client::connect::Connection;
|
||||
use hyper::client::HttpConnector;
|
||||
use hyper::service::Service;
|
||||
use hyper::Uri;
|
||||
use hyper_rustls::MaybeHttpsStream;
|
||||
use rustls::internal::pemfile;
|
||||
use tokio::io::{AsyncRead, AsyncWrite};
|
||||
use tokio_rustls::TlsConnector;
|
||||
use webpki::DNSNameRef;
|
||||
|
||||
use garage_util::error::Error;
|
||||
|
||||
pub fn load_certs(filename: &str) -> Result<Vec<rustls::Certificate>, Error> {
|
||||
let certfile = fs::File::open(&filename)?;
|
||||
let mut reader = io::BufReader::new(certfile);
|
||||
|
||||
let certs = pemfile::certs(&mut reader).map_err(|_| {
|
||||
Error::Message(format!(
|
||||
"Could not deecode certificates from file: {}",
|
||||
filename
|
||||
))
|
||||
})?;
|
||||
|
||||
if certs.is_empty() {
|
||||
return Err(Error::Message(format!(
|
||||
"Invalid certificate file: {}",
|
||||
filename
|
||||
)));
|
||||
}
|
||||
Ok(certs)
|
||||
}
|
||||
|
||||
pub fn load_private_key(filename: &str) -> Result<rustls::PrivateKey, Error> {
|
||||
let keydata = fs::read_to_string(filename)?;
|
||||
|
||||
let mut buf1 = keydata.as_bytes();
|
||||
let rsa_keys = pemfile::rsa_private_keys(&mut buf1).unwrap_or_default();
|
||||
|
||||
let mut buf2 = keydata.as_bytes();
|
||||
let pkcs8_keys = pemfile::pkcs8_private_keys(&mut buf2).unwrap_or_default();
|
||||
|
||||
let mut keys = rsa_keys;
|
||||
keys.extend(pkcs8_keys.into_iter());
|
||||
|
||||
if keys.len() != 1 {
|
||||
return Err(Error::Message(format!(
|
||||
"Invalid private key file: {} ({} private keys)",
|
||||
filename,
|
||||
keys.len()
|
||||
)));
|
||||
}
|
||||
Ok(keys[0].clone())
|
||||
}
|
||||
|
||||
// ---- AWFUL COPYPASTA FROM HYPER-RUSTLS connector.rs
// ---- ALWAYS USE `garage` AS HOSTNAME FOR TLS VERIFICATION

/// HTTP(S) connector that, for `https` URIs, performs the TLS handshake
/// against a fixed DNS name instead of the host name found in the URI.
#[derive(Clone)]
pub struct HttpsConnectorFixedDnsname<T> {
    /// Underlying connector used to establish the plain connection
    http: T,
    /// TLS client configuration used for `https` connections
    tls_config: Arc<rustls::ClientConfig>,
    /// DNS name handed to the TLS connector for every `https` connection
    fixed_dnsname: &'static str,
}

/// Boxed error type used as the error of the connector service
type BoxError = Box<dyn std::error::Error + Send + Sync>;
|
||||
|
||||
impl HttpsConnectorFixedDnsname<HttpConnector> {
|
||||
pub fn new(mut tls_config: rustls::ClientConfig, fixed_dnsname: &'static str) -> Self {
|
||||
let mut http = HttpConnector::new();
|
||||
http.enforce_http(false);
|
||||
tls_config.alpn_protocols = vec![b"h2".to_vec(), b"http/1.1".to_vec()];
|
||||
Self {
|
||||
http,
|
||||
tls_config: Arc::new(tls_config),
|
||||
fixed_dnsname,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl<T> Service<Uri> for HttpsConnectorFixedDnsname<T>
|
||||
where
|
||||
T: Service<Uri>,
|
||||
T::Response: Connection + AsyncRead + AsyncWrite + Send + Unpin + 'static,
|
||||
T::Future: Send + 'static,
|
||||
T::Error: Into<BoxError>,
|
||||
{
|
||||
type Response = MaybeHttpsStream<T::Response>;
|
||||
type Error = BoxError;
|
||||
|
||||
#[allow(clippy::type_complexity)]
|
||||
type Future =
|
||||
Pin<Box<dyn Future<Output = Result<MaybeHttpsStream<T::Response>, BoxError>> + Send>>;
|
||||
|
||||
fn poll_ready(&mut self, cx: &mut Context<'_>) -> Poll<Result<(), Self::Error>> {
|
||||
match self.http.poll_ready(cx) {
|
||||
Poll::Ready(Ok(())) => Poll::Ready(Ok(())),
|
||||
Poll::Ready(Err(e)) => Poll::Ready(Err(e.into())),
|
||||
Poll::Pending => Poll::Pending,
|
||||
}
|
||||
}
|
||||
|
||||
fn call(&mut self, dst: Uri) -> Self::Future {
|
||||
let is_https = dst.scheme_str() == Some("https");
|
||||
|
||||
if !is_https {
|
||||
let connecting_future = self.http.call(dst);
|
||||
|
||||
let f = async move {
|
||||
let tcp = connecting_future.await.map_err(Into::into)?;
|
||||
|
||||
Ok(MaybeHttpsStream::Http(tcp))
|
||||
};
|
||||
f.boxed()
|
||||
} else {
|
||||
let cfg = self.tls_config.clone();
|
||||
let connecting_future = self.http.call(dst);
|
||||
|
||||
let dnsname =
|
||||
DNSNameRef::try_from_ascii_str(self.fixed_dnsname).expect("Invalid fixed dnsname");
|
||||
|
||||
let f = async move {
|
||||
let tcp = connecting_future.await.map_err(Into::into)?;
|
||||
let connector = TlsConnector::from(cfg);
|
||||
let tls = connector
|
||||
.connect(dnsname, tcp)
|
||||
.await
|
||||
.map_err(|e| io::Error::new(io::ErrorKind::Other, e))?;
|
||||
Ok(MaybeHttpsStream::Https(tls))
|
||||
};
|
||||
f.boxed()
|
||||
}
|
||||
}
|
||||
}
|
@ -1,6 +1,6 @@
|
||||
[package]
|
||||
name = "garage_table"
|
||||
version = "0.3.0"
|
||||
version = "0.4.0"
|
||||
authors = ["Alex Auvolat <alex@adnab.me>"]
|
||||
edition = "2018"
|
||||
license = "AGPL-3.0"
|
||||
@ -13,9 +13,10 @@ path = "lib.rs"
|
||||
# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
|
||||
|
||||
[dependencies]
|
||||
garage_rpc = { version = "0.3.0", path = "../rpc" }
|
||||
garage_util = { version = "0.3.0", path = "../util" }
|
||||
garage_rpc = { version = "0.4.0", path = "../rpc" }
|
||||
garage_util = { version = "0.4.0", path = "../util" }
|
||||
|
||||
async-trait = "0.1.7"
|
||||
bytes = "1.0"
|
||||
hexdump = "0.1"
|
||||
log = "0.4"
|
||||
@ -30,4 +31,3 @@ serde_bytes = "0.11"
|
||||
futures = "0.3"
|
||||
futures-util = "0.3"
|
||||
tokio = { version = "1.0", default-features = false, features = ["rt", "rt-multi-thread", "io-util", "net", "time", "macros", "sync", "signal", "fs"] }
|
||||
|
||||
|
@ -9,7 +9,7 @@ use tokio::sync::Notify;
|
||||
use garage_util::data::*;
|
||||
use garage_util::error::*;
|
||||
|
||||
use garage_rpc::membership::System;
|
||||
use garage_rpc::system::System;
|
||||
|
||||
use crate::crdt::Crdt;
|
||||
use crate::replication::*;
|
||||
|
@ -2,6 +2,7 @@ use std::collections::HashMap;
|
||||
use std::sync::Arc;
|
||||
use std::time::Duration;
|
||||
|
||||
use async_trait::async_trait;
|
||||
use serde::{Deserialize, Serialize};
|
||||
use serde_bytes::ByteBuf;
|
||||
|
||||
@ -13,9 +14,8 @@ use tokio::sync::watch;
|
||||
use garage_util::data::*;
|
||||
use garage_util::error::Error;
|
||||
|
||||
use garage_rpc::membership::System;
|
||||
use garage_rpc::rpc_client::*;
|
||||
use garage_rpc::rpc_server::*;
|
||||
use garage_rpc::system::System;
|
||||
use garage_rpc::*;
|
||||
|
||||
use crate::data::*;
|
||||
use crate::replication::*;
|
||||
@ -24,11 +24,11 @@ use crate::schema::*;
|
||||
const TABLE_GC_BATCH_SIZE: usize = 1024;
|
||||
const TABLE_GC_RPC_TIMEOUT: Duration = Duration::from_secs(30);
|
||||
|
||||
pub struct TableGc<F: TableSchema, R: TableReplication> {
|
||||
pub struct TableGc<F: TableSchema + 'static, R: TableReplication + 'static> {
|
||||
system: Arc<System>,
|
||||
data: Arc<TableData<F, R>>,
|
||||
|
||||
rpc_client: Arc<RpcClient<GcRpc>>,
|
||||
endpoint: Arc<Endpoint<GcRpc, Self>>,
|
||||
}
|
||||
|
||||
#[derive(Serialize, Deserialize)]
|
||||
@ -36,30 +36,30 @@ enum GcRpc {
|
||||
Update(Vec<ByteBuf>),
|
||||
DeleteIfEqualHash(Vec<(ByteBuf, Hash)>),
|
||||
Ok,
|
||||
Error(String),
|
||||
}
|
||||
|
||||
impl RpcMessage for GcRpc {}
|
||||
impl Message for GcRpc {
|
||||
type Response = GcRpc;
|
||||
}
|
||||
|
||||
impl<F, R> TableGc<F, R>
|
||||
where
|
||||
F: TableSchema + 'static,
|
||||
R: TableReplication + 'static,
|
||||
{
|
||||
pub(crate) fn launch(
|
||||
system: Arc<System>,
|
||||
data: Arc<TableData<F, R>>,
|
||||
rpc_server: &mut RpcServer,
|
||||
) -> Arc<Self> {
|
||||
let rpc_path = format!("table_{}/gc", data.name);
|
||||
let rpc_client = system.rpc_client::<GcRpc>(&rpc_path);
|
||||
pub(crate) fn launch(system: Arc<System>, data: Arc<TableData<F, R>>) -> Arc<Self> {
|
||||
let endpoint = system
|
||||
.netapp
|
||||
.endpoint(format!("garage_table/gc.rs/Rpc:{}", data.name));
|
||||
|
||||
let gc = Arc::new(Self {
|
||||
system: system.clone(),
|
||||
data: data.clone(),
|
||||
rpc_client,
|
||||
endpoint,
|
||||
});
|
||||
|
||||
gc.register_handler(rpc_server, rpc_path);
|
||||
gc.endpoint.set_handler(gc.clone());
|
||||
|
||||
let gc1 = gc.clone();
|
||||
system.background.spawn_worker(
|
||||
@ -168,7 +168,7 @@ where
|
||||
|
||||
async fn try_send_and_delete(
|
||||
&self,
|
||||
nodes: Vec<Uuid>,
|
||||
nodes: Vec<NodeID>,
|
||||
items: Vec<(ByteBuf, Hash, ByteBuf)>,
|
||||
) -> Result<(), Error> {
|
||||
let n_items = items.len();
|
||||
@ -180,11 +180,15 @@ where
|
||||
deletes.push((k, vhash));
|
||||
}
|
||||
|
||||
self.rpc_client
|
||||
self.system
|
||||
.rpc
|
||||
.try_call_many(
|
||||
&self.endpoint,
|
||||
&nodes[..],
|
||||
GcRpc::Update(updates),
|
||||
RequestStrategy::with_quorum(nodes.len()).with_timeout(TABLE_GC_RPC_TIMEOUT),
|
||||
RequestStrategy::with_priority(PRIO_BACKGROUND)
|
||||
.with_quorum(nodes.len())
|
||||
.with_timeout(TABLE_GC_RPC_TIMEOUT),
|
||||
)
|
||||
.await?;
|
||||
|
||||
@ -193,11 +197,15 @@ where
|
||||
self.data.name, n_items
|
||||
);
|
||||
|
||||
self.rpc_client
|
||||
self.system
|
||||
.rpc
|
||||
.try_call_many(
|
||||
&self.endpoint,
|
||||
&nodes[..],
|
||||
GcRpc::DeleteIfEqualHash(deletes.clone()),
|
||||
RequestStrategy::with_quorum(nodes.len()).with_timeout(TABLE_GC_RPC_TIMEOUT),
|
||||
RequestStrategy::with_priority(PRIO_BACKGROUND)
|
||||
.with_quorum(nodes.len())
|
||||
.with_timeout(TABLE_GC_RPC_TIMEOUT),
|
||||
)
|
||||
.await?;
|
||||
|
||||
@ -217,24 +225,7 @@ where
|
||||
Ok(())
|
||||
}
|
||||
|
||||
// ---- RPC HANDLER ----
|
||||
|
||||
fn register_handler(self: &Arc<Self>, rpc_server: &mut RpcServer, path: String) {
|
||||
let self2 = self.clone();
|
||||
rpc_server.add_handler::<GcRpc, _, _>(path, move |msg, _addr| {
|
||||
let self2 = self2.clone();
|
||||
async move { self2.handle_rpc(&msg).await }
|
||||
});
|
||||
|
||||
let self2 = self.clone();
|
||||
self.rpc_client
|
||||
.set_local_handler(self.system.id, move |msg| {
|
||||
let self2 = self2.clone();
|
||||
async move { self2.handle_rpc(&msg).await }
|
||||
});
|
||||
}
|
||||
|
||||
async fn handle_rpc(self: &Arc<Self>, message: &GcRpc) -> Result<GcRpc, Error> {
|
||||
async fn handle_rpc(&self, message: &GcRpc) -> Result<GcRpc, Error> {
|
||||
match message {
|
||||
GcRpc::Update(items) => {
|
||||
self.data.update_many(items)?;
|
||||
@ -251,3 +242,16 @@ where
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#[async_trait]
|
||||
impl<F, R> EndpointHandler<GcRpc> for TableGc<F, R>
|
||||
where
|
||||
F: TableSchema + 'static,
|
||||
R: TableReplication + 'static,
|
||||
{
|
||||
async fn handle(self: &Arc<Self>, message: &GcRpc, _from: NodeID) -> GcRpc {
|
||||
self.handle_rpc(message)
|
||||
.await
|
||||
.unwrap_or_else(|e| GcRpc::Error(format!("{}", e)))
|
||||
}
|
||||
}
|
||||
|
@ -1,7 +1,8 @@
|
||||
use std::sync::Arc;
|
||||
|
||||
use garage_rpc::membership::System;
|
||||
use garage_rpc::ring::*;
|
||||
use garage_rpc::system::System;
|
||||
use garage_rpc::NodeID;
|
||||
use garage_util::data::*;
|
||||
|
||||
use crate::replication::*;
|
||||
@ -19,16 +20,20 @@ pub struct TableFullReplication {
|
||||
}
|
||||
|
||||
impl TableReplication for TableFullReplication {
|
||||
fn read_nodes(&self, _hash: &Hash) -> Vec<Uuid> {
|
||||
fn read_nodes(&self, _hash: &Hash) -> Vec<NodeID> {
|
||||
vec![self.system.id]
|
||||
}
|
||||
fn read_quorum(&self) -> usize {
|
||||
1
|
||||
}
|
||||
|
||||
fn write_nodes(&self, _hash: &Hash) -> Vec<Uuid> {
|
||||
fn write_nodes(&self, _hash: &Hash) -> Vec<NodeID> {
|
||||
let ring = self.system.ring.borrow();
|
||||
ring.config.members.keys().cloned().collect::<Vec<_>>()
|
||||
ring.config
|
||||
.members
|
||||
.keys()
|
||||
.map(|id| NodeID::from_slice(id.as_slice()).unwrap())
|
||||
.collect::<Vec<_>>()
|
||||
}
|
||||
fn write_quorum(&self) -> usize {
|
||||
let nmembers = self.system.ring.borrow().config.members.len();
|
||||
|
@ -1,5 +1,5 @@
|
||||
use garage_rpc::ring::*;
|
||||
|
||||
use garage_rpc::NodeID;
|
||||
use garage_util::data::*;
|
||||
|
||||
/// Trait to describe how a table shall be replicated
|
||||
@ -8,12 +8,12 @@ pub trait TableReplication: Send + Sync {
|
||||
// To understand various replication methods
|
||||
|
||||
/// Which nodes to send read requests to
|
||||
fn read_nodes(&self, hash: &Hash) -> Vec<Uuid>;
|
||||
fn read_nodes(&self, hash: &Hash) -> Vec<NodeID>;
|
||||
/// Responses needed to consider a read successful
|
||||
fn read_quorum(&self) -> usize;
|
||||
|
||||
/// Which nodes to send writes to
|
||||
fn write_nodes(&self, hash: &Hash) -> Vec<Uuid>;
|
||||
fn write_nodes(&self, hash: &Hash) -> Vec<NodeID>;
|
||||
/// Responses needed to consider a write successful
|
||||
fn write_quorum(&self) -> usize;
|
||||
fn max_write_errors(&self) -> usize;
|
||||
|
@ -1,7 +1,8 @@
|
||||
use std::sync::Arc;
|
||||
|
||||
use garage_rpc::membership::System;
|
||||
use garage_rpc::ring::*;
|
||||
use garage_rpc::system::System;
|
||||
use garage_rpc::NodeID;
|
||||
use garage_util::data::*;
|
||||
|
||||
use crate::replication::*;
|
||||
@ -25,7 +26,7 @@ pub struct TableShardedReplication {
|
||||
}
|
||||
|
||||
impl TableReplication for TableShardedReplication {
|
||||
fn read_nodes(&self, hash: &Hash) -> Vec<Uuid> {
|
||||
fn read_nodes(&self, hash: &Hash) -> Vec<NodeID> {
|
||||
let ring = self.system.ring.borrow();
|
||||
ring.get_nodes(&hash, self.replication_factor)
|
||||
}
|
||||
@ -33,7 +34,7 @@ impl TableReplication for TableShardedReplication {
|
||||
self.read_quorum
|
||||
}
|
||||
|
||||
fn write_nodes(&self, hash: &Hash) -> Vec<Uuid> {
|
||||
fn write_nodes(&self, hash: &Hash) -> Vec<NodeID> {
|
||||
let ring = self.system.ring.borrow();
|
||||
ring.get_nodes(&hash, self.replication_factor)
|
||||
}
|
||||
|
@ -2,6 +2,7 @@ use std::collections::VecDeque;
|
||||
use std::sync::{Arc, Mutex};
|
||||
use std::time::{Duration, Instant};
|
||||
|
||||
use async_trait::async_trait;
|
||||
use futures::select;
|
||||
use futures_util::future::*;
|
||||
use futures_util::stream::*;
|
||||
@ -13,10 +14,9 @@ use tokio::sync::{mpsc, watch};
|
||||
use garage_util::data::*;
|
||||
use garage_util::error::Error;
|
||||
|
||||
use garage_rpc::membership::System;
|
||||
use garage_rpc::ring::*;
|
||||
use garage_rpc::rpc_client::*;
|
||||
use garage_rpc::rpc_server::*;
|
||||
use garage_rpc::system::System;
|
||||
use garage_rpc::*;
|
||||
|
||||
use crate::data::*;
|
||||
use crate::merkle::*;
|
||||
@ -28,13 +28,13 @@ const TABLE_SYNC_RPC_TIMEOUT: Duration = Duration::from_secs(30);
|
||||
// Do anti-entropy every 10 minutes
|
||||
const ANTI_ENTROPY_INTERVAL: Duration = Duration::from_secs(10 * 60);
|
||||
|
||||
pub struct TableSyncer<F: TableSchema, R: TableReplication> {
|
||||
pub struct TableSyncer<F: TableSchema + 'static, R: TableReplication + 'static> {
|
||||
system: Arc<System>,
|
||||
data: Arc<TableData<F, R>>,
|
||||
merkle: Arc<MerkleUpdater<F, R>>,
|
||||
|
||||
todo: Mutex<SyncTodo>,
|
||||
rpc_client: Arc<RpcClient<SyncRpc>>,
|
||||
endpoint: Arc<Endpoint<SyncRpc, Self>>,
|
||||
}
|
||||
|
||||
#[derive(Serialize, Deserialize)]
|
||||
@ -45,9 +45,12 @@ pub(crate) enum SyncRpc {
|
||||
Node(MerkleNodeKey, MerkleNode),
|
||||
Items(Vec<Arc<ByteBuf>>),
|
||||
Ok,
|
||||
Error(String),
|
||||
}
|
||||
|
||||
impl RpcMessage for SyncRpc {}
|
||||
impl Message for SyncRpc {
|
||||
type Response = SyncRpc;
|
||||
}
|
||||
|
||||
struct SyncTodo {
|
||||
todo: Vec<TodoPartition>,
|
||||
@ -72,10 +75,10 @@ where
|
||||
system: Arc<System>,
|
||||
data: Arc<TableData<F, R>>,
|
||||
merkle: Arc<MerkleUpdater<F, R>>,
|
||||
rpc_server: &mut RpcServer,
|
||||
) -> Arc<Self> {
|
||||
let rpc_path = format!("table_{}/sync", data.name);
|
||||
let rpc_client = system.rpc_client::<SyncRpc>(&rpc_path);
|
||||
let endpoint = system
|
||||
.netapp
|
||||
.endpoint(format!("garage_table/sync.rs/Rpc:{}", data.name));
|
||||
|
||||
let todo = SyncTodo { todo: vec![] };
|
||||
|
||||
@ -84,10 +87,10 @@ where
|
||||
data: data.clone(),
|
||||
merkle,
|
||||
todo: Mutex::new(todo),
|
||||
rpc_client,
|
||||
endpoint,
|
||||
});
|
||||
|
||||
syncer.register_handler(rpc_server, rpc_path);
|
||||
syncer.endpoint.set_handler(syncer.clone());
|
||||
|
||||
let (busy_tx, busy_rx) = mpsc::unbounded_channel();
|
||||
|
||||
@ -112,21 +115,6 @@ where
|
||||
syncer
|
||||
}
|
||||
|
||||
fn register_handler(self: &Arc<Self>, rpc_server: &mut RpcServer, path: String) {
|
||||
let self2 = self.clone();
|
||||
rpc_server.add_handler::<SyncRpc, _, _>(path, move |msg, _addr| {
|
||||
let self2 = self2.clone();
|
||||
async move { self2.handle_rpc(&msg).await }
|
||||
});
|
||||
|
||||
let self2 = self.clone();
|
||||
self.rpc_client
|
||||
.set_local_handler(self.system.id, move |msg| {
|
||||
let self2 = self2.clone();
|
||||
async move { self2.handle_rpc(&msg).await }
|
||||
});
|
||||
}
|
||||
|
||||
async fn watcher_task(
|
||||
self: Arc<Self>,
|
||||
mut must_exit: watch::Receiver<bool>,
|
||||
@ -317,15 +305,19 @@ where
|
||||
async fn offload_items(
|
||||
self: &Arc<Self>,
|
||||
items: &[(Vec<u8>, Arc<ByteBuf>)],
|
||||
nodes: &[Uuid],
|
||||
nodes: &[NodeID],
|
||||
) -> Result<(), Error> {
|
||||
let values = items.iter().map(|(_k, v)| v.clone()).collect::<Vec<_>>();
|
||||
|
||||
self.rpc_client
|
||||
self.system
|
||||
.rpc
|
||||
.try_call_many(
|
||||
&self.endpoint,
|
||||
nodes,
|
||||
SyncRpc::Items(values),
|
||||
RequestStrategy::with_quorum(nodes.len()).with_timeout(TABLE_SYNC_RPC_TIMEOUT),
|
||||
RequestStrategy::with_priority(PRIO_BACKGROUND)
|
||||
.with_quorum(nodes.len())
|
||||
.with_timeout(TABLE_SYNC_RPC_TIMEOUT),
|
||||
)
|
||||
.await?;
|
||||
|
||||
@ -362,7 +354,7 @@ where
|
||||
async fn do_sync_with(
|
||||
self: Arc<Self>,
|
||||
partition: TodoPartition,
|
||||
who: Uuid,
|
||||
who: NodeID,
|
||||
must_exit: watch::Receiver<bool>,
|
||||
) -> Result<(), Error> {
|
||||
let (root_ck_key, root_ck) = self.get_root_ck(partition.partition)?;
|
||||
@ -378,11 +370,14 @@ where
|
||||
// Check if they have the same root checksum
|
||||
// If so, do nothing.
|
||||
let root_resp = self
|
||||
.rpc_client
|
||||
.system
|
||||
.rpc
|
||||
.call(
|
||||
&self.endpoint,
|
||||
who,
|
||||
SyncRpc::RootCkHash(partition.partition, root_ck_hash),
|
||||
TABLE_SYNC_RPC_TIMEOUT,
|
||||
RequestStrategy::with_priority(PRIO_BACKGROUND)
|
||||
.with_timeout(TABLE_SYNC_RPC_TIMEOUT),
|
||||
)
|
||||
.await?;
|
||||
|
||||
@ -430,8 +425,15 @@ where
|
||||
// Get Merkle node for this tree position at remote node
|
||||
// and compare it with local node
|
||||
let remote_node = match self
|
||||
.rpc_client
|
||||
.call(who, SyncRpc::GetNode(key.clone()), TABLE_SYNC_RPC_TIMEOUT)
|
||||
.system
|
||||
.rpc
|
||||
.call(
|
||||
&self.endpoint,
|
||||
who,
|
||||
SyncRpc::GetNode(key.clone()),
|
||||
RequestStrategy::with_priority(PRIO_BACKGROUND)
|
||||
.with_timeout(TABLE_SYNC_RPC_TIMEOUT),
|
||||
)
|
||||
.await?
|
||||
{
|
||||
SyncRpc::Node(_, node) => node,
|
||||
@ -478,7 +480,7 @@ where
|
||||
Ok(())
|
||||
}
|
||||
|
||||
async fn send_items(&self, who: Uuid, item_value_list: Vec<Vec<u8>>) -> Result<(), Error> {
|
||||
async fn send_items(&self, who: NodeID, item_value_list: Vec<Vec<u8>>) -> Result<(), Error> {
|
||||
info!(
|
||||
"({}) Sending {} items to {:?}",
|
||||
self.data.name,
|
||||
@ -492,8 +494,15 @@ where
|
||||
.collect::<Vec<_>>();
|
||||
|
||||
let rpc_resp = self
|
||||
.rpc_client
|
||||
.call(who, SyncRpc::Items(values), TABLE_SYNC_RPC_TIMEOUT)
|
||||
.system
|
||||
.rpc
|
||||
.call(
|
||||
&self.endpoint,
|
||||
who,
|
||||
SyncRpc::Items(values),
|
||||
RequestStrategy::with_priority(PRIO_BACKGROUND)
|
||||
.with_timeout(TABLE_SYNC_RPC_TIMEOUT),
|
||||
)
|
||||
.await?;
|
||||
if let SyncRpc::Ok = rpc_resp {
|
||||
Ok(())
|
||||
@ -506,7 +515,6 @@ where
|
||||
}
|
||||
|
||||
// ======= SYNCHRONIZATION PROCEDURE -- RECEIVER SIDE ======
|
||||
|
||||
async fn handle_rpc(self: &Arc<Self>, message: &SyncRpc) -> Result<SyncRpc, Error> {
|
||||
match message {
|
||||
SyncRpc::RootCkHash(range, h) => {
|
||||
@ -527,6 +535,19 @@ where
|
||||
}
|
||||
}
|
||||
|
||||
#[async_trait]
|
||||
impl<F, R> EndpointHandler<SyncRpc> for TableSyncer<F, R>
|
||||
where
|
||||
F: TableSchema + 'static,
|
||||
R: TableReplication + 'static,
|
||||
{
|
||||
async fn handle(self: &Arc<Self>, message: &SyncRpc, _from: NodeID) -> SyncRpc {
|
||||
self.handle_rpc(message)
|
||||
.await
|
||||
.unwrap_or_else(|e| SyncRpc::Error(format!("{}", e)))
|
||||
}
|
||||
}
|
||||
|
||||
impl SyncTodo {
|
||||
fn add_full_sync<F: TableSchema, R: TableReplication>(
|
||||
&mut self,
|
||||
|
@ -2,6 +2,7 @@ use std::collections::{BTreeMap, HashMap};
|
||||
use std::sync::Arc;
|
||||
use std::time::Duration;
|
||||
|
||||
use async_trait::async_trait;
|
||||
use futures::stream::*;
|
||||
use serde::{Deserialize, Serialize};
|
||||
use serde_bytes::ByteBuf;
|
||||
@ -9,9 +10,8 @@ use serde_bytes::ByteBuf;
|
||||
use garage_util::data::*;
|
||||
use garage_util::error::Error;
|
||||
|
||||
use garage_rpc::membership::System;
|
||||
use garage_rpc::rpc_client::*;
|
||||
use garage_rpc::rpc_server::*;
|
||||
use garage_rpc::system::System;
|
||||
use garage_rpc::*;
|
||||
|
||||
use crate::crdt::Crdt;
|
||||
use crate::data::*;
|
||||
@ -23,17 +23,18 @@ use crate::sync::*;
|
||||
|
||||
const TABLE_RPC_TIMEOUT: Duration = Duration::from_secs(10);
|
||||
|
||||
pub struct Table<F: TableSchema, R: TableReplication> {
|
||||
pub struct Table<F: TableSchema + 'static, R: TableReplication + 'static> {
|
||||
pub system: Arc<System>,
|
||||
pub data: Arc<TableData<F, R>>,
|
||||
pub merkle_updater: Arc<MerkleUpdater<F, R>>,
|
||||
pub syncer: Arc<TableSyncer<F, R>>,
|
||||
rpc_client: Arc<RpcClient<TableRpc<F>>>,
|
||||
endpoint: Arc<Endpoint<TableRpc<F>, Self>>,
|
||||
}
|
||||
|
||||
#[derive(Serialize, Deserialize)]
|
||||
pub(crate) enum TableRpc<F: TableSchema> {
|
||||
Ok,
|
||||
Error(String),
|
||||
|
||||
ReadEntry(F::P, F::S),
|
||||
ReadEntryResponse(Option<ByteBuf>),
|
||||
@ -44,7 +45,9 @@ pub(crate) enum TableRpc<F: TableSchema> {
|
||||
Update(Vec<Arc<ByteBuf>>),
|
||||
}
|
||||
|
||||
impl<F: TableSchema> RpcMessage for TableRpc<F> {}
|
||||
// Marks `TableRpc<F>` as an RPC message; the reply to a `TableRpc<F>`
// request is itself a `TableRpc<F>` value.
impl<F: TableSchema> Message for TableRpc<F> {
|
||||
type Response = TableRpc<F>;
|
||||
}
|
||||
|
||||
impl<F, R> Table<F, R>
|
||||
where
|
||||
@ -59,32 +62,27 @@ where
|
||||
system: Arc<System>,
|
||||
db: &sled::Db,
|
||||
name: String,
|
||||
rpc_server: &mut RpcServer,
|
||||
) -> Arc<Self> {
|
||||
let rpc_path = format!("table_{}", name);
|
||||
let rpc_client = system.rpc_client::<TableRpc<F>>(&rpc_path);
|
||||
let endpoint = system
|
||||
.netapp
|
||||
.endpoint(format!("garage_table/table.rs/Rpc:{}", name));
|
||||
|
||||
let data = TableData::new(system.clone(), name, instance, replication, db);
|
||||
|
||||
let merkle_updater = MerkleUpdater::launch(&system.background, data.clone());
|
||||
|
||||
let syncer = TableSyncer::launch(
|
||||
system.clone(),
|
||||
data.clone(),
|
||||
merkle_updater.clone(),
|
||||
rpc_server,
|
||||
);
|
||||
TableGc::launch(system.clone(), data.clone(), rpc_server);
|
||||
let syncer = TableSyncer::launch(system.clone(), data.clone(), merkle_updater.clone());
|
||||
TableGc::launch(system.clone(), data.clone());
|
||||
|
||||
let table = Arc::new(Self {
|
||||
system,
|
||||
data,
|
||||
merkle_updater,
|
||||
syncer,
|
||||
rpc_client,
|
||||
endpoint,
|
||||
});
|
||||
|
||||
table.clone().register_handler(rpc_server, rpc_path);
|
||||
table.endpoint.set_handler(table.clone());
|
||||
|
||||
table
|
||||
}
|
||||
@ -97,11 +95,14 @@ where
|
||||
let e_enc = Arc::new(ByteBuf::from(rmp_to_vec_all_named(e)?));
|
||||
let rpc = TableRpc::<F>::Update(vec![e_enc]);
|
||||
|
||||
self.rpc_client
|
||||
self.system
|
||||
.rpc
|
||||
.try_call_many(
|
||||
&self.endpoint,
|
||||
&who[..],
|
||||
rpc,
|
||||
RequestStrategy::with_quorum(self.data.replication.write_quorum())
|
||||
RequestStrategy::with_priority(PRIO_NORMAL)
|
||||
.with_quorum(self.data.replication.write_quorum())
|
||||
.with_timeout(TABLE_RPC_TIMEOUT),
|
||||
)
|
||||
.await?;
|
||||
@ -123,7 +124,16 @@ where
|
||||
let call_futures = call_list.drain().map(|(node, entries)| async move {
|
||||
let rpc = TableRpc::<F>::Update(entries);
|
||||
|
||||
let resp = self.rpc_client.call(node, rpc, TABLE_RPC_TIMEOUT).await?;
|
||||
let resp = self
|
||||
.system
|
||||
.rpc
|
||||
.call(
|
||||
&self.endpoint,
|
||||
node,
|
||||
rpc,
|
||||
RequestStrategy::with_priority(PRIO_NORMAL).with_timeout(TABLE_RPC_TIMEOUT),
|
||||
)
|
||||
.await?;
|
||||
Ok::<_, Error>((node, resp))
|
||||
});
|
||||
let mut resps = call_futures.collect::<FuturesUnordered<_>>();
|
||||
@ -152,11 +162,14 @@ where
|
||||
|
||||
let rpc = TableRpc::<F>::ReadEntry(partition_key.clone(), sort_key.clone());
|
||||
let resps = self
|
||||
.rpc_client
|
||||
.system
|
||||
.rpc
|
||||
.try_call_many(
|
||||
&self.endpoint,
|
||||
&who[..],
|
||||
rpc,
|
||||
RequestStrategy::with_quorum(self.data.replication.read_quorum())
|
||||
RequestStrategy::with_priority(PRIO_NORMAL)
|
||||
.with_quorum(self.data.replication.read_quorum())
|
||||
.with_timeout(TABLE_RPC_TIMEOUT)
|
||||
.interrupt_after_quorum(true),
|
||||
)
|
||||
@ -208,11 +221,14 @@ where
|
||||
let rpc = TableRpc::<F>::ReadRange(partition_key.clone(), begin_sort_key, filter, limit);
|
||||
|
||||
let resps = self
|
||||
.rpc_client
|
||||
.system
|
||||
.rpc
|
||||
.try_call_many(
|
||||
&self.endpoint,
|
||||
&who[..],
|
||||
rpc,
|
||||
RequestStrategy::with_quorum(self.data.replication.read_quorum())
|
||||
RequestStrategy::with_priority(PRIO_NORMAL)
|
||||
.with_quorum(self.data.replication.read_quorum())
|
||||
.with_timeout(TABLE_RPC_TIMEOUT)
|
||||
.interrupt_after_quorum(true),
|
||||
)
|
||||
@ -261,36 +277,25 @@ where
|
||||
|
||||
// =============== UTILITY FUNCTION FOR CLIENT OPERATIONS ===============
|
||||
|
||||
async fn repair_on_read(&self, who: &[Uuid], what: F::E) -> Result<(), Error> {
|
||||
async fn repair_on_read(&self, who: &[NodeID], what: F::E) -> Result<(), Error> {
|
||||
let what_enc = Arc::new(ByteBuf::from(rmp_to_vec_all_named(&what)?));
|
||||
self.rpc_client
|
||||
self.system
|
||||
.rpc
|
||||
.try_call_many(
|
||||
&self.endpoint,
|
||||
who,
|
||||
TableRpc::<F>::Update(vec![what_enc]),
|
||||
RequestStrategy::with_quorum(who.len()).with_timeout(TABLE_RPC_TIMEOUT),
|
||||
RequestStrategy::with_priority(PRIO_NORMAL)
|
||||
.with_quorum(who.len())
|
||||
.with_timeout(TABLE_RPC_TIMEOUT),
|
||||
)
|
||||
.await?;
|
||||
Ok(())
|
||||
}
|
||||
|
||||
// =============== HANDLERS FOR RPC OPERATIONS (SERVER SIDE) ==============
|
||||
|
||||
fn register_handler(self: Arc<Self>, rpc_server: &mut RpcServer, path: String) {
|
||||
let self2 = self.clone();
|
||||
rpc_server.add_handler::<TableRpc<F>, _, _>(path, move |msg, _addr| {
|
||||
let self2 = self2.clone();
|
||||
async move { self2.handle(&msg).await }
|
||||
});
|
||||
|
||||
let self2 = self.clone();
|
||||
self.rpc_client
|
||||
.set_local_handler(self.system.id, move |msg| {
|
||||
let self2 = self2.clone();
|
||||
async move { self2.handle(&msg).await }
|
||||
});
|
||||
}
|
||||
|
||||
async fn handle(self: &Arc<Self>, msg: &TableRpc<F>) -> Result<TableRpc<F>, Error> {
|
||||
// ====== RPC HANDLER =====
|
||||
//
|
||||
async fn handle_rpc(self: &Arc<Self>, msg: &TableRpc<F>) -> Result<TableRpc<F>, Error> {
|
||||
match msg {
|
||||
TableRpc::ReadEntry(key, sort_key) => {
|
||||
let value = self.data.read_entry(key, sort_key)?;
|
||||
@ -308,3 +313,16 @@ where
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Endpoint handler for incoming `TableRpc<F>` messages addressed to this table.
#[async_trait]
|
||||
impl<F, R> EndpointHandler<TableRpc<F>> for Table<F, R>
|
||||
where
|
||||
F: TableSchema + 'static,
|
||||
R: TableReplication + 'static,
|
||||
{
|
||||
// Delegates to `handle_rpc`. A failure is not propagated as an `Err`:
// it is turned into a `TableRpc::Error` reply carrying the error's
// `Display` text. The sender's node ID (`_from`) is not used.
async fn handle(self: &Arc<Self>, msg: &TableRpc<F>, _from: NodeID) -> TableRpc<F> {
|
||||
self.handle_rpc(msg)
|
||||
.await
|
||||
.unwrap_or_else(|e| TableRpc::<F>::Error(format!("{}", e)))
|
||||
}
|
||||
}
|
||||
|
@ -1,6 +1,6 @@
|
||||
[package]
|
||||
name = "garage_util"
|
||||
version = "0.3.0"
|
||||
version = "0.4.0"
|
||||
authors = ["Alex Auvolat <alex@adnab.me>"]
|
||||
edition = "2018"
|
||||
license = "AGPL-3.0"
|
||||
@ -32,7 +32,6 @@ toml = "0.5"
|
||||
futures = "0.3"
|
||||
tokio = { version = "1.0", default-features = false, features = ["rt", "rt-multi-thread", "io-util", "net", "time", "macros", "sync", "signal", "fs"] }
|
||||
|
||||
netapp = { version = "0.3.0", git = "https://git.deuxfleurs.fr/lx/netapp" }
|
||||
http = "0.2"
|
||||
hyper = "0.14"
|
||||
rustls = "0.19"
|
||||
webpki = "0.21"
|
||||
|
@ -3,8 +3,11 @@ use std::io::Read;
|
||||
use std::net::SocketAddr;
|
||||
use std::path::PathBuf;
|
||||
|
||||
use serde::de::Error as SerdeError;
|
||||
use serde::{de, Deserialize};
|
||||
|
||||
use netapp::NodeID;
|
||||
|
||||
use crate::error::Error;
|
||||
|
||||
/// Represents the whole configuration
|
||||
@ -26,20 +29,20 @@ pub struct Config {
|
||||
// (we can add more aliases for this later)
|
||||
pub replication_mode: String,
|
||||
|
||||
/// RPC secret key: 32 bytes hex encoded
|
||||
pub rpc_secret: String,
|
||||
|
||||
/// Address to bind for RPC
|
||||
pub rpc_bind_addr: SocketAddr,
|
||||
|
||||
/// Bootstrap peers RPC address
|
||||
#[serde(deserialize_with = "deserialize_vec_addr")]
|
||||
pub bootstrap_peers: Vec<SocketAddr>,
|
||||
pub bootstrap_peers: Vec<(NodeID, SocketAddr)>,
|
||||
/// Consul host to connect to in order to discover more peers
|
||||
pub consul_host: Option<String>,
|
||||
/// Consul service name to use
|
||||
pub consul_service_name: Option<String>,
|
||||
|
||||
/// Configuration for RPC TLS
|
||||
pub rpc_tls: Option<TlsConfig>,
|
||||
|
||||
/// Maximum number of concurrent RPC requests
|
||||
#[serde(default = "default_max_concurrent_rpc_requests")]
|
||||
pub max_concurrent_rpc_requests: usize,
|
||||
@ -59,17 +62,6 @@ pub struct Config {
|
||||
pub s3_web: WebConfig,
|
||||
}
|
||||
|
||||
/// Configuration for RPC TLS
|
||||
#[derive(Deserialize, Debug, Clone)]
|
||||
pub struct TlsConfig {
|
||||
/// Path to the certificate authority used for all nodes
|
||||
pub ca_cert: String,
|
||||
/// Path to the public certificate for this node
|
||||
pub node_cert: String,
|
||||
/// Path to the private key for this node
|
||||
pub node_key: String,
|
||||
}
|
||||
|
||||
/// Configuration for S3 api
|
||||
#[derive(Deserialize, Debug, Clone)]
|
||||
pub struct ApiConfig {
|
||||
@ -115,27 +107,32 @@ pub fn read_config(config_file: PathBuf) -> Result<Config, Error> {
|
||||
Ok(toml::from_str(&config)?)
|
||||
}
|
||||
|
||||
fn deserialize_vec_addr<'de, D>(deserializer: D) -> Result<Vec<SocketAddr>, D::Error>
|
||||
fn deserialize_vec_addr<'de, D>(deserializer: D) -> Result<Vec<(NodeID, SocketAddr)>, D::Error>
|
||||
where
|
||||
D: de::Deserializer<'de>,
|
||||
{
|
||||
use std::net::ToSocketAddrs;
|
||||
|
||||
Ok(<Vec<&str>>::deserialize(deserializer)?
|
||||
.iter()
|
||||
.filter_map(|&name| {
|
||||
name.to_socket_addrs()
|
||||
.map(|iter| (name, iter))
|
||||
.map_err(|_| warn!("Error resolving \"{}\"", name))
|
||||
.ok()
|
||||
})
|
||||
.map(|(name, iter)| {
|
||||
let v = iter.collect::<Vec<_>>();
|
||||
if v.is_empty() {
|
||||
warn!("Error resolving \"{}\"", name)
|
||||
}
|
||||
v
|
||||
})
|
||||
.flatten()
|
||||
.collect())
|
||||
let mut ret = vec![];
|
||||
|
||||
for peer in <Vec<&str>>::deserialize(deserializer)? {
|
||||
let delim = peer
|
||||
.find('@')
|
||||
.ok_or_else(|| D::Error::custom("Invalid bootstrap peer: public key not specified"))?;
|
||||
let (key, host) = peer.split_at(delim);
|
||||
let pubkey = NodeID::from_slice(&hex::decode(&key).map_err(D::Error::custom)?)
|
||||
.ok_or_else(|| D::Error::custom("Invalid bootstrap peer public key"))?;
|
||||
let hosts = host[1..]
|
||||
.to_socket_addrs()
|
||||
.map_err(D::Error::custom)?
|
||||
.collect::<Vec<_>>();
|
||||
if hosts.is_empty() {
|
||||
return Err(D::Error::custom(format!("Error resolving {}", &host[1..])));
|
||||
}
|
||||
for host in hosts {
|
||||
ret.push((pubkey.clone(), host));
|
||||
}
|
||||
}
|
||||
|
||||
Ok(ret)
|
||||
}
|
||||
|
@ -11,8 +11,8 @@ pub enum RpcError {
|
||||
#[error(display = "Node is down: {:?}.", _0)]
|
||||
NodeDown(Uuid),
|
||||
|
||||
#[error(display = "Timeout: {}", _0)]
|
||||
Timeout(#[error(source)] tokio::time::error::Elapsed),
|
||||
#[error(display = "Timeout")]
|
||||
Timeout,
|
||||
|
||||
#[error(display = "HTTP error: {}", _0)]
|
||||
Http(#[error(source)] http::Error),
|
||||
@ -45,11 +45,8 @@ pub enum Error {
|
||||
#[error(display = "Invalid HTTP header value: {}", _0)]
|
||||
HttpHeader(#[error(source)] http::header::ToStrError),
|
||||
|
||||
#[error(display = "TLS error: {}", _0)]
|
||||
Tls(#[error(source)] rustls::TLSError),
|
||||
|
||||
#[error(display = "PKI error: {}", _0)]
|
||||
Pki(#[error(source)] webpki::Error),
|
||||
#[error(display = "Netapp error: {}", _0)]
|
||||
Netapp(#[error(source)] netapp::error::Error),
|
||||
|
||||
#[error(display = "Sled error: {}", _0)]
|
||||
Sled(#[error(source)] sled::Error),
|
||||
|
@ -1,6 +1,6 @@
|
||||
[package]
|
||||
name = "garage_web"
|
||||
version = "0.3.0"
|
||||
version = "0.4.0"
|
||||
authors = ["Alex Auvolat <alex@adnab.me>", "Quentin Dufour <quentin@dufour.io>"]
|
||||
edition = "2018"
|
||||
license = "AGPL-3.0"
|
||||
@ -13,10 +13,10 @@ path = "lib.rs"
|
||||
# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
|
||||
|
||||
[dependencies]
|
||||
garage_api = { version = "0.3.0", path = "../api" }
|
||||
garage_model = { version = "0.3.0", path = "../model" }
|
||||
garage_util = { version = "0.3.0", path = "../util" }
|
||||
garage_table = { version = "0.3.0", path = "../table" }
|
||||
garage_api = { version = "0.4.0", path = "../api" }
|
||||
garage_model = { version = "0.4.0", path = "../model" }
|
||||
garage_util = { version = "0.4.0", path = "../util" }
|
||||
garage_table = { version = "0.4.0", path = "../table" }
|
||||
|
||||
err-derive = "0.3"
|
||||
idna = "0.2"
|
||||
|
Loading…
Reference in New Issue
Block a user