First port of Garage to Netapp

This commit is contained in:
Alex Auvolat 2021-10-14 11:50:12 +02:00
parent dc017a0cab
commit 4067797d01
No known key found for this signature in database
GPG Key ID: EDABF9711E244EB1
31 changed files with 1497 additions and 2289 deletions

705
Cargo.lock generated

File diff suppressed because it is too large Load Diff

View File

@ -1,6 +1,6 @@
[package] [package]
name = "garage_api" name = "garage_api"
version = "0.3.0" version = "0.4.0"
authors = ["Alex Auvolat <alex@adnab.me>"] authors = ["Alex Auvolat <alex@adnab.me>"]
edition = "2018" edition = "2018"
license = "AGPL-3.0" license = "AGPL-3.0"
@ -13,9 +13,9 @@ path = "lib.rs"
# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html # See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
[dependencies] [dependencies]
garage_model = { version = "0.3.0", path = "../model" } garage_model = { version = "0.4.0", path = "../model" }
garage_table = { version = "0.3.0", path = "../table" } garage_table = { version = "0.4.0", path = "../table" }
garage_util = { version = "0.3.0", path = "../util" } garage_util = { version = "0.4.0", path = "../util" }
base64 = "0.13" base64 = "0.13"
bytes = "1.0" bytes = "1.0"

View File

@ -1,6 +1,6 @@
[package] [package]
name = "garage" name = "garage"
version = "0.3.0" version = "0.4.0"
authors = ["Alex Auvolat <alex@adnab.me>"] authors = ["Alex Auvolat <alex@adnab.me>"]
edition = "2018" edition = "2018"
license = "AGPL-3.0" license = "AGPL-3.0"
@ -14,12 +14,12 @@ path = "main.rs"
# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html # See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
[dependencies] [dependencies]
garage_api = { version = "0.3.0", path = "../api" } garage_api = { version = "0.4.0", path = "../api" }
garage_model = { version = "0.3.0", path = "../model" } garage_model = { version = "0.4.0", path = "../model" }
garage_rpc = { version = "0.3.0", path = "../rpc" } garage_rpc = { version = "0.4.0", path = "../rpc" }
garage_table = { version = "0.3.0", path = "../table" } garage_table = { version = "0.4.0", path = "../table" }
garage_util = { version = "0.3.0", path = "../util" } garage_util = { version = "0.4.0", path = "../util" }
garage_web = { version = "0.3.0", path = "../web" } garage_web = { version = "0.4.0", path = "../web" }
bytes = "1.0" bytes = "1.0"
git-version = "0.3.4" git-version = "0.3.4"
@ -27,6 +27,8 @@ hex = "0.4"
log = "0.4" log = "0.4"
pretty_env_logger = "0.4" pretty_env_logger = "0.4"
rand = "0.8" rand = "0.8"
async-trait = "0.1.7"
sodiumoxide = { version = "0.2.5-0", package = "kuska-sodiumoxide" }
sled = "0.34" sled = "0.34"
@ -38,3 +40,5 @@ toml = "0.5"
futures = "0.3" futures = "0.3"
futures-util = "0.3" futures-util = "0.3"
tokio = { version = "1.0", default-features = false, features = ["rt", "rt-multi-thread", "io-util", "net", "time", "macros", "sync", "signal", "fs"] } tokio = { version = "1.0", default-features = false, features = ["rt", "rt-multi-thread", "io-util", "net", "time", "macros", "sync", "signal", "fs"] }
netapp = { version = "0.3.0", git = "https://git.deuxfleurs.fr/lx/netapp" }

View File

@ -2,6 +2,7 @@ use std::collections::HashMap;
use std::fmt::Write; use std::fmt::Write;
use std::sync::Arc; use std::sync::Arc;
use async_trait::async_trait;
use serde::{Deserialize, Serialize}; use serde::{Deserialize, Serialize};
use garage_util::error::Error; use garage_util::error::Error;
@ -10,8 +11,7 @@ use garage_table::crdt::Crdt;
use garage_table::replication::*; use garage_table::replication::*;
use garage_table::*; use garage_table::*;
use garage_rpc::rpc_client::*; use garage_rpc::*;
use garage_rpc::rpc_server::*;
use garage_model::bucket_table::*; use garage_model::bucket_table::*;
use garage_model::garage::Garage; use garage_model::garage::Garage;
@ -19,10 +19,8 @@ use garage_model::key_table::*;
use crate::cli::*; use crate::cli::*;
use crate::repair::Repair; use crate::repair::Repair;
use crate::*;
pub const ADMIN_RPC_TIMEOUT: Duration = Duration::from_secs(30); pub const ADMIN_RPC_PATH: &str = "garage/admin_rpc.rs/Rpc";
pub const ADMIN_RPC_PATH: &str = "_admin";
#[derive(Debug, Serialize, Deserialize)] #[derive(Debug, Serialize, Deserialize)]
pub enum AdminRpc { pub enum AdminRpc {
@ -33,41 +31,31 @@ pub enum AdminRpc {
// Replies // Replies
Ok(String), Ok(String),
Error(String),
BucketList(Vec<String>), BucketList(Vec<String>),
BucketInfo(Bucket), BucketInfo(Bucket),
KeyList(Vec<(String, String)>), KeyList(Vec<(String, String)>),
KeyInfo(Key), KeyInfo(Key),
} }
impl RpcMessage for AdminRpc {} impl Message for AdminRpc {
type Response = AdminRpc;
}
pub struct AdminRpcHandler { pub struct AdminRpcHandler {
garage: Arc<Garage>, garage: Arc<Garage>,
rpc_client: Arc<RpcClient<AdminRpc>>, endpoint: Arc<Endpoint<AdminRpc, Self>>,
} }
impl AdminRpcHandler { impl AdminRpcHandler {
pub fn new(garage: Arc<Garage>) -> Arc<Self> { pub fn new(garage: Arc<Garage>) -> Arc<Self> {
let rpc_client = garage.system.clone().rpc_client::<AdminRpc>(ADMIN_RPC_PATH); let endpoint = garage.system.netapp.endpoint(ADMIN_RPC_PATH.into());
Arc::new(Self { garage, rpc_client }) let admin = Arc::new(Self { garage, endpoint });
admin.endpoint.set_handler(admin.clone());
admin
} }
pub fn register_handler(self: Arc<Self>, rpc_server: &mut RpcServer) { async fn handle_bucket_cmd(&self, cmd: &BucketOperation) -> Result<AdminRpc, Error> {
rpc_server.add_handler::<AdminRpc, _, _>(ADMIN_RPC_PATH.to_string(), move |msg, _addr| {
let self2 = self.clone();
async move {
match msg {
AdminRpc::BucketOperation(bo) => self2.handle_bucket_cmd(bo).await,
AdminRpc::KeyOperation(ko) => self2.handle_key_cmd(ko).await,
AdminRpc::LaunchRepair(opt) => self2.handle_launch_repair(opt).await,
AdminRpc::Stats(opt) => self2.handle_stats(opt).await,
_ => Err(Error::BadRpc("Invalid RPC".to_string())),
}
}
});
}
async fn handle_bucket_cmd(&self, cmd: BucketOperation) -> Result<AdminRpc, Error> {
match cmd { match cmd {
BucketOperation::List => { BucketOperation::List => {
let bucket_names = self let bucket_names = self
@ -187,7 +175,7 @@ impl AdminRpcHandler {
} }
} }
async fn handle_key_cmd(&self, cmd: KeyOperation) -> Result<AdminRpc, Error> { async fn handle_key_cmd(&self, cmd: &KeyOperation) -> Result<AdminRpc, Error> {
match cmd { match cmd {
KeyOperation::List => { KeyOperation::List => {
let key_ids = self let key_ids = self
@ -210,13 +198,13 @@ impl AdminRpcHandler {
Ok(AdminRpc::KeyInfo(key)) Ok(AdminRpc::KeyInfo(key))
} }
KeyOperation::New(query) => { KeyOperation::New(query) => {
let key = Key::new(query.name); let key = Key::new(query.name.clone());
self.garage.key_table.insert(&key).await?; self.garage.key_table.insert(&key).await?;
Ok(AdminRpc::KeyInfo(key)) Ok(AdminRpc::KeyInfo(key))
} }
KeyOperation::Rename(query) => { KeyOperation::Rename(query) => {
let mut key = self.get_existing_key(&query.key_pattern).await?; let mut key = self.get_existing_key(&query.key_pattern).await?;
key.name.update(query.new_name); key.name.update(query.new_name.clone());
self.garage.key_table.insert(&key).await?; self.garage.key_table.insert(&key).await?;
Ok(AdminRpc::KeyInfo(key)) Ok(AdminRpc::KeyInfo(key))
} }
@ -353,17 +341,18 @@ impl AdminRpcHandler {
let mut failures = vec![]; let mut failures = vec![];
let ring = self.garage.system.ring.borrow().clone(); let ring = self.garage.system.ring.borrow().clone();
for node in ring.config.members.keys() { for node in ring.config.members.keys() {
let node = NodeID::from_slice(node.as_slice()).unwrap();
if self if self
.rpc_client .endpoint
.call( .call(
*node, &node,
AdminRpc::LaunchRepair(opt_to_send.clone()), &AdminRpc::LaunchRepair(opt_to_send.clone()),
ADMIN_RPC_TIMEOUT, PRIO_NORMAL,
) )
.await .await
.is_err() .is_err()
{ {
failures.push(*node); failures.push(node);
} }
} }
if failures.is_empty() { if failures.is_empty() {
@ -397,14 +386,16 @@ impl AdminRpcHandler {
let ring = self.garage.system.ring.borrow().clone(); let ring = self.garage.system.ring.borrow().clone();
for node in ring.config.members.keys() { for node in ring.config.members.keys() {
let node = NodeID::from_slice(node.as_slice()).unwrap();
let mut opt = opt.clone(); let mut opt = opt.clone();
opt.all_nodes = false; opt.all_nodes = false;
writeln!(&mut ret, "\n======================").unwrap(); writeln!(&mut ret, "\n======================").unwrap();
writeln!(&mut ret, "Stats for node {:?}:", node).unwrap(); writeln!(&mut ret, "Stats for node {:?}:", node).unwrap();
match self match self
.rpc_client .endpoint
.call(*node, AdminRpc::Stats(opt), ADMIN_RPC_TIMEOUT) .call(&node, &AdminRpc::Stats(opt), PRIO_NORMAL)
.await .await
{ {
Ok(AdminRpc::Ok(s)) => writeln!(&mut ret, "{}", s).unwrap(), Ok(AdminRpc::Ok(s)) => writeln!(&mut ret, "{}", s).unwrap(),
@ -495,4 +486,23 @@ impl AdminRpcHandler {
.unwrap(); .unwrap();
writeln!(to, " GC todo queue length: {}", t.data.gc_todo_len()).unwrap(); writeln!(to, " GC todo queue length: {}", t.data.gc_todo_len()).unwrap();
} }
async fn handle_rpc(self: &Arc<Self>, msg: &AdminRpc) -> Result<AdminRpc, Error> {
match msg {
AdminRpc::BucketOperation(bo) => self.handle_bucket_cmd(bo).await,
AdminRpc::KeyOperation(ko) => self.handle_key_cmd(ko).await,
AdminRpc::LaunchRepair(opt) => self.handle_launch_repair(opt.clone()).await,
AdminRpc::Stats(opt) => self.handle_stats(opt.clone()).await,
_ => Err(Error::BadRpc("Invalid RPC".to_string())),
}
}
}
#[async_trait]
impl EndpointHandler<AdminRpc> for AdminRpcHandler {
async fn handle(self: &Arc<Self>, message: &AdminRpc, _from: NodeID) -> AdminRpc {
self.handle_rpc(message)
.await
.unwrap_or_else(|e| AdminRpc::Error(format!("{}", e)))
}
} }

View File

@ -1,6 +1,5 @@
use std::cmp::max; //use std::cmp::max;
use std::collections::HashSet; //use std::collections::HashSet;
use std::net::SocketAddr;
use std::path::PathBuf; use std::path::PathBuf;
use serde::{Deserialize, Serialize}; use serde::{Deserialize, Serialize};
@ -8,11 +7,11 @@ use structopt::StructOpt;
use garage_util::data::Uuid; use garage_util::data::Uuid;
use garage_util::error::Error; use garage_util::error::Error;
use garage_util::time::*; //use garage_util::time::*;
use garage_rpc::membership::*;
use garage_rpc::ring::*; use garage_rpc::ring::*;
use garage_rpc::rpc_client::*; use garage_rpc::system::*;
use garage_rpc::*;
use garage_model::bucket_table::*; use garage_model::bucket_table::*;
use garage_model::key_table::*; use garage_model::key_table::*;
@ -298,54 +297,65 @@ pub struct StatsOpt {
pub async fn cli_cmd( pub async fn cli_cmd(
cmd: Command, cmd: Command,
membership_rpc_cli: RpcAddrClient<Message>, system_rpc_endpoint: &Endpoint<SystemRpc, ()>,
admin_rpc_cli: RpcAddrClient<AdminRpc>, admin_rpc_endpoint: &Endpoint<AdminRpc, ()>,
rpc_host: SocketAddr, rpc_host: NodeID,
) -> Result<(), Error> { ) -> Result<(), Error> {
match cmd { match cmd {
Command::Status => cmd_status(membership_rpc_cli, rpc_host).await, Command::Status => cmd_status(system_rpc_endpoint, rpc_host).await,
Command::Node(NodeOperation::Configure(configure_opt)) => { Command::Node(NodeOperation::Configure(configure_opt)) => {
cmd_configure(membership_rpc_cli, rpc_host, configure_opt).await cmd_configure(system_rpc_endpoint, rpc_host, configure_opt).await
} }
Command::Node(NodeOperation::Remove(remove_opt)) => { Command::Node(NodeOperation::Remove(remove_opt)) => {
cmd_remove(membership_rpc_cli, rpc_host, remove_opt).await cmd_remove(system_rpc_endpoint, rpc_host, remove_opt).await
} }
Command::Bucket(bo) => { Command::Bucket(bo) => {
cmd_admin(admin_rpc_cli, rpc_host, AdminRpc::BucketOperation(bo)).await cmd_admin(admin_rpc_endpoint, rpc_host, AdminRpc::BucketOperation(bo)).await
} }
Command::Key(ko) => cmd_admin(admin_rpc_cli, rpc_host, AdminRpc::KeyOperation(ko)).await, Command::Key(ko) => {
Command::Repair(ro) => cmd_admin(admin_rpc_cli, rpc_host, AdminRpc::LaunchRepair(ro)).await, cmd_admin(admin_rpc_endpoint, rpc_host, AdminRpc::KeyOperation(ko)).await
Command::Stats(so) => cmd_admin(admin_rpc_cli, rpc_host, AdminRpc::Stats(so)).await, }
Command::Repair(ro) => {
cmd_admin(admin_rpc_endpoint, rpc_host, AdminRpc::LaunchRepair(ro)).await
}
Command::Stats(so) => cmd_admin(admin_rpc_endpoint, rpc_host, AdminRpc::Stats(so)).await,
_ => unreachable!(), _ => unreachable!(),
} }
} }
pub async fn cmd_status( pub async fn cmd_status(rpc_cli: &Endpoint<SystemRpc, ()>, rpc_host: NodeID) -> Result<(), Error> {
rpc_cli: RpcAddrClient<Message>,
rpc_host: SocketAddr,
) -> Result<(), Error> {
let status = match rpc_cli let status = match rpc_cli
.call(&rpc_host, &Message::PullStatus, ADMIN_RPC_TIMEOUT) .call(&rpc_host, &SystemRpc::GetKnownNodes, PRIO_NORMAL)
.await?? .await?
{ {
Message::AdvertiseNodesUp(nodes) => nodes, SystemRpc::ReturnKnownNodes(nodes) => nodes,
resp => return Err(Error::Message(format!("Invalid RPC response: {:?}", resp))), resp => return Err(Error::Message(format!("Invalid RPC response: {:?}", resp))),
}; };
let config = match rpc_cli let config = match rpc_cli
.call(&rpc_host, &Message::PullConfig, ADMIN_RPC_TIMEOUT) .call(&rpc_host, &SystemRpc::PullConfig, PRIO_NORMAL)
.await?? .await?
{ {
Message::AdvertiseConfig(cfg) => cfg, SystemRpc::AdvertiseConfig(cfg) => cfg,
resp => return Err(Error::Message(format!("Invalid RPC response: {:?}", resp))), resp => return Err(Error::Message(format!("Invalid RPC response: {:?}", resp))),
}; };
println!("STATUS:");
for node in status {
println!("{:?}", node);
}
println!("CONFIG: (v{})", config.version);
for (id, node) in config.members {
println!("{} {:?}", hex::encode(id.as_slice()), node);
}
/* TODO
let (hostname_len, addr_len, tag_len, zone_len) = status let (hostname_len, addr_len, tag_len, zone_len) = status
.iter() .iter()
.map(|adv| (adv, config.members.get(&adv.id))) .map(|(id, addr, _)| (addr, config.members.get(&adv.id)))
.map(|(adv, cfg)| { .map(|(addr, cfg)| {
( (
adv.state_info.hostname.len(), 8,
adv.addr.to_string().len(), addr.to_string().len(),
cfg.map(|c| c.tag.len()).unwrap_or(0), cfg.map(|c| c.tag.len()).unwrap_or(0),
cfg.map(|c| c.zone.len()).unwrap_or(0), cfg.map(|c| c.zone.len()).unwrap_or(0),
) )
@ -355,13 +365,13 @@ pub async fn cmd_status(
}); });
println!("Healthy nodes:"); println!("Healthy nodes:");
for adv in status.iter().filter(|x| x.is_up) { for (id, addr, _) in status.iter().filter(|(id, addr, is_up)| is_up) {
if let Some(cfg) = config.members.get(&adv.id) { if let Some(cfg) = config.members.get(&adv.id) {
println!( println!(
"{id:?}\t{host}{h_pad}\t{addr}{a_pad}\t[{tag}]{t_pad}\t{zone}{z_pad}\t{capacity}", "{id:?}\t{host}{h_pad}\t{addr}{a_pad}\t[{tag}]{t_pad}\t{zone}{z_pad}\t{capacity}",
id = adv.id, id = id,
host = adv.state_info.hostname, host = "",
addr = adv.addr, addr = addr,
tag = cfg.tag, tag = cfg.tag,
zone = cfg.zone, zone = cfg.zone,
capacity = cfg.capacity_string(), capacity = cfg.capacity_string(),
@ -373,36 +383,36 @@ pub async fn cmd_status(
} else { } else {
println!( println!(
"{id:?}\t{h}{h_pad}\t{addr}{a_pad}\tUNCONFIGURED/REMOVED", "{id:?}\t{h}{h_pad}\t{addr}{a_pad}\tUNCONFIGURED/REMOVED",
id = adv.id, id = id,
h = adv.state_info.hostname, h = "",
addr = adv.addr, addr = addr,
h_pad = " ".repeat(hostname_len - adv.state_info.hostname.len()), h_pad = " ".repeat(hostname_len - "".len()),
a_pad = " ".repeat(addr_len - adv.addr.to_string().len()), a_pad = " ".repeat(addr_len - addr.to_string().len()),
); );
} }
} }
let status_keys = status.iter().map(|x| x.id).collect::<HashSet<_>>(); let status_keys = status.iter().map(|(id, _, _)| id).collect::<HashSet<_>>();
let failure_case_1 = status.iter().any(|x| !x.is_up); let failure_case_1 = status.iter().any(|(_, _, is_up)| !is_up);
let failure_case_2 = config let failure_case_2 = config
.members .members
.iter() .iter()
.any(|(id, _)| !status_keys.contains(id)); .any(|(id, _)| !status_keys.contains(id));
if failure_case_1 || failure_case_2 { if failure_case_1 || failure_case_2 {
println!("\nFailed nodes:"); println!("\nFailed nodes:");
for adv in status.iter().filter(|x| !x.is_up) { for (id, addr) in status.iter().filter(|(_, _, is_up)| !is_up) {
if let Some(cfg) = config.members.get(&adv.id) { if let Some(cfg) = config.members.get(&id) {
println!( println!(
"{id:?}\t{host}{h_pad}\t{addr}{a_pad}\t[{tag}]{t_pad}\t{zone}{z_pad}\t{capacity}\tlast seen: {last_seen}s ago", "{id:?}\t{host}{h_pad}\t{addr}{a_pad}\t[{tag}]{t_pad}\t{zone}{z_pad}\t{capacity}\tlast seen: {last_seen}s ago",
id=adv.id, id=id,
host=adv.state_info.hostname, host="",
addr=adv.addr, addr=addr,
tag=cfg.tag, tag=cfg.tag,
zone=cfg.zone, zone=cfg.zone,
capacity=cfg.capacity_string(), capacity=cfg.capacity_string(),
last_seen=(now_msec() - adv.last_seen) / 1000, last_seen=(now_msec() - 0) / 1000,
h_pad=" ".repeat(hostname_len - adv.state_info.hostname.len()), h_pad=" ".repeat(hostname_len - "".len()),
a_pad=" ".repeat(addr_len - adv.addr.to_string().len()), a_pad=" ".repeat(addr_len - addr.to_string().len()),
t_pad=" ".repeat(tag_len - cfg.tag.len()), t_pad=" ".repeat(tag_len - cfg.tag.len()),
z_pad=" ".repeat(zone_len - cfg.zone.len()), z_pad=" ".repeat(zone_len - cfg.zone.len()),
); );
@ -411,12 +421,12 @@ pub async fn cmd_status(
let (tag_len, zone_len) = config let (tag_len, zone_len) = config
.members .members
.iter() .iter()
.filter(|(&id, _)| !status.iter().any(|x| x.id == id)) .filter(|(&id, _)| !status.iter().any(|(xid, _, _)| xid == id))
.map(|(_, cfg)| (cfg.tag.len(), cfg.zone.len())) .map(|(_, cfg)| (cfg.tag.len(), cfg.zone.len()))
.fold((0, 0), |(t, z), (mt, mz)| (max(t, mt), max(z, mz))); .fold((0, 0), |(t, z), (mt, mz)| (max(t, mt), max(z, mz)));
for (id, cfg) in config.members.iter() { for (id, cfg) in config.members.iter() {
if !status.iter().any(|x| x.id == *id) { if !status.iter().any(|(xid, _, _)| xid == *id) {
println!( println!(
"{id:?}\t{tag}{t_pad}\t{zone}{z_pad}\t{capacity}\tnever seen", "{id:?}\t{tag}{t_pad}\t{zone}{z_pad}\t{capacity}\tnever seen",
id = id, id = id,
@ -429,6 +439,7 @@ pub async fn cmd_status(
} }
} }
} }
*/
Ok(()) Ok(())
} }
@ -455,25 +466,30 @@ pub fn find_matching_node(
} }
pub async fn cmd_configure( pub async fn cmd_configure(
rpc_cli: RpcAddrClient<Message>, rpc_cli: &Endpoint<SystemRpc, ()>,
rpc_host: SocketAddr, rpc_host: NodeID,
args: ConfigureNodeOpt, args: ConfigureNodeOpt,
) -> Result<(), Error> { ) -> Result<(), Error> {
let status = match rpc_cli let status = match rpc_cli
.call(&rpc_host, &Message::PullStatus, ADMIN_RPC_TIMEOUT) .call(&rpc_host, &SystemRpc::GetKnownNodes, PRIO_NORMAL)
.await?? .await?
{ {
Message::AdvertiseNodesUp(nodes) => nodes, SystemRpc::ReturnKnownNodes(nodes) => nodes,
resp => return Err(Error::Message(format!("Invalid RPC response: {:?}", resp))), resp => return Err(Error::Message(format!("Invalid RPC response: {:?}", resp))),
}; };
let added_node = find_matching_node(status.iter().map(|x| x.id), &args.node_id)?; let added_node = find_matching_node(
status
.iter()
.map(|(id, _, _)| Uuid::try_from(id.as_ref()).unwrap()),
&args.node_id,
)?;
let mut config = match rpc_cli let mut config = match rpc_cli
.call(&rpc_host, &Message::PullConfig, ADMIN_RPC_TIMEOUT) .call(&rpc_host, &SystemRpc::PullConfig, PRIO_NORMAL)
.await?? .await?
{ {
Message::AdvertiseConfig(cfg) => cfg, SystemRpc::AdvertiseConfig(cfg) => cfg,
resp => return Err(Error::Message(format!("Invalid RPC response: {:?}", resp))), resp => return Err(Error::Message(format!("Invalid RPC response: {:?}", resp))),
}; };
@ -527,25 +543,21 @@ pub async fn cmd_configure(
config.version += 1; config.version += 1;
rpc_cli rpc_cli
.call( .call(&rpc_host, &SystemRpc::AdvertiseConfig(config), PRIO_NORMAL)
&rpc_host, .await?;
&Message::AdvertiseConfig(config),
ADMIN_RPC_TIMEOUT,
)
.await??;
Ok(()) Ok(())
} }
pub async fn cmd_remove( pub async fn cmd_remove(
rpc_cli: RpcAddrClient<Message>, rpc_cli: &Endpoint<SystemRpc, ()>,
rpc_host: SocketAddr, rpc_host: NodeID,
args: RemoveNodeOpt, args: RemoveNodeOpt,
) -> Result<(), Error> { ) -> Result<(), Error> {
let mut config = match rpc_cli let mut config = match rpc_cli
.call(&rpc_host, &Message::PullConfig, ADMIN_RPC_TIMEOUT) .call(&rpc_host, &SystemRpc::PullConfig, PRIO_NORMAL)
.await?? .await?
{ {
Message::AdvertiseConfig(cfg) => cfg, SystemRpc::AdvertiseConfig(cfg) => cfg,
resp => return Err(Error::Message(format!("Invalid RPC response: {:?}", resp))), resp => return Err(Error::Message(format!("Invalid RPC response: {:?}", resp))),
}; };
@ -562,21 +574,17 @@ pub async fn cmd_remove(
config.version += 1; config.version += 1;
rpc_cli rpc_cli
.call( .call(&rpc_host, &SystemRpc::AdvertiseConfig(config), PRIO_NORMAL)
&rpc_host, .await?;
&Message::AdvertiseConfig(config),
ADMIN_RPC_TIMEOUT,
)
.await??;
Ok(()) Ok(())
} }
pub async fn cmd_admin( pub async fn cmd_admin(
rpc_cli: RpcAddrClient<AdminRpc>, rpc_cli: &Endpoint<AdminRpc, ()>,
rpc_host: SocketAddr, rpc_host: NodeID,
args: AdminRpc, args: AdminRpc,
) -> Result<(), Error> { ) -> Result<(), Error> {
match rpc_cli.call(&rpc_host, args, ADMIN_RPC_TIMEOUT).await?? { match rpc_cli.call(&rpc_host, &args, PRIO_NORMAL).await? {
AdminRpc::Ok(msg) => { AdminRpc::Ok(msg) => {
println!("{}", msg); println!("{}", msg);
} }

View File

@ -10,16 +10,16 @@ mod repair;
mod server; mod server;
use std::net::SocketAddr; use std::net::SocketAddr;
use std::sync::Arc;
use std::time::Duration;
use structopt::StructOpt; use structopt::StructOpt;
use garage_util::config::TlsConfig; use netapp::util::parse_peer_addr;
use netapp::NetworkKey;
use garage_util::error::Error; use garage_util::error::Error;
use garage_rpc::membership::*; use garage_rpc::system::*;
use garage_rpc::rpc_client::*; use garage_rpc::*;
use admin_rpc::*; use admin_rpc::*;
use cli::*; use cli::*;
@ -27,16 +27,14 @@ use cli::*;
#[derive(StructOpt, Debug)] #[derive(StructOpt, Debug)]
#[structopt(name = "garage")] #[structopt(name = "garage")]
struct Opt { struct Opt {
/// RPC connect to this host to execute client operations /// Host to connect to for admin operations, in the format:
#[structopt(short = "h", long = "rpc-host", default_value = "127.0.0.1:3901", parse(try_from_str = parse_address))] /// <public-key>@<ip>:<port>
pub rpc_host: SocketAddr, #[structopt(short = "h", long = "rpc-host")]
pub rpc_host: Option<String>,
#[structopt(long = "ca-cert")] /// RPC secret network key for admin operations
pub ca_cert: Option<String>, #[structopt(short = "s", long = "rpc-secret")]
#[structopt(long = "client-cert")] pub rpc_secret: Option<String>,
pub client_cert: Option<String>,
#[structopt(long = "client-key")]
pub client_key: Option<String>,
#[structopt(subcommand)] #[structopt(subcommand)]
cmd: Command, cmd: Command,
@ -66,33 +64,20 @@ async fn main() {
} }
async fn cli_command(opt: Opt) -> Result<(), Error> { async fn cli_command(opt: Opt) -> Result<(), Error> {
let tls_config = match (opt.ca_cert, opt.client_cert, opt.client_key) { let net_key_hex_str = &opt.rpc_secret.expect("No RPC secret provided");
(Some(ca_cert), Some(client_cert), Some(client_key)) => Some(TlsConfig { let network_key = NetworkKey::from_slice(
ca_cert, &hex::decode(net_key_hex_str).expect("Invalid RPC secret key (bad hex)")[..],
node_cert: client_cert, )
node_key: client_key, .expect("Invalid RPC secret provided (wrong length)");
}), let (_pk, sk) = sodiumoxide::crypto::sign::ed25519::gen_keypair();
(None, None, None) => None,
_ => {
warn!("Missing one of: --ca-cert, --node-cert, --node-key. Not using TLS.");
None
}
};
let rpc_http_cli = let netapp = NetApp::new(network_key, sk);
Arc::new(RpcHttpClient::new(8, &tls_config).expect("Could not create RPC client")); let (id, addr) =
let membership_rpc_cli = parse_peer_addr(&opt.rpc_host.expect("No RPC host provided")).expect("Invalid RPC host");
RpcAddrClient::new(rpc_http_cli.clone(), MEMBERSHIP_RPC_PATH.to_string()); netapp.clone().try_connect(addr, id).await?;
let admin_rpc_cli = RpcAddrClient::new(rpc_http_cli.clone(), ADMIN_RPC_PATH.to_string());
cli_cmd(opt.cmd, membership_rpc_cli, admin_rpc_cli, opt.rpc_host).await let system_rpc_endpoint = netapp.endpoint::<SystemRpc, ()>(SYSTEM_RPC_PATH.into());
} let admin_rpc_endpoint = netapp.endpoint::<AdminRpc, ()>(ADMIN_RPC_PATH.into());
fn parse_address(address: &str) -> Result<SocketAddr, String> { cli_cmd(opt.cmd, &system_rpc_endpoint, &admin_rpc_endpoint, id).await
use std::net::ToSocketAddrs;
address
.to_socket_addrs()
.map_err(|_| format!("Could not resolve {}", address))?
.next()
.ok_or_else(|| format!("Could not resolve {}", address))
} }

View File

@ -1,7 +1,5 @@
use std::path::PathBuf; use std::path::PathBuf;
use std::sync::Arc;
use futures_util::future::*;
use tokio::sync::watch; use tokio::sync::watch;
use garage_util::background::*; use garage_util::background::*;
@ -10,21 +8,10 @@ use garage_util::error::Error;
use garage_api::run_api_server; use garage_api::run_api_server;
use garage_model::garage::Garage; use garage_model::garage::Garage;
use garage_rpc::rpc_server::RpcServer;
use garage_web::run_web_server; use garage_web::run_web_server;
use crate::admin_rpc::*; use crate::admin_rpc::*;
async fn shutdown_signal(send_cancel: watch::Sender<bool>) -> Result<(), Error> {
// Wait for the CTRL+C signal
tokio::signal::ctrl_c()
.await
.expect("failed to install CTRL+C signal handler");
info!("Received CTRL+C, shutting down.");
send_cancel.send(true)?;
Ok(())
}
async fn wait_from(mut chan: watch::Receiver<bool>) { async fn wait_from(mut chan: watch::Receiver<bool>) {
while !*chan.borrow() { while !*chan.borrow() {
if chan.changed().await.is_err() { if chan.changed().await.is_err() {
@ -47,52 +34,46 @@ pub async fn run_server(config_file: PathBuf) -> Result<(), Error> {
.open() .open()
.expect("Unable to open sled DB"); .expect("Unable to open sled DB");
info!("Initialize RPC server...");
let mut rpc_server = RpcServer::new(config.rpc_bind_addr, config.rpc_tls.clone());
info!("Initializing background runner..."); info!("Initializing background runner...");
let (send_cancel, watch_cancel) = watch::channel(false); let watch_cancel = netapp::util::watch_ctrl_c();
let (background, await_background_done) = BackgroundRunner::new(16, watch_cancel.clone()); let (background, await_background_done) = BackgroundRunner::new(16, watch_cancel.clone());
info!("Initializing Garage main data store..."); info!("Initializing Garage main data store...");
let garage = Garage::new(config.clone(), db, background, &mut rpc_server); let garage = Garage::new(config.clone(), db, background);
let bootstrap = garage.system.clone().bootstrap(
config.bootstrap_peers, let run_system = tokio::spawn(garage.system.clone().run(watch_cancel.clone()));
config.consul_host,
config.consul_service_name,
);
info!("Crate admin RPC handler..."); info!("Crate admin RPC handler...");
AdminRpcHandler::new(garage.clone()).register_handler(&mut rpc_server); AdminRpcHandler::new(garage.clone());
info!("Initializing RPC and API servers..."); info!("Initializing API server...");
let run_rpc_server = Arc::new(rpc_server).run(wait_from(watch_cancel.clone())); let api_server = tokio::spawn(run_api_server(
let api_server = run_api_server(garage.clone(), wait_from(watch_cancel.clone())); garage.clone(),
let web_server = run_web_server(garage, wait_from(watch_cancel.clone())); wait_from(watch_cancel.clone()),
));
futures::try_join!( info!("Initializing web server...");
bootstrap.map(|()| { let web_server = tokio::spawn(run_web_server(
info!("Bootstrap done"); garage.clone(),
Ok(()) wait_from(watch_cancel.clone()),
}), ));
run_rpc_server.map(|rv| {
info!("RPC server exited"); // Stuff runs
rv
}), // When a cancel signal is sent, stuff stops
api_server.map(|rv| { if let Err(e) = api_server.await? {
info!("API server exited"); warn!("API server exited with error: {}", e);
rv }
}), if let Err(e) = web_server.await? {
web_server.map(|rv| { warn!("Web server exited with error: {}", e);
info!("Web server exited"); }
rv
}), // Remove RPC handlers for system to break reference cycles
await_background_done.map(|rv| { garage.system.netapp.drop_all_handlers();
info!("Background runner exited: {:?}", rv);
Ok(()) // Await for last parts to end
}), run_system.await?;
shutdown_signal(send_cancel), await_background_done.await?;
)?;
info!("Cleaning up..."); info!("Cleaning up...");

View File

@ -1,6 +1,6 @@
[package] [package]
name = "garage_model" name = "garage_model"
version = "0.3.0" version = "0.4.0"
authors = ["Alex Auvolat <alex@adnab.me>"] authors = ["Alex Auvolat <alex@adnab.me>"]
edition = "2018" edition = "2018"
license = "AGPL-3.0" license = "AGPL-3.0"
@ -13,10 +13,11 @@ path = "lib.rs"
# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html # See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
[dependencies] [dependencies]
garage_rpc = { version = "0.3.0", path = "../rpc" } garage_rpc = { version = "0.4.0", path = "../rpc" }
garage_table = { version = "0.3.0", path = "../table" } garage_table = { version = "0.4.0", path = "../table" }
garage_util = { version = "0.3.0", path = "../util" } garage_util = { version = "0.4.0", path = "../util" }
async-trait = "0.1.7"
arc-swap = "1.0" arc-swap = "1.0"
hex = "0.4" hex = "0.4"
log = "0.4" log = "0.4"
@ -31,3 +32,5 @@ serde_bytes = "0.11"
futures = "0.3" futures = "0.3"
futures-util = "0.3" futures-util = "0.3"
tokio = { version = "1.0", default-features = false, features = ["rt", "rt-multi-thread", "io-util", "net", "time", "macros", "sync", "signal", "fs"] } tokio = { version = "1.0", default-features = false, features = ["rt", "rt-multi-thread", "io-util", "net", "time", "macros", "sync", "signal", "fs"] }
netapp = { version = "0.3.0", git = "https://git.deuxfleurs.fr/lx/netapp" }

View File

@ -3,6 +3,7 @@ use std::sync::Arc;
use std::time::Duration; use std::time::Duration;
use arc_swap::ArcSwapOption; use arc_swap::ArcSwapOption;
use async_trait::async_trait;
use futures::future::*; use futures::future::*;
use futures::select; use futures::select;
use serde::{Deserialize, Serialize}; use serde::{Deserialize, Serialize};
@ -14,9 +15,8 @@ use garage_util::data::*;
use garage_util::error::Error; use garage_util::error::Error;
use garage_util::time::*; use garage_util::time::*;
use garage_rpc::membership::System; use garage_rpc::system::System;
use garage_rpc::rpc_client::*; use garage_rpc::*;
use garage_rpc::rpc_server::*;
use garage_table::replication::{TableReplication, TableShardedReplication}; use garage_table::replication::{TableReplication, TableShardedReplication};
@ -36,8 +36,9 @@ const RESYNC_RETRY_TIMEOUT: Duration = Duration::from_secs(10);
/// RPC messages used to share blocks of data between nodes /// RPC messages used to share blocks of data between nodes
#[derive(Debug, Serialize, Deserialize)] #[derive(Debug, Serialize, Deserialize)]
pub enum Message { pub enum BlockRpc {
Ok, Ok,
Error(String),
/// Message to ask for a block of data, by hash /// Message to ask for a block of data, by hash
GetBlock(Hash), GetBlock(Hash),
/// Message to send a block of data, either because requested, of for first delivery of new /// Message to send a block of data, either because requested, of for first delivery of new
@ -60,7 +61,9 @@ pub struct PutBlockMessage {
pub data: Vec<u8>, pub data: Vec<u8>,
} }
impl RpcMessage for Message {} impl Message for BlockRpc {
type Response = BlockRpc;
}
/// The block manager, handling block exchange between nodes, and block storage on local node /// The block manager, handling block exchange between nodes, and block storage on local node
pub struct BlockManager { pub struct BlockManager {
@ -77,7 +80,7 @@ pub struct BlockManager {
resync_notify: Notify, resync_notify: Notify,
system: Arc<System>, system: Arc<System>,
rpc_client: Arc<RpcClient<Message>>, endpoint: Arc<Endpoint<BlockRpc, Self>>,
pub(crate) garage: ArcSwapOption<Garage>, pub(crate) garage: ArcSwapOption<Garage>,
} }
@ -87,7 +90,6 @@ impl BlockManager {
data_dir: PathBuf, data_dir: PathBuf,
replication: TableShardedReplication, replication: TableShardedReplication,
system: Arc<System>, system: Arc<System>,
rpc_server: &mut RpcServer,
) -> Arc<Self> { ) -> Arc<Self> {
let rc = db let rc = db
.open_tree("block_local_rc") .open_tree("block_local_rc")
@ -97,8 +99,7 @@ impl BlockManager {
.open_tree("block_local_resync_queue") .open_tree("block_local_resync_queue")
.expect("Unable to open block_local_resync_queue tree"); .expect("Unable to open block_local_resync_queue tree");
let rpc_path = "block_manager"; let endpoint = system.netapp.endpoint(format!("garage_model/block.rs/Rpc"));
let rpc_client = system.rpc_client::<Message>(rpc_path);
let block_manager = Arc::new(Self { let block_manager = Arc::new(Self {
replication, replication,
@ -108,35 +109,19 @@ impl BlockManager {
resync_queue, resync_queue,
resync_notify: Notify::new(), resync_notify: Notify::new(),
system, system,
rpc_client, endpoint,
garage: ArcSwapOption::from(None), garage: ArcSwapOption::from(None),
}); });
block_manager block_manager.endpoint.set_handler(block_manager.clone());
.clone()
.register_handler(rpc_server, rpc_path.into());
block_manager block_manager
} }
fn register_handler(self: Arc<Self>, rpc_server: &mut RpcServer, path: String) { async fn handle_rpc(self: Arc<Self>, msg: &BlockRpc) -> Result<BlockRpc, Error> {
let self2 = self.clone();
rpc_server.add_handler::<Message, _, _>(path, move |msg, _addr| {
let self2 = self2.clone();
async move { self2.handle(&msg).await }
});
let self2 = self.clone();
self.rpc_client
.set_local_handler(self.system.id, move |msg| {
let self2 = self2.clone();
async move { self2.handle(&msg).await }
});
}
async fn handle(self: Arc<Self>, msg: &Message) -> Result<Message, Error> {
match msg { match msg {
Message::PutBlock(m) => self.write_block(&m.hash, &m.data).await, BlockRpc::PutBlock(m) => self.write_block(&m.hash, &m.data).await,
Message::GetBlock(h) => self.read_block(h).await, BlockRpc::GetBlock(h) => self.read_block(h).await,
Message::NeedBlockQuery(h) => self.need_block(h).await.map(Message::NeedBlockReply), BlockRpc::NeedBlockQuery(h) => self.need_block(h).await.map(BlockRpc::NeedBlockReply),
_ => Err(Error::BadRpc("Unexpected RPC message".to_string())), _ => Err(Error::BadRpc("Unexpected RPC message".to_string())),
} }
} }
@ -157,7 +142,7 @@ impl BlockManager {
} }
/// Write a block to disk /// Write a block to disk
async fn write_block(&self, hash: &Hash, data: &[u8]) -> Result<Message, Error> { async fn write_block(&self, hash: &Hash, data: &[u8]) -> Result<BlockRpc, Error> {
let _lock = self.data_dir_lock.lock().await; let _lock = self.data_dir_lock.lock().await;
let mut path = self.block_dir(hash); let mut path = self.block_dir(hash);
@ -165,18 +150,18 @@ impl BlockManager {
path.push(hex::encode(hash)); path.push(hex::encode(hash));
if fs::metadata(&path).await.is_ok() { if fs::metadata(&path).await.is_ok() {
return Ok(Message::Ok); return Ok(BlockRpc::Ok);
} }
let mut f = fs::File::create(path).await?; let mut f = fs::File::create(path).await?;
f.write_all(data).await?; f.write_all(data).await?;
drop(f); drop(f);
Ok(Message::Ok) Ok(BlockRpc::Ok)
} }
/// Read block from disk, verifying it's integrity /// Read block from disk, verifying it's integrity
async fn read_block(&self, hash: &Hash) -> Result<Message, Error> { async fn read_block(&self, hash: &Hash) -> Result<BlockRpc, Error> {
let path = self.block_path(hash); let path = self.block_path(hash);
let mut f = match fs::File::open(&path).await { let mut f = match fs::File::open(&path).await {
@ -204,7 +189,7 @@ impl BlockManager {
return Err(Error::CorruptData(*hash)); return Err(Error::CorruptData(*hash));
} }
Ok(Message::PutBlock(PutBlockMessage { hash: *hash, data })) Ok(BlockRpc::PutBlock(PutBlockMessage { hash: *hash, data }))
} }
/// Check if this node should have a block, but don't actually have it /// Check if this node should have a block, but don't actually have it
@ -346,17 +331,22 @@ impl BlockManager {
} }
who.retain(|id| *id != self.system.id); who.retain(|id| *id != self.system.id);
let msg = Arc::new(Message::NeedBlockQuery(*hash)); let msg = Arc::new(BlockRpc::NeedBlockQuery(*hash));
let who_needs_fut = who.iter().map(|to| { let who_needs_fut = who.iter().map(|to| {
self.rpc_client self.system.rpc.call_arc(
.call_arc(*to, msg.clone(), NEED_BLOCK_QUERY_TIMEOUT) &self.endpoint,
*to,
msg.clone(),
RequestStrategy::with_priority(PRIO_NORMAL)
.with_timeout(NEED_BLOCK_QUERY_TIMEOUT),
)
}); });
let who_needs_resps = join_all(who_needs_fut).await; let who_needs_resps = join_all(who_needs_fut).await;
let mut need_nodes = vec![]; let mut need_nodes = vec![];
for (node, needed) in who.iter().zip(who_needs_resps.into_iter()) { for (node, needed) in who.iter().zip(who_needs_resps.into_iter()) {
match needed? { match needed? {
Message::NeedBlockReply(needed) => { BlockRpc::NeedBlockReply(needed) => {
if needed { if needed {
need_nodes.push(*node); need_nodes.push(*node);
} }
@ -377,11 +367,14 @@ impl BlockManager {
); );
let put_block_message = self.read_block(hash).await?; let put_block_message = self.read_block(hash).await?;
self.rpc_client self.system
.rpc
.try_call_many( .try_call_many(
&self.endpoint,
&need_nodes[..], &need_nodes[..],
put_block_message, put_block_message,
RequestStrategy::with_quorum(need_nodes.len()) RequestStrategy::with_priority(PRIO_NORMAL)
.with_quorum(need_nodes.len())
.with_timeout(BLOCK_RW_TIMEOUT), .with_timeout(BLOCK_RW_TIMEOUT),
) )
.await?; .await?;
@ -413,18 +406,21 @@ impl BlockManager {
pub async fn rpc_get_block(&self, hash: &Hash) -> Result<Vec<u8>, Error> { pub async fn rpc_get_block(&self, hash: &Hash) -> Result<Vec<u8>, Error> {
let who = self.replication.read_nodes(&hash); let who = self.replication.read_nodes(&hash);
let resps = self let resps = self
.rpc_client .system
.rpc
.try_call_many( .try_call_many(
&self.endpoint,
&who[..], &who[..],
Message::GetBlock(*hash), BlockRpc::GetBlock(*hash),
RequestStrategy::with_quorum(1) RequestStrategy::with_priority(PRIO_NORMAL)
.with_quorum(1)
.with_timeout(BLOCK_RW_TIMEOUT) .with_timeout(BLOCK_RW_TIMEOUT)
.interrupt_after_quorum(true), .interrupt_after_quorum(true),
) )
.await?; .await?;
for resp in resps { for resp in resps {
if let Message::PutBlock(msg) = resp { if let BlockRpc::PutBlock(msg) = resp {
return Ok(msg.data); return Ok(msg.data);
} }
} }
@ -437,11 +433,14 @@ impl BlockManager {
/// Send block to nodes that should have it /// Send block to nodes that should have it
pub async fn rpc_put_block(&self, hash: Hash, data: Vec<u8>) -> Result<(), Error> { pub async fn rpc_put_block(&self, hash: Hash, data: Vec<u8>) -> Result<(), Error> {
let who = self.replication.write_nodes(&hash); let who = self.replication.write_nodes(&hash);
self.rpc_client self.system
.rpc
.try_call_many( .try_call_many(
&self.endpoint,
&who[..], &who[..],
Message::PutBlock(PutBlockMessage { hash, data }), BlockRpc::PutBlock(PutBlockMessage { hash, data }),
RequestStrategy::with_quorum(self.replication.write_quorum()) RequestStrategy::with_priority(PRIO_NORMAL)
.with_quorum(self.replication.write_quorum())
.with_timeout(BLOCK_RW_TIMEOUT), .with_timeout(BLOCK_RW_TIMEOUT),
) )
.await?; .await?;
@ -531,6 +530,16 @@ impl BlockManager {
} }
} }
#[async_trait]
impl EndpointHandler<BlockRpc> for BlockManager {
async fn handle(self: &Arc<Self>, message: &BlockRpc, _from: NodeID) -> BlockRpc {
self.clone()
.handle_rpc(message)
.await
.unwrap_or_else(|e| BlockRpc::Error(format!("{}", e)))
}
}
fn u64_from_be_bytes<T: AsRef<[u8]>>(bytes: T) -> u64 { fn u64_from_be_bytes<T: AsRef<[u8]>>(bytes: T) -> u64 {
assert!(bytes.as_ref().len() == 8); assert!(bytes.as_ref().len() == 8);
let mut x8 = [0u8; 8]; let mut x8 = [0u8; 8];

View File

@ -1,11 +1,11 @@
use std::sync::Arc; use std::sync::Arc;
use netapp::NetworkKey;
use garage_util::background::*; use garage_util::background::*;
use garage_util::config::*; use garage_util::config::*;
use garage_rpc::membership::System; use garage_rpc::system::System;
use garage_rpc::rpc_client::RpcHttpClient;
use garage_rpc::rpc_server::RpcServer;
use garage_table::replication::ReplicationMode; use garage_table::replication::ReplicationMode;
use garage_table::replication::TableFullReplication; use garage_table::replication::TableFullReplication;
@ -45,26 +45,25 @@ pub struct Garage {
impl Garage { impl Garage {
/// Create and run garage /// Create and run garage
pub fn new( pub fn new(config: Config, db: sled::Db, background: Arc<BackgroundRunner>) -> Arc<Self> {
config: Config, let network_key = NetworkKey::from_slice(
db: sled::Db, &hex::decode(&config.rpc_secret).expect("Invalid RPC secret key")[..],
background: Arc<BackgroundRunner>, )
rpc_server: &mut RpcServer, .expect("Invalid RPC secret key");
) -> Arc<Self> {
let replication_mode = ReplicationMode::parse(&config.replication_mode) let replication_mode = ReplicationMode::parse(&config.replication_mode)
.expect("Invalid replication_mode in config file."); .expect("Invalid replication_mode in config file.");
info!("Initialize membership management system..."); info!("Initialize membership management system...");
let rpc_http_client = Arc::new(
RpcHttpClient::new(config.max_concurrent_rpc_requests, &config.rpc_tls)
.expect("Could not create RPC client"),
);
let system = System::new( let system = System::new(
network_key,
config.metadata_dir.clone(), config.metadata_dir.clone(),
rpc_http_client,
background.clone(), background.clone(),
rpc_server,
replication_mode.replication_factor(), replication_mode.replication_factor(),
config.rpc_bind_addr,
config.bootstrap_peers.clone(),
config.consul_host.clone(),
config.consul_service_name.clone(),
); );
let data_rep_param = TableShardedReplication { let data_rep_param = TableShardedReplication {
@ -87,13 +86,8 @@ impl Garage {
}; };
info!("Initialize block manager..."); info!("Initialize block manager...");
let block_manager = BlockManager::new( let block_manager =
&db, BlockManager::new(&db, config.data_dir.clone(), data_rep_param, system.clone());
config.data_dir.clone(),
data_rep_param,
system.clone(),
rpc_server,
);
info!("Initialize block_ref_table..."); info!("Initialize block_ref_table...");
let block_ref_table = Table::new( let block_ref_table = Table::new(
@ -104,7 +98,6 @@ impl Garage {
system.clone(), system.clone(),
&db, &db,
"block_ref".to_string(), "block_ref".to_string(),
rpc_server,
); );
info!("Initialize version_table..."); info!("Initialize version_table...");
@ -117,7 +110,6 @@ impl Garage {
system.clone(), system.clone(),
&db, &db,
"version".to_string(), "version".to_string(),
rpc_server,
); );
info!("Initialize object_table..."); info!("Initialize object_table...");
@ -130,7 +122,6 @@ impl Garage {
system.clone(), system.clone(),
&db, &db,
"object".to_string(), "object".to_string(),
rpc_server,
); );
info!("Initialize bucket_table..."); info!("Initialize bucket_table...");
@ -140,7 +131,6 @@ impl Garage {
system.clone(), system.clone(),
&db, &db,
"bucket".to_string(), "bucket".to_string(),
rpc_server,
); );
info!("Initialize key_table_table..."); info!("Initialize key_table_table...");
@ -150,7 +140,6 @@ impl Garage {
system.clone(), system.clone(),
&db, &db,
"key".to_string(), "key".to_string(),
rpc_server,
); );
info!("Initialize Garage..."); info!("Initialize Garage...");

View File

@ -1,6 +1,6 @@
[package] [package]
name = "garage_rpc" name = "garage_rpc"
version = "0.3.0" version = "0.4.0"
authors = ["Alex Auvolat <alex@adnab.me>"] authors = ["Alex Auvolat <alex@adnab.me>"]
edition = "2018" edition = "2018"
license = "AGPL-3.0" license = "AGPL-3.0"
@ -13,7 +13,7 @@ path = "lib.rs"
# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html # See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
[dependencies] [dependencies]
garage_util = { version = "0.3.0", path = "../util" } garage_util = { version = "0.4.0", path = "../util" }
garage_rpc_021 = { package = "garage_rpc", version = "0.2.1" } garage_rpc_021 = { package = "garage_rpc", version = "0.2.1" }
@ -22,7 +22,10 @@ bytes = "1.0"
gethostname = "0.2" gethostname = "0.2"
hex = "0.4" hex = "0.4"
log = "0.4" log = "0.4"
rand = "0.8"
sodiumoxide = { version = "0.2.5-0", package = "kuska-sodiumoxide" }
async-trait = "0.1.7"
rmp-serde = "0.15" rmp-serde = "0.15"
serde = { version = "1.0", default-features = false, features = ["derive", "rc"] } serde = { version = "1.0", default-features = false, features = ["derive", "rc"] }
serde_json = "1.0" serde_json = "1.0"
@ -32,11 +35,6 @@ futures-util = "0.3"
tokio = { version = "1.0", default-features = false, features = ["rt", "rt-multi-thread", "io-util", "net", "time", "macros", "sync", "signal", "fs"] } tokio = { version = "1.0", default-features = false, features = ["rt", "rt-multi-thread", "io-util", "net", "time", "macros", "sync", "signal", "fs"] }
tokio-stream = { version = "0.1", features = ["net"] } tokio-stream = { version = "0.1", features = ["net"] }
http = "0.2" netapp = { version = "0.3.0", git = "https://git.deuxfleurs.fr/lx/netapp" }
hyper = { version = "0.14", features = ["full"] } hyper = "0.14"
hyper-rustls = { version = "0.22", default-features = false }
rustls = "0.19"
tokio-rustls = "0.22"
webpki = "0.21"

View File

@ -4,10 +4,10 @@
extern crate log; extern crate log;
mod consul; mod consul;
pub(crate) mod tls_util;
pub mod membership;
pub mod ring; pub mod ring;
pub mod system;
pub mod rpc_client; pub mod rpc_helper;
pub mod rpc_server;
pub use rpc_helper::*;

View File

@ -1,722 +0,0 @@
//! Module containing structs related to membership management
use std::collections::HashMap;
use std::fmt::Write as FmtWrite;
use std::io::{Read, Write};
use std::net::{IpAddr, SocketAddr};
use std::path::{Path, PathBuf};
use std::sync::atomic::{AtomicUsize, Ordering};
use std::sync::Arc;
use std::time::Duration;
use futures::future::join_all;
use futures::select;
use futures_util::future::*;
use serde::{Deserialize, Serialize};
use tokio::sync::watch;
use tokio::sync::Mutex;
use garage_util::background::BackgroundRunner;
use garage_util::data::*;
use garage_util::error::Error;
use garage_util::persister::Persister;
use garage_util::time::*;
use crate::consul::get_consul_nodes;
use crate::ring::*;
use crate::rpc_client::*;
use crate::rpc_server::*;
const PING_INTERVAL: Duration = Duration::from_secs(10);
const DISCOVERY_INTERVAL: Duration = Duration::from_secs(60);
const PING_TIMEOUT: Duration = Duration::from_secs(2);
const MAX_FAILURES_BEFORE_CONSIDERED_DOWN: usize = 5;
/// RPC endpoint used for calls related to membership
pub const MEMBERSHIP_RPC_PATH: &str = "_membership";
/// RPC messages related to membership
#[derive(Debug, Serialize, Deserialize)]
pub enum Message {
/// Response to successfull advertisements
Ok,
/// Message sent to detect other nodes status
Ping(PingMessage),
/// Ask other node for the nodes it knows. Answered with AdvertiseNodesUp
PullStatus,
/// Ask other node its config. Answered with AdvertiseConfig
PullConfig,
/// Advertisement of nodes the host knows up. Sent spontanously or in response to PullStatus
AdvertiseNodesUp(Vec<AdvertisedNode>),
/// Advertisement of nodes config. Sent spontanously or in response to PullConfig
AdvertiseConfig(NetworkConfig),
}
impl RpcMessage for Message {}
/// A ping, containing informations about status and config
#[derive(Debug, Serialize, Deserialize)]
pub struct PingMessage {
id: Uuid,
rpc_port: u16,
status_hash: Hash,
config_version: u64,
state_info: StateInfo,
}
/// A node advertisement
#[derive(Clone, Debug, Serialize, Deserialize)]
pub struct AdvertisedNode {
/// Id of the node this advertisement relates to
pub id: Uuid,
/// IP and port of the node
pub addr: SocketAddr,
/// Is the node considered up
pub is_up: bool,
/// When was the node last seen up, in milliseconds since UNIX epoch
pub last_seen: u64,
pub state_info: StateInfo,
}
/// This node's membership manager
pub struct System {
/// The id of this node
pub id: Uuid,
persist_config: Persister<NetworkConfig>,
persist_status: Persister<Vec<AdvertisedNode>>,
rpc_local_port: u16,
state_info: StateInfo,
rpc_http_client: Arc<RpcHttpClient>,
rpc_client: Arc<RpcClient<Message>>,
replication_factor: usize,
pub(crate) status: watch::Receiver<Arc<Status>>,
/// The ring
pub ring: watch::Receiver<Arc<Ring>>,
update_lock: Mutex<Updaters>,
/// The job runner of this node
pub background: Arc<BackgroundRunner>,
}
struct Updaters {
update_status: watch::Sender<Arc<Status>>,
update_ring: watch::Sender<Arc<Ring>>,
}
/// The status of each nodes, viewed by this node
#[derive(Debug, Clone)]
pub struct Status {
/// Mapping of each node id to its known status
pub nodes: HashMap<Uuid, Arc<StatusEntry>>,
/// Hash of `nodes`, used to detect when nodes have different views of the cluster
pub hash: Hash,
}
/// The status of a single node
#[derive(Debug)]
pub struct StatusEntry {
/// The IP and port used to connect to this node
pub addr: SocketAddr,
/// Last time this node was seen
pub last_seen: u64,
/// Number of consecutive pings sent without reply to this node
pub num_failures: AtomicUsize,
pub state_info: StateInfo,
}
impl StatusEntry {
/// is the node associated to this entry considered up
pub fn is_up(&self) -> bool {
self.num_failures.load(Ordering::SeqCst) < MAX_FAILURES_BEFORE_CONSIDERED_DOWN
}
}
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct StateInfo {
/// Hostname of the node
pub hostname: String,
/// Replication factor configured on the node
pub replication_factor: Option<usize>, // TODO Option is just for retrocompatibility. It should become a simple usize at some point
}
impl Status {
fn handle_ping(&mut self, ip: IpAddr, info: &PingMessage) -> bool {
let addr = SocketAddr::new(ip, info.rpc_port);
let old_status = self.nodes.insert(
info.id,
Arc::new(StatusEntry {
addr,
last_seen: now_msec(),
num_failures: AtomicUsize::from(0),
state_info: info.state_info.clone(),
}),
);
match old_status {
None => {
info!("Newly pingable node: {}", hex::encode(&info.id));
true
}
Some(x) => x.addr != addr,
}
}
fn recalculate_hash(&mut self) {
let mut nodes = self.nodes.iter().collect::<Vec<_>>();
nodes.sort_unstable_by_key(|(id, _status)| *id);
let mut nodes_txt = String::new();
debug!("Current set of pingable nodes: --");
for (id, status) in nodes {
debug!("{} {}", hex::encode(&id), status.addr);
writeln!(&mut nodes_txt, "{} {}", hex::encode(&id), status.addr).unwrap();
}
debug!("END --");
self.hash = blake2sum(nodes_txt.as_bytes());
}
fn to_serializable_membership(&self, system: &System) -> Vec<AdvertisedNode> {
let mut mem = vec![];
for (node, status) in self.nodes.iter() {
let state_info = if *node == system.id {
system.state_info.clone()
} else {
status.state_info.clone()
};
mem.push(AdvertisedNode {
id: *node,
addr: status.addr,
is_up: status.is_up(),
last_seen: status.last_seen,
state_info,
});
}
mem
}
}
fn gen_node_id(metadata_dir: &Path) -> Result<Uuid, Error> {
let mut id_file = metadata_dir.to_path_buf();
id_file.push("node_id");
if id_file.as_path().exists() {
let mut f = std::fs::File::open(id_file.as_path())?;
let mut d = vec![];
f.read_to_end(&mut d)?;
if d.len() != 32 {
return Err(Error::Message("Corrupt node_id file".to_string()));
}
let mut id = [0u8; 32];
id.copy_from_slice(&d[..]);
Ok(id.into())
} else {
let id = gen_uuid();
let mut f = std::fs::File::create(id_file.as_path())?;
f.write_all(id.as_slice())?;
Ok(id)
}
}
impl System {
/// Create this node's membership manager
pub fn new(
metadata_dir: PathBuf,
rpc_http_client: Arc<RpcHttpClient>,
background: Arc<BackgroundRunner>,
rpc_server: &mut RpcServer,
replication_factor: usize,
) -> Arc<Self> {
let id = gen_node_id(&metadata_dir).expect("Unable to read or generate node ID");
info!("Node ID: {}", hex::encode(&id));
let persist_config = Persister::new(&metadata_dir, "network_config");
let persist_status = Persister::new(&metadata_dir, "peer_info");
let net_config = match persist_config.load() {
Ok(x) => x,
Err(e) => {
match Persister::<garage_rpc_021::ring::NetworkConfig>::new(
&metadata_dir,
"network_config",
)
.load()
{
Ok(old_config) => NetworkConfig::migrate_from_021(old_config),
Err(e2) => {
info!(
"No valid previous network configuration stored ({}, {}), starting fresh.",
e, e2
);
NetworkConfig::new()
}
}
}
};
let mut status = Status {
nodes: HashMap::new(),
hash: Hash::default(),
};
status.recalculate_hash();
let (update_status, status) = watch::channel(Arc::new(status));
let state_info = StateInfo {
hostname: gethostname::gethostname()
.into_string()
.unwrap_or_else(|_| "<invalid utf-8>".to_string()),
replication_factor: Some(replication_factor),
};
let ring = Ring::new(net_config, replication_factor);
let (update_ring, ring) = watch::channel(Arc::new(ring));
let rpc_path = MEMBERSHIP_RPC_PATH.to_string();
let rpc_client = RpcClient::new(
RpcAddrClient::<Message>::new(rpc_http_client.clone(), rpc_path.clone()),
background.clone(),
status.clone(),
);
let sys = Arc::new(System {
id,
persist_config,
persist_status,
rpc_local_port: rpc_server.bind_addr.port(),
state_info,
rpc_http_client,
rpc_client,
replication_factor,
status,
ring,
update_lock: Mutex::new(Updaters {
update_status,
update_ring,
}),
background,
});
sys.clone().register_handler(rpc_server, rpc_path);
sys
}
fn register_handler(self: Arc<Self>, rpc_server: &mut RpcServer, path: String) {
rpc_server.add_handler::<Message, _, _>(path, move |msg, addr| {
let self2 = self.clone();
async move {
match msg {
Message::Ping(ping) => self2.handle_ping(&addr, &ping).await,
Message::PullStatus => Ok(self2.handle_pull_status()),
Message::PullConfig => Ok(self2.handle_pull_config()),
Message::AdvertiseNodesUp(adv) => self2.handle_advertise_nodes_up(&adv).await,
Message::AdvertiseConfig(adv) => self2.handle_advertise_config(&adv).await,
_ => Err(Error::BadRpc("Unexpected RPC message".to_string())),
}
}
});
}
/// Get an RPC client
pub fn rpc_client<M: RpcMessage + 'static>(self: &Arc<Self>, path: &str) -> Arc<RpcClient<M>> {
RpcClient::new(
RpcAddrClient::new(self.rpc_http_client.clone(), path.to_string()),
self.background.clone(),
self.status.clone(),
)
}
/// Save network configuration to disc
async fn save_network_config(self: Arc<Self>) -> Result<(), Error> {
let ring = self.ring.borrow().clone();
self.persist_config
.save_async(&ring.config)
.await
.expect("Cannot save current cluster configuration");
Ok(())
}
fn make_ping(&self) -> Message {
let status = self.status.borrow().clone();
let ring = self.ring.borrow().clone();
Message::Ping(PingMessage {
id: self.id,
rpc_port: self.rpc_local_port,
status_hash: status.hash,
config_version: ring.config.version,
state_info: self.state_info.clone(),
})
}
async fn broadcast(self: Arc<Self>, msg: Message, timeout: Duration) {
let status = self.status.borrow().clone();
let to = status
.nodes
.keys()
.filter(|x| **x != self.id)
.cloned()
.collect::<Vec<_>>();
self.rpc_client.call_many(&to[..], msg, timeout).await;
}
/// Perform bootstraping, starting the ping loop
pub async fn bootstrap(
self: Arc<Self>,
peers: Vec<SocketAddr>,
consul_host: Option<String>,
consul_service_name: Option<String>,
) {
let self2 = self.clone();
self.background
.spawn_worker("discovery loop".to_string(), |stop_signal| {
self2.discovery_loop(peers, consul_host, consul_service_name, stop_signal)
});
let self2 = self.clone();
self.background
.spawn_worker("ping loop".to_string(), |stop_signal| {
self2.ping_loop(stop_signal)
});
}
async fn ping_nodes(self: Arc<Self>, peers: Vec<(SocketAddr, Option<Uuid>)>) {
let ping_msg = self.make_ping();
let ping_resps = join_all(peers.iter().map(|(addr, id_option)| {
let sys = self.clone();
let ping_msg_ref = &ping_msg;
async move {
(
id_option,
addr,
sys.rpc_client
.by_addr()
.call(&addr, ping_msg_ref, PING_TIMEOUT)
.await,
)
}
}))
.await;
let update_locked = self.update_lock.lock().await;
let mut status: Status = self.status.borrow().as_ref().clone();
let ring = self.ring.borrow().clone();
let mut has_changes = false;
let mut to_advertise = vec![];
for (id_option, addr, ping_resp) in ping_resps {
if let Ok(Ok(Message::Ping(info))) = ping_resp {
let is_new = status.handle_ping(addr.ip(), &info);
if is_new {
has_changes = true;
to_advertise.push(AdvertisedNode {
id: info.id,
addr: *addr,
is_up: true,
last_seen: now_msec(),
state_info: info.state_info.clone(),
});
}
if is_new || status.hash != info.status_hash {
self.background
.spawn_cancellable(self.clone().pull_status(info.id).map(Ok));
}
if is_new || ring.config.version < info.config_version {
self.background
.spawn_cancellable(self.clone().pull_config(info.id).map(Ok));
}
} else if let Some(id) = id_option {
if let Some(st) = status.nodes.get_mut(id) {
// we need to increment failure counter as call was done using by_addr so the
// counter was not auto-incremented
st.num_failures.fetch_add(1, Ordering::SeqCst);
if !st.is_up() {
warn!("Node {:?} seems to be down.", id);
if !ring.config.members.contains_key(id) {
info!("Removing node {:?} from status (not in config and not responding to pings anymore)", id);
status.nodes.remove(&id);
has_changes = true;
}
}
}
}
}
if has_changes {
status.recalculate_hash();
}
self.update_status(&update_locked, status).await;
drop(update_locked);
if !to_advertise.is_empty() {
self.broadcast(Message::AdvertiseNodesUp(to_advertise), PING_TIMEOUT)
.await;
}
}
async fn handle_ping(
self: Arc<Self>,
from: &SocketAddr,
ping: &PingMessage,
) -> Result<Message, Error> {
let update_locked = self.update_lock.lock().await;
let mut status: Status = self.status.borrow().as_ref().clone();
let is_new = status.handle_ping(from.ip(), ping);
if is_new {
status.recalculate_hash();
}
let status_hash = status.hash;
let config_version = self.ring.borrow().config.version;
self.update_status(&update_locked, status).await;
drop(update_locked);
if is_new || status_hash != ping.status_hash {
self.background
.spawn_cancellable(self.clone().pull_status(ping.id).map(Ok));
}
if is_new || config_version < ping.config_version {
self.background
.spawn_cancellable(self.clone().pull_config(ping.id).map(Ok));
}
Ok(self.make_ping())
}
fn handle_pull_status(&self) -> Message {
Message::AdvertiseNodesUp(self.status.borrow().to_serializable_membership(self))
}
fn handle_pull_config(&self) -> Message {
let ring = self.ring.borrow().clone();
Message::AdvertiseConfig(ring.config.clone())
}
async fn handle_advertise_nodes_up(
self: Arc<Self>,
adv: &[AdvertisedNode],
) -> Result<Message, Error> {
let mut to_ping = vec![];
let update_lock = self.update_lock.lock().await;
let mut status: Status = self.status.borrow().as_ref().clone();
let mut has_changed = false;
let mut max_replication_factor = 0;
for node in adv.iter() {
if node.id == self.id {
// learn our own ip address
let self_addr = SocketAddr::new(node.addr.ip(), self.rpc_local_port);
let old_self = status.nodes.insert(
node.id,
Arc::new(StatusEntry {
addr: self_addr,
last_seen: now_msec(),
num_failures: AtomicUsize::from(0),
state_info: self.state_info.clone(),
}),
);
has_changed = match old_self {
None => true,
Some(x) => x.addr != self_addr,
};
} else {
let ping_them = match status.nodes.get(&node.id) {
// Case 1: new node
None => true,
// Case 2: the node might have changed address
Some(our_node) => node.is_up && !our_node.is_up() && our_node.addr != node.addr,
};
max_replication_factor = std::cmp::max(
max_replication_factor,
node.state_info.replication_factor.unwrap_or_default(),
);
if ping_them {
to_ping.push((node.addr, Some(node.id)));
}
}
}
if self.replication_factor < max_replication_factor {
error!("Some node have a higher replication factor ({}) than this one ({}). This is not supported and might lead to bugs",
max_replication_factor,
self.replication_factor);
std::process::exit(1);
}
if has_changed {
status.recalculate_hash();
}
self.update_status(&update_lock, status).await;
drop(update_lock);
if !to_ping.is_empty() {
self.background
.spawn_cancellable(self.clone().ping_nodes(to_ping).map(Ok));
}
Ok(Message::Ok)
}
async fn handle_advertise_config(
self: Arc<Self>,
adv: &NetworkConfig,
) -> Result<Message, Error> {
let update_lock = self.update_lock.lock().await;
let ring: Arc<Ring> = self.ring.borrow().clone();
if adv.version > ring.config.version {
let ring = Ring::new(adv.clone(), self.replication_factor);
update_lock.update_ring.send(Arc::new(ring))?;
drop(update_lock);
self.background.spawn_cancellable(
self.clone()
.broadcast(Message::AdvertiseConfig(adv.clone()), PING_TIMEOUT)
.map(Ok),
);
self.background.spawn(self.clone().save_network_config());
}
Ok(Message::Ok)
}
async fn ping_loop(self: Arc<Self>, mut stop_signal: watch::Receiver<bool>) {
while !*stop_signal.borrow() {
let restart_at = tokio::time::sleep(PING_INTERVAL);
let status = self.status.borrow().clone();
let ping_addrs = status
.nodes
.iter()
.filter(|(id, _)| **id != self.id)
.map(|(id, status)| (status.addr, Some(*id)))
.collect::<Vec<_>>();
self.clone().ping_nodes(ping_addrs).await;
select! {
_ = restart_at.fuse() => {},
_ = stop_signal.changed().fuse() => {},
}
}
}
async fn discovery_loop(
self: Arc<Self>,
bootstrap_peers: Vec<SocketAddr>,
consul_host: Option<String>,
consul_service_name: Option<String>,
mut stop_signal: watch::Receiver<bool>,
) {
let consul_config = match (consul_host, consul_service_name) {
(Some(ch), Some(csn)) => Some((ch, csn)),
_ => None,
};
while !*stop_signal.borrow() {
let not_configured = self.ring.borrow().config.members.is_empty();
let no_peers = self.status.borrow().nodes.len() < 3;
let bad_peers = self
.status
.borrow()
.nodes
.iter()
.filter(|(_, v)| v.is_up())
.count() != self.ring.borrow().config.members.len();
if not_configured || no_peers || bad_peers {
info!("Doing a bootstrap/discovery step (not_configured: {}, no_peers: {}, bad_peers: {})", not_configured, no_peers, bad_peers);
let mut ping_list = bootstrap_peers
.iter()
.map(|ip| (*ip, None))
.collect::<Vec<_>>();
if let Ok(peers) = self.persist_status.load_async().await {
ping_list.extend(peers.iter().map(|x| (x.addr, Some(x.id))));
}
if let Some((consul_host, consul_service_name)) = &consul_config {
match get_consul_nodes(consul_host, consul_service_name).await {
Ok(node_list) => {
ping_list.extend(node_list.iter().map(|a| (*a, None)));
}
Err(e) => {
warn!("Could not retrieve node list from Consul: {}", e);
}
}
}
self.clone().ping_nodes(ping_list).await;
}
let restart_at = tokio::time::sleep(DISCOVERY_INTERVAL);
select! {
_ = restart_at.fuse() => {},
_ = stop_signal.changed().fuse() => {},
}
}
}
// for some reason fixing this is causing compilation error, see https://github.com/rust-lang/rust-clippy/issues/7052
#[allow(clippy::manual_async_fn)]
fn pull_status(
self: Arc<Self>,
peer: Uuid,
) -> impl futures::future::Future<Output = ()> + Send + 'static {
async move {
let resp = self
.rpc_client
.call(peer, Message::PullStatus, PING_TIMEOUT)
.await;
if let Ok(Message::AdvertiseNodesUp(nodes)) = resp {
let _: Result<_, _> = self.handle_advertise_nodes_up(&nodes).await;
}
}
}
async fn pull_config(self: Arc<Self>, peer: Uuid) {
let resp = self
.rpc_client
.call(peer, Message::PullConfig, PING_TIMEOUT)
.await;
if let Ok(Message::AdvertiseConfig(config)) = resp {
let _: Result<_, _> = self.handle_advertise_config(&config).await;
}
}
async fn update_status(self: &Arc<Self>, updaters: &Updaters, status: Status) {
if status.hash != self.status.borrow().hash {
let mut list = status.to_serializable_membership(&self);
// Combine with old peer list to make sure no peer is lost
if let Ok(old_list) = self.persist_status.load_async().await {
for pp in old_list {
if !list.iter().any(|np| pp.id == np.id) {
list.push(pp);
}
}
}
if !list.is_empty() {
info!("Persisting new peer list ({} peers)", list.len());
self.persist_status
.save_async(&list)
.await
.expect("Unable to persist peer list");
}
}
updaters
.update_status
.send(Arc::new(status))
.expect("Could not update internal membership status");
}
}

View File

@ -3,6 +3,8 @@
use std::collections::{HashMap, HashSet}; use std::collections::{HashMap, HashSet};
use std::convert::TryInto; use std::convert::TryInto;
use netapp::NodeID;
use serde::{Deserialize, Serialize}; use serde::{Deserialize, Serialize};
use garage_util::data::*; use garage_util::data::*;
@ -98,7 +100,7 @@ pub struct Ring {
pub config: NetworkConfig, pub config: NetworkConfig,
// Internal order of nodes used to make a more compact representation of the ring // Internal order of nodes used to make a more compact representation of the ring
nodes: Vec<Uuid>, nodes: Vec<NodeID>,
// The list of entries in the ring // The list of entries in the ring
ring: Vec<RingEntry>, ring: Vec<RingEntry>,
@ -260,6 +262,11 @@ impl Ring {
}) })
.collect::<Vec<_>>(); .collect::<Vec<_>>();
let nodes = nodes
.iter()
.map(|id| NodeID::from_slice(id.as_slice()).unwrap())
.collect::<Vec<_>>();
Self { Self {
replication_factor, replication_factor,
config, config,
@ -291,7 +298,7 @@ impl Ring {
} }
/// Walk the ring to find the n servers in which data should be replicated /// Walk the ring to find the n servers in which data should be replicated
pub fn get_nodes(&self, position: &Hash, n: usize) -> Vec<Uuid> { pub fn get_nodes(&self, position: &Hash, n: usize) -> Vec<NodeID> {
if self.ring.len() != 1 << PARTITION_BITS { if self.ring.len() != 1 << PARTITION_BITS {
warn!("Ring not yet ready, read/writes will be lost!"); warn!("Ring not yet ready, read/writes will be lost!");
return vec![]; return vec![];

View File

@ -1,369 +0,0 @@
//! Contain structs related to making RPCs
use std::borrow::Borrow;
use std::marker::PhantomData;
use std::net::SocketAddr;
use std::pin::Pin;
use std::sync::atomic::Ordering;
use std::sync::Arc;
use std::time::Duration;
use arc_swap::ArcSwapOption;
use futures::future::Future;
use futures::stream::futures_unordered::FuturesUnordered;
use futures::stream::StreamExt;
use futures_util::future::FutureExt;
use hyper::client::{Client, HttpConnector};
use hyper::{Body, Method, Request};
use tokio::sync::{watch, Semaphore};
use garage_util::background::BackgroundRunner;
use garage_util::config::TlsConfig;
use garage_util::data::*;
use garage_util::error::{Error, RpcError};
use crate::membership::Status;
use crate::rpc_server::RpcMessage;
use crate::tls_util;
const DEFAULT_TIMEOUT: Duration = Duration::from_secs(10);
/// Strategy to apply when making RPC
#[derive(Copy, Clone)]
pub struct RequestStrategy {
/// Max time to wait for reponse
pub rs_timeout: Duration,
/// Min number of response to consider the request successful
pub rs_quorum: usize,
/// Should requests be dropped after enough response are received
pub rs_interrupt_after_quorum: bool,
}
impl RequestStrategy {
/// Create a RequestStrategy with default timeout and not interrupting when quorum reached
pub fn with_quorum(quorum: usize) -> Self {
RequestStrategy {
rs_timeout: DEFAULT_TIMEOUT,
rs_quorum: quorum,
rs_interrupt_after_quorum: false,
}
}
/// Set timeout of the strategy
pub fn with_timeout(mut self, timeout: Duration) -> Self {
self.rs_timeout = timeout;
self
}
/// Set if requests can be dropped after quorum has been reached
/// In general true for read requests, and false for write
pub fn interrupt_after_quorum(mut self, interrupt: bool) -> Self {
self.rs_interrupt_after_quorum = interrupt;
self
}
}
/// Shortcut for a boxed async function taking a message, and resolving to another message or an
/// error
pub type LocalHandlerFn<M> =
Box<dyn Fn(Arc<M>) -> Pin<Box<dyn Future<Output = Result<M, Error>> + Send>> + Send + Sync>;
/// Client used to send RPC
pub struct RpcClient<M: RpcMessage> {
status: watch::Receiver<Arc<Status>>,
background: Arc<BackgroundRunner>,
local_handler: ArcSwapOption<(Uuid, LocalHandlerFn<M>)>,
rpc_addr_client: RpcAddrClient<M>,
}
impl<M: RpcMessage + 'static> RpcClient<M> {
/// Create a new RpcClient from an address, a job runner, and the status of all RPC servers
pub fn new(
rac: RpcAddrClient<M>,
background: Arc<BackgroundRunner>,
status: watch::Receiver<Arc<Status>>,
) -> Arc<Self> {
Arc::new(Self {
rpc_addr_client: rac,
background,
status,
local_handler: ArcSwapOption::new(None),
})
}
/// Set the local handler, to process RPC to this node without network usage
pub fn set_local_handler<F, Fut>(&self, my_id: Uuid, handler: F)
where
F: Fn(Arc<M>) -> Fut + Send + Sync + 'static,
Fut: Future<Output = Result<M, Error>> + Send + 'static,
{
let handler_arc = Arc::new(handler);
let handler: LocalHandlerFn<M> = Box::new(move |msg| {
let handler_arc2 = handler_arc.clone();
Box::pin(async move { handler_arc2(msg).await })
});
self.local_handler.swap(Some(Arc::new((my_id, handler))));
}
/// Get a RPC client to make calls using node's SocketAddr instead of its ID
pub fn by_addr(&self) -> &RpcAddrClient<M> {
&self.rpc_addr_client
}
/// Make a RPC call
pub async fn call(&self, to: Uuid, msg: M, timeout: Duration) -> Result<M, Error> {
self.call_arc(to, Arc::new(msg), timeout).await
}
/// Make a RPC call from a message stored in an Arc
pub async fn call_arc(&self, to: Uuid, msg: Arc<M>, timeout: Duration) -> Result<M, Error> {
if let Some(lh) = self.local_handler.load_full() {
let (my_id, local_handler) = lh.as_ref();
if to.borrow() == my_id {
return local_handler(msg).await;
}
}
let status = self.status.borrow().clone();
let node_status = match status.nodes.get(&to) {
Some(node_status) => {
if node_status.is_up() {
node_status
} else {
return Err(Error::from(RpcError::NodeDown(to)));
}
}
None => {
return Err(Error::Message(format!(
"Peer ID not found: {:?}",
to.borrow()
)))
}
};
match self
.rpc_addr_client
.call(&node_status.addr, msg, timeout)
.await
{
Err(rpc_error) => {
node_status.num_failures.fetch_add(1, Ordering::SeqCst);
Err(Error::from(rpc_error))
}
Ok(x) => x,
}
}
/// Make a RPC call to multiple servers, returning a Vec containing each result
pub async fn call_many(&self, to: &[Uuid], msg: M, timeout: Duration) -> Vec<Result<M, Error>> {
let msg = Arc::new(msg);
let mut resp_stream = to
.iter()
.map(|to| self.call_arc(*to, msg.clone(), timeout))
.collect::<FuturesUnordered<_>>();
let mut results = vec![];
while let Some(resp) = resp_stream.next().await {
results.push(resp);
}
results
}
/// Make a RPC call to multiple servers, returning either a Vec of responses, or an error if
/// strategy could not be respected due to too many errors
pub async fn try_call_many(
self: &Arc<Self>,
to: &[Uuid],
msg: M,
strategy: RequestStrategy,
) -> Result<Vec<M>, Error> {
let timeout = strategy.rs_timeout;
let msg = Arc::new(msg);
let mut resp_stream = to
.to_vec()
.into_iter()
.map(|to| {
let self2 = self.clone();
let msg = msg.clone();
async move { self2.call_arc(to, msg, timeout).await }
})
.collect::<FuturesUnordered<_>>();
let mut results = vec![];
let mut errors = vec![];
while let Some(resp) = resp_stream.next().await {
match resp {
Ok(msg) => {
results.push(msg);
if results.len() >= strategy.rs_quorum {
break;
}
}
Err(e) => {
errors.push(e);
}
}
}
if results.len() >= strategy.rs_quorum {
// Continue requests in background.
// Continue the remaining requests immediately using tokio::spawn
// but enqueue a task in the background runner
// to ensure that the process won't exit until the requests are done
// (if we had just enqueued the resp_stream.collect directly in the background runner,
// the requests might have been put on hold in the background runner's queue,
// in which case they might timeout or otherwise fail)
if !strategy.rs_interrupt_after_quorum {
let wait_finished_fut = tokio::spawn(async move {
resp_stream.collect::<Vec<_>>().await;
});
self.background.spawn(wait_finished_fut.map(|_| Ok(())));
}
Ok(results)
} else {
let errors = errors.iter().map(|e| format!("{}", e)).collect::<Vec<_>>();
Err(Error::from(RpcError::TooManyErrors(errors)))
}
}
}
/// Thin wrapper arround an `RpcHttpClient` specifying the path of the request
pub struct RpcAddrClient<M: RpcMessage> {
phantom: PhantomData<M>,
http_client: Arc<RpcHttpClient>,
path: String,
}
impl<M: RpcMessage> RpcAddrClient<M> {
/// Create an RpcAddrClient from an HTTP client and the endpoint to reach for RPCs
pub fn new(http_client: Arc<RpcHttpClient>, path: String) -> Self {
Self {
phantom: PhantomData::default(),
http_client,
path,
}
}
/// Make a RPC
pub async fn call<MB>(
&self,
to_addr: &SocketAddr,
msg: MB,
timeout: Duration,
) -> Result<Result<M, Error>, RpcError>
where
MB: Borrow<M>,
{
self.http_client
.call(&self.path, to_addr, msg, timeout)
.await
}
}
/// HTTP client used to make RPCs
pub struct RpcHttpClient {
request_limiter: Semaphore,
method: ClientMethod,
}
enum ClientMethod {
Http(Client<HttpConnector, hyper::Body>),
Https(Client<tls_util::HttpsConnectorFixedDnsname<HttpConnector>, hyper::Body>),
}
impl RpcHttpClient {
/// Create a new RpcHttpClient
pub fn new(
max_concurrent_requests: usize,
tls_config: &Option<TlsConfig>,
) -> Result<Self, Error> {
let method = if let Some(cf) = tls_config {
let ca_certs = tls_util::load_certs(&cf.ca_cert).map_err(|e| {
Error::Message(format!("Failed to open CA certificate file: {:?}", e))
})?;
let node_certs = tls_util::load_certs(&cf.node_cert)
.map_err(|e| Error::Message(format!("Failed to open certificate file: {:?}", e)))?;
let node_key = tls_util::load_private_key(&cf.node_key)
.map_err(|e| Error::Message(format!("Failed to open private key file: {:?}", e)))?;
let mut config = rustls::ClientConfig::new();
for crt in ca_certs.iter() {
config.root_store.add(crt)?;
}
config.set_single_client_cert([&node_certs[..], &ca_certs[..]].concat(), node_key)?;
let connector =
tls_util::HttpsConnectorFixedDnsname::<HttpConnector>::new(config, "garage");
ClientMethod::Https(Client::builder().build(connector))
} else {
ClientMethod::Http(Client::new())
};
Ok(RpcHttpClient {
method,
request_limiter: Semaphore::new(max_concurrent_requests),
})
}
/// Make a RPC
async fn call<M, MB>(
&self,
path: &str,
to_addr: &SocketAddr,
msg: MB,
timeout: Duration,
) -> Result<Result<M, Error>, RpcError>
where
MB: Borrow<M>,
M: RpcMessage,
{
let uri = match self.method {
ClientMethod::Http(_) => format!("http://{}/{}", to_addr, path),
ClientMethod::Https(_) => format!("https://{}/{}", to_addr, path),
};
let req = Request::builder()
.method(Method::POST)
.uri(uri)
.body(Body::from(rmp_to_vec_all_named(msg.borrow())?))?;
let resp_fut = match &self.method {
ClientMethod::Http(client) => client.request(req).fuse(),
ClientMethod::Https(client) => client.request(req).fuse(),
};
trace!("({}) Acquiring request_limiter slot...", path);
let slot = self.request_limiter.acquire().await;
trace!("({}) Got slot, doing request to {}...", path, to_addr);
let resp = tokio::time::timeout(timeout, resp_fut)
.await
.map_err(|e| {
debug!(
"RPC timeout to {}: {}",
to_addr,
debug_serialize(msg.borrow())
);
e
})?
.map_err(|e| {
warn!(
"RPC HTTP client error when connecting to {}: {}",
to_addr, e
);
e
})?;
let status = resp.status();
trace!("({}) Request returned, got status {}", path, status);
let body = hyper::body::to_bytes(resp.into_body()).await?;
drop(slot);
match rmp_serde::decode::from_read::<_, Result<M, String>>(&body[..])? {
Err(e) => Ok(Err(Error::RemoteError(e, status))),
Ok(x) => Ok(Ok(x)),
}
}
}

206
src/rpc/rpc_helper.rs Normal file
View File

@ -0,0 +1,206 @@
//! Contain structs related to making RPCs
use std::sync::Arc;
use std::time::Duration;
use futures::future::join_all;
use futures::stream::futures_unordered::FuturesUnordered;
use futures::stream::StreamExt;
use futures_util::future::FutureExt;
use tokio::select;
pub use netapp::endpoint::{Endpoint, EndpointHandler, Message};
use netapp::peering::fullmesh::FullMeshPeeringStrategy;
pub use netapp::proto::*;
pub use netapp::{NetApp, NodeID};
use garage_util::background::BackgroundRunner;
use garage_util::error::{Error, RpcError};
const DEFAULT_TIMEOUT: Duration = Duration::from_secs(10);
/// Strategy to apply when making RPC
#[derive(Copy, Clone)]
pub struct RequestStrategy {
/// Max time to wait for reponse
pub rs_timeout: Duration,
/// Min number of response to consider the request successful
pub rs_quorum: Option<usize>,
/// Should requests be dropped after enough response are received
pub rs_interrupt_after_quorum: bool,
/// Request priority
pub rs_priority: RequestPriority,
}
impl RequestStrategy {
/// Create a RequestStrategy with default timeout and not interrupting when quorum reached
pub fn with_priority(prio: RequestPriority) -> Self {
RequestStrategy {
rs_timeout: DEFAULT_TIMEOUT,
rs_quorum: None,
rs_interrupt_after_quorum: false,
rs_priority: prio,
}
}
/// Set quorum to be reached for request
pub fn with_quorum(mut self, quorum: usize) -> Self {
self.rs_quorum = Some(quorum);
self
}
/// Set timeout of the strategy
pub fn with_timeout(mut self, timeout: Duration) -> Self {
self.rs_timeout = timeout;
self
}
/// Set if requests can be dropped after quorum has been reached
/// In general true for read requests, and false for write
pub fn interrupt_after_quorum(mut self, interrupt: bool) -> Self {
self.rs_interrupt_after_quorum = interrupt;
self
}
}
#[derive(Clone)]
pub struct RpcHelper {
pub(crate) fullmesh: Arc<FullMeshPeeringStrategy>,
pub(crate) background: Arc<BackgroundRunner>,
}
impl RpcHelper {
pub async fn call<M, H>(
&self,
endpoint: &Endpoint<M, H>,
to: NodeID,
msg: M,
strat: RequestStrategy,
) -> Result<M::Response, Error>
where
M: Message,
H: EndpointHandler<M>,
{
self.call_arc(endpoint, to, Arc::new(msg), strat).await
}
pub async fn call_arc<M, H>(
&self,
endpoint: &Endpoint<M, H>,
to: NodeID,
msg: Arc<M>,
strat: RequestStrategy,
) -> Result<M::Response, Error>
where
M: Message,
H: EndpointHandler<M>,
{
select! {
res = endpoint.call(&to, &msg, strat.rs_priority) => Ok(res?),
_ = tokio::time::sleep(strat.rs_timeout) => Err(Error::Rpc(RpcError::Timeout)),
}
}
pub async fn call_many<M, H>(
&self,
endpoint: &Endpoint<M, H>,
to: &[NodeID],
msg: M,
strat: RequestStrategy,
) -> Vec<(NodeID, Result<M::Response, Error>)>
where
M: Message,
H: EndpointHandler<M>,
{
let msg = Arc::new(msg);
let resps = join_all(
to.iter()
.map(|to| self.call_arc(endpoint, *to, msg.clone(), strat)),
)
.await;
to.iter()
.cloned()
.zip(resps.into_iter())
.collect::<Vec<_>>()
}
pub async fn broadcast<M, H>(
&self,
endpoint: &Endpoint<M, H>,
msg: M,
strat: RequestStrategy,
) -> Vec<(NodeID, Result<M::Response, Error>)>
where
M: Message,
H: EndpointHandler<M>,
{
let to = self
.fullmesh
.get_peer_list()
.iter()
.map(|p| p.id)
.collect::<Vec<_>>();
self.call_many(endpoint, &to[..], msg, strat).await
}
/// Make a RPC call to multiple servers, returning either a Vec of responses, or an error if
/// strategy could not be respected due to too many errors
pub async fn try_call_many<M, H>(
&self,
endpoint: &Arc<Endpoint<M, H>>,
to: &[NodeID],
msg: M,
strategy: RequestStrategy,
) -> Result<Vec<M::Response>, Error>
where
M: Message + 'static,
H: EndpointHandler<M> + 'static,
{
let msg = Arc::new(msg);
let mut resp_stream = to
.to_vec()
.into_iter()
.map(|to| {
let self2 = self.clone();
let msg = msg.clone();
let endpoint2 = endpoint.clone();
async move { self2.call_arc(&endpoint2, to, msg, strategy).await }
})
.collect::<FuturesUnordered<_>>();
let mut results = vec![];
let mut errors = vec![];
let quorum = strategy.rs_quorum.unwrap_or(to.len());
while let Some(resp) = resp_stream.next().await {
match resp {
Ok(msg) => {
results.push(msg);
if results.len() >= quorum {
break;
}
}
Err(e) => {
errors.push(e);
}
}
}
if results.len() >= quorum {
// Continue requests in background.
// Continue the remaining requests immediately using tokio::spawn
// but enqueue a task in the background runner
// to ensure that the process won't exit until the requests are done
// (if we had just enqueued the resp_stream.collect directly in the background runner,
// the requests might have been put on hold in the background runner's queue,
// in which case they might timeout or otherwise fail)
if !strategy.rs_interrupt_after_quorum {
let wait_finished_fut = tokio::spawn(async move {
resp_stream.collect::<Vec<_>>().await;
});
self.background.spawn(wait_finished_fut.map(|_| Ok(())));
}
Ok(results)
} else {
let errors = errors.iter().map(|e| format!("{}", e)).collect::<Vec<_>>();
Err(Error::from(RpcError::TooManyErrors(errors)))
}
}
}

View File

@ -1,247 +0,0 @@
//! Contains structs related to receiving RPCs
use std::collections::HashMap;
use std::net::SocketAddr;
use std::pin::Pin;
use std::sync::Arc;
use std::time::Instant;
use futures::future::Future;
use futures_util::future::*;
use futures_util::stream::*;
use hyper::server::conn::AddrStream;
use hyper::service::{make_service_fn, service_fn};
use hyper::{Body, Method, Request, Response, Server, StatusCode};
use serde::{Deserialize, Serialize};
use tokio::net::{TcpListener, TcpStream};
use tokio_rustls::server::TlsStream;
use tokio_rustls::TlsAcceptor;
use tokio_stream::wrappers::TcpListenerStream;
use garage_util::config::TlsConfig;
use garage_util::data::*;
use garage_util::error::Error;
use crate::tls_util;
/// Trait for messages that can be sent as RPC
pub trait RpcMessage: Serialize + for<'de> Deserialize<'de> + Send + Sync {}
type ResponseFuture = Pin<Box<dyn Future<Output = Result<Response<Body>, Error>> + Send>>;
type Handler = Box<dyn Fn(Request<Body>, SocketAddr) -> ResponseFuture + Send + Sync>;
/// Structure handling RPCs
pub struct RpcServer {
/// The address the RpcServer will bind
pub bind_addr: SocketAddr,
/// The tls configuration used for RPC
pub tls_config: Option<TlsConfig>,
handlers: HashMap<String, Handler>,
}
async fn handle_func<M, F, Fut>(
handler: Arc<F>,
req: Request<Body>,
sockaddr: SocketAddr,
name: Arc<String>,
) -> Result<Response<Body>, Error>
where
M: RpcMessage + 'static,
F: Fn(M, SocketAddr) -> Fut + Send + Sync + 'static,
Fut: Future<Output = Result<M, Error>> + Send + 'static,
{
let begin_time = Instant::now();
let whole_body = hyper::body::to_bytes(req.into_body()).await?;
let msg = rmp_serde::decode::from_read::<_, M>(&whole_body[..])?;
trace!(
"Request message: {}",
serde_json::to_string(&msg)
.unwrap_or_else(|_| "<json error>".into())
.chars()
.take(100)
.collect::<String>()
);
match handler(msg, sockaddr).await {
Ok(resp) => {
let resp_bytes = rmp_to_vec_all_named::<Result<M, String>>(&Ok(resp))?;
let rpc_duration = (Instant::now() - begin_time).as_millis();
if rpc_duration > 100 {
debug!("RPC {} ok, took long: {} ms", name, rpc_duration,);
}
Ok(Response::new(Body::from(resp_bytes)))
}
Err(e) => {
let err_str = format!("{}", e);
let rep_bytes = rmp_to_vec_all_named::<Result<M, String>>(&Err(err_str))?;
let mut err_response = Response::new(Body::from(rep_bytes));
*err_response.status_mut() = match e {
Error::BadRpc(_) => StatusCode::BAD_REQUEST,
_ => StatusCode::INTERNAL_SERVER_ERROR,
};
warn!(
"RPC error ({}): {} ({} ms)",
name,
e,
(Instant::now() - begin_time).as_millis(),
);
Ok(err_response)
}
}
}
impl RpcServer {
/// Create a new RpcServer
pub fn new(bind_addr: SocketAddr, tls_config: Option<TlsConfig>) -> Self {
Self {
bind_addr,
tls_config,
handlers: HashMap::new(),
}
}
/// Add handler handling request made to `name`
pub fn add_handler<M, F, Fut>(&mut self, name: String, handler: F)
where
M: RpcMessage + 'static,
F: Fn(M, SocketAddr) -> Fut + Send + Sync + 'static,
Fut: Future<Output = Result<M, Error>> + Send + 'static,
{
let name2 = Arc::new(name.clone());
let handler_arc = Arc::new(handler);
let handler = Box::new(move |req: Request<Body>, sockaddr: SocketAddr| {
let handler2 = handler_arc.clone();
let b: ResponseFuture = Box::pin(handle_func(handler2, req, sockaddr, name2.clone()));
b
});
self.handlers.insert(name, handler);
}
async fn handler(
self: Arc<Self>,
req: Request<Body>,
addr: SocketAddr,
) -> Result<Response<Body>, Error> {
if req.method() != Method::POST {
let mut bad_request = Response::default();
*bad_request.status_mut() = StatusCode::BAD_REQUEST;
return Ok(bad_request);
}
let path = &req.uri().path()[1..].to_string();
let handler = match self.handlers.get(path) {
Some(h) => h,
None => {
let mut not_found = Response::default();
*not_found.status_mut() = StatusCode::NOT_FOUND;
return Ok(not_found);
}
};
trace!("({}) Handling request", path);
let resp_waiter = tokio::spawn(handler(req, addr));
match resp_waiter.await {
Err(err) => {
warn!("Handler await error: {}", err);
let mut ise = Response::default();
*ise.status_mut() = StatusCode::INTERNAL_SERVER_ERROR;
Ok(ise)
}
Ok(Err(err)) => {
trace!("({}) Request handler failed: {}", path, err);
let mut bad_request = Response::new(Body::from(format!("{}", err)));
*bad_request.status_mut() = StatusCode::BAD_REQUEST;
Ok(bad_request)
}
Ok(Ok(resp)) => {
trace!("({}) Request handler succeeded", path);
Ok(resp)
}
}
}
/// Run the RpcServer
pub async fn run(
self: Arc<Self>,
shutdown_signal: impl Future<Output = ()>,
) -> Result<(), Error> {
if let Some(tls_config) = self.tls_config.as_ref() {
let ca_certs = tls_util::load_certs(&tls_config.ca_cert)?;
let node_certs = tls_util::load_certs(&tls_config.node_cert)?;
let node_key = tls_util::load_private_key(&tls_config.node_key)?;
let mut ca_store = rustls::RootCertStore::empty();
for crt in ca_certs.iter() {
ca_store.add(crt)?;
}
let mut config =
rustls::ServerConfig::new(rustls::AllowAnyAuthenticatedClient::new(ca_store));
config.set_single_cert([&node_certs[..], &ca_certs[..]].concat(), node_key)?;
let tls_acceptor = Arc::new(TlsAcceptor::from(Arc::new(config)));
let listener = TcpListener::bind(&self.bind_addr).await?;
let incoming = TcpListenerStream::new(listener).filter_map(|socket| async {
match socket {
Ok(stream) => match tls_acceptor.clone().accept(stream).await {
Ok(x) => Some(Ok::<_, hyper::Error>(x)),
Err(_e) => None,
},
Err(_) => None,
}
});
let incoming = hyper::server::accept::from_stream(incoming);
let self_arc = self.clone();
let service = make_service_fn(|conn: &TlsStream<TcpStream>| {
let client_addr = conn
.get_ref()
.0
.peer_addr()
.unwrap_or_else(|_| ([0, 0, 0, 0], 0).into());
let self_arc = self_arc.clone();
async move {
Ok::<_, Error>(service_fn(move |req: Request<Body>| {
self_arc.clone().handler(req, client_addr).map_err(|e| {
warn!("RPC handler error: {}", e);
e
})
}))
}
});
let server = Server::builder(incoming).serve(service);
let graceful = server.with_graceful_shutdown(shutdown_signal);
info!("RPC server listening on http://{}", self.bind_addr);
graceful.await?;
} else {
let self_arc = self.clone();
let service = make_service_fn(move |conn: &AddrStream| {
let client_addr = conn.remote_addr();
let self_arc = self_arc.clone();
async move {
Ok::<_, Error>(service_fn(move |req: Request<Body>| {
self_arc.clone().handler(req, client_addr).map_err(|e| {
warn!("RPC handler error: {}", e);
e
})
}))
}
});
let server = Server::bind(&self.bind_addr).serve(service);
let graceful = server.with_graceful_shutdown(shutdown_signal);
info!("RPC server listening on http://{}", self.bind_addr);
graceful.await?;
}
Ok(())
}
}

363
src/rpc/system.rs Normal file
View File

@ -0,0 +1,363 @@
//! Module containing structs related to membership management
use std::io::{Read, Write};
use std::net::SocketAddr;
use std::path::{Path, PathBuf};
use std::sync::Arc;
use std::time::Duration;
use arc_swap::ArcSwap;
use async_trait::async_trait;
use futures::{join, select};
use futures_util::future::*;
use serde::{Deserialize, Serialize};
use sodiumoxide::crypto::sign::ed25519;
use tokio::sync::watch;
use tokio::sync::Mutex;
use netapp::endpoint::{Endpoint, EndpointHandler, Message};
use netapp::peering::fullmesh::FullMeshPeeringStrategy;
use netapp::proto::*;
use netapp::{NetApp, NetworkKey, NodeID, NodeKey};
use garage_util::background::BackgroundRunner;
use garage_util::error::Error;
use garage_util::persister::Persister;
//use garage_util::time::*;
//use crate::consul::get_consul_nodes;
use crate::ring::*;
use crate::rpc_helper::{RequestStrategy, RpcHelper};
const DISCOVERY_INTERVAL: Duration = Duration::from_secs(60);
const PING_TIMEOUT: Duration = Duration::from_secs(2);
/// RPC endpoint used for calls related to membership
pub const SYSTEM_RPC_PATH: &str = "garage_rpc/membership.rs/SystemRpc";
/// RPC messages related to membership
#[derive(Debug, Serialize, Deserialize, Clone)]
pub enum SystemRpc {
/// Response to successfull advertisements
Ok,
/// Error response
Error(String),
/// Ask other node its config. Answered with AdvertiseConfig
PullConfig,
/// Advertise Garage status. Answered with another AdvertiseStatus.
/// Exchanged with every node on a regular basis.
AdvertiseStatus(StateInfo),
/// Advertisement of nodes config. Sent spontanously or in response to PullConfig
AdvertiseConfig(NetworkConfig),
/// Get known nodes states
GetKnownNodes,
/// Return known nodes
ReturnKnownNodes(Vec<(NodeID, SocketAddr, bool)>),
}
impl Message for SystemRpc {
type Response = SystemRpc;
}
/// This node's membership manager
pub struct System {
/// The id of this node
pub id: NodeID,
persist_config: Persister<NetworkConfig>,
state_info: ArcSwap<StateInfo>,
pub netapp: Arc<NetApp>,
fullmesh: Arc<FullMeshPeeringStrategy>,
pub rpc: RpcHelper,
system_endpoint: Arc<Endpoint<SystemRpc, System>>,
rpc_listen_addr: SocketAddr,
bootstrap_peers: Vec<(NodeID, SocketAddr)>,
consul_host: Option<String>,
consul_service_name: Option<String>,
replication_factor: usize,
/// The ring
pub ring: watch::Receiver<Arc<Ring>>,
update_ring: Mutex<watch::Sender<Arc<Ring>>>,
/// The job runner of this node
pub background: Arc<BackgroundRunner>,
}
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct StateInfo {
/// Hostname of the node
pub hostname: String,
/// Replication factor configured on the node
pub replication_factor: usize,
/// Configuration version
pub config_version: u64,
}
fn gen_node_key(metadata_dir: &Path) -> Result<NodeKey, Error> {
let mut id_file = metadata_dir.to_path_buf();
id_file.push("node_id");
if id_file.as_path().exists() {
let mut f = std::fs::File::open(id_file.as_path())?;
let mut d = vec![];
f.read_to_end(&mut d)?;
if d.len() != 64 {
return Err(Error::Message("Corrupt node_id file".to_string()));
}
let mut key = [0u8; 64];
key.copy_from_slice(&d[..]);
Ok(NodeKey::from_slice(&key[..]).unwrap())
} else {
let (key, _) = ed25519::gen_keypair();
let mut f = std::fs::File::create(id_file.as_path())?;
f.write_all(&key[..])?;
Ok(NodeKey::from_slice(&key[..]).unwrap())
}
}
impl System {
/// Create this node's membership manager
pub fn new(
network_key: NetworkKey,
metadata_dir: PathBuf,
background: Arc<BackgroundRunner>,
replication_factor: usize,
rpc_listen_addr: SocketAddr,
bootstrap_peers: Vec<(NodeID, SocketAddr)>,
consul_host: Option<String>,
consul_service_name: Option<String>,
) -> Arc<Self> {
let node_key = gen_node_key(&metadata_dir).expect("Unable to read or generate node ID");
info!("Node public key: {}", hex::encode(&node_key.public_key()));
let persist_config = Persister::new(&metadata_dir, "network_config");
let net_config = match persist_config.load() {
Ok(x) => x,
Err(e) => {
match Persister::<garage_rpc_021::ring::NetworkConfig>::new(
&metadata_dir,
"network_config",
)
.load()
{
Ok(old_config) => NetworkConfig::migrate_from_021(old_config),
Err(e2) => {
info!(
"No valid previous network configuration stored ({}, {}), starting fresh.",
e, e2
);
NetworkConfig::new()
}
}
}
};
let state_info = StateInfo {
hostname: gethostname::gethostname()
.into_string()
.unwrap_or_else(|_| "<invalid utf-8>".to_string()),
replication_factor: replication_factor,
config_version: net_config.version,
};
let ring = Ring::new(net_config, replication_factor);
let (update_ring, ring) = watch::channel(Arc::new(ring));
let netapp = NetApp::new(network_key, node_key);
let fullmesh = FullMeshPeeringStrategy::new(netapp.clone(), bootstrap_peers.clone());
let system_endpoint = netapp.endpoint(SYSTEM_RPC_PATH.into());
let sys = Arc::new(System {
id: netapp.id.clone(),
persist_config,
state_info: ArcSwap::new(Arc::new(state_info)),
netapp: netapp.clone(),
fullmesh: fullmesh.clone(),
rpc: RpcHelper {
fullmesh: fullmesh.clone(),
background: background.clone(),
},
system_endpoint,
replication_factor,
rpc_listen_addr,
bootstrap_peers,
consul_host,
consul_service_name,
ring,
update_ring: Mutex::new(update_ring),
background: background.clone(),
});
sys.system_endpoint.set_handler(sys.clone());
sys
}
/// Perform bootstraping, starting the ping loop
pub async fn run(self: Arc<Self>, must_exit: watch::Receiver<bool>) {
join!(
self.netapp
.clone()
.listen(self.rpc_listen_addr, None, must_exit.clone()),
self.fullmesh.clone().run(must_exit.clone()),
self.discovery_loop(must_exit.clone()),
);
}
// ---- INTERNALS ----
/// Save network configuration to disc
async fn save_network_config(self: Arc<Self>) -> Result<(), Error> {
let ring: Arc<Ring> = self.ring.borrow().clone();
self.persist_config
.save_async(&ring.config)
.await
.expect("Cannot save current cluster configuration");
Ok(())
}
fn update_state_info(&self) {
let mut new_si: StateInfo = self.state_info.load().as_ref().clone();
let ring = self.ring.borrow();
new_si.config_version = ring.config.version;
self.state_info.swap(Arc::new(new_si));
}
fn handle_pull_config(&self) -> SystemRpc {
let ring = self.ring.borrow().clone();
SystemRpc::AdvertiseConfig(ring.config.clone())
}
async fn handle_advertise_config(
self: Arc<Self>,
adv: &NetworkConfig,
) -> Result<SystemRpc, Error> {
let update_ring = self.update_ring.lock().await;
let ring: Arc<Ring> = self.ring.borrow().clone();
if adv.version > ring.config.version {
let ring = Ring::new(adv.clone(), self.replication_factor);
update_ring.send(Arc::new(ring))?;
drop(update_ring);
let self2 = self.clone();
let adv2 = adv.clone();
self.background.spawn_cancellable(async move {
self2
.rpc
.broadcast(
&self2.system_endpoint,
SystemRpc::AdvertiseConfig(adv2),
RequestStrategy::with_priority(PRIO_NORMAL),
)
.await;
Ok(())
});
self.background.spawn(self.clone().save_network_config());
}
Ok(SystemRpc::Ok)
}
async fn discovery_loop(&self, mut stop_signal: watch::Receiver<bool>) {
/* TODO
let consul_config = match (&self.consul_host, &self.consul_service_name) {
(Some(ch), Some(csn)) => Some((ch.clone(), csn.clone())),
_ => None,
};
*/
while !*stop_signal.borrow() {
let not_configured = self.ring.borrow().config.members.is_empty();
let no_peers = self.fullmesh.get_peer_list().len() < self.replication_factor;
let bad_peers = self
.fullmesh
.get_peer_list()
.iter()
.filter(|p| p.is_up())
.count() != self.ring.borrow().config.members.len();
if not_configured || no_peers || bad_peers {
info!("Doing a bootstrap/discovery step (not_configured: {}, no_peers: {}, bad_peers: {})", not_configured, no_peers, bad_peers);
let ping_list = self.bootstrap_peers.clone();
/*
*TODO bring this back: persisted list of peers
if let Ok(peers) = self.persist_status.load_async().await {
ping_list.extend(peers.iter().map(|x| (x.addr, Some(x.id))));
}
*/
/*
* TODO bring this back: get peers from consul
if let Some((consul_host, consul_service_name)) = &consul_config {
match get_consul_nodes(consul_host, consul_service_name).await {
Ok(node_list) => {
ping_list.extend(node_list.iter().map(|a| (*a, None)));
}
Err(e) => {
warn!("Could not retrieve node list from Consul: {}", e);
}
}
}
*/
for (node_id, node_addr) in ping_list {
tokio::spawn(self.netapp.clone().try_connect(node_addr, node_id));
}
}
let restart_at = tokio::time::sleep(DISCOVERY_INTERVAL);
select! {
_ = restart_at.fuse() => {},
_ = stop_signal.changed().fuse() => {},
}
}
}
async fn pull_config(self: Arc<Self>, peer: NodeID) {
let resp = self
.rpc
.call(
&self.system_endpoint,
peer,
SystemRpc::PullConfig,
RequestStrategy::with_priority(PRIO_HIGH).with_timeout(PING_TIMEOUT),
)
.await;
if let Ok(SystemRpc::AdvertiseConfig(config)) = resp {
let _: Result<_, _> = self.handle_advertise_config(&config).await;
}
}
}
#[async_trait]
impl EndpointHandler<SystemRpc> for System {
async fn handle(self: &Arc<Self>, msg: &SystemRpc, _from: NodeID) -> SystemRpc {
let resp = match msg {
SystemRpc::PullConfig => Ok(self.handle_pull_config()),
SystemRpc::AdvertiseConfig(adv) => self.clone().handle_advertise_config(&adv).await,
SystemRpc::GetKnownNodes => {
let known_nodes = self
.fullmesh
.get_peer_list()
.iter()
.map(|n| (n.id, n.addr, n.is_up()))
.collect::<Vec<_>>();
Ok(SystemRpc::ReturnKnownNodes(known_nodes))
}
_ => Err(Error::BadRpc("Unexpected RPC message".to_string())),
};
match resp {
Ok(r) => r,
Err(e) => SystemRpc::Error(format!("{}", e)),
}
}
}

View File

@ -1,140 +0,0 @@
use core::future::Future;
use core::task::{Context, Poll};
use std::pin::Pin;
use std::sync::Arc;
use std::{fs, io};
use futures_util::future::*;
use hyper::client::connect::Connection;
use hyper::client::HttpConnector;
use hyper::service::Service;
use hyper::Uri;
use hyper_rustls::MaybeHttpsStream;
use rustls::internal::pemfile;
use tokio::io::{AsyncRead, AsyncWrite};
use tokio_rustls::TlsConnector;
use webpki::DNSNameRef;
use garage_util::error::Error;
pub fn load_certs(filename: &str) -> Result<Vec<rustls::Certificate>, Error> {
let certfile = fs::File::open(&filename)?;
let mut reader = io::BufReader::new(certfile);
let certs = pemfile::certs(&mut reader).map_err(|_| {
Error::Message(format!(
"Could not deecode certificates from file: {}",
filename
))
})?;
if certs.is_empty() {
return Err(Error::Message(format!(
"Invalid certificate file: {}",
filename
)));
}
Ok(certs)
}
pub fn load_private_key(filename: &str) -> Result<rustls::PrivateKey, Error> {
let keydata = fs::read_to_string(filename)?;
let mut buf1 = keydata.as_bytes();
let rsa_keys = pemfile::rsa_private_keys(&mut buf1).unwrap_or_default();
let mut buf2 = keydata.as_bytes();
let pkcs8_keys = pemfile::pkcs8_private_keys(&mut buf2).unwrap_or_default();
let mut keys = rsa_keys;
keys.extend(pkcs8_keys.into_iter());
if keys.len() != 1 {
return Err(Error::Message(format!(
"Invalid private key file: {} ({} private keys)",
filename,
keys.len()
)));
}
Ok(keys[0].clone())
}
// ---- AWFUL COPYPASTA FROM HYPER-RUSTLS connector.rs
// ---- ALWAYS USE `garage` AS HOSTNAME FOR TLS VERIFICATION
#[derive(Clone)]
pub struct HttpsConnectorFixedDnsname<T> {
http: T,
tls_config: Arc<rustls::ClientConfig>,
fixed_dnsname: &'static str,
}
type BoxError = Box<dyn std::error::Error + Send + Sync>;
impl HttpsConnectorFixedDnsname<HttpConnector> {
pub fn new(mut tls_config: rustls::ClientConfig, fixed_dnsname: &'static str) -> Self {
let mut http = HttpConnector::new();
http.enforce_http(false);
tls_config.alpn_protocols = vec![b"h2".to_vec(), b"http/1.1".to_vec()];
Self {
http,
tls_config: Arc::new(tls_config),
fixed_dnsname,
}
}
}
impl<T> Service<Uri> for HttpsConnectorFixedDnsname<T>
where
T: Service<Uri>,
T::Response: Connection + AsyncRead + AsyncWrite + Send + Unpin + 'static,
T::Future: Send + 'static,
T::Error: Into<BoxError>,
{
type Response = MaybeHttpsStream<T::Response>;
type Error = BoxError;
#[allow(clippy::type_complexity)]
type Future =
Pin<Box<dyn Future<Output = Result<MaybeHttpsStream<T::Response>, BoxError>> + Send>>;
fn poll_ready(&mut self, cx: &mut Context<'_>) -> Poll<Result<(), Self::Error>> {
match self.http.poll_ready(cx) {
Poll::Ready(Ok(())) => Poll::Ready(Ok(())),
Poll::Ready(Err(e)) => Poll::Ready(Err(e.into())),
Poll::Pending => Poll::Pending,
}
}
fn call(&mut self, dst: Uri) -> Self::Future {
let is_https = dst.scheme_str() == Some("https");
if !is_https {
let connecting_future = self.http.call(dst);
let f = async move {
let tcp = connecting_future.await.map_err(Into::into)?;
Ok(MaybeHttpsStream::Http(tcp))
};
f.boxed()
} else {
let cfg = self.tls_config.clone();
let connecting_future = self.http.call(dst);
let dnsname =
DNSNameRef::try_from_ascii_str(self.fixed_dnsname).expect("Invalid fixed dnsname");
let f = async move {
let tcp = connecting_future.await.map_err(Into::into)?;
let connector = TlsConnector::from(cfg);
let tls = connector
.connect(dnsname, tcp)
.await
.map_err(|e| io::Error::new(io::ErrorKind::Other, e))?;
Ok(MaybeHttpsStream::Https(tls))
};
f.boxed()
}
}
}

View File

@ -1,6 +1,6 @@
[package] [package]
name = "garage_table" name = "garage_table"
version = "0.3.0" version = "0.4.0"
authors = ["Alex Auvolat <alex@adnab.me>"] authors = ["Alex Auvolat <alex@adnab.me>"]
edition = "2018" edition = "2018"
license = "AGPL-3.0" license = "AGPL-3.0"
@ -13,9 +13,10 @@ path = "lib.rs"
# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html # See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
[dependencies] [dependencies]
garage_rpc = { version = "0.3.0", path = "../rpc" } garage_rpc = { version = "0.4.0", path = "../rpc" }
garage_util = { version = "0.3.0", path = "../util" } garage_util = { version = "0.4.0", path = "../util" }
async-trait = "0.1.7"
bytes = "1.0" bytes = "1.0"
hexdump = "0.1" hexdump = "0.1"
log = "0.4" log = "0.4"
@ -30,4 +31,3 @@ serde_bytes = "0.11"
futures = "0.3" futures = "0.3"
futures-util = "0.3" futures-util = "0.3"
tokio = { version = "1.0", default-features = false, features = ["rt", "rt-multi-thread", "io-util", "net", "time", "macros", "sync", "signal", "fs"] } tokio = { version = "1.0", default-features = false, features = ["rt", "rt-multi-thread", "io-util", "net", "time", "macros", "sync", "signal", "fs"] }

View File

@ -9,7 +9,7 @@ use tokio::sync::Notify;
use garage_util::data::*; use garage_util::data::*;
use garage_util::error::*; use garage_util::error::*;
use garage_rpc::membership::System; use garage_rpc::system::System;
use crate::crdt::Crdt; use crate::crdt::Crdt;
use crate::replication::*; use crate::replication::*;

View File

@ -2,6 +2,7 @@ use std::collections::HashMap;
use std::sync::Arc; use std::sync::Arc;
use std::time::Duration; use std::time::Duration;
use async_trait::async_trait;
use serde::{Deserialize, Serialize}; use serde::{Deserialize, Serialize};
use serde_bytes::ByteBuf; use serde_bytes::ByteBuf;
@ -13,9 +14,8 @@ use tokio::sync::watch;
use garage_util::data::*; use garage_util::data::*;
use garage_util::error::Error; use garage_util::error::Error;
use garage_rpc::membership::System; use garage_rpc::system::System;
use garage_rpc::rpc_client::*; use garage_rpc::*;
use garage_rpc::rpc_server::*;
use crate::data::*; use crate::data::*;
use crate::replication::*; use crate::replication::*;
@ -24,11 +24,11 @@ use crate::schema::*;
const TABLE_GC_BATCH_SIZE: usize = 1024; const TABLE_GC_BATCH_SIZE: usize = 1024;
const TABLE_GC_RPC_TIMEOUT: Duration = Duration::from_secs(30); const TABLE_GC_RPC_TIMEOUT: Duration = Duration::from_secs(30);
pub struct TableGc<F: TableSchema, R: TableReplication> { pub struct TableGc<F: TableSchema + 'static, R: TableReplication + 'static> {
system: Arc<System>, system: Arc<System>,
data: Arc<TableData<F, R>>, data: Arc<TableData<F, R>>,
rpc_client: Arc<RpcClient<GcRpc>>, endpoint: Arc<Endpoint<GcRpc, Self>>,
} }
#[derive(Serialize, Deserialize)] #[derive(Serialize, Deserialize)]
@ -36,30 +36,30 @@ enum GcRpc {
Update(Vec<ByteBuf>), Update(Vec<ByteBuf>),
DeleteIfEqualHash(Vec<(ByteBuf, Hash)>), DeleteIfEqualHash(Vec<(ByteBuf, Hash)>),
Ok, Ok,
Error(String),
} }
impl RpcMessage for GcRpc {} impl Message for GcRpc {
type Response = GcRpc;
}
impl<F, R> TableGc<F, R> impl<F, R> TableGc<F, R>
where where
F: TableSchema + 'static, F: TableSchema + 'static,
R: TableReplication + 'static, R: TableReplication + 'static,
{ {
pub(crate) fn launch( pub(crate) fn launch(system: Arc<System>, data: Arc<TableData<F, R>>) -> Arc<Self> {
system: Arc<System>, let endpoint = system
data: Arc<TableData<F, R>>, .netapp
rpc_server: &mut RpcServer, .endpoint(format!("garage_table/gc.rs/Rpc:{}", data.name));
) -> Arc<Self> {
let rpc_path = format!("table_{}/gc", data.name);
let rpc_client = system.rpc_client::<GcRpc>(&rpc_path);
let gc = Arc::new(Self { let gc = Arc::new(Self {
system: system.clone(), system: system.clone(),
data: data.clone(), data: data.clone(),
rpc_client, endpoint,
}); });
gc.register_handler(rpc_server, rpc_path); gc.endpoint.set_handler(gc.clone());
let gc1 = gc.clone(); let gc1 = gc.clone();
system.background.spawn_worker( system.background.spawn_worker(
@ -168,7 +168,7 @@ where
async fn try_send_and_delete( async fn try_send_and_delete(
&self, &self,
nodes: Vec<Uuid>, nodes: Vec<NodeID>,
items: Vec<(ByteBuf, Hash, ByteBuf)>, items: Vec<(ByteBuf, Hash, ByteBuf)>,
) -> Result<(), Error> { ) -> Result<(), Error> {
let n_items = items.len(); let n_items = items.len();
@ -180,11 +180,15 @@ where
deletes.push((k, vhash)); deletes.push((k, vhash));
} }
self.rpc_client self.system
.rpc
.try_call_many( .try_call_many(
&self.endpoint,
&nodes[..], &nodes[..],
GcRpc::Update(updates), GcRpc::Update(updates),
RequestStrategy::with_quorum(nodes.len()).with_timeout(TABLE_GC_RPC_TIMEOUT), RequestStrategy::with_priority(PRIO_BACKGROUND)
.with_quorum(nodes.len())
.with_timeout(TABLE_GC_RPC_TIMEOUT),
) )
.await?; .await?;
@ -193,11 +197,15 @@ where
self.data.name, n_items self.data.name, n_items
); );
self.rpc_client self.system
.rpc
.try_call_many( .try_call_many(
&self.endpoint,
&nodes[..], &nodes[..],
GcRpc::DeleteIfEqualHash(deletes.clone()), GcRpc::DeleteIfEqualHash(deletes.clone()),
RequestStrategy::with_quorum(nodes.len()).with_timeout(TABLE_GC_RPC_TIMEOUT), RequestStrategy::with_priority(PRIO_BACKGROUND)
.with_quorum(nodes.len())
.with_timeout(TABLE_GC_RPC_TIMEOUT),
) )
.await?; .await?;
@ -217,24 +225,7 @@ where
Ok(()) Ok(())
} }
// ---- RPC HANDLER ---- async fn handle_rpc(&self, message: &GcRpc) -> Result<GcRpc, Error> {
fn register_handler(self: &Arc<Self>, rpc_server: &mut RpcServer, path: String) {
let self2 = self.clone();
rpc_server.add_handler::<GcRpc, _, _>(path, move |msg, _addr| {
let self2 = self2.clone();
async move { self2.handle_rpc(&msg).await }
});
let self2 = self.clone();
self.rpc_client
.set_local_handler(self.system.id, move |msg| {
let self2 = self2.clone();
async move { self2.handle_rpc(&msg).await }
});
}
async fn handle_rpc(self: &Arc<Self>, message: &GcRpc) -> Result<GcRpc, Error> {
match message { match message {
GcRpc::Update(items) => { GcRpc::Update(items) => {
self.data.update_many(items)?; self.data.update_many(items)?;
@ -251,3 +242,16 @@ where
} }
} }
} }
#[async_trait]
impl<F, R> EndpointHandler<GcRpc> for TableGc<F, R>
where
F: TableSchema + 'static,
R: TableReplication + 'static,
{
async fn handle(self: &Arc<Self>, message: &GcRpc, _from: NodeID) -> GcRpc {
self.handle_rpc(message)
.await
.unwrap_or_else(|e| GcRpc::Error(format!("{}", e)))
}
}

View File

@ -1,7 +1,8 @@
use std::sync::Arc; use std::sync::Arc;
use garage_rpc::membership::System;
use garage_rpc::ring::*; use garage_rpc::ring::*;
use garage_rpc::system::System;
use garage_rpc::NodeID;
use garage_util::data::*; use garage_util::data::*;
use crate::replication::*; use crate::replication::*;
@ -19,16 +20,20 @@ pub struct TableFullReplication {
} }
impl TableReplication for TableFullReplication { impl TableReplication for TableFullReplication {
fn read_nodes(&self, _hash: &Hash) -> Vec<Uuid> { fn read_nodes(&self, _hash: &Hash) -> Vec<NodeID> {
vec![self.system.id] vec![self.system.id]
} }
fn read_quorum(&self) -> usize { fn read_quorum(&self) -> usize {
1 1
} }
fn write_nodes(&self, _hash: &Hash) -> Vec<Uuid> { fn write_nodes(&self, _hash: &Hash) -> Vec<NodeID> {
let ring = self.system.ring.borrow(); let ring = self.system.ring.borrow();
ring.config.members.keys().cloned().collect::<Vec<_>>() ring.config
.members
.keys()
.map(|id| NodeID::from_slice(id.as_slice()).unwrap())
.collect::<Vec<_>>()
} }
fn write_quorum(&self) -> usize { fn write_quorum(&self) -> usize {
let nmembers = self.system.ring.borrow().config.members.len(); let nmembers = self.system.ring.borrow().config.members.len();

View File

@ -1,5 +1,5 @@
use garage_rpc::ring::*; use garage_rpc::ring::*;
use garage_rpc::NodeID;
use garage_util::data::*; use garage_util::data::*;
/// Trait to describe how a table shall be replicated /// Trait to describe how a table shall be replicated
@ -8,12 +8,12 @@ pub trait TableReplication: Send + Sync {
// To understand various replication methods // To understand various replication methods
/// Which nodes to send read requests to /// Which nodes to send read requests to
fn read_nodes(&self, hash: &Hash) -> Vec<Uuid>; fn read_nodes(&self, hash: &Hash) -> Vec<NodeID>;
/// Responses needed to consider a read succesfull /// Responses needed to consider a read succesfull
fn read_quorum(&self) -> usize; fn read_quorum(&self) -> usize;
/// Which nodes to send writes to /// Which nodes to send writes to
fn write_nodes(&self, hash: &Hash) -> Vec<Uuid>; fn write_nodes(&self, hash: &Hash) -> Vec<NodeID>;
/// Responses needed to consider a write succesfull /// Responses needed to consider a write succesfull
fn write_quorum(&self) -> usize; fn write_quorum(&self) -> usize;
fn max_write_errors(&self) -> usize; fn max_write_errors(&self) -> usize;

View File

@ -1,7 +1,8 @@
use std::sync::Arc; use std::sync::Arc;
use garage_rpc::membership::System;
use garage_rpc::ring::*; use garage_rpc::ring::*;
use garage_rpc::system::System;
use garage_rpc::NodeID;
use garage_util::data::*; use garage_util::data::*;
use crate::replication::*; use crate::replication::*;
@ -25,7 +26,7 @@ pub struct TableShardedReplication {
} }
impl TableReplication for TableShardedReplication { impl TableReplication for TableShardedReplication {
fn read_nodes(&self, hash: &Hash) -> Vec<Uuid> { fn read_nodes(&self, hash: &Hash) -> Vec<NodeID> {
let ring = self.system.ring.borrow(); let ring = self.system.ring.borrow();
ring.get_nodes(&hash, self.replication_factor) ring.get_nodes(&hash, self.replication_factor)
} }
@ -33,7 +34,7 @@ impl TableReplication for TableShardedReplication {
self.read_quorum self.read_quorum
} }
fn write_nodes(&self, hash: &Hash) -> Vec<Uuid> { fn write_nodes(&self, hash: &Hash) -> Vec<NodeID> {
let ring = self.system.ring.borrow(); let ring = self.system.ring.borrow();
ring.get_nodes(&hash, self.replication_factor) ring.get_nodes(&hash, self.replication_factor)
} }

View File

@ -2,6 +2,7 @@ use std::collections::VecDeque;
use std::sync::{Arc, Mutex}; use std::sync::{Arc, Mutex};
use std::time::{Duration, Instant}; use std::time::{Duration, Instant};
use async_trait::async_trait;
use futures::select; use futures::select;
use futures_util::future::*; use futures_util::future::*;
use futures_util::stream::*; use futures_util::stream::*;
@ -13,10 +14,9 @@ use tokio::sync::{mpsc, watch};
use garage_util::data::*; use garage_util::data::*;
use garage_util::error::Error; use garage_util::error::Error;
use garage_rpc::membership::System;
use garage_rpc::ring::*; use garage_rpc::ring::*;
use garage_rpc::rpc_client::*; use garage_rpc::system::System;
use garage_rpc::rpc_server::*; use garage_rpc::*;
use crate::data::*; use crate::data::*;
use crate::merkle::*; use crate::merkle::*;
@ -28,13 +28,13 @@ const TABLE_SYNC_RPC_TIMEOUT: Duration = Duration::from_secs(30);
// Do anti-entropy every 10 minutes // Do anti-entropy every 10 minutes
const ANTI_ENTROPY_INTERVAL: Duration = Duration::from_secs(10 * 60); const ANTI_ENTROPY_INTERVAL: Duration = Duration::from_secs(10 * 60);
pub struct TableSyncer<F: TableSchema, R: TableReplication> { pub struct TableSyncer<F: TableSchema + 'static, R: TableReplication + 'static> {
system: Arc<System>, system: Arc<System>,
data: Arc<TableData<F, R>>, data: Arc<TableData<F, R>>,
merkle: Arc<MerkleUpdater<F, R>>, merkle: Arc<MerkleUpdater<F, R>>,
todo: Mutex<SyncTodo>, todo: Mutex<SyncTodo>,
rpc_client: Arc<RpcClient<SyncRpc>>, endpoint: Arc<Endpoint<SyncRpc, Self>>,
} }
#[derive(Serialize, Deserialize)] #[derive(Serialize, Deserialize)]
@ -45,9 +45,12 @@ pub(crate) enum SyncRpc {
Node(MerkleNodeKey, MerkleNode), Node(MerkleNodeKey, MerkleNode),
Items(Vec<Arc<ByteBuf>>), Items(Vec<Arc<ByteBuf>>),
Ok, Ok,
Error(String),
} }
impl RpcMessage for SyncRpc {} impl Message for SyncRpc {
type Response = SyncRpc;
}
struct SyncTodo { struct SyncTodo {
todo: Vec<TodoPartition>, todo: Vec<TodoPartition>,
@ -72,10 +75,10 @@ where
system: Arc<System>, system: Arc<System>,
data: Arc<TableData<F, R>>, data: Arc<TableData<F, R>>,
merkle: Arc<MerkleUpdater<F, R>>, merkle: Arc<MerkleUpdater<F, R>>,
rpc_server: &mut RpcServer,
) -> Arc<Self> { ) -> Arc<Self> {
let rpc_path = format!("table_{}/sync", data.name); let endpoint = system
let rpc_client = system.rpc_client::<SyncRpc>(&rpc_path); .netapp
.endpoint(format!("garage_table/sync.rs/Rpc:{}", data.name));
let todo = SyncTodo { todo: vec![] }; let todo = SyncTodo { todo: vec![] };
@ -84,10 +87,10 @@ where
data: data.clone(), data: data.clone(),
merkle, merkle,
todo: Mutex::new(todo), todo: Mutex::new(todo),
rpc_client, endpoint,
}); });
syncer.register_handler(rpc_server, rpc_path); syncer.endpoint.set_handler(syncer.clone());
let (busy_tx, busy_rx) = mpsc::unbounded_channel(); let (busy_tx, busy_rx) = mpsc::unbounded_channel();
@ -112,21 +115,6 @@ where
syncer syncer
} }
fn register_handler(self: &Arc<Self>, rpc_server: &mut RpcServer, path: String) {
let self2 = self.clone();
rpc_server.add_handler::<SyncRpc, _, _>(path, move |msg, _addr| {
let self2 = self2.clone();
async move { self2.handle_rpc(&msg).await }
});
let self2 = self.clone();
self.rpc_client
.set_local_handler(self.system.id, move |msg| {
let self2 = self2.clone();
async move { self2.handle_rpc(&msg).await }
});
}
async fn watcher_task( async fn watcher_task(
self: Arc<Self>, self: Arc<Self>,
mut must_exit: watch::Receiver<bool>, mut must_exit: watch::Receiver<bool>,
@ -317,15 +305,19 @@ where
async fn offload_items( async fn offload_items(
self: &Arc<Self>, self: &Arc<Self>,
items: &[(Vec<u8>, Arc<ByteBuf>)], items: &[(Vec<u8>, Arc<ByteBuf>)],
nodes: &[Uuid], nodes: &[NodeID],
) -> Result<(), Error> { ) -> Result<(), Error> {
let values = items.iter().map(|(_k, v)| v.clone()).collect::<Vec<_>>(); let values = items.iter().map(|(_k, v)| v.clone()).collect::<Vec<_>>();
self.rpc_client self.system
.rpc
.try_call_many( .try_call_many(
&self.endpoint,
nodes, nodes,
SyncRpc::Items(values), SyncRpc::Items(values),
RequestStrategy::with_quorum(nodes.len()).with_timeout(TABLE_SYNC_RPC_TIMEOUT), RequestStrategy::with_priority(PRIO_BACKGROUND)
.with_quorum(nodes.len())
.with_timeout(TABLE_SYNC_RPC_TIMEOUT),
) )
.await?; .await?;
@ -362,7 +354,7 @@ where
async fn do_sync_with( async fn do_sync_with(
self: Arc<Self>, self: Arc<Self>,
partition: TodoPartition, partition: TodoPartition,
who: Uuid, who: NodeID,
must_exit: watch::Receiver<bool>, must_exit: watch::Receiver<bool>,
) -> Result<(), Error> { ) -> Result<(), Error> {
let (root_ck_key, root_ck) = self.get_root_ck(partition.partition)?; let (root_ck_key, root_ck) = self.get_root_ck(partition.partition)?;
@ -378,11 +370,14 @@ where
// Check if they have the same root checksum // Check if they have the same root checksum
// If so, do nothing. // If so, do nothing.
let root_resp = self let root_resp = self
.rpc_client .system
.rpc
.call( .call(
&self.endpoint,
who, who,
SyncRpc::RootCkHash(partition.partition, root_ck_hash), SyncRpc::RootCkHash(partition.partition, root_ck_hash),
TABLE_SYNC_RPC_TIMEOUT, RequestStrategy::with_priority(PRIO_BACKGROUND)
.with_timeout(TABLE_SYNC_RPC_TIMEOUT),
) )
.await?; .await?;
@ -430,8 +425,15 @@ where
// Get Merkle node for this tree position at remote node // Get Merkle node for this tree position at remote node
// and compare it with local node // and compare it with local node
let remote_node = match self let remote_node = match self
.rpc_client .system
.call(who, SyncRpc::GetNode(key.clone()), TABLE_SYNC_RPC_TIMEOUT) .rpc
.call(
&self.endpoint,
who,
SyncRpc::GetNode(key.clone()),
RequestStrategy::with_priority(PRIO_BACKGROUND)
.with_timeout(TABLE_SYNC_RPC_TIMEOUT),
)
.await? .await?
{ {
SyncRpc::Node(_, node) => node, SyncRpc::Node(_, node) => node,
@ -478,7 +480,7 @@ where
Ok(()) Ok(())
} }
async fn send_items(&self, who: Uuid, item_value_list: Vec<Vec<u8>>) -> Result<(), Error> { async fn send_items(&self, who: NodeID, item_value_list: Vec<Vec<u8>>) -> Result<(), Error> {
info!( info!(
"({}) Sending {} items to {:?}", "({}) Sending {} items to {:?}",
self.data.name, self.data.name,
@ -492,8 +494,15 @@ where
.collect::<Vec<_>>(); .collect::<Vec<_>>();
let rpc_resp = self let rpc_resp = self
.rpc_client .system
.call(who, SyncRpc::Items(values), TABLE_SYNC_RPC_TIMEOUT) .rpc
.call(
&self.endpoint,
who,
SyncRpc::Items(values),
RequestStrategy::with_priority(PRIO_BACKGROUND)
.with_timeout(TABLE_SYNC_RPC_TIMEOUT),
)
.await?; .await?;
if let SyncRpc::Ok = rpc_resp { if let SyncRpc::Ok = rpc_resp {
Ok(()) Ok(())
@ -506,7 +515,6 @@ where
} }
// ======= SYNCHRONIZATION PROCEDURE -- RECEIVER SIDE ====== // ======= SYNCHRONIZATION PROCEDURE -- RECEIVER SIDE ======
async fn handle_rpc(self: &Arc<Self>, message: &SyncRpc) -> Result<SyncRpc, Error> { async fn handle_rpc(self: &Arc<Self>, message: &SyncRpc) -> Result<SyncRpc, Error> {
match message { match message {
SyncRpc::RootCkHash(range, h) => { SyncRpc::RootCkHash(range, h) => {
@ -527,6 +535,19 @@ where
} }
} }
#[async_trait]
impl<F, R> EndpointHandler<SyncRpc> for TableSyncer<F, R>
where
F: TableSchema + 'static,
R: TableReplication + 'static,
{
async fn handle(self: &Arc<Self>, message: &SyncRpc, _from: NodeID) -> SyncRpc {
self.handle_rpc(message)
.await
.unwrap_or_else(|e| SyncRpc::Error(format!("{}", e)))
}
}
impl SyncTodo { impl SyncTodo {
fn add_full_sync<F: TableSchema, R: TableReplication>( fn add_full_sync<F: TableSchema, R: TableReplication>(
&mut self, &mut self,

View File

@ -2,6 +2,7 @@ use std::collections::{BTreeMap, HashMap};
use std::sync::Arc; use std::sync::Arc;
use std::time::Duration; use std::time::Duration;
use async_trait::async_trait;
use futures::stream::*; use futures::stream::*;
use serde::{Deserialize, Serialize}; use serde::{Deserialize, Serialize};
use serde_bytes::ByteBuf; use serde_bytes::ByteBuf;
@ -9,9 +10,8 @@ use serde_bytes::ByteBuf;
use garage_util::data::*; use garage_util::data::*;
use garage_util::error::Error; use garage_util::error::Error;
use garage_rpc::membership::System; use garage_rpc::system::System;
use garage_rpc::rpc_client::*; use garage_rpc::*;
use garage_rpc::rpc_server::*;
use crate::crdt::Crdt; use crate::crdt::Crdt;
use crate::data::*; use crate::data::*;
@ -23,17 +23,18 @@ use crate::sync::*;
const TABLE_RPC_TIMEOUT: Duration = Duration::from_secs(10); const TABLE_RPC_TIMEOUT: Duration = Duration::from_secs(10);
pub struct Table<F: TableSchema, R: TableReplication> { pub struct Table<F: TableSchema + 'static, R: TableReplication + 'static> {
pub system: Arc<System>, pub system: Arc<System>,
pub data: Arc<TableData<F, R>>, pub data: Arc<TableData<F, R>>,
pub merkle_updater: Arc<MerkleUpdater<F, R>>, pub merkle_updater: Arc<MerkleUpdater<F, R>>,
pub syncer: Arc<TableSyncer<F, R>>, pub syncer: Arc<TableSyncer<F, R>>,
rpc_client: Arc<RpcClient<TableRpc<F>>>, endpoint: Arc<Endpoint<TableRpc<F>, Self>>,
} }
#[derive(Serialize, Deserialize)] #[derive(Serialize, Deserialize)]
pub(crate) enum TableRpc<F: TableSchema> { pub(crate) enum TableRpc<F: TableSchema> {
Ok, Ok,
Error(String),
ReadEntry(F::P, F::S), ReadEntry(F::P, F::S),
ReadEntryResponse(Option<ByteBuf>), ReadEntryResponse(Option<ByteBuf>),
@ -44,7 +45,9 @@ pub(crate) enum TableRpc<F: TableSchema> {
Update(Vec<Arc<ByteBuf>>), Update(Vec<Arc<ByteBuf>>),
} }
impl<F: TableSchema> RpcMessage for TableRpc<F> {} impl<F: TableSchema> Message for TableRpc<F> {
type Response = TableRpc<F>;
}
impl<F, R> Table<F, R> impl<F, R> Table<F, R>
where where
@ -59,32 +62,27 @@ where
system: Arc<System>, system: Arc<System>,
db: &sled::Db, db: &sled::Db,
name: String, name: String,
rpc_server: &mut RpcServer,
) -> Arc<Self> { ) -> Arc<Self> {
let rpc_path = format!("table_{}", name); let endpoint = system
let rpc_client = system.rpc_client::<TableRpc<F>>(&rpc_path); .netapp
.endpoint(format!("garage_table/table.rs/Rpc:{}", name));
let data = TableData::new(system.clone(), name, instance, replication, db); let data = TableData::new(system.clone(), name, instance, replication, db);
let merkle_updater = MerkleUpdater::launch(&system.background, data.clone()); let merkle_updater = MerkleUpdater::launch(&system.background, data.clone());
let syncer = TableSyncer::launch( let syncer = TableSyncer::launch(system.clone(), data.clone(), merkle_updater.clone());
system.clone(), TableGc::launch(system.clone(), data.clone());
data.clone(),
merkle_updater.clone(),
rpc_server,
);
TableGc::launch(system.clone(), data.clone(), rpc_server);
let table = Arc::new(Self { let table = Arc::new(Self {
system, system,
data, data,
merkle_updater, merkle_updater,
syncer, syncer,
rpc_client, endpoint,
}); });
table.clone().register_handler(rpc_server, rpc_path); table.endpoint.set_handler(table.clone());
table table
} }
@ -97,11 +95,14 @@ where
let e_enc = Arc::new(ByteBuf::from(rmp_to_vec_all_named(e)?)); let e_enc = Arc::new(ByteBuf::from(rmp_to_vec_all_named(e)?));
let rpc = TableRpc::<F>::Update(vec![e_enc]); let rpc = TableRpc::<F>::Update(vec![e_enc]);
self.rpc_client self.system
.rpc
.try_call_many( .try_call_many(
&self.endpoint,
&who[..], &who[..],
rpc, rpc,
RequestStrategy::with_quorum(self.data.replication.write_quorum()) RequestStrategy::with_priority(PRIO_NORMAL)
.with_quorum(self.data.replication.write_quorum())
.with_timeout(TABLE_RPC_TIMEOUT), .with_timeout(TABLE_RPC_TIMEOUT),
) )
.await?; .await?;
@ -123,7 +124,16 @@ where
let call_futures = call_list.drain().map(|(node, entries)| async move { let call_futures = call_list.drain().map(|(node, entries)| async move {
let rpc = TableRpc::<F>::Update(entries); let rpc = TableRpc::<F>::Update(entries);
let resp = self.rpc_client.call(node, rpc, TABLE_RPC_TIMEOUT).await?; let resp = self
.system
.rpc
.call(
&self.endpoint,
node,
rpc,
RequestStrategy::with_priority(PRIO_NORMAL).with_timeout(TABLE_RPC_TIMEOUT),
)
.await?;
Ok::<_, Error>((node, resp)) Ok::<_, Error>((node, resp))
}); });
let mut resps = call_futures.collect::<FuturesUnordered<_>>(); let mut resps = call_futures.collect::<FuturesUnordered<_>>();
@ -152,11 +162,14 @@ where
let rpc = TableRpc::<F>::ReadEntry(partition_key.clone(), sort_key.clone()); let rpc = TableRpc::<F>::ReadEntry(partition_key.clone(), sort_key.clone());
let resps = self let resps = self
.rpc_client .system
.rpc
.try_call_many( .try_call_many(
&self.endpoint,
&who[..], &who[..],
rpc, rpc,
RequestStrategy::with_quorum(self.data.replication.read_quorum()) RequestStrategy::with_priority(PRIO_NORMAL)
.with_quorum(self.data.replication.read_quorum())
.with_timeout(TABLE_RPC_TIMEOUT) .with_timeout(TABLE_RPC_TIMEOUT)
.interrupt_after_quorum(true), .interrupt_after_quorum(true),
) )
@ -208,11 +221,14 @@ where
let rpc = TableRpc::<F>::ReadRange(partition_key.clone(), begin_sort_key, filter, limit); let rpc = TableRpc::<F>::ReadRange(partition_key.clone(), begin_sort_key, filter, limit);
let resps = self let resps = self
.rpc_client .system
.rpc
.try_call_many( .try_call_many(
&self.endpoint,
&who[..], &who[..],
rpc, rpc,
RequestStrategy::with_quorum(self.data.replication.read_quorum()) RequestStrategy::with_priority(PRIO_NORMAL)
.with_quorum(self.data.replication.read_quorum())
.with_timeout(TABLE_RPC_TIMEOUT) .with_timeout(TABLE_RPC_TIMEOUT)
.interrupt_after_quorum(true), .interrupt_after_quorum(true),
) )
@ -261,36 +277,25 @@ where
// =============== UTILITY FUNCTION FOR CLIENT OPERATIONS =============== // =============== UTILITY FUNCTION FOR CLIENT OPERATIONS ===============
async fn repair_on_read(&self, who: &[Uuid], what: F::E) -> Result<(), Error> { async fn repair_on_read(&self, who: &[NodeID], what: F::E) -> Result<(), Error> {
let what_enc = Arc::new(ByteBuf::from(rmp_to_vec_all_named(&what)?)); let what_enc = Arc::new(ByteBuf::from(rmp_to_vec_all_named(&what)?));
self.rpc_client self.system
.rpc
.try_call_many( .try_call_many(
&self.endpoint,
who, who,
TableRpc::<F>::Update(vec![what_enc]), TableRpc::<F>::Update(vec![what_enc]),
RequestStrategy::with_quorum(who.len()).with_timeout(TABLE_RPC_TIMEOUT), RequestStrategy::with_priority(PRIO_NORMAL)
.with_quorum(who.len())
.with_timeout(TABLE_RPC_TIMEOUT),
) )
.await?; .await?;
Ok(()) Ok(())
} }
// =============== HANDLERS FOR RPC OPERATIONS (SERVER SIDE) ============== // ====== RPC HANDLER =====
//
fn register_handler(self: Arc<Self>, rpc_server: &mut RpcServer, path: String) { async fn handle_rpc(self: &Arc<Self>, msg: &TableRpc<F>) -> Result<TableRpc<F>, Error> {
let self2 = self.clone();
rpc_server.add_handler::<TableRpc<F>, _, _>(path, move |msg, _addr| {
let self2 = self2.clone();
async move { self2.handle(&msg).await }
});
let self2 = self.clone();
self.rpc_client
.set_local_handler(self.system.id, move |msg| {
let self2 = self2.clone();
async move { self2.handle(&msg).await }
});
}
async fn handle(self: &Arc<Self>, msg: &TableRpc<F>) -> Result<TableRpc<F>, Error> {
match msg { match msg {
TableRpc::ReadEntry(key, sort_key) => { TableRpc::ReadEntry(key, sort_key) => {
let value = self.data.read_entry(key, sort_key)?; let value = self.data.read_entry(key, sort_key)?;
@ -308,3 +313,16 @@ where
} }
} }
} }
#[async_trait]
impl<F, R> EndpointHandler<TableRpc<F>> for Table<F, R>
where
F: TableSchema + 'static,
R: TableReplication + 'static,
{
async fn handle(self: &Arc<Self>, msg: &TableRpc<F>, _from: NodeID) -> TableRpc<F> {
self.handle_rpc(msg)
.await
.unwrap_or_else(|e| TableRpc::<F>::Error(format!("{}", e)))
}
}

View File

@ -1,6 +1,6 @@
[package] [package]
name = "garage_util" name = "garage_util"
version = "0.3.0" version = "0.4.0"
authors = ["Alex Auvolat <alex@adnab.me>"] authors = ["Alex Auvolat <alex@adnab.me>"]
edition = "2018" edition = "2018"
license = "AGPL-3.0" license = "AGPL-3.0"
@ -32,7 +32,6 @@ toml = "0.5"
futures = "0.3" futures = "0.3"
tokio = { version = "1.0", default-features = false, features = ["rt", "rt-multi-thread", "io-util", "net", "time", "macros", "sync", "signal", "fs"] } tokio = { version = "1.0", default-features = false, features = ["rt", "rt-multi-thread", "io-util", "net", "time", "macros", "sync", "signal", "fs"] }
netapp = { version = "0.3.0", git = "https://git.deuxfleurs.fr/lx/netapp" }
http = "0.2" http = "0.2"
hyper = "0.14" hyper = "0.14"
rustls = "0.19"
webpki = "0.21"

View File

@ -3,8 +3,11 @@ use std::io::Read;
use std::net::SocketAddr; use std::net::SocketAddr;
use std::path::PathBuf; use std::path::PathBuf;
use serde::de::Error as SerdeError;
use serde::{de, Deserialize}; use serde::{de, Deserialize};
use netapp::NodeID;
use crate::error::Error; use crate::error::Error;
/// Represent the whole configuration /// Represent the whole configuration
@ -26,20 +29,20 @@ pub struct Config {
// (we can add more aliases for this later) // (we can add more aliases for this later)
pub replication_mode: String, pub replication_mode: String,
/// RPC secret key: 32 bytes hex encoded
pub rpc_secret: String,
/// Address to bind for RPC /// Address to bind for RPC
pub rpc_bind_addr: SocketAddr, pub rpc_bind_addr: SocketAddr,
/// Bootstrap peers RPC address /// Bootstrap peers RPC address
#[serde(deserialize_with = "deserialize_vec_addr")] #[serde(deserialize_with = "deserialize_vec_addr")]
pub bootstrap_peers: Vec<SocketAddr>, pub bootstrap_peers: Vec<(NodeID, SocketAddr)>,
/// Consule host to connect to to discover more peers /// Consule host to connect to to discover more peers
pub consul_host: Option<String>, pub consul_host: Option<String>,
/// Consul service name to use /// Consul service name to use
pub consul_service_name: Option<String>, pub consul_service_name: Option<String>,
/// Configuration for RPC TLS
pub rpc_tls: Option<TlsConfig>,
/// Max number of concurrent RPC request /// Max number of concurrent RPC request
#[serde(default = "default_max_concurrent_rpc_requests")] #[serde(default = "default_max_concurrent_rpc_requests")]
pub max_concurrent_rpc_requests: usize, pub max_concurrent_rpc_requests: usize,
@ -59,17 +62,6 @@ pub struct Config {
pub s3_web: WebConfig, pub s3_web: WebConfig,
} }
/// Configuration for RPC TLS
#[derive(Deserialize, Debug, Clone)]
pub struct TlsConfig {
/// Path to certificate autority used for all nodes
pub ca_cert: String,
/// Path to public certificate for this node
pub node_cert: String,
/// Path to private key for this node
pub node_key: String,
}
/// Configuration for S3 api /// Configuration for S3 api
#[derive(Deserialize, Debug, Clone)] #[derive(Deserialize, Debug, Clone)]
pub struct ApiConfig { pub struct ApiConfig {
@ -115,27 +107,32 @@ pub fn read_config(config_file: PathBuf) -> Result<Config, Error> {
Ok(toml::from_str(&config)?) Ok(toml::from_str(&config)?)
} }
fn deserialize_vec_addr<'de, D>(deserializer: D) -> Result<Vec<SocketAddr>, D::Error> fn deserialize_vec_addr<'de, D>(deserializer: D) -> Result<Vec<(NodeID, SocketAddr)>, D::Error>
where where
D: de::Deserializer<'de>, D: de::Deserializer<'de>,
{ {
use std::net::ToSocketAddrs; use std::net::ToSocketAddrs;
Ok(<Vec<&str>>::deserialize(deserializer)? let mut ret = vec![];
.iter()
.filter_map(|&name| { for peer in <Vec<&str>>::deserialize(deserializer)? {
name.to_socket_addrs() let delim = peer
.map(|iter| (name, iter)) .find('@')
.map_err(|_| warn!("Error resolving \"{}\"", name)) .ok_or_else(|| D::Error::custom("Invalid bootstrap peer: public key not specified"))?;
.ok() let (key, host) = peer.split_at(delim);
}) let pubkey = NodeID::from_slice(&hex::decode(&key).map_err(D::Error::custom)?)
.map(|(name, iter)| { .ok_or_else(|| D::Error::custom("Invalid bootstrap peer public key"))?;
let v = iter.collect::<Vec<_>>(); let hosts = host[1..]
if v.is_empty() { .to_socket_addrs()
warn!("Error resolving \"{}\"", name) .map_err(D::Error::custom)?
.collect::<Vec<_>>();
if hosts.is_empty() {
return Err(D::Error::custom(format!("Error resolving {}", &host[1..])));
} }
v for host in hosts {
}) ret.push((pubkey.clone(), host));
.flatten() }
.collect()) }
Ok(ret)
} }

View File

@ -11,8 +11,8 @@ pub enum RpcError {
#[error(display = "Node is down: {:?}.", _0)] #[error(display = "Node is down: {:?}.", _0)]
NodeDown(Uuid), NodeDown(Uuid),
#[error(display = "Timeout: {}", _0)] #[error(display = "Timeout")]
Timeout(#[error(source)] tokio::time::error::Elapsed), Timeout,
#[error(display = "HTTP error: {}", _0)] #[error(display = "HTTP error: {}", _0)]
Http(#[error(source)] http::Error), Http(#[error(source)] http::Error),
@ -45,11 +45,8 @@ pub enum Error {
#[error(display = "Invalid HTTP header value: {}", _0)] #[error(display = "Invalid HTTP header value: {}", _0)]
HttpHeader(#[error(source)] http::header::ToStrError), HttpHeader(#[error(source)] http::header::ToStrError),
#[error(display = "TLS error: {}", _0)] #[error(display = "Netapp error: {}", _0)]
Tls(#[error(source)] rustls::TLSError), Netapp(#[error(source)] netapp::error::Error),
#[error(display = "PKI error: {}", _0)]
Pki(#[error(source)] webpki::Error),
#[error(display = "Sled error: {}", _0)] #[error(display = "Sled error: {}", _0)]
Sled(#[error(source)] sled::Error), Sled(#[error(source)] sled::Error),

View File

@ -1,6 +1,6 @@
[package] [package]
name = "garage_web" name = "garage_web"
version = "0.3.0" version = "0.4.0"
authors = ["Alex Auvolat <alex@adnab.me>", "Quentin Dufour <quentin@dufour.io>"] authors = ["Alex Auvolat <alex@adnab.me>", "Quentin Dufour <quentin@dufour.io>"]
edition = "2018" edition = "2018"
license = "AGPL-3.0" license = "AGPL-3.0"
@ -13,10 +13,10 @@ path = "lib.rs"
# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html # See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
[dependencies] [dependencies]
garage_api = { version = "0.3.0", path = "../api" } garage_api = { version = "0.4.0", path = "../api" }
garage_model = { version = "0.3.0", path = "../model" } garage_model = { version = "0.4.0", path = "../model" }
garage_util = { version = "0.3.0", path = "../util" } garage_util = { version = "0.4.0", path = "../util" }
garage_table = { version = "0.3.0", path = "../table" } garage_table = { version = "0.4.0", path = "../table" }
err-derive = "0.3" err-derive = "0.3"
idna = "0.2" idna = "0.2"