Fix the Sync issue. Details:

The HTTP client future of Hyper is not Sync, and therefore the stream
that reads blocks was not Sync either. However, Hyper's default Body type
requires the stream passed to wrap_stream to be Sync. Solution: implement
a custom HTTP body type that does not require Sync.
This commit is contained in:
Alex Auvolat 2020-04-10 22:01:48 +02:00
parent d66c0d6833
commit 3477864142
14 changed files with 663 additions and 432 deletions

1
rustfmt.toml Normal file
View File

@ -0,0 +1 @@
hard_tabs = true

View File

@ -1,23 +1,32 @@
use std::sync::Arc; use core::pin::Pin;
use std::net::SocketAddr; use core::task::{Context, Poll};
use std::collections::VecDeque; use std::collections::VecDeque;
use std::net::SocketAddr;
use std::sync::Arc;
use futures::stream::*;
use hyper::service::{make_service_fn, service_fn};
use hyper::server::conn::AddrStream;
use hyper::{Body, Method, Request, Response, Server, StatusCode};
use hyper::body::Bytes;
use futures::future::Future; use futures::future::Future;
use futures::ready;
use futures::stream::*;
use hyper::body::{Bytes, HttpBody};
use hyper::server::conn::AddrStream;
use hyper::service::{make_service_fn, service_fn};
use hyper::{Body, Method, Request, Response, Server, StatusCode};
use crate::error::Error;
use crate::data::*;
use crate::data; use crate::data;
use crate::data::*;
use crate::error::Error;
use crate::proto::*; use crate::proto::*;
use crate::rpc_client::*; use crate::rpc_client::*;
use crate::server::Garage; use crate::server::Garage;
use crate::table::EmptySortKey; use crate::table::EmptySortKey;
pub async fn run_api_server(garage: Arc<Garage>, shutdown_signal: impl Future<Output=()>) -> Result<(), hyper::Error> { type BodyType = Box<dyn HttpBody<Data = Bytes, Error = Error> + Send + Unpin>;
pub async fn run_api_server(
garage: Arc<Garage>,
shutdown_signal: impl Future<Output = ()>,
) -> Result<(), hyper::Error> {
let addr = ([0, 0, 0, 0], garage.system.config.api_port).into(); let addr = ([0, 0, 0, 0], garage.system.config.api_port).into();
let service = make_service_fn(|conn: &AddrStream| { let service = make_service_fn(|conn: &AddrStream| {
@ -39,21 +48,31 @@ pub async fn run_api_server(garage: Arc<Garage>, shutdown_signal: impl Future<Ou
graceful.await graceful.await
} }
async fn handler(garage: Arc<Garage>, req: Request<Body>, addr: SocketAddr) -> Result<Response<Body>, Error> { async fn handler(
garage: Arc<Garage>,
req: Request<Body>,
addr: SocketAddr,
) -> Result<Response<BodyType>, Error> {
match handler_inner(garage, req, addr).await { match handler_inner(garage, req, addr).await {
Ok(x) => Ok(x), Ok(x) => Ok(x),
Err(e) => { Err(e) => {
let mut http_error = Response::new(Body::from(format!("{}\n", e))); let body: BodyType = Box::new(BytesBody::from(format!("{}\n", e)));
let mut http_error = Response::new(body);
*http_error.status_mut() = e.http_status_code(); *http_error.status_mut() = e.http_status_code();
Ok(http_error) Ok(http_error)
} }
} }
} }
async fn handler_inner(garage: Arc<Garage>, req: Request<Body>, addr: SocketAddr) -> Result<Response<Body>, Error> { async fn handler_inner(
garage: Arc<Garage>,
req: Request<Body>,
addr: SocketAddr,
) -> Result<Response<BodyType>, Error> {
eprintln!("{} {} {}", addr, req.method(), req.uri()); eprintln!("{} {} {}", addr, req.method(), req.uri());
let bucket = req.headers() let bucket = req
.headers()
.get(hyper::header::HOST) .get(hyper::header::HOST)
.map(|x| x.to_str().map_err(Error::from)) .map(|x| x.to_str().map_err(Error::from))
.unwrap_or(Err(Error::BadRequest(format!("Host: header missing"))))? .unwrap_or(Err(Error::BadRequest(format!("Host: header missing"))))?
@ -61,29 +80,32 @@ async fn handler_inner(garage: Arc<Garage>, req: Request<Body>, addr: SocketAddr
let key = req.uri().path().to_string(); let key = req.uri().path().to_string();
match req.method() { match req.method() {
&Method::GET => { &Method::GET => Ok(handle_get(garage, &bucket, &key).await?),
Ok(handle_get(garage, &bucket, &key).await?)
}
&Method::PUT => { &Method::PUT => {
let mime_type = req.headers() let mime_type = req
.headers()
.get(hyper::header::CONTENT_TYPE) .get(hyper::header::CONTENT_TYPE)
.map(|x| x.to_str()) .map(|x| x.to_str())
.unwrap_or(Ok("blob"))? .unwrap_or(Ok("blob"))?
.to_string(); .to_string();
let version_uuid = handle_put(garage, &mime_type, &bucket, &key, req.into_body()).await?; let version_uuid =
Ok(Response::new(Body::from( handle_put(garage, &mime_type, &bucket, &key, req.into_body()).await?;
format!("{:?}\n", version_uuid), Ok(Response::new(Box::new(BytesBody::from(format!(
))) "{:?}\n",
version_uuid
)))))
} }
_ => Err(Error::BadRequest(format!("Invalid method"))), _ => Err(Error::BadRequest(format!("Invalid method"))),
} }
} }
async fn handle_put(garage: Arc<Garage>, async fn handle_put(
garage: Arc<Garage>,
mime_type: &str, mime_type: &str,
bucket: &str, key: &str, body: Body) bucket: &str,
-> Result<UUID, Error> key: &str,
{ body: Body,
) -> Result<UUID, Error> {
let version_uuid = gen_uuid(); let version_uuid = gen_uuid();
let mut chunker = BodyChunker::new(body, garage.system.config.block_size); let mut chunker = BodyChunker::new(body, garage.system.config.block_size);
@ -97,7 +119,7 @@ async fn handle_put(garage: Arc<Garage>,
key: key.into(), key: key.into(),
versions: Vec::new(), versions: Vec::new(),
}; };
object.versions.push(Box::new(ObjectVersion{ object.versions.push(Box::new(ObjectVersion {
uuid: version_uuid.clone(), uuid: version_uuid.clone(),
timestamp: now_msec(), timestamp: now_msec(),
mime_type: mime_type.to_string(), mime_type: mime_type.to_string(),
@ -110,7 +132,7 @@ async fn handle_put(garage: Arc<Garage>,
object.versions[0].data = ObjectVersionData::Inline(first_block); object.versions[0].data = ObjectVersionData::Inline(first_block);
object.versions[0].is_complete = true; object.versions[0].is_complete = true;
garage.object_table.insert(&object).await?; garage.object_table.insert(&object).await?;
return Ok(version_uuid) return Ok(version_uuid);
} }
let version = Version { let version = Version {
@ -126,15 +148,22 @@ async fn handle_put(garage: Arc<Garage>,
garage.object_table.insert(&object).await?; garage.object_table.insert(&object).await?;
let mut next_offset = first_block.len(); let mut next_offset = first_block.len();
let mut put_curr_version_block = put_version_block(garage.clone(), &version, 0, first_block_hash.clone()); let mut put_curr_version_block =
put_version_block(garage.clone(), &version, 0, first_block_hash.clone());
let mut put_curr_block = put_block(garage.clone(), first_block_hash, first_block); let mut put_curr_block = put_block(garage.clone(), first_block_hash, first_block);
loop { loop {
let (_, _, next_block) = futures::try_join!(put_curr_block, put_curr_version_block, chunker.next())?; let (_, _, next_block) =
futures::try_join!(put_curr_block, put_curr_version_block, chunker.next())?;
if let Some(block) = next_block { if let Some(block) = next_block {
let block_hash = hash(&block[..]); let block_hash = hash(&block[..]);
let block_len = block.len(); let block_len = block.len();
put_curr_version_block = put_version_block(garage.clone(), &version, next_offset as u64, block_hash.clone()); put_curr_version_block = put_version_block(
garage.clone(),
&version,
next_offset as u64,
block_hash.clone(),
);
put_curr_block = put_block(garage.clone(), block_hash, block); put_curr_block = put_block(garage.clone(), block_hash, block);
next_offset += block_len; next_offset += block_len;
} else { } else {
@ -150,27 +179,33 @@ async fn handle_put(garage: Arc<Garage>,
Ok(version_uuid) Ok(version_uuid)
} }
async fn put_version_block(garage: Arc<Garage>, version: &Version, offset: u64, hash: Hash) -> Result<(), Error> { async fn put_version_block(
garage: Arc<Garage>,
version: &Version,
offset: u64,
hash: Hash,
) -> Result<(), Error> {
let mut version = version.clone(); let mut version = version.clone();
version.blocks.push(VersionBlock{ version.blocks.push(VersionBlock { offset, hash });
offset,
hash,
});
garage.version_table.insert(&version).await?; garage.version_table.insert(&version).await?;
Ok(()) Ok(())
} }
async fn put_block(garage: Arc<Garage>, hash: Hash, data: Vec<u8>) -> Result<(), Error> { async fn put_block(garage: Arc<Garage>, hash: Hash, data: Vec<u8>) -> Result<(), Error> {
let who = garage.system.members.read().await let who = garage
.system
.members
.read()
.await
.walk_ring(&hash, garage.system.config.meta_replication_factor); .walk_ring(&hash, garage.system.config.meta_replication_factor);
rpc_try_call_many(garage.system.clone(), rpc_try_call_many(
garage.system.clone(),
&who[..], &who[..],
&Message::PutBlock(PutBlockMessage{ &Message::PutBlock(PutBlockMessage { hash, data }),
hash, (garage.system.config.meta_replication_factor + 1) / 2,
data, DEFAULT_TIMEOUT,
}), )
(garage.system.config.meta_replication_factor+1)/2, .await?;
DEFAULT_TIMEOUT).await?;
Ok(()) Ok(())
} }
@ -183,7 +218,7 @@ struct BodyChunker {
impl BodyChunker { impl BodyChunker {
fn new(body: Body, block_size: usize) -> Self { fn new(body: Body, block_size: usize) -> Self {
Self{ Self {
body, body,
read_all: false, read_all: false,
block_size, block_size,
@ -203,26 +238,36 @@ impl BodyChunker {
if self.buf.len() == 0 { if self.buf.len() == 0 {
Ok(None) Ok(None)
} else if self.buf.len() <= self.block_size { } else if self.buf.len() <= self.block_size {
let block = self.buf.drain(..) let block = self.buf.drain(..).collect::<Vec<u8>>();
.collect::<Vec<u8>>();
Ok(Some(block)) Ok(Some(block))
} else { } else {
let block = self.buf.drain(..self.block_size) let block = self.buf.drain(..self.block_size).collect::<Vec<u8>>();
.collect::<Vec<u8>>();
Ok(Some(block)) Ok(Some(block))
} }
} }
} }
async fn handle_get(garage: Arc<Garage>, bucket: &str, key: &str) -> Result<Response<Body>, Error> { async fn handle_get(
let mut object = match garage.object_table.get(&bucket.to_string(), &key.to_string()).await? { garage: Arc<Garage>,
bucket: &str,
key: &str,
) -> Result<Response<BodyType>, Error> {
let mut object = match garage
.object_table
.get(&bucket.to_string(), &key.to_string())
.await?
{
None => return Err(Error::NotFound), None => return Err(Error::NotFound),
Some(o) => o Some(o) => o,
}; };
let last_v = match object.versions.drain(..) let last_v = match object
.rev().filter(|v| v.is_complete) .versions
.next() { .drain(..)
.rev()
.filter(|v| v.is_complete)
.next()
{
Some(v) => v, Some(v) => v,
None => return Err(Error::NotFound), None => return Err(Error::NotFound),
}; };
@ -234,7 +279,8 @@ async fn handle_get(garage: Arc<Garage>, bucket: &str, key: &str) -> Result<Resp
match last_v.data { match last_v.data {
ObjectVersionData::DeleteMarker => Err(Error::NotFound), ObjectVersionData::DeleteMarker => Err(Error::NotFound),
ObjectVersionData::Inline(bytes) => { ObjectVersionData::Inline(bytes) => {
Ok(resp_builder.body(bytes.into())?) let body: BodyType = Box::new(BytesBody::from(bytes));
Ok(resp_builder.body(body)?)
} }
ObjectVersionData::FirstBlock(first_block_hash) => { ObjectVersionData::FirstBlock(first_block_hash) => {
let read_first_block = get_block(garage.clone(), &first_block_hash); let read_first_block = get_block(garage.clone(), &first_block_hash);
@ -246,42 +292,119 @@ async fn handle_get(garage: Arc<Garage>, bucket: &str, key: &str) -> Result<Resp
None => return Err(Error::NotFound), None => return Err(Error::NotFound),
}; };
let mut blocks = version.blocks.iter() let mut blocks = version
.blocks
.iter()
.map(|vb| (vb.hash.clone(), None)) .map(|vb| (vb.hash.clone(), None))
.collect::<Vec<_>>(); .collect::<Vec<_>>();
blocks[0].1 = Some(first_block); blocks[0].1 = Some(first_block);
let block_futures = blocks.drain(..) let body_stream = futures::stream::iter(blocks)
.map(move |(hash, data_opt)| async { .map(move |(hash, data_opt)| {
let garage = garage.clone();
async move {
if let Some(data) = data_opt { if let Some(data) = data_opt {
Ok(data) Ok(Bytes::from(data))
} else { } else {
get_block(garage.clone(), &hash).await get_block(garage.clone(), &hash).await.map(Bytes::from)
.map_err(|e| format!("{}", e))
} }
}
})
.buffered(2);
let body: BodyType = Box::new(NonSyncStreamBody {
stream: Box::pin(body_stream),
}); });
let body_stream = futures::stream::iter(block_futures).buffered(2);
let body = Body::wrap_stream(body_stream);
Ok(resp_builder.body(body)?) Ok(resp_builder.body(body)?)
} }
} }
} }
async fn get_block(garage: Arc<Garage>, hash: &Hash) -> Result<Vec<u8>, Error> { async fn get_block(garage: Arc<Garage>, hash: &Hash) -> Result<Vec<u8>, Error> {
let who = garage.system.members.read().await let who = garage
.system
.members
.read()
.await
.walk_ring(&hash, garage.system.config.meta_replication_factor); .walk_ring(&hash, garage.system.config.meta_replication_factor);
let resps = rpc_try_call_many(garage.system.clone(), let resps = rpc_try_call_many(
garage.system.clone(),
&who[..], &who[..],
&Message::GetBlock(hash.clone()), &Message::GetBlock(hash.clone()),
1, 1,
DEFAULT_TIMEOUT).await?; DEFAULT_TIMEOUT,
)
.await?;
for resp in resps { for resp in resps {
if let Message::PutBlock(pbm) = resp { if let Message::PutBlock(pbm) = resp {
if data::hash(&pbm.data) == *hash { if data::hash(&pbm.data) == *hash {
return Ok(pbm.data) return Ok(pbm.data);
} }
} }
} }
Err(Error::Message(format!("No valid blocks returned"))) Err(Error::Message(format!("No valid blocks returned")))
} }
/// HTTP response body backed by a boxed stream of byte chunks.
///
/// The stream only has to be `Send`; no `Sync` bound is placed on it.
pub struct NonSyncStreamBody {
/// Pinned, boxed stream whose items are either the next chunk of body data or an error.
pub stream: Pin<Box<dyn Stream<Item = Result<Bytes, Error>> + Send>>,
}
impl HttpBody for NonSyncStreamBody {
	type Data = Bytes;
	type Error = Error;

	/// Polls the wrapped stream and forwards its next item unchanged.
	///
	/// Returns `Poll::Pending` while the stream is pending, `Some(Ok(bytes))`
	/// or `Some(Err(e))` for each item the stream yields, and `None` once the
	/// stream is exhausted.
	fn poll_data(
		mut self: Pin<&mut Self>,
		cx: &mut Context,
	) -> Poll<Option<Result<Bytes, Self::Error>>> {
		// `ready!` returns early with `Poll::Pending`; the stream's item already
		// has the shape the trait expects, so it is passed through as-is.
		let next_chunk = ready!(self.stream.as_mut().poll_next(cx));
		Poll::Ready(next_chunk)
	}

	/// This body never carries trailers: always resolves immediately to `Ok(None)`.
	fn poll_trailers(
		self: Pin<&mut Self>,
		_cx: &mut Context,
	) -> Poll<Result<Option<hyper::HeaderMap<hyper::header::HeaderValue>>, Self::Error>> {
		Poll::Ready(Ok(None))
	}
}
/// HTTP response body made of a single in-memory buffer.
pub struct BytesBody {
/// The payload still to be sent; `poll_data` takes it, leaving `None` afterwards.
pub bytes: Option<Bytes>,
}
impl HttpBody for BytesBody {
	type Data = Bytes;
	type Error = Error;

	/// Yields the whole buffer on the first poll and `None` on every later poll.
	///
	/// Never returns `Poll::Pending` and never produces an error.
	fn poll_data(
		mut self: Pin<&mut Self>,
		_cx: &mut Context,
	) -> Poll<Option<Result<Bytes, Self::Error>>> {
		// Taking the buffer leaves `None` behind, which marks the body as finished.
		let payload = self.bytes.take();
		Poll::Ready(payload.map(Ok))
	}

	/// This body never carries trailers: always resolves immediately to `Ok(None)`.
	fn poll_trailers(
		self: Pin<&mut Self>,
		_cx: &mut Context,
	) -> Poll<Result<Option<hyper::HeaderMap<hyper::header::HeaderValue>>, Self::Error>> {
		Poll::Ready(Ok(None))
	}
}
impl From<String> for BytesBody {
	/// Builds a body whose payload is the bytes of the given string.
	fn from(x: String) -> BytesBody {
		let payload = Bytes::from(x.into_bytes());
		Self {
			bytes: Some(payload),
		}
	}
}
impl From<Vec<u8>> for BytesBody {
	/// Builds a body whose payload is the given byte vector.
	fn from(x: Vec<u8>) -> BytesBody {
		let payload = Bytes::from(x);
		Self {
			bytes: Some(payload),
		}
	}
}

View File

@ -1,13 +1,13 @@
use std::sync::Arc;
use std::path::PathBuf; use std::path::PathBuf;
use std::sync::Arc;
use tokio::fs; use tokio::fs;
use tokio::prelude::*; use tokio::prelude::*;
use crate::error::Error;
use crate::server::Garage;
use crate::proto::*;
use crate::data::*; use crate::data::*;
use crate::error::Error;
use crate::proto::*;
use crate::server::Garage;
fn block_dir(garage: &Garage, hash: &Hash) -> PathBuf { fn block_dir(garage: &Garage, hash: &Hash) -> PathBuf {
let mut path = garage.system.config.data_dir.clone(); let mut path = garage.system.config.data_dir.clone();
@ -24,7 +24,7 @@ pub async fn write_block(garage: Arc<Garage>, hash: &Hash, data: &[u8]) -> Resul
path.push(hex::encode(hash)); path.push(hex::encode(hash));
if fs::metadata(&path).await.is_ok() { if fs::metadata(&path).await.is_ok() {
return Ok(Message::Ok) return Ok(Message::Ok);
} }
let mut f = fs::File::create(path).await?; let mut f = fs::File::create(path).await?;
@ -42,7 +42,7 @@ pub async fn read_block(garage: Arc<Garage>, hash: &Hash) -> Result<Message, Err
let mut data = vec![]; let mut data = vec![];
f.read_to_end(&mut data).await?; f.read_to_end(&mut data).await?;
Ok(Message::PutBlock(PutBlockMessage{ Ok(Message::PutBlock(PutBlockMessage {
hash: hash.clone(), hash: hash.clone(),
data, data,
})) }))

View File

@ -1,10 +1,10 @@
use std::time::{SystemTime, UNIX_EPOCH};
use std::fmt;
use std::collections::HashMap;
use serde::{Serializer, Deserializer, Serialize, Deserialize};
use serde::de::{self, Visitor};
use rand::Rng; use rand::Rng;
use sha2::{Sha256, Digest}; use serde::de::{self, Visitor};
use serde::{Deserialize, Deserializer, Serialize, Serializer};
use sha2::{Digest, Sha256};
use std::collections::HashMap;
use std::fmt;
use std::time::{SystemTime, UNIX_EPOCH};
#[derive(Default, PartialOrd, Ord, Clone, Hash, PartialEq)] #[derive(Default, PartialOrd, Ord, Clone, Hash, PartialEq)]
pub struct FixedBytes32([u8; 32]); pub struct FixedBytes32([u8; 32]);
@ -43,7 +43,10 @@ impl<'de> Visitor<'de> for FixedBytes32Visitor {
res.copy_from_slice(value); res.copy_from_slice(value);
Ok(res.into()) Ok(res.into())
} else { } else {
Err(E::custom(format!("Invalid byte string length {}, expected 32", value.len()))) Err(E::custom(format!(
"Invalid byte string length {}, expected 32",
value.len()
)))
} }
} }
} }
@ -88,7 +91,8 @@ pub fn gen_uuid() -> UUID {
} }
pub fn now_msec() -> u64 { pub fn now_msec() -> u64 {
SystemTime::now().duration_since(UNIX_EPOCH) SystemTime::now()
.duration_since(UNIX_EPOCH)
.expect("Fix your clock :o") .expect("Fix your clock :o")
.as_millis() as u64 .as_millis() as u64
} }
@ -96,7 +100,8 @@ pub fn now_msec() -> u64 {
// RMP serialization with names of fields and variants // RMP serialization with names of fields and variants
pub fn rmp_to_vec_all_named<T>(val: &T) -> Result<Vec<u8>, rmp_serde::encode::Error> pub fn rmp_to_vec_all_named<T>(val: &T) -> Result<Vec<u8>, rmp_serde::encode::Error>
where T: Serialize + ?Sized where
T: Serialize + ?Sized,
{ {
let mut wr = Vec::with_capacity(128); let mut wr = Vec::with_capacity(128);
let mut se = rmp_serde::Serializer::new(&mut wr) let mut se = rmp_serde::Serializer::new(&mut wr)
@ -104,7 +109,6 @@ where T: Serialize + ?Sized
.with_string_variants(); .with_string_variants();
val.serialize(&mut se)?; val.serialize(&mut se)?;
Ok(wr) Ok(wr)
} }
// Network management // Network management

View File

@ -1,6 +1,6 @@
use std::io;
use err_derive::Error; use err_derive::Error;
use hyper::StatusCode; use hyper::StatusCode;
use std::io;
#[derive(Debug, Error)] #[derive(Debug, Error)]
pub enum Error { pub enum Error {

View File

@ -1,28 +1,28 @@
mod error;
mod data; mod data;
mod error;
mod proto; mod proto;
mod membership; mod membership;
mod table; mod table;
mod block;
mod object_table; mod object_table;
mod version_table; mod version_table;
mod block;
mod server;
mod rpc_server;
mod rpc_client;
mod api_server; mod api_server;
mod rpc_client;
mod rpc_server;
mod server;
use std::collections::HashSet; use std::collections::HashSet;
use std::net::SocketAddr; use std::net::SocketAddr;
use std::path::PathBuf; use std::path::PathBuf;
use structopt::StructOpt; use structopt::StructOpt;
use error::Error;
use rpc_client::RpcClient;
use data::*; use data::*;
use error::Error;
use proto::*; use proto::*;
use rpc_client::RpcClient;
#[derive(StructOpt, Debug)] #[derive(StructOpt, Debug)]
#[structopt(name = "garage")] #[structopt(name = "garage")]
@ -69,7 +69,6 @@ pub struct ConfigureOpt {
n_tokens: u32, n_tokens: u32,
} }
#[tokio::main] #[tokio::main]
async fn main() { async fn main() {
let opt = Opt::from_args(); let opt = Opt::from_args();
@ -77,12 +76,8 @@ async fn main() {
let rpc_cli = RpcClient::new(); let rpc_cli = RpcClient::new();
let resp = match opt.cmd { let resp = match opt.cmd {
Command::Server(server_opt) => { Command::Server(server_opt) => server::run_server(server_opt.config_file).await,
server::run_server(server_opt.config_file).await Command::Status => cmd_status(rpc_cli, opt.rpc_host).await,
}
Command::Status => {
cmd_status(rpc_cli, opt.rpc_host).await
}
Command::Configure(configure_opt) => { Command::Configure(configure_opt) => {
cmd_configure(rpc_cli, opt.rpc_host, configure_opt).await cmd_configure(rpc_cli, opt.rpc_host, configure_opt).await
} }
@ -94,28 +89,40 @@ async fn main() {
} }
async fn cmd_status(rpc_cli: RpcClient, rpc_host: SocketAddr) -> Result<(), Error> { async fn cmd_status(rpc_cli: RpcClient, rpc_host: SocketAddr) -> Result<(), Error> {
let status = match rpc_cli.call(&rpc_host, let status = match rpc_cli
&Message::PullStatus, .call(&rpc_host, &Message::PullStatus, DEFAULT_TIMEOUT)
DEFAULT_TIMEOUT).await? { .await?
{
Message::AdvertiseNodesUp(nodes) => nodes, Message::AdvertiseNodesUp(nodes) => nodes,
resp => return Err(Error::Message(format!("Invalid RPC response: {:?}", resp))) resp => return Err(Error::Message(format!("Invalid RPC response: {:?}", resp))),
}; };
let config = match rpc_cli.call(&rpc_host, let config = match rpc_cli
&Message::PullConfig, .call(&rpc_host, &Message::PullConfig, DEFAULT_TIMEOUT)
DEFAULT_TIMEOUT).await? { .await?
{
Message::AdvertiseConfig(cfg) => cfg, Message::AdvertiseConfig(cfg) => cfg,
resp => return Err(Error::Message(format!("Invalid RPC response: {:?}", resp))) resp => return Err(Error::Message(format!("Invalid RPC response: {:?}", resp))),
}; };
println!("Healthy nodes:"); println!("Healthy nodes:");
for adv in status.iter() { for adv in status.iter() {
if let Some(cfg) = config.members.get(&adv.id) { if let Some(cfg) = config.members.get(&adv.id) {
println!("{}\t{}\t{}\t{}", hex::encode(&adv.id), cfg.datacenter, cfg.n_tokens, adv.addr); println!(
"{}\t{}\t{}\t{}",
hex::encode(&adv.id),
cfg.datacenter,
cfg.n_tokens,
adv.addr
);
} }
} }
let status_keys = status.iter().map(|x| x.id.clone()).collect::<HashSet<_>>(); let status_keys = status.iter().map(|x| x.id.clone()).collect::<HashSet<_>>();
if config.members.iter().any(|(id, _)| !status_keys.contains(id)) { if config
.members
.iter()
.any(|(id, _)| !status_keys.contains(id))
{
println!("\nFailed nodes:"); println!("\nFailed nodes:");
for (id, cfg) in config.members.iter() { for (id, cfg) in config.members.iter() {
if !status.iter().any(|x| x.id == *id) { if !status.iter().any(|x| x.id == *id) {
@ -124,7 +131,10 @@ async fn cmd_status(rpc_cli: RpcClient, rpc_host: SocketAddr) -> Result<(), Erro
} }
} }
if status.iter().any(|adv| !config.members.contains_key(&adv.id)) { if status
.iter()
.any(|adv| !config.members.contains_key(&adv.id))
{
println!("\nUnconfigured nodes:"); println!("\nUnconfigured nodes:");
for adv in status.iter() { for adv in status.iter() {
if !config.members.contains_key(&adv.id) { if !config.members.contains_key(&adv.id) {
@ -136,12 +146,17 @@ async fn cmd_status(rpc_cli: RpcClient, rpc_host: SocketAddr) -> Result<(), Erro
Ok(()) Ok(())
} }
async fn cmd_configure(rpc_cli: RpcClient, rpc_host: SocketAddr, args: ConfigureOpt) -> Result<(), Error> { async fn cmd_configure(
let status = match rpc_cli.call(&rpc_host, rpc_cli: RpcClient,
&Message::PullStatus, rpc_host: SocketAddr,
DEFAULT_TIMEOUT).await? { args: ConfigureOpt,
) -> Result<(), Error> {
let status = match rpc_cli
.call(&rpc_host, &Message::PullStatus, DEFAULT_TIMEOUT)
.await?
{
Message::AdvertiseNodesUp(nodes) => nodes, Message::AdvertiseNodesUp(nodes) => nodes,
resp => return Err(Error::Message(format!("Invalid RPC response: {:?}", resp))) resp => return Err(Error::Message(format!("Invalid RPC response: {:?}", resp))),
}; };
let mut candidates = vec![]; let mut candidates = vec![];
@ -151,25 +166,35 @@ async fn cmd_configure(rpc_cli: RpcClient, rpc_host: SocketAddr, args: Configure
} }
} }
if candidates.len() != 1 { if candidates.len() != 1 {
return Err(Error::Message(format!("{} matching nodes", candidates.len()))); return Err(Error::Message(format!(
"{} matching nodes",
candidates.len()
)));
} }
let mut config = match rpc_cli.call(&rpc_host, let mut config = match rpc_cli
&Message::PullConfig, .call(&rpc_host, &Message::PullConfig, DEFAULT_TIMEOUT)
DEFAULT_TIMEOUT).await? { .await?
{
Message::AdvertiseConfig(cfg) => cfg, Message::AdvertiseConfig(cfg) => cfg,
resp => return Err(Error::Message(format!("Invalid RPC response: {:?}", resp))) resp => return Err(Error::Message(format!("Invalid RPC response: {:?}", resp))),
}; };
config.members.insert(candidates[0].clone(), config.members.insert(
NetworkConfigEntry{ candidates[0].clone(),
NetworkConfigEntry {
datacenter: args.datacenter, datacenter: args.datacenter,
n_tokens: args.n_tokens, n_tokens: args.n_tokens,
}); },
);
config.version += 1; config.version += 1;
rpc_cli.call(&rpc_host, rpc_cli
.call(
&rpc_host,
&Message::AdvertiseConfig(config), &Message::AdvertiseConfig(config),
DEFAULT_TIMEOUT).await?; DEFAULT_TIMEOUT,
)
.await?;
Ok(()) Ok(())
} }

View File

@ -1,22 +1,22 @@
use std::sync::Arc; use std::collections::HashMap;
use std::hash::Hash as StdHash; use std::hash::Hash as StdHash;
use std::hash::Hasher; use std::hash::Hasher;
use std::path::PathBuf; use std::io::Read;
use std::io::{Read};
use std::collections::HashMap;
use std::time::Duration;
use std::net::{IpAddr, SocketAddr}; use std::net::{IpAddr, SocketAddr};
use std::path::PathBuf;
use std::sync::Arc;
use std::time::Duration;
use sha2::{Sha256, Digest};
use tokio::prelude::*;
use futures::future::join_all; use futures::future::join_all;
use sha2::{Digest, Sha256};
use tokio::prelude::*;
use tokio::sync::RwLock; use tokio::sync::RwLock;
use crate::server::Config;
use crate::error::Error;
use crate::data::*; use crate::data::*;
use crate::error::Error;
use crate::proto::*; use crate::proto::*;
use crate::rpc_client::*; use crate::rpc_client::*;
use crate::server::Config;
const PING_INTERVAL: Duration = Duration::from_secs(10); const PING_INTERVAL: Duration = Duration::from_secs(10);
const PING_TIMEOUT: Duration = Duration::from_secs(2); const PING_TIMEOUT: Duration = Duration::from_secs(2);
@ -55,11 +55,13 @@ pub struct RingEntry {
impl Members { impl Members {
fn handle_ping(&mut self, ip: IpAddr, info: &PingMessage) -> bool { fn handle_ping(&mut self, ip: IpAddr, info: &PingMessage) -> bool {
let addr = SocketAddr::new(ip, info.rpc_port); let addr = SocketAddr::new(ip, info.rpc_port);
let old_status = self.status.insert(info.id.clone(), let old_status = self.status.insert(
NodeStatus{ info.id.clone(),
NodeStatus {
addr: addr.clone(), addr: addr.clone(),
remaining_ping_attempts: MAX_FAILED_PINGS, remaining_ping_attempts: MAX_FAILED_PINGS,
}); },
);
match old_status { match old_status {
None => { None => {
eprintln!("Newly pingable node: {}", hex::encode(&info.id)); eprintln!("Newly pingable node: {}", hex::encode(&info.id));
@ -80,7 +82,9 @@ impl Members {
hasher.input(format!("{} {}\n", hex::encode(&id), status.addr)); hasher.input(format!("{} {}\n", hex::encode(&id), status.addr));
} }
eprintln!("END --"); eprintln!("END --");
self.status_hash.as_slice_mut().copy_from_slice(&hasher.result()[..]); self.status_hash
.as_slice_mut()
.copy_from_slice(&hasher.result()[..]);
} }
fn rebuild_ring(&mut self) { fn rebuild_ring(&mut self) {
@ -99,7 +103,7 @@ impl Members {
for i in 0..config.n_tokens { for i in 0..config.n_tokens {
let location = hash(format!("{} {}", hex::encode(&id), i).as_bytes()); let location = hash(format!("{} {}", hex::encode(&id), i).as_bytes());
new_ring.push(RingEntry{ new_ring.push(RingEntry {
location: location.into(), location: location.into(),
node: id.clone(), node: id.clone(),
datacenter, datacenter,
@ -125,11 +129,13 @@ impl Members {
let start = match self.ring.binary_search_by(|x| x.location.cmp(from)) { let start = match self.ring.binary_search_by(|x| x.location.cmp(from)) {
Ok(i) => i, Ok(i) => i,
Err(i) => if i == 0 { Err(i) => {
if i == 0 {
self.ring.len() - 1 self.ring.len() - 1
} else { } else {
i - 1 i - 1
} }
}
}; };
self.walk_ring_from_pos(start, n) self.walk_ring_from_pos(start, n)
@ -179,14 +185,17 @@ impl System {
let net_config = match read_network_config(&config.metadata_dir) { let net_config = match read_network_config(&config.metadata_dir) {
Ok(x) => x, Ok(x) => x,
Err(e) => { Err(e) => {
println!("No valid previous network configuration stored ({}), starting fresh.", e); println!(
NetworkConfig{ "No valid previous network configuration stored ({}), starting fresh.",
e
);
NetworkConfig {
members: HashMap::new(), members: HashMap::new(),
version: 0, version: 0,
} }
}, }
}; };
let mut members = Members{ let mut members = Members {
status: HashMap::new(), status: HashMap::new(),
status_hash: Hash::default(), status_hash: Hash::default(),
config: net_config, config: net_config,
@ -195,7 +204,7 @@ impl System {
}; };
members.recalculate_status_hash(); members.recalculate_status_hash();
members.rebuild_ring(); members.rebuild_ring();
System{ System {
config, config,
id, id,
rpc_client: RpcClient::new(), rpc_client: RpcClient::new(),
@ -208,19 +217,21 @@ impl System {
path.push("network_config"); path.push("network_config");
let members = self.members.read().await; let members = self.members.read().await;
let data = rmp_to_vec_all_named(&members.config) let data =
.expect("Error while encoding network config"); rmp_to_vec_all_named(&members.config).expect("Error while encoding network config");
drop(members); drop(members);
let mut f = tokio::fs::File::create(path.as_path()).await let mut f = tokio::fs::File::create(path.as_path())
.await
.expect("Could not create network_config"); .expect("Could not create network_config");
f.write_all(&data[..]).await f.write_all(&data[..])
.await
.expect("Could not write network_config"); .expect("Could not write network_config");
} }
pub async fn make_ping(&self) -> Message { pub async fn make_ping(&self) -> Message {
let members = self.members.read().await; let members = self.members.read().await;
Message::Ping(PingMessage{ Message::Ping(PingMessage {
id: self.id.clone(), id: self.id.clone(),
rpc_port: self.config.rpc_port, rpc_port: self.config.rpc_port,
status_hash: members.status_hash.clone(), status_hash: members.status_hash.clone(),
@ -230,13 +241,20 @@ impl System {
pub async fn broadcast(self: Arc<Self>, msg: Message, timeout: Duration) { pub async fn broadcast(self: Arc<Self>, msg: Message, timeout: Duration) {
let members = self.members.read().await; let members = self.members.read().await;
let to = members.status.keys().filter(|x| **x != self.id).cloned().collect::<Vec<_>>(); let to = members
.status
.keys()
.filter(|x| **x != self.id)
.cloned()
.collect::<Vec<_>>();
drop(members); drop(members);
rpc_call_many(self.clone(), &to[..], &msg, timeout).await; rpc_call_many(self.clone(), &to[..], &msg, timeout).await;
} }
pub async fn bootstrap(self: Arc<Self>) { pub async fn bootstrap(self: Arc<Self>) {
let bootstrap_peers = self.config.bootstrap_peers let bootstrap_peers = self
.config
.bootstrap_peers
.iter() .iter()
.map(|ip| (ip.clone(), None)) .map(|ip| (ip.clone(), None))
.collect::<Vec<_>>(); .collect::<Vec<_>>();
@ -247,15 +265,18 @@ impl System {
pub async fn ping_nodes(self: Arc<Self>, peers: Vec<(SocketAddr, Option<UUID>)>) { pub async fn ping_nodes(self: Arc<Self>, peers: Vec<(SocketAddr, Option<UUID>)>) {
let ping_msg = self.make_ping().await; let ping_msg = self.make_ping().await;
let ping_resps = join_all( let ping_resps = join_all(peers.iter().map(|(addr, id_option)| {
peers.iter()
.map(|(addr, id_option)| {
let sys = self.clone(); let sys = self.clone();
let ping_msg_ref = &ping_msg; let ping_msg_ref = &ping_msg;
async move { async move {
(id_option, addr.clone(), sys.rpc_client.call(&addr, ping_msg_ref, PING_TIMEOUT).await) (
id_option,
addr.clone(),
sys.rpc_client.call(&addr, ping_msg_ref, PING_TIMEOUT).await,
)
} }
})).await; }))
.await;
let mut members = self.members.write().await; let mut members = self.members.write().await;
@ -267,7 +288,7 @@ impl System {
let is_new = members.handle_ping(addr.ip(), &info); let is_new = members.handle_ping(addr.ip(), &info);
if is_new { if is_new {
has_changes = true; has_changes = true;
to_advertise.push(AdvertisedNode{ to_advertise.push(AdvertisedNode {
id: info.id.clone(), id: info.id.clone(),
addr: addr.clone(), addr: addr.clone(),
}); });
@ -279,9 +300,16 @@ impl System {
tokio::spawn(self.clone().pull_config(info.id.clone())); tokio::spawn(self.clone().pull_config(info.id.clone()));
} }
} else if let Some(id) = id_option { } else if let Some(id) = id_option {
let remaining_attempts = members.status.get(id).map(|x| x.remaining_ping_attempts).unwrap_or(0); let remaining_attempts = members
.status
.get(id)
.map(|x| x.remaining_ping_attempts)
.unwrap_or(0);
if remaining_attempts == 0 { if remaining_attempts == 0 {
eprintln!("Removing node {} after too many failed pings", hex::encode(&id)); eprintln!(
"Removing node {} after too many failed pings",
hex::encode(&id)
);
members.status.remove(&id); members.status.remove(&id);
has_changes = true; has_changes = true;
} else { } else {
@ -297,15 +325,16 @@ impl System {
drop(members); drop(members);
if to_advertise.len() > 0 { if to_advertise.len() > 0 {
self.broadcast(Message::AdvertiseNodesUp(to_advertise), PING_TIMEOUT).await; self.broadcast(Message::AdvertiseNodesUp(to_advertise), PING_TIMEOUT)
.await;
} }
} }
pub async fn handle_ping(self: Arc<Self>, pub async fn handle_ping(
self: Arc<Self>,
from: &SocketAddr, from: &SocketAddr,
ping: &PingMessage) ping: &PingMessage,
-> Result<Message, Error> ) -> Result<Message, Error> {
{
let mut members = self.members.write().await; let mut members = self.members.write().await;
let is_new = members.handle_ping(from.ip(), ping); let is_new = members.handle_ping(from.ip(), ping);
if is_new { if is_new {
@ -329,7 +358,7 @@ impl System {
let members = self.members.read().await; let members = self.members.read().await;
let mut mem = vec![]; let mut mem = vec![];
for (node, status) in members.status.iter() { for (node, status) in members.status.iter() {
mem.push(AdvertisedNode{ mem.push(AdvertisedNode {
id: node.clone(), id: node.clone(),
addr: status.addr.clone(), addr: status.addr.clone(),
}); });
@ -342,10 +371,10 @@ impl System {
Ok(Message::AdvertiseConfig(members.config.clone())) Ok(Message::AdvertiseConfig(members.config.clone()))
} }
pub async fn handle_advertise_nodes_up(self: Arc<Self>, pub async fn handle_advertise_nodes_up(
adv: &[AdvertisedNode]) self: Arc<Self>,
-> Result<Message, Error> adv: &[AdvertisedNode],
{ ) -> Result<Message, Error> {
let mut to_ping = vec![]; let mut to_ping = vec![];
let mut members = self.members.write().await; let mut members = self.members.write().await;
@ -355,11 +384,13 @@ impl System {
if node.id == self.id { if node.id == self.id {
// learn our own ip address // learn our own ip address
let self_addr = SocketAddr::new(node.addr.ip(), self.config.rpc_port); let self_addr = SocketAddr::new(node.addr.ip(), self.config.rpc_port);
let old_self = members.status.insert(node.id.clone(), let old_self = members.status.insert(
NodeStatus{ node.id.clone(),
NodeStatus {
addr: self_addr, addr: self_addr,
remaining_ping_attempts: MAX_FAILED_PINGS, remaining_ping_attempts: MAX_FAILED_PINGS,
}); },
);
has_changed = match old_self { has_changed = match old_self {
None => true, None => true,
Some(x) => x.addr != self_addr, Some(x) => x.addr != self_addr,
@ -380,17 +411,19 @@ impl System {
Ok(Message::Ok) Ok(Message::Ok)
} }
pub async fn handle_advertise_config(self: Arc<Self>, pub async fn handle_advertise_config(
adv: &NetworkConfig) self: Arc<Self>,
-> Result<Message, Error> adv: &NetworkConfig,
{ ) -> Result<Message, Error> {
let mut members = self.members.write().await; let mut members = self.members.write().await;
if adv.version > members.config.version { if adv.version > members.config.version {
members.config = adv.clone(); members.config = adv.clone();
members.rebuild_ring(); members.rebuild_ring();
tokio::spawn(self.clone().broadcast(Message::AdvertiseConfig(adv.clone()), PING_TIMEOUT)); tokio::spawn(
self.clone()
.broadcast(Message::AdvertiseConfig(adv.clone()), PING_TIMEOUT),
);
tokio::spawn(self.clone().save_network_config()); tokio::spawn(self.clone().save_network_config());
} }
@ -402,7 +435,9 @@ impl System {
let restart_at = tokio::time::delay_for(PING_INTERVAL); let restart_at = tokio::time::delay_for(PING_INTERVAL);
let members = self.members.read().await; let members = self.members.read().await;
let ping_addrs = members.status.iter() let ping_addrs = members
.status
.iter()
.filter(|(id, _)| **id != self.id) .filter(|(id, _)| **id != self.id)
.map(|(id, status)| (status.addr.clone(), Some(id.clone()))) .map(|(id, status)| (status.addr.clone(), Some(id.clone())))
.collect::<Vec<_>>(); .collect::<Vec<_>>();
@ -414,12 +449,12 @@ impl System {
} }
} }
pub fn pull_status(self: Arc<Self>, peer: UUID) -> impl futures::future::Future<Output=()> + Send + 'static { pub fn pull_status(
self: Arc<Self>,
peer: UUID,
) -> impl futures::future::Future<Output = ()> + Send + 'static {
async move { async move {
let resp = rpc_call(self.clone(), let resp = rpc_call(self.clone(), &peer, &Message::PullStatus, PING_TIMEOUT).await;
&peer,
&Message::PullStatus,
PING_TIMEOUT).await;
if let Ok(Message::AdvertiseNodesUp(nodes)) = resp { if let Ok(Message::AdvertiseNodesUp(nodes)) = resp {
let _: Result<_, _> = self.handle_advertise_nodes_up(&nodes).await; let _: Result<_, _> = self.handle_advertise_nodes_up(&nodes).await;
} }
@ -427,10 +462,7 @@ impl System {
} }
pub async fn pull_config(self: Arc<Self>, peer: UUID) { pub async fn pull_config(self: Arc<Self>, peer: UUID) {
let resp = rpc_call(self.clone(), let resp = rpc_call(self.clone(), &peer, &Message::PullConfig, PING_TIMEOUT).await;
&peer,
&Message::PullConfig,
PING_TIMEOUT).await;
if let Ok(Message::AdvertiseConfig(config)) = resp { if let Ok(Message::AdvertiseConfig(config)) = resp {
let _: Result<_, _> = self.handle_advertise_config(&config).await; let _: Result<_, _> = self.handle_advertise_config(&config).await;
} }

View File

@ -1,12 +1,11 @@
use std::sync::Arc;
use serde::{Serialize, Deserialize};
use async_trait::async_trait; use async_trait::async_trait;
use serde::{Deserialize, Serialize};
use std::sync::Arc;
use tokio::sync::RwLock; use tokio::sync::RwLock;
use crate::data::*; use crate::data::*;
use crate::table::*;
use crate::server::Garage; use crate::server::Garage;
use crate::table::*;
#[derive(PartialEq, Clone, Debug, Serialize, Deserialize)] #[derive(PartialEq, Clone, Debug, Serialize, Deserialize)]
pub struct Object { pub struct Object {
@ -35,7 +34,7 @@ pub struct ObjectVersion {
#[derive(PartialEq, Clone, Debug, Serialize, Deserialize)] #[derive(PartialEq, Clone, Debug, Serialize, Deserialize)]
pub enum ObjectVersionData { pub enum ObjectVersionData {
DeleteMarker, DeleteMarker,
Inline(#[serde(with="serde_bytes")] Vec<u8>), Inline(#[serde(with = "serde_bytes")] Vec<u8>),
FirstBlock(Hash), FirstBlock(Hash),
} }
@ -49,7 +48,9 @@ impl Entry<String, String> for Object {
fn merge(&mut self, other: &Self) { fn merge(&mut self, other: &Self) {
for other_v in other.versions.iter() { for other_v in other.versions.iter() {
match self.versions.binary_search_by(|v| (v.timestamp, &v.uuid).cmp(&(other_v.timestamp, &other_v.uuid))) { match self.versions.binary_search_by(|v| {
(v.timestamp, &v.uuid).cmp(&(other_v.timestamp, &other_v.uuid))
}) {
Ok(i) => { Ok(i) => {
let mut v = &mut self.versions[i]; let mut v = &mut self.versions[i];
if other_v.size > v.size { if other_v.size > v.size {
@ -64,8 +65,11 @@ impl Entry<String, String> for Object {
} }
} }
} }
let last_complete = self.versions let last_complete = self
.iter().enumerate().rev() .versions
.iter()
.enumerate()
.rev()
.filter(|(_, v)| v.is_complete) .filter(|(_, v)| v.is_complete)
.next() .next()
.map(|(vi, _)| vi); .map(|(vi, _)| vi);

View File

@ -1,6 +1,6 @@
use std::time::Duration; use serde::{Deserialize, Serialize};
use std::net::SocketAddr; use std::net::SocketAddr;
use serde::{Serialize, Deserialize}; use std::time::Duration;
use crate::data::*; use crate::data::*;
@ -42,6 +42,6 @@ pub struct AdvertisedNode {
pub struct PutBlockMessage { pub struct PutBlockMessage {
pub hash: Hash, pub hash: Hash,
#[serde(with="serde_bytes")] #[serde(with = "serde_bytes")]
pub data: Vec<u8>, pub data: Vec<u8>,
} }

View File

@ -3,23 +3,25 @@ use std::sync::Arc;
use std::time::Duration; use std::time::Duration;
use bytes::IntoBuf; use bytes::IntoBuf;
use hyper::{Body, Method, Request, StatusCode};
use hyper::client::Client;
use futures::stream::futures_unordered::FuturesUnordered; use futures::stream::futures_unordered::FuturesUnordered;
use futures::stream::StreamExt; use futures::stream::StreamExt;
use futures_util::future::FutureExt;
use hyper::client::Client;
use hyper::{Body, Method, Request, StatusCode};
use crate::data::*; use crate::data::*;
use crate::error::Error; use crate::error::Error;
use crate::proto::Message;
use crate::membership::System; use crate::membership::System;
use crate::proto::Message;
pub async fn rpc_call_many(sys: Arc<System>, pub async fn rpc_call_many(
sys: Arc<System>,
to: &[UUID], to: &[UUID],
msg: &Message, msg: &Message,
timeout: Duration) timeout: Duration,
-> Vec<Result<Message, Error>> ) -> Vec<Result<Message, Error>> {
{ let mut resp_stream = to
let mut resp_stream = to.iter() .iter()
.map(|to| rpc_call(sys.clone(), to, msg, timeout)) .map(|to| rpc_call(sys.clone(), to, msg, timeout))
.collect::<FuturesUnordered<_>>(); .collect::<FuturesUnordered<_>>();
@ -30,14 +32,15 @@ pub async fn rpc_call_many(sys: Arc<System>,
results results
} }
pub async fn rpc_try_call_many(sys: Arc<System>, pub async fn rpc_try_call_many(
sys: Arc<System>,
to: &[UUID], to: &[UUID],
msg: &Message, msg: &Message,
stop_after: usize, stop_after: usize,
timeout: Duration) timeout: Duration,
-> Result<Vec<Message>, Error> ) -> Result<Vec<Message>, Error> {
{ let mut resp_stream = to
let mut resp_stream = to.iter() .iter()
.map(|to| rpc_call(sys.clone(), to, msg, timeout)) .map(|to| rpc_call(sys.clone(), to, msg, timeout))
.collect::<FuturesUnordered<_>>(); .collect::<FuturesUnordered<_>>();
@ -49,7 +52,7 @@ pub async fn rpc_try_call_many(sys: Arc<System>,
Ok(msg) => { Ok(msg) => {
results.push(msg); results.push(msg);
if results.len() >= stop_after { if results.len() >= stop_after {
break break;
} }
} }
Err(e) => { Err(e) => {
@ -69,12 +72,12 @@ pub async fn rpc_try_call_many(sys: Arc<System>,
} }
} }
pub async fn rpc_call(sys: Arc<System>, pub async fn rpc_call(
sys: Arc<System>,
to: &UUID, to: &UUID,
msg: &Message, msg: &Message,
timeout: Duration) timeout: Duration,
-> Result<Message, Error> ) -> Result<Message, Error> {
{
let addr = { let addr = {
let members = sys.members.read().await; let members = sys.members.read().await;
match members.status.get(to) { match members.status.get(to) {
@ -91,24 +94,24 @@ pub struct RpcClient {
impl RpcClient { impl RpcClient {
pub fn new() -> Self { pub fn new() -> Self {
RpcClient{ RpcClient {
client: Client::new(), client: Client::new(),
} }
} }
pub async fn call(&self, pub async fn call(
&self,
to_addr: &SocketAddr, to_addr: &SocketAddr,
msg: &Message, msg: &Message,
timeout: Duration) timeout: Duration,
-> Result<Message, Error> ) -> Result<Message, Error> {
{
let uri = format!("http://{}/rpc", to_addr); let uri = format!("http://{}/rpc", to_addr);
let req = Request::builder() let req = Request::builder()
.method(Method::POST) .method(Method::POST)
.uri(uri) .uri(uri)
.body(Body::from(rmp_to_vec_all_named(msg)?))?; .body(Body::from(rmp_to_vec_all_named(msg)?))?;
let resp_fut = self.client.request(req); let resp_fut = self.client.request(req).fuse();
let resp = tokio::time::timeout(timeout, resp_fut).await??; let resp = tokio::time::timeout(timeout, resp_fut).await??;
if resp.status() == StatusCode::OK { if resp.status() == StatusCode::OK {
@ -116,7 +119,7 @@ impl RpcClient {
let msg = rmp_serde::decode::from_read::<_, Message>(body.into_buf())?; let msg = rmp_serde::decode::from_read::<_, Message>(body.into_buf())?;
match msg { match msg {
Message::Error(e) => Err(Error::RPCError(e)), Message::Error(e) => Err(Error::RPCError(e)),
x => Ok(x) x => Ok(x),
} }
} else { } else {
Err(Error::RPCError(format!("Status code {}", resp.status()))) Err(Error::RPCError(format!("Status code {}", resp.status())))

View File

@ -1,18 +1,18 @@
use std::net::SocketAddr; use std::net::SocketAddr;
use std::sync::Arc; use std::sync::Arc;
use serde::Serialize;
use bytes::IntoBuf; use bytes::IntoBuf;
use hyper::service::{make_service_fn, service_fn};
use hyper::server::conn::AddrStream;
use hyper::{Body, Method, Request, Response, Server, StatusCode};
use futures::future::Future; use futures::future::Future;
use hyper::server::conn::AddrStream;
use hyper::service::{make_service_fn, service_fn};
use hyper::{Body, Method, Request, Response, Server, StatusCode};
use serde::Serialize;
use crate::error::Error; use crate::block::*;
use crate::data::rmp_to_vec_all_named; use crate::data::rmp_to_vec_all_named;
use crate::error::Error;
use crate::proto::Message; use crate::proto::Message;
use crate::server::Garage; use crate::server::Garage;
use crate::block::*;
fn debug_serialize<T: Serialize>(x: T) -> Result<String, Error> { fn debug_serialize<T: Serialize>(x: T) -> Result<String, Error> {
let ss = serde_json::to_string(&x)?; let ss = serde_json::to_string(&x)?;
@ -30,7 +30,11 @@ fn err_to_msg(x: Result<Message, Error>) -> Message {
} }
} }
async fn handler(garage: Arc<Garage>, req: Request<Body>, addr: SocketAddr) -> Result<Response<Body>, Error> { async fn handler(
garage: Arc<Garage>,
req: Request<Body>,
addr: SocketAddr,
) -> Result<Response<Body>, Error> {
if req.method() != &Method::POST { if req.method() != &Method::POST {
let mut bad_request = Response::default(); let mut bad_request = Response::default();
*bad_request.status_mut() = StatusCode::BAD_REQUEST; *bad_request.status_mut() = StatusCode::BAD_REQUEST;
@ -40,7 +44,12 @@ async fn handler(garage: Arc<Garage>, req: Request<Body>, addr: SocketAddr) -> R
let whole_body = hyper::body::to_bytes(req.into_body()).await?; let whole_body = hyper::body::to_bytes(req.into_body()).await?;
let msg = rmp_serde::decode::from_read::<_, Message>(whole_body.into_buf())?; let msg = rmp_serde::decode::from_read::<_, Message>(whole_body.into_buf())?;
eprintln!("RPC from {}: {} ({} bytes)", addr, debug_serialize(&msg)?, whole_body.len()); eprintln!(
"RPC from {}: {} ({} bytes)",
addr,
debug_serialize(&msg)?,
whole_body.len()
);
let sys = garage.system.clone(); let sys = garage.system.clone();
let resp = err_to_msg(match &msg { let resp = err_to_msg(match &msg {
@ -49,15 +58,13 @@ async fn handler(garage: Arc<Garage>, req: Request<Body>, addr: SocketAddr) -> R
Message::PullConfig => sys.handle_pull_config().await, Message::PullConfig => sys.handle_pull_config().await,
Message::AdvertiseNodesUp(adv) => sys.handle_advertise_nodes_up(adv).await, Message::AdvertiseNodesUp(adv) => sys.handle_advertise_nodes_up(adv).await,
Message::AdvertiseConfig(adv) => sys.handle_advertise_config(adv).await, Message::AdvertiseConfig(adv) => sys.handle_advertise_config(adv).await,
Message::PutBlock(m) => { Message::PutBlock(m) => write_block(garage, &m.hash, &m.data).await,
write_block(garage, &m.hash, &m.data).await Message::GetBlock(h) => read_block(garage, &h).await,
}
Message::GetBlock(h) => {
read_block(garage, &h).await
}
Message::TableRPC(table, msg) => { Message::TableRPC(table, msg) => {
if let Some(rpc_handler) = garage.table_rpc_handlers.get(table) { if let Some(rpc_handler) = garage.table_rpc_handlers.get(table) {
rpc_handler.handle(&msg[..]).await rpc_handler
.handle(&msg[..])
.await
.map(|rep| Message::TableRPC(table.to_string(), rep)) .map(|rep| Message::TableRPC(table.to_string(), rep))
} else { } else {
Ok(Message::Error(format!("Unknown table: {}", table))) Ok(Message::Error(format!("Unknown table: {}", table)))
@ -69,13 +76,13 @@ async fn handler(garage: Arc<Garage>, req: Request<Body>, addr: SocketAddr) -> R
eprintln!("reply to {}: {}", addr, debug_serialize(&resp)?); eprintln!("reply to {}: {}", addr, debug_serialize(&resp)?);
Ok(Response::new(Body::from( Ok(Response::new(Body::from(rmp_to_vec_all_named(&resp)?)))
rmp_to_vec_all_named(&resp)?
)))
} }
pub async fn run_rpc_server(
pub async fn run_rpc_server(garage: Arc<Garage>, shutdown_signal: impl Future<Output=()>) -> Result<(), hyper::Error> { garage: Arc<Garage>,
shutdown_signal: impl Future<Output = ()>,
) -> Result<(), hyper::Error> {
let bind_addr = ([0, 0, 0, 0], garage.system.config.rpc_port).into(); let bind_addr = ([0, 0, 0, 0], garage.system.config.rpc_port).into();
let service = make_service_fn(|conn: &AddrStream| { let service = make_service_fn(|conn: &AddrStream| {
@ -89,7 +96,7 @@ pub async fn run_rpc_server(garage: Arc<Garage>, shutdown_signal: impl Future<Ou
} }
}); });
let server = Server::bind(&bind_addr).serve(service) ; let server = Server::bind(&bind_addr).serve(service);
let graceful = server.with_graceful_shutdown(shutdown_signal); let graceful = server.with_graceful_shutdown(shutdown_signal);
println!("RPC server listening on http://{}", bind_addr); println!("RPC server listening on http://{}", bind_addr);

View File

@ -1,17 +1,17 @@
use std::collections::HashMap;
use std::io::{Read, Write};
use std::sync::Arc;
use std::net::SocketAddr;
use std::path::PathBuf;
use futures::channel::oneshot; use futures::channel::oneshot;
use serde::Deserialize; use serde::Deserialize;
use std::collections::HashMap;
use std::io::{Read, Write};
use std::net::SocketAddr;
use std::path::PathBuf;
use std::sync::Arc;
use tokio::sync::{Mutex, RwLock}; use tokio::sync::{Mutex, RwLock};
use crate::api_server;
use crate::data::*; use crate::data::*;
use crate::proto::*;
use crate::error::Error; use crate::error::Error;
use crate::membership::System; use crate::membership::System;
use crate::api_server; use crate::proto::*;
use crate::rpc_server; use crate::rpc_server;
use crate::table::*; use crate::table::*;
@ -30,27 +30,33 @@ impl Garage {
pub async fn new(config: Config, id: UUID, db: sled::Db) -> Arc<Self> { pub async fn new(config: Config, id: UUID, db: sled::Db) -> Arc<Self> {
let system = Arc::new(System::new(config, id)); let system = Arc::new(System::new(config, id));
let meta_rep_param = TableReplicationParams{ let meta_rep_param = TableReplicationParams {
replication_factor: system.config.meta_replication_factor, replication_factor: system.config.meta_replication_factor,
write_quorum: (system.config.meta_replication_factor+1)/2, write_quorum: (system.config.meta_replication_factor + 1) / 2,
read_quorum: (system.config.meta_replication_factor+1)/2, read_quorum: (system.config.meta_replication_factor + 1) / 2,
timeout: DEFAULT_TIMEOUT, timeout: DEFAULT_TIMEOUT,
}; };
let object_table = Arc::new(Table::new( let object_table = Arc::new(Table::new(
ObjectTable{garage: RwLock::new(None)}, ObjectTable {
garage: RwLock::new(None),
},
system.clone(), system.clone(),
&db, &db,
"object".to_string(), "object".to_string(),
meta_rep_param.clone())); meta_rep_param.clone(),
));
let version_table = Arc::new(Table::new( let version_table = Arc::new(Table::new(
VersionTable{garage: RwLock::new(None)}, VersionTable {
garage: RwLock::new(None),
},
system.clone(), system.clone(),
&db, &db,
"version".to_string(), "version".to_string(),
meta_rep_param.clone())); meta_rep_param.clone(),
));
let mut garage = Self{ let mut garage = Self {
db, db,
system: system.clone(), system: system.clone(),
fs_lock: Mutex::new(()), fs_lock: Mutex::new(()),
@ -61,10 +67,12 @@ impl Garage {
garage.table_rpc_handlers.insert( garage.table_rpc_handlers.insert(
garage.object_table.name.clone(), garage.object_table.name.clone(),
garage.object_table.clone().rpc_handler()); garage.object_table.clone().rpc_handler(),
);
garage.table_rpc_handlers.insert( garage.table_rpc_handlers.insert(
garage.version_table.name.clone(), garage.version_table.name.clone(),
garage.version_table.clone().rpc_handler()); garage.version_table.clone().rpc_handler(),
);
let garage = Arc::new(garage); let garage = Arc::new(garage);
@ -118,7 +126,7 @@ fn gen_node_id(metadata_dir: &PathBuf) -> Result<UUID, Error> {
let mut d = vec![]; let mut d = vec![];
f.read_to_end(&mut d)?; f.read_to_end(&mut d)?;
if d.len() != 32 { if d.len() != 32 {
return Err(Error::Message(format!("Corrupt node_id file"))) return Err(Error::Message(format!("Corrupt node_id file")));
} }
let mut id = [0u8; 32]; let mut id = [0u8; 32];
@ -149,16 +157,13 @@ async fn wait_from(chan: oneshot::Receiver<()>) -> () {
} }
pub async fn run_server(config_file: PathBuf) -> Result<(), Error> { pub async fn run_server(config_file: PathBuf) -> Result<(), Error> {
let config = read_config(config_file) let config = read_config(config_file).expect("Unable to read config file");
.expect("Unable to read config file");
let mut db_path = config.metadata_dir.clone(); let mut db_path = config.metadata_dir.clone();
db_path.push("db"); db_path.push("db");
let db = sled::open(db_path) let db = sled::open(db_path).expect("Unable to open DB");
.expect("Unable to open DB");
let id = gen_node_id(&config.metadata_dir) let id = gen_node_id(&config.metadata_dir).expect("Unable to read or generate node ID");
.expect("Unable to read or generate node ID");
println!("Node ID: {}", hex::encode(&id)); println!("Node ID: {}", hex::encode(&id));
let garage = Garage::new(config, id, db).await; let garage = Garage::new(config, id, db).await;

View File

@ -1,15 +1,14 @@
use std::time::Duration;
use std::sync::Arc;
use serde::{Serialize, Deserialize};
use async_trait::async_trait; use async_trait::async_trait;
use serde::{Deserialize, Serialize};
use std::sync::Arc;
use std::time::Duration;
use crate::error::Error;
use crate::proto::*;
use crate::data::*; use crate::data::*;
use crate::error::Error;
use crate::membership::System; use crate::membership::System;
use crate::proto::*;
use crate::rpc_client::*; use crate::rpc_client::*;
pub struct Table<F: TableFormat> { pub struct Table<F: TableFormat> {
pub instance: F, pub instance: F,
@ -72,7 +71,9 @@ pub trait SortKey {
fn sort_key(&self) -> &[u8]; fn sort_key(&self) -> &[u8];
} }
pub trait Entry<P: PartitionKey, S: SortKey>: PartialEq + Clone + Serialize + for<'de> Deserialize<'de> + Send + Sync { pub trait Entry<P: PartitionKey, S: SortKey>:
PartialEq + Clone + Serialize + for<'de> Deserialize<'de> + Send + Sync
{
fn partition_key(&self) -> &P; fn partition_key(&self) -> &P;
fn sort_key(&self) -> &S; fn sort_key(&self) -> &S;
@ -114,10 +115,15 @@ pub trait TableFormat: Send + Sync {
} }
impl<F: TableFormat + 'static> Table<F> { impl<F: TableFormat + 'static> Table<F> {
pub fn new(instance: F, system: Arc<System>, db: &sled::Db, name: String, param: TableReplicationParams) -> Self { pub fn new(
let store = db.open_tree(&name) instance: F,
.expect("Unable to open DB tree"); system: Arc<System>,
Self{ db: &sled::Db,
name: String,
param: TableReplicationParams,
) -> Self {
let store = db.open_tree(&name).expect("Unable to open DB tree");
Self {
instance, instance,
name, name,
system, system,
@ -128,33 +134,39 @@ impl<F: TableFormat + 'static> Table<F> {
} }
pub fn rpc_handler(self: Arc<Self>) -> Box<dyn TableRpcHandler + Send + Sync> { pub fn rpc_handler(self: Arc<Self>) -> Box<dyn TableRpcHandler + Send + Sync> {
Box::new(TableRpcHandlerAdapter::<F>{ table: self }) Box::new(TableRpcHandlerAdapter::<F> { table: self })
} }
pub async fn insert(&self, e: &F::E) -> Result<(), Error> { pub async fn insert(&self, e: &F::E) -> Result<(), Error> {
let hash = e.partition_key().hash(); let hash = e.partition_key().hash();
let who = self.system.members.read().await let who = self
.system
.members
.read()
.await
.walk_ring(&hash, self.param.replication_factor); .walk_ring(&hash, self.param.replication_factor);
eprintln!("insert who: {:?}", who); eprintln!("insert who: {:?}", who);
let rpc = &TableRPC::<F>::Update(vec![e.clone()]); let rpc = &TableRPC::<F>::Update(vec![e.clone()]);
self.rpc_try_call_many(&who[..], self.rpc_try_call_many(&who[..], &rpc, self.param.write_quorum)
&rpc, .await?;
self.param.write_quorum).await?;
Ok(()) Ok(())
} }
pub async fn get(&self, partition_key: &F::P, sort_key: &F::S) -> Result<Option<F::E>, Error> { pub async fn get(&self, partition_key: &F::P, sort_key: &F::S) -> Result<Option<F::E>, Error> {
let hash = partition_key.hash(); let hash = partition_key.hash();
let who = self.system.members.read().await let who = self
.system
.members
.read()
.await
.walk_ring(&hash, self.param.replication_factor); .walk_ring(&hash, self.param.replication_factor);
eprintln!("get who: {:?}", who); eprintln!("get who: {:?}", who);
let rpc = &TableRPC::<F>::ReadEntry(partition_key.clone(), sort_key.clone()); let rpc = &TableRPC::<F>::ReadEntry(partition_key.clone(), sort_key.clone());
let resps = self.rpc_try_call_many(&who[..], let resps = self
&rpc, .rpc_try_call_many(&who[..], &rpc, self.param.read_quorum)
self.param.read_quorum)
.await?; .await?;
let mut ret = None; let mut ret = None;
@ -180,27 +192,37 @@ impl<F: TableFormat + 'static> Table<F> {
if let Some(ret_entry) = &ret { if let Some(ret_entry) = &ret {
if not_all_same { if not_all_same {
// Repair on read // Repair on read
let _: Result<_, _> = self.rpc_try_call_many( let _: Result<_, _> = self
.rpc_try_call_many(
&who[..], &who[..],
&TableRPC::<F>::Update(vec![ret_entry.clone()]), &TableRPC::<F>::Update(vec![ret_entry.clone()]),
who.len()) who.len(),
)
.await; .await;
} }
} }
Ok(ret) Ok(ret)
} }
async fn rpc_try_call_many(&self, who: &[UUID], rpc: &TableRPC<F>, quorum: usize) -> Result<Vec<TableRPC<F>>, Error> { async fn rpc_try_call_many(
&self,
who: &[UUID],
rpc: &TableRPC<F>,
quorum: usize,
) -> Result<Vec<TableRPC<F>>, Error> {
eprintln!("Table RPC to {:?}: {}", who, serde_json::to_string(&rpc)?); eprintln!("Table RPC to {:?}: {}", who, serde_json::to_string(&rpc)?);
let rpc_bytes = rmp_to_vec_all_named(rpc)?; let rpc_bytes = rmp_to_vec_all_named(rpc)?;
let rpc_msg = Message::TableRPC(self.name.to_string(), rpc_bytes); let rpc_msg = Message::TableRPC(self.name.to_string(), rpc_bytes);
let resps = rpc_try_call_many(self.system.clone(), let resps = rpc_try_call_many(
self.system.clone(),
who, who,
&rpc_msg, &rpc_msg,
quorum, quorum,
self.param.timeout).await?; self.param.timeout,
)
.await?;
let mut resps_vals = vec![]; let mut resps_vals = vec![];
for resp in resps { for resp in resps {
@ -210,9 +232,15 @@ impl<F: TableFormat + 'static> Table<F> {
continue; continue;
} }
} }
return Err(Error::Message(format!("Invalid reply to TableRPC: {:?}", resp))) return Err(Error::Message(format!(
"Invalid reply to TableRPC: {:?}",
resp
)));
} }
eprintln!("Table RPC responses: {}", serde_json::to_string(&resps_vals)?); eprintln!(
"Table RPC responses: {}",
serde_json::to_string(&resps_vals)?
);
Ok(resps_vals) Ok(resps_vals)
} }
@ -226,7 +254,7 @@ impl<F: TableFormat + 'static> Table<F> {
self.handle_update(pairs).await?; self.handle_update(pairs).await?;
Ok(TableRPC::Ok) Ok(TableRPC::Ok)
} }
_ => Err(Error::RPCError(format!("Unexpected table RPC"))) _ => Err(Error::RPCError(format!("Unexpected table RPC"))),
} }
} }
@ -254,7 +282,7 @@ impl<F: TableFormat + 'static> Table<F> {
new_entry.merge(&update); new_entry.merge(&update);
(Some(old_entry), new_entry) (Some(old_entry), new_entry)
} }
None => (None, update.clone()) None => (None, update.clone()),
}; };
let new_bytes = rmp_to_vec_all_named(&new_entry) let new_bytes = rmp_to_vec_all_named(&new_entry)

View File

@ -1,12 +1,11 @@
use std::sync::Arc;
use serde::{Serialize, Deserialize};
use async_trait::async_trait; use async_trait::async_trait;
use serde::{Deserialize, Serialize};
use std::sync::Arc;
use tokio::sync::RwLock; use tokio::sync::RwLock;
use crate::data::*; use crate::data::*;
use crate::table::*;
use crate::server::Garage; use crate::server::Garage;
use crate::table::*;
#[derive(PartialEq, Clone, Debug, Serialize, Deserialize)] #[derive(PartialEq, Clone, Debug, Serialize, Deserialize)]
pub struct Version { pub struct Version {