Implement garage stats to get info on node contents

This commit is contained in:
Alex Auvolat 2021-03-12 15:40:54 +01:00
parent cbe7e1a66a
commit a1442f072a
7 changed files with 126 additions and 4 deletions

23
Cargo.lock generated
View File

@ -489,6 +489,7 @@ dependencies = [
"garage_table",
"garage_util",
"garage_web",
"git-version",
"hex",
"log",
"pretty_env_logger",
@ -705,6 +706,28 @@ dependencies = [
"wasi 0.9.0+wasi-snapshot-preview1",
]
[[package]]
name = "git-version"
version = "0.3.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "94918e83f1e01dedc2e361d00ce9487b14c58c7f40bab148026fa39d42cb41e2"
dependencies = [
"git-version-macro",
"proc-macro-hack",
]
[[package]]
name = "git-version-macro"
version = "0.3.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "34a97a52fdee1870a34fa6e4b77570cba531b27d1838874fef4429a791a3d657"
dependencies = [
"proc-macro-hack",
"proc-macro2",
"quote",
"syn",
]
[[package]]
name = "h2"
version = "0.2.7"

View File

@ -4,7 +4,7 @@ DOCKER=lxpz/garage_amd64
all:
#cargo fmt || true
#RUSTFLAGS="-C link-arg=-fuse-ld=lld" cargo build
cargo build
clear; cargo build
$(BIN):
#RUSTFLAGS="-C link-arg=-fuse-ld=lld" cargo build --release

View File

@ -27,6 +27,7 @@ hex = "0.3"
sha2 = "0.8"
log = "0.4"
pretty_env_logger = "0.4"
git-version = "0.3.4"
sled = "0.34"

View File

@ -1,4 +1,6 @@
use std::sync::Arc;
use std::fmt::Write;
use std::collections::HashMap;
use serde::{Deserialize, Serialize};
@ -6,6 +8,7 @@ use garage_util::error::Error;
use garage_table::crdt::CRDT;
use garage_table::*;
use garage_table::replication::*;
use garage_rpc::rpc_client::*;
use garage_rpc::rpc_server::*;
@ -25,6 +28,7 @@ pub enum AdminRPC {
BucketOperation(BucketOperation),
KeyOperation(KeyOperation),
LaunchRepair(RepairOpt),
Stats(StatsOpt),
// Replies
Ok(String),
@ -55,6 +59,7 @@ impl AdminRpcHandler {
AdminRPC::BucketOperation(bo) => self2.handle_bucket_cmd(bo).await,
AdminRPC::KeyOperation(ko) => self2.handle_key_cmd(ko).await,
AdminRPC::LaunchRepair(opt) => self2.handle_launch_repair(opt).await,
AdminRPC::Stats(opt) => self2.handle_stats(opt).await,
_ => Err(Error::BadRPC(format!("Invalid RPC"))),
}
}
@ -357,4 +362,79 @@ impl AdminRpcHandler {
)))
}
}
async fn handle_stats(&self, opt: StatsOpt) -> Result<AdminRPC, Error> {
if opt.all_nodes {
let mut ret = String::new();
let ring = self.garage.system.ring.borrow().clone();
for node in ring.config.members.keys() {
let mut opt = opt.clone();
opt.all_nodes = false;
writeln!(&mut ret, "\n======================").unwrap();
writeln!(&mut ret, "Stats for node {:?}:", node).unwrap();
match self
.rpc_client
.call(
*node,
AdminRPC::Stats(opt),
ADMIN_RPC_TIMEOUT,
)
.await
{
Ok(AdminRPC::Ok(s)) => writeln!(&mut ret, "{}", s).unwrap(),
Ok(x) => writeln!(&mut ret, "Bad answer: {:?}", x).unwrap(),
Err(e) => writeln!(&mut ret, "Error: {}", e).unwrap(),
}
}
Ok(AdminRPC::Ok(ret))
} else {
Ok(AdminRPC::Ok(self.gather_stats_local(opt)?))
}
}
fn gather_stats_local(&self, opt: StatsOpt) -> Result<String, Error> {
let mut ret = String::new();
writeln!(&mut ret, "\nGarage version: {}", git_version::git_version!()).unwrap();
// Gather ring statistics
let ring = self.garage.system.ring.borrow().clone();
let mut ring_nodes = HashMap::new();
for r in ring.ring.iter() {
for n in r.nodes.iter() {
if !ring_nodes.contains_key(n) {
ring_nodes.insert(*n, 0usize);
}
*ring_nodes.get_mut(n).unwrap() += 1;
}
}
writeln!(&mut ret, "\nRing nodes & partition count:").unwrap();
for (n, c) in ring_nodes.iter() {
writeln!(&mut ret, " {:?} {}", n, c).unwrap();
}
self.gather_table_stats(&mut ret, &self.garage.bucket_table, &opt)?;
self.gather_table_stats(&mut ret, &self.garage.key_table, &opt)?;
self.gather_table_stats(&mut ret, &self.garage.object_table, &opt)?;
self.gather_table_stats(&mut ret, &self.garage.version_table, &opt)?;
self.gather_table_stats(&mut ret, &self.garage.block_ref_table, &opt)?;
writeln!(&mut ret, "\nBlock manager stats:").unwrap();
writeln!(&mut ret, " resync queue length: {}", self.garage.block_manager.resync_queue.len()).unwrap();
if opt.detailed {
writeln!(&mut ret, "\nDetailed stats not implemented yet.").unwrap();
}
Ok(ret)
}
fn gather_table_stats<F: TableSchema, R: TableReplication>(&self, to: &mut String, t: &Arc<Table<F, R>>, _opt: &StatsOpt) -> Result<(), Error> {
writeln!(to, "\nTable stats for {}", t.data.name).unwrap();
writeln!(to, " number of items: {}", t.data.store.len()).unwrap();
writeln!(to, " Merkle updater todo queue length: {}", t.data.merkle_updater.todo.len()).unwrap();
Ok(())
}
}

View File

@ -69,6 +69,10 @@ pub enum Command {
/// Start repair of node data
#[structopt(name = "repair")]
Repair(RepairOpt),
/// Gather node statistics
#[structopt(name = "stats")]
Stats(StatsOpt),
}
#[derive(StructOpt, Debug)]
@ -281,6 +285,17 @@ pub enum RepairWhat {
BlockRefs,
}
#[derive(Serialize, Deserialize, StructOpt, Debug, Clone)]
pub struct StatsOpt {
/// Gather statistics from all nodes
#[structopt(short = "a", long = "all-nodes")]
pub all_nodes: bool,
/// Gather detailed statistics (this can be long)
#[structopt(short = "d", long = "detailed")]
pub detailed: bool,
}
#[tokio::main]
async fn main() {
pretty_env_logger::init();
@ -332,6 +347,9 @@ async fn main() {
Command::Repair(ro) => {
cmd_admin(admin_rpc_cli, opt.rpc_host, AdminRPC::LaunchRepair(ro)).await
}
Command::Stats(so) => {
cmd_admin(admin_rpc_cli, opt.rpc_host, AdminRPC::Stats(so)).await
}
};
if let Err(e) = resp {

View File

@ -17,7 +17,7 @@ pub struct TableData<F: TableSchema> {
pub instance: F,
pub store: sled::Tree,
pub(crate) merkle_updater: Arc<MerkleUpdater>,
pub merkle_updater: Arc<MerkleUpdater>,
}
impl<F> TableData<F>

View File

@ -32,7 +32,7 @@ pub fn hash_of_merkle_partition_opt(p: Option<MerklePartition>) -> Hash {
// 16 bits (two bytes) of item's partition keys' hashes.
// It builds one Merkle tree for each of these 2**16 partitions.
pub(crate) struct MerkleUpdater {
pub struct MerkleUpdater {
table_name: String,
background: Arc<BackgroundRunner>,
@ -40,7 +40,7 @@ pub(crate) struct MerkleUpdater {
// - key = the key of an item in the main table, ie hash(partition_key)+sort_key
// - value = the hash of the full serialized item, if present,
// or an empty vec if item is absent (deleted)
pub(crate) todo: sled::Tree,
pub todo: sled::Tree,
pub(crate) todo_notify: Notify,
// Content of the merkle tree: items where