Prettier worker list table; remove useless CLI log messages

Author: Alex Auvolat, 2022-12-12 17:16:49 +01:00
Commit: de9d6cddf7 (parent: f7c65e830e)
GPG Key ID: 0E496D15096376BE (no known key found for this signature in database)
11 changed files with 139 additions and 116 deletions

View File

@@ -53,7 +53,7 @@ impl Worker for RepairWorker {
 		"Block repair worker".into()
 	}
 
-	fn info(&self) -> Option<String> {
+	fn status(&self) -> WorkerStatus {
 		match self.block_iter.as_ref() {
 			None => {
 				let idx_bytes = self
@@ -66,9 +66,17 @@
 				} else {
 					idx_bytes
 				};
-				Some(format!("Phase 1: {}", hex::encode(idx_bytes)))
+				WorkerStatus {
+					progress: Some("Phase 1".into()),
+					freeform: vec![format!("Now at: {}", hex::encode(idx_bytes))],
+					..Default::default()
+				}
 			}
-			Some(bi) => Some(format!("Phase 2: {:.2}% done", bi.progress() * 100.)),
+			Some(bi) => WorkerStatus {
+				progress: Some(format!("{:.2}%", bi.progress() * 100.)),
+				freeform: vec!["Phase 2".into()],
+				..Default::default()
+			},
 		}
 	}
@@ -271,29 +279,28 @@ impl Worker for ScrubWorker {
 		"Block scrub worker".into()
 	}
 
-	fn info(&self) -> Option<String> {
-		let s = match &self.work {
-			ScrubWorkerState::Running(bsi) => format!(
-				"{:.2}% done (tranquility = {})",
-				bsi.progress() * 100.,
-				self.persisted.tranquility
-			),
-			ScrubWorkerState::Paused(bsi, rt) => {
-				format!(
-					"Paused, {:.2}% done, resumes at {}",
-					bsi.progress() * 100.,
-					msec_to_rfc3339(*rt)
-				)
-			}
-			ScrubWorkerState::Finished => format!(
-				"Last completed scrub: {}",
-				msec_to_rfc3339(self.persisted.time_last_complete_scrub)
-			),
+	fn status(&self) -> WorkerStatus {
+		let mut s = WorkerStatus {
+			persistent_errors: Some(self.persisted.corruptions_detected),
+			tranquility: Some(self.persisted.tranquility),
+			..Default::default()
 		};
-		Some(format!(
-			"{} ; corruptions detected: {}",
-			s, self.persisted.corruptions_detected
-		))
+		match &self.work {
+			ScrubWorkerState::Running(bsi) => {
+				s.progress = Some(format!("{:.2}%", bsi.progress() * 100.));
+			}
+			ScrubWorkerState::Paused(bsi, rt) => {
+				s.progress = Some(format!("{:.2}%", bsi.progress() * 100.));
+				s.freeform = vec![format!("Paused, resumes at {}", msec_to_rfc3339(*rt))];
+			}
+			ScrubWorkerState::Finished => {
+				s.freeform = vec![format!(
+					"Completed {}",
+					msec_to_rfc3339(self.persisted.time_last_complete_scrub)
+				)];
+			}
+		}
+		s
 	}
 
 	async fn work(&mut self, _must_exit: &mut watch::Receiver<bool>) -> Result<WorkerState, Error> {

View File

@@ -477,27 +477,22 @@ impl Worker for ResyncWorker {
 		format!("Block resync worker #{}", self.index + 1)
 	}
 
-	fn info(&self) -> Option<String> {
+	fn status(&self) -> WorkerStatus {
 		let persisted = self.manager.resync.persisted.load();
 
 		if self.index >= persisted.n_workers {
-			return Some("(unused)".into());
+			return WorkerStatus {
+				freeform: vec!["(unused)".into()],
+				..Default::default()
+			};
 		}
 
-		let mut ret = vec![];
-		ret.push(format!("tranquility = {}", persisted.tranquility));
-
-		let qlen = self.manager.resync.queue_len().unwrap_or(0);
-		if qlen > 0 {
-			ret.push(format!("{} blocks in queue", qlen));
-		}
-
-		let elen = self.manager.resync.errors_len().unwrap_or(0);
-		if elen > 0 {
-			ret.push(format!("{} blocks in error state", elen));
-		}
-
-		Some(ret.join(", "))
+		WorkerStatus {
+			queue_length: Some(self.manager.resync.queue_len().unwrap_or(0) as u64),
+			tranquility: Some(persisted.tranquility),
+			persistent_errors: Some(self.manager.resync.errors_len().unwrap_or(0) as u64),
+			..Default::default()
+		}
 	}
 
 	async fn work(&mut self, _must_exit: &mut watch::Receiver<bool>) -> Result<WorkerState, Error> {

View File

@@ -254,7 +254,7 @@ pub fn print_worker_info(wi: HashMap<usize, WorkerInfo>, wlo: WorkerListOpt) {
 		)
 	});
 
-	let mut table = vec![];
+	let mut table = vec!["TID\tState\tName\tTranq\tDone\tQueue\tErrors\tConsec\tLast".to_string()];
 	for (tid, info) in wi.iter() {
 		if wlo.busy && !matches!(info.state, WorkerState::Busy | WorkerState::Throttled(_)) {
 			continue;
@@ -263,33 +263,38 @@ pub fn print_worker_info(wi: HashMap<usize, WorkerInfo>, wlo: WorkerListOpt) {
 			continue;
 		}
 
-		table.push(format!("{}\t{}\t{}", tid, info.state, info.name));
-		if let Some(i) = &info.info {
-			table.push(format!("\t\t {}", i));
-		}
-
 		let tf = timeago::Formatter::new();
-		let (err_ago, err_msg) = info
+		let err_ago = info
 			.last_error
 			.as_ref()
-			.map(|(m, t)| {
-				(
-					tf.convert(Duration::from_millis(now_msec() - t)),
-					m.as_str(),
-				)
-			})
-			.unwrap_or(("(?) ago".into(), "(?)"));
-		if info.consecutive_errors > 0 {
-			table.push(format!(
-				"\t\t {} consecutive errors ({} total), last {}",
-				info.consecutive_errors, info.errors, err_ago,
-			));
-			table.push(format!("\t\t {}", err_msg));
-		} else if info.errors > 0 {
-			table.push(format!("\t\t ({} errors, last {})", info.errors, err_ago,));
-			if wlo.errors {
-				table.push(format!("\t\t {}", err_msg));
-			}
-		}
+			.map(|(_, t)| tf.convert(Duration::from_millis(now_msec() - t)))
+			.unwrap_or_default();
+		let (total_err, consec_err) = if info.errors > 0 {
+			(info.errors.to_string(), info.consecutive_errors.to_string())
+		} else {
+			("-".into(), "-".into())
+		};
+
+		table.push(format!(
+			"{}\t{}\t{}\t{}\t{}\t{}\t{}\t{}\t{}",
+			tid,
+			info.state,
+			info.name,
+			info.status
+				.tranquility
+				.as_ref()
+				.map(ToString::to_string)
+				.unwrap_or("-".into()),
+			info.status.progress.as_deref().unwrap_or("-"),
+			info.status
+				.queue_length
+				.as_ref()
+				.map(ToString::to_string)
+				.unwrap_or("-".into()),
+			total_err,
+			consec_err,
+			err_ago,
+		));
 	}
 	format_table(table);
 }
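
For context, every row pushed into the table above is a single tab-separated string that format_table aligns into columns, under the new header TID, State, Name, Tranq, Done, Queue, Errors, Consec, Last. The following sketch is not part of the commit: example_row is a hypothetical helper with invented values, included only to show how one row lines up with those columns.

// Illustrative only: one row in the same tab-separated layout as the new
// header "TID\tState\tName\tTranq\tDone\tQueue\tErrors\tConsec\tLast".
// example_row is a hypothetical helper; all values are invented.
fn example_row() -> String {
	format!(
		"{}\t{}\t{}\t{}\t{}\t{}\t{}\t{}\t{}",
		3,                    // TID: worker task id
		"Busy",               // State (a throttled worker displays as "Busy*")
		"Block scrub worker", // Name: Worker::name()
		4,                    // Tranq: status.tranquility
		"12.57%",             // Done: status.progress
		"-",                  // Queue: status.queue_length (none reported here)
		"2",                  // Errors: total error count ("-" when zero)
		"0",                  // Consec: consecutive errors ("-" when zero)
		"2 hours ago",        // Last: time since the last error (empty if none)
	)
}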

View File

@@ -127,9 +127,16 @@ async fn main() {
 		std::process::abort();
 	}));
 
+	// Parse arguments and dispatch command line
+	let opt = Opt::from_clap(&Opt::clap().version(version.as_str()).get_matches());
+
 	// Initialize logging as well as other libraries used in Garage
 	if std::env::var("RUST_LOG").is_err() {
-		std::env::set_var("RUST_LOG", "netapp=info,garage=info")
+		let default_log = match &opt.cmd {
+			Command::Server => "netapp=info,garage=info",
+			_ => "netapp=warn,garage=warn",
+		};
+		std::env::set_var("RUST_LOG", default_log)
 	}
 	tracing_subscriber::fmt()
 		.with_writer(std::io::stderr)
@@ -137,9 +144,6 @@
 		.init();
 	sodiumoxide::init().expect("Unable to init sodiumoxide");
 
-	// Parse arguments and dispatch command line
-	let opt = Opt::from_clap(&Opt::clap().version(version.as_str()).get_matches());
-
 	let res = match opt.cmd {
 		Command::Server => server::run_server(opt.config_file).await,
 		Command::OfflineRepair(repair_opt) => {
@@ -182,9 +186,9 @@ async fn cli_command(opt: Opt) -> Result<(), Error> {
 	let netapp = NetApp::new(GARAGE_VERSION_TAG, network_key, sk);
 
 	// Find and parse the address of the target host
-	let (id, addr) = if let Some(h) = opt.rpc_host {
+	let (id, addr, is_default_addr) = if let Some(h) = opt.rpc_host {
 		let (id, addrs) = parse_and_resolve_peer_addr(&h).ok_or_else(|| format!("Invalid RPC remote node identifier: {}. Expected format is <pubkey>@<IP or hostname>:<port>.", h))?;
-		(id, addrs[0])
+		(id, addrs[0], false)
 	} else {
 		let node_id = garage_rpc::system::read_node_id(&config.as_ref().unwrap().metadata_dir)
 			.err_context(READ_KEY_ERROR)?;
@@ -195,24 +199,26 @@ async fn cli_command(opt: Opt) -> Result<(), Error> {
 				.ok_or_message("unable to resolve rpc_public_addr specified in config file")?
 				.next()
 				.ok_or_message("unable to resolve rpc_public_addr specified in config file")?;
-			(node_id, a)
+			(node_id, a, false)
 		} else {
 			let default_addr = SocketAddr::new(
 				"127.0.0.1".parse().unwrap(),
 				config.as_ref().unwrap().rpc_bind_addr.port(),
 			);
-			warn!(
-				"Trying to contact Garage node at default address {}",
-				default_addr
-			);
-			warn!("If this doesn't work, consider adding rpc_public_addr in your config file or specifying the -h command line parameter.");
-			(node_id, default_addr)
+			(node_id, default_addr, true)
 		}
 	};
 
 	// Connect to target host
-	netapp.clone().try_connect(addr, id).await
-		.err_context("Unable to connect to destination RPC host. Check that you are using the same value of rpc_secret as them, and that you have their correct public key.")?;
+	if let Err(e) = netapp.clone().try_connect(addr, id).await {
+		if is_default_addr {
+			warn!(
+				"Tried to contact Garage node at default address {}, which didn't work. If that address is wrong, consider setting rpc_public_addr in your config file.",
+				addr
+			);
+		}
+		Err(e).err_context("Unable to connect to destination RPC host. Check that you are using the same value of rpc_secret as them, and that you have their correct public key.")?;
+	}
 
 	let system_rpc_endpoint = netapp.endpoint::<SystemRpc, ()>(SYSTEM_RPC_PATH.into());
 	let admin_rpc_endpoint = netapp.endpoint::<AdminRpc, ()>(ADMIN_RPC_PATH.into());

View File

@@ -85,8 +85,11 @@ impl Worker for RepairVersionsWorker {
 		"Version repair worker".into()
 	}
 
-	fn info(&self) -> Option<String> {
-		Some(format!("{} items done", self.counter))
+	fn status(&self) -> WorkerStatus {
+		WorkerStatus {
+			progress: Some(self.counter.to_string()),
+			..Default::default()
+		}
 	}
 
 	async fn work(&mut self, _must_exit: &mut watch::Receiver<bool>) -> Result<WorkerState, Error> {
@@ -163,8 +166,11 @@ impl Worker for RepairBlockrefsWorker {
 		"Block refs repair worker".into()
 	}
 
-	fn info(&self) -> Option<String> {
-		Some(format!("{} items done", self.counter))
+	fn status(&self) -> WorkerStatus {
+		WorkerStatus {
+			progress: Some(self.counter.to_string()),
+			..Default::default()
+		}
 	}
 
 	async fn work(&mut self, _must_exit: &mut watch::Receiver<bool>) -> Result<WorkerState, Error> {

View File

@@ -404,14 +404,13 @@ impl<T: CountedItem> IndexPropagatorWorker<T> {
 #[async_trait]
 impl<T: CountedItem> Worker for IndexPropagatorWorker<T> {
 	fn name(&self) -> String {
-		format!("{} index counter propagator", T::COUNTER_TABLE_NAME)
+		format!("{} counter", T::COUNTER_TABLE_NAME)
 	}
 
-	fn info(&self) -> Option<String> {
-		if !self.buf.is_empty() {
-			Some(format!("{} items in queue", self.buf.len()))
-		} else {
-			None
-		}
+	fn status(&self) -> WorkerStatus {
+		WorkerStatus {
+			queue_length: Some(self.buf.len() as u64),
+			..Default::default()
+		}
 	}

View File

@@ -330,12 +330,10 @@ where
 		format!("{} GC", F::TABLE_NAME)
 	}
 
-	fn info(&self) -> Option<String> {
-		let l = self.gc.data.gc_todo_len().unwrap_or(0);
-		if l > 0 {
-			Some(format!("{} items in queue", l))
-		} else {
-			None
-		}
+	fn status(&self) -> WorkerStatus {
+		WorkerStatus {
+			queue_length: Some(self.gc.data.gc_todo_len().unwrap_or(0) as u64),
+			..Default::default()
+		}
 	}

View File

@@ -310,15 +310,13 @@ where
 	R: TableReplication + 'static,
 {
 	fn name(&self) -> String {
-		format!("{} Merkle tree updater", F::TABLE_NAME)
+		format!("{} Merkle", F::TABLE_NAME)
 	}
 
-	fn info(&self) -> Option<String> {
-		let l = self.0.todo_len().unwrap_or(0);
-		if l > 0 {
-			Some(format!("{} items in queue", l))
-		} else {
-			None
-		}
+	fn status(&self) -> WorkerStatus {
+		WorkerStatus {
+			queue_length: Some(self.0.todo_len().unwrap_or(0) as u64),
+			..Default::default()
+		}
 	}

View File

@@ -570,12 +570,10 @@ impl<F: TableSchema + 'static, R: TableReplication + 'static> Worker for SyncWor
 		format!("{} sync", F::TABLE_NAME)
 	}
 
-	fn info(&self) -> Option<String> {
-		let l = self.todo.len();
-		if l > 0 {
-			Some(format!("{} partitions remaining", l))
-		} else {
-			None
-		}
+	fn status(&self) -> WorkerStatus {
+		WorkerStatus {
+			queue_length: Some(self.todo.len() as u64),
+			..Default::default()
+		}
 	}

View File

@@ -29,13 +29,24 @@ pub struct BackgroundRunner {
 #[derive(Clone, Serialize, Deserialize, Debug)]
 pub struct WorkerInfo {
 	pub name: String,
-	pub info: Option<String>,
+	pub status: WorkerStatus,
 	pub state: WorkerState,
 	pub errors: usize,
 	pub consecutive_errors: usize,
 	pub last_error: Option<(String, u64)>,
 }
 
+/// WorkerStatus is a struct returned by the worker with a bunch of canonical
+/// fields to indicate their status to CLI users. All fields are optional.
+#[derive(Clone, Serialize, Deserialize, Debug, Default)]
+pub struct WorkerStatus {
+	pub tranquility: Option<u32>,
+	pub progress: Option<String>,
+	pub queue_length: Option<u64>,
+	pub persistent_errors: Option<u64>,
+	pub freeform: Vec<String>,
+}
+
 impl BackgroundRunner {
 	/// Create a new BackgroundRunner
 	pub fn new(
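
To show how workers are expected to fill this struct, here is a minimal sketch of a status() implementation. MyQueueWorker and its fields are invented for illustration; only WorkerStatus itself comes from this commit, assumed importable as garage_util::background::WorkerStatus.

// Sketch only: a hypothetical worker reporting through the new struct.
// MyQueueWorker and its fields are invented; WorkerStatus is the struct
// defined above (path assumed to be garage_util::background::WorkerStatus).
use garage_util::background::WorkerStatus;

struct MyQueueWorker {
	pending: Vec<u64>,
	tranquility: u32,
}

impl MyQueueWorker {
	fn status(&self) -> WorkerStatus {
		WorkerStatus {
			tranquility: Some(self.tranquility),
			queue_length: Some(self.pending.len() as u64),
			freeform: vec![format!("{} items pending", self.pending.len())],
			// progress and persistent_errors stay None thanks to the Default derive
			..Default::default()
		}
	}
}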

View File

@@ -10,7 +10,7 @@ use serde::{Deserialize, Serialize};
 use tokio::select;
 use tokio::sync::{mpsc, watch};
 
-use crate::background::WorkerInfo;
+use crate::background::{WorkerInfo, WorkerStatus};
 use crate::error::Error;
 use crate::time::now_msec;
@@ -26,7 +26,7 @@ impl std::fmt::Display for WorkerState {
 	fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
 		match self {
 			WorkerState::Busy => write!(f, "Busy"),
-			WorkerState::Throttled(t) => write!(f, "Thr:{:.3}", t),
+			WorkerState::Throttled(_) => write!(f, "Busy*"),
 			WorkerState::Idle => write!(f, "Idle"),
 			WorkerState::Done => write!(f, "Done"),
 		}
@@ -37,8 +37,8 @@ impl std::fmt::Display for WorkerState {
 pub trait Worker: Send {
 	fn name(&self) -> String;
 
-	fn info(&self) -> Option<String> {
-		None
+	fn status(&self) -> WorkerStatus {
+		Default::default()
 	}
 
 	/// Work: do a basic unit of work, if one is available (otherwise, should return
@@ -119,7 +119,7 @@ impl WorkerProcessor {
 				match wi.get_mut(&worker.task_id) {
 					Some(i) => {
 						i.state = worker.state;
-						i.info = worker.worker.info();
+						i.status = worker.worker.status();
 						i.errors = worker.errors;
 						i.consecutive_errors = worker.consecutive_errors;
 						if worker.last_error.is_some() {
@@ -130,7 +130,7 @@ impl WorkerProcessor {
 					wi.insert(worker.task_id, WorkerInfo {
 						name: worker.worker.name(),
 						state: worker.state,
-						info: worker.worker.info(),
+						status: worker.worker.status(),
 						errors: worker.errors,
 						consecutive_errors: worker.consecutive_errors,
 						last_error: worker.last_error.take(),