cli: new worker info command

This commit is contained in:
Alex Auvolat 2022-12-13 12:24:30 +01:00
parent a51e8d94c6
commit 9d82196945
No known key found for this signature in database
GPG Key ID: 0E496D15096376BE
6 changed files with 86 additions and 11 deletions

View File

@ -67,14 +67,17 @@ impl Worker for RepairWorker {
idx_bytes idx_bytes
}; };
WorkerStatus { WorkerStatus {
progress: Some("Phase 1".into()), progress: Some("0.00%".into()),
freeform: vec![format!("Now at: {}", hex::encode(idx_bytes))], freeform: vec![format!(
"Currently in phase 1, iterator position: {}",
hex::encode(idx_bytes)
)],
..Default::default() ..Default::default()
} }
} }
Some(bi) => WorkerStatus { Some(bi) => WorkerStatus {
progress: Some(format!("{:.2}%", bi.progress() * 100.)), progress: Some(format!("{:.2}%", bi.progress() * 100.)),
freeform: vec!["Phase 2".into()], freeform: vec!["Currently in phase 2".into()],
..Default::default() ..Default::default()
}, },
} }
@ -291,11 +294,11 @@ impl Worker for ScrubWorker {
} }
ScrubWorkerState::Paused(bsi, rt) => { ScrubWorkerState::Paused(bsi, rt) => {
s.progress = Some(format!("{:.2}%", bsi.progress() * 100.)); s.progress = Some(format!("{:.2}%", bsi.progress() * 100.));
s.freeform = vec![format!("Paused, resumes at {}", msec_to_rfc3339(*rt))]; s.freeform = vec![format!("Scrub paused, resumes at {}", msec_to_rfc3339(*rt))];
} }
ScrubWorkerState::Finished => { ScrubWorkerState::Finished => {
s.freeform = vec![format!( s.freeform = vec![format!(
"Completed {}", "Last scrub completed at {}",
msec_to_rfc3339(self.persisted.time_last_complete_scrub) msec_to_rfc3339(self.persisted.time_last_complete_scrub)
)]; )];
} }

View File

@ -257,7 +257,7 @@ impl BlockResyncManager {
if let Err(e) = &res { if let Err(e) = &res {
manager.metrics.resync_error_counter.add(1); manager.metrics.resync_error_counter.add(1);
warn!("Error when resyncing {:?}: {}", hash, e); error!("Error when resyncing {:?}: {}", hash, e);
let err_counter = match self.errors.get(hash.as_slice())? { let err_counter = match self.errors.get(hash.as_slice())? {
Some(ec) => ErrorCounter::decode(&ec).add1(now + 1), Some(ec) => ErrorCounter::decode(&ec).add1(now + 1),
@ -482,7 +482,7 @@ impl Worker for ResyncWorker {
if self.index >= persisted.n_workers { if self.index >= persisted.n_workers {
return WorkerStatus { return WorkerStatus {
freeform: vec!["(unused)".into()], freeform: vec!["This worker is currently disabled".into()],
..Default::default() ..Default::default()
}; };
} }

View File

@ -54,6 +54,7 @@ pub enum AdminRpc {
HashMap<usize, garage_util::background::WorkerInfo>, HashMap<usize, garage_util::background::WorkerInfo>,
WorkerListOpt, WorkerListOpt,
), ),
WorkerInfo(usize, garage_util::background::WorkerInfo),
} }
impl Rpc for AdminRpc { impl Rpc for AdminRpc {
@ -880,6 +881,16 @@ impl AdminRpcHandler {
let workers = self.garage.background.get_worker_info(); let workers = self.garage.background.get_worker_info();
Ok(AdminRpc::WorkerList(workers, opt)) Ok(AdminRpc::WorkerList(workers, opt))
} }
WorkerCmd::Info { tid } => {
let info = self
.garage
.background
.get_worker_info()
.get(&tid)
.ok_or_bad_request(format!("No worker with TID {}", tid))?
.clone();
Ok(AdminRpc::WorkerInfo(tid, info))
}
WorkerCmd::Set { opt } => match opt { WorkerCmd::Set { opt } => match opt {
WorkerSetCmd::ScrubTranquility { tranquility } => { WorkerSetCmd::ScrubTranquility { tranquility } => {
let scrub_command = ScrubWorkerCommand::SetTranquility(tranquility); let scrub_command = ScrubWorkerCommand::SetTranquility(tranquility);

View File

@ -186,7 +186,10 @@ pub async fn cmd_admin(
print_key_info(&key, &rb); print_key_info(&key, &rb);
} }
AdminRpc::WorkerList(wi, wlo) => { AdminRpc::WorkerList(wi, wlo) => {
print_worker_info(wi, wlo); print_worker_list(wi, wlo);
}
AdminRpc::WorkerInfo(tid, wi) => {
print_worker_info(tid, wi);
} }
r => { r => {
error!("Unexpected response: {:?}", r); error!("Unexpected response: {:?}", r);

View File

@ -516,6 +516,9 @@ pub enum WorkerCmd {
#[structopt(flatten)] #[structopt(flatten)]
opt: WorkerListOpt, opt: WorkerListOpt,
}, },
/// Get detailed information about a worker
#[structopt(name = "info", version = garage_version())]
Info { tid: usize },
/// Set worker parameter /// Set worker parameter
#[structopt(name = "set", version = garage_version())] #[structopt(name = "set", version = garage_version())]
Set { Set {

View File

@ -241,7 +241,7 @@ pub fn find_matching_node(
} }
} }
pub fn print_worker_info(wi: HashMap<usize, WorkerInfo>, wlo: WorkerListOpt) { pub fn print_worker_list(wi: HashMap<usize, WorkerInfo>, wlo: WorkerListOpt) {
let mut wi = wi.into_iter().collect::<Vec<_>>(); let mut wi = wi.into_iter().collect::<Vec<_>>();
wi.sort_by_key(|(tid, info)| { wi.sort_by_key(|(tid, info)| {
( (
@ -284,13 +284,13 @@ pub fn print_worker_info(wi: HashMap<usize, WorkerInfo>, wlo: WorkerListOpt) {
.tranquility .tranquility
.as_ref() .as_ref()
.map(ToString::to_string) .map(ToString::to_string)
.unwrap_or("-".into()), .unwrap_or_else(|| "-".into()),
info.status.progress.as_deref().unwrap_or("-"), info.status.progress.as_deref().unwrap_or("-"),
info.status info.status
.queue_length .queue_length
.as_ref() .as_ref()
.map(ToString::to_string) .map(ToString::to_string)
.unwrap_or("-".into()), .unwrap_or_else(|| "-".into()),
total_err, total_err,
consec_err, consec_err,
err_ago, err_ago,
@ -298,3 +298,58 @@ pub fn print_worker_info(wi: HashMap<usize, WorkerInfo>, wlo: WorkerListOpt) {
} }
format_table(table); format_table(table);
} }
/// Prints a detailed, human-readable report about a single background worker.
///
/// The report is built as a list of `label:\tvalue` lines handed to
/// `format_table`; empty strings are used as blank separator lines between
/// the three sections (identity/state, error statistics, status details).
///
/// * `tid`  - task id of the worker, printed as-is on the first line.
/// * `info` - the worker's information; consumed by this function.
///
/// Optional fields (tranquility, last error, progress, queue length,
/// persistent errors) are only printed when they are present. Freeform status
/// messages are printed last: the first one is labelled `Message:` and the
/// following ones are continuation lines with an empty label column.
pub fn print_worker_info(tid: usize, info: WorkerInfo) {
	let mut table = vec![
		format!("Task id:\t{}", tid),
		format!("Worker name:\t{}", info.name),
	];

	// A throttled worker gets a dedicated message showing the pause duration;
	// every other state is printed through its `Display` implementation.
	match info.state {
		WorkerState::Throttled(t) => {
			table.push(format!(
				"Worker state:\tBusy (throttled, paused for {:.3}s)",
				t
			));
		}
		s => {
			table.push(format!("Worker state:\t{}", s));
		}
	};
	if let Some(tql) = info.status.tranquility {
		table.push(format!("Tranquility:\t{}", tql));
	}

	table.push("".into());
	table.push(format!("Total errors:\t{}", info.errors));
	table.push(format!("Consecutive errs:\t{}", info.consecutive_errors));
	if let Some((s, t)) = info.last_error {
		table.push(format!("Last error:\t{}", s));
		let tf = timeago::Formatter::new();
		// `t` is a millisecond timestamp that may be ahead of the local clock
		// (e.g. clock skew between machines -- NOTE(review): confirm where the
		// timestamp is produced). A plain `now_msec() - t` would then underflow:
		// panic with overflow checks enabled, or wrap to a huge duration
		// otherwise. Saturating clamps the elapsed time to zero instead.
		let elapsed_msec = now_msec().saturating_sub(t);
		table.push(format!(
			"Last error time:\t{}",
			tf.convert(Duration::from_millis(elapsed_msec))
		));
	}

	table.push("".into());
	if let Some(p) = info.status.progress {
		table.push(format!("Progress:\t{}", p));
	}
	if let Some(ql) = info.status.queue_length {
		table.push(format!("Queue length:\t{}", ql));
	}
	if let Some(pe) = info.status.persistent_errors {
		table.push(format!("Persistent errors:\t{}", pe));
	}

	for (i, s) in info.status.freeform.iter().enumerate() {
		if i == 0 {
			// Separate the messages from the preceding lines with a blank line,
			// unless the table already ends with one (no status details above).
			if table.last().map(String::as_str) != Some("") {
				table.push("".into());
			}
			table.push(format!("Message:\t{}", s));
		} else {
			table.push(format!("\t{}", s));
		}
	}

	format_table(table);
}