diff --git a/testing-framework/workflows/src/builder/mod.rs b/testing-framework/workflows/src/builder/mod.rs index 034e4e4..d9edfaf 100644 --- a/testing-framework/workflows/src/builder/mod.rs +++ b/testing-framework/workflows/src/builder/mod.rs @@ -275,11 +275,15 @@ impl ChaosBuilder { /// Configure a random restarts chaos workload. #[must_use] pub fn restart(self) -> ChaosRestartBuilder { + const DEFAULT_CHAOS_MIN_DELAY: Duration = Duration::from_secs(10); + const DEFAULT_CHAOS_MAX_DELAY: Duration = Duration::from_secs(30); + const DEFAULT_CHAOS_TARGET_COOLDOWN: Duration = Duration::from_secs(60); + ChaosRestartBuilder { builder: self.builder, - min_delay: Duration::from_secs(10), - max_delay: Duration::from_secs(30), - target_cooldown: Duration::from_secs(60), + min_delay: DEFAULT_CHAOS_MIN_DELAY, + max_delay: DEFAULT_CHAOS_MAX_DELAY, + target_cooldown: DEFAULT_CHAOS_TARGET_COOLDOWN, include_validators: true, include_executors: true, } diff --git a/testing-framework/workflows/src/expectations/consensus_liveness.rs b/testing-framework/workflows/src/expectations/consensus_liveness.rs index 27b4017..618e6c5 100644 --- a/testing-framework/workflows/src/expectations/consensus_liveness.rs +++ b/testing-framework/workflows/src/expectations/consensus_liveness.rs @@ -103,22 +103,32 @@ impl ConsensusLiveness { let mut issues = Vec::new(); for (idx, client) in clients.iter().enumerate() { + let node = format!("node-{idx}"); + for attempt in 0..REQUEST_RETRIES { match Self::fetch_cluster_info(client).await { - Ok((height, tip)) => { - let label = format!("node-{idx}"); - - tracing::debug!(node = %label, height, tip = ?tip, attempt, "consensus_info collected"); - samples.push(NodeSample { label, height, tip }); + Ok(sample) => { + tracing::debug!( + node = %node, + height = sample.height, + tip = ?sample.tip, + attempt, + "consensus_info collected" + ); + samples.push(NodeSample { + label: node.clone(), + height: sample.height, + tip: sample.tip, + }); break; } Err(err) if attempt + 1 == REQUEST_RETRIES => { - tracing::warn!(node = %format!("node-{idx}"), %err, "consensus_info failed after retries"); + tracing::warn!(node = %node, %err, "consensus_info failed after retries"); issues.push(ConsensusLivenessIssue::RequestFailed { - node: format!("node-{idx}"), + node: node.clone(), source: err, }); } @@ -131,11 +141,14 @@ impl ConsensusLiveness { LivenessCheck { samples, issues } } - async fn fetch_cluster_info(client: &ApiClient) -> Result<(u64, HeaderId), DynError> { + async fn fetch_cluster_info(client: &ApiClient) -> Result { client .consensus_info() .await - .map(|info| (info.height, info.tip)) + .map(|info| ConsensusInfoSample { + height: info.height, + tip: info.tip, + }) .map_err(|err| -> DynError { err.into() }) } @@ -215,6 +228,11 @@ impl ConsensusLiveness { } } +struct ConsensusInfoSample { + height: u64, + tip: HeaderId, +} + struct NodeSample { label: String, height: u64, diff --git a/testing-framework/workflows/src/workloads/chaos.rs b/testing-framework/workflows/src/workloads/chaos.rs index 13889ce..8e13120 100644 --- a/testing-framework/workflows/src/workloads/chaos.rs +++ b/testing-framework/workflows/src/workloads/chaos.rs @@ -6,6 +6,8 @@ use testing_framework_core::scenario::{DynError, RunContext, Workload}; use tokio::time::{Instant, sleep}; use tracing::info; +const MIN_DELAY_SPREAD_FALLBACK: Duration = Duration::from_millis(1); + /// Randomly restarts validators and executors during a run to introduce chaos. #[derive(Debug)] pub struct RandomRestartWorkload { @@ -66,7 +68,7 @@ impl RandomRestartWorkload { let spread = self .max_delay .checked_sub(self.min_delay) - .unwrap_or_else(|| Duration::from_millis(1)) + .unwrap_or(MIN_DELAY_SPREAD_FALLBACK) .as_secs_f64(); let offset = thread_rng().gen_range(0.0..=spread); let delay = self