refactor(workflows): name policies and samples

andrussal 2025-12-16 02:07:24 +01:00
parent b188bd7364
commit 3e7d14b5f6
3 changed files with 37 additions and 13 deletions

View File

@@ -275,11 +275,15 @@ impl ChaosBuilder {
     /// Configure a random restarts chaos workload.
     #[must_use]
     pub fn restart(self) -> ChaosRestartBuilder {
+        const DEFAULT_CHAOS_MIN_DELAY: Duration = Duration::from_secs(10);
+        const DEFAULT_CHAOS_MAX_DELAY: Duration = Duration::from_secs(30);
+        const DEFAULT_CHAOS_TARGET_COOLDOWN: Duration = Duration::from_secs(60);
+
         ChaosRestartBuilder {
             builder: self.builder,
-            min_delay: Duration::from_secs(10),
-            max_delay: Duration::from_secs(30),
-            target_cooldown: Duration::from_secs(60),
+            min_delay: DEFAULT_CHAOS_MIN_DELAY,
+            max_delay: DEFAULT_CHAOS_MAX_DELAY,
+            target_cooldown: DEFAULT_CHAOS_TARGET_COOLDOWN,
             include_validators: true,
             include_executors: true,
         }
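Note: the hunk above replaces bare `Duration::from_secs(..)` literals with function-local named constants. A standalone sketch of the same shape, with illustrative names (`RetryPolicy`, `with_defaults`) that are not part of this repository:

use std::time::Duration;

// Illustrative only: the same "name the defaults" pattern as the
// ChaosRestartBuilder change above, in a standalone type.
struct RetryPolicy {
    min_delay: Duration,
    max_delay: Duration,
}

impl RetryPolicy {
    fn with_defaults() -> Self {
        // Named at the single point of use instead of bare literals.
        const DEFAULT_MIN_DELAY: Duration = Duration::from_secs(10);
        const DEFAULT_MAX_DELAY: Duration = Duration::from_secs(30);

        Self {
            min_delay: DEFAULT_MIN_DELAY,
            max_delay: DEFAULT_MAX_DELAY,
        }
    }
}

fn main() {
    let policy = RetryPolicy::with_defaults();
    println!(
        "retry between {:?} and {:?}",
        policy.min_delay, policy.max_delay
    );
}

Keeping the consts inside the constructor leaves each default named right next to its only point of use, which is what the change above does for `ChaosRestartBuilder`.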

View File

@@ -103,22 +103,32 @@ impl ConsensusLiveness {
         let mut issues = Vec::new();
         for (idx, client) in clients.iter().enumerate() {
+            let node = format!("node-{idx}");
             for attempt in 0..REQUEST_RETRIES {
                 match Self::fetch_cluster_info(client).await {
-                    Ok((height, tip)) => {
-                        let label = format!("node-{idx}");
-                        tracing::debug!(node = %label, height, tip = ?tip, attempt, "consensus_info collected");
-                        samples.push(NodeSample { label, height, tip });
+                    Ok(sample) => {
+                        tracing::debug!(
+                            node = %node,
+                            height = sample.height,
+                            tip = ?sample.tip,
+                            attempt,
+                            "consensus_info collected"
+                        );
+                        samples.push(NodeSample {
+                            label: node.clone(),
+                            height: sample.height,
+                            tip: sample.tip,
+                        });
                         break;
                     }
                     Err(err) if attempt + 1 == REQUEST_RETRIES => {
-                        tracing::warn!(node = %format!("node-{idx}"), %err, "consensus_info failed after retries");
+                        tracing::warn!(node = %node, %err, "consensus_info failed after retries");
                         issues.push(ConsensusLivenessIssue::RequestFailed {
-                            node: format!("node-{idx}"),
+                            node: node.clone(),
                             source: err,
                         });
                     }
@@ -131,11 +141,14 @@ impl ConsensusLiveness {
         LivenessCheck { samples, issues }
     }

-    async fn fetch_cluster_info(client: &ApiClient) -> Result<(u64, HeaderId), DynError> {
+    async fn fetch_cluster_info(client: &ApiClient) -> Result<ConsensusInfoSample, DynError> {
         client
             .consensus_info()
             .await
-            .map(|info| (info.height, info.tip))
+            .map(|info| ConsensusInfoSample {
+                height: info.height,
+                tip: info.tip,
+            })
             .map_err(|err| -> DynError { err.into() })
     }
@@ -215,6 +228,11 @@ impl ConsensusLiveness {
     }
 }

+struct ConsensusInfoSample {
+    height: u64,
+    tip: HeaderId,
+}
+
 struct NodeSample {
     label: String,
     height: u64,
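The consensus-liveness change swaps the `(u64, HeaderId)` tuple for a named `ConsensusInfoSample` and reuses one `node` label per client. A minimal sketch of the tuple-to-struct part, assuming a simplified synchronous client; `MockClient`, `InfoSample`, `fetch_info`, and the `Id` alias are illustrative only:

// Illustrative only: a tuple-returning helper refactored to return a named
// sample struct, mirroring the ConsensusInfoSample change above.
type Id = [u8; 32];

struct InfoSample {
    height: u64,
    tip: Id,
}

struct MockClient {
    height: u64,
    tip: Id,
}

impl MockClient {
    // Before: fn fetch_info(&self) -> (u64, Id) { (self.height, self.tip) }
    fn fetch_info(&self) -> InfoSample {
        InfoSample {
            height: self.height,
            tip: self.tip,
        }
    }
}

fn main() {
    let client = MockClient { height: 42, tip: [0u8; 32] };
    let sample = client.fetch_info();
    // Named fields replace positional tuple access (sample.0 / sample.1).
    println!("height={} tip={:02x?}", sample.height, &sample.tip[..4]);
}

Call sites then read `sample.height` / `sample.tip` instead of positional `.0` / `.1`, which is the readability gain this refactor targets.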

View File

@@ -6,6 +6,8 @@ use testing_framework_core::scenario::{DynError, RunContext, Workload};
 use tokio::time::{Instant, sleep};
 use tracing::info;

+const MIN_DELAY_SPREAD_FALLBACK: Duration = Duration::from_millis(1);
+
 /// Randomly restarts validators and executors during a run to introduce chaos.
 #[derive(Debug)]
 pub struct RandomRestartWorkload {
@@ -66,7 +68,7 @@ impl RandomRestartWorkload {
         let spread = self
             .max_delay
             .checked_sub(self.min_delay)
-            .unwrap_or_else(|| Duration::from_millis(1))
+            .unwrap_or(MIN_DELAY_SPREAD_FALLBACK)
             .as_secs_f64();
         let offset = thread_rng().gen_range(0.0..=spread);
         let delay = self
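The restart workload now names its 1 ms fallback spread. A minimal sketch of how that jitter computation behaves, assuming the `rand` crate's `thread_rng`/`gen_range` as already used in the file; the `jittered_delay` wrapper and `main` are illustrative only:

use rand::{thread_rng, Rng};
use std::time::Duration;

// Fallback spread when max_delay is smaller than min_delay, mirroring
// MIN_DELAY_SPREAD_FALLBACK above.
const MIN_DELAY_SPREAD_FALLBACK: Duration = Duration::from_millis(1);

/// Picks a delay uniformly in [min_delay, min_delay + spread], falling back
/// to a 1 ms spread when the configured range is inverted.
fn jittered_delay(min_delay: Duration, max_delay: Duration) -> Duration {
    let spread = max_delay
        .checked_sub(min_delay)
        .unwrap_or(MIN_DELAY_SPREAD_FALLBACK)
        .as_secs_f64();
    let offset = thread_rng().gen_range(0.0..=spread);
    min_delay + Duration::from_secs_f64(offset)
}

fn main() {
    let d = jittered_delay(Duration::from_secs(10), Duration::from_secs(30));
    println!("next restart in {d:?}");
}

`checked_sub` returns `None` only when `min_delay` exceeds `max_delay`; the named fallback then hands `gen_range` a small positive spread instead of panicking on the underflow.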