diff --git a/examples/src/bin/compose_runner.rs b/examples/src/bin/compose_runner.rs index 45f2411..bc6142c 100644 --- a/examples/src/bin/compose_runner.rs +++ b/examples/src/bin/compose_runner.rs @@ -58,7 +58,7 @@ async fn main() { ); if let Err(err) = run_compose_case(validators, executors, Duration::from_secs(run_secs)).await { - warn!("compose runner demo failed: {err}"); + warn!("compose runner demo failed: {err:#}"); process::exit(1); } } @@ -75,26 +75,36 @@ async fn run_compose_case( "building scenario plan" ); - let (chaos_min_delay, chaos_max_delay, chaos_target_cooldown) = chaos_timings(run_duration); + let enable_chaos = env::var("NOMOS_DEMO_CHAOS") + .or_else(|_| env::var("COMPOSE_DEMO_CHAOS")) + .map(|value| value == "1" || value.eq_ignore_ascii_case("true")) + .unwrap_or(false); - let mut plan = ScenarioBuilder::topology_with(|t| { + let scenario = ScenarioBuilder::topology_with(|t| { t.network_star().validators(validators).executors(executors) }) - .enable_node_control() - .chaos_with(|c| { - c.restart() - // Keep chaos restarts outside the test run window to avoid crash loops on restart. - .min_delay(chaos_min_delay) - .max_delay(chaos_max_delay) - .target_cooldown(chaos_target_cooldown) - .apply() - }) - .wallets(TOTAL_WALLETS) - .transactions_with(|txs| txs.rate(MIXED_TXS_PER_BLOCK).users(TRANSACTION_WALLETS)) - .da_with(|da| da.channel_rate(DA_CHANNEL_RATE).blob_rate(DA_BLOB_RATE)) - .with_run_duration(run_duration) - .expect_consensus_liveness() - .build(); + .enable_node_control(); + + let scenario = if enable_chaos { + let (chaos_min_delay, chaos_max_delay, chaos_target_cooldown) = chaos_timings(run_duration); + scenario.chaos_with(|c| { + c.restart() + .min_delay(chaos_min_delay) + .max_delay(chaos_max_delay) + .target_cooldown(chaos_target_cooldown) + .apply() + }) + } else { + scenario + }; + + let mut plan = scenario + .wallets(TOTAL_WALLETS) + .transactions_with(|txs| txs.rate(MIXED_TXS_PER_BLOCK).users(TRANSACTION_WALLETS)) + .da_with(|da| da.channel_rate(DA_CHANNEL_RATE).blob_rate(DA_BLOB_RATE)) + .with_run_duration(run_duration) + .expect_consensus_liveness() + .build(); let deployer = ComposeDeployer::new(); info!("deploying compose stack"); diff --git a/examples/src/bin/k8s_runner.rs b/examples/src/bin/k8s_runner.rs index 7c74e84..087a764 100644 --- a/examples/src/bin/k8s_runner.rs +++ b/examples/src/bin/k8s_runner.rs @@ -12,7 +12,7 @@ use tracing::{info, warn}; const DEFAULT_RUN_SECS: u64 = 60; const DEFAULT_VALIDATORS: usize = 1; const DEFAULT_EXECUTORS: usize = 1; -const MIXED_TXS_PER_BLOCK: u64 = 5; +const MIXED_TXS_PER_BLOCK: u64 = 2; const TOTAL_WALLETS: usize = 1000; const TRANSACTION_WALLETS: usize = 500; const DA_BLOB_RATE: u64 = 1; @@ -37,7 +37,7 @@ async fn main() { info!(validators, executors, run_secs, "starting k8s runner demo"); if let Err(err) = run_k8s_case(validators, executors, Duration::from_secs(run_secs)).await { - warn!("k8s runner demo failed: {err}"); + warn!("k8s runner demo failed: {err:#}"); process::exit(1); } } @@ -49,15 +49,22 @@ async fn run_k8s_case(validators: usize, executors: usize, run_duration: Duratio duration_secs = run_duration.as_secs(), "building scenario plan" ); + let enable_da = env::var("NOMOS_DEMO_DA") + .or_else(|_| env::var("K8S_DEMO_DA")) + .map(|value| value == "1" || value.eq_ignore_ascii_case("true")) + .unwrap_or(false); + let mut scenario = ScenarioBuilder::topology_with(|t| { t.network_star().validators(validators).executors(executors) }) .with_capabilities(ObservabilityCapability::default()) .wallets(TOTAL_WALLETS) .transactions_with(|txs| txs.rate(MIXED_TXS_PER_BLOCK).users(TRANSACTION_WALLETS)) - .da_with(|da| da.blob_rate(DA_BLOB_RATE)) - .with_run_duration(run_duration) - .expect_consensus_liveness(); + .with_run_duration(run_duration); + + if enable_da { + scenario = scenario.da_with(|da| da.blob_rate(DA_BLOB_RATE).headroom_percent(0)); + } if let Ok(url) = env::var("K8S_RUNNER_METRICS_QUERY_URL") .or_else(|_| env::var("NOMOS_METRICS_QUERY_URL")) diff --git a/examples/src/bin/local_runner.rs b/examples/src/bin/local_runner.rs index 23fb2a3..01d5477 100644 --- a/examples/src/bin/local_runner.rs +++ b/examples/src/bin/local_runner.rs @@ -43,7 +43,7 @@ async fn main() { ); if let Err(err) = run_local_case(validators, executors, Duration::from_secs(run_secs)).await { - warn!("local runner demo failed: {err}"); + warn!("local runner demo failed: {err:#}"); process::exit(1); } } diff --git a/testing-framework/deployers/compose/src/descriptor/node.rs b/testing-framework/deployers/compose/src/descriptor/node.rs index 61c19f1..810c7b2 100644 --- a/testing-framework/deployers/compose/src/descriptor/node.rs +++ b/testing-framework/deployers/compose/src/descriptor/node.rs @@ -69,9 +69,12 @@ impl NodeDescriptor { EnvEntry::new("CFG_HOST_IDENTIFIER", identifier), ]); + // Publish container ports on random host ports to avoid collisions with + // local services and allow multiple compose stacks to run concurrently. + // The runner discovers the chosen host ports via `docker compose port`. let ports = vec![ - format!("127.0.0.1:{api_port}:{api_port}"), - format!("127.0.0.1:{testing_port}:{testing_port}"), + format!("127.0.0.1::{api_port}"), + format!("127.0.0.1::{testing_port}"), ]; Self { diff --git a/testing-framework/deployers/k8s/helm/nomos-runner/templates/executor-deployments.yaml b/testing-framework/deployers/k8s/helm/nomos-runner/templates/executor-deployments.yaml index baaa33b..738987e 100644 --- a/testing-framework/deployers/k8s/helm/nomos-runner/templates/executor-deployments.yaml +++ b/testing-framework/deployers/k8s/helm/nomos-runner/templates/executor-deployments.yaml @@ -31,6 +31,8 @@ spec: env: - name: CFG_SERVER_ADDR value: http://{{ include "nomos-runner.fullname" $root }}-cfgsync:{{ $root.Values.cfgsync.port }} + - name: NOMOS_TIME_BACKEND + value: {{ $root.Values.timeBackend | default "monotonic" | quote }} - name: NOMOS_KZGRS_PARAMS_PATH value: '{{ if eq $root.Values.kzg.mode "inImage" }}{{ $root.Values.kzg.inImageParamsPath }}{{ else }}{{ $root.Values.kzg.hostPathParamsPath }}{{ end }}' {{- range $key, $value := $node.env }} diff --git a/testing-framework/deployers/k8s/helm/nomos-runner/templates/validator-deployments.yaml b/testing-framework/deployers/k8s/helm/nomos-runner/templates/validator-deployments.yaml index ed0f4e0..46db6e3 100644 --- a/testing-framework/deployers/k8s/helm/nomos-runner/templates/validator-deployments.yaml +++ b/testing-framework/deployers/k8s/helm/nomos-runner/templates/validator-deployments.yaml @@ -31,6 +31,8 @@ spec: env: - name: CFG_SERVER_ADDR value: http://{{ include "nomos-runner.fullname" $root }}-cfgsync:{{ $root.Values.cfgsync.port }} + - name: NOMOS_TIME_BACKEND + value: {{ $root.Values.timeBackend | default "monotonic" | quote }} - name: NOMOS_KZGRS_PARAMS_PATH value: '{{ if eq $root.Values.kzg.mode "inImage" }}{{ $root.Values.kzg.inImageParamsPath }}{{ else }}{{ $root.Values.kzg.hostPathParamsPath }}{{ end }}' {{- range $key, $value := $node.env }} diff --git a/testing-framework/deployers/k8s/helm/nomos-runner/values.yaml b/testing-framework/deployers/k8s/helm/nomos-runner/values.yaml index bb6c63d..5671f0d 100644 --- a/testing-framework/deployers/k8s/helm/nomos-runner/values.yaml +++ b/testing-framework/deployers/k8s/helm/nomos-runner/values.yaml @@ -1,6 +1,10 @@ image: "public.ecr.aws/r4s5t9y4/logos/logos-blockchain:test" imagePullPolicy: IfNotPresent +# Keep node time in-process by default; NTP backends are flaky in short-lived +# test clusters and can stall consensus if they fail to sync. +timeBackend: "monotonic" + cfgsync: port: 4400 config: "" diff --git a/testing-framework/deployers/k8s/src/lifecycle/block_feed.rs b/testing-framework/deployers/k8s/src/lifecycle/block_feed.rs index 93b88a5..5380bcc 100644 --- a/testing-framework/deployers/k8s/src/lifecycle/block_feed.rs +++ b/testing-framework/deployers/k8s/src/lifecycle/block_feed.rs @@ -13,7 +13,9 @@ pub async fn spawn_block_feed_with( ); let block_source_client = node_clients - .any_client() + .validator_clients() + .first() + .or_else(|| node_clients.any_client()) .cloned() .ok_or(K8sRunnerError::BlockFeedMissing)?; diff --git a/testing-framework/deployers/k8s/src/lifecycle/wait/forwarding.rs b/testing-framework/deployers/k8s/src/lifecycle/wait/forwarding.rs index e9da949..a120959 100644 --- a/testing-framework/deployers/k8s/src/lifecycle/wait/forwarding.rs +++ b/testing-framework/deployers/k8s/src/lifecycle/wait/forwarding.rs @@ -9,7 +9,7 @@ use anyhow::{Result as AnyhowResult, anyhow}; use super::{ClusterWaitError, NodeConfigPorts, NodePortAllocation}; -const PORT_FORWARD_READY_ATTEMPTS: u32 = 20; +const PORT_FORWARD_READY_ATTEMPTS: u32 = 240; const PORT_FORWARD_READY_POLL_INTERVAL: Duration = Duration::from_millis(250); pub struct PortForwardHandle {