test: stabilize compose/k8s runners

This commit is contained in:
andrussal 2025-12-17 21:52:11 +01:00
parent 40cddbea42
commit 3a25c776b2
9 changed files with 58 additions and 28 deletions

View File

@ -58,7 +58,7 @@ async fn main() {
);
if let Err(err) = run_compose_case(validators, executors, Duration::from_secs(run_secs)).await {
warn!("compose runner demo failed: {err}");
warn!("compose runner demo failed: {err:#}");
process::exit(1);
}
}
@ -75,26 +75,36 @@ async fn run_compose_case(
"building scenario plan"
);
let (chaos_min_delay, chaos_max_delay, chaos_target_cooldown) = chaos_timings(run_duration);
let enable_chaos = env::var("NOMOS_DEMO_CHAOS")
.or_else(|_| env::var("COMPOSE_DEMO_CHAOS"))
.map(|value| value == "1" || value.eq_ignore_ascii_case("true"))
.unwrap_or(false);
let mut plan = ScenarioBuilder::topology_with(|t| {
let scenario = ScenarioBuilder::topology_with(|t| {
t.network_star().validators(validators).executors(executors)
})
.enable_node_control()
.chaos_with(|c| {
c.restart()
// Keep chaos restarts outside the test run window to avoid crash loops on restart.
.min_delay(chaos_min_delay)
.max_delay(chaos_max_delay)
.target_cooldown(chaos_target_cooldown)
.apply()
})
.wallets(TOTAL_WALLETS)
.transactions_with(|txs| txs.rate(MIXED_TXS_PER_BLOCK).users(TRANSACTION_WALLETS))
.da_with(|da| da.channel_rate(DA_CHANNEL_RATE).blob_rate(DA_BLOB_RATE))
.with_run_duration(run_duration)
.expect_consensus_liveness()
.build();
.enable_node_control();
let scenario = if enable_chaos {
let (chaos_min_delay, chaos_max_delay, chaos_target_cooldown) = chaos_timings(run_duration);
scenario.chaos_with(|c| {
c.restart()
.min_delay(chaos_min_delay)
.max_delay(chaos_max_delay)
.target_cooldown(chaos_target_cooldown)
.apply()
})
} else {
scenario
};
let mut plan = scenario
.wallets(TOTAL_WALLETS)
.transactions_with(|txs| txs.rate(MIXED_TXS_PER_BLOCK).users(TRANSACTION_WALLETS))
.da_with(|da| da.channel_rate(DA_CHANNEL_RATE).blob_rate(DA_BLOB_RATE))
.with_run_duration(run_duration)
.expect_consensus_liveness()
.build();
let deployer = ComposeDeployer::new();
info!("deploying compose stack");

View File

@ -12,7 +12,7 @@ use tracing::{info, warn};
const DEFAULT_RUN_SECS: u64 = 60;
const DEFAULT_VALIDATORS: usize = 1;
const DEFAULT_EXECUTORS: usize = 1;
const MIXED_TXS_PER_BLOCK: u64 = 5;
const MIXED_TXS_PER_BLOCK: u64 = 2;
const TOTAL_WALLETS: usize = 1000;
const TRANSACTION_WALLETS: usize = 500;
const DA_BLOB_RATE: u64 = 1;
@ -37,7 +37,7 @@ async fn main() {
info!(validators, executors, run_secs, "starting k8s runner demo");
if let Err(err) = run_k8s_case(validators, executors, Duration::from_secs(run_secs)).await {
warn!("k8s runner demo failed: {err}");
warn!("k8s runner demo failed: {err:#}");
process::exit(1);
}
}
@ -49,15 +49,22 @@ async fn run_k8s_case(validators: usize, executors: usize, run_duration: Duratio
duration_secs = run_duration.as_secs(),
"building scenario plan"
);
let enable_da = env::var("NOMOS_DEMO_DA")
.or_else(|_| env::var("K8S_DEMO_DA"))
.map(|value| value == "1" || value.eq_ignore_ascii_case("true"))
.unwrap_or(false);
let mut scenario = ScenarioBuilder::topology_with(|t| {
t.network_star().validators(validators).executors(executors)
})
.with_capabilities(ObservabilityCapability::default())
.wallets(TOTAL_WALLETS)
.transactions_with(|txs| txs.rate(MIXED_TXS_PER_BLOCK).users(TRANSACTION_WALLETS))
.da_with(|da| da.blob_rate(DA_BLOB_RATE))
.with_run_duration(run_duration)
.expect_consensus_liveness();
.with_run_duration(run_duration);
if enable_da {
scenario = scenario.da_with(|da| da.blob_rate(DA_BLOB_RATE).headroom_percent(0));
}
if let Ok(url) = env::var("K8S_RUNNER_METRICS_QUERY_URL")
.or_else(|_| env::var("NOMOS_METRICS_QUERY_URL"))

View File

@ -43,7 +43,7 @@ async fn main() {
);
if let Err(err) = run_local_case(validators, executors, Duration::from_secs(run_secs)).await {
warn!("local runner demo failed: {err}");
warn!("local runner demo failed: {err:#}");
process::exit(1);
}
}

View File

@ -69,9 +69,12 @@ impl NodeDescriptor {
EnvEntry::new("CFG_HOST_IDENTIFIER", identifier),
]);
// Publish container ports on random host ports to avoid collisions with
// local services and allow multiple compose stacks to run concurrently.
// The runner discovers the chosen host ports via `docker compose port`.
let ports = vec![
format!("127.0.0.1:{api_port}:{api_port}"),
format!("127.0.0.1:{testing_port}:{testing_port}"),
format!("127.0.0.1::{api_port}"),
format!("127.0.0.1::{testing_port}"),
];
Self {

View File

@ -31,6 +31,8 @@ spec:
env:
- name: CFG_SERVER_ADDR
value: http://{{ include "nomos-runner.fullname" $root }}-cfgsync:{{ $root.Values.cfgsync.port }}
- name: NOMOS_TIME_BACKEND
value: {{ $root.Values.timeBackend | default "monotonic" | quote }}
- name: NOMOS_KZGRS_PARAMS_PATH
value: '{{ if eq $root.Values.kzg.mode "inImage" }}{{ $root.Values.kzg.inImageParamsPath }}{{ else }}{{ $root.Values.kzg.hostPathParamsPath }}{{ end }}'
{{- range $key, $value := $node.env }}

View File

@ -31,6 +31,8 @@ spec:
env:
- name: CFG_SERVER_ADDR
value: http://{{ include "nomos-runner.fullname" $root }}-cfgsync:{{ $root.Values.cfgsync.port }}
- name: NOMOS_TIME_BACKEND
value: {{ $root.Values.timeBackend | default "monotonic" | quote }}
- name: NOMOS_KZGRS_PARAMS_PATH
value: '{{ if eq $root.Values.kzg.mode "inImage" }}{{ $root.Values.kzg.inImageParamsPath }}{{ else }}{{ $root.Values.kzg.hostPathParamsPath }}{{ end }}'
{{- range $key, $value := $node.env }}

View File

@ -1,6 +1,10 @@
image: "public.ecr.aws/r4s5t9y4/logos/logos-blockchain:test"
imagePullPolicy: IfNotPresent
# Keep node time in-process by default; NTP backends are flaky in short-lived
# test clusters and can stall consensus if they fail to sync.
timeBackend: "monotonic"
cfgsync:
port: 4400
config: ""

View File

@ -13,7 +13,9 @@ pub async fn spawn_block_feed_with(
);
let block_source_client = node_clients
.any_client()
.validator_clients()
.first()
.or_else(|| node_clients.any_client())
.cloned()
.ok_or(K8sRunnerError::BlockFeedMissing)?;

View File

@ -9,7 +9,7 @@ use anyhow::{Result as AnyhowResult, anyhow};
use super::{ClusterWaitError, NodeConfigPorts, NodePortAllocation};
const PORT_FORWARD_READY_ATTEMPTS: u32 = 20;
const PORT_FORWARD_READY_ATTEMPTS: u32 = 240;
const PORT_FORWARD_READY_POLL_INTERVAL: Duration = Duration::from_millis(250);
pub struct PortForwardHandle {