refactor: remove embedded observability

This commit is contained in:
andrussal 2025-12-18 13:05:40 +01:00
parent d8be8e589a
commit 91c9044abb
33 changed files with 941 additions and 73 deletions

1
.gitignore vendored
View File

@ -3,6 +3,7 @@
.tmp/ .tmp/
/.tmp*/ /.tmp*/
tmp-local-logs/ tmp-local-logs/
tmp/node-logs/
# IDE / OS cruft # IDE / OS cruft
.idea/ .idea/
.DS_Store .DS_Store

View File

@ -4,7 +4,7 @@ members = [
"examples/doc-snippets", "examples/doc-snippets",
"testing-framework/configs", "testing-framework/configs",
"testing-framework/core", "testing-framework/core",
"testing-framework/cucumber_ext", "testing-framework/cucumber",
"testing-framework/deployers/compose", "testing-framework/deployers/compose",
"testing-framework/deployers/k8s", "testing-framework/deployers/k8s",
"testing-framework/deployers/local", "testing-framework/deployers/local",

View File

@ -26,6 +26,7 @@
- [Operations](operations.md) - [Operations](operations.md)
- [Part III — Developer Reference](part-iii.md) - [Part III — Developer Reference](part-iii.md)
- [Scenario Model (Developer Level)](scenario-model.md) - [Scenario Model (Developer Level)](scenario-model.md)
- [API Levels: Builder DSL vs. Direct](api-levels.md)
- [Extending the Framework](extending.md) - [Extending the Framework](extending.md)
- [Example: New Workload & Expectation (Rust)](custom-workload-example.md) - [Example: New Workload & Expectation (Rust)](custom-workload-example.md)
- [Internal Crate Reference](internal-crate-reference.md) - [Internal Crate Reference](internal-crate-reference.md)

131
book/src/api-levels.md Normal file
View File

@ -0,0 +1,131 @@
# API Levels: Builder DSL vs. Direct Instantiation
The framework supports two styles for constructing scenarios:
1. **High-level Builder DSL** (recommended): fluent helper methods (e.g. `.transactions_with(...)`)
2. **Low-level direct instantiation**: construct workload/expectation types explicitly, then attach them
Both styles produce the same runtime behavior because they ultimately call the same core builder APIs.
## High-Level Builder DSL (Recommended)
The DSL is implemented as extension traits (primarily `testing_framework_workflows::ScenarioBuilderExt`) on the core scenario builder.
```rust
use std::time::Duration;
use testing_framework_core::scenario::ScenarioBuilder;
use testing_framework_workflows::ScenarioBuilderExt;
let plan = ScenarioBuilder::topology_with(|t| t.network_star().validators(3).executors(2))
.wallets(5)
.transactions_with(|txs| txs.rate(5).users(3))
.da_with(|da| da.channel_rate(1).blob_rate(1).headroom_percent(20))
.expect_consensus_liveness()
.with_run_duration(Duration::from_secs(60))
.build();
```
**When to use:**
- Most test code (smoke, regression, CI)
- When you want sensible defaults and minimal boilerplate
## Low-Level Direct Instantiation
Direct instantiation gives you explicit control over the concrete types you attach:
```rust
use std::{
num::{NonZeroU64, NonZeroUsize},
time::Duration,
};
use testing_framework_core::scenario::ScenarioBuilder;
use testing_framework_workflows::{
expectations::ConsensusLiveness,
workloads::{da, transaction},
};
let tx_workload = transaction::Workload::with_rate(5)
.expect("transaction rate must be non-zero")
.with_user_limit(NonZeroUsize::new(3));
let da_workload = da::Workload::with_rate(
NonZeroU64::new(1).unwrap(), // blob rate per block
NonZeroU64::new(1).unwrap(), // channel rate per block
da::Workload::default_headroom_percent(),
);
let plan = ScenarioBuilder::topology_with(|t| t.network_star().validators(3).executors(2))
.wallets(5)
.with_workload(tx_workload)
.with_workload(da_workload)
.with_expectation(ConsensusLiveness::default())
.with_run_duration(Duration::from_secs(60))
.build();
```
**When to use:**
- Custom workload/expectation implementations
- Reusing preconfigured workload instances across multiple scenarios
- Debugging / exploring the underlying workload types
## Method Correspondence
| High-Level DSL | Low-Level Direct |
|----------------|------------------|
| `.transactions_with(\|txs\| txs.rate(5).users(3))` | `.with_workload(transaction::Workload::with_rate(5).expect(...).with_user_limit(...))` |
| `.da_with(\|da\| da.blob_rate(1).channel_rate(1))` | `.with_workload(da::Workload::with_rate(...))` |
| `.expect_consensus_liveness()` | `.with_expectation(ConsensusLiveness::default())` |
## Bundled Expectations (Important)
Workloads can bundle expectations by implementing `Workload::expectations()`.
These bundled expectations are attached automatically whenever you call `.with_workload(...)` — including via the DSL — because the core builder expands each workload's expectations during attachment.
## Mixing Both Styles
Mixing is common: use the DSL for built-ins, and direct instantiation for custom pieces.
```rust
use std::time::Duration;
use testing_framework_core::scenario::ScenarioBuilder;
use testing_framework_workflows::ScenarioBuilderExt;
let custom_workload = MyCustomWorkload::new(config);
let plan = ScenarioBuilder::topology_with(|t| t.network_star().validators(3).executors(2))
.transactions_with(|txs| txs.rate(5).users(3)) // DSL
.with_workload(custom_workload) // direct
.expect_consensus_liveness() // DSL
.with_run_duration(Duration::from_secs(60))
.build();
```
## Implementation Detail (How the DSL Works)
The DSL methods are thin wrappers. For example:
```rust
builder.transactions_with(|txs| txs.rate(5).users(3))
```
is roughly equivalent to:
```rust
builder.transactions().rate(5).users(3).apply()
```
## Troubleshooting
**DSL method not found**
- Ensure the extension traits are in scope, e.g. `use testing_framework_workflows::ScenarioBuilderExt;`
- Cross-check method names in [Builder API Quick Reference](dsl-cheat-sheet.md)
## See Also
- [Builder API Quick Reference](dsl-cheat-sheet.md)
- [Example: New Workload & Expectation (Rust)](custom-workload-example.md)
- [Extending the Framework](extending.md)

View File

@ -1,31 +1,311 @@
# Extending the Framework # Extending the Framework
## Adding a workload This guide shows how to extend the framework with custom workloads, expectations, runners, and topology helpers. Each section includes the trait outline and a minimal code example.
1) Implement `testing_framework_core::scenario::Workload`:
- Provide a name and any bundled expectations.
- In `init`, derive inputs from `GeneratedTopology` and `RunMetrics`; fail
fast if prerequisites are missing (e.g., wallet data, node addresses).
- In `start`, drive async traffic using the `RunContext` clients.
2) Expose the workload from a module under `testing-framework/workflows` and
consider adding a DSL helper for ergonomic wiring.
## Adding an expectation ## Adding a Workload
1) Implement `testing_framework_core::scenario::Expectation`:
- Use `start_capture` to snapshot baseline metrics.
- Use `evaluate` to assert outcomes after workloads finish; return all errors
so the runner can aggregate them.
2) Export it from `testing-framework/workflows` if it is reusable.
## Adding a runner **Steps:**
1) Implement `testing_framework_core::scenario::Deployer` for your backend. 1. Implement `testing_framework_core::scenario::Workload`
- Produce a `RunContext` with `NodeClients`, metrics endpoints, and optional 2. Provide a name and any bundled expectations
`NodeControlHandle`. 3. Use `init` to derive inputs from topology/metrics; fail fast if prerequisites are missing
- Guard cleanup with `CleanupGuard` to reclaim resources even on failures. 4. Use `start` to drive async traffic using `RunContext` clients
2) Mirror the readiness and block-feed probes used by the existing runners so 5. Expose from `testing-framework/workflows` and optionally add a DSL helper
workloads can rely on consistent signals.
## Adding topology helpers **Trait outline:**
- Extend `testing_framework_core::topology::config::TopologyBuilder` with new layouts or
configuration presets (e.g., specialized DA parameters). Keep defaults safe: ```rust
ensure at least one participant and clamp dispersal factors as the current use async_trait::async_trait;
helpers do. use testing_framework_core::scenario::{
DynError, Expectation, RunContext, RunMetrics, Workload,
};
use testing_framework_core::topology::generation::GeneratedTopology;
pub struct MyWorkload {
// Configuration fields
target_rate: u64,
}
impl MyWorkload {
pub fn new(target_rate: u64) -> Self {
Self { target_rate }
}
}
#[async_trait]
impl Workload for MyWorkload {
fn name(&self) -> &str {
"my_workload"
}
fn expectations(&self) -> Vec<Box<dyn Expectation>> {
// Return bundled expectations that should run with this workload
vec![Box::new(MyExpectation::new(self.target_rate))]
}
fn init(
&mut self,
topology: &GeneratedTopology,
_run_metrics: &RunMetrics,
) -> Result<(), DynError> {
// Validate prerequisites (e.g., enough nodes, wallet data present)
if topology.validators().is_empty() {
return Err("no validators available".into());
}
Ok(())
}
async fn start(&self, ctx: &RunContext) -> Result<(), DynError> {
// Drive async activity: submit transactions, query nodes, etc.
let clients = ctx.node_clients().validator_clients();
for client in clients {
let info = client.consensus_info().await?;
tracing::info!(?info, "workload queried node");
}
Ok(())
}
}
```
**Key points:**
- `name()` identifies the workload in logs
- `expectations()` bundles default checks (can be empty)
- `init()` validates topology before run starts
- `start()` executes concurrently with other workloads; it should complete before the run duration expires
See [Example: New Workload & Expectation](custom-workload-example.md) for a complete, runnable example.
## Adding an Expectation
**Steps:**
1. Implement `testing_framework_core::scenario::Expectation`
2. Use `start_capture` to snapshot baseline metrics (optional)
3. Use `evaluate` to assert outcomes after workloads finish
4. Return descriptive errors; the runner aggregates them
5. Export from `testing-framework/workflows` if reusable
**Trait outline:**
```rust
use async_trait::async_trait;
use testing_framework_core::scenario::{DynError, Expectation, RunContext};
pub struct MyExpectation {
expected_value: u64,
captured_baseline: Option<u64>,
}
impl MyExpectation {
pub fn new(expected_value: u64) -> Self {
Self {
expected_value,
captured_baseline: None,
}
}
}
#[async_trait]
impl Expectation for MyExpectation {
fn name(&self) -> &str {
"my_expectation"
}
async fn start_capture(&mut self, ctx: &RunContext) -> Result<(), DynError> {
// Optional: capture baseline state before workloads start
let client = ctx.node_clients().validator_clients().first()
.ok_or("no validators")?;
let info = client.consensus_info().await?;
self.captured_baseline = Some(info.current_block_id.slot);
tracing::info!(baseline = self.captured_baseline, "captured baseline");
Ok(())
}
async fn evaluate(&mut self, ctx: &RunContext) -> Result<(), DynError> {
// Assert the expected condition holds after workloads finish
let client = ctx.node_clients().validator_clients().first()
.ok_or("no validators")?;
let info = client.consensus_info().await?;
let final_slot = info.current_block_id.slot;
let baseline = self.captured_baseline.unwrap_or(0);
let delta = final_slot.saturating_sub(baseline);
if delta < self.expected_value {
return Err(format!(
"expected at least {} blocks, got {}",
self.expected_value, delta
).into());
}
tracing::info!(delta, "expectation passed");
Ok(())
}
}
```
**Key points:**
- `name()` identifies the expectation in logs
- `start_capture()` runs before workloads start (optional)
- `evaluate()` runs after workloads finish; return descriptive errors
- Expectations run sequentially; keep them fast
## Adding a Runner (Deployer)
**Steps:**
1. Implement `testing_framework_core::scenario::Deployer<Caps>` for your capability type
2. Deploy infrastructure and return a `Runner`
3. Construct `NodeClients` and spawn a `BlockFeed`
4. Build a `RunContext` and provide a `CleanupGuard` for teardown
**Trait outline:**
```rust
use async_trait::async_trait;
use testing_framework_core::scenario::{
CleanupGuard, Deployer, DynError, Metrics, NodeClients, RunContext, Runner, Scenario,
spawn_block_feed,
};
use testing_framework_core::topology::deployment::Topology;
pub struct MyDeployer {
// Configuration: cluster connection details, etc.
}
impl MyDeployer {
pub fn new() -> Self {
Self {}
}
}
#[async_trait]
impl Deployer<()> for MyDeployer {
type Error = DynError;
async fn deploy(&self, scenario: &Scenario<()>) -> Result<Runner, Self::Error> {
// 1. Launch nodes using scenario.topology()
// 2. Wait for readiness (e.g., consensus info endpoint responds)
// 3. Build NodeClients for validators/executors
// 4. Spawn a block feed for expectations (optional but recommended)
// 5. Create NodeControlHandle if you support restarts (optional)
// 6. Return a Runner wrapping RunContext + CleanupGuard
tracing::info!("deploying scenario with MyDeployer");
let topology: Option<Topology> = None; // Some(topology) if you spawned one
let node_clients = NodeClients::default(); // Or NodeClients::from_topology(...)
let (block_feed, block_feed_guard) = spawn_block_feed(&node_clients).await?;
let telemetry = Metrics::empty(); // or Metrics::from_prometheus(...)
let node_control = None; // or Some(Arc<dyn NodeControlHandle>)
let context = RunContext::new(
scenario.topology().clone(),
topology,
node_clients,
scenario.duration(),
telemetry,
block_feed,
node_control,
);
// If you also have other resources to clean up (containers/pods/etc),
// wrap them in your own CleanupGuard implementation and call
// CleanupGuard::cleanup(Box::new(block_feed_guard)) inside it.
Ok(Runner::new(context, Some(Box::new(block_feed_guard))))
}
}
```
**Key points:**
- `deploy()` must return a fully prepared `Runner`
- Block until nodes are ready before returning (avoid false negatives)
- Use a `CleanupGuard` to tear down resources on failure (and on `RunHandle` drop)
- If you want chaos workloads, also provide a `NodeControlHandle` via `RunContext`
## Adding Topology Helpers
**Steps:**
1. Extend `testing_framework_core::topology::config::TopologyBuilder` with new layouts
2. Keep defaults safe: ensure at least one participant, clamp dispersal factors
3. Consider adding configuration presets for specialized parameters
**Example:**
```rust
use testing_framework_core::topology::config::TopologyBuilder;
impl TopologyBuilder {
/// Creates a "ring" topology where each node connects to its neighbors
pub fn network_ring(&mut self) -> &mut Self {
// Configure peer connections in a ring layout
self.with_network_layout(|layout| {
// Implement ring connection logic
layout.ring_peers()
});
self
}
/// Preset for high-throughput DA configuration
pub fn da_high_throughput(&mut self) -> &mut Self {
self.with_da_params(|params| {
params
.dispersal_factor(8)
.replication_factor(16)
.chunk_size(4096)
});
self
}
}
```
**Key points:**
- Maintain method chaining (return `&mut Self`)
- Validate inputs: clamp factors, enforce minimums
- Document assumptions (e.g., "requires at least 4 nodes")
## Adding a DSL Helper
To expose your custom workload through the high-level DSL, add a trait extension:
```rust
use testing_framework_core::scenario::Builder as ScenarioBuilder;
pub trait MyWorkloadDsl {
fn my_workload_with(
self,
f: impl FnOnce(MyWorkloadBuilder) -> MyWorkloadBuilder,
) -> Self;
}
impl<Caps> MyWorkloadDsl for ScenarioBuilder<Caps> {
fn my_workload_with(
self,
f: impl FnOnce(MyWorkloadBuilder) -> MyWorkloadBuilder,
) -> Self {
let builder = f(MyWorkloadBuilder::default());
self.with_workload(builder.build())
}
}
```
Users can then call:
```rust
ScenarioBuilder::topology_with(|t| { /* ... */ })
.my_workload_with(|w| {
w.target_rate(10)
.some_option(true)
})
.build()
```
## See Also
- [API Levels: Builder DSL vs. Direct](api-levels.md) - Understanding the two API levels
- [Custom Workload Example](custom-workload-example.md) - Complete runnable example
- [Internal Crate Reference](internal-crate-reference.md) - Where to add new code

View File

@ -37,7 +37,7 @@ Both **LocalDeployer** and **ComposeDeployer** work in CI environments:
**ComposeDeployer in CI (recommended):** **ComposeDeployer in CI (recommended):**
- Better isolation (containerized) - Better isolation (containerized)
- Reproducible environment - Reproducible environment
- Includes Prometheus/observability - Can integrate with external Prometheus/Grafana (optional)
- **Trade-off:** Slower startup (Docker image build) - **Trade-off:** Slower startup (Docker image build)
- **Trade-off:** Requires Docker daemon - **Trade-off:** Requires Docker daemon
@ -60,7 +60,21 @@ scripts/run-examples.sh -t 60 -v 1 -e 1 compose
scripts/run-examples.sh -t 60 -v 1 -e 1 k8s scripts/run-examples.sh -t 60 -v 1 -e 1 k8s
``` ```
This script handles circuit setup, binary building/bundling, image building, and execution. This script handles circuit setup, binary building/bundling, (local) image building, and execution.
Note: for `k8s` runs against non-local clusters (e.g. EKS), the cluster pulls images from a registry,
so a local `docker build` is not used. In that case, build + push your image separately (see
`scripts/build_test_image.sh`) and set `NOMOS_TESTNET_IMAGE` to the pushed reference.
### Quick Smoke Matrix (Host/Compose/K8s)
For a small “does everything still run?” matrix (including `--no-image-build` variants where relevant), use:
```bash
scripts/run-test-matrix.sh -t 120 -v 1 -e 1
```
This is useful after making runner/image/script changes, and it forwards `--metrics-*` options through to `scripts/run-examples.sh`.
**Environment overrides:** **Environment overrides:**
- `VERSION=v0.3.1` — Circuit version - `VERSION=v0.3.1` — Circuit version
@ -192,6 +206,7 @@ cargo run -p runner-examples --bin compose_runner
**Compose-specific features:** **Compose-specific features:**
- **Node control support**: Only runner that supports chaos testing (`.enable_node_control()` + chaos workloads) - **Node control support**: Only runner that supports chaos testing (`.enable_node_control()` + chaos workloads)
- **Observability is external**: Set `NOMOS_METRICS_*` / `NOMOS_GRAFANA_URL` to enable telemetry links and querying - **Observability is external**: Set `NOMOS_METRICS_*` / `NOMOS_GRAFANA_URL` to enable telemetry links and querying
- Quickstart: `scripts/setup-observability.sh compose up` then `scripts/setup-observability.sh compose env`
**Important:** **Important:**
- Containers expect KZG parameters at `/kzgrs_test_params/kzgrs_test_params` (note the repeated filename) - Containers expect KZG parameters at `/kzgrs_test_params/kzgrs_test_params` (note the repeated filename)
@ -248,13 +263,13 @@ cargo run -p runner-examples --bin k8s_runner
Notes: Notes:
- `NOMOS_METRICS_QUERY_URL` must be reachable from the runner process (often via `kubectl port-forward`). - `NOMOS_METRICS_QUERY_URL` must be reachable from the runner process (often via `kubectl port-forward`).
- `NOMOS_METRICS_OTLP_INGEST_URL` must be reachable from nodes (pods/containers) and is backend-specific (Prometheus vs VictoriaMetrics paths differ). - `NOMOS_METRICS_OTLP_INGEST_URL` must be reachable from nodes (pods/containers) and is backend-specific (Prometheus vs VictoriaMetrics paths differ).
- Quickstart installer: `scripts/setup-observability.sh k8s install` then `scripts/setup-observability.sh k8s env` (optional dashboards: `scripts/setup-observability.sh k8s dashboards`)
**Via `scripts/run-examples.sh` (optional):** **Via `scripts/run-examples.sh` (optional):**
```bash ```bash
scripts/run-examples.sh -t 60 -v 1 -e 1 k8s \ scripts/run-examples.sh -t 60 -v 1 -e 1 k8s \
--metrics-query-url http://your-prometheus:9090 \ --metrics-query-url http://your-prometheus:9090 \
--metrics-otlp-ingest-url http://your-prometheus:9090/api/v1/otlp/v1/metrics \ --metrics-otlp-ingest-url http://your-prometheus:9090/api/v1/otlp/v1/metrics
--grafana-url http://your-grafana:3000
``` ```
**In code (optional):** **In code (optional):**
@ -565,12 +580,15 @@ cargo run -p runner-examples --bin local_runner
Runners expose metrics and node HTTP endpoints for expectation code and debugging: Runners expose metrics and node HTTP endpoints for expectation code and debugging:
**Prometheus-compatible metrics querying (optional):** **Prometheus-compatible metrics querying (optional):**
- The framework does **not** deploy Prometheus. - Runners do **not** provision Prometheus automatically.
- For a ready-to-run stack, use `scripts/setup-observability.sh`:
- Compose: `scripts/setup-observability.sh compose up` then `scripts/setup-observability.sh compose env`
- K8s: `scripts/setup-observability.sh k8s install` then `scripts/setup-observability.sh k8s env`
- Provide `NOMOS_METRICS_QUERY_URL` (PromQL base URL) to enable `ctx.telemetry()` queries. - Provide `NOMOS_METRICS_QUERY_URL` (PromQL base URL) to enable `ctx.telemetry()` queries.
- Access from expectations when configured: `ctx.telemetry().prometheus().map(|p| p.base_url())` - Access from expectations when configured: `ctx.telemetry().prometheus().map(|p| p.base_url())`
**Grafana (optional):** **Grafana (optional):**
- The framework does **not** deploy Grafana. - Runners do **not** provision Grafana automatically (but `scripts/setup-observability.sh` can).
- If you set `NOMOS_GRAFANA_URL`, the deployer prints it in `TESTNET_ENDPOINTS`. - If you set `NOMOS_GRAFANA_URL`, the deployer prints it in `TESTNET_ENDPOINTS`.
- Dashboards live in `testing-framework/assets/stack/monitoring/grafana/dashboards/` for import into your Grafana. - Dashboards live in `testing-framework/assets/stack/monitoring/grafana/dashboards/` for import into your Grafana.

View File

@ -163,7 +163,7 @@ pub fn step_5_run_duration() -> testing_framework_core::scenario::Builder<()> {
} }
``` ```
Run for 60 seconds (~27 blocks with default 2s slots, 0.9 coefficient). Framework ensures this is at least 2× the consensus slot duration. Run for 60 seconds (~27 blocks with default 2s slots, 0.9 coefficient). Framework ensures this is at least 2× the consensus slot duration. Adjust consensus timing via `CONSENSUS_SLOT_TIME` and `CONSENSUS_ACTIVE_SLOT_COEFF`.
### 6. Deploy and Execute ### 6. Deploy and Execute
@ -239,7 +239,18 @@ POL_PROOF_DEV_MODE=true \
cargo run -p runner-examples --bin compose_runner cargo run -p runner-examples --bin compose_runner
``` ```
**Benefit:** Reproducible containerized environment with Prometheus at `http://localhost:9090`. **Benefit:** Reproducible containerized environment (Dockerized nodes, repeatable deployments).
**Optional: Prometheus + Grafana**
The runner can integrate with external observability endpoints. For a ready-to-run local stack:
```bash
scripts/setup-observability.sh compose up
eval "$(scripts/setup-observability.sh compose env)"
```
Then run your compose scenario as usual (the environment variables enable PromQL querying and node OTLP metrics export).
**Note:** Compose expects KZG parameters at `/kzgrs_test_params/kzgrs_test_params` inside containers (the directory name is repeated as the filename). **Note:** Compose expects KZG parameters at `/kzgrs_test_params/kzgrs_test_params` inside containers (the directory name is repeated as the filename).

View File

@ -36,7 +36,7 @@ Reason in **blocks** and **consensus intervals**, not wall-clock seconds.
**Consensus defaults:** **Consensus defaults:**
- Slot duration: 2 seconds (NTP-synchronized, configurable via `CONSENSUS_SLOT_TIME`) - Slot duration: 2 seconds (NTP-synchronized, configurable via `CONSENSUS_SLOT_TIME`)
- Active slot coefficient: 0.9 (90% block probability per slot) - Active slot coefficient: 0.9 (90% block probability per slot, configurable via `CONSENSUS_ACTIVE_SLOT_COEFF`)
- Expected rate: ~27 blocks per minute - Expected rate: ~27 blocks per minute
```rust ```rust
@ -159,7 +159,7 @@ pub fn minimum_run_windows() {
**Note:** Block counts assume default consensus parameters: **Note:** Block counts assume default consensus parameters:
- Slot duration: 2 seconds (configurable via `CONSENSUS_SLOT_TIME`) - Slot duration: 2 seconds (configurable via `CONSENSUS_SLOT_TIME`)
- Active slot coefficient: 0.9 (90% block probability per slot) - Active slot coefficient: 0.9 (90% block probability per slot, configurable via `CONSENSUS_ACTIVE_SLOT_COEFF`)
- Formula: `blocks ≈ (duration / slot_duration) × active_slot_coeff` - Formula: `blocks ≈ (duration / slot_duration) × active_slot_coeff`
If upstream changes these parameters, adjust your duration expectations accordingly. If upstream changes these parameters, adjust your duration expectations accordingly.

View File

@ -12,7 +12,7 @@ version = "0.1.0"
[dependencies] [dependencies]
anyhow = "1" anyhow = "1"
cucumber = { version = "0.22.0" } cucumber = { version = "0.22.0" }
cucumber_ext = { path = "../testing-framework/cucumber_ext" } cucumber_ext = { path = "../testing-framework/cucumber" }
testing-framework-core = { workspace = true } testing-framework-core = { workspace = true }
testing-framework-runner-compose = { workspace = true } testing-framework-runner-compose = { workspace = true }
testing-framework-runner-k8s = { workspace = true } testing-framework-runner-k8s = { workspace = true }

View File

@ -1,4 +1,9 @@
use std::{env, process, time::Duration}; use std::{
env, fs,
path::{Path, PathBuf},
process,
time::Duration,
};
use anyhow::{Context as _, Result}; use anyhow::{Context as _, Result};
use runner_examples::{ChaosBuilderExt as _, ScenarioBuilderExt as _, read_env_any}; use runner_examples::{ChaosBuilderExt as _, ScenarioBuilderExt as _, read_env_any};
@ -25,6 +30,8 @@ const DA_BLOB_RATE: u64 = 1;
#[tokio::main] #[tokio::main]
async fn main() { async fn main() {
init_node_log_dir_defaults();
// Compose containers mount KZG params at /kzgrs_test_params; ensure the // Compose containers mount KZG params at /kzgrs_test_params; ensure the
// generated configs point there unless the caller overrides explicitly. // generated configs point there unless the caller overrides explicitly.
if env::var("NOMOS_KZGRS_PARAMS_PATH").is_err() { if env::var("NOMOS_KZGRS_PARAMS_PATH").is_err() {
@ -57,6 +64,35 @@ async fn main() {
} }
} }
fn init_node_log_dir_defaults() {
if env::var_os("NOMOS_LOG_DIR").is_some() {
return;
}
let repo_root = repo_root();
let host_dir = repo_root.join("tmp").join("node-logs");
let _ = fs::create_dir_all(&host_dir);
// In compose mode, node processes run inside containers; configs should
// point to the container path, while the compose deployer mounts the host
// repo's `tmp/node-logs` there.
unsafe {
env::set_var("NOMOS_LOG_DIR", "/tmp/node-logs");
}
}
fn repo_root() -> PathBuf {
env::var("CARGO_WORKSPACE_DIR")
.map(PathBuf::from)
.ok()
.or_else(|| {
Path::new(env!("CARGO_MANIFEST_DIR"))
.parent()
.map(Path::to_path_buf)
})
.expect("repo root must be discoverable from CARGO_WORKSPACE_DIR or CARGO_MANIFEST_DIR")
}
async fn run_compose_case( async fn run_compose_case(
validators: usize, validators: usize,
executors: usize, executors: usize,

View File

@ -1,8 +1,11 @@
use runner_examples::cucumber::{Mode, init_logging_defaults, init_tracing, run}; use runner_examples::cucumber::{
Mode, init_logging_defaults, init_node_log_dir_defaults, init_tracing, run,
};
#[tokio::main(flavor = "current_thread")] #[tokio::main(flavor = "current_thread")]
async fn main() { async fn main() {
init_logging_defaults(); init_logging_defaults();
init_node_log_dir_defaults(Mode::Compose);
init_tracing(); init_tracing();
run(Mode::Compose).await; run(Mode::Compose).await;

View File

@ -1,8 +1,11 @@
use runner_examples::cucumber::{Mode, init_logging_defaults, init_tracing, run}; use runner_examples::cucumber::{
Mode, init_logging_defaults, init_node_log_dir_defaults, init_tracing, run,
};
#[tokio::main(flavor = "current_thread")] #[tokio::main(flavor = "current_thread")]
async fn main() { async fn main() {
init_logging_defaults(); init_logging_defaults();
init_node_log_dir_defaults(Mode::Host);
init_tracing(); init_tracing();
run(Mode::Host).await; run(Mode::Host).await;

View File

@ -1,4 +1,9 @@
use std::{env, process, time::Duration}; use std::{
env, fs,
path::{Path, PathBuf},
process,
time::Duration,
};
use anyhow::{Context as _, Result}; use anyhow::{Context as _, Result};
use runner_examples::{ScenarioBuilderExt as _, read_env_any}; use runner_examples::{ScenarioBuilderExt as _, read_env_any};
@ -12,11 +17,13 @@ const DEFAULT_RUN_SECS: u64 = 60;
const MIXED_TXS_PER_BLOCK: u64 = 5; const MIXED_TXS_PER_BLOCK: u64 = 5;
const TOTAL_WALLETS: usize = 1000; const TOTAL_WALLETS: usize = 1000;
const TRANSACTION_WALLETS: usize = 500; const TRANSACTION_WALLETS: usize = 500;
const DA_BLOB_RATE: u64 = 1; const DA_BLOB_RATE: u64 = 3;
const SMOKE_RUN_SECS_MAX: u64 = 30; const SMOKE_RUN_SECS_MAX: u64 = 30;
#[tokio::main] #[tokio::main]
async fn main() { async fn main() {
init_node_log_dir_defaults();
tracing_subscriber::fmt::init(); tracing_subscriber::fmt::init();
if env::var("POL_PROOF_DEV_MODE").is_err() { if env::var("POL_PROOF_DEV_MODE").is_err() {
@ -39,6 +46,30 @@ async fn main() {
} }
} }
fn init_node_log_dir_defaults() {
if env::var_os("NOMOS_LOG_DIR").is_some() {
return;
}
let host_dir = repo_root().join("tmp").join("node-logs");
let _ = fs::create_dir_all(&host_dir);
unsafe {
env::set_var("NOMOS_LOG_DIR", host_dir);
}
}
fn repo_root() -> PathBuf {
env::var("CARGO_WORKSPACE_DIR")
.map(PathBuf::from)
.ok()
.or_else(|| {
Path::new(env!("CARGO_MANIFEST_DIR"))
.parent()
.map(Path::to_path_buf)
})
.expect("repo root must be discoverable from CARGO_WORKSPACE_DIR or CARGO_MANIFEST_DIR")
}
async fn run_local_case(validators: usize, executors: usize, run_duration: Duration) -> Result<()> { async fn run_local_case(validators: usize, executors: usize, run_duration: Duration) -> Result<()> {
info!( info!(
validators, validators,

View File

@ -1,3 +1,8 @@
use std::{
env, fs,
path::{Path, PathBuf},
};
use cucumber::World; use cucumber::World;
use cucumber_ext::TestingFrameworkWorld; use cucumber_ext::TestingFrameworkWorld;
use tracing_subscriber::{EnvFilter, fmt}; use tracing_subscriber::{EnvFilter, fmt};
@ -31,11 +36,36 @@ fn is_compose(
pub fn init_logging_defaults() { pub fn init_logging_defaults() {
set_default_env("POL_PROOF_DEV_MODE", "true"); set_default_env("POL_PROOF_DEV_MODE", "true");
set_default_env("NOMOS_TESTS_KEEP_LOGS", "1"); set_default_env("NOMOS_TESTS_KEEP_LOGS", "1");
set_default_env("NOMOS_LOG_DIR", ".tmp/cucumber-logs");
set_default_env("NOMOS_LOG_LEVEL", "info"); set_default_env("NOMOS_LOG_LEVEL", "info");
set_default_env("RUST_LOG", "info"); set_default_env("RUST_LOG", "info");
} }
pub fn init_node_log_dir_defaults(mode: Mode) {
if env::var_os("NOMOS_LOG_DIR").is_some() {
return;
}
let host_dir = repo_root().join("tmp").join("node-logs");
let _ = fs::create_dir_all(&host_dir);
match mode {
Mode::Host => set_default_env("NOMOS_LOG_DIR", &host_dir.display().to_string()),
Mode::Compose => set_default_env("NOMOS_LOG_DIR", "/tmp/node-logs"),
}
}
fn repo_root() -> PathBuf {
env::var("CARGO_WORKSPACE_DIR")
.map(PathBuf::from)
.ok()
.or_else(|| {
Path::new(env!("CARGO_MANIFEST_DIR"))
.parent()
.map(Path::to_path_buf)
})
.expect("repo root must be discoverable from CARGO_WORKSPACE_DIR or CARGO_MANIFEST_DIR")
}
pub fn init_tracing() { pub fn init_tracing() {
let filter = EnvFilter::try_from_default_env().unwrap_or_else(|_| EnvFilter::new("info")); let filter = EnvFilter::try_from_default_env().unwrap_or_else(|_| EnvFilter::new("info"));
let _ = fmt().with_env_filter(filter).with_target(true).try_init(); let _ = fmt().with_env_filter(filter).with_target(true).try_init();

View File

@ -196,6 +196,24 @@ build_bundle::clean_cargo_linux_cache() {
rm -rf "${ROOT_DIR}/.tmp/cargo-linux/registry" "${ROOT_DIR}/.tmp/cargo-linux/git" rm -rf "${ROOT_DIR}/.tmp/cargo-linux/registry" "${ROOT_DIR}/.tmp/cargo-linux/git"
} }
build_bundle::docker_platform_suffix() {
  # Map a docker platform string (e.g. linux/amd64) to a filesystem-safe suffix
  # used for arch-specific target dirs, to avoid mixing build artifacts between
  # different container architectures. Empty or bare "linux" inputs map to "".
  local raw="${1:-}"
  raw="${raw#linux/}"
  raw="${raw//\//-}"
  case "${raw}" in
    "" | linux)
      echo ""
      ;;
    *)
      echo "-${raw}"
      ;;
  esac
  return 0
}
build_bundle::maybe_run_linux_build_in_docker() { build_bundle::maybe_run_linux_build_in_docker() {
# With `set -e`, this function must return 0 when no Docker cross-build is needed. # With `set -e`, this function must return 0 when no Docker cross-build is needed.
if [ "${PLATFORM}" != "linux" ] || [ "$(uname -s)" = "Linux" ] || [ -n "${BUNDLE_IN_CONTAINER:-}" ]; then if [ "${PLATFORM}" != "linux" ] || [ "$(uname -s)" = "Linux" ] || [ -n "${BUNDLE_IN_CONTAINER:-}" ]; then
@ -224,7 +242,10 @@ build_bundle::maybe_run_linux_build_in_docker() {
echo "==> Building Linux bundle inside Docker" echo "==> Building Linux bundle inside Docker"
local container_output="/workspace${OUTPUT#"${ROOT_DIR}"}" local container_output="/workspace${OUTPUT#"${ROOT_DIR}"}"
mkdir -p "${ROOT_DIR}/.tmp/cargo-linux" "${ROOT_DIR}/.tmp/nomos-node-linux-target" local target_suffix
target_suffix="$(build_bundle::docker_platform_suffix "${DOCKER_PLATFORM}")"
local host_target_dir="${ROOT_DIR}/.tmp/nomos-node-linux-target${target_suffix}"
mkdir -p "${ROOT_DIR}/.tmp/cargo-linux" "${host_target_dir}"
local -a features_args=() local -a features_args=()
if [ -n "${NOMOS_EXTRA_FEATURES:-}" ]; then if [ -n "${NOMOS_EXTRA_FEATURES:-}" ]; then
@ -242,15 +263,16 @@ build_bundle::maybe_run_linux_build_in_docker() {
-e VERSION="${VERSION}" \ -e VERSION="${VERSION}" \
-e NOMOS_NODE_REV="${NOMOS_NODE_REV}" \ -e NOMOS_NODE_REV="${NOMOS_NODE_REV}" \
-e NOMOS_NODE_PATH="${node_path_env}" \ -e NOMOS_NODE_PATH="${node_path_env}" \
-e NOMOS_BUNDLE_DOCKER_PLATFORM="${DOCKER_PLATFORM}" \
-e NOMOS_CIRCUITS="/workspace/.tmp/nomos-circuits-linux" \ -e NOMOS_CIRCUITS="/workspace/.tmp/nomos-circuits-linux" \
-e STACK_DIR="/workspace/.tmp/nomos-circuits-linux" \ -e STACK_DIR="/workspace/.tmp/nomos-circuits-linux" \
-e HOST_DIR="/workspace/.tmp/nomos-circuits-linux" \ -e HOST_DIR="/workspace/.tmp/nomos-circuits-linux" \
-e NOMOS_EXTRA_FEATURES="${NOMOS_EXTRA_FEATURES:-}" \ -e NOMOS_EXTRA_FEATURES="${NOMOS_EXTRA_FEATURES:-}" \
-e BUNDLE_IN_CONTAINER=1 \ -e BUNDLE_IN_CONTAINER=1 \
-e CARGO_HOME=/workspace/.tmp/cargo-linux \ -e CARGO_HOME=/workspace/.tmp/cargo-linux \
-e CARGO_TARGET_DIR=/workspace/.tmp/nomos-node-linux-target \ -e CARGO_TARGET_DIR="/workspace/.tmp/nomos-node-linux-target${target_suffix}" \
-v "${ROOT_DIR}/.tmp/cargo-linux":/workspace/.tmp/cargo-linux \ -v "${ROOT_DIR}/.tmp/cargo-linux":/workspace/.tmp/cargo-linux \
-v "${ROOT_DIR}/.tmp/nomos-node-linux-target":/workspace/.tmp/nomos-node-linux-target \ -v "${host_target_dir}:/workspace/.tmp/nomos-node-linux-target${target_suffix}" \
-v "${ROOT_DIR}:/workspace" \ -v "${ROOT_DIR}:/workspace" \
"${extra_mounts[@]}" \ "${extra_mounts[@]}" \
-w /workspace \ -w /workspace \
@ -267,7 +289,14 @@ build_bundle::prepare_circuits() {
NODE_TARGET="${ROOT_DIR}/.tmp/nomos-node-host-target" NODE_TARGET="${ROOT_DIR}/.tmp/nomos-node-host-target"
else else
CIRCUITS_DIR="${ROOT_DIR}/.tmp/nomos-circuits-linux" CIRCUITS_DIR="${ROOT_DIR}/.tmp/nomos-circuits-linux"
NODE_TARGET="${ROOT_DIR}/.tmp/nomos-node-linux-target" # When building Linux bundles in Docker, avoid reusing the same target dir
# across different container architectures (e.g. linux/arm64 vs linux/amd64),
# as the native-host `target/debug` layout would otherwise get mixed.
local target_suffix=""
if [ -n "${BUNDLE_IN_CONTAINER:-}" ]; then
target_suffix="$(build_bundle::docker_platform_suffix "${NOMOS_BUNDLE_DOCKER_PLATFORM:-}")"
fi
NODE_TARGET="${ROOT_DIR}/.tmp/nomos-node-linux-target${target_suffix}"
fi fi
NODE_SRC_DEFAULT="${ROOT_DIR}/.tmp/nomos-node-${PLATFORM}-src" NODE_SRC_DEFAULT="${ROOT_DIR}/.tmp/nomos-node-${PLATFORM}-src"

View File

@ -0,0 +1,38 @@
# Standalone observability stack for compose-based test runs: Prometheus
# (with OTLP ingest enabled) plus Grafana, provisioned from the shared
# testing-framework monitoring assets.
services:
  prometheus:
    image: prom/prometheus:v2.53.0
    command:
      - --config.file=/etc/prometheus/prometheus.yml
      - --storage.tsdb.path=/prometheus
      # Exposes OTLP HTTP ingest at /api/v1/otlp/v1/metrics
      - --enable-feature=otlp-write-receiver
      # Allow runtime config reload and admin endpoints (e.g. TSDB cleanup).
      - --web.enable-lifecycle
      - --web.enable-admin-api
    volumes:
      - ./prometheus/prometheus.yml:/etc/prometheus/prometheus.yml:ro
      - prometheus-data:/prometheus
    ports:
      - "9090:9090"
  grafana:
    image: grafana/grafana:11.4.0
    depends_on:
      - prometheus
    # Plugin list shared with the k8s monitoring assets.
    env_file:
      - ../../../testing-framework/assets/stack/monitoring/grafana/plugins.env
    environment:
      # Local-only credentials; not intended for anything beyond test stacks.
      GF_SECURITY_ADMIN_USER: admin
      GF_SECURITY_ADMIN_PASSWORD: admin
      GF_USERS_ALLOW_SIGN_UP: "false"
    volumes:
      - grafana-data:/var/lib/grafana
      - ../../../testing-framework/assets/stack/monitoring/grafana/grafana.ini:/etc/grafana/grafana.ini:ro
      - ../../../testing-framework/assets/stack/monitoring/grafana/datasources.yaml:/etc/grafana/provisioning/datasources/datasources.yaml:ro
      - ../../../testing-framework/assets/stack/monitoring/grafana/dashboards.yml:/etc/grafana/provisioning/dashboards/dashboards.yml:ro
      - ../../../testing-framework/assets/stack/monitoring/grafana/dashboards:/var/lib/grafana/dashboards:ro
    ports:
      - "3000:3000"
volumes:
  prometheus-data: {}
  grafana-data: {}

View File

@ -0,0 +1,10 @@
# Minimal Prometheus configuration for the local observability stack.
global:
  scrape_interval: 15s
  evaluation_interval: 15s
  external_labels:
    monitor: "NomosTesting"
# Only Prometheus itself is scraped; node metrics are pushed via the OTLP
# receiver enabled on the server (--enable-feature=otlp-write-receiver).
scrape_configs:
  - job_name: prometheus
    static_configs:
      - targets: ["prometheus:9090"]

View File

@ -0,0 +1,19 @@
# Helm values for prometheus-community/kube-prometheus-stack as installed by
# scripts/setup-observability.sh (namespace "nomos-observability").
prometheus:
  prometheusSpec:
    # Accept pushed OTLP metrics from the nodes under test.
    enableOTLPReceiver: true
    additionalArgs:
      - name: web.enable-admin-api
    # Basic OTLP → Prometheus translation defaults are fine for most setups.
    # See: https://prometheus.io/docs/guides/opentelemetry/
    otlp: {}
grafana:
  # Local-only credentials; not intended for anything beyond test stacks.
  adminUser: admin
  adminPassword: admin
  sidecar:
    # Pick up dashboards from ConfigMaps labeled grafana_dashboard=1
    # (created by `setup-observability.sh k8s dashboards`).
    dashboards:
      enabled: true
      label: grafana_dashboard
      labelValue: "1"
    datasources:
      enabled: true

View File

@ -23,6 +23,15 @@ readonly DEFAULT_PRIVATE_AWS_REGION="ap-southeast-2"
readonly DEFAULT_PULL_POLICY_LOCAL="IfNotPresent" readonly DEFAULT_PULL_POLICY_LOCAL="IfNotPresent"
readonly DEFAULT_PULL_POLICY_ECR="Always" readonly DEFAULT_PULL_POLICY_ECR="Always"
readonly DOCKER_DESKTOP_CONTEXT="docker-desktop" readonly DOCKER_DESKTOP_CONTEXT="docker-desktop"
readonly DEFAULT_K8S_ECR_SKIP_IMAGE_BUILD="1"
run_examples::cleanup() {
rm -f "${SETUP_OUT:-}" 2>/dev/null || true
}
# Avoid inheriting environment-provided EXIT traps (e.g., from BASH_ENV) that can
# reference missing functions and fail at script termination.
trap run_examples::cleanup EXIT
run_examples::usage() { run_examples::usage() {
cat <<EOF cat <<EOF
@ -40,7 +49,6 @@ Options:
--bundle PATH Convenience alias for setting NOMOS_BINARIES_TAR=PATH --bundle PATH Convenience alias for setting NOMOS_BINARIES_TAR=PATH
--metrics-query-url URL PromQL base URL the runner process can query (optional) --metrics-query-url URL PromQL base URL the runner process can query (optional)
--metrics-otlp-ingest-url URL Full OTLP HTTP ingest URL for node metrics export (optional) --metrics-otlp-ingest-url URL Full OTLP HTTP ingest URL for node metrics export (optional)
--grafana-url URL Grafana base URL for printing/logging (optional)
--external-prometheus URL Alias for --metrics-query-url --external-prometheus URL Alias for --metrics-query-url
--external-otlp-metrics-endpoint URL Alias for --metrics-otlp-ingest-url --external-otlp-metrics-endpoint URL Alias for --metrics-otlp-ingest-url
--local Use a local Docker image tag (default for docker-desktop k8s) --local Use a local Docker image tag (default for docker-desktop k8s)
@ -48,6 +56,8 @@ Options:
Environment: Environment:
VERSION Circuits version (default from versions.env) VERSION Circuits version (default from versions.env)
CONSENSUS_SLOT_TIME Consensus slot duration in seconds (default 2)
CONSENSUS_ACTIVE_SLOT_COEFF Probability a slot is active (default 0.9); expected block interval ≈ slot_time / coeff
NOMOS_TESTNET_IMAGE Image reference (overridden by --local/--ecr selection) NOMOS_TESTNET_IMAGE Image reference (overridden by --local/--ecr selection)
ECR_IMAGE Full image reference for --ecr (overrides ECR_REGISTRY/ECR_REPO/TAG) ECR_IMAGE Full image reference for --ecr (overrides ECR_REGISTRY/ECR_REPO/TAG)
ECR_REGISTRY Registry hostname for --ecr (default ${DEFAULT_PUBLIC_ECR_REGISTRY}) ECR_REGISTRY Registry hostname for --ecr (default ${DEFAULT_PUBLIC_ECR_REGISTRY})
@ -56,9 +66,16 @@ Environment:
NOMOS_TESTNET_IMAGE_PULL_POLICY K8s imagePullPolicy (default ${DEFAULT_PULL_POLICY_LOCAL}; set to ${DEFAULT_PULL_POLICY_ECR} for --ecr) NOMOS_TESTNET_IMAGE_PULL_POLICY K8s imagePullPolicy (default ${DEFAULT_PULL_POLICY_LOCAL}; set to ${DEFAULT_PULL_POLICY_ECR} for --ecr)
NOMOS_BINARIES_TAR Path to prebuilt binaries/circuits tarball (default .tmp/nomos-binaries-<platform>-<version>.tar.gz) NOMOS_BINARIES_TAR Path to prebuilt binaries/circuits tarball (default .tmp/nomos-binaries-<platform>-<version>.tar.gz)
NOMOS_SKIP_IMAGE_BUILD Set to 1 to skip rebuilding the compose/k8s image NOMOS_SKIP_IMAGE_BUILD Set to 1 to skip rebuilding the compose/k8s image
NOMOS_FORCE_IMAGE_BUILD Set to 1 to force image rebuild even for k8s ECR mode
NOMOS_METRICS_QUERY_URL PromQL base URL for the runner process (optional) NOMOS_METRICS_QUERY_URL PromQL base URL for the runner process (optional)
NOMOS_METRICS_OTLP_INGEST_URL Full OTLP HTTP ingest URL for node metrics export (optional) NOMOS_METRICS_OTLP_INGEST_URL Full OTLP HTTP ingest URL for node metrics export (optional)
NOMOS_GRAFANA_URL Grafana base URL for printing/logging (optional) NOMOS_GRAFANA_URL Grafana base URL for printing/logging (optional)
Notes:
- For k8s runs on non-docker-desktop clusters (e.g. EKS), a locally built Docker image is not
visible to the cluster. By default, this script skips local image rebuilds in that case.
If you need a custom image, run scripts/build_test_image.sh and push it to a registry the
cluster can pull from, then set NOMOS_TESTNET_IMAGE accordingly.
EOF EOF
} }
@ -104,7 +121,6 @@ run_examples::parse_args() {
IMAGE_SELECTION_MODE="auto" IMAGE_SELECTION_MODE="auto"
METRICS_QUERY_URL="" METRICS_QUERY_URL=""
METRICS_OTLP_INGEST_URL="" METRICS_OTLP_INGEST_URL=""
GRAFANA_URL=""
RUN_SECS_RAW_SPECIFIED="" RUN_SECS_RAW_SPECIFIED=""
@ -166,14 +182,6 @@ run_examples::parse_args() {
METRICS_OTLP_INGEST_URL="${1#*=}" METRICS_OTLP_INGEST_URL="${1#*=}"
shift shift
;; ;;
--grafana-url)
GRAFANA_URL="${2:-}"
shift 2
;;
--grafana-url=*)
GRAFANA_URL="${1#*=}"
shift
;;
--external-prometheus) --external-prometheus)
METRICS_QUERY_URL="${2:-}" METRICS_QUERY_URL="${2:-}"
shift 2 shift 2
@ -279,12 +287,20 @@ run_examples::select_image() {
run_examples::fail_with_usage "Unknown image selection mode: ${selection}" run_examples::fail_with_usage "Unknown image selection mode: ${selection}"
fi fi
export NOMOS_IMAGE_SELECTION="${selection}"
export IMAGE_TAG="${IMAGE}" export IMAGE_TAG="${IMAGE}"
export NOMOS_TESTNET_IMAGE="${IMAGE}" export NOMOS_TESTNET_IMAGE="${IMAGE}"
if [ "${MODE}" = "k8s" ]; then if [ "${MODE}" = "k8s" ]; then
if [ "${selection}" = "ecr" ]; then if [ "${selection}" = "ecr" ]; then
export NOMOS_KZG_MODE="${NOMOS_KZG_MODE:-inImage}" export NOMOS_KZG_MODE="${NOMOS_KZG_MODE:-inImage}"
# A locally built Docker image isn't visible to remote clusters (e.g. EKS). Default to
# skipping the local rebuild, unless the user explicitly set NOMOS_SKIP_IMAGE_BUILD or
# overrides via NOMOS_FORCE_IMAGE_BUILD=1.
if [ "${NOMOS_FORCE_IMAGE_BUILD:-0}" != "1" ]; then
NOMOS_SKIP_IMAGE_BUILD="${NOMOS_SKIP_IMAGE_BUILD:-${DEFAULT_K8S_ECR_SKIP_IMAGE_BUILD}}"
export NOMOS_SKIP_IMAGE_BUILD
fi
else else
export NOMOS_KZG_MODE="${NOMOS_KZG_MODE:-hostPath}" export NOMOS_KZG_MODE="${NOMOS_KZG_MODE:-hostPath}"
fi fi
@ -548,9 +564,6 @@ run_examples::run() {
if [ -n "${METRICS_OTLP_INGEST_URL}" ]; then if [ -n "${METRICS_OTLP_INGEST_URL}" ]; then
export NOMOS_METRICS_OTLP_INGEST_URL="${METRICS_OTLP_INGEST_URL}" export NOMOS_METRICS_OTLP_INGEST_URL="${METRICS_OTLP_INGEST_URL}"
fi fi
if [ -n "${GRAFANA_URL}" ]; then
export NOMOS_GRAFANA_URL="${GRAFANA_URL}"
fi
echo "==> Running ${BIN} for ${RUN_SECS}s (mode=${MODE}, image=${IMAGE})" echo "==> Running ${BIN} for ${RUN_SECS}s (mode=${MODE}, image=${IMAGE})"
cd "${ROOT_DIR}" cd "${ROOT_DIR}"
@ -576,8 +589,6 @@ run_examples::main() {
echo "==> Using restored circuits/binaries bundle" echo "==> Using restored circuits/binaries bundle"
SETUP_OUT="$(common::tmpfile nomos-setup-output.XXXXXX)" SETUP_OUT="$(common::tmpfile nomos-setup-output.XXXXXX)"
cleanup() { rm -f "${SETUP_OUT}" 2>/dev/null || true; }
trap cleanup EXIT
run_examples::maybe_rebuild_image run_examples::maybe_rebuild_image
run_examples::maybe_restore_host_after_image run_examples::maybe_restore_host_after_image

View File

@ -25,7 +25,6 @@ Options:
--force-k8s-image-build Allow the k8s "rebuild image" run even on non-docker-desktop clusters --force-k8s-image-build Allow the k8s "rebuild image" run even on non-docker-desktop clusters
--metrics-query-url URL Forwarded to scripts/run-examples.sh (optional) --metrics-query-url URL Forwarded to scripts/run-examples.sh (optional)
--metrics-otlp-ingest-url URL Forwarded to scripts/run-examples.sh (optional) --metrics-otlp-ingest-url URL Forwarded to scripts/run-examples.sh (optional)
--grafana-url URL Forwarded to scripts/run-examples.sh (optional)
-h, --help Show this help -h, --help Show this help
Notes: Notes:
@ -51,7 +50,6 @@ matrix::parse_args() {
FORCE_K8S_IMAGE_BUILD=0 FORCE_K8S_IMAGE_BUILD=0
METRICS_QUERY_URL="" METRICS_QUERY_URL=""
METRICS_OTLP_INGEST_URL="" METRICS_OTLP_INGEST_URL=""
GRAFANA_URL=""
while [ "$#" -gt 0 ]; do while [ "$#" -gt 0 ]; do
case "$1" in case "$1" in
@ -71,8 +69,6 @@ matrix::parse_args() {
--metrics-query-url=*) METRICS_QUERY_URL="${1#*=}"; shift ;; --metrics-query-url=*) METRICS_QUERY_URL="${1#*=}"; shift ;;
--metrics-otlp-ingest-url) METRICS_OTLP_INGEST_URL="${2:-}"; shift 2 ;; --metrics-otlp-ingest-url) METRICS_OTLP_INGEST_URL="${2:-}"; shift 2 ;;
--metrics-otlp-ingest-url=*) METRICS_OTLP_INGEST_URL="${1#*=}"; shift ;; --metrics-otlp-ingest-url=*) METRICS_OTLP_INGEST_URL="${1#*=}"; shift ;;
--grafana-url) GRAFANA_URL="${2:-}"; shift 2 ;;
--grafana-url=*) GRAFANA_URL="${1#*=}"; shift ;;
*) matrix::die "Unknown argument: $1" ;; *) matrix::die "Unknown argument: $1" ;;
esac esac
done done
@ -104,9 +100,6 @@ matrix::forwarded_args() {
if [ -n "${METRICS_OTLP_INGEST_URL}" ]; then if [ -n "${METRICS_OTLP_INGEST_URL}" ]; then
args+=(--metrics-otlp-ingest-url "${METRICS_OTLP_INGEST_URL}") args+=(--metrics-otlp-ingest-url "${METRICS_OTLP_INGEST_URL}")
fi fi
if [ -n "${GRAFANA_URL}" ]; then
args+=(--grafana-url "${GRAFANA_URL}")
fi
printf '%s\0' "${args[@]}" printf '%s\0' "${args[@]}"
} }
@ -148,6 +141,7 @@ matrix::k8s_context() {
matrix::main() { matrix::main() {
ROOT_DIR="$(common::repo_root)" ROOT_DIR="$(common::repo_root)"
export ROOT_DIR export ROOT_DIR
export RUST_LOG="${RUST_LOG:-info}"
matrix::parse_args "$@" matrix::parse_args "$@"
matrix::split_modes matrix::split_modes
@ -211,11 +205,17 @@ matrix::main() {
fi fi
if [ "${ctx}" = "docker-desktop" ] || [ "${FORCE_K8S_IMAGE_BUILD}" -eq 1 ]; then if [ "${ctx}" = "docker-desktop" ] || [ "${FORCE_K8S_IMAGE_BUILD}" -eq 1 ]; then
# On non-docker-desktop clusters, run-examples.sh defaults to skipping local image builds
# since the cluster can't see them. Honor the matrix "force" option by overriding.
if [ "${ctx}" != "docker-desktop" ] && [ "${FORCE_K8S_IMAGE_BUILD}" -eq 1 ]; then
export NOMOS_FORCE_IMAGE_BUILD=1
fi
matrix::run_case "k8s.image_build" \ matrix::run_case "k8s.image_build" \
"${ROOT_DIR}/scripts/run-examples.sh" \ "${ROOT_DIR}/scripts/run-examples.sh" \
-t "${RUN_SECS}" -v "${VALIDATORS}" -e "${EXECUTORS}" \ -t "${RUN_SECS}" -v "${VALIDATORS}" -e "${EXECUTORS}" \
"${forward[@]}" \ "${forward[@]}" \
k8s k8s
unset NOMOS_FORCE_IMAGE_BUILD || true
else else
echo "==> [k8s] Detected context '${ctx}'; skipping image-build variant (use --force-k8s-image-build to override)" echo "==> [k8s] Detected context '${ctx}'; skipping image-build variant (use --force-k8s-image-build to override)"
fi fi
@ -259,4 +259,3 @@ matrix::main() {
if [[ "${BASH_SOURCE[0]}" == "$0" ]]; then if [[ "${BASH_SOURCE[0]}" == "$0" ]]; then
matrix::main "$@" matrix::main "$@"
fi fi

168
scripts/setup-observability.sh Executable file
View File

@ -0,0 +1,168 @@
#!/usr/bin/env bash
set -euo pipefail
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
# shellcheck disable=SC1091
. "${SCRIPT_DIR}/common.sh"
common::ensure_bash "$@"
ROOT="$(common::repo_root)"
usage() {
  # Print CLI usage to stdout. The quoted heredoc delimiter ('USAGE')
  # prevents any expansion inside the help text.
  cat <<'USAGE'
Usage:
scripts/setup-observability.sh compose up|down|logs|env
scripts/setup-observability.sh k8s install|uninstall|dashboards|env
Compose:
- Runs Prometheus (+ OTLP receiver) and Grafana via docker compose.
- Prints NOMOS_METRICS_* / NOMOS_GRAFANA_URL exports to wire into runs.
Kubernetes:
- Installs prometheus-community/kube-prometheus-stack into namespace
"nomos-observability" and optionally loads Nomos Grafana dashboards.
- Prints port-forward commands + NOMOS_METRICS_* / NOMOS_GRAFANA_URL exports.
USAGE
}
require_cmd() {
  # Abort with a clear error when a required executable is absent from PATH.
  if ! command -v "$1" >/dev/null 2>&1; then
    common::die "Missing required command: $1"
  fi
}
compose_file() {
  # Absolute path of the observability docker-compose definition.
  echo "${ROOT}/scripts/observability/compose/docker-compose.yml"
}
compose_run() {
  # Run `docker compose` against the observability stack definition,
  # forwarding all arguments (up/down/logs/...). Dies if the file is missing.
  local compose_yml
  compose_yml="$(compose_file)"
  common::require_file "${compose_yml}"
  docker compose -f "${compose_yml}" "$@"
}
compose_env() {
  # Print shell exports wiring a test run to this stack. Quoted heredoc: the
  # URLs are emitted verbatim. NOTE(review): the OTLP URL uses
  # host.docker.internal — presumably because nodes push metrics from inside
  # containers; confirm against the compose deployer.
  cat <<'EOF'
export NOMOS_METRICS_QUERY_URL=http://localhost:9090
export NOMOS_METRICS_OTLP_INGEST_URL=http://host.docker.internal:9090/api/v1/otlp/v1/metrics
export NOMOS_GRAFANA_URL=http://localhost:3000
EOF
}
# Fixed names/paths for the kube-prometheus-stack deployment.
k8s_namespace() { echo "nomos-observability"; }
k8s_release() { echo "nomos-observability"; }
k8s_values() { echo "${ROOT}/scripts/observability/k8s/kube-prometheus-stack.values.yaml"; }
k8s_install() {
  # Install (or upgrade) kube-prometheus-stack into the observability
  # namespace, then wait best-effort for its workloads to come up.
  require_cmd kubectl
  require_cmd helm
  local ns release values
  ns="$(k8s_namespace)"
  release="$(k8s_release)"
  values="$(k8s_values)"
  common::require_file "${values}"
  # Idempotent namespace creation.
  kubectl get ns "${ns}" >/dev/null 2>&1 || kubectl create ns "${ns}"
  # Register the chart repo only once; `helm repo list` output is
  # whitespace-delimited, hence the anchored grep.
  if ! helm repo list | grep -q '^prometheus-community[[:space:]]'; then
    helm repo add prometheus-community https://prometheus-community.github.io/helm-charts
  fi
  helm repo update prometheus-community
  helm upgrade --install "${release}" prometheus-community/kube-prometheus-stack \
    -n "${ns}" \
    -f "${values}"
  # Waits are advisory (|| true): components may label differently or still
  # be settling; callers can re-check with kubectl.
  kubectl -n "${ns}" wait --for=condition=Available deploy -l "release=${release}" --timeout=10m || true
  kubectl -n "${ns}" wait --for=condition=Ready pod -l "release=${release}" --timeout=10m || true
}
k8s_uninstall() {
  # Tear down the Helm release and its namespace; both steps tolerate the
  # resources already being gone.
  require_cmd kubectl
  require_cmd helm
  local ns release
  ns="$(k8s_namespace)"
  release="$(k8s_release)"
  helm uninstall "${release}" -n "${ns}" 2>/dev/null || true
  kubectl delete ns "${ns}" --ignore-not-found
}
k8s_apply_dashboards() {
  # Load each Nomos Grafana dashboard JSON into the observability namespace as
  # a ConfigMap labeled `grafana_dashboard=1`, which the Grafana sidecar
  # (enabled in the Helm values) discovers automatically.
  require_cmd kubectl
  local ns dash_dir
  ns="$(k8s_namespace)"
  dash_dir="${ROOT}/testing-framework/assets/stack/monitoring/grafana/dashboards"
  [ -d "${dash_dir}" ] || common::die "Missing dashboards directory: ${dash_dir}"
  local file base name
  for file in "${dash_dir}"/*.json; do
    # With default (non-nullglob) shell options an unmatched glob is passed
    # through literally; skip it instead of feeding kubectl a bogus path,
    # which would abort the script under `set -e`.
    [ -e "${file}" ] || continue
    base="$(basename "${file}" .json)"
    # ConfigMap names must be DNS-safe; replace anything else with '-'.
    name="nomos-dashboard-${base//[^a-zA-Z0-9-]/-}"
    # `--dry-run=client -o yaml | kubectl apply` makes the operation idempotent.
    kubectl -n "${ns}" create configmap "${name}" \
      --from-file="$(basename "${file}")=${file}" \
      --dry-run=client -o yaml | kubectl apply -f -
    kubectl -n "${ns}" label configmap "${name}" grafana_dashboard=1 --overwrite >/dev/null
  done
}
k8s_env() {
  # Print copy-pasteable port-forward commands and NOMOS_* exports for a
  # cluster install. The heredoc delimiter is intentionally unquoted so
  # ${ns}/${release} expand into the emitted text.
  local ns release
  ns="$(k8s_namespace)"
  release="$(k8s_release)"
  cat <<EOF
# Prometheus (runner-side): port-forward then set:
kubectl -n ${ns} port-forward svc/${release}-kube-p-prometheus 9090:9090
export NOMOS_METRICS_QUERY_URL=http://localhost:9090
# Grafana (runner-side): port-forward then set:
kubectl -n ${ns} port-forward svc/${release}-grafana 3000:80
export NOMOS_GRAFANA_URL=http://localhost:3000
# Prometheus OTLP ingest (node-side inside the cluster):
export NOMOS_METRICS_OTLP_INGEST_URL=http://${release}-kube-p-prometheus.${ns}:9090/api/v1/otlp/v1/metrics
EOF
}
main() {
  # Dispatch: the first argument selects the stack flavour (compose|k8s), the
  # second the action; anything unrecognized prints usage or dies.
  local target="${1:-}"
  local action="${2:-}"
  case "${target}" in
    compose)
      require_cmd docker
      case "${action}" in
        up) compose_run up -d ;;
        # `down -v` also removes the prometheus/grafana data volumes.
        down) compose_run down -v ;;
        logs) compose_run logs -f ;;
        env) compose_env ;;
        ""|help|-h|--help) usage ;;
        *) common::die "Unknown compose action: ${action}" ;;
      esac
      ;;
    k8s)
      case "${action}" in
        install) k8s_install ;;
        uninstall) k8s_uninstall ;;
        dashboards) k8s_apply_dashboards ;;
        env) k8s_env ;;
        ""|help|-h|--help) usage ;;
        *) common::die "Unknown k8s action: ${action}" ;;
      esac
      ;;
    ""|help|-h|--help)
      usage
      ;;
    *)
      common::die "Unknown target: ${target}"
      ;;
  esac
}
main "$@"

View File

@ -1,5 +1,7 @@
use std::{ use std::{
env,
num::{NonZero, NonZeroU64}, num::{NonZero, NonZeroU64},
str::FromStr as _,
sync::Arc, sync::Arc,
}; };
@ -35,8 +37,30 @@ pub struct ConsensusParams {
} }
impl ConsensusParams { impl ConsensusParams {
const DEFAULT_ACTIVE_SLOT_COEFF: f64 = 0.9;
const CONSENSUS_ACTIVE_SLOT_COEFF_VAR: &str = "CONSENSUS_ACTIVE_SLOT_COEFF";
#[must_use] #[must_use]
pub const fn default_for_participants(n_participants: usize) -> Self { pub fn default_for_participants(n_participants: usize) -> Self {
let active_slot_coeff = env::var(Self::CONSENSUS_ACTIVE_SLOT_COEFF_VAR)
.map(|s| {
f64::from_str(&s).unwrap_or_else(|err| {
panic!(
"invalid {}='{}' (expected a float in (0.0, 1.0]): {err}",
Self::CONSENSUS_ACTIVE_SLOT_COEFF_VAR,
s
)
})
})
.unwrap_or(Self::DEFAULT_ACTIVE_SLOT_COEFF);
assert!(
(0.0..=1.0).contains(&active_slot_coeff) && active_slot_coeff > 0.0,
"{} must be in (0.0, 1.0], got {}",
Self::CONSENSUS_ACTIVE_SLOT_COEFF_VAR,
active_slot_coeff
);
Self { Self {
n_participants, n_participants,
// by setting the slot coeff to 1, we also increase the probability of multiple blocks // by setting the slot coeff to 1, we also increase the probability of multiple blocks
@ -45,7 +69,7 @@ impl ConsensusParams {
// deciding on the longest chain. // deciding on the longest chain.
security_param: NonZero::new(10).unwrap(), security_param: NonZero::new(10).unwrap(),
// a block should be produced (on average) every slot // a block should be produced (on average) every slot
active_slot_coeff: 0.9, active_slot_coeff,
} }
} }
} }

View File

@ -27,6 +27,7 @@ pub fn default_time_config() -> GeneralTimeConfig {
let slot_duration = std::env::var(CONSENSUS_SLOT_TIME_VAR) let slot_duration = std::env::var(CONSENSUS_SLOT_TIME_VAR)
.map(|s| <u64>::from_str(&s).unwrap()) .map(|s| <u64>::from_str(&s).unwrap())
.unwrap_or(DEFAULT_SLOT_TIME); .unwrap_or(DEFAULT_SLOT_TIME);
GeneralTimeConfig { GeneralTimeConfig {
slot_duration: Duration::from_secs(slot_duration), slot_duration: Duration::from_secs(slot_duration),
chain_start_time: OffsetDateTime::now_utc(), chain_start_time: OffsetDateTime::now_utc(),

View File

@ -1,3 +1,8 @@
use std::{
env,
path::{Path, PathBuf},
};
use serde::Serialize; use serde::Serialize;
use testing_framework_core::{ use testing_framework_core::{
constants::{DEFAULT_CFGSYNC_PORT, kzg_container_path}, constants::{DEFAULT_CFGSYNC_PORT, kzg_container_path},
@ -150,9 +155,26 @@ fn base_volumes(use_kzg_mount: bool) -> Vec<String> {
if use_kzg_mount { if use_kzg_mount {
volumes.push("./kzgrs_test_params:/kzgrs_test_params:z".into()); volumes.push("./kzgrs_test_params:/kzgrs_test_params:z".into());
} }
if let Some(host_log_dir) = repo_root()
.map(|root| root.join("tmp").join("node-logs"))
.map(|dir| dir.display().to_string())
{
volumes.push(format!("{host_log_dir}:/tmp/node-logs"));
}
volumes volumes
} }
/// Best-effort repository root discovery.
///
/// `CARGO_WORKSPACE_DIR` wins when set; otherwise walk three levels up from
/// this crate's manifest directory (crate -> deployers -> testing-framework
/// -> repo root — see the workspace member layout).
fn repo_root() -> Option<PathBuf> {
    env::var("CARGO_WORKSPACE_DIR")
        .map(PathBuf::from)
        .ok()
        .or_else(|| {
            let manifest = Path::new(env!("CARGO_MANIFEST_DIR"));
            // `ancestors().nth(3)` == parent of parent of parent; yields None
            // if the path is too short, matching the original chain.
            manifest.ancestors().nth(3).map(Path::to_path_buf)
        })
}
fn default_extra_hosts() -> Vec<String> { fn default_extra_hosts() -> Vec<String> {
host_gateway_entry().into_iter().collect() host_gateway_entry().into_iter().collect()
} }

View File

@ -311,6 +311,7 @@ pub fn write_compose_artifacts(
let compose_path = workspace.root.join("compose.generated.yml"); let compose_path = workspace.root.join("compose.generated.yml");
write_compose_file(&descriptor, &compose_path) write_compose_file(&descriptor, &compose_path)
.map_err(|source| ConfigError::Template { source })?; .map_err(|source| ConfigError::Template { source })?;
debug!(compose_file = %compose_path.display(), "rendered compose file"); debug!(compose_file = %compose_path.display(), "rendered compose file");
Ok(compose_path) Ok(compose_path)
} }

View File

@ -302,6 +302,7 @@ impl<Caps> TransactionFlowBuilder<Caps> {
let workload = transaction::Workload::with_rate(self.rate.get()) let workload = transaction::Workload::with_rate(self.rate.get())
.expect("transaction rate must be non-zero") .expect("transaction rate must be non-zero")
.with_user_limit(self.users); .with_user_limit(self.users);
tracing::info!( tracing::info!(
rate = self.rate.get(), rate = self.rate.get(),
users = self.users.map(|u| u.get()), users = self.users.map(|u| u.get()),