diff --git a/.gitignore b/.gitignore index 405661c..d8126b9 100644 --- a/.gitignore +++ b/.gitignore @@ -3,6 +3,7 @@ .tmp/ /.tmp*/ tmp-local-logs/ +tmp/node-logs/ # IDE / OS cruft .idea/ .DS_Store diff --git a/Cargo.toml b/Cargo.toml index 00cb219..da1c7f7 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -4,7 +4,7 @@ members = [ "examples/doc-snippets", "testing-framework/configs", "testing-framework/core", - "testing-framework/cucumber_ext", + "testing-framework/cucumber", "testing-framework/deployers/compose", "testing-framework/deployers/k8s", "testing-framework/deployers/local", diff --git a/book/src/SUMMARY.md b/book/src/SUMMARY.md index cb0634b..e62d711 100644 --- a/book/src/SUMMARY.md +++ b/book/src/SUMMARY.md @@ -26,6 +26,7 @@ - [Operations](operations.md) - [Part III — Developer Reference](part-iii.md) - [Scenario Model (Developer Level)](scenario-model.md) + - [API Levels: Builder DSL vs. Direct](api-levels.md) - [Extending the Framework](extending.md) - [Example: New Workload & Expectation (Rust)](custom-workload-example.md) - [Internal Crate Reference](internal-crate-reference.md) diff --git a/book/src/api-levels.md b/book/src/api-levels.md new file mode 100644 index 0000000..cf70e8e --- /dev/null +++ b/book/src/api-levels.md @@ -0,0 +1,131 @@ +# API Levels: Builder DSL vs. Direct Instantiation + +The framework supports two styles for constructing scenarios: + +1. **High-level Builder DSL** (recommended): fluent helper methods (e.g. `.transactions_with(...)`) +2. **Low-level direct instantiation**: construct workload/expectation types explicitly, then attach them + +Both styles produce the same runtime behavior because they ultimately call the same core builder APIs. + +## High-Level Builder DSL (Recommended) + +The DSL is implemented as extension traits (primarily `testing_framework_workflows::ScenarioBuilderExt`) on the core scenario builder. 
+ +```rust +use std::time::Duration; + +use testing_framework_core::scenario::ScenarioBuilder; +use testing_framework_workflows::ScenarioBuilderExt; + +let plan = ScenarioBuilder::topology_with(|t| t.network_star().validators(3).executors(2)) + .wallets(5) + .transactions_with(|txs| txs.rate(5).users(3)) + .da_with(|da| da.channel_rate(1).blob_rate(1).headroom_percent(20)) + .expect_consensus_liveness() + .with_run_duration(Duration::from_secs(60)) + .build(); +``` + +**When to use:** +- Most test code (smoke, regression, CI) +- When you want sensible defaults and minimal boilerplate + +## Low-Level Direct Instantiation + +Direct instantiation gives you explicit control over the concrete types you attach: + +```rust +use std::{ + num::{NonZeroU64, NonZeroUsize}, + time::Duration, +}; + +use testing_framework_core::scenario::ScenarioBuilder; +use testing_framework_workflows::{ + expectations::ConsensusLiveness, + workloads::{da, transaction}, +}; + +let tx_workload = transaction::Workload::with_rate(5) + .expect("transaction rate must be non-zero") + .with_user_limit(NonZeroUsize::new(3)); + +let da_workload = da::Workload::with_rate( + NonZeroU64::new(1).unwrap(), // blob rate per block + NonZeroU64::new(1).unwrap(), // channel rate per block + da::Workload::default_headroom_percent(), +); + +let plan = ScenarioBuilder::topology_with(|t| t.network_star().validators(3).executors(2)) + .wallets(5) + .with_workload(tx_workload) + .with_workload(da_workload) + .with_expectation(ConsensusLiveness::default()) + .with_run_duration(Duration::from_secs(60)) + .build(); +``` + +**When to use:** +- Custom workload/expectation implementations +- Reusing preconfigured workload instances across multiple scenarios +- Debugging / exploring the underlying workload types + +## Method Correspondence + +| High-Level DSL | Low-Level Direct | +|----------------|------------------| +| `.transactions_with(\|txs\| txs.rate(5).users(3))` | `.with_workload(transaction::Workload::with_rate(5).expect(...).with_user_limit(...))` | +| `.da_with(\|da\| da.blob_rate(1).channel_rate(1))` | `.with_workload(da::Workload::with_rate(...))` | +| `.expect_consensus_liveness()` | `.with_expectation(ConsensusLiveness::default())` | + +## Bundled Expectations (Important) + +Workloads can bundle expectations by implementing `Workload::expectations()`. + +These bundled expectations are attached automatically whenever you call `.with_workload(...)` (including when you use the DSL), because the core builder expands workload expectations during attachment. + +## Mixing Both Styles + +Mixing is common: use the DSL for built-ins, and direct instantiation for custom pieces. + +```rust +use std::time::Duration; + +use testing_framework_core::scenario::ScenarioBuilder; +use testing_framework_workflows::ScenarioBuilderExt; + +let custom_workload = MyCustomWorkload::new(config); + +let plan = ScenarioBuilder::topology_with(|t| t.network_star().validators(3).executors(2)) + .transactions_with(|txs| txs.rate(5).users(3)) // DSL + .with_workload(custom_workload) // direct + .expect_consensus_liveness() // DSL + .with_run_duration(Duration::from_secs(60)) + .build(); +``` + +## Implementation Detail (How the DSL Works) + +The DSL methods are thin wrappers. For example: + +```rust +builder.transactions_with(|txs| txs.rate(5).users(3)) +``` + +is roughly equivalent to: + +```rust +builder.transactions().rate(5).users(3).apply() +``` + +## Troubleshooting + +**DSL method not found** +- Ensure the extension traits are in scope, e.g. 
`use testing_framework_workflows::ScenarioBuilderExt;` +- Cross-check method names in [Builder API Quick Reference](dsl-cheat-sheet.md) + +## See Also + +- [Builder API Quick Reference](dsl-cheat-sheet.md) +- [Example: New Workload & Expectation (Rust)](custom-workload-example.md) +- [Extending the Framework](extending.md) diff --git a/book/src/extending.md b/book/src/extending.md index 5d45454..db84a61 100644 --- a/book/src/extending.md +++ b/book/src/extending.md @@ -1,31 +1,311 @@ # Extending the Framework -## Adding a workload -1) Implement `testing_framework_core::scenario::Workload`: - - Provide a name and any bundled expectations. - - In `init`, derive inputs from `GeneratedTopology` and `RunMetrics`; fail - fast if prerequisites are missing (e.g., wallet data, node addresses). - - In `start`, drive async traffic using the `RunContext` clients. -2) Expose the workload from a module under `testing-framework/workflows` and - consider adding a DSL helper for ergonomic wiring. +This guide shows how to extend the framework with custom workloads, expectations, runners, and topology helpers. Each section includes the trait outline and a minimal code example. -## Adding an expectation -1) Implement `testing_framework_core::scenario::Expectation`: - - Use `start_capture` to snapshot baseline metrics. - - Use `evaluate` to assert outcomes after workloads finish; return all errors - so the runner can aggregate them. -2) Export it from `testing-framework/workflows` if it is reusable. +## Adding a Workload -## Adding a runner -1) Implement `testing_framework_core::scenario::Deployer` for your backend. - - Produce a `RunContext` with `NodeClients`, metrics endpoints, and optional - `NodeControlHandle`. - - Guard cleanup with `CleanupGuard` to reclaim resources even on failures. -2) Mirror the readiness and block-feed probes used by the existing runners so - workloads can rely on consistent signals. +**Steps:** +1. Implement `testing_framework_core::scenario::Workload` +2. Provide a name and any bundled expectations +3. Use `init` to derive inputs from topology/metrics; fail fast if prerequisites missing +4. Use `start` to drive async traffic using `RunContext` clients +5. Expose from `testing-framework/workflows` and optionally add a DSL helper -## Adding topology helpers -- Extend `testing_framework_core::topology::config::TopologyBuilder` with new layouts or - configuration presets (e.g., specialized DA parameters). Keep defaults safe: - ensure at least one participant and clamp dispersal factors as the current - helpers do. 
+**Trait outline:** + +```rust +use async_trait::async_trait; +use testing_framework_core::scenario::{ + DynError, Expectation, RunContext, RunMetrics, Workload, +}; +use testing_framework_core::topology::generation::GeneratedTopology; + +pub struct MyWorkload { + // Configuration fields + target_rate: u64, +} + +impl MyWorkload { + pub fn new(target_rate: u64) -> Self { + Self { target_rate } + } +} + +#[async_trait] +impl Workload for MyWorkload { + fn name(&self) -> &str { + "my_workload" + } + + fn expectations(&self) -> Vec> { + // Return bundled expectations that should run with this workload + vec![Box::new(MyExpectation::new(self.target_rate))] + } + + fn init( + &mut self, + topology: &GeneratedTopology, + _run_metrics: &RunMetrics, + ) -> Result<(), DynError> { + // Validate prerequisites (e.g., enough nodes, wallet data present) + if topology.validators().is_empty() { + return Err("no validators available".into()); + } + Ok(()) + } + + async fn start(&self, ctx: &RunContext) -> Result<(), DynError> { + // Drive async activity: submit transactions, query nodes, etc. + let clients = ctx.node_clients().validator_clients(); + + for client in clients { + let info = client.consensus_info().await?; + tracing::info!(?info, "workload queried node"); + } + + Ok(()) + } +} +``` + +**Key points:** +- `name()` identifies the workload in logs +- `expectations()` bundles default checks (can be empty) +- `init()` validates topology before run starts +- `start()` executes concurrently with other workloads; it should complete before run duration expires + +See [Example: New Workload & Expectation](custom-workload-example.md) for a complete, runnable example. + +## Adding an Expectation + +**Steps:** +1. Implement `testing_framework_core::scenario::Expectation` +2. Use `start_capture` to snapshot baseline metrics (optional) +3. Use `evaluate` to assert outcomes after workloads finish +4. Return descriptive errors; the runner aggregates them +5. 
Export from `testing-framework/workflows` if reusable + +**Trait outline:** + +```rust +use async_trait::async_trait; +use testing_framework_core::scenario::{DynError, Expectation, RunContext}; + +pub struct MyExpectation { + expected_value: u64, + captured_baseline: Option, +} + +impl MyExpectation { + pub fn new(expected_value: u64) -> Self { + Self { + expected_value, + captured_baseline: None, + } + } +} + +#[async_trait] +impl Expectation for MyExpectation { + fn name(&self) -> &str { + "my_expectation" + } + + async fn start_capture(&mut self, ctx: &RunContext) -> Result<(), DynError> { + // Optional: capture baseline state before workloads start + let client = ctx.node_clients().validator_clients().first() + .ok_or("no validators")?; + + let info = client.consensus_info().await?; + self.captured_baseline = Some(info.current_block_id.slot); + + tracing::info!(baseline = self.captured_baseline, "captured baseline"); + Ok(()) + } + + async fn evaluate(&mut self, ctx: &RunContext) -> Result<(), DynError> { + // Assert the expected condition holds after workloads finish + let client = ctx.node_clients().validator_clients().first() + .ok_or("no validators")?; + + let info = client.consensus_info().await?; + let final_slot = info.current_block_id.slot; + + let baseline = self.captured_baseline.unwrap_or(0); + let delta = final_slot.saturating_sub(baseline); + + if delta < self.expected_value { + return Err(format!( + "expected at least {} blocks, got {}", + self.expected_value, delta + ).into()); + } + + tracing::info!(delta, "expectation passed"); + Ok(()) + } +} +``` + +**Key points:** +- `name()` identifies the expectation in logs +- `start_capture()` runs before workloads start (optional) +- `evaluate()` runs after workloads finish; return descriptive errors +- Expectations run sequentially; keep them fast + +## Adding a Runner (Deployer) + +**Steps:** +1. Implement `testing_framework_core::scenario::Deployer` for your capability type +2. Deploy infrastructure and return a `Runner` +3. Construct `NodeClients` and spawn a `BlockFeed` +4. Build a `RunContext` and provide a `CleanupGuard` for teardown + +**Trait outline:** + +```rust +use async_trait::async_trait; +use testing_framework_core::scenario::{ + CleanupGuard, Deployer, DynError, Metrics, NodeClients, RunContext, Runner, Scenario, + spawn_block_feed, +}; +use testing_framework_core::topology::deployment::Topology; + +pub struct MyDeployer { + // Configuration: cluster connection details, etc. +} + +impl MyDeployer { + pub fn new() -> Self { + Self {} + } +} + +#[async_trait] +impl Deployer<()> for MyDeployer { + type Error = DynError; + + async fn deploy(&self, scenario: &Scenario<()>) -> Result { + // 1. Launch nodes using scenario.topology() + // 2. Wait for readiness (e.g., consensus info endpoint responds) + // 3. Build NodeClients for validators/executors + // 4. Spawn a block feed for expectations (optional but recommended) + // 5. Create NodeControlHandle if you support restarts (optional) + // 6. Return a Runner wrapping RunContext + CleanupGuard + + tracing::info!("deploying scenario with MyDeployer"); + + let topology: Option = None; // Some(topology) if you spawned one + let node_clients = NodeClients::default(); // Or NodeClients::from_topology(...) + + let (block_feed, block_feed_guard) = spawn_block_feed(&node_clients).await?; + + let telemetry = Metrics::empty(); // or Metrics::from_prometheus(...) 
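+        // Telemetry is optional here: Metrics::empty() simply means ctx.telemetry() has
+        // no PromQL backend to query (set NOMOS_METRICS_QUERY_URL to enable one).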
+ let node_control = None; // or Some(Arc) + + let context = RunContext::new( + scenario.topology().clone(), + topology, + node_clients, + scenario.duration(), + telemetry, + block_feed, + node_control, + ); + + // If you also have other resources to clean up (containers/pods/etc), + // wrap them in your own CleanupGuard implementation and call + // CleanupGuard::cleanup(Box::new(block_feed_guard)) inside it. + Ok(Runner::new(context, Some(Box::new(block_feed_guard)))) + } +} +``` + +**Key points:** +- `deploy()` must return a fully prepared `Runner` +- Block until nodes are ready before returning (avoid false negatives) +- Use a `CleanupGuard` to tear down resources on failure (and on `RunHandle` drop) +- If you want chaos workloads, also provide a `NodeControlHandle` via `RunContext` + +## Adding Topology Helpers + +**Steps:** +1. Extend `testing_framework_core::topology::config::TopologyBuilder` with new layouts +2. Keep defaults safe: ensure at least one participant, clamp dispersal factors +3. Consider adding configuration presets for specialized parameters + +**Example:** + +```rust +use testing_framework_core::topology::config::TopologyBuilder; + +impl TopologyBuilder { + /// Creates a "ring" topology where each node connects to its neighbors + pub fn network_ring(&mut self) -> &mut Self { + // Configure peer connections in a ring layout + self.with_network_layout(|layout| { + // Implement ring connection logic + layout.ring_peers() + }); + self + } + + /// Preset for high-throughput DA configuration + pub fn da_high_throughput(&mut self) -> &mut Self { + self.with_da_params(|params| { + params + .dispersal_factor(8) + .replication_factor(16) + .chunk_size(4096) + }); + self + } +} +``` + +**Key points:** +- Maintain method chaining (return `&mut Self`) +- Validate inputs: clamp factors, enforce minimums +- Document assumptions (e.g., "requires at least 4 nodes") + +## Adding a DSL Helper + +To expose your custom workload through the high-level DSL, add a trait extension: + +```rust +use testing_framework_core::scenario::Builder as ScenarioBuilder; + +pub trait MyWorkloadDsl { + fn my_workload_with( + self, + f: impl FnOnce(MyWorkloadBuilder) -> MyWorkloadBuilder, + ) -> Self; +} + +impl MyWorkloadDsl for ScenarioBuilder { + fn my_workload_with( + self, + f: impl FnOnce(MyWorkloadBuilder) -> MyWorkloadBuilder, + ) -> Self { + let builder = f(MyWorkloadBuilder::default()); + self.with_workload(builder.build()) + } +} +``` + +Users can then call: + +```rust +ScenarioBuilder::topology_with(|t| { /* ... */ }) + .my_workload_with(|w| { + w.target_rate(10) + .some_option(true) + }) + .build() +``` + +## See Also + +- [API Levels: Builder DSL vs. 
Direct](api-levels.md) - Understanding the two API levels +- [Custom Workload Example](custom-workload-example.md) - Complete runnable example +- [Internal Crate Reference](internal-crate-reference.md) - Where to add new code diff --git a/book/src/operations.md b/book/src/operations.md index a7eb5bc..a7ca56a 100644 --- a/book/src/operations.md +++ b/book/src/operations.md @@ -37,7 +37,7 @@ Both **LocalDeployer** and **ComposeDeployer** work in CI environments: **ComposeDeployer in CI (recommended):** - Better isolation (containerized) - Reproducible environment -- Includes Prometheus/observability +- Can integrate with external Prometheus/Grafana (optional) - **Trade-off:** Slower startup (Docker image build) - **Trade-off:** Requires Docker daemon @@ -60,7 +60,21 @@ scripts/run-examples.sh -t 60 -v 1 -e 1 compose scripts/run-examples.sh -t 60 -v 1 -e 1 k8s ``` -This script handles circuit setup, binary building/bundling, image building, and execution. +This script handles circuit setup, binary building/bundling, (local) image building, and execution. + +Note: for `k8s` runs against non-local clusters (e.g. EKS), the cluster pulls images from a registry, +so a local `docker build` is not used. In that case, build + push your image separately (see +`scripts/build_test_image.sh`) and set `NOMOS_TESTNET_IMAGE` to the pushed reference. + +### Quick Smoke Matrix (Host/Compose/K8s) + +For a small “does everything still run?” matrix (including `--no-image-build` variants where relevant), use: + +```bash +scripts/run-test-matrix.sh -t 120 -v 1 -e 1 +``` + +This is useful after making runner/image/script changes, and it forwards `--metrics-*` options through to `scripts/run-examples.sh`. **Environment overrides:** - `VERSION=v0.3.1` — Circuit version @@ -192,6 +206,7 @@ cargo run -p runner-examples --bin compose_runner **Compose-specific features:** - **Node control support**: Only runner that supports chaos testing (`.enable_node_control()` + chaos workloads) - **Observability is external**: Set `NOMOS_METRICS_*` / `NOMOS_GRAFANA_URL` to enable telemetry links and querying + - Quickstart: `scripts/setup-observability.sh compose up` then `scripts/setup-observability.sh compose env` **Important:** - Containers expect KZG parameters at `/kzgrs_test_params/kzgrs_test_params` (note the repeated filename) @@ -248,13 +263,13 @@ cargo run -p runner-examples --bin k8s_runner Notes: - `NOMOS_METRICS_QUERY_URL` must be reachable from the runner process (often via `kubectl port-forward`). - `NOMOS_METRICS_OTLP_INGEST_URL` must be reachable from nodes (pods/containers) and is backend-specific (Prometheus vs VictoriaMetrics paths differ). + - Quickstart installer: `scripts/setup-observability.sh k8s install` then `scripts/setup-observability.sh k8s env` (optional dashboards: `scripts/setup-observability.sh k8s dashboards`) **Via `scripts/run-examples.sh` (optional):** ```bash scripts/run-examples.sh -t 60 -v 1 -e 1 k8s \ --metrics-query-url http://your-prometheus:9090 \ - --metrics-otlp-ingest-url http://your-prometheus:9090/api/v1/otlp/v1/metrics \ - --grafana-url http://your-grafana:3000 + --metrics-otlp-ingest-url http://your-prometheus:9090/api/v1/otlp/v1/metrics ``` **In code (optional):** @@ -565,12 +580,15 @@ cargo run -p runner-examples --bin local_runner Runners expose metrics and node HTTP endpoints for expectation code and debugging: **Prometheus-compatible metrics querying (optional):** -- The framework does **not** deploy Prometheus. +- Runners do **not** provision Prometheus automatically. 
+- For a ready-to-run stack, use `scripts/setup-observability.sh`: + - Compose: `scripts/setup-observability.sh compose up` then `scripts/setup-observability.sh compose env` + - K8s: `scripts/setup-observability.sh k8s install` then `scripts/setup-observability.sh k8s env` - Provide `NOMOS_METRICS_QUERY_URL` (PromQL base URL) to enable `ctx.telemetry()` queries. - Access from expectations when configured: `ctx.telemetry().prometheus().map(|p| p.base_url())` **Grafana (optional):** -- The framework does **not** deploy Grafana. +- Runners do **not** provision Grafana automatically (but `scripts/setup-observability.sh` can). - If you set `NOMOS_GRAFANA_URL`, the deployer prints it in `TESTNET_ENDPOINTS`. - Dashboards live in `testing-framework/assets/stack/monitoring/grafana/dashboards/` for import into your Grafana. diff --git a/book/src/quickstart.md b/book/src/quickstart.md index d326283..a51c02e 100644 --- a/book/src/quickstart.md +++ b/book/src/quickstart.md @@ -163,7 +163,7 @@ pub fn step_5_run_duration() -> testing_framework_core::scenario::Builder<()> { } ``` -Run for 60 seconds (~27 blocks with default 2s slots, 0.9 coefficient). Framework ensures this is at least 2× the consensus slot duration. +Run for 60 seconds (~27 blocks with default 2s slots, 0.9 coefficient). Framework ensures this is at least 2× the consensus slot duration. Adjust consensus timing via `CONSENSUS_SLOT_TIME` and `CONSENSUS_ACTIVE_SLOT_COEFF`. ### 6. Deploy and Execute @@ -239,7 +239,18 @@ POL_PROOF_DEV_MODE=true \ cargo run -p runner-examples --bin compose_runner ``` -**Benefit:** Reproducible containerized environment with Prometheus at `http://localhost:9090`. +**Benefit:** Reproducible containerized environment (Dockerized nodes, repeatable deployments). + +**Optional: Prometheus + Grafana** + +The runner can integrate with external observability endpoints. For a ready-to-run local stack: + +```bash +scripts/setup-observability.sh compose up +eval "$(scripts/setup-observability.sh compose env)" +``` + +Then run your compose scenario as usual (the environment variables enable PromQL querying and node OTLP metrics export). **Note:** Compose expects KZG parameters at `/kzgrs_test_params/kzgrs_test_params` inside containers (the directory name is repeated as the filename). diff --git a/book/src/testing-philosophy.md b/book/src/testing-philosophy.md index e68741a..e920ce8 100644 --- a/book/src/testing-philosophy.md +++ b/book/src/testing-philosophy.md @@ -36,7 +36,7 @@ Reason in **blocks** and **consensus intervals**, not wall-clock seconds. **Consensus defaults:** - Slot duration: 2 seconds (NTP-synchronized, configurable via `CONSENSUS_SLOT_TIME`) -- Active slot coefficient: 0.9 (90% block probability per slot) +- Active slot coefficient: 0.9 (90% block probability per slot, configurable via `CONSENSUS_ACTIVE_SLOT_COEFF`) - Expected rate: ~27 blocks per minute ```rust @@ -159,7 +159,7 @@ pub fn minimum_run_windows() { **Note:** Block counts assume default consensus parameters: - Slot duration: 2 seconds (configurable via `CONSENSUS_SLOT_TIME`) -- Active slot coefficient: 0.9 (90% block probability per slot) +- Active slot coefficient: 0.9 (90% block probability per slot, configurable via `CONSENSUS_ACTIVE_SLOT_COEFF`) - Formula: `blocks ≈ (duration / slot_duration) × active_slot_coeff` If upstream changes these parameters, adjust your duration expectations accordingly. 
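+
+As a quick sanity check, the formula above can be evaluated directly (illustrative helper only, not a framework API):
+
+```rust
+use std::time::Duration;
+
+/// Rough block-count estimate: blocks ≈ (duration / slot_duration) × active_slot_coeff.
+fn expected_blocks(run: Duration, slot: Duration, active_slot_coeff: f64) -> f64 {
+    (run.as_secs_f64() / slot.as_secs_f64()) * active_slot_coeff
+}
+
+// 60 s with 2 s slots and a 0.9 coefficient → ~27 blocks.
+let estimate = expected_blocks(Duration::from_secs(60), Duration::from_secs(2), 0.9);
+```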
diff --git a/examples/Cargo.toml b/examples/Cargo.toml index 24c80dd..82590d5 100644 --- a/examples/Cargo.toml +++ b/examples/Cargo.toml @@ -12,7 +12,7 @@ version = "0.1.0" [dependencies] anyhow = "1" cucumber = { version = "0.22.0" } -cucumber_ext = { path = "../testing-framework/cucumber_ext" } +cucumber_ext = { path = "../testing-framework/cucumber" } testing-framework-core = { workspace = true } testing-framework-runner-compose = { workspace = true } testing-framework-runner-k8s = { workspace = true } diff --git a/examples/src/bin/compose_runner.rs b/examples/src/bin/compose_runner.rs index 4f39c83..e129305 100644 --- a/examples/src/bin/compose_runner.rs +++ b/examples/src/bin/compose_runner.rs @@ -1,4 +1,9 @@ -use std::{env, process, time::Duration}; +use std::{ + env, fs, + path::{Path, PathBuf}, + process, + time::Duration, +}; use anyhow::{Context as _, Result}; use runner_examples::{ChaosBuilderExt as _, ScenarioBuilderExt as _, read_env_any}; @@ -25,6 +30,8 @@ const DA_BLOB_RATE: u64 = 1; #[tokio::main] async fn main() { + init_node_log_dir_defaults(); + // Compose containers mount KZG params at /kzgrs_test_params; ensure the // generated configs point there unless the caller overrides explicitly. if env::var("NOMOS_KZGRS_PARAMS_PATH").is_err() { @@ -57,6 +64,35 @@ async fn main() { } } +fn init_node_log_dir_defaults() { + if env::var_os("NOMOS_LOG_DIR").is_some() { + return; + } + + let repo_root = repo_root(); + let host_dir = repo_root.join("tmp").join("node-logs"); + let _ = fs::create_dir_all(&host_dir); + + // In compose mode, node processes run inside containers; configs should + // point to the container path, while the compose deployer mounts the host + // repo's `tmp/node-logs` there. + unsafe { + env::set_var("NOMOS_LOG_DIR", "/tmp/node-logs"); + } +} + +fn repo_root() -> PathBuf { + env::var("CARGO_WORKSPACE_DIR") + .map(PathBuf::from) + .ok() + .or_else(|| { + Path::new(env!("CARGO_MANIFEST_DIR")) + .parent() + .map(Path::to_path_buf) + }) + .expect("repo root must be discoverable from CARGO_WORKSPACE_DIR or CARGO_MANIFEST_DIR") +} + async fn run_compose_case( validators: usize, executors: usize, diff --git a/examples/src/bin/cucumber_compose.rs b/examples/src/bin/cucumber_compose.rs index 263f143..4b05005 100644 --- a/examples/src/bin/cucumber_compose.rs +++ b/examples/src/bin/cucumber_compose.rs @@ -1,8 +1,11 @@ -use runner_examples::cucumber::{Mode, init_logging_defaults, init_tracing, run}; +use runner_examples::cucumber::{ + Mode, init_logging_defaults, init_node_log_dir_defaults, init_tracing, run, +}; #[tokio::main(flavor = "current_thread")] async fn main() { init_logging_defaults(); + init_node_log_dir_defaults(Mode::Compose); init_tracing(); run(Mode::Compose).await; diff --git a/examples/src/bin/cucumber_host.rs b/examples/src/bin/cucumber_host.rs index 00746e8..b98945e 100644 --- a/examples/src/bin/cucumber_host.rs +++ b/examples/src/bin/cucumber_host.rs @@ -1,8 +1,11 @@ -use runner_examples::cucumber::{Mode, init_logging_defaults, init_tracing, run}; +use runner_examples::cucumber::{ + Mode, init_logging_defaults, init_node_log_dir_defaults, init_tracing, run, +}; #[tokio::main(flavor = "current_thread")] async fn main() { init_logging_defaults(); + init_node_log_dir_defaults(Mode::Host); init_tracing(); run(Mode::Host).await; diff --git a/examples/src/bin/local_runner.rs b/examples/src/bin/local_runner.rs index dfd7575..84a4f76 100644 --- a/examples/src/bin/local_runner.rs +++ b/examples/src/bin/local_runner.rs @@ -1,4 +1,9 @@ -use std::{env, process, 
time::Duration}; +use std::{ + env, fs, + path::{Path, PathBuf}, + process, + time::Duration, +}; use anyhow::{Context as _, Result}; use runner_examples::{ScenarioBuilderExt as _, read_env_any}; @@ -12,11 +17,13 @@ const DEFAULT_RUN_SECS: u64 = 60; const MIXED_TXS_PER_BLOCK: u64 = 5; const TOTAL_WALLETS: usize = 1000; const TRANSACTION_WALLETS: usize = 500; -const DA_BLOB_RATE: u64 = 1; +const DA_BLOB_RATE: u64 = 3; const SMOKE_RUN_SECS_MAX: u64 = 30; #[tokio::main] async fn main() { + init_node_log_dir_defaults(); + tracing_subscriber::fmt::init(); if env::var("POL_PROOF_DEV_MODE").is_err() { @@ -39,6 +46,30 @@ async fn main() { } } +fn init_node_log_dir_defaults() { + if env::var_os("NOMOS_LOG_DIR").is_some() { + return; + } + + let host_dir = repo_root().join("tmp").join("node-logs"); + let _ = fs::create_dir_all(&host_dir); + unsafe { + env::set_var("NOMOS_LOG_DIR", host_dir); + } +} + +fn repo_root() -> PathBuf { + env::var("CARGO_WORKSPACE_DIR") + .map(PathBuf::from) + .ok() + .or_else(|| { + Path::new(env!("CARGO_MANIFEST_DIR")) + .parent() + .map(Path::to_path_buf) + }) + .expect("repo root must be discoverable from CARGO_WORKSPACE_DIR or CARGO_MANIFEST_DIR") +} + async fn run_local_case(validators: usize, executors: usize, run_duration: Duration) -> Result<()> { info!( validators, diff --git a/examples/src/cucumber.rs b/examples/src/cucumber.rs index 0eeb07e..9c720ea 100644 --- a/examples/src/cucumber.rs +++ b/examples/src/cucumber.rs @@ -1,3 +1,8 @@ +use std::{ + env, fs, + path::{Path, PathBuf}, +}; + use cucumber::World; use cucumber_ext::TestingFrameworkWorld; use tracing_subscriber::{EnvFilter, fmt}; @@ -31,11 +36,36 @@ fn is_compose( pub fn init_logging_defaults() { set_default_env("POL_PROOF_DEV_MODE", "true"); set_default_env("NOMOS_TESTS_KEEP_LOGS", "1"); - set_default_env("NOMOS_LOG_DIR", ".tmp/cucumber-logs"); set_default_env("NOMOS_LOG_LEVEL", "info"); set_default_env("RUST_LOG", "info"); } +pub fn init_node_log_dir_defaults(mode: Mode) { + if env::var_os("NOMOS_LOG_DIR").is_some() { + return; + } + + let host_dir = repo_root().join("tmp").join("node-logs"); + let _ = fs::create_dir_all(&host_dir); + + match mode { + Mode::Host => set_default_env("NOMOS_LOG_DIR", &host_dir.display().to_string()), + Mode::Compose => set_default_env("NOMOS_LOG_DIR", "/tmp/node-logs"), + } +} + +fn repo_root() -> PathBuf { + env::var("CARGO_WORKSPACE_DIR") + .map(PathBuf::from) + .ok() + .or_else(|| { + Path::new(env!("CARGO_MANIFEST_DIR")) + .parent() + .map(Path::to_path_buf) + }) + .expect("repo root must be discoverable from CARGO_WORKSPACE_DIR or CARGO_MANIFEST_DIR") +} + pub fn init_tracing() { let filter = EnvFilter::try_from_default_env().unwrap_or_else(|_| EnvFilter::new("info")); let _ = fmt().with_env_filter(filter).with_target(true).try_init(); diff --git a/scripts/build-bundle.sh b/scripts/build-bundle.sh index 53992d3..8007c6e 100755 --- a/scripts/build-bundle.sh +++ b/scripts/build-bundle.sh @@ -196,6 +196,24 @@ build_bundle::clean_cargo_linux_cache() { rm -rf "${ROOT_DIR}/.tmp/cargo-linux/registry" "${ROOT_DIR}/.tmp/cargo-linux/git" } +build_bundle::docker_platform_suffix() { + # Map a docker platform string (e.g. linux/amd64) to a filesystem-safe suffix + # used for arch-specific target dirs, to avoid mixing build artifacts between + # different container architectures. 
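+  # e.g. "linux/amd64" -> "-amd64", "linux/arm64" -> "-arm64", "" -> "".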
+ local platform="${1:-}" + if [ -z "${platform}" ]; then + echo "" + return 0 + fi + platform="${platform#linux/}" + platform="${platform//\//-}" + if [ -z "${platform}" ] || [ "${platform}" = "linux" ]; then + echo "" + return 0 + fi + echo "-${platform}" +} + build_bundle::maybe_run_linux_build_in_docker() { # With `set -e`, this function must return 0 when no Docker cross-build is needed. if [ "${PLATFORM}" != "linux" ] || [ "$(uname -s)" = "Linux" ] || [ -n "${BUNDLE_IN_CONTAINER:-}" ]; then @@ -224,7 +242,10 @@ build_bundle::maybe_run_linux_build_in_docker() { echo "==> Building Linux bundle inside Docker" local container_output="/workspace${OUTPUT#"${ROOT_DIR}"}" - mkdir -p "${ROOT_DIR}/.tmp/cargo-linux" "${ROOT_DIR}/.tmp/nomos-node-linux-target" + local target_suffix + target_suffix="$(build_bundle::docker_platform_suffix "${DOCKER_PLATFORM}")" + local host_target_dir="${ROOT_DIR}/.tmp/nomos-node-linux-target${target_suffix}" + mkdir -p "${ROOT_DIR}/.tmp/cargo-linux" "${host_target_dir}" local -a features_args=() if [ -n "${NOMOS_EXTRA_FEATURES:-}" ]; then @@ -242,15 +263,16 @@ build_bundle::maybe_run_linux_build_in_docker() { -e VERSION="${VERSION}" \ -e NOMOS_NODE_REV="${NOMOS_NODE_REV}" \ -e NOMOS_NODE_PATH="${node_path_env}" \ + -e NOMOS_BUNDLE_DOCKER_PLATFORM="${DOCKER_PLATFORM}" \ -e NOMOS_CIRCUITS="/workspace/.tmp/nomos-circuits-linux" \ -e STACK_DIR="/workspace/.tmp/nomos-circuits-linux" \ -e HOST_DIR="/workspace/.tmp/nomos-circuits-linux" \ -e NOMOS_EXTRA_FEATURES="${NOMOS_EXTRA_FEATURES:-}" \ -e BUNDLE_IN_CONTAINER=1 \ -e CARGO_HOME=/workspace/.tmp/cargo-linux \ - -e CARGO_TARGET_DIR=/workspace/.tmp/nomos-node-linux-target \ + -e CARGO_TARGET_DIR="/workspace/.tmp/nomos-node-linux-target${target_suffix}" \ -v "${ROOT_DIR}/.tmp/cargo-linux":/workspace/.tmp/cargo-linux \ - -v "${ROOT_DIR}/.tmp/nomos-node-linux-target":/workspace/.tmp/nomos-node-linux-target \ + -v "${host_target_dir}:/workspace/.tmp/nomos-node-linux-target${target_suffix}" \ -v "${ROOT_DIR}:/workspace" \ "${extra_mounts[@]}" \ -w /workspace \ @@ -267,7 +289,14 @@ build_bundle::prepare_circuits() { NODE_TARGET="${ROOT_DIR}/.tmp/nomos-node-host-target" else CIRCUITS_DIR="${ROOT_DIR}/.tmp/nomos-circuits-linux" - NODE_TARGET="${ROOT_DIR}/.tmp/nomos-node-linux-target" + # When building Linux bundles in Docker, avoid reusing the same target dir + # across different container architectures (e.g. linux/arm64 vs linux/amd64), + # as the native-host `target/debug` layout would otherwise get mixed. 
+ local target_suffix="" + if [ -n "${BUNDLE_IN_CONTAINER:-}" ]; then + target_suffix="$(build_bundle::docker_platform_suffix "${NOMOS_BUNDLE_DOCKER_PLATFORM:-}")" + fi + NODE_TARGET="${ROOT_DIR}/.tmp/nomos-node-linux-target${target_suffix}" fi NODE_SRC_DEFAULT="${ROOT_DIR}/.tmp/nomos-node-${PLATFORM}-src" diff --git a/scripts/observability/compose/docker-compose.yml b/scripts/observability/compose/docker-compose.yml new file mode 100644 index 0000000..618944d --- /dev/null +++ b/scripts/observability/compose/docker-compose.yml @@ -0,0 +1,38 @@ +services: + prometheus: + image: prom/prometheus:v2.53.0 + command: + - --config.file=/etc/prometheus/prometheus.yml + - --storage.tsdb.path=/prometheus + # Exposes OTLP HTTP ingest at /api/v1/otlp/v1/metrics + - --enable-feature=otlp-write-receiver + - --web.enable-lifecycle + - --web.enable-admin-api + volumes: + - ./prometheus/prometheus.yml:/etc/prometheus/prometheus.yml:ro + - prometheus-data:/prometheus + ports: + - "9090:9090" + + grafana: + image: grafana/grafana:11.4.0 + depends_on: + - prometheus + env_file: + - ../../../testing-framework/assets/stack/monitoring/grafana/plugins.env + environment: + GF_SECURITY_ADMIN_USER: admin + GF_SECURITY_ADMIN_PASSWORD: admin + GF_USERS_ALLOW_SIGN_UP: "false" + volumes: + - grafana-data:/var/lib/grafana + - ../../../testing-framework/assets/stack/monitoring/grafana/grafana.ini:/etc/grafana/grafana.ini:ro + - ../../../testing-framework/assets/stack/monitoring/grafana/datasources.yaml:/etc/grafana/provisioning/datasources/datasources.yaml:ro + - ../../../testing-framework/assets/stack/monitoring/grafana/dashboards.yml:/etc/grafana/provisioning/dashboards/dashboards.yml:ro + - ../../../testing-framework/assets/stack/monitoring/grafana/dashboards:/var/lib/grafana/dashboards:ro + ports: + - "3000:3000" + +volumes: + prometheus-data: {} + grafana-data: {} diff --git a/scripts/observability/compose/prometheus/prometheus.yml b/scripts/observability/compose/prometheus/prometheus.yml new file mode 100644 index 0000000..6ca46b2 --- /dev/null +++ b/scripts/observability/compose/prometheus/prometheus.yml @@ -0,0 +1,10 @@ +global: + scrape_interval: 15s + evaluation_interval: 15s + external_labels: + monitor: "NomosTesting" + +scrape_configs: + - job_name: prometheus + static_configs: + - targets: ["prometheus:9090"] diff --git a/scripts/observability/k8s/kube-prometheus-stack.values.yaml b/scripts/observability/k8s/kube-prometheus-stack.values.yaml new file mode 100644 index 0000000..533f4aa --- /dev/null +++ b/scripts/observability/k8s/kube-prometheus-stack.values.yaml @@ -0,0 +1,19 @@ +prometheus: + prometheusSpec: + enableOTLPReceiver: true + additionalArgs: + - name: web.enable-admin-api + # Basic OTLP → Prometheus translation defaults are fine for most setups. 
+ # See: https://prometheus.io/docs/guides/opentelemetry/ + otlp: {} + +grafana: + adminUser: admin + adminPassword: admin + sidecar: + dashboards: + enabled: true + label: grafana_dashboard + labelValue: "1" + datasources: + enabled: true diff --git a/scripts/run-examples.sh b/scripts/run-examples.sh index 7c09749..45c05bd 100755 --- a/scripts/run-examples.sh +++ b/scripts/run-examples.sh @@ -23,6 +23,15 @@ readonly DEFAULT_PRIVATE_AWS_REGION="ap-southeast-2" readonly DEFAULT_PULL_POLICY_LOCAL="IfNotPresent" readonly DEFAULT_PULL_POLICY_ECR="Always" readonly DOCKER_DESKTOP_CONTEXT="docker-desktop" +readonly DEFAULT_K8S_ECR_SKIP_IMAGE_BUILD="1" + +run_examples::cleanup() { + rm -f "${SETUP_OUT:-}" 2>/dev/null || true +} + +# Avoid inheriting environment-provided EXIT traps (e.g., from BASH_ENV) that can +# reference missing functions and fail at script termination. +trap run_examples::cleanup EXIT run_examples::usage() { cat <-.tar.gz) NOMOS_SKIP_IMAGE_BUILD Set to 1 to skip rebuilding the compose/k8s image + NOMOS_FORCE_IMAGE_BUILD Set to 1 to force image rebuild even for k8s ECR mode NOMOS_METRICS_QUERY_URL PromQL base URL for the runner process (optional) NOMOS_METRICS_OTLP_INGEST_URL Full OTLP HTTP ingest URL for node metrics export (optional) NOMOS_GRAFANA_URL Grafana base URL for printing/logging (optional) + +Notes: + - For k8s runs on non-docker-desktop clusters (e.g. EKS), a locally built Docker image is not + visible to the cluster. By default, this script skips local image rebuilds in that case. + If you need a custom image, run scripts/build_test_image.sh and push it to a registry the + cluster can pull from, then set NOMOS_TESTNET_IMAGE accordingly. EOF } @@ -104,7 +121,6 @@ run_examples::parse_args() { IMAGE_SELECTION_MODE="auto" METRICS_QUERY_URL="" METRICS_OTLP_INGEST_URL="" - GRAFANA_URL="" RUN_SECS_RAW_SPECIFIED="" @@ -166,14 +182,6 @@ run_examples::parse_args() { METRICS_OTLP_INGEST_URL="${1#*=}" shift ;; - --grafana-url) - GRAFANA_URL="${2:-}" - shift 2 - ;; - --grafana-url=*) - GRAFANA_URL="${1#*=}" - shift - ;; --external-prometheus) METRICS_QUERY_URL="${2:-}" shift 2 @@ -279,12 +287,20 @@ run_examples::select_image() { run_examples::fail_with_usage "Unknown image selection mode: ${selection}" fi + export NOMOS_IMAGE_SELECTION="${selection}" export IMAGE_TAG="${IMAGE}" export NOMOS_TESTNET_IMAGE="${IMAGE}" if [ "${MODE}" = "k8s" ]; then if [ "${selection}" = "ecr" ]; then export NOMOS_KZG_MODE="${NOMOS_KZG_MODE:-inImage}" + # A locally built Docker image isn't visible to remote clusters (e.g. EKS). Default to + # skipping the local rebuild, unless the user explicitly set NOMOS_SKIP_IMAGE_BUILD or + # overrides via NOMOS_FORCE_IMAGE_BUILD=1. 
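+      # e.g. force a rebuild anyway: NOMOS_FORCE_IMAGE_BUILD=1 scripts/run-examples.sh -t 60 -v 1 -e 1 k8s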
+ if [ "${NOMOS_FORCE_IMAGE_BUILD:-0}" != "1" ]; then + NOMOS_SKIP_IMAGE_BUILD="${NOMOS_SKIP_IMAGE_BUILD:-${DEFAULT_K8S_ECR_SKIP_IMAGE_BUILD}}" + export NOMOS_SKIP_IMAGE_BUILD + fi else export NOMOS_KZG_MODE="${NOMOS_KZG_MODE:-hostPath}" fi @@ -548,9 +564,6 @@ run_examples::run() { if [ -n "${METRICS_OTLP_INGEST_URL}" ]; then export NOMOS_METRICS_OTLP_INGEST_URL="${METRICS_OTLP_INGEST_URL}" fi - if [ -n "${GRAFANA_URL}" ]; then - export NOMOS_GRAFANA_URL="${GRAFANA_URL}" - fi echo "==> Running ${BIN} for ${RUN_SECS}s (mode=${MODE}, image=${IMAGE})" cd "${ROOT_DIR}" @@ -576,8 +589,6 @@ run_examples::main() { echo "==> Using restored circuits/binaries bundle" SETUP_OUT="$(common::tmpfile nomos-setup-output.XXXXXX)" - cleanup() { rm -f "${SETUP_OUT}" 2>/dev/null || true; } - trap cleanup EXIT run_examples::maybe_rebuild_image run_examples::maybe_restore_host_after_image diff --git a/scripts/run-test-matrix.sh b/scripts/run-test-matrix.sh index e652fb7..814f5d6 100755 --- a/scripts/run-test-matrix.sh +++ b/scripts/run-test-matrix.sh @@ -25,7 +25,6 @@ Options: --force-k8s-image-build Allow the k8s "rebuild image" run even on non-docker-desktop clusters --metrics-query-url URL Forwarded to scripts/run-examples.sh (optional) --metrics-otlp-ingest-url URL Forwarded to scripts/run-examples.sh (optional) - --grafana-url URL Forwarded to scripts/run-examples.sh (optional) -h, --help Show this help Notes: @@ -51,7 +50,6 @@ matrix::parse_args() { FORCE_K8S_IMAGE_BUILD=0 METRICS_QUERY_URL="" METRICS_OTLP_INGEST_URL="" - GRAFANA_URL="" while [ "$#" -gt 0 ]; do case "$1" in @@ -71,8 +69,6 @@ matrix::parse_args() { --metrics-query-url=*) METRICS_QUERY_URL="${1#*=}"; shift ;; --metrics-otlp-ingest-url) METRICS_OTLP_INGEST_URL="${2:-}"; shift 2 ;; --metrics-otlp-ingest-url=*) METRICS_OTLP_INGEST_URL="${1#*=}"; shift ;; - --grafana-url) GRAFANA_URL="${2:-}"; shift 2 ;; - --grafana-url=*) GRAFANA_URL="${1#*=}"; shift ;; *) matrix::die "Unknown argument: $1" ;; esac done @@ -104,9 +100,6 @@ matrix::forwarded_args() { if [ -n "${METRICS_OTLP_INGEST_URL}" ]; then args+=(--metrics-otlp-ingest-url "${METRICS_OTLP_INGEST_URL}") fi - if [ -n "${GRAFANA_URL}" ]; then - args+=(--grafana-url "${GRAFANA_URL}") - fi printf '%s\0' "${args[@]}" } @@ -148,6 +141,7 @@ matrix::k8s_context() { matrix::main() { ROOT_DIR="$(common::repo_root)" export ROOT_DIR + export RUST_LOG="${RUST_LOG:-info}" matrix::parse_args "$@" matrix::split_modes @@ -211,11 +205,17 @@ matrix::main() { fi if [ "${ctx}" = "docker-desktop" ] || [ "${FORCE_K8S_IMAGE_BUILD}" -eq 1 ]; then + # On non-docker-desktop clusters, run-examples.sh defaults to skipping local image builds + # since the cluster can't see them. Honor the matrix "force" option by overriding. 
+ if [ "${ctx}" != "docker-desktop" ] && [ "${FORCE_K8S_IMAGE_BUILD}" -eq 1 ]; then + export NOMOS_FORCE_IMAGE_BUILD=1 + fi matrix::run_case "k8s.image_build" \ "${ROOT_DIR}/scripts/run-examples.sh" \ -t "${RUN_SECS}" -v "${VALIDATORS}" -e "${EXECUTORS}" \ "${forward[@]}" \ k8s + unset NOMOS_FORCE_IMAGE_BUILD || true else echo "==> [k8s] Detected context '${ctx}'; skipping image-build variant (use --force-k8s-image-build to override)" fi @@ -259,4 +259,3 @@ matrix::main() { if [[ "${BASH_SOURCE[0]}" == "$0" ]]; then matrix::main "$@" fi - diff --git a/scripts/setup-observability.sh b/scripts/setup-observability.sh new file mode 100755 index 0000000..5cceb31 --- /dev/null +++ b/scripts/setup-observability.sh @@ -0,0 +1,168 @@ +#!/usr/bin/env bash +set -euo pipefail + +SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" +# shellcheck disable=SC1091 +. "${SCRIPT_DIR}/common.sh" + +common::ensure_bash "$@" + +ROOT="$(common::repo_root)" + +usage() { + cat <<'USAGE' +Usage: + scripts/setup-observability.sh compose up|down|logs|env + scripts/setup-observability.sh k8s install|uninstall|dashboards|env + +Compose: + - Runs Prometheus (+ OTLP receiver) and Grafana via docker compose. + - Prints NOMOS_METRICS_* / NOMOS_GRAFANA_URL exports to wire into runs. + +Kubernetes: + - Installs prometheus-community/kube-prometheus-stack into namespace + "nomos-observability" and optionally loads Nomos Grafana dashboards. + - Prints port-forward commands + NOMOS_METRICS_* / NOMOS_GRAFANA_URL exports. +USAGE +} + +require_cmd() { + command -v "$1" >/dev/null 2>&1 || common::die "Missing required command: $1" +} + +compose_file() { + echo "${ROOT}/scripts/observability/compose/docker-compose.yml" +} + +compose_run() { + local file + file="$(compose_file)" + common::require_file "${file}" + docker compose -f "${file}" "$@" +} + +compose_env() { + cat <<'EOF' +export NOMOS_METRICS_QUERY_URL=http://localhost:9090 +export NOMOS_METRICS_OTLP_INGEST_URL=http://host.docker.internal:9090/api/v1/otlp/v1/metrics +export NOMOS_GRAFANA_URL=http://localhost:3000 +EOF +} + +k8s_namespace() { echo "nomos-observability"; } +k8s_release() { echo "nomos-observability"; } +k8s_values() { echo "${ROOT}/scripts/observability/k8s/kube-prometheus-stack.values.yaml"; } + +k8s_install() { + require_cmd kubectl + require_cmd helm + + local ns release values + ns="$(k8s_namespace)" + release="$(k8s_release)" + values="$(k8s_values)" + + common::require_file "${values}" + + kubectl get ns "${ns}" >/dev/null 2>&1 || kubectl create ns "${ns}" + + if ! 
helm repo list | grep -q '^prometheus-community[[:space:]]'; then + helm repo add prometheus-community https://prometheus-community.github.io/helm-charts + fi + helm repo update prometheus-community + + helm upgrade --install "${release}" prometheus-community/kube-prometheus-stack \ + -n "${ns}" \ + -f "${values}" + + kubectl -n "${ns}" wait --for=condition=Available deploy -l "release=${release}" --timeout=10m || true + kubectl -n "${ns}" wait --for=condition=Ready pod -l "release=${release}" --timeout=10m || true +} + +k8s_uninstall() { + require_cmd kubectl + require_cmd helm + + local ns release + ns="$(k8s_namespace)" + release="$(k8s_release)" + + helm uninstall "${release}" -n "${ns}" 2>/dev/null || true + kubectl delete ns "${ns}" --ignore-not-found +} + +k8s_apply_dashboards() { + require_cmd kubectl + + local ns dash_dir + ns="$(k8s_namespace)" + dash_dir="${ROOT}/testing-framework/assets/stack/monitoring/grafana/dashboards" + + [ -d "${dash_dir}" ] || common::die "Missing dashboards directory: ${dash_dir}" + + local file base name + for file in "${dash_dir}"/*.json; do + base="$(basename "${file}" .json)" + name="nomos-dashboard-${base//[^a-zA-Z0-9-]/-}" + kubectl -n "${ns}" create configmap "${name}" \ + --from-file="$(basename "${file}")=${file}" \ + --dry-run=client -o yaml | kubectl apply -f - + kubectl -n "${ns}" label configmap "${name}" grafana_dashboard=1 --overwrite >/dev/null + done +} + +k8s_env() { + local ns release + ns="$(k8s_namespace)" + release="$(k8s_release)" + + cat < Self { + pub fn default_for_participants(n_participants: usize) -> Self { + let active_slot_coeff = env::var(Self::CONSENSUS_ACTIVE_SLOT_COEFF_VAR) + .map(|s| { + f64::from_str(&s).unwrap_or_else(|err| { + panic!( + "invalid {}='{}' (expected a float in (0.0, 1.0]): {err}", + Self::CONSENSUS_ACTIVE_SLOT_COEFF_VAR, + s + ) + }) + }) + .unwrap_or(Self::DEFAULT_ACTIVE_SLOT_COEFF); + + assert!( + (0.0..=1.0).contains(&active_slot_coeff) && active_slot_coeff > 0.0, + "{} must be in (0.0, 1.0], got {}", + Self::CONSENSUS_ACTIVE_SLOT_COEFF_VAR, + active_slot_coeff + ); + Self { n_participants, // by setting the slot coeff to 1, we also increase the probability of multiple blocks @@ -45,7 +69,7 @@ impl ConsensusParams { // deciding on the longest chain. 
security_param: NonZero::new(10).unwrap(), // a block should be produced (on average) every slot - active_slot_coeff: 0.9, + active_slot_coeff, } } } diff --git a/testing-framework/configs/src/topology/configs/time.rs b/testing-framework/configs/src/topology/configs/time.rs index 14a5139..6d62ad1 100644 --- a/testing-framework/configs/src/topology/configs/time.rs +++ b/testing-framework/configs/src/topology/configs/time.rs @@ -27,6 +27,7 @@ pub fn default_time_config() -> GeneralTimeConfig { let slot_duration = std::env::var(CONSENSUS_SLOT_TIME_VAR) .map(|s| ::from_str(&s).unwrap()) .unwrap_or(DEFAULT_SLOT_TIME); + GeneralTimeConfig { slot_duration: Duration::from_secs(slot_duration), chain_start_time: OffsetDateTime::now_utc(), diff --git a/testing-framework/cucumber_ext/Cargo.toml b/testing-framework/cucumber/Cargo.toml similarity index 100% rename from testing-framework/cucumber_ext/Cargo.toml rename to testing-framework/cucumber/Cargo.toml diff --git a/testing-framework/cucumber_ext/src/lib.rs b/testing-framework/cucumber/src/lib.rs similarity index 100% rename from testing-framework/cucumber_ext/src/lib.rs rename to testing-framework/cucumber/src/lib.rs diff --git a/testing-framework/cucumber_ext/src/steps/mod.rs b/testing-framework/cucumber/src/steps/mod.rs similarity index 100% rename from testing-framework/cucumber_ext/src/steps/mod.rs rename to testing-framework/cucumber/src/steps/mod.rs diff --git a/testing-framework/cucumber_ext/src/steps/run.rs b/testing-framework/cucumber/src/steps/run.rs similarity index 100% rename from testing-framework/cucumber_ext/src/steps/run.rs rename to testing-framework/cucumber/src/steps/run.rs diff --git a/testing-framework/cucumber_ext/src/steps/scenario.rs b/testing-framework/cucumber/src/steps/scenario.rs similarity index 100% rename from testing-framework/cucumber_ext/src/steps/scenario.rs rename to testing-framework/cucumber/src/steps/scenario.rs diff --git a/testing-framework/cucumber_ext/src/steps/workloads.rs b/testing-framework/cucumber/src/steps/workloads.rs similarity index 100% rename from testing-framework/cucumber_ext/src/steps/workloads.rs rename to testing-framework/cucumber/src/steps/workloads.rs diff --git a/testing-framework/cucumber_ext/src/world.rs b/testing-framework/cucumber/src/world.rs similarity index 100% rename from testing-framework/cucumber_ext/src/world.rs rename to testing-framework/cucumber/src/world.rs diff --git a/testing-framework/deployers/compose/src/descriptor/mod.rs b/testing-framework/deployers/compose/src/descriptor/mod.rs index 08921cb..5dfdb0d 100644 --- a/testing-framework/deployers/compose/src/descriptor/mod.rs +++ b/testing-framework/deployers/compose/src/descriptor/mod.rs @@ -1,3 +1,8 @@ +use std::{ + env, + path::{Path, PathBuf}, +}; + use serde::Serialize; use testing_framework_core::{ constants::{DEFAULT_CFGSYNC_PORT, kzg_container_path}, @@ -150,9 +155,26 @@ fn base_volumes(use_kzg_mount: bool) -> Vec { if use_kzg_mount { volumes.push("./kzgrs_test_params:/kzgrs_test_params:z".into()); } + if let Some(host_log_dir) = repo_root() + .map(|root| root.join("tmp").join("node-logs")) + .map(|dir| dir.display().to_string()) + { + volumes.push(format!("{host_log_dir}:/tmp/node-logs")); + } volumes } +fn repo_root() -> Option { + if let Ok(root) = env::var("CARGO_WORKSPACE_DIR") { + return Some(PathBuf::from(root)); + } + Path::new(env!("CARGO_MANIFEST_DIR")) + .parent() + .and_then(Path::parent) + .and_then(Path::parent) + .map(Path::to_path_buf) +} + fn default_extra_hosts() -> Vec { 
host_gateway_entry().into_iter().collect() } diff --git a/testing-framework/deployers/compose/src/infrastructure/environment.rs b/testing-framework/deployers/compose/src/infrastructure/environment.rs index 07fdee3..1104fb8 100644 --- a/testing-framework/deployers/compose/src/infrastructure/environment.rs +++ b/testing-framework/deployers/compose/src/infrastructure/environment.rs @@ -311,6 +311,7 @@ pub fn write_compose_artifacts( let compose_path = workspace.root.join("compose.generated.yml"); write_compose_file(&descriptor, &compose_path) .map_err(|source| ConfigError::Template { source })?; + debug!(compose_file = %compose_path.display(), "rendered compose file"); Ok(compose_path) } diff --git a/testing-framework/workflows/src/builder/mod.rs b/testing-framework/workflows/src/builder/mod.rs index 170a013..d6f667d 100644 --- a/testing-framework/workflows/src/builder/mod.rs +++ b/testing-framework/workflows/src/builder/mod.rs @@ -302,6 +302,7 @@ impl TransactionFlowBuilder { let workload = transaction::Workload::with_rate(self.rate.get()) .expect("transaction rate must be non-zero") .with_user_limit(self.users); + tracing::info!( rate = self.rate.get(), users = self.users.map(|u| u.get()),