mirror of https://github.com/logos-blockchain/logos-blockchain-testing.git (synced 2026-01-02 13:23:13 +00:00)

commit 91c9044abb (parent d8be8e589a)

refactor: remove embedded observability
.gitignore (vendored, +1)
@@ -3,6 +3,7 @@
 .tmp/
 /.tmp*/
 tmp-local-logs/
+tmp/node-logs/
 # IDE / OS cruft
 .idea/
 .DS_Store
Cargo.toml
@@ -4,7 +4,7 @@ members = [
     "examples/doc-snippets",
     "testing-framework/configs",
     "testing-framework/core",
-    "testing-framework/cucumber_ext",
+    "testing-framework/cucumber",
     "testing-framework/deployers/compose",
     "testing-framework/deployers/k8s",
     "testing-framework/deployers/local",
book/src/SUMMARY.md
@@ -26,6 +26,7 @@
 - [Operations](operations.md)
 - [Part III — Developer Reference](part-iii.md)
 - [Scenario Model (Developer Level)](scenario-model.md)
+- [API Levels: Builder DSL vs. Direct](api-levels.md)
 - [Extending the Framework](extending.md)
 - [Example: New Workload & Expectation (Rust)](custom-workload-example.md)
 - [Internal Crate Reference](internal-crate-reference.md)
book/src/api-levels.md (new file, +131)

# API Levels: Builder DSL vs. Direct Instantiation

The framework supports two styles for constructing scenarios:

1. **High-level Builder DSL** (recommended): fluent helper methods (e.g. `.transactions_with(...)`)
2. **Low-level direct instantiation**: construct workload/expectation types explicitly, then attach them

Both styles produce the same runtime behavior because they ultimately call the same core builder APIs.

## High-Level Builder DSL (Recommended)

The DSL is implemented as extension traits (primarily `testing_framework_workflows::ScenarioBuilderExt`) on the core scenario builder.

```rust
use std::time::Duration;

use testing_framework_core::scenario::ScenarioBuilder;
use testing_framework_workflows::ScenarioBuilderExt;

let plan = ScenarioBuilder::topology_with(|t| t.network_star().validators(3).executors(2))
    .wallets(5)
    .transactions_with(|txs| txs.rate(5).users(3))
    .da_with(|da| da.channel_rate(1).blob_rate(1).headroom_percent(20))
    .expect_consensus_liveness()
    .with_run_duration(Duration::from_secs(60))
    .build();
```

**When to use:**
- Most test code (smoke, regression, CI)
- When you want sensible defaults and minimal boilerplate

## Low-Level Direct Instantiation

Direct instantiation gives you explicit control over the concrete types you attach:

```rust
use std::{
    num::{NonZeroU64, NonZeroUsize},
    time::Duration,
};

use testing_framework_core::scenario::ScenarioBuilder;
use testing_framework_workflows::{
    expectations::ConsensusLiveness,
    workloads::{da, transaction},
};

let tx_workload = transaction::Workload::with_rate(5)
    .expect("transaction rate must be non-zero")
    .with_user_limit(NonZeroUsize::new(3));

let da_workload = da::Workload::with_rate(
    NonZeroU64::new(1).unwrap(), // blob rate per block
    NonZeroU64::new(1).unwrap(), // channel rate per block
    da::Workload::default_headroom_percent(),
);

let plan = ScenarioBuilder::topology_with(|t| t.network_star().validators(3).executors(2))
    .wallets(5)
    .with_workload(tx_workload)
    .with_workload(da_workload)
    .with_expectation(ConsensusLiveness::default())
    .with_run_duration(Duration::from_secs(60))
    .build();
```

**When to use:**
- Custom workload/expectation implementations
- Reusing preconfigured workload instances across multiple scenarios
- Debugging / exploring the underlying workload types

## Method Correspondence

| High-Level DSL | Low-Level Direct |
|----------------|------------------|
| `.transactions_with(\|txs\| txs.rate(5).users(3))` | `.with_workload(transaction::Workload::with_rate(5).expect(...).with_user_limit(...))` |
| `.da_with(\|da\| da.blob_rate(1).channel_rate(1))` | `.with_workload(da::Workload::with_rate(...))` |
| `.expect_consensus_liveness()` | `.with_expectation(ConsensusLiveness::default())` |

## Bundled Expectations (Important)

Workloads can bundle expectations by implementing `Workload::expectations()`.

These bundled expectations are attached automatically whenever you call `.with_workload(...)` (including when you use the DSL), because the core builder expands workload expectations during attachment.

## Mixing Both Styles

Mixing is common: use the DSL for built-ins, and direct instantiation for custom pieces.

```rust
use std::time::Duration;

use testing_framework_core::scenario::ScenarioBuilder;
use testing_framework_workflows::ScenarioBuilderExt;

let custom_workload = MyCustomWorkload::new(config);

let plan = ScenarioBuilder::topology_with(|t| t.network_star().validators(3).executors(2))
    .transactions_with(|txs| txs.rate(5).users(3)) // DSL
    .with_workload(custom_workload)                // direct
    .expect_consensus_liveness()                   // DSL
    .with_run_duration(Duration::from_secs(60))
    .build();
```

## Implementation Detail (How the DSL Works)

The DSL methods are thin wrappers. For example:

```rust
builder.transactions_with(|txs| txs.rate(5).users(3))
```

is roughly equivalent to:

```rust
builder.transactions().rate(5).users(3).apply()
```

## Troubleshooting

**DSL method not found**
- Ensure the extension traits are in scope, e.g. `use testing_framework_workflows::ScenarioBuilderExt;`
- Cross-check method names in [Builder API Quick Reference](dsl-cheat-sheet.md)

## See Also

- [Builder API Quick Reference](dsl-cheat-sheet.md)
- [Example: New Workload & Expectation (Rust)](custom-workload-example.md)
- [Extending the Framework](extending.md)
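The bundled-expectations rule above is worth seeing end to end. A minimal sketch (`MyWorkload` is a hypothetical type whose `Workload::expectations()` returns a bundled check, following the trait outlines in the extending guide below; only `topology_with`, `with_workload`, `with_run_duration`, and `build` come from the documented API):

```rust
use std::time::Duration;

use testing_framework_core::scenario::ScenarioBuilder;

// Hypothetical: MyWorkload bundles its default check via Workload::expectations().
// No `.with_expectation(...)` call is needed — the core builder expands the
// workload's bundled expectations when `.with_workload` attaches it.
let plan = ScenarioBuilder::topology_with(|t| t.network_star().validators(3).executors(2))
    .with_workload(MyWorkload::default())
    .with_run_duration(Duration::from_secs(60))
    .build();
// `plan` now carries the bundled check alongside MyWorkload.
```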
book/src/extending.md
@@ -1,31 +1,311 @@
 # Extending the Framework
 
-## Adding a workload
-1) Implement `testing_framework_core::scenario::Workload`:
-   - Provide a name and any bundled expectations.
-   - In `init`, derive inputs from `GeneratedTopology` and `RunMetrics`; fail
-     fast if prerequisites are missing (e.g., wallet data, node addresses).
-   - In `start`, drive async traffic using the `RunContext` clients.
-2) Expose the workload from a module under `testing-framework/workflows` and
-   consider adding a DSL helper for ergonomic wiring.
-
-## Adding an expectation
-1) Implement `testing_framework_core::scenario::Expectation`:
-   - Use `start_capture` to snapshot baseline metrics.
-   - Use `evaluate` to assert outcomes after workloads finish; return all errors
-     so the runner can aggregate them.
-2) Export it from `testing-framework/workflows` if it is reusable.
-
-## Adding a runner
-1) Implement `testing_framework_core::scenario::Deployer` for your backend.
-   - Produce a `RunContext` with `NodeClients`, metrics endpoints, and optional
-     `NodeControlHandle`.
-   - Guard cleanup with `CleanupGuard` to reclaim resources even on failures.
-2) Mirror the readiness and block-feed probes used by the existing runners so
-   workloads can rely on consistent signals.
-
-## Adding topology helpers
-- Extend `testing_framework_core::topology::config::TopologyBuilder` with new layouts or
-  configuration presets (e.g., specialized DA parameters). Keep defaults safe:
-  ensure at least one participant and clamp dispersal factors as the current
-  helpers do.
+This guide shows how to extend the framework with custom workloads, expectations, runners, and topology helpers. Each section includes the trait outline and a minimal code example.
+
+## Adding a Workload
+
+**Steps:**
+
+1. Implement `testing_framework_core::scenario::Workload`
+2. Provide a name and any bundled expectations
+3. Use `init` to derive inputs from topology/metrics; fail fast if prerequisites missing
+4. Use `start` to drive async traffic using `RunContext` clients
+5. Expose from `testing-framework/workflows` and optionally add a DSL helper
+
+**Trait outline:**
+
+```rust
+use async_trait::async_trait;
+use testing_framework_core::scenario::{
+    DynError, Expectation, RunContext, RunMetrics, Workload,
+};
+use testing_framework_core::topology::generation::GeneratedTopology;
+
+pub struct MyWorkload {
+    // Configuration fields
+    target_rate: u64,
+}
+
+impl MyWorkload {
+    pub fn new(target_rate: u64) -> Self {
+        Self { target_rate }
+    }
+}
+
+#[async_trait]
+impl Workload for MyWorkload {
+    fn name(&self) -> &str {
+        "my_workload"
+    }
+
+    fn expectations(&self) -> Vec<Box<dyn Expectation>> {
+        // Return bundled expectations that should run with this workload
+        vec![Box::new(MyExpectation::new(self.target_rate))]
+    }
+
+    fn init(
+        &mut self,
+        topology: &GeneratedTopology,
+        _run_metrics: &RunMetrics,
+    ) -> Result<(), DynError> {
+        // Validate prerequisites (e.g., enough nodes, wallet data present)
+        if topology.validators().is_empty() {
+            return Err("no validators available".into());
+        }
+        Ok(())
+    }
+
+    async fn start(&self, ctx: &RunContext) -> Result<(), DynError> {
+        // Drive async activity: submit transactions, query nodes, etc.
+        let clients = ctx.node_clients().validator_clients();
+
+        for client in clients {
+            let info = client.consensus_info().await?;
+            tracing::info!(?info, "workload queried node");
+        }
+
+        Ok(())
+    }
+}
+```
+
+**Key points:**
+
+- `name()` identifies the workload in logs
+- `expectations()` bundles default checks (can be empty)
+- `init()` validates topology before run starts
+- `start()` executes concurrently with other workloads; it should complete before run duration expires
+
+See [Example: New Workload & Expectation](custom-workload-example.md) for a complete, runnable example.
+
+## Adding an Expectation
+
+**Steps:**
+
+1. Implement `testing_framework_core::scenario::Expectation`
+2. Use `start_capture` to snapshot baseline metrics (optional)
+3. Use `evaluate` to assert outcomes after workloads finish
+4. Return descriptive errors; the runner aggregates them
+5. Export from `testing-framework/workflows` if reusable
+
+**Trait outline:**
+
+```rust
+use async_trait::async_trait;
+use testing_framework_core::scenario::{DynError, Expectation, RunContext};
+
+pub struct MyExpectation {
+    expected_value: u64,
+    captured_baseline: Option<u64>,
+}
+
+impl MyExpectation {
+    pub fn new(expected_value: u64) -> Self {
+        Self {
+            expected_value,
+            captured_baseline: None,
+        }
+    }
+}
+
+#[async_trait]
+impl Expectation for MyExpectation {
+    fn name(&self) -> &str {
+        "my_expectation"
+    }
+
+    async fn start_capture(&mut self, ctx: &RunContext) -> Result<(), DynError> {
+        // Optional: capture baseline state before workloads start
+        let client = ctx.node_clients().validator_clients().first()
+            .ok_or("no validators")?;
+
+        let info = client.consensus_info().await?;
+        self.captured_baseline = Some(info.current_block_id.slot);
+
+        tracing::info!(baseline = self.captured_baseline, "captured baseline");
+        Ok(())
+    }
+
+    async fn evaluate(&mut self, ctx: &RunContext) -> Result<(), DynError> {
+        // Assert the expected condition holds after workloads finish
+        let client = ctx.node_clients().validator_clients().first()
+            .ok_or("no validators")?;
+
+        let info = client.consensus_info().await?;
+        let final_slot = info.current_block_id.slot;
+
+        let baseline = self.captured_baseline.unwrap_or(0);
+        let delta = final_slot.saturating_sub(baseline);
+
+        if delta < self.expected_value {
+            return Err(format!(
+                "expected at least {} blocks, got {}",
+                self.expected_value, delta
+            ).into());
+        }
+
+        tracing::info!(delta, "expectation passed");
+        Ok(())
+    }
+}
+```
+
+**Key points:**
+
+- `name()` identifies the expectation in logs
+- `start_capture()` runs before workloads start (optional)
+- `evaluate()` runs after workloads finish; return descriptive errors
+- Expectations run sequentially; keep them fast
+
+## Adding a Runner (Deployer)
+
+**Steps:**
+
+1. Implement `testing_framework_core::scenario::Deployer<Caps>` for your capability type
+2. Deploy infrastructure and return a `Runner`
+3. Construct `NodeClients` and spawn a `BlockFeed`
+4. Build a `RunContext` and provide a `CleanupGuard` for teardown
+
+**Trait outline:**
+
+```rust
+use async_trait::async_trait;
+use testing_framework_core::scenario::{
+    CleanupGuard, Deployer, DynError, Metrics, NodeClients, RunContext, Runner, Scenario,
+    spawn_block_feed,
+};
+use testing_framework_core::topology::deployment::Topology;
+
+pub struct MyDeployer {
+    // Configuration: cluster connection details, etc.
+}
+
+impl MyDeployer {
+    pub fn new() -> Self {
+        Self {}
+    }
+}
+
+#[async_trait]
+impl Deployer<()> for MyDeployer {
+    type Error = DynError;
+
+    async fn deploy(&self, scenario: &Scenario<()>) -> Result<Runner, Self::Error> {
+        // 1. Launch nodes using scenario.topology()
+        // 2. Wait for readiness (e.g., consensus info endpoint responds)
+        // 3. Build NodeClients for validators/executors
+        // 4. Spawn a block feed for expectations (optional but recommended)
+        // 5. Create NodeControlHandle if you support restarts (optional)
+        // 6. Return a Runner wrapping RunContext + CleanupGuard
+
+        tracing::info!("deploying scenario with MyDeployer");
+
+        let topology: Option<Topology> = None; // Some(topology) if you spawned one
+        let node_clients = NodeClients::default(); // Or NodeClients::from_topology(...)
+
+        let (block_feed, block_feed_guard) = spawn_block_feed(&node_clients).await?;
+
+        let telemetry = Metrics::empty(); // or Metrics::from_prometheus(...)
+        let node_control = None; // or Some(Arc<dyn NodeControlHandle>)
+
+        let context = RunContext::new(
+            scenario.topology().clone(),
+            topology,
+            node_clients,
+            scenario.duration(),
+            telemetry,
+            block_feed,
+            node_control,
+        );
+
+        // If you also have other resources to clean up (containers/pods/etc),
+        // wrap them in your own CleanupGuard implementation and call
+        // CleanupGuard::cleanup(Box::new(block_feed_guard)) inside it.
+        Ok(Runner::new(context, Some(Box::new(block_feed_guard))))
+    }
+}
+```
+
+**Key points:**
+
+- `deploy()` must return a fully prepared `Runner`
+- Block until nodes are ready before returning (avoid false negatives)
+- Use a `CleanupGuard` to tear down resources on failure (and on `RunHandle` drop)
+- If you want chaos workloads, also provide a `NodeControlHandle` via `RunContext`
+
+## Adding Topology Helpers
+
+**Steps:**
+
+1. Extend `testing_framework_core::topology::config::TopologyBuilder` with new layouts
+2. Keep defaults safe: ensure at least one participant, clamp dispersal factors
+3. Consider adding configuration presets for specialized parameters
+
+**Example:**
+
+```rust
+use testing_framework_core::topology::config::TopologyBuilder;
+
+impl TopologyBuilder {
+    /// Creates a "ring" topology where each node connects to its neighbors
+    pub fn network_ring(&mut self) -> &mut Self {
+        // Configure peer connections in a ring layout
+        self.with_network_layout(|layout| {
+            // Implement ring connection logic
+            layout.ring_peers()
+        });
+        self
+    }
+
+    /// Preset for high-throughput DA configuration
+    pub fn da_high_throughput(&mut self) -> &mut Self {
+        self.with_da_params(|params| {
+            params
+                .dispersal_factor(8)
+                .replication_factor(16)
+                .chunk_size(4096)
+        });
+        self
+    }
+}
+```
+
+**Key points:**
+
+- Maintain method chaining (return `&mut Self`)
+- Validate inputs: clamp factors, enforce minimums
+- Document assumptions (e.g., "requires at least 4 nodes")
+
+## Adding a DSL Helper
+
+To expose your custom workload through the high-level DSL, add a trait extension:
+
+```rust
+use testing_framework_core::scenario::Builder as ScenarioBuilder;
+
+pub trait MyWorkloadDsl {
+    fn my_workload_with(
+        self,
+        f: impl FnOnce(MyWorkloadBuilder) -> MyWorkloadBuilder,
+    ) -> Self;
+}
+
+impl<Caps> MyWorkloadDsl for ScenarioBuilder<Caps> {
+    fn my_workload_with(
+        self,
+        f: impl FnOnce(MyWorkloadBuilder) -> MyWorkloadBuilder,
+    ) -> Self {
+        let builder = f(MyWorkloadBuilder::default());
+        self.with_workload(builder.build())
+    }
+}
+```
+
+Users can then call:
+
+```rust
+ScenarioBuilder::topology_with(|t| { /* ... */ })
+    .my_workload_with(|w| {
+        w.target_rate(10)
+            .some_option(true)
+    })
+    .build()
+```
+
+## See Also
+
+- [API Levels: Builder DSL vs. Direct](api-levels.md) - Understanding the two API levels
+- [Custom Workload Example](custom-workload-example.md) - Complete runnable example
+- [Internal Crate Reference](internal-crate-reference.md) - Where to add new code
(operations documentation)
@@ -37,7 +37,7 @@ Both **LocalDeployer** and **ComposeDeployer** work in CI environments:
 **ComposeDeployer in CI (recommended):**
 - Better isolation (containerized)
 - Reproducible environment
-- Includes Prometheus/observability
+- Can integrate with external Prometheus/Grafana (optional)
 - **Trade-off:** Slower startup (Docker image build)
 - **Trade-off:** Requires Docker daemon
 
@@ -60,7 +60,21 @@ scripts/run-examples.sh -t 60 -v 1 -e 1 compose
 scripts/run-examples.sh -t 60 -v 1 -e 1 k8s
 ```
 
-This script handles circuit setup, binary building/bundling, image building, and execution.
+This script handles circuit setup, binary building/bundling, (local) image building, and execution.
+
+Note: for `k8s` runs against non-local clusters (e.g. EKS), the cluster pulls images from a registry,
+so a local `docker build` is not used. In that case, build + push your image separately (see
+`scripts/build_test_image.sh`) and set `NOMOS_TESTNET_IMAGE` to the pushed reference.
+
+### Quick Smoke Matrix (Host/Compose/K8s)
+
+For a small “does everything still run?” matrix (including `--no-image-build` variants where relevant), use:
+
+```bash
+scripts/run-test-matrix.sh -t 120 -v 1 -e 1
+```
+
+This is useful after making runner/image/script changes, and it forwards `--metrics-*` options through to `scripts/run-examples.sh`.
 
 **Environment overrides:**
 - `VERSION=v0.3.1` — Circuit version
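For that registry-backed flow, the steps look roughly like this (illustrative sketch: the registry, repository, and tag are placeholders, and the local tag produced by `scripts/build_test_image.sh` is an assumption — check the script's output for the real name):

```bash
# Build the test image locally, then push it somewhere the cluster can pull from.
scripts/build_test_image.sh
docker tag nomos-testnet:local registry.example.com/nomos/testnet:my-tag  # hypothetical names
docker push registry.example.com/nomos/testnet:my-tag

# Point the k8s runner at the pushed reference.
export NOMOS_TESTNET_IMAGE=registry.example.com/nomos/testnet:my-tag
scripts/run-examples.sh -t 60 -v 1 -e 1 k8s
```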
@@ -192,6 +206,7 @@ cargo run -p runner-examples --bin compose_runner
 **Compose-specific features:**
 - **Node control support**: Only runner that supports chaos testing (`.enable_node_control()` + chaos workloads)
 - **Observability is external**: Set `NOMOS_METRICS_*` / `NOMOS_GRAFANA_URL` to enable telemetry links and querying
+- Quickstart: `scripts/setup-observability.sh compose up` then `scripts/setup-observability.sh compose env`
 
 **Important:**
 - Containers expect KZG parameters at `/kzgrs_test_params/kzgrs_test_params` (note the repeated filename)
@@ -248,13 +263,13 @@ cargo run -p runner-examples --bin k8s_runner
 Notes:
 - `NOMOS_METRICS_QUERY_URL` must be reachable from the runner process (often via `kubectl port-forward`).
 - `NOMOS_METRICS_OTLP_INGEST_URL` must be reachable from nodes (pods/containers) and is backend-specific (Prometheus vs VictoriaMetrics paths differ).
+- Quickstart installer: `scripts/setup-observability.sh k8s install` then `scripts/setup-observability.sh k8s env` (optional dashboards: `scripts/setup-observability.sh k8s dashboards`)
 
 **Via `scripts/run-examples.sh` (optional):**
 ```bash
 scripts/run-examples.sh -t 60 -v 1 -e 1 k8s \
   --metrics-query-url http://your-prometheus:9090 \
-  --metrics-otlp-ingest-url http://your-prometheus:9090/api/v1/otlp/v1/metrics \
-  --grafana-url http://your-grafana:3000
+  --metrics-otlp-ingest-url http://your-prometheus:9090/api/v1/otlp/v1/metrics
 ```
 
 **In code (optional):**
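To make the port-forward note concrete, a typical session looks like this (assumptions: a kube-prometheus-stack install such as the one `scripts/setup-observability.sh k8s install` provides, its standard `prometheus-operated` service, and a `monitoring` namespace — adjust the names to your cluster):

```bash
# Expose the in-cluster Prometheus to the runner process...
kubectl -n monitoring port-forward svc/prometheus-operated 9090:9090 &

# ...and let the runner query it via PromQL.
export NOMOS_METRICS_QUERY_URL=http://127.0.0.1:9090
```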
@@ -565,12 +580,15 @@ cargo run -p runner-examples --bin local_runner
 Runners expose metrics and node HTTP endpoints for expectation code and debugging:
 
 **Prometheus-compatible metrics querying (optional):**
-- The framework does **not** deploy Prometheus.
+- Runners do **not** provision Prometheus automatically.
+- For a ready-to-run stack, use `scripts/setup-observability.sh`:
+  - Compose: `scripts/setup-observability.sh compose up` then `scripts/setup-observability.sh compose env`
+  - K8s: `scripts/setup-observability.sh k8s install` then `scripts/setup-observability.sh k8s env`
 - Provide `NOMOS_METRICS_QUERY_URL` (PromQL base URL) to enable `ctx.telemetry()` queries.
 - Access from expectations when configured: `ctx.telemetry().prometheus().map(|p| p.base_url())`
 
 **Grafana (optional):**
-- The framework does **not** deploy Grafana.
+- Runners do **not** provision Grafana automatically (but `scripts/setup-observability.sh` can).
 - If you set `NOMOS_GRAFANA_URL`, the deployer prints it in `TESTNET_ENDPOINTS`.
 - Dashboards live in `testing-framework/assets/stack/monitoring/grafana/dashboards/` for import into your Grafana.
 
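A minimal sketch of consuming that accessor from expectation code (assumptions: `reqwest` and `serde_json` as client crates, `base_url()` formatting via `Display`, and `DynError` being a boxed error type — only the `ctx.telemetry().prometheus()` accessor comes from the docs above):

```rust
use testing_framework_core::scenario::{DynError, RunContext};

// Hypothetical helper: run an instant PromQL query against the configured endpoint.
async fn promql_instant(ctx: &RunContext, query: &str) -> Result<serde_json::Value, DynError> {
    let base = ctx
        .telemetry()
        .prometheus()
        .map(|p| p.base_url())
        .ok_or("metrics querying not configured (set NOMOS_METRICS_QUERY_URL)")?;

    // Standard Prometheus HTTP API: GET <base>/api/v1/query?query=<promql>
    let body = reqwest::Client::new()
        .get(format!("{base}/api/v1/query"))
        .query(&[("query", query)])
        .send()
        .await?
        .json()
        .await?;

    Ok(body)
}
```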
(getting-started walkthrough)
@@ -163,7 +163,7 @@ pub fn step_5_run_duration() -> testing_framework_core::scenario::Builder<()> {
 }
 ```
 
-Run for 60 seconds (~27 blocks with default 2s slots, 0.9 coefficient). Framework ensures this is at least 2× the consensus slot duration.
+Run for 60 seconds (~27 blocks with default 2s slots, 0.9 coefficient). Framework ensures this is at least 2× the consensus slot duration. Adjust consensus timing via `CONSENSUS_SLOT_TIME` and `CONSENSUS_ACTIVE_SLOT_COEFF`.
 
 ### 6. Deploy and Execute
 
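For example, to see the effect of those knobs without editing code (illustrative values; the binary is the local runner used elsewhere in these docs):

```bash
# Halve the slot time: 60 s / 1 s per slot × 0.9 ≈ 54 expected blocks instead of ~27.
CONSENSUS_SLOT_TIME=1 CONSENSUS_ACTIVE_SLOT_COEFF=0.9 \
POL_PROOF_DEV_MODE=true \
cargo run -p runner-examples --bin local_runner
```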
@@ -239,7 +239,18 @@ POL_PROOF_DEV_MODE=true \
 cargo run -p runner-examples --bin compose_runner
 ```
 
-**Benefit:** Reproducible containerized environment with Prometheus at `http://localhost:9090`.
+**Benefit:** Reproducible containerized environment (Dockerized nodes, repeatable deployments).
+
+**Optional: Prometheus + Grafana**
+
+The runner can integrate with external observability endpoints. For a ready-to-run local stack:
+
+```bash
+scripts/setup-observability.sh compose up
+eval "$(scripts/setup-observability.sh compose env)"
+```
+
+Then run your compose scenario as usual (the environment variables enable PromQL querying and node OTLP metrics export).
 
 **Note:** Compose expects KZG parameters at `/kzgrs_test_params/kzgrs_test_params` inside containers (the directory name is repeated as the filename).
 
(timing guidance)
@@ -36,7 +36,7 @@ Reason in **blocks** and **consensus intervals**, not wall-clock seconds.
 
 **Consensus defaults:**
 - Slot duration: 2 seconds (NTP-synchronized, configurable via `CONSENSUS_SLOT_TIME`)
-- Active slot coefficient: 0.9 (90% block probability per slot)
+- Active slot coefficient: 0.9 (90% block probability per slot, configurable via `CONSENSUS_ACTIVE_SLOT_COEFF`)
 - Expected rate: ~27 blocks per minute
 
 ```rust
@@ -159,7 +159,7 @@ pub fn minimum_run_windows() {
 
 **Note:** Block counts assume default consensus parameters:
 - Slot duration: 2 seconds (configurable via `CONSENSUS_SLOT_TIME`)
-- Active slot coefficient: 0.9 (90% block probability per slot)
+- Active slot coefficient: 0.9 (90% block probability per slot, configurable via `CONSENSUS_ACTIVE_SLOT_COEFF`)
 - Formula: `blocks ≈ (duration / slot_duration) × active_slot_coeff`
 
 If upstream changes these parameters, adjust your duration expectations accordingly.
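A worked instance of the formula, as a self-contained snippet (illustrative helper, not a framework API):

```rust
/// blocks ≈ (duration / slot_duration) × active_slot_coeff
fn expected_blocks(duration_secs: f64, slot_secs: f64, active_slot_coeff: f64) -> f64 {
    (duration_secs / slot_secs) * active_slot_coeff
}

fn main() {
    // Defaults from above: a 60 s run with 2 s slots and a 0.9 coefficient.
    println!("{}", expected_blocks(60.0, 2.0, 0.9)); // 27
}
```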
(examples crate manifest)
@@ -12,7 +12,7 @@ version = "0.1.0"
 [dependencies]
 anyhow = "1"
 cucumber = { version = "0.22.0" }
-cucumber_ext = { path = "../testing-framework/cucumber_ext" }
+cucumber_ext = { path = "../testing-framework/cucumber" }
 testing-framework-core = { workspace = true }
 testing-framework-runner-compose = { workspace = true }
 testing-framework-runner-k8s = { workspace = true }
(compose_runner example binary)
@@ -1,4 +1,9 @@
-use std::{env, process, time::Duration};
+use std::{
+    env, fs,
+    path::{Path, PathBuf},
+    process,
+    time::Duration,
+};
 
 use anyhow::{Context as _, Result};
 use runner_examples::{ChaosBuilderExt as _, ScenarioBuilderExt as _, read_env_any};
@@ -25,6 +30,8 @@ const DA_BLOB_RATE: u64 = 1;
 
 #[tokio::main]
 async fn main() {
+    init_node_log_dir_defaults();
+
     // Compose containers mount KZG params at /kzgrs_test_params; ensure the
     // generated configs point there unless the caller overrides explicitly.
     if env::var("NOMOS_KZGRS_PARAMS_PATH").is_err() {
@@ -57,6 +64,35 @@ async fn main() {
     }
 }
 
+fn init_node_log_dir_defaults() {
+    if env::var_os("NOMOS_LOG_DIR").is_some() {
+        return;
+    }
+
+    let repo_root = repo_root();
+    let host_dir = repo_root.join("tmp").join("node-logs");
+    let _ = fs::create_dir_all(&host_dir);
+
+    // In compose mode, node processes run inside containers; configs should
+    // point to the container path, while the compose deployer mounts the host
+    // repo's `tmp/node-logs` there.
+    unsafe {
+        env::set_var("NOMOS_LOG_DIR", "/tmp/node-logs");
+    }
+}
+
+fn repo_root() -> PathBuf {
+    env::var("CARGO_WORKSPACE_DIR")
+        .map(PathBuf::from)
+        .ok()
+        .or_else(|| {
+            Path::new(env!("CARGO_MANIFEST_DIR"))
+                .parent()
+                .map(Path::to_path_buf)
+        })
+        .expect("repo root must be discoverable from CARGO_WORKSPACE_DIR or CARGO_MANIFEST_DIR")
+}
+
 async fn run_compose_case(
     validators: usize,
     executors: usize,
(cucumber compose entrypoint)
@@ -1,8 +1,11 @@
-use runner_examples::cucumber::{Mode, init_logging_defaults, init_tracing, run};
+use runner_examples::cucumber::{
+    Mode, init_logging_defaults, init_node_log_dir_defaults, init_tracing, run,
+};
 
 #[tokio::main(flavor = "current_thread")]
 async fn main() {
     init_logging_defaults();
+    init_node_log_dir_defaults(Mode::Compose);
     init_tracing();
 
     run(Mode::Compose).await;
(cucumber host entrypoint)
@@ -1,8 +1,11 @@
-use runner_examples::cucumber::{Mode, init_logging_defaults, init_tracing, run};
+use runner_examples::cucumber::{
+    Mode, init_logging_defaults, init_node_log_dir_defaults, init_tracing, run,
+};
 
 #[tokio::main(flavor = "current_thread")]
 async fn main() {
     init_logging_defaults();
+    init_node_log_dir_defaults(Mode::Host);
     init_tracing();
 
     run(Mode::Host).await;
(local_runner example binary)
@@ -1,4 +1,9 @@
-use std::{env, process, time::Duration};
+use std::{
+    env, fs,
+    path::{Path, PathBuf},
+    process,
+    time::Duration,
+};
 
 use anyhow::{Context as _, Result};
 use runner_examples::{ScenarioBuilderExt as _, read_env_any};
@@ -12,11 +17,13 @@ const DEFAULT_RUN_SECS: u64 = 60;
 const MIXED_TXS_PER_BLOCK: u64 = 5;
 const TOTAL_WALLETS: usize = 1000;
 const TRANSACTION_WALLETS: usize = 500;
-const DA_BLOB_RATE: u64 = 1;
+const DA_BLOB_RATE: u64 = 3;
 const SMOKE_RUN_SECS_MAX: u64 = 30;
 
 #[tokio::main]
 async fn main() {
+    init_node_log_dir_defaults();
+
     tracing_subscriber::fmt::init();
 
     if env::var("POL_PROOF_DEV_MODE").is_err() {
@@ -39,6 +46,30 @@ async fn main() {
     }
 }
 
+fn init_node_log_dir_defaults() {
+    if env::var_os("NOMOS_LOG_DIR").is_some() {
+        return;
+    }
+
+    let host_dir = repo_root().join("tmp").join("node-logs");
+    let _ = fs::create_dir_all(&host_dir);
+    unsafe {
+        env::set_var("NOMOS_LOG_DIR", host_dir);
+    }
+}
+
+fn repo_root() -> PathBuf {
+    env::var("CARGO_WORKSPACE_DIR")
+        .map(PathBuf::from)
+        .ok()
+        .or_else(|| {
+            Path::new(env!("CARGO_MANIFEST_DIR"))
+                .parent()
+                .map(Path::to_path_buf)
+        })
+        .expect("repo root must be discoverable from CARGO_WORKSPACE_DIR or CARGO_MANIFEST_DIR")
+}
+
 async fn run_local_case(validators: usize, executors: usize, run_duration: Duration) -> Result<()> {
     info!(
         validators,
|||||||
@ -1,3 +1,8 @@
|
|||||||
|
use std::{
|
||||||
|
env, fs,
|
||||||
|
path::{Path, PathBuf},
|
||||||
|
};
|
||||||
|
|
||||||
use cucumber::World;
|
use cucumber::World;
|
||||||
use cucumber_ext::TestingFrameworkWorld;
|
use cucumber_ext::TestingFrameworkWorld;
|
||||||
use tracing_subscriber::{EnvFilter, fmt};
|
use tracing_subscriber::{EnvFilter, fmt};
|
||||||
@ -31,11 +36,36 @@ fn is_compose(
|
|||||||
pub fn init_logging_defaults() {
|
pub fn init_logging_defaults() {
|
||||||
set_default_env("POL_PROOF_DEV_MODE", "true");
|
set_default_env("POL_PROOF_DEV_MODE", "true");
|
||||||
set_default_env("NOMOS_TESTS_KEEP_LOGS", "1");
|
set_default_env("NOMOS_TESTS_KEEP_LOGS", "1");
|
||||||
set_default_env("NOMOS_LOG_DIR", ".tmp/cucumber-logs");
|
|
||||||
set_default_env("NOMOS_LOG_LEVEL", "info");
|
set_default_env("NOMOS_LOG_LEVEL", "info");
|
||||||
set_default_env("RUST_LOG", "info");
|
set_default_env("RUST_LOG", "info");
|
||||||
}
|
}
|
||||||
|
|
||||||
|
pub fn init_node_log_dir_defaults(mode: Mode) {
|
||||||
|
if env::var_os("NOMOS_LOG_DIR").is_some() {
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
let host_dir = repo_root().join("tmp").join("node-logs");
|
||||||
|
let _ = fs::create_dir_all(&host_dir);
|
||||||
|
|
||||||
|
match mode {
|
||||||
|
Mode::Host => set_default_env("NOMOS_LOG_DIR", &host_dir.display().to_string()),
|
||||||
|
Mode::Compose => set_default_env("NOMOS_LOG_DIR", "/tmp/node-logs"),
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
fn repo_root() -> PathBuf {
|
||||||
|
env::var("CARGO_WORKSPACE_DIR")
|
||||||
|
.map(PathBuf::from)
|
||||||
|
.ok()
|
||||||
|
.or_else(|| {
|
||||||
|
Path::new(env!("CARGO_MANIFEST_DIR"))
|
||||||
|
.parent()
|
||||||
|
.map(Path::to_path_buf)
|
||||||
|
})
|
||||||
|
.expect("repo root must be discoverable from CARGO_WORKSPACE_DIR or CARGO_MANIFEST_DIR")
|
||||||
|
}
|
||||||
|
|
||||||
pub fn init_tracing() {
|
pub fn init_tracing() {
|
||||||
let filter = EnvFilter::try_from_default_env().unwrap_or_else(|_| EnvFilter::new("info"));
|
let filter = EnvFilter::try_from_default_env().unwrap_or_else(|_| EnvFilter::new("info"));
|
||||||
let _ = fmt().with_env_filter(filter).with_target(true).try_init();
|
let _ = fmt().with_env_filter(filter).with_target(true).try_init();
|
||||||
|
|||||||
@ -196,6 +196,24 @@ build_bundle::clean_cargo_linux_cache() {
|
|||||||
rm -rf "${ROOT_DIR}/.tmp/cargo-linux/registry" "${ROOT_DIR}/.tmp/cargo-linux/git"
|
rm -rf "${ROOT_DIR}/.tmp/cargo-linux/registry" "${ROOT_DIR}/.tmp/cargo-linux/git"
|
||||||
}
|
}
|
||||||
|
|
||||||
|
build_bundle::docker_platform_suffix() {
|
||||||
|
# Map a docker platform string (e.g. linux/amd64) to a filesystem-safe suffix
|
||||||
|
# used for arch-specific target dirs, to avoid mixing build artifacts between
|
||||||
|
# different container architectures.
|
||||||
|
local platform="${1:-}"
|
||||||
|
if [ -z "${platform}" ]; then
|
||||||
|
echo ""
|
||||||
|
return 0
|
||||||
|
fi
|
||||||
|
platform="${platform#linux/}"
|
||||||
|
platform="${platform//\//-}"
|
||||||
|
if [ -z "${platform}" ] || [ "${platform}" = "linux" ]; then
|
||||||
|
echo ""
|
||||||
|
return 0
|
||||||
|
fi
|
||||||
|
echo "-${platform}"
|
||||||
|
}
|
||||||
|
|
||||||
build_bundle::maybe_run_linux_build_in_docker() {
|
build_bundle::maybe_run_linux_build_in_docker() {
|
||||||
# With `set -e`, this function must return 0 when no Docker cross-build is needed.
|
# With `set -e`, this function must return 0 when no Docker cross-build is needed.
|
||||||
if [ "${PLATFORM}" != "linux" ] || [ "$(uname -s)" = "Linux" ] || [ -n "${BUNDLE_IN_CONTAINER:-}" ]; then
|
if [ "${PLATFORM}" != "linux" ] || [ "$(uname -s)" = "Linux" ] || [ -n "${BUNDLE_IN_CONTAINER:-}" ]; then
|
||||||
@ -224,7 +242,10 @@ build_bundle::maybe_run_linux_build_in_docker() {
|
|||||||
|
|
||||||
echo "==> Building Linux bundle inside Docker"
|
echo "==> Building Linux bundle inside Docker"
|
||||||
local container_output="/workspace${OUTPUT#"${ROOT_DIR}"}"
|
local container_output="/workspace${OUTPUT#"${ROOT_DIR}"}"
|
||||||
mkdir -p "${ROOT_DIR}/.tmp/cargo-linux" "${ROOT_DIR}/.tmp/nomos-node-linux-target"
|
local target_suffix
|
||||||
|
target_suffix="$(build_bundle::docker_platform_suffix "${DOCKER_PLATFORM}")"
|
||||||
|
local host_target_dir="${ROOT_DIR}/.tmp/nomos-node-linux-target${target_suffix}"
|
||||||
|
mkdir -p "${ROOT_DIR}/.tmp/cargo-linux" "${host_target_dir}"
|
||||||
|
|
||||||
local -a features_args=()
|
local -a features_args=()
|
||||||
if [ -n "${NOMOS_EXTRA_FEATURES:-}" ]; then
|
if [ -n "${NOMOS_EXTRA_FEATURES:-}" ]; then
|
||||||
@ -242,15 +263,16 @@ build_bundle::maybe_run_linux_build_in_docker() {
|
|||||||
-e VERSION="${VERSION}" \
|
-e VERSION="${VERSION}" \
|
||||||
-e NOMOS_NODE_REV="${NOMOS_NODE_REV}" \
|
-e NOMOS_NODE_REV="${NOMOS_NODE_REV}" \
|
||||||
-e NOMOS_NODE_PATH="${node_path_env}" \
|
-e NOMOS_NODE_PATH="${node_path_env}" \
|
||||||
|
-e NOMOS_BUNDLE_DOCKER_PLATFORM="${DOCKER_PLATFORM}" \
|
||||||
-e NOMOS_CIRCUITS="/workspace/.tmp/nomos-circuits-linux" \
|
-e NOMOS_CIRCUITS="/workspace/.tmp/nomos-circuits-linux" \
|
||||||
-e STACK_DIR="/workspace/.tmp/nomos-circuits-linux" \
|
-e STACK_DIR="/workspace/.tmp/nomos-circuits-linux" \
|
||||||
-e HOST_DIR="/workspace/.tmp/nomos-circuits-linux" \
|
-e HOST_DIR="/workspace/.tmp/nomos-circuits-linux" \
|
||||||
-e NOMOS_EXTRA_FEATURES="${NOMOS_EXTRA_FEATURES:-}" \
|
-e NOMOS_EXTRA_FEATURES="${NOMOS_EXTRA_FEATURES:-}" \
|
||||||
-e BUNDLE_IN_CONTAINER=1 \
|
-e BUNDLE_IN_CONTAINER=1 \
|
||||||
-e CARGO_HOME=/workspace/.tmp/cargo-linux \
|
-e CARGO_HOME=/workspace/.tmp/cargo-linux \
|
||||||
-e CARGO_TARGET_DIR=/workspace/.tmp/nomos-node-linux-target \
|
-e CARGO_TARGET_DIR="/workspace/.tmp/nomos-node-linux-target${target_suffix}" \
|
||||||
-v "${ROOT_DIR}/.tmp/cargo-linux":/workspace/.tmp/cargo-linux \
|
-v "${ROOT_DIR}/.tmp/cargo-linux":/workspace/.tmp/cargo-linux \
|
||||||
-v "${ROOT_DIR}/.tmp/nomos-node-linux-target":/workspace/.tmp/nomos-node-linux-target \
|
-v "${host_target_dir}:/workspace/.tmp/nomos-node-linux-target${target_suffix}" \
|
||||||
-v "${ROOT_DIR}:/workspace" \
|
-v "${ROOT_DIR}:/workspace" \
|
||||||
"${extra_mounts[@]}" \
|
"${extra_mounts[@]}" \
|
||||||
-w /workspace \
|
-w /workspace \
|
||||||
@ -267,7 +289,14 @@ build_bundle::prepare_circuits() {
|
|||||||
NODE_TARGET="${ROOT_DIR}/.tmp/nomos-node-host-target"
|
NODE_TARGET="${ROOT_DIR}/.tmp/nomos-node-host-target"
|
||||||
else
|
else
|
||||||
CIRCUITS_DIR="${ROOT_DIR}/.tmp/nomos-circuits-linux"
|
CIRCUITS_DIR="${ROOT_DIR}/.tmp/nomos-circuits-linux"
|
||||||
NODE_TARGET="${ROOT_DIR}/.tmp/nomos-node-linux-target"
|
# When building Linux bundles in Docker, avoid reusing the same target dir
|
||||||
|
# across different container architectures (e.g. linux/arm64 vs linux/amd64),
|
||||||
|
# as the native-host `target/debug` layout would otherwise get mixed.
|
||||||
|
local target_suffix=""
|
||||||
|
if [ -n "${BUNDLE_IN_CONTAINER:-}" ]; then
|
||||||
|
target_suffix="$(build_bundle::docker_platform_suffix "${NOMOS_BUNDLE_DOCKER_PLATFORM:-}")"
|
||||||
|
fi
|
||||||
|
NODE_TARGET="${ROOT_DIR}/.tmp/nomos-node-linux-target${target_suffix}"
|
||||||
fi
|
fi
|
||||||
|
|
||||||
NODE_SRC_DEFAULT="${ROOT_DIR}/.tmp/nomos-node-${PLATFORM}-src"
|
NODE_SRC_DEFAULT="${ROOT_DIR}/.tmp/nomos-node-${PLATFORM}-src"
|
||||||
|
|||||||
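To make the suffix mapping concrete, this is the behavior the helper above implements (illustrative calls; the outputs follow from the string manipulation in the function, shown here as comments):

```bash
build_bundle::docker_platform_suffix "linux/amd64"     # -> "-amd64"
build_bundle::docker_platform_suffix "linux/arm64/v8"  # -> "-arm64-v8"
build_bundle::docker_platform_suffix ""                # -> "" (no suffix)
```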
scripts/observability/compose/docker-compose.yml (new file, +38)

services:
  prometheus:
    image: prom/prometheus:v2.53.0
    command:
      - --config.file=/etc/prometheus/prometheus.yml
      - --storage.tsdb.path=/prometheus
      # Exposes OTLP HTTP ingest at /api/v1/otlp/v1/metrics
      - --enable-feature=otlp-write-receiver
      - --web.enable-lifecycle
      - --web.enable-admin-api
    volumes:
      - ./prometheus/prometheus.yml:/etc/prometheus/prometheus.yml:ro
      - prometheus-data:/prometheus
    ports:
      - "9090:9090"

  grafana:
    image: grafana/grafana:11.4.0
    depends_on:
      - prometheus
    env_file:
      - ../../../testing-framework/assets/stack/monitoring/grafana/plugins.env
    environment:
      GF_SECURITY_ADMIN_USER: admin
      GF_SECURITY_ADMIN_PASSWORD: admin
      GF_USERS_ALLOW_SIGN_UP: "false"
    volumes:
      - grafana-data:/var/lib/grafana
      - ../../../testing-framework/assets/stack/monitoring/grafana/grafana.ini:/etc/grafana/grafana.ini:ro
      - ../../../testing-framework/assets/stack/monitoring/grafana/datasources.yaml:/etc/grafana/provisioning/datasources/datasources.yaml:ro
      - ../../../testing-framework/assets/stack/monitoring/grafana/dashboards.yml:/etc/grafana/provisioning/dashboards/dashboards.yml:ro
      - ../../../testing-framework/assets/stack/monitoring/grafana/dashboards:/var/lib/grafana/dashboards:ro
    ports:
      - "3000:3000"

volumes:
  prometheus-data: {}
  grafana-data: {}

scripts/observability/compose/prometheus/prometheus.yml (new file, +10)

global:
  scrape_interval: 15s
  evaluation_interval: 15s
  external_labels:
    monitor: "NomosTesting"

scrape_configs:
  - job_name: prometheus
    static_configs:
      - targets: ["prometheus:9090"]

scripts/observability/k8s/kube-prometheus-stack.values.yaml (new file, +19)

prometheus:
  prometheusSpec:
    enableOTLPReceiver: true
    additionalArgs:
      - name: web.enable-admin-api
    # Basic OTLP → Prometheus translation defaults are fine for most setups.
    # See: https://prometheus.io/docs/guides/opentelemetry/
    otlp: {}

grafana:
  adminUser: admin
  adminPassword: admin
  sidecar:
    dashboards:
      enabled: true
      label: grafana_dashboard
      labelValue: "1"
    datasources:
      enabled: true
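If you prefer to drive the new compose stack by hand instead of through `scripts/setup-observability.sh`, something like this should work (illustrative commands; `/-/ready` is Prometheus's standard readiness endpoint, and the ports come from the compose file above):

```bash
# Bring up Prometheus (9090) and Grafana (3000).
docker compose -f scripts/observability/compose/docker-compose.yml up -d

# Verify Prometheus is ready, then point the runner at the stack.
curl -fsS http://localhost:9090/-/ready
export NOMOS_METRICS_QUERY_URL=http://localhost:9090
# Note: nodes (not the runner) must be able to reach the OTLP ingest URL;
# from inside containers, localhost may need to be a host/network address.
export NOMOS_METRICS_OTLP_INGEST_URL=http://localhost:9090/api/v1/otlp/v1/metrics
export NOMOS_GRAFANA_URL=http://localhost:3000
```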
scripts/run-examples.sh
@@ -23,6 +23,15 @@ readonly DEFAULT_PRIVATE_AWS_REGION="ap-southeast-2"
 readonly DEFAULT_PULL_POLICY_LOCAL="IfNotPresent"
 readonly DEFAULT_PULL_POLICY_ECR="Always"
 readonly DOCKER_DESKTOP_CONTEXT="docker-desktop"
+readonly DEFAULT_K8S_ECR_SKIP_IMAGE_BUILD="1"
+
+run_examples::cleanup() {
+  rm -f "${SETUP_OUT:-}" 2>/dev/null || true
+}
+
+# Avoid inheriting environment-provided EXIT traps (e.g., from BASH_ENV) that can
+# reference missing functions and fail at script termination.
+trap run_examples::cleanup EXIT
 
 run_examples::usage() {
   cat <<EOF
@@ -40,7 +49,6 @@ Options:
   --bundle PATH                    Convenience alias for setting NOMOS_BINARIES_TAR=PATH
   --metrics-query-url URL          PromQL base URL the runner process can query (optional)
   --metrics-otlp-ingest-url URL    Full OTLP HTTP ingest URL for node metrics export (optional)
-  --grafana-url URL                Grafana base URL for printing/logging (optional)
   --external-prometheus URL        Alias for --metrics-query-url
   --external-otlp-metrics-endpoint URL  Alias for --metrics-otlp-ingest-url
   --local                          Use a local Docker image tag (default for docker-desktop k8s)
@@ -48,6 +56,8 @@ Options:
 
 Environment:
   VERSION                          Circuits version (default from versions.env)
+  CONSENSUS_SLOT_TIME              Consensus slot duration in seconds (default 2)
+  CONSENSUS_ACTIVE_SLOT_COEFF      Probability a slot is active (default 0.9); expected block interval ≈ slot_time / coeff
   NOMOS_TESTNET_IMAGE              Image reference (overridden by --local/--ecr selection)
   ECR_IMAGE                        Full image reference for --ecr (overrides ECR_REGISTRY/ECR_REPO/TAG)
   ECR_REGISTRY                     Registry hostname for --ecr (default ${DEFAULT_PUBLIC_ECR_REGISTRY})
@@ -56,9 +66,16 @@ Environment:
   NOMOS_TESTNET_IMAGE_PULL_POLICY  K8s imagePullPolicy (default ${DEFAULT_PULL_POLICY_LOCAL}; set to ${DEFAULT_PULL_POLICY_ECR} for --ecr)
   NOMOS_BINARIES_TAR               Path to prebuilt binaries/circuits tarball (default .tmp/nomos-binaries-<platform>-<version>.tar.gz)
   NOMOS_SKIP_IMAGE_BUILD           Set to 1 to skip rebuilding the compose/k8s image
+  NOMOS_FORCE_IMAGE_BUILD          Set to 1 to force image rebuild even for k8s ECR mode
   NOMOS_METRICS_QUERY_URL          PromQL base URL for the runner process (optional)
   NOMOS_METRICS_OTLP_INGEST_URL    Full OTLP HTTP ingest URL for node metrics export (optional)
   NOMOS_GRAFANA_URL                Grafana base URL for printing/logging (optional)
+
+Notes:
+  - For k8s runs on non-docker-desktop clusters (e.g. EKS), a locally built Docker image is not
+    visible to the cluster. By default, this script skips local image rebuilds in that case.
+    If you need a custom image, run scripts/build_test_image.sh and push it to a registry the
+    cluster can pull from, then set NOMOS_TESTNET_IMAGE accordingly.
 EOF
 }
 
@@ -104,7 +121,6 @@ run_examples::parse_args() {
   IMAGE_SELECTION_MODE="auto"
   METRICS_QUERY_URL=""
   METRICS_OTLP_INGEST_URL=""
-  GRAFANA_URL=""
 
   RUN_SECS_RAW_SPECIFIED=""
 
@@ -166,14 +182,6 @@ run_examples::parse_args() {
      METRICS_OTLP_INGEST_URL="${1#*=}"
      shift
      ;;
-    --grafana-url)
-      GRAFANA_URL="${2:-}"
-      shift 2
-      ;;
-    --grafana-url=*)
-      GRAFANA_URL="${1#*=}"
-      shift
-      ;;
    --external-prometheus)
      METRICS_QUERY_URL="${2:-}"
      shift 2
@@ -279,12 +287,20 @@ run_examples::select_image() {
    run_examples::fail_with_usage "Unknown image selection mode: ${selection}"
  fi
 
+  export NOMOS_IMAGE_SELECTION="${selection}"
  export IMAGE_TAG="${IMAGE}"
  export NOMOS_TESTNET_IMAGE="${IMAGE}"
 
  if [ "${MODE}" = "k8s" ]; then
    if [ "${selection}" = "ecr" ]; then
      export NOMOS_KZG_MODE="${NOMOS_KZG_MODE:-inImage}"
+      # A locally built Docker image isn't visible to remote clusters (e.g. EKS). Default to
+      # skipping the local rebuild, unless the user explicitly set NOMOS_SKIP_IMAGE_BUILD or
+      # overrides via NOMOS_FORCE_IMAGE_BUILD=1.
+      if [ "${NOMOS_FORCE_IMAGE_BUILD:-0}" != "1" ]; then
+        NOMOS_SKIP_IMAGE_BUILD="${NOMOS_SKIP_IMAGE_BUILD:-${DEFAULT_K8S_ECR_SKIP_IMAGE_BUILD}}"
+        export NOMOS_SKIP_IMAGE_BUILD
+      fi
    else
      export NOMOS_KZG_MODE="${NOMOS_KZG_MODE:-hostPath}"
    fi
@@ -548,9 +564,6 @@ run_examples::run() {
  if [ -n "${METRICS_OTLP_INGEST_URL}" ]; then
    export NOMOS_METRICS_OTLP_INGEST_URL="${METRICS_OTLP_INGEST_URL}"
  fi
-  if [ -n "${GRAFANA_URL}" ]; then
-    export NOMOS_GRAFANA_URL="${GRAFANA_URL}"
-  fi
 
  echo "==> Running ${BIN} for ${RUN_SECS}s (mode=${MODE}, image=${IMAGE})"
  cd "${ROOT_DIR}"
@@ -576,8 +589,6 @@ run_examples::main() {
  echo "==> Using restored circuits/binaries bundle"
 
  SETUP_OUT="$(common::tmpfile nomos-setup-output.XXXXXX)"
-  cleanup() { rm -f "${SETUP_OUT}" 2>/dev/null || true; }
-  trap cleanup EXIT
 
  run_examples::maybe_rebuild_image
  run_examples::maybe_restore_host_after_image
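One consequence worth spelling out: with `DEFAULT_K8S_ECR_SKIP_IMAGE_BUILD="1"`, a k8s ECR run now skips the local image build unless forced (illustrative invocation; `--ecr` is the image-selection mode whose defaults the usage text above documents):

```bash
# Force a local rebuild even in k8s ECR mode.
NOMOS_FORCE_IMAGE_BUILD=1 scripts/run-examples.sh -t 60 -v 1 -e 1 --ecr k8s
```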
@@ -25,7 +25,6 @@ Options:
   --force-k8s-image-build        Allow the k8s "rebuild image" run even on non-docker-desktop clusters
   --metrics-query-url URL        Forwarded to scripts/run-examples.sh (optional)
   --metrics-otlp-ingest-url URL  Forwarded to scripts/run-examples.sh (optional)
-  --grafana-url URL              Forwarded to scripts/run-examples.sh (optional)
   -h, --help                     Show this help
 
 Notes:
@@ -51,7 +50,6 @@ matrix::parse_args() {
     FORCE_K8S_IMAGE_BUILD=0
     METRICS_QUERY_URL=""
     METRICS_OTLP_INGEST_URL=""
-    GRAFANA_URL=""
 
     while [ "$#" -gt 0 ]; do
         case "$1" in
@@ -71,8 +69,6 @@ matrix::parse_args() {
             --metrics-query-url=*) METRICS_QUERY_URL="${1#*=}"; shift ;;
             --metrics-otlp-ingest-url) METRICS_OTLP_INGEST_URL="${2:-}"; shift 2 ;;
             --metrics-otlp-ingest-url=*) METRICS_OTLP_INGEST_URL="${1#*=}"; shift ;;
-            --grafana-url) GRAFANA_URL="${2:-}"; shift 2 ;;
-            --grafana-url=*) GRAFANA_URL="${1#*=}"; shift ;;
             *) matrix::die "Unknown argument: $1" ;;
         esac
     done
@@ -104,9 +100,6 @@ matrix::forwarded_args() {
     if [ -n "${METRICS_OTLP_INGEST_URL}" ]; then
         args+=(--metrics-otlp-ingest-url "${METRICS_OTLP_INGEST_URL}")
     fi
-    if [ -n "${GRAFANA_URL}" ]; then
-        args+=(--grafana-url "${GRAFANA_URL}")
-    fi
     printf '%s\0' "${args[@]}"
 }
 
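`matrix::forwarded_args` emits its result NUL-delimited via `printf '%s\0'`, which keeps arguments containing spaces intact. One way a caller might consume it (the read loop is illustrative, not part of this diff):

```bash
# Rebuild an argument array from NUL-delimited output without word splitting.
forward=()
while IFS= read -r -d '' arg; do
    forward+=("${arg}")
done < <(printf '%s\0' --metrics-query-url "http://localhost:9090")
printf '%s\n' "${forward[@]}"
```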
@@ -148,6 +141,7 @@ matrix::k8s_context() {
 matrix::main() {
     ROOT_DIR="$(common::repo_root)"
     export ROOT_DIR
+    export RUST_LOG="${RUST_LOG:-info}"
 
     matrix::parse_args "$@"
     matrix::split_modes
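The added export relies on the `${VAR:-default}` expansion, so a caller-supplied `RUST_LOG` survives while unset environments fall back to `info`:

```bash
unset RUST_LOG
echo "${RUST_LOG:-info}"                   # -> info (fallback)
RUST_LOG=debug; echo "${RUST_LOG:-info}"   # -> debug (caller value kept)
```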
@@ -211,11 +205,17 @@ matrix::main() {
         fi
 
         if [ "${ctx}" = "docker-desktop" ] || [ "${FORCE_K8S_IMAGE_BUILD}" -eq 1 ]; then
+            # On non-docker-desktop clusters, run-examples.sh defaults to skipping local image builds
+            # since the cluster can't see them. Honor the matrix "force" option by overriding.
+            if [ "${ctx}" != "docker-desktop" ] && [ "${FORCE_K8S_IMAGE_BUILD}" -eq 1 ]; then
+                export NOMOS_FORCE_IMAGE_BUILD=1
+            fi
             matrix::run_case "k8s.image_build" \
                 "${ROOT_DIR}/scripts/run-examples.sh" \
                 -t "${RUN_SECS}" -v "${VALIDATORS}" -e "${EXECUTORS}" \
                 "${forward[@]}" \
                 k8s
+            unset NOMOS_FORCE_IMAGE_BUILD || true
         else
             echo "==> [k8s] Detected context '${ctx}'; skipping image-build variant (use --force-k8s-image-build to override)"
         fi
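The export is paired with an `unset` after `matrix::run_case` so the override cannot leak into later cases. A per-command environment assignment is the usual one-shot alternative; a neutral demo of that idiom (not a change made by this commit):

```bash
demo() { echo "FORCE=${NOMOS_FORCE_IMAGE_BUILD:-unset}"; }
NOMOS_FORCE_IMAGE_BUILD=1 demo   # -> FORCE=1 (scoped to this call)
demo                             # -> FORCE=unset
```

Here the explicit export/unset pair is arguably clearer, since `matrix::run_case` wraps a multi-line command rather than a single invocation.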
@@ -259,4 +259,3 @@ matrix::main() {
 if [[ "${BASH_SOURCE[0]}" == "$0" ]]; then
     matrix::main "$@"
 fi
-
168  scripts/setup-observability.sh  (new Executable file)
@@ -0,0 +1,168 @@
+#!/usr/bin/env bash
+set -euo pipefail
+
+SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
+# shellcheck disable=SC1091
+. "${SCRIPT_DIR}/common.sh"
+
+common::ensure_bash "$@"
+
+ROOT="$(common::repo_root)"
+
+usage() {
+    cat <<'USAGE'
+Usage:
+  scripts/setup-observability.sh compose up|down|logs|env
+  scripts/setup-observability.sh k8s install|uninstall|dashboards|env
+
+Compose:
+  - Runs Prometheus (+ OTLP receiver) and Grafana via docker compose.
+  - Prints NOMOS_METRICS_* / NOMOS_GRAFANA_URL exports to wire into runs.
+
+Kubernetes:
+  - Installs prometheus-community/kube-prometheus-stack into namespace
+    "nomos-observability" and optionally loads Nomos Grafana dashboards.
+  - Prints port-forward commands + NOMOS_METRICS_* / NOMOS_GRAFANA_URL exports.
+USAGE
+}
+
+require_cmd() {
+    command -v "$1" >/dev/null 2>&1 || common::die "Missing required command: $1"
+}
+
+compose_file() {
+    echo "${ROOT}/scripts/observability/compose/docker-compose.yml"
+}
+
+compose_run() {
+    local file
+    file="$(compose_file)"
+    common::require_file "${file}"
+    docker compose -f "${file}" "$@"
+}
+
+compose_env() {
+    cat <<'EOF'
+export NOMOS_METRICS_QUERY_URL=http://localhost:9090
+export NOMOS_METRICS_OTLP_INGEST_URL=http://host.docker.internal:9090/api/v1/otlp/v1/metrics
+export NOMOS_GRAFANA_URL=http://localhost:3000
+EOF
+}
+
+k8s_namespace() { echo "nomos-observability"; }
+k8s_release() { echo "nomos-observability"; }
+k8s_values() { echo "${ROOT}/scripts/observability/k8s/kube-prometheus-stack.values.yaml"; }
+
+k8s_install() {
+    require_cmd kubectl
+    require_cmd helm
+
+    local ns release values
+    ns="$(k8s_namespace)"
+    release="$(k8s_release)"
+    values="$(k8s_values)"
+
+    common::require_file "${values}"
+
+    kubectl get ns "${ns}" >/dev/null 2>&1 || kubectl create ns "${ns}"
+
+    if ! helm repo list | grep -q '^prometheus-community[[:space:]]'; then
+        helm repo add prometheus-community https://prometheus-community.github.io/helm-charts
+    fi
+    helm repo update prometheus-community
+
+    helm upgrade --install "${release}" prometheus-community/kube-prometheus-stack \
+        -n "${ns}" \
+        -f "${values}"
+
+    kubectl -n "${ns}" wait --for=condition=Available deploy -l "release=${release}" --timeout=10m || true
+    kubectl -n "${ns}" wait --for=condition=Ready pod -l "release=${release}" --timeout=10m || true
+}
+
+k8s_uninstall() {
+    require_cmd kubectl
+    require_cmd helm
+
+    local ns release
+    ns="$(k8s_namespace)"
+    release="$(k8s_release)"
+
+    helm uninstall "${release}" -n "${ns}" 2>/dev/null || true
+    kubectl delete ns "${ns}" --ignore-not-found
+}
+
+k8s_apply_dashboards() {
+    require_cmd kubectl
+
+    local ns dash_dir
+    ns="$(k8s_namespace)"
+    dash_dir="${ROOT}/testing-framework/assets/stack/monitoring/grafana/dashboards"
+
+    [ -d "${dash_dir}" ] || common::die "Missing dashboards directory: ${dash_dir}"
+
+    local file base name
+    for file in "${dash_dir}"/*.json; do
+        base="$(basename "${file}" .json)"
+        name="nomos-dashboard-${base//[^a-zA-Z0-9-]/-}"
+        kubectl -n "${ns}" create configmap "${name}" \
+            --from-file="$(basename "${file}")=${file}" \
+            --dry-run=client -o yaml | kubectl apply -f -
+        kubectl -n "${ns}" label configmap "${name}" grafana_dashboard=1 --overwrite >/dev/null
+    done
+}
+
+k8s_env() {
+    local ns release
+    ns="$(k8s_namespace)"
+    release="$(k8s_release)"
+
+    cat <<EOF
+# Prometheus (runner-side): port-forward then set:
+kubectl -n ${ns} port-forward svc/${release}-kube-p-prometheus 9090:9090
+export NOMOS_METRICS_QUERY_URL=http://localhost:9090
+
+# Grafana (runner-side): port-forward then set:
+kubectl -n ${ns} port-forward svc/${release}-grafana 3000:80
+export NOMOS_GRAFANA_URL=http://localhost:3000
+
+# Prometheus OTLP ingest (node-side inside the cluster):
+export NOMOS_METRICS_OTLP_INGEST_URL=http://${release}-kube-p-prometheus.${ns}:9090/api/v1/otlp/v1/metrics
+EOF
+}
+
+main() {
+    local target="${1:-}"
+    local action="${2:-}"
+
+    case "${target}" in
+        compose)
+            require_cmd docker
+            case "${action}" in
+                up) compose_run up -d ;;
+                down) compose_run down -v ;;
+                logs) compose_run logs -f ;;
+                env) compose_env ;;
+                ""|help|-h|--help) usage ;;
+                *) common::die "Unknown compose action: ${action}" ;;
+            esac
+            ;;
+        k8s)
+            case "${action}" in
+                install) k8s_install ;;
+                uninstall) k8s_uninstall ;;
+                dashboards) k8s_apply_dashboards ;;
+                env) k8s_env ;;
+                ""|help|-h|--help) usage ;;
+                *) common::die "Unknown k8s action: ${action}" ;;
+            esac
+            ;;
+        ""|help|-h|--help)
+            usage
+            ;;
+        *)
+            common::die "Unknown target: ${target}"
+            ;;
+    esac
+}
+
+main "$@"
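Taken together, a plausible end-to-end session with the new helper; the run duration and node counts are illustrative, and the `compose`/`k8s` positional mode is assumed to match the `run-examples.sh` invocations seen above:

```bash
# Compose flavor: start the stack, wire the env, run an example, tear down.
scripts/setup-observability.sh compose up
eval "$(scripts/setup-observability.sh compose env)"
scripts/run-examples.sh -t 60 -v 3 -e 2 compose
scripts/setup-observability.sh compose down

# Kubernetes flavor: install kube-prometheus-stack, load dashboards,
# then print the port-forward commands and exports to copy into the shell.
scripts/setup-observability.sh k8s install
scripts/setup-observability.sh k8s dashboards
scripts/setup-observability.sh k8s env
```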
@@ -1,5 +1,7 @@
 use std::{
+    env,
     num::{NonZero, NonZeroU64},
+    str::FromStr as _,
     sync::Arc,
 };
 
@@ -35,8 +37,30 @@ pub struct ConsensusParams {
 }
 
 impl ConsensusParams {
+    const DEFAULT_ACTIVE_SLOT_COEFF: f64 = 0.9;
+    const CONSENSUS_ACTIVE_SLOT_COEFF_VAR: &str = "CONSENSUS_ACTIVE_SLOT_COEFF";
+
     #[must_use]
-    pub const fn default_for_participants(n_participants: usize) -> Self {
+    pub fn default_for_participants(n_participants: usize) -> Self {
+        let active_slot_coeff = env::var(Self::CONSENSUS_ACTIVE_SLOT_COEFF_VAR)
+            .map(|s| {
+                f64::from_str(&s).unwrap_or_else(|err| {
+                    panic!(
+                        "invalid {}='{}' (expected a float in (0.0, 1.0]): {err}",
+                        Self::CONSENSUS_ACTIVE_SLOT_COEFF_VAR,
+                        s
+                    )
+                })
+            })
+            .unwrap_or(Self::DEFAULT_ACTIVE_SLOT_COEFF);
+
+        assert!(
+            (0.0..=1.0).contains(&active_slot_coeff) && active_slot_coeff > 0.0,
+            "{} must be in (0.0, 1.0], got {}",
+            Self::CONSENSUS_ACTIVE_SLOT_COEFF_VAR,
+            active_slot_coeff
+        );
+
         Self {
             n_participants,
             // by setting the slot coeff to 1, we also increase the probability of multiple blocks
@@ -45,7 +69,7 @@ impl ConsensusParams {
             // deciding on the longest chain.
             security_param: NonZero::new(10).unwrap(),
             // a block should be produced (on average) every slot
-            active_slot_coeff: 0.9,
+            active_slot_coeff,
         }
     }
 }
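With this change the active slot coefficient becomes an env-tunable default rather than a hard-coded `0.9`. A hedged example of exercising it from the shell (the cargo target is illustrative; any process that constructs `ConsensusParams::default_for_participants` reads the variable):

```bash
# Accepted: overrides the 0.9 default.
CONSENSUS_ACTIVE_SLOT_COEFF=0.5 cargo test --workspace

# Rejected: non-floats or values outside (0.0, 1.0] abort via the
# panic!/assert! paths added above.
CONSENSUS_ACTIVE_SLOT_COEFF=1.5 cargo test --workspace   # panics when params are built
```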
@@ -27,6 +27,7 @@ pub fn default_time_config() -> GeneralTimeConfig {
     let slot_duration = std::env::var(CONSENSUS_SLOT_TIME_VAR)
         .map(|s| <u64>::from_str(&s).unwrap())
         .unwrap_or(DEFAULT_SLOT_TIME);
+
     GeneralTimeConfig {
         slot_duration: Duration::from_secs(slot_duration),
         chain_start_time: OffsetDateTime::now_utc(),
@@ -1,3 +1,8 @@
+use std::{
+    env,
+    path::{Path, PathBuf},
+};
+
 use serde::Serialize;
 use testing_framework_core::{
     constants::{DEFAULT_CFGSYNC_PORT, kzg_container_path},
@@ -150,9 +155,26 @@ fn base_volumes(use_kzg_mount: bool) -> Vec<String> {
     if use_kzg_mount {
         volumes.push("./kzgrs_test_params:/kzgrs_test_params:z".into());
     }
+    if let Some(host_log_dir) = repo_root()
+        .map(|root| root.join("tmp").join("node-logs"))
+        .map(|dir| dir.display().to_string())
+    {
+        volumes.push(format!("{host_log_dir}:/tmp/node-logs"));
+    }
     volumes
 }
 
+fn repo_root() -> Option<PathBuf> {
+    if let Ok(root) = env::var("CARGO_WORKSPACE_DIR") {
+        return Some(PathBuf::from(root));
+    }
+    Path::new(env!("CARGO_MANIFEST_DIR"))
+        .parent()
+        .and_then(Path::parent)
+        .and_then(Path::parent)
+        .map(Path::to_path_buf)
+}
+
 fn default_extra_hosts() -> Vec<String> {
     host_gateway_entry().into_iter().collect()
 }
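`repo_root` prefers `CARGO_WORKSPACE_DIR` and otherwise walks three `parent()` hops up from the deployer crate's `CARGO_MANIFEST_DIR`. When running from a non-standard layout, pinning the root explicitly keeps the `/tmp/node-logs` mount pointed at the repo (commands illustrative):

```bash
# Pin the workspace root so node logs land in <repo>/tmp/node-logs.
export CARGO_WORKSPACE_DIR="$(git rev-parse --show-toplevel)"
mkdir -p "${CARGO_WORKSPACE_DIR}/tmp/node-logs"
```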
@@ -311,6 +311,7 @@ pub fn write_compose_artifacts(
     let compose_path = workspace.root.join("compose.generated.yml");
     write_compose_file(&descriptor, &compose_path)
         .map_err(|source| ConfigError::Template { source })?;
+
     debug!(compose_file = %compose_path.display(), "rendered compose file");
     Ok(compose_path)
 }
@@ -302,6 +302,7 @@ impl<Caps> TransactionFlowBuilder<Caps> {
     let workload = transaction::Workload::with_rate(self.rate.get())
         .expect("transaction rate must be non-zero")
         .with_user_limit(self.users);
+
     tracing::info!(
         rate = self.rate.get(),
         users = self.users.map(|u| u.get()),