refactor(observability): remove embedded prometheus/grafana

Deployers no longer provision Prometheus/Grafana; metrics query/ingest now come from explicit URLs via env/flags.
This commit is contained in:
andrussal 2025-12-17 18:28:36 +01:00
parent 3965669f12
commit e05bf5e0bd
43 changed files with 441 additions and 1305 deletions

2
Cargo.lock generated
View File

@ -7267,6 +7267,7 @@ dependencies = [
"key-management-system-service",
"nomos-core",
"nomos-ledger",
"nomos-tracing",
"nomos-tracing-service",
"reqwest",
"serde",
@ -7290,6 +7291,7 @@ dependencies = [
"async-trait",
"k8s-openapi",
"kube",
"nomos-tracing",
"nomos-tracing-service",
"reqwest",
"serde",

View File

@ -90,7 +90,7 @@ Three deployer implementations:
| `K8sDeployer` | Kubernetes Helm | Cluster + image loaded | Not yet |
**Compose-specific features:**
- Includes Prometheus at `http://localhost:9090` (override via `TEST_FRAMEWORK_PROMETHEUS_PORT`)
- Observability is external (set `NOMOS_METRICS_QUERY_URL` / `NOMOS_METRICS_OTLP_INGEST_URL` / `NOMOS_GRAFANA_URL` as needed)
- Optional OTLP trace/metrics endpoints (`NOMOS_OTLP_ENDPOINT`, `NOMOS_OTLP_METRICS_ENDPOINT`)
- Node control for chaos testing (restart validators/executors)
@ -112,9 +112,9 @@ KZG parameters required for DA workloads:
### Compose Stack
Templates and configs in `testing-framework/runners/compose/assets/`:
- `docker-compose.yml.tera` — Stack template (validators, executors, Prometheus, Grafana)
- `docker-compose.yml.tera` — Stack template (validators, executors)
- Cfgsync config: `testing-framework/assets/stack/cfgsync.yaml`
- Monitoring: `testing-framework/assets/stack/monitoring/prometheus.yml`
- Monitoring assets (not deployed by the framework): `testing-framework/assets/stack/monitoring/`
## Logging Architecture
@ -134,14 +134,14 @@ Templates and configs in `testing-framework/runners/compose/assets/`:
## Observability
**Prometheus (Compose + K8s):**
- Exposed at `http://localhost:9090` (configurable)
- Scrapes all validator and executor metrics
- Accessible in expectations: `ctx.telemetry().prometheus().map(|p| p.base_url())`
**Prometheus-compatible metrics querying (optional):**
- The framework does **not** deploy Prometheus/Grafana.
- Provide a Prometheus-compatible base URL (PromQL API) via `NOMOS_METRICS_QUERY_URL`.
- Accessible in expectations when configured: `ctx.telemetry().prometheus().map(|p| p.base_url())`
**Grafana dashboards (Compose + K8s):**
- Provisioned automatically; URL is printed in `TESTNET_ENDPOINTS` when using `scripts/run-examples.sh`
- Default credentials: `admin` / `admin`
**Grafana dashboards (optional):**
- Dashboards live in `testing-framework/assets/stack/monitoring/grafana/dashboards/` and can be imported into your Grafana.
- If you set `NOMOS_GRAFANA_URL`, the deployer prints it in `TESTNET_ENDPOINTS`.
**Node APIs:**
- HTTP endpoints per node for consensus info, network status, DA membership

View File

@ -181,7 +181,9 @@ cargo run -p runner-examples --bin compose_runner
- `POL_PROOF_DEV_MODE=true` — **Required** for all runners
- `NOMOS_DEMO_VALIDATORS=3` / `NOMOS_DEMO_EXECUTORS=2` / `NOMOS_DEMO_RUN_SECS=120` — Topology overrides
- `COMPOSE_NODE_PAIRS=1x1` — Alternative topology format: "validators×executors"
- `TEST_FRAMEWORK_PROMETHEUS_PORT=9091` — Override Prometheus port (default: 9090)
- `NOMOS_METRICS_QUERY_URL` — Prometheus-compatible base URL for the runner process to query (optional)
- `NOMOS_METRICS_OTLP_INGEST_URL` — Full OTLP HTTP ingest URL for node metrics export (optional)
- `NOMOS_GRAFANA_URL` — Grafana base URL for printing/logging (optional)
- `COMPOSE_RUNNER_HOST=127.0.0.1` — Host address for port mappings
- `COMPOSE_RUNNER_PRESERVE=1` — Keep containers running after test
- `NOMOS_LOG_LEVEL=debug` / `NOMOS_LOG_FILTER=...` — Control node log verbosity (stdout/stderr)
@ -189,7 +191,7 @@ cargo run -p runner-examples --bin compose_runner
**Compose-specific features:**
- **Node control support**: Only runner that supports chaos testing (`.enable_node_control()` + chaos workloads)
- **Prometheus observability**: Metrics at `http://localhost:9090`
- **Observability is external**: Set `NOMOS_METRICS_*` / `NOMOS_GRAFANA_URL` to enable telemetry links and querying
**Important:**
- Containers expect KZG parameters at `/kzgrs_test_params/kzgrs_test_params` (note the repeated filename)
@ -229,21 +231,30 @@ cargo run -p runner-examples --bin k8s_runner
- `NOMOS_TESTNET_IMAGE` — Image tag (required)
- `POL_PROOF_DEV_MODE=true` — **Required** for all runners
- `NOMOS_DEMO_VALIDATORS` / `NOMOS_DEMO_EXECUTORS` / `NOMOS_DEMO_RUN_SECS` — Topology overrides
- `K8S_RUNNER_EXTERNAL_PROMETHEUS_URL` (or `NOMOS_EXTERNAL_PROMETHEUS_URL`) — Reuse an existing Prometheus and skip deploying the in-chart Prometheus; also points node OTLP metrics export and the in-cluster Grafana datasource at that Prometheus
- `NOMOS_METRICS_QUERY_URL` — Prometheus-compatible base URL for the runner process to query (PromQL)
- `NOMOS_METRICS_OTLP_INGEST_URL` — Full OTLP HTTP ingest URL for node metrics export (optional)
- `NOMOS_GRAFANA_URL` — Grafana base URL for printing/logging (optional)
**External Prometheus (optional):**
**Metrics + Grafana (optional):**
```bash
export K8S_RUNNER_EXTERNAL_PROMETHEUS_URL=http://your-prometheus:9090
export NOMOS_METRICS_QUERY_URL=http://your-prometheus:9090
# Prometheus OTLP receiver example:
export NOMOS_METRICS_OTLP_INGEST_URL=http://your-prometheus:9090/api/v1/otlp/v1/metrics
# Optional: print a Grafana link in TESTNET_ENDPOINTS
export NOMOS_GRAFANA_URL=http://your-grafana:3000
cargo run -p runner-examples --bin k8s_runner
```
Notes:
- The runner config expects Prometheus to accept OTLP metrics at `/api/v1/otlp/v1/metrics` (the in-chart Prometheus is started with `--web.enable-otlp-receiver` and `--enable-feature=otlp-write-receiver`).
- Use a URL reachable from inside the cluster (for example a `Service` DNS name like `http://prometheus.monitoring:9090`).
- `NOMOS_METRICS_QUERY_URL` must be reachable from the runner process (often via `kubectl port-forward`).
- `NOMOS_METRICS_OTLP_INGEST_URL` must be reachable from nodes (pods/containers) and is backend-specific (Prometheus vs VictoriaMetrics paths differ).
**Via `scripts/run-examples.sh` (optional):**
```bash
scripts/run-examples.sh -t 60 -v 1 -e 1 k8s --external-prometheus http://your-prometheus:9090
scripts/run-examples.sh -t 60 -v 1 -e 1 k8s \
--metrics-query-url http://your-prometheus:9090 \
--metrics-otlp-ingest-url http://your-prometheus:9090/api/v1/otlp/v1/metrics \
--grafana-url http://your-grafana:3000
```
**In code (optional):**
@ -252,7 +263,8 @@ use testing_framework_core::scenario::ScenarioBuilder;
use testing_framework_workflows::ObservabilityBuilderExt as _;
let plan = ScenarioBuilder::with_node_counts(1, 1)
.with_external_prometheus_str("http://your-prometheus:9090")
.with_metrics_query_url_str("http://your-prometheus:9090")
.with_metrics_otlp_ingest_url_str("http://your-prometheus:9090/api/v1/otlp/v1/metrics")
.build();
```
@ -484,7 +496,6 @@ cargo run -p runner-examples --bin compose_runner
- `COMPOSE_RUNNER_HOST=127.0.0.1` — host used for readiness probes (override for remote Docker daemons / VM networking)
- `COMPOSE_RUNNER_HOST_GATEWAY=host.docker.internal:host-gateway` — controls the `extra_hosts` entry injected into compose (set to `disable` to omit)
- `TESTNET_RUNNER_PRESERVE=1` — alias for `COMPOSE_RUNNER_PRESERVE=1`
- `COMPOSE_GRAFANA_PORT=<port>` — pin Grafana to a fixed host port instead of ephemeral assignment
- `COMPOSE_RUNNER_HTTP_TIMEOUT_SECS=<secs>` — override compose node HTTP readiness timeout
**Note:** Container names follow pattern `nomos-compose-{uuid}-validator-{index}-1` where `{uuid}` changes per run.
@ -553,15 +564,15 @@ cargo run -p runner-examples --bin local_runner
Runners expose metrics and node HTTP endpoints for expectation code and debugging:
**Prometheus (Compose + K8s):**
- Default: `http://localhost:9090`
- Override: `TEST_FRAMEWORK_PROMETHEUS_PORT=9091`
- Note: the host port can vary if `9090` is unavailable; prefer the printed `TESTNET_ENDPOINTS` line as the source of truth.
- Access from expectations: `ctx.telemetry().prometheus().map(|p| p.base_url())`
**Prometheus-compatible metrics querying (optional):**
- The framework does **not** deploy Prometheus.
- Provide `NOMOS_METRICS_QUERY_URL` (PromQL base URL) to enable `ctx.telemetry()` queries.
- Access from expectations when configured: `ctx.telemetry().prometheus().map(|p| p.base_url())`
**Grafana dashboards (Compose + K8s):**
- The deployer prints the Grafana base URL in `TESTNET_ENDPOINTS`.
- Default credentials are `admin` / `admin`.
**Grafana (optional):**
- The framework does **not** deploy Grafana.
- If you set `NOMOS_GRAFANA_URL`, the deployer prints it in `TESTNET_ENDPOINTS`.
- Dashboards live in `testing-framework/assets/stack/monitoring/grafana/dashboards/` for import into your Grafana.
**Node APIs:**
- Access from expectations: `ctx.node_clients().validator_clients().get(0)`

View File

@ -109,7 +109,7 @@ async fn run_compose_case(
};
if !runner.context().telemetry().is_configured() {
warn!("compose runner should expose prometheus metrics");
warn!("metrics querying is disabled; set NOMOS_METRICS_QUERY_URL to enable PromQL queries");
}
info!("running scenario");

View File

@ -104,7 +104,7 @@ async fn run_k8s_case(validators: usize, executors: usize, run_duration: Duratio
};
if !runner.context().telemetry().is_configured() {
warn!("k8s runner should expose prometheus metrics");
warn!("metrics querying is disabled; set NOMOS_METRICS_QUERY_URL to enable PromQL queries");
}
let validator_clients = runner.context().node_clients().validator_clients().to_vec();

View File

@ -38,12 +38,13 @@ Options:
-v, --validators N Number of validators (required)
-e, --executors N Number of executors (required)
--bundle PATH Convenience alias for setting NOMOS_BINARIES_TAR=PATH
--metrics-query-url URL (k8s) PromQL base URL the runner process can query (often localhost port-forward)
--metrics-query-grafana-url URL (k8s) PromQL base URL reachable from inside the cluster (Grafana datasource)
--metrics-otlp-ingest-url URL (k8s) Full OTLP HTTP ingest URL for node metrics export
--external-prometheus URL (k8s) Alias for --metrics-query-url
--external-prometheus-grafana-url URL (k8s) Alias for --metrics-query-grafana-url
--external-otlp-metrics-endpoint URL (k8s) Alias for --metrics-otlp-ingest-url
--metrics-query-url URL PromQL base URL the runner process can query (optional)
--metrics-query-grafana-url URL PromQL base URL for a Grafana datasource (optional)
--metrics-otlp-ingest-url URL Full OTLP HTTP ingest URL for node metrics export (optional)
--grafana-url URL Grafana base URL for printing/logging (optional)
--external-prometheus URL Alias for --metrics-query-url
--external-prometheus-grafana-url URL Alias for --metrics-query-grafana-url
--external-otlp-metrics-endpoint URL Alias for --metrics-otlp-ingest-url
--local Use a local Docker image tag (default for docker-desktop k8s)
--ecr Use an ECR image reference (default for non-docker-desktop k8s)
--no-image-build Skip rebuilding the compose/k8s image (sets NOMOS_SKIP_IMAGE_BUILD=1)
@ -64,6 +65,8 @@ Environment:
NOMOS_METRICS_QUERY_GRAFANA_URL Alias for K8S_RUNNER_METRICS_QUERY_GRAFANA_URL
K8S_RUNNER_METRICS_OTLP_INGEST_URL Full OTLP HTTP ingest URL for node metrics export
NOMOS_METRICS_OTLP_INGEST_URL Alias for K8S_RUNNER_METRICS_OTLP_INGEST_URL
K8S_RUNNER_GRAFANA_URL Grafana base URL for printing/logging (optional)
NOMOS_GRAFANA_URL Alias for K8S_RUNNER_GRAFANA_URL
Deprecated env vars (still supported):
K8S_RUNNER_EXTERNAL_PROMETHEUS_URL, NOMOS_EXTERNAL_PROMETHEUS_URL
@ -115,6 +118,7 @@ run_examples::parse_args() {
METRICS_QUERY_URL=""
METRICS_QUERY_GRAFANA_URL=""
METRICS_OTLP_INGEST_URL=""
GRAFANA_URL=""
RUN_SECS_RAW_SPECIFIED=""
@ -184,6 +188,14 @@ run_examples::parse_args() {
METRICS_OTLP_INGEST_URL="${1#*=}"
shift
;;
--grafana-url)
GRAFANA_URL="${2:-}"
shift 2
;;
--grafana-url=*)
GRAFANA_URL="${1#*=}"
shift
;;
--external-prometheus)
METRICS_QUERY_URL="${2:-}"
shift 2
@ -262,18 +274,6 @@ run_examples::parse_args() {
run_examples::fail_with_usage "executors must be a non-negative integer (pass -e/--executors)"
fi
if [ -n "${METRICS_QUERY_URL}" ] && [ "${MODE}" != "k8s" ]; then
echo "Warning: --metrics-query-url is only used in k8s mode; ignoring." >&2
METRICS_QUERY_URL=""
fi
if [ -n "${METRICS_QUERY_GRAFANA_URL}" ] && [ "${MODE}" != "k8s" ]; then
echo "Warning: --metrics-query-grafana-url is only used in k8s mode; ignoring." >&2
METRICS_QUERY_GRAFANA_URL=""
fi
if [ -n "${METRICS_OTLP_INGEST_URL}" ] && [ "${MODE}" != "k8s" ]; then
echo "Warning: --metrics-otlp-ingest-url is only used in k8s mode; ignoring." >&2
METRICS_OTLP_INGEST_URL=""
fi
}
run_examples::select_image() {
@ -582,17 +582,21 @@ run_examples::run() {
export NOMOS_DEMO_VALIDATORS="${DEMO_VALIDATORS}"
export NOMOS_DEMO_EXECUTORS="${DEMO_EXECUTORS}"
if [ "${MODE}" = "k8s" ] && [ -n "${METRICS_QUERY_URL}" ]; then
export K8S_RUNNER_METRICS_QUERY_URL="${METRICS_QUERY_URL}"
if [ -n "${METRICS_QUERY_URL}" ]; then
export NOMOS_METRICS_QUERY_URL="${METRICS_QUERY_URL}"
export K8S_RUNNER_METRICS_QUERY_URL="${METRICS_QUERY_URL}"
fi
if [ "${MODE}" = "k8s" ] && [ -n "${METRICS_QUERY_GRAFANA_URL}" ]; then
export K8S_RUNNER_METRICS_QUERY_GRAFANA_URL="${METRICS_QUERY_GRAFANA_URL}"
if [ -n "${METRICS_QUERY_GRAFANA_URL}" ]; then
export NOMOS_METRICS_QUERY_GRAFANA_URL="${METRICS_QUERY_GRAFANA_URL}"
export K8S_RUNNER_METRICS_QUERY_GRAFANA_URL="${METRICS_QUERY_GRAFANA_URL}"
fi
if [ "${MODE}" = "k8s" ] && [ -n "${METRICS_OTLP_INGEST_URL}" ]; then
export K8S_RUNNER_METRICS_OTLP_INGEST_URL="${METRICS_OTLP_INGEST_URL}"
if [ -n "${METRICS_OTLP_INGEST_URL}" ]; then
export NOMOS_METRICS_OTLP_INGEST_URL="${METRICS_OTLP_INGEST_URL}"
export K8S_RUNNER_METRICS_OTLP_INGEST_URL="${METRICS_OTLP_INGEST_URL}"
fi
if [ -n "${GRAFANA_URL}" ]; then
export NOMOS_GRAFANA_URL="${GRAFANA_URL}"
export K8S_RUNNER_GRAFANA_URL="${GRAFANA_URL}"
fi
echo "==> Running ${BIN} for ${RUN_SECS}s (mode=${MODE}, image=${IMAGE})"

View File

@ -42,8 +42,6 @@ tracing_settings:
filters:
nomos: debug
cryptarchia: debug
metrics: !Otlp
endpoint: http://prometheus:9090/api/v1/otlp/v1/metrics
host_identifier: node
metrics: None
console: None
level: INFO

View File

@ -21,18 +21,6 @@ pub const DEFAULT_NODE_HTTP_PROBE_TIMEOUT: Duration = Duration::from_secs(30);
/// Default Kubernetes deployment readiness timeout.
pub const DEFAULT_K8S_DEPLOYMENT_TIMEOUT: Duration = Duration::from_secs(180);
/// Default Prometheus HTTP port.
pub const DEFAULT_PROMETHEUS_HTTP_PORT: u16 = 9090;
/// Default Prometheus HTTP timeout.
pub const DEFAULT_PROMETHEUS_HTTP_TIMEOUT: Duration = Duration::from_secs(240);
/// Default Prometheus HTTP probe timeout for NodePort checks.
pub const DEFAULT_PROMETHEUS_HTTP_PROBE_TIMEOUT: Duration = Duration::from_secs(30);
/// Default Prometheus service name.
pub const DEFAULT_PROMETHEUS_SERVICE_NAME: &str = "prometheus";
/// Default API port used by nodes.
pub const DEFAULT_API_PORT: u16 = 18080;

View File

@ -8,9 +8,6 @@ use super::DynError;
pub struct NodeControlCapability;
/// Optional observability settings attached to a scenario.
///
/// Runners may use this to decide whether to provision in-cluster Prometheus or
/// reuse an existing endpoint.
#[derive(Clone, Debug, Default)]
pub struct ObservabilityCapability {
/// Prometheus-compatible base URL used by the *runner process* to query
@ -24,6 +21,8 @@ pub struct ObservabilityCapability {
/// Full OTLP HTTP metrics ingest endpoint used by *nodes* to export metrics
/// (backend-specific host and path).
pub metrics_otlp_ingest_url: Option<Url>,
/// Optional Grafana base URL for printing/logging (human access).
pub grafana_url: Option<Url>,
}
/// Trait implemented by scenario capability markers to signal whether node

View File

@ -5,6 +5,7 @@ pub mod cfgsync;
mod definition;
mod expectation;
pub mod http_probe;
mod observability;
mod runtime;
mod workload;
@ -15,6 +16,7 @@ pub use capabilities::{
};
pub use definition::{Builder, Scenario, ScenarioBuilder, TopologyConfigurator};
pub use expectation::Expectation;
pub use observability::{ObservabilityCapabilityProvider, ObservabilityInputs};
pub use runtime::{
BlockFeed, BlockFeedTask, BlockRecord, BlockStats, CleanupGuard, Deployer, NodeClients,
RunContext, RunHandle, RunMetrics, Runner, ScenarioError,

View File

@ -0,0 +1,145 @@
use std::env;
use reqwest::Url;
use super::{Metrics, MetricsError, NodeControlCapability, ObservabilityCapability};
/// Observability configuration inputs shared by deployers/runners.
///
/// All fields are optional; missing values only matter when a caller needs the
/// corresponding capability (e.g. querying metrics from the runner process).
///
/// Inputs are typically read from env vars via [`Self::from_env`], merged with
/// scenario-level settings via [`Self::with_overrides`], and finalized with
/// [`Self::normalized`].
#[derive(Clone, Debug, Default)]
pub struct ObservabilityInputs {
    /// Prometheus-compatible base URL used by the runner process to query
    /// metrics (PromQL API endpoints).
    pub metrics_query_url: Option<Url>,
    /// Prometheus-compatible base URL intended for an in-cluster Grafana
    /// datasource.
    pub metrics_query_grafana_url: Option<Url>,
    /// Full OTLP HTTP metrics ingest endpoint used by nodes to export metrics
    /// (backend-specific host and path).
    pub metrics_otlp_ingest_url: Option<Url>,
    /// Optional Grafana base URL for printing/logging (human access).
    pub grafana_url: Option<Url>,
}
/// Capability helper for deployers that are generic over scenario capability
/// markers.
pub trait ObservabilityCapabilityProvider {
    /// Returns the scenario's observability capability, if this capability
    /// marker carries one.
    fn observability_capability(&self) -> Option<&ObservabilityCapability>;
}
impl ObservabilityCapabilityProvider for () {
    // The unit marker carries no observability settings.
    fn observability_capability(&self) -> Option<&ObservabilityCapability> {
        None
    }
}
impl ObservabilityCapabilityProvider for NodeControlCapability {
    // Node-control scenarios do not bundle observability settings either.
    fn observability_capability(&self) -> Option<&ObservabilityCapability> {
        None
    }
}
impl ObservabilityCapabilityProvider for ObservabilityCapability {
    // The capability itself is the configuration source.
    fn observability_capability(&self) -> Option<&ObservabilityCapability> {
        Some(self)
    }
}
impl ObservabilityInputs {
#[must_use]
pub fn from_capability(capabilities: &ObservabilityCapability) -> Self {
Self {
metrics_query_url: capabilities.metrics_query_url.clone(),
metrics_query_grafana_url: capabilities.metrics_query_grafana_url.clone(),
metrics_otlp_ingest_url: capabilities.metrics_otlp_ingest_url.clone(),
grafana_url: capabilities.grafana_url.clone(),
}
}
/// Load observability inputs from environment variables.
///
/// The `NOMOS_*` namespace applies to all deployers. Runner-specific env
/// vars are also accepted as aliases for backwards compatibility.
pub fn from_env() -> Result<Self, MetricsError> {
Ok(Self {
metrics_query_url: read_url_var(&[
"NOMOS_METRICS_QUERY_URL",
"K8S_RUNNER_METRICS_QUERY_URL",
// Back-compat:
"K8S_RUNNER_EXTERNAL_PROMETHEUS_URL",
"NOMOS_EXTERNAL_PROMETHEUS_URL",
])?,
metrics_query_grafana_url: read_url_var(&[
"NOMOS_METRICS_QUERY_GRAFANA_URL",
"K8S_RUNNER_METRICS_QUERY_GRAFANA_URL",
// Back-compat:
"K8S_RUNNER_EXTERNAL_PROMETHEUS_GRAFANA_URL",
"NOMOS_EXTERNAL_PROMETHEUS_GRAFANA_URL",
])?,
metrics_otlp_ingest_url: read_url_var(&[
"NOMOS_METRICS_OTLP_INGEST_URL",
"K8S_RUNNER_METRICS_OTLP_INGEST_URL",
// Back-compat:
"K8S_RUNNER_EXTERNAL_OTLP_METRICS_ENDPOINT",
"NOMOS_EXTERNAL_OTLP_METRICS_ENDPOINT",
])?,
grafana_url: read_url_var(&["NOMOS_GRAFANA_URL", "K8S_RUNNER_GRAFANA_URL"])?,
})
}
/// Apply defaults and fallbacks (pure function).
///
/// Currently, the only fallback is using `metrics_query_url` as the Grafana
/// datasource URL when `metrics_query_grafana_url` is unset.
#[must_use]
pub fn normalized(mut self) -> Self {
if self.metrics_query_grafana_url.is_none() {
self.metrics_query_grafana_url = self.metrics_query_url.clone();
}
self
}
/// Overlay non-empty values from `overrides` onto `self`.
#[must_use]
pub fn with_overrides(mut self, overrides: Self) -> Self {
if overrides.metrics_query_url.is_some() {
self.metrics_query_url = overrides.metrics_query_url;
}
if overrides.metrics_query_grafana_url.is_some() {
self.metrics_query_grafana_url = overrides.metrics_query_grafana_url;
}
if overrides.metrics_otlp_ingest_url.is_some() {
self.metrics_otlp_ingest_url = overrides.metrics_otlp_ingest_url;
}
if overrides.grafana_url.is_some() {
self.grafana_url = overrides.grafana_url;
}
self
}
/// Build the telemetry handle exposed in `RunContext::telemetry()`.
pub fn telemetry_handle(&self) -> Result<Metrics, MetricsError> {
match self.metrics_query_url.clone() {
Some(url) => Metrics::from_prometheus(url),
None => Ok(Metrics::empty()),
}
}
}
/// Return the first usable URL among the given env var `keys`.
///
/// Unset or blank (after trimming) variables are skipped; the first variable
/// holding a non-empty value is parsed, and a parse failure is surfaced as a
/// [`MetricsError`] naming the offending variable.
fn read_url_var(keys: &[&'static str]) -> Result<Option<Url>, MetricsError> {
    // Locate the first key whose value is set and non-blank.
    let first_set = keys.iter().find_map(|key| {
        let value = env::var(key).ok()?;
        let trimmed = value.trim().to_owned();
        if trimmed.is_empty() {
            None
        } else {
            Some((*key, trimmed))
        }
    });
    match first_set {
        None => Ok(None),
        Some((key, raw)) => Url::parse(&raw)
            .map(Some)
            .map_err(|err| MetricsError::new(format!("invalid {key}: {err}"))),
    }
}

View File

@ -16,6 +16,8 @@ workspace = true
anyhow = "1"
async-trait = { workspace = true }
cfgsync = { workspace = true }
nomos-tracing = { workspace = true }
nomos-tracing-service = { workspace = true }
reqwest = { workspace = true, features = ["json"] }
serde = { workspace = true, features = ["derive"] }
tempfile = { workspace = true }
@ -32,6 +34,5 @@ groth16 = { workspace = true }
key-management-system-service = { workspace = true }
nomos-core = { workspace = true }
nomos-ledger = { workspace = true }
nomos-tracing-service = { workspace = true }
tests = { workspace = true }
zksign = { workspace = true }

View File

@ -1,37 +1,4 @@
services:
prometheus:
image: prom/prometheus:v3.0.1
{% if prometheus.platform %} platform: {{ prometheus.platform }}
{% endif %} command:
- --config.file=/etc/prometheus/prometheus.yml
- --storage.tsdb.retention.time=7d
- --web.enable-otlp-receiver
- --enable-feature=otlp-write-receiver
volumes:
- ./stack/monitoring/prometheus.yml:/etc/prometheus/prometheus.yml:z
ports:
- {{ prometheus.host_port }}
restart: on-failure
grafana:
image: grafana/grafana:10.4.1
environment:
GF_PATHS_CONFIG: /etc/grafana/grafana.ini
GF_SECURITY_ADMIN_USER: admin
GF_SECURITY_ADMIN_PASSWORD: admin
ports:
- {{ grafana.host_port }}
volumes:
- ./stack/monitoring/grafana/datasources.yaml:/etc/grafana/provisioning/datasources/datasources.yaml:z
- ./stack/monitoring/grafana/dashboards.yml:/etc/grafana/provisioning/dashboards/dashboards.yaml:z
- ./stack/monitoring/grafana/dashboards:/var/lib/grafana/dashboards:ro
- ./stack/monitoring/grafana/grafana.ini:/etc/grafana/grafana.ini:ro
env_file:
- ./stack/monitoring/grafana/plugins.env
depends_on:
- prometheus
restart: on-failure
{% for node in validators %}
{{ node.name }}:
image: {{ node.image }}

View File

@ -6,7 +6,8 @@ pub mod setup;
use async_trait::async_trait;
use testing_framework_core::scenario::{
BlockFeedTask, CleanupGuard, Deployer, RequiresNodeControl, Runner, Scenario,
BlockFeedTask, CleanupGuard, Deployer, ObservabilityCapabilityProvider, RequiresNodeControl,
Runner, Scenario,
};
use crate::{errors::ComposeRunnerError, lifecycle::cleanup::RunnerCleanup};
@ -41,7 +42,7 @@ impl ComposeDeployer {
#[async_trait]
impl<Caps> Deployer<Caps> for ComposeDeployer
where
Caps: RequiresNodeControl + Send + Sync,
Caps: RequiresNodeControl + ObservabilityCapabilityProvider + Send + Sync,
{
type Error = ComposeRunnerError;

View File

@ -1,7 +1,8 @@
use std::sync::Arc;
use testing_framework_core::scenario::{
NodeControlHandle, RequiresNodeControl, RunContext, Runner, Scenario,
NodeControlHandle, ObservabilityCapabilityProvider, ObservabilityInputs, RequiresNodeControl,
RunContext, Runner, Scenario,
};
use tracing::info;
@ -20,7 +21,6 @@ use crate::{
environment::StackEnvironment,
ports::{HostPortMapping, compose_runner_host},
},
lifecycle::readiness::metrics_handle_from_port,
};
pub struct DeploymentOrchestrator {
@ -37,21 +37,35 @@ impl DeploymentOrchestrator {
scenario: &Scenario<Caps>,
) -> Result<Runner, ComposeRunnerError>
where
Caps: RequiresNodeControl + Send + Sync,
Caps: RequiresNodeControl + ObservabilityCapabilityProvider + Send + Sync,
{
let setup = DeploymentSetup::new(scenario.topology());
setup.validate_environment().await?;
let env_inputs = ObservabilityInputs::from_env()?;
let cap_inputs = scenario
.capabilities()
.observability_capability()
.map(ObservabilityInputs::from_capability)
.unwrap_or_default();
let observability = env_inputs.with_overrides(cap_inputs).normalized();
let DeploymentContext {
mut environment,
descriptors,
} = setup.prepare_workspace().await?;
} = setup.prepare_workspace(&observability).await?;
tracing::info!(
validators = descriptors.validators().len(),
executors = descriptors.executors().len(),
duration_secs = scenario.duration().as_secs(),
readiness_checks = self.deployer.readiness_checks,
metrics_query_url = observability.metrics_query_url.as_ref().map(|u| u.as_str()),
metrics_otlp_ingest_url = observability
.metrics_otlp_ingest_url
.as_ref()
.map(|u| u.as_str()),
grafana_url = observability.grafana_url.as_ref().map(|u| u.as_str()),
"compose deployment starting"
);
@ -71,26 +85,31 @@ impl DeploymentOrchestrator {
let node_clients = client_builder
.build_node_clients(&descriptors, &host_ports, &host, &mut environment)
.await?;
let telemetry = metrics_handle_from_port(environment.prometheus_port(), &host)?;
let telemetry = observability.telemetry_handle()?;
let node_control = self.maybe_node_control::<Caps>(&environment);
info!(
prometheus_url = %format!("http://{}:{}/", host, environment.prometheus_port()),
"prometheus endpoint available on host"
);
info!(
grafana_url = %format!("http://{}:{}/", host, environment.grafana_port()),
"grafana dashboard available on host"
);
if let Some(url) = observability.metrics_query_url.as_ref() {
info!(metrics_query_url = %url.as_str(), "metrics query endpoint configured");
}
if let Some(url) = observability.grafana_url.as_ref() {
info!(grafana_url = %url.as_str(), "grafana url configured");
}
log_profiling_urls(&host, &host_ports);
if std::env::var("TESTNET_PRINT_ENDPOINTS").is_ok() {
let prometheus = observability
.metrics_query_url
.as_ref()
.map(|u| u.as_str().to_string())
.unwrap_or_else(|| "<disabled>".to_string());
let grafana = observability
.grafana_url
.as_ref()
.map(|u| u.as_str().to_string())
.unwrap_or_else(|| "<disabled>".to_string());
println!(
"TESTNET_ENDPOINTS prometheus=http://{}:{}/ grafana=http://{}:{}/",
host,
environment.prometheus_port(),
host,
environment.grafana_port()
"TESTNET_ENDPOINTS prometheus={} grafana={}",
prometheus, grafana
);
print_profiling_urls(&host, &host_ports);

View File

@ -26,7 +26,6 @@ impl PortManager {
info!(
validator_ports = ?mapping.validator_api_ports(),
executor_ports = ?mapping.executor_api_ports(),
prometheus_port = environment.prometheus_port(),
"resolved container host ports"
);
Ok(mapping)

View File

@ -1,22 +1,16 @@
use std::{
env,
net::{Ipv4Addr, TcpListener as StdTcpListener},
use testing_framework_core::{
scenario::ObservabilityInputs, topology::generation::GeneratedTopology,
};
use testing_framework_core::topology::generation::GeneratedTopology;
use tracing::{debug, info};
use tracing::info;
use crate::{
docker::ensure_docker_available,
errors::ComposeRunnerError,
infrastructure::environment::{
PortReservation, StackEnvironment, ensure_supported_topology, prepare_environment,
StackEnvironment, ensure_supported_topology, prepare_environment,
},
};
pub const PROMETHEUS_PORT_ENV: &str = "TEST_FRAMEWORK_PROMETHEUS_PORT";
pub const DEFAULT_PROMETHEUS_PORT: u16 = 9090;
pub struct DeploymentSetup {
descriptors: GeneratedTopology,
}
@ -46,27 +40,15 @@ impl DeploymentSetup {
Ok(())
}
pub async fn prepare_workspace(self) -> Result<DeploymentContext, ComposeRunnerError> {
let prometheus_env = env::var(PROMETHEUS_PORT_ENV)
.ok()
.and_then(|raw| raw.parse::<u16>().ok());
if prometheus_env.is_some() {
info!(port = prometheus_env, "using prometheus port from env");
}
let prometheus_port = prometheus_env
.and_then(|port| reserve_port(port))
.or_else(|| allocate_prometheus_port())
.unwrap_or_else(|| PortReservation::new(DEFAULT_PROMETHEUS_PORT, None));
debug!(
prometheus_port = prometheus_port.port(),
"selected prometheus port"
);
let environment =
prepare_environment(&self.descriptors, prometheus_port, prometheus_env.is_some())
.await?;
pub async fn prepare_workspace(
self,
observability: &ObservabilityInputs,
) -> Result<DeploymentContext, ComposeRunnerError> {
let environment = prepare_environment(
&self.descriptors,
observability.metrics_otlp_ingest_url.as_ref(),
)
.await?;
info!(
compose_file = %environment.compose_path().display(),
@ -81,13 +63,3 @@ impl DeploymentSetup {
})
}
}
fn allocate_prometheus_port() -> Option<PortReservation> {
reserve_port(DEFAULT_PROMETHEUS_PORT).or_else(|| reserve_port(0))
}
fn reserve_port(port: u16) -> Option<PortReservation> {
let listener = StdTcpListener::bind((Ipv4Addr::LOCALHOST, port)).ok()?;
let actual_port = listener.local_addr().ok()?.port();
Some(PortReservation::new(actual_port, Some(listener)))
}

View File

@ -1,6 +1,6 @@
use serde::Serialize;
use testing_framework_core::{
constants::{DEFAULT_CFGSYNC_PORT, DEFAULT_PROMETHEUS_HTTP_PORT, kzg_container_path},
constants::{DEFAULT_CFGSYNC_PORT, kzg_container_path},
topology::generation::{GeneratedNodeConfig, GeneratedTopology},
};
@ -10,18 +10,9 @@ mod node;
pub use node::{EnvEntry, NodeDescriptor};
/// Errors building a compose descriptor from the topology.
#[derive(Debug, thiserror::Error)]
pub enum DescriptorBuildError {
#[error("prometheus port is not configured for compose descriptor")]
MissingPrometheusPort,
}
/// Top-level docker-compose descriptor built from a GeneratedTopology.
#[derive(Clone, Debug, Serialize)]
pub struct ComposeDescriptor {
prometheus: PrometheusTemplate,
grafana: GrafanaTemplate,
validators: Vec<NodeDescriptor>,
executors: Vec<NodeDescriptor>,
}
@ -50,8 +41,6 @@ pub struct ComposeDescriptorBuilder<'a> {
topology: &'a GeneratedTopology,
use_kzg_mount: bool,
cfgsync_port: Option<u16>,
prometheus_port: Option<u16>,
grafana_port: Option<u16>,
}
impl<'a> ComposeDescriptorBuilder<'a> {
@ -60,8 +49,6 @@ impl<'a> ComposeDescriptorBuilder<'a> {
topology,
use_kzg_mount: false,
cfgsync_port: None,
prometheus_port: None,
grafana_port: None,
}
}
@ -79,34 +66,12 @@ impl<'a> ComposeDescriptorBuilder<'a> {
self
}
/// Finish building the descriptor.
#[must_use]
/// Set host port mapping for Prometheus.
pub const fn with_prometheus_port(mut self, port: u16) -> Self {
self.prometheus_port = Some(port);
self
}
#[must_use]
/// Set host port mapping for Grafana.
pub const fn with_grafana_port(mut self, port: u16) -> Self {
self.grafana_port = Some(port);
self
}
/// Finish building the descriptor, erroring if required fields are missing.
pub fn build(self) -> Result<ComposeDescriptor, DescriptorBuildError> {
pub fn build(self) -> ComposeDescriptor {
let cfgsync_port = self.cfgsync_port.unwrap_or(DEFAULT_CFGSYNC_PORT);
let prometheus_host_port = self
.prometheus_port
.ok_or(DescriptorBuildError::MissingPrometheusPort)?;
let grafana_host_port = self.grafana_port.unwrap_or(0);
let (image, platform) = resolve_image();
// Prometheus image is x86_64-only on some tags; set platform when on arm hosts.
let prometheus_platform = match std::env::consts::ARCH {
"aarch64" | "arm64" => Some(String::from("linux/arm64")),
_ => None,
};
let validators = build_nodes(
self.topology.validators(),
@ -126,49 +91,13 @@ impl<'a> ComposeDescriptorBuilder<'a> {
cfgsync_port,
);
Ok(ComposeDescriptor {
prometheus: PrometheusTemplate::new(prometheus_host_port, prometheus_platform),
grafana: GrafanaTemplate::new(grafana_host_port),
ComposeDescriptor {
validators,
executors,
})
}
}
/// Minimal Prometheus service mapping used in the compose template.
#[derive(Clone, Debug, Serialize)]
pub struct PrometheusTemplate {
host_port: String,
#[serde(skip_serializing_if = "Option::is_none")]
platform: Option<String>,
}
impl PrometheusTemplate {
fn new(port: u16, platform: Option<String>) -> Self {
Self {
host_port: format!("127.0.0.1:{port}:{}", DEFAULT_PROMETHEUS_HTTP_PORT),
platform,
}
}
}
/// Minimal Grafana service mapping used in the compose template.
#[derive(Clone, Debug, Serialize)]
pub struct GrafanaTemplate {
    // Host-side binding rendered into the compose file's `ports:` entry.
    host_port: String,
}

impl GrafanaTemplate {
    /// Map Grafana's container port 3000 onto the host; port 0 leaves the
    /// host side blank so docker picks a free port.
    fn new(port: u16) -> Self {
        let host_port = if port == 0 {
            "127.0.0.1::3000".to_string() // docker assigns host port
        } else {
            format!("127.0.0.1:{port}:3000")
        };
        Self { host_port }
    }
}
#[derive(Clone, Copy)]
pub(crate) enum ComposeNodeKind {
Validator,

View File

@ -49,24 +49,6 @@ impl ComposeWorkspace {
copy_dir_recursive(&scripts_source, &temp.path().join("stack/scripts"))?;
}
// Ensure Prometheus config is a file (Docker bind mount fails if a directory
// exists).
let prometheus_src = stack_source.join("monitoring/prometheus.yml");
let prometheus_dst = temp.path().join("stack/monitoring/prometheus.yml");
if prometheus_dst.exists() && prometheus_dst.is_dir() {
fs::remove_dir_all(&prometheus_dst)
.with_context(|| format!("removing bogus dir {}", prometheus_dst.display()))?;
}
if !prometheus_dst.exists() {
fs::copy(&prometheus_src, &prometheus_dst).with_context(|| {
format!(
"copying prometheus.yml {} -> {}",
prometheus_src.display(),
prometheus_dst.display()
)
})?;
}
let kzg_source = repo_root.join("testing-framework/assets/stack/kzgrs_test_params");
let target = temp.path().join("kzgrs_test_params");
if kzg_source.exists() {

View File

@ -9,10 +9,7 @@ use testing_framework_core::{
};
use url::ParseError;
use crate::{
descriptor::DescriptorBuildError, docker::commands::ComposeCommandError,
infrastructure::template::TemplateError,
};
use crate::{docker::commands::ComposeCommandError, infrastructure::template::TemplateError};
#[derive(Debug, thiserror::Error)]
/// Top-level compose runner errors.
@ -94,11 +91,6 @@ pub enum ConfigError {
#[source]
source: anyhow::Error,
},
#[error("failed to build compose descriptor: {source}")]
Descriptor {
#[source]
source: DescriptorBuildError,
},
#[error("failed to render compose template: {source}")]
Template {
#[source]

View File

@ -1,5 +1,8 @@
use std::{path::Path, process::Command as StdCommand};
use nomos_tracing::metrics::otlp::OtlpMetricsConfig;
use nomos_tracing_service::MetricsLayer;
use reqwest::Url;
use testing_framework_core::{
scenario::cfgsync::{apply_topology_overrides, load_cfgsync_template, write_cfgsync_template},
topology::generation::GeneratedTopology,
@ -60,6 +63,7 @@ pub fn update_cfgsync_config(
topology: &GeneratedTopology,
use_kzg_mount: bool,
port: u16,
metrics_otlp_ingest_url: Option<&Url>,
) -> anyhow::Result<()> {
debug!(
path = %path.display(),
@ -72,6 +76,12 @@ pub fn update_cfgsync_config(
let mut cfg = load_cfgsync_template(path)?;
cfg.port = port;
apply_topology_overrides(&mut cfg, topology, use_kzg_mount);
if let Some(endpoint) = metrics_otlp_ingest_url.cloned() {
cfg.tracing_settings.metrics = MetricsLayer::Otlp(OtlpMetricsConfig {
endpoint,
host_identifier: "node".into(),
});
}
write_cfgsync_template(path, &cfg)?;
Ok(())
}

View File

@ -1,20 +1,19 @@
use std::{
env,
net::{Ipv4Addr, TcpListener as StdTcpListener},
path::{Path, PathBuf},
time::Duration,
};
use anyhow::{Context as _, anyhow};
use anyhow::anyhow;
use reqwest::Url;
use testing_framework_core::{
adjust_timeout, scenario::CleanupGuard, topology::generation::GeneratedTopology,
};
use tokio::{process::Command, time::timeout};
use tracing::{debug, error, info, warn};
use tokio::process::Command;
use tracing::{debug, error, info};
use uuid::Uuid;
use crate::{
deployer::setup::DEFAULT_PROMETHEUS_PORT,
descriptor::ComposeDescriptor,
docker::{
commands::{compose_up, dump_compose_logs, run_docker_command},
@ -31,8 +30,6 @@ use crate::{
};
const CFGSYNC_START_TIMEOUT: Duration = Duration::from_secs(180);
const COMPOSE_PORT_DISCOVERY_TIMEOUT: Duration = Duration::from_secs(30);
const STACK_BRINGUP_MAX_ATTEMPTS: usize = 3;
/// Paths and flags describing the prepared compose workspace.
pub struct WorkspaceState {
@ -49,8 +46,6 @@ pub struct StackEnvironment {
root: PathBuf,
workspace: Option<ComposeWorkspace>,
cfgsync_handle: Option<CfgsyncServerHandle>,
prometheus_port: u16,
grafana_port: u16,
}
impl StackEnvironment {
@ -60,8 +55,6 @@ impl StackEnvironment {
compose_path: PathBuf,
project_name: String,
cfgsync_handle: Option<CfgsyncServerHandle>,
prometheus_port: u16,
grafana_port: u16,
) -> Self {
let WorkspaceState {
workspace, root, ..
@ -73,8 +66,6 @@ impl StackEnvironment {
root,
workspace: Some(workspace),
cfgsync_handle,
prometheus_port,
grafana_port,
}
}
@ -82,16 +73,6 @@ impl StackEnvironment {
&self.compose_path
}
/// Host port exposed by Prometheus.
pub const fn prometheus_port(&self) -> u16 {
self.prometheus_port
}
/// Host port exposed by Grafana.
pub const fn grafana_port(&self) -> u16 {
self.grafana_port
}
/// Docker compose project name.
pub fn project_name(&self) -> &str {
&self.project_name
@ -138,27 +119,6 @@ impl StackEnvironment {
}
}
/// Represents a claimed port, optionally guarded by an open socket.
pub struct PortReservation {
    // Port number reported back to callers.
    port: u16,
    // Keeping the listener bound prevents the OS from handing the port out
    // to someone else until this reservation is dropped.
    _guard: Option<StdTcpListener>,
}

impl PortReservation {
    /// Holds a port and an optional socket guard to keep it reserved.
    pub const fn new(port: u16, guard: Option<StdTcpListener>) -> Self {
        Self { port, _guard: guard }
    }

    /// The reserved port number.
    pub const fn port(&self) -> u16 {
        self.port
    }
}
/// Verifies the topology has at least one validator so compose can start.
pub fn ensure_supported_topology(
descriptors: &GeneratedTopology,
@ -210,10 +170,17 @@ pub fn update_cfgsync_logged(
workspace: &WorkspaceState,
descriptors: &GeneratedTopology,
cfgsync_port: u16,
metrics_otlp_ingest_url: Option<&Url>,
) -> Result<(), ComposeRunnerError> {
info!(cfgsync_port, "updating cfgsync configuration");
configure_cfgsync(workspace, descriptors, cfgsync_port).map_err(Into::into)
configure_cfgsync(
workspace,
descriptors,
cfgsync_port,
metrics_otlp_ingest_url,
)
.map_err(Into::into)
}
/// Start the cfgsync server container using the generated config.
@ -232,12 +199,14 @@ pub fn configure_cfgsync(
workspace: &WorkspaceState,
descriptors: &GeneratedTopology,
cfgsync_port: u16,
metrics_otlp_ingest_url: Option<&Url>,
) -> Result<(), ConfigError> {
update_cfgsync_config(
&workspace.cfgsync_path,
descriptors,
workspace.use_kzg,
cfgsync_port,
metrics_otlp_ingest_url,
)
.map_err(|source| ConfigError::Cfgsync {
path: workspace.cfgsync_path.clone(),
@ -328,23 +297,16 @@ pub fn write_compose_artifacts(
workspace: &WorkspaceState,
descriptors: &GeneratedTopology,
cfgsync_port: u16,
prometheus_port: u16,
grafana_port: u16,
) -> Result<PathBuf, ConfigError> {
debug!(
cfgsync_port,
prometheus_port,
grafana_port,
workspace_root = %workspace.root.display(),
"building compose descriptor"
);
let descriptor = ComposeDescriptor::builder(descriptors)
.with_kzg_mount(workspace.use_kzg)
.with_cfgsync_port(cfgsync_port)
.with_prometheus_port(prometheus_port)
.with_grafana_port(grafana_port)
.build()
.map_err(|source| ConfigError::Descriptor { source })?;
.build();
let compose_path = workspace.root.join("compose.generated.yml");
write_compose_file(&descriptor, &compose_path)
@ -358,21 +320,9 @@ pub fn render_compose_logged(
workspace: &WorkspaceState,
descriptors: &GeneratedTopology,
cfgsync_port: u16,
prometheus_port: u16,
grafana_port: u16,
) -> Result<PathBuf, ComposeRunnerError> {
info!(
cfgsync_port,
prometheus_port, grafana_port, "rendering compose file with ports"
);
write_compose_artifacts(
workspace,
descriptors,
cfgsync_port,
prometheus_port,
grafana_port,
)
.map_err(Into::into)
info!(cfgsync_port, "rendering compose file");
write_compose_artifacts(workspace, descriptors, cfgsync_port).map_err(Into::into)
}
/// Bring up docker compose; shut down cfgsync if start-up fails.
@ -404,172 +354,46 @@ pub async fn bring_up_stack_logged(
/// Prepare workspace, cfgsync, compose artifacts, and launch the stack.
pub async fn prepare_environment(
descriptors: &GeneratedTopology,
mut prometheus_port: PortReservation,
prometheus_port_locked: bool,
metrics_otlp_ingest_url: Option<&Url>,
) -> Result<StackEnvironment, ComposeRunnerError> {
let workspace = prepare_workspace_logged()?;
let cfgsync_port = allocate_cfgsync_port()?;
let grafana_env = env::var("COMPOSE_GRAFANA_PORT")
.ok()
.and_then(|raw| raw.parse::<u16>().ok());
if let Some(port) = grafana_env {
info!(port, "using grafana port from env");
}
update_cfgsync_logged(&workspace, descriptors, cfgsync_port)?;
update_cfgsync_logged(
&workspace,
descriptors,
cfgsync_port,
metrics_otlp_ingest_url,
)?;
ensure_compose_image().await?;
let compose_path = render_compose_logged(&workspace, descriptors, cfgsync_port)?;
let attempts = if prometheus_port_locked {
1
} else {
STACK_BRINGUP_MAX_ATTEMPTS
};
let mut last_err = None;
let project_name = format!("nomos-compose-{}", Uuid::new_v4());
let mut cfgsync_handle = start_cfgsync_stage(&workspace, cfgsync_port).await?;
for _ in 0..attempts {
let prometheus_port_value = prometheus_port.port();
let grafana_port_value = grafana_env.unwrap_or(0);
let compose_path = render_compose_logged(
&workspace,
descriptors,
cfgsync_port,
prometheus_port_value,
grafana_port_value,
)?;
let project_name = format!("nomos-compose-{}", Uuid::new_v4());
let mut cfgsync_handle = start_cfgsync_stage(&workspace, cfgsync_port).await?;
drop(prometheus_port);
match bring_up_stack_logged(
&compose_path,
&project_name,
&workspace.root,
&mut cfgsync_handle,
)
.await
{
Ok(()) => {
let grafana_port_resolved = resolve_service_port(
&compose_path,
&project_name,
&workspace.root,
"grafana",
3000,
)
.await
.unwrap_or(grafana_port_value);
info!(
project = %project_name,
compose_file = %compose_path.display(),
cfgsync_port,
prometheus_port = prometheus_port_value,
grafana_port = grafana_port_resolved,
"compose stack is up"
);
return Ok(StackEnvironment::from_workspace(
workspace,
compose_path,
project_name,
Some(cfgsync_handle),
prometheus_port_value,
grafana_port_resolved,
));
}
Err(err) => {
// Attempt to capture container logs even when bring-up fails early.
dump_compose_logs(&compose_path, &project_name, &workspace.root).await;
cfgsync_handle.shutdown();
last_err = Some(err);
if prometheus_port_locked {
break;
}
warn!(
error = %last_err.as_ref().unwrap(),
"compose bring-up failed; retrying with a new prometheus port"
);
prometheus_port = allocate_prometheus_port()
.unwrap_or_else(|| PortReservation::new(DEFAULT_PROMETHEUS_PORT, None));
debug!(
next_prometheus_port = prometheus_port.port(),
"retrying compose bring-up"
);
}
}
if let Err(err) = bring_up_stack_logged(
&compose_path,
&project_name,
&workspace.root,
&mut cfgsync_handle,
)
.await
{
dump_compose_logs(&compose_path, &project_name, &workspace.root).await;
cfgsync_handle.shutdown();
return Err(err);
}
Err(last_err.expect("prepare_environment should return or fail with error"))
}
/// Reserve the default Prometheus port, falling back to an OS-assigned one.
fn allocate_prometheus_port() -> Option<PortReservation> {
    match reserve_prometheus_port(DEFAULT_PROMETHEUS_PORT) {
        Some(reservation) => Some(reservation),
        // Port 0 asks the OS for any free port.
        None => reserve_prometheus_port(0),
    }
}
/// Bind a loopback listener on `port` and wrap the resulting socket as a
/// reservation; returns `None` when the bind (or address lookup) fails.
fn reserve_prometheus_port(port: u16) -> Option<PortReservation> {
    StdTcpListener::bind((Ipv4Addr::LOCALHOST, port))
        .ok()
        .and_then(|listener| {
            let bound = listener.local_addr().ok()?.port();
            Some(PortReservation::new(bound, Some(listener)))
        })
}
async fn resolve_service_port(
compose_file: &Path,
project_name: &str,
root: &Path,
service: &str,
container_port: u16,
) -> Result<u16, ComposeRunnerError> {
let mut cmd = Command::new("docker");
cmd.arg("compose")
.arg("-f")
.arg(compose_file)
.arg("-p")
.arg(project_name)
.arg("port")
.arg(service)
.arg(container_port.to_string())
.current_dir(root);
let output = timeout(adjust_timeout(COMPOSE_PORT_DISCOVERY_TIMEOUT), cmd.output())
.await
.map_err(|_| ComposeRunnerError::PortDiscovery {
service: service.to_owned(),
container_port,
source: anyhow!("docker compose port timed out"),
})?
.with_context(|| format!("running docker compose port {service} {container_port}"))
.map_err(|source| ComposeRunnerError::PortDiscovery {
service: service.to_owned(),
container_port,
source,
})?;
if !output.status.success() {
return Err(ComposeRunnerError::PortDiscovery {
service: service.to_owned(),
container_port,
source: anyhow!("docker compose port exited with {}", output.status),
});
}
let stdout = String::from_utf8_lossy(&output.stdout);
for line in stdout.lines() {
let line = line.trim();
if line.is_empty() {
continue;
}
if let Some(port_str) = line.rsplit(':').next()
&& let Ok(port) = port_str.trim().parse::<u16>()
{
return Ok(port);
}
}
Err(ComposeRunnerError::PortDiscovery {
service: service.to_owned(),
container_port,
source: anyhow!("unable to parse docker compose port output: {stdout}"),
})
info!(
project = %project_name,
compose_file = %compose_path.display(),
cfgsync_port,
"compose stack is up"
);
Ok(StackEnvironment::from_workspace(
workspace,
compose_path,
project_name,
Some(cfgsync_handle),
))
}

View File

@ -3,7 +3,7 @@ use std::time::Duration;
use reqwest::Url;
use testing_framework_core::{
nodes::ApiClient,
scenario::{Metrics, MetricsError, NodeClients, http_probe::NodeRole as HttpNodeRole},
scenario::{NodeClients, http_probe::NodeRole as HttpNodeRole},
topology::generation::{GeneratedTopology, NodeRole as TopologyNodeRole},
};
use tokio::time::sleep;
@ -16,13 +16,6 @@ use crate::{
const DISABLED_READINESS_SLEEP: Duration = Duration::from_secs(5);
/// Build a metrics client from host/port, validating the URL.
pub fn metrics_handle_from_port(port: u16, host: &str) -> Result<Metrics, MetricsError> {
    let endpoint = format!("http://{host}:{port}/");
    match Url::parse(&endpoint) {
        Ok(url) => Metrics::from_prometheus(url),
        Err(err) => Err(MetricsError::new(format!("invalid prometheus url: {err}"))),
    }
}
/// Wait until all validators respond on their API ports.
pub async fn ensure_validators_ready_with_ports(ports: &[u16]) -> Result<(), StackReadinessError> {
if ports.is_empty() {

View File

@ -17,6 +17,7 @@ anyhow = "1"
async-trait = { workspace = true }
k8s-openapi = { version = "0.20", features = ["latest"] }
kube = { version = "0.87", default-features = false, features = ["client", "runtime", "rustls-tls"] }
nomos-tracing = { workspace = true }
nomos-tracing-service = { workspace = true }
reqwest = { workspace = true, features = ["json"] }
serde = { version = "1", features = ["derive"] }

View File

@ -1,2 +0,0 @@
*.json
!.gitignore

View File

@ -37,9 +37,3 @@ app.kubernetes.io/instance: {{ $root.Release.Name }}
nomos/logical-role: executor
nomos/executor-index: "{{ $index }}"
{{- end -}}
{{- define "nomos-runner.prometheusLabels" -}}
app.kubernetes.io/name: {{ include "nomos-runner.chart" . }}
app.kubernetes.io/instance: {{ .Release.Name }}
nomos/logical-role: prometheus
{{- end -}}

View File

@ -1,33 +0,0 @@
{{- if .Values.grafana.enabled }}
# Grafana provisioning: a Prometheus datasource (in-cluster when
# prometheus.enabled, otherwise prometheus.externalUrl is required), a
# file-based dashboard provider, and every bundled dashboard JSON inlined
# as its own ConfigMap key.
apiVersion: v1
kind: ConfigMap
metadata:
  name: {{ include "nomos-runner.fullname" . }}-grafana-config
data:
  datasources.yaml: |
    apiVersion: 1
    datasources:
      - name: Prometheus
        type: prometheus
        access: proxy
        isDefault: true
        uid: PBFA97CFB590B2093
        orgId: 1
        url: {{ if .Values.prometheus.enabled }}http://prometheus:9090{{ else }}{{ required "prometheus.externalUrl must be set when prometheus.enabled=false" .Values.prometheus.externalUrl }}{{ end }}
        editable: true
  dashboards.yaml: |
    apiVersion: 1
    providers:
      - name: 'default'
        orgId: 1
        folder: 'Nomos'
        type: file
        disableDeletion: false
        editable: true
        options:
          path: /var/lib/grafana/dashboards
  {{ range $path, $_ := .Files.Glob "grafana/dashboards/*.json" }}
  {{ base $path }}: |
{{ $.Files.Get $path | indent 4 }}
  {{ end }}
{{- end }}

View File

@ -1,64 +0,0 @@
{{- if .Values.grafana.enabled }}
# Single-replica Grafana, provisioned from the companion grafana-config
# ConfigMap (datasources + dashboard provider) with dashboards mounted
# read-only under /var/lib/grafana/dashboards.
apiVersion: apps/v1
kind: Deployment
metadata:
  name: {{ include "nomos-runner.fullname" . }}-grafana
  labels:
    app: {{ include "nomos-runner.name" . }}
    chart: {{ include "nomos-runner.chart" . }}
    release: {{ .Release.Name }}
    heritage: {{ .Release.Service }}
spec:
  replicas: 1
  selector:
    matchLabels:
      app: {{ include "nomos-runner.name" . }}
      component: grafana
  template:
    metadata:
      labels:
        app: {{ include "nomos-runner.name" . }}
        component: grafana
    spec:
      containers:
        - name: grafana
          image: {{ .Values.grafana.image }}
          imagePullPolicy: {{ .Values.grafana.imagePullPolicy }}
          env:
            # Admin credentials come straight from chart values.
            - name: GF_SECURITY_ADMIN_USER
              value: {{ .Values.grafana.adminUser | quote }}
            - name: GF_SECURITY_ADMIN_PASSWORD
              value: {{ .Values.grafana.adminPassword | quote }}
          ports:
            - containerPort: 3000
              name: http
          volumeMounts:
            - name: grafana-config
              mountPath: /etc/grafana/provisioning/datasources/datasources.yaml
              subPath: datasources.yaml
              readOnly: true
            - name: grafana-config
              mountPath: /etc/grafana/provisioning/dashboards/dashboards.yaml
              subPath: dashboards.yaml
              readOnly: true
            - name: grafana-dashboards
              mountPath: /var/lib/grafana/dashboards
              readOnly: true
      volumes:
        - name: grafana-config
          configMap:
            name: {{ include "nomos-runner.fullname" . }}-grafana-config
            items:
              - key: datasources.yaml
                path: datasources.yaml
              - key: dashboards.yaml
                path: dashboards.yaml
        - name: grafana-dashboards
          configMap:
            name: {{ include "nomos-runner.fullname" . }}-grafana-config
            items:
              # One key per dashboard JSON bundled with the chart.
              {{ range $path, $_ := .Files.Glob "grafana/dashboards/*.json" }}
              - key: {{ base $path }}
                path: {{ base $path }}
              {{ end }}
{{- end }}

View File

@ -1,22 +0,0 @@
{{- if .Values.grafana.enabled }}
# Exposes Grafana's HTTP port (3000); service type and optional fixed
# NodePort come from chart values.
apiVersion: v1
kind: Service
metadata:
  name: {{ include "nomos-runner.fullname" . }}-grafana
  labels:
    app: {{ include "nomos-runner.name" . }}
    component: grafana
spec:
  type: {{ .Values.grafana.service.type }}
  ports:
    - port: 3000
      targetPort: http
      protocol: TCP
      name: http
      {{- if and (eq .Values.grafana.service.type "NodePort") .Values.grafana.service.nodePort }}
      nodePort: {{ .Values.grafana.service.nodePort }}
      {{- end }}
  selector:
    app: {{ include "nomos-runner.name" . }}
    component: grafana
{{- end }}

View File

@ -1,16 +0,0 @@
{{- if .Values.prometheus.enabled }}
---
# Prometheus configuration file, taken verbatim from values
# (.Values.prometheus.config) or rendered empty when unset.
apiVersion: v1
kind: ConfigMap
metadata:
  name: prometheus
  labels:
    {{- include "nomos-runner.prometheusLabels" . | nindent 4 }}
data:
  prometheus.yml: |
{{- if .Values.prometheus.config }}
{{ .Values.prometheus.config | indent 4 }}
{{- else }}
{{ "" | indent 4 }}
{{- end }}
{{- end }}

View File

@ -1,38 +0,0 @@
{{- if .Values.prometheus.enabled }}
---
# Single-replica Prometheus with OTLP write ingestion enabled and its
# configuration mounted from the `prometheus` ConfigMap.
apiVersion: apps/v1
kind: Deployment
metadata:
  name: prometheus
  labels:
    {{- include "nomos-runner.prometheusLabels" . | nindent 4 }}
spec:
  replicas: 1
  selector:
    matchLabels:
      {{- include "nomos-runner.prometheusLabels" . | nindent 6 }}
  template:
    metadata:
      labels:
        {{- include "nomos-runner.prometheusLabels" . | nindent 8 }}
    spec:
      containers:
        - name: prometheus
          image: {{ .Values.prometheus.image }}
          imagePullPolicy: {{ .Values.prometheus.imagePullPolicy | default "IfNotPresent" }}
          args:
            - --config.file=/etc/prometheus/prometheus.yml
            - --storage.tsdb.retention.time={{ .Values.prometheus.retention }}
            # Accept OTLP metric writes from the nodes.
            - --web.enable-otlp-receiver
            - --enable-feature=otlp-write-receiver
          ports:
            - containerPort: 9090
              name: http
          volumeMounts:
            - name: prometheus-config
              mountPath: /etc/prometheus
      volumes:
        - name: prometheus-config
          configMap:
            name: prometheus
{{- end }}

View File

@ -1,20 +0,0 @@
{{- if .Values.prometheus.enabled }}
---
# Exposes Prometheus' HTTP port (9090); defaults to NodePort so the host
# can reach it, with an optional fixed nodePort from values.
apiVersion: v1
kind: Service
metadata:
  name: prometheus
  labels:
    {{- include "nomos-runner.prometheusLabels" . | nindent 4 }}
spec:
  type: {{ .Values.prometheus.service.type | default "NodePort" }}
  selector:
    {{- include "nomos-runner.prometheusLabels" . | nindent 4 }}
  ports:
    - name: http
      port: 9090
      targetPort: http
      {{- if and (eq (default "NodePort" .Values.prometheus.service.type) "NodePort") .Values.prometheus.service.nodePort }}
      nodePort: {{ .Values.prometheus.service.nodePort }}
      {{- end }}
{{- end }}

View File

@ -1,18 +0,0 @@
{{- if eq .Values.kzg.mode "hostPath" }}
# hostPath-backed, read-only PersistentVolume holding the pre-generated
# KZG parameters; only rendered when kzg.mode is "hostPath".
apiVersion: v1
kind: PersistentVolume
metadata:
  name: {{ include "nomos-runner.fullname" . }}-kzg
  labels:
    {{- include "nomos-runner.labels" . | nindent 4 }}
spec:
  capacity:
    storage: {{ .Values.kzg.storageSize }}
  accessModes:
    - ReadOnlyMany
  persistentVolumeReclaimPolicy: Delete
  storageClassName: manual
  hostPath:
    path: {{ .Values.kzg.hostPath }}
    type: {{ .Values.kzg.hostPathType }}
{{- end }}

View File

@ -1,16 +0,0 @@
{{- if eq .Values.kzg.mode "hostPath" }}
# Claim bound explicitly (via volumeName) to the matching KZG hostPath
# PersistentVolume; only rendered when kzg.mode is "hostPath".
apiVersion: v1
kind: PersistentVolumeClaim
metadata:
  name: {{ include "nomos-runner.fullname" . }}-kzg
  labels:
    {{- include "nomos-runner.labels" . | nindent 4 }}
spec:
  accessModes:
    - ReadOnlyMany
  storageClassName: manual
  volumeName: {{ include "nomos-runner.fullname" . }}-kzg
  resources:
    requests:
      storage: {{ .Values.kzg.storageSize }}
{{- end }}

View File

@ -26,28 +26,3 @@ kzg:
hostPath: "/var/lib/nomos/kzgrs_test_params"
hostPathType: "Directory"
storageSize: "1Gi"
prometheus:
enabled: true
externalUrl: ""
image: "prom/prometheus:v3.0.1"
imagePullPolicy: IfNotPresent
retention: "7d"
service:
type: NodePort
nodePort: null
config: |
global:
evaluation_interval: 15s
external_labels:
monitor: "NomosRunner"
grafana:
enabled: true
image: "grafana/grafana:10.4.1"
imagePullPolicy: IfNotPresent
adminUser: admin
adminPassword: admin
service:
type: NodePort
nodePort: null

View File

@ -1,11 +1,10 @@
use anyhow::Error;
use async_trait::async_trait;
use kube::Client;
use reqwest::Url;
use testing_framework_core::{
scenario::{
BlockFeedTask, CleanupGuard, Deployer, MetricsError, ObservabilityCapability, RunContext,
Runner, Scenario,
BlockFeedTask, CleanupGuard, Deployer, MetricsError, ObservabilityCapability,
ObservabilityInputs, RunContext, Runner, Scenario,
},
topology::generation::GeneratedTopology,
};
@ -17,13 +16,12 @@ use crate::{
cluster::{
ClusterEnvironment, NodeClientError, PortSpecs, RemoteReadinessError,
build_node_clients, cluster_identifiers, collect_port_specs, ensure_cluster_readiness,
install_stack, kill_port_forwards, metrics_handle_from_endpoint,
metrics_handle_from_url, wait_for_ports_or_cleanup,
install_stack, kill_port_forwards, wait_for_ports_or_cleanup,
},
helm::HelmError,
},
lifecycle::{block_feed::spawn_block_feed_with, cleanup::RunnerCleanup},
wait::{ClusterWaitError, HostPort, PortForwardHandle},
wait::{ClusterWaitError, PortForwardHandle},
};
/// Deploys a scenario into Kubernetes using Helm charts and port-forwards.
@ -93,7 +91,7 @@ impl Deployer for K8sDeployer {
type Error = K8sRunnerError;
async fn deploy(&self, scenario: &Scenario) -> Result<Runner, Self::Error> {
deploy_with_observability(self, scenario, None, None, None).await
deploy_with_observability(self, scenario, None).await
}
}
@ -105,31 +103,10 @@ impl Deployer<ObservabilityCapability> for K8sDeployer {
&self,
scenario: &Scenario<ObservabilityCapability>,
) -> Result<Runner, Self::Error> {
deploy_with_observability(
self,
scenario,
scenario.capabilities().metrics_query_url.clone(),
scenario.capabilities().metrics_query_grafana_url.clone(),
scenario.capabilities().metrics_otlp_ingest_url.clone(),
)
.await
deploy_with_observability(self, scenario, Some(scenario.capabilities())).await
}
}
/// Prometheus host/port published by the cluster environment, if any.
///
/// # Panics
///
/// Panics when `cluster` is `None`.
fn cluster_prometheus_endpoint(cluster: &Option<ClusterEnvironment>) -> Option<&HostPort> {
    let environment = cluster.as_ref().expect("cluster must be available");
    environment.prometheus_endpoint()
}
/// Grafana host/port published by the cluster environment, if any.
///
/// # Panics
///
/// Panics when `cluster` is `None`.
fn cluster_grafana_endpoint(cluster: &Option<ClusterEnvironment>) -> Option<&HostPort> {
    let environment = cluster.as_ref().expect("cluster must be available");
    environment.grafana_endpoint()
}
async fn fail_cluster(cluster: &mut Option<ClusterEnvironment>, reason: &str) {
if let Some(env) = cluster.as_mut() {
env.fail(reason).await;
@ -157,59 +134,13 @@ fn ensure_supported_topology(descriptors: &GeneratedTopology) -> Result<(), K8sR
async fn deploy_with_observability<Caps>(
deployer: &K8sDeployer,
scenario: &Scenario<Caps>,
metrics_query_url: Option<Url>,
metrics_query_grafana_url: Option<Url>,
metrics_otlp_ingest_url: Option<Url>,
observability: Option<&ObservabilityCapability>,
) -> Result<Runner, K8sRunnerError> {
let external_prometheus = match metrics_query_url {
Some(url) => Some(url),
None => match std::env::var("K8S_RUNNER_METRICS_QUERY_URL")
.ok()
.or_else(|| std::env::var("NOMOS_METRICS_QUERY_URL").ok())
// Back-compat:
.or_else(|| std::env::var("K8S_RUNNER_EXTERNAL_PROMETHEUS_URL").ok())
.or_else(|| std::env::var("NOMOS_EXTERNAL_PROMETHEUS_URL").ok())
{
Some(raw) if !raw.trim().is_empty() => {
Some(Url::parse(raw.trim()).map_err(|err| {
MetricsError::new(format!("invalid metrics query url: {err}"))
})?)
}
_ => None,
},
};
let external_prometheus_grafana_url = match metrics_query_grafana_url {
Some(url) => Some(url),
None => match std::env::var("K8S_RUNNER_METRICS_QUERY_GRAFANA_URL")
.ok()
.or_else(|| std::env::var("NOMOS_METRICS_QUERY_GRAFANA_URL").ok())
// Back-compat:
.or_else(|| std::env::var("K8S_RUNNER_EXTERNAL_PROMETHEUS_GRAFANA_URL").ok())
.or_else(|| std::env::var("NOMOS_EXTERNAL_PROMETHEUS_GRAFANA_URL").ok())
{
Some(raw) if !raw.trim().is_empty() => Some(Url::parse(raw.trim()).map_err(|err| {
MetricsError::new(format!("invalid metrics query grafana url: {err}"))
})?),
_ => None,
},
};
let external_otlp_metrics_endpoint = match metrics_otlp_ingest_url {
Some(url) => Some(url),
None => match std::env::var("K8S_RUNNER_METRICS_OTLP_INGEST_URL")
.ok()
.or_else(|| std::env::var("NOMOS_METRICS_OTLP_INGEST_URL").ok())
// Back-compat:
.or_else(|| std::env::var("K8S_RUNNER_EXTERNAL_OTLP_METRICS_ENDPOINT").ok())
.or_else(|| std::env::var("NOMOS_EXTERNAL_OTLP_METRICS_ENDPOINT").ok())
{
Some(raw) if !raw.trim().is_empty() => Some(Url::parse(raw.trim()).map_err(|err| {
MetricsError::new(format!("invalid metrics OTLP ingest url: {err}"))
})?),
_ => None,
},
};
let env_inputs = ObservabilityInputs::from_env()?;
let cap_inputs = observability
.map(ObservabilityInputs::from_capability)
.unwrap_or_default();
let observability = env_inputs.with_overrides(cap_inputs).normalized();
let descriptors = scenario.topology().clone();
let validator_count = descriptors.validators().len();
@ -225,9 +156,16 @@ async fn deploy_with_observability<Caps>(
executors = executor_count,
duration_secs = scenario.duration().as_secs(),
readiness_checks = deployer.readiness_checks,
metrics_query_url = external_prometheus.as_ref().map(|u| u.as_str()),
metrics_query_grafana_url = external_prometheus_grafana_url.as_ref().map(|u| u.as_str()),
metrics_otlp_ingest_url = external_otlp_metrics_endpoint.as_ref().map(|u| u.as_str()),
metrics_query_url = observability.metrics_query_url.as_ref().map(|u| u.as_str()),
metrics_query_grafana_url = observability
.metrics_query_grafana_url
.as_ref()
.map(|u| u.as_str()),
metrics_otlp_ingest_url = observability
.metrics_otlp_ingest_url
.as_ref()
.map(|u| u.as_str()),
grafana_url = observability.grafana_url.as_ref().map(|u| u.as_str()),
"starting k8s deployment"
);
@ -238,9 +176,7 @@ async fn deploy_with_observability<Caps>(
&port_specs,
&descriptors,
deployer.readiness_checks,
external_prometheus.as_ref(),
external_prometheus_grafana_url.as_ref(),
external_otlp_metrics_endpoint.as_ref(),
&observability,
)
.await?,
);
@ -259,23 +195,11 @@ async fn deploy_with_observability<Caps>(
}
};
let telemetry = match external_prometheus.clone() {
Some(url) => metrics_handle_from_url(url),
None => cluster
.as_ref()
.and_then(|cluster| cluster.prometheus_endpoint())
.ok_or_else(|| MetricsError::new("prometheus endpoint unavailable"))
.and_then(metrics_handle_from_endpoint),
};
let telemetry = match telemetry {
let telemetry = match observability.telemetry_handle() {
Ok(handle) => handle,
Err(err) => {
fail_cluster(
&mut cluster,
"failed to configure prometheus metrics handle",
)
.await;
error!(error = ?err, "failed to configure prometheus metrics handle");
fail_cluster(&mut cluster, "failed to configure metrics telemetry handle").await;
error!(error = ?err, "failed to configure metrics telemetry handle");
return Err(err.into());
}
};
@ -289,36 +213,29 @@ async fn deploy_with_observability<Caps>(
}
};
if let Some(url) = external_prometheus.as_ref() {
info!(prometheus_url = %url.as_str(), "using external prometheus endpoint");
} else if let Some(prometheus) = cluster_prometheus_endpoint(&cluster) {
if let Some(url) = observability.metrics_query_url.as_ref() {
info!(
prometheus_url = %format!("http://{}:{}/", prometheus.host, prometheus.port),
"prometheus endpoint available on host"
metrics_query_url = %url.as_str(),
"metrics query endpoint configured"
);
}
if let Some(grafana) = cluster_grafana_endpoint(&cluster) {
info!(
grafana_url = %format!("http://{}:{}/", grafana.host, grafana.port),
"grafana dashboard available on host"
);
if let Some(url) = observability.grafana_url.as_ref() {
info!(grafana_url = %url.as_str(), "grafana url configured");
}
if std::env::var("TESTNET_PRINT_ENDPOINTS").is_ok() {
let prometheus = external_prometheus
let prometheus = observability
.metrics_query_url
.as_ref()
.map(|u| u.as_str().to_string())
.or_else(|| {
cluster_prometheus_endpoint(&cluster)
.map(|endpoint| format!("http://{}:{}/", endpoint.host, endpoint.port))
})
.unwrap_or_else(|| "<disabled>".to_string());
let grafana = cluster_grafana_endpoint(&cluster);
println!(
"TESTNET_ENDPOINTS prometheus={} grafana={}",
prometheus,
grafana
.map(|endpoint| format!("http://{}:{}/", endpoint.host, endpoint.port))
observability
.grafana_url
.as_ref()
.map(|u| u.as_str().to_string())
.unwrap_or_else(|| "<disabled>".to_string())
);
@ -374,16 +291,9 @@ async fn setup_cluster(
specs: &PortSpecs,
descriptors: &GeneratedTopology,
readiness_checks: bool,
external_prometheus: Option<&Url>,
external_prometheus_grafana_url: Option<&Url>,
external_otlp_metrics_endpoint: Option<&Url>,
observability: &ObservabilityInputs,
) -> Result<ClusterEnvironment, K8sRunnerError> {
let assets = prepare_assets(
descriptors,
external_prometheus,
external_prometheus_grafana_url,
external_otlp_metrics_endpoint,
)?;
let assets = prepare_assets(descriptors, observability.metrics_otlp_ingest_url.as_ref())?;
let validators = descriptors.validators().len();
let executors = descriptors.executors().len();
@ -394,19 +304,8 @@ async fn setup_cluster(
Some(install_stack(client, &assets, &namespace, &release, validators, executors).await?);
info!("waiting for helm-managed services to become ready");
let cluster_ready = wait_for_ports_or_cleanup(
client,
&namespace,
&release,
specs,
external_prometheus.is_none() && external_prometheus_grafana_url.is_none(),
&mut cleanup_guard,
)
.await?;
if let Some(prometheus) = cluster_ready.ports.prometheus.as_ref() {
info!(prometheus = ?prometheus, "discovered prometheus endpoint");
}
let cluster_ready =
wait_for_ports_or_cleanup(client, &namespace, &release, specs, &mut cleanup_guard).await?;
let environment = ClusterEnvironment::new(
client.clone(),

View File

@ -5,6 +5,7 @@ use std::{
};
use anyhow::{Context as _, Result as AnyResult};
use nomos_tracing::metrics::otlp::OtlpMetricsConfig;
use nomos_tracing_service::MetricsLayer;
use reqwest::Url;
use serde::Serialize;
@ -55,8 +56,6 @@ pub enum AssetsError {
MissingKzg { path: PathBuf },
#[error("missing Helm chart at {path}; ensure the repository is up-to-date")]
MissingChart { path: PathBuf },
#[error("missing Grafana dashboards source at {path}")]
MissingGrafanaDashboards { path: PathBuf },
#[error("failed to create temporary directory for rendered assets: {source}")]
TempDir {
#[source]
@ -92,9 +91,7 @@ fn kzg_mode() -> KzgMode {
/// topology.
pub fn prepare_assets(
topology: &GeneratedTopology,
external_prometheus: Option<&Url>,
external_prometheus_grafana_url: Option<&Url>,
external_otlp_metrics_endpoint: Option<&Url>,
metrics_otlp_ingest_url: Option<&Url>,
) -> Result<RunnerAssets, AssetsError> {
info!(
validators = topology.validators().len(),
@ -104,13 +101,7 @@ pub fn prepare_assets(
let root = workspace_root().map_err(|source| AssetsError::WorkspaceRoot { source })?;
let kzg_mode = kzg_mode();
let cfgsync_yaml = render_cfgsync_config(
&root,
topology,
kzg_mode,
external_prometheus,
external_otlp_metrics_endpoint,
)?;
let cfgsync_yaml = render_cfgsync_config(&root, topology, kzg_mode, metrics_otlp_ingest_url)?;
let tempdir = tempfile::Builder::new()
.prefix("nomos-helm-")
@ -124,12 +115,7 @@ pub fn prepare_assets(
KzgMode::InImage => None,
};
let chart_path = helm_chart_path()?;
sync_grafana_dashboards(&root, &chart_path)?;
let values_yaml = render_values_yaml(
topology,
external_prometheus,
external_prometheus_grafana_url,
)?;
let values_yaml = render_values_yaml(topology)?;
let values_file = write_temp_file(tempdir.path(), "values.yaml", values_yaml)?;
let image = env::var("NOMOS_TESTNET_IMAGE")
.unwrap_or_else(|_| String::from("public.ecr.aws/r4s5t9y4/logos/logos-blockchain:test"));
@ -164,81 +150,13 @@ pub fn prepare_assets(
}
const CFGSYNC_K8S_TIMEOUT_SECS: u64 = 300;
const DEFAULT_GRAFANA_NODE_PORT: u16 = 30030;
const DEFAULT_IN_IMAGE_KZG_PARAMS_PATH: &str = "/opt/nomos/kzg-params/kzgrs_test_params";
fn sync_grafana_dashboards(root: &Path, chart_path: &Path) -> Result<(), AssetsError> {
let source_dir = stack_assets_root(root).join("monitoring/grafana/dashboards");
let dest_dir = chart_path.join("grafana/dashboards");
if !source_dir.exists() {
return Err(AssetsError::MissingGrafanaDashboards { path: source_dir });
}
fs::create_dir_all(&dest_dir).map_err(|source| AssetsError::Io {
path: dest_dir.clone(),
source,
})?;
let mut removed = 0usize;
for entry in fs::read_dir(&dest_dir).map_err(|source| AssetsError::Io {
path: dest_dir.clone(),
source,
})? {
let entry = entry.map_err(|source| AssetsError::Io {
path: dest_dir.clone(),
source,
})?;
let path = entry.path();
if path.extension().and_then(|ext| ext.to_str()) != Some("json") {
continue;
}
fs::remove_file(&path).map_err(|source| AssetsError::Io {
path: path.clone(),
source,
})?;
removed += 1;
}
let mut copied = 0usize;
for entry in fs::read_dir(&source_dir).map_err(|source| AssetsError::Io {
path: source_dir.clone(),
source,
})? {
let entry = entry.map_err(|source| AssetsError::Io {
path: source_dir.clone(),
source,
})?;
let path = entry.path();
if path.extension().and_then(|ext| ext.to_str()) != Some("json") {
continue;
}
let file_name = path.file_name().unwrap_or_default();
let dest_path = dest_dir.join(file_name);
fs::copy(&path, &dest_path).map_err(|source| AssetsError::Io {
path: dest_path.clone(),
source,
})?;
copied += 1;
}
debug!(
source = %source_dir.display(),
dest = %dest_dir.display(),
removed,
copied,
"synced Grafana dashboards into Helm chart"
);
Ok(())
}
fn render_cfgsync_config(
root: &Path,
topology: &GeneratedTopology,
kzg_mode: KzgMode,
external_prometheus: Option<&Url>,
external_otlp_metrics_endpoint: Option<&Url>,
metrics_otlp_ingest_url: Option<&Url>,
) -> Result<String, AssetsError> {
let cfgsync_template_path = stack_assets_root(root).join("cfgsync.yaml");
debug!(path = %cfgsync_template_path.display(), "loading cfgsync template");
@ -254,15 +172,11 @@ fn render_cfgsync_config(
.unwrap_or_else(|| DEFAULT_IN_IMAGE_KZG_PARAMS_PATH.to_string());
}
let external_metrics_endpoint = match external_otlp_metrics_endpoint {
Some(endpoint) => Some(Ok(endpoint.clone())),
None => external_prometheus.map(derive_prometheus_otlp_metrics_endpoint),
};
if let Some(endpoint) = external_metrics_endpoint.transpose()? {
if let MetricsLayer::Otlp(ref mut config) = cfg.tracing_settings.metrics {
config.endpoint = endpoint;
}
if let Some(endpoint) = metrics_otlp_ingest_url.cloned() {
cfg.tracing_settings.metrics = MetricsLayer::Otlp(OtlpMetricsConfig {
endpoint,
host_identifier: "node".into(),
});
}
cfg.timeout = cfg.timeout.max(CFGSYNC_K8S_TIMEOUT_SECS);
@ -270,16 +184,6 @@ fn render_cfgsync_config(
render_cfgsync_yaml(&cfg).map_err(|source| AssetsError::Cfgsync { source })
}
/// Derives the OTLP metrics ingestion URL from an external Prometheus base URL
/// by appending Prometheus' fixed OTLP receiver path.
fn derive_prometheus_otlp_metrics_endpoint(base: &Url) -> Result<Url, AssetsError> {
    // Shadowing keeps the error message's interpolation identical.
    let base = base.as_str().trim_end_matches('/');
    let candidate = format!("{base}/api/v1/otlp/v1/metrics");
    match Url::parse(&candidate) {
        Ok(url) => Ok(url),
        Err(source) => Err(AssetsError::Cfgsync {
            source: anyhow::anyhow!(
                "invalid OTLP metrics endpoint derived from external Prometheus url '{base}': {source}"
            ),
        }),
    }
}
struct ScriptPaths {
run_cfgsync: PathBuf,
run_shared: PathBuf,
@ -337,16 +241,8 @@ fn helm_chart_path() -> Result<PathBuf, AssetsError> {
}
}
fn render_values_yaml(
topology: &GeneratedTopology,
external_prometheus: Option<&Url>,
external_prometheus_grafana_url: Option<&Url>,
) -> Result<String, AssetsError> {
let values = build_values(
topology,
external_prometheus,
external_prometheus_grafana_url,
);
/// Renders the Helm `values.yaml` content for the given topology.
fn render_values_yaml(topology: &GeneratedTopology) -> Result<String, AssetsError> {
    // Build the values struct and serialize it in one expression.
    serde_yaml::to_string(&build_values(topology)).map_err(|source| AssetsError::Values { source })
}
@ -402,8 +298,6 @@ struct HelmValues {
cfgsync: CfgsyncValues,
validators: NodeGroup,
executors: NodeGroup,
prometheus: PrometheusValues,
grafana: GrafanaValues,
}
#[derive(Serialize)]
@ -426,72 +320,13 @@ struct NodeValues {
env: BTreeMap<String, String>,
}
/// Helm values controlling the chart's Prometheus deployment.
#[derive(Serialize)]
struct PrometheusValues {
    // True only when no external Prometheus URL is supplied (the chart then
    // deploys its own instance).
    enabled: bool,
    // Base URL of an external Prometheus; omitted from the YAML when `None`.
    #[serde(rename = "externalUrl", skip_serializing_if = "Option::is_none")]
    external_url: Option<String>,
}
/// Helm values for the bundled Grafana deployment.
#[derive(Serialize)]
struct GrafanaValues {
    // Whether the chart should deploy Grafana at all.
    enabled: bool,
    // Container image reference (e.g. `grafana/grafana:10.4.1`).
    image: String,
    // Kubernetes image pull policy for the Grafana pod.
    #[serde(rename = "imagePullPolicy")]
    image_pull_policy: String,
    // Initial admin credentials passed through to the chart.
    #[serde(rename = "adminUser")]
    admin_user: String,
    #[serde(rename = "adminPassword")]
    admin_password: String,
    // Service exposure settings (type and optional NodePort).
    service: GrafanaServiceValues,
}
/// Service exposure settings for Grafana (serialized under `service`).
#[derive(Serialize)]
struct GrafanaServiceValues {
    // Kubernetes Service type, e.g. `NodePort`.
    #[serde(rename = "type")]
    type_field: String,
    // Explicit NodePort to bind; serialized as `null` when `None` (no
    // `skip_serializing_if` here, so the key is always emitted).
    #[serde(rename = "nodePort")]
    node_port: Option<u16>,
}
fn build_values(
topology: &GeneratedTopology,
external_prometheus: Option<&Url>,
external_prometheus_grafana_url: Option<&Url>,
) -> HelmValues {
fn build_values(topology: &GeneratedTopology) -> HelmValues {
let cfgsync = CfgsyncValues {
port: cfgsync_port(),
};
let pol_mode = pol_proof_mode();
let image_pull_policy =
env::var("NOMOS_TESTNET_IMAGE_PULL_POLICY").unwrap_or_else(|_| "IfNotPresent".into());
let grafana_node_port = match kzg_mode() {
KzgMode::HostPath => Some(DEFAULT_GRAFANA_NODE_PORT),
KzgMode::InImage => env::var("NOMOS_GRAFANA_NODE_PORT").ok().and_then(|value| {
value
.parse::<u16>()
.ok()
.filter(|port| *port >= 30000 && *port <= 32767)
}),
};
let grafana = GrafanaValues {
enabled: true,
image: "grafana/grafana:10.4.1".into(),
image_pull_policy: "IfNotPresent".into(),
admin_user: "admin".into(),
admin_password: "admin".into(),
service: GrafanaServiceValues {
type_field: "NodePort".into(),
node_port: grafana_node_port,
},
};
let prometheus_external_url = external_prometheus_grafana_url
.or(external_prometheus)
.map(|url| url.as_str().trim_end_matches('/').to_string());
let prometheus = PrometheusValues {
enabled: prometheus_external_url.is_none(),
external_url: prometheus_external_url,
};
debug!(pol_mode, "rendering Helm values for k8s stack");
let validators = topology
.validators()
@ -578,8 +413,6 @@ fn build_values(
count: topology.executors().len(),
nodes: executors,
},
prometheus,
grafana,
}
}

View File

@ -4,7 +4,7 @@ use kube::Client;
use reqwest::Url;
use testing_framework_core::{
nodes::ApiClient,
scenario::{CleanupGuard, Metrics, MetricsError, NodeClients, http_probe::NodeRole},
scenario::{CleanupGuard, NodeClients, http_probe::NodeRole},
topology::{generation::GeneratedTopology, readiness::ReadinessError},
};
use tracing::{debug, info};
@ -15,8 +15,7 @@ use crate::{
infrastructure::assets::RunnerAssets,
lifecycle::{cleanup::RunnerCleanup, logs::dump_namespace_logs},
wait::{
ClusterPorts, ClusterReady, HostPort, NodeConfigPorts, PortForwardHandle,
wait_for_cluster_ready,
ClusterPorts, ClusterReady, NodeConfigPorts, PortForwardHandle, wait_for_cluster_ready,
},
};
@ -38,8 +37,6 @@ pub struct ClusterEnvironment {
validator_testing_ports: Vec<u16>,
executor_api_ports: Vec<u16>,
executor_testing_ports: Vec<u16>,
prometheus: Option<HostPort>,
grafana: Option<HostPort>,
port_forwards: Vec<PortForwardHandle>,
}
@ -68,8 +65,6 @@ impl ClusterEnvironment {
validator_testing_ports,
executor_api_ports,
executor_testing_ports,
prometheus: ports.prometheus.clone(),
grafana: ports.grafana.clone(),
port_forwards,
}
}
@ -105,14 +100,6 @@ impl ClusterEnvironment {
&self.release
}
/// Prometheus host/port recorded during cluster bring-up, if one was
/// discovered.
pub fn prometheus_endpoint(&self) -> Option<&HostPort> {
    self.prometheus.as_ref()
}
/// Grafana host/port recorded during cluster bring-up, if one was discovered.
pub fn grafana_endpoint(&self) -> Option<&HostPort> {
    self.grafana.as_ref()
}
/// Validator ports as `(api_ports, testing_ports)` slices.
pub fn validator_ports(&self) -> (&[u16], &[u16]) {
    (&self.validator_api_ports, &self.validator_testing_ports)
}
@ -229,16 +216,6 @@ pub fn build_node_clients(cluster: &ClusterEnvironment) -> Result<NodeClients, N
Ok(NodeClients::new(validators, executors))
}
/// Builds a Prometheus-backed metrics handle from a discovered host/port pair.
///
/// Fails with a `MetricsError` if the host/port cannot be assembled into a
/// valid URL.
pub fn metrics_handle_from_endpoint(endpoint: &HostPort) -> Result<Metrics, MetricsError> {
    match cluster_host_url(&endpoint.host, endpoint.port) {
        Ok(url) => Metrics::from_prometheus(url),
        Err(err) => Err(MetricsError::new(format!("invalid prometheus url: {err}"))),
    }
}
/// Builds a Prometheus-backed metrics handle that queries the given URL.
pub fn metrics_handle_from_url(url: Url) -> Result<Metrics, MetricsError> {
    Metrics::from_prometheus(url)
}
pub async fn ensure_cluster_readiness(
descriptors: &GeneratedTopology,
cluster: &ClusterEnvironment,
@ -324,7 +301,6 @@ pub async fn wait_for_ports_or_cleanup(
namespace: &str,
release: &str,
specs: &PortSpecs,
prometheus_enabled: bool,
cleanup_guard: &mut Option<RunnerCleanup>,
) -> Result<ClusterReady, crate::deployer::K8sRunnerError> {
info!(
@ -340,13 +316,11 @@ pub async fn wait_for_ports_or_cleanup(
release,
&specs.validators,
&specs.executors,
prometheus_enabled,
)
.await
{
Ok(ports) => {
info!(
prometheus = ?ports.ports.prometheus,
validator_ports = ?ports.ports.validators,
executor_ports = ?ports.ports.executors,
"cluster port-forwards established"

View File

@ -1,36 +0,0 @@
use tokio::time::sleep;
use super::{ClusterWaitError, node_http_probe_timeout, node_http_timeout};
use crate::host::node_host;
const GRAFANA_HTTP_POLL_INTERVAL: std::time::Duration = std::time::Duration::from_secs(1);
/// Waits for Grafana to answer on a NodePort, probing the cluster node's
/// externally reachable host with the standard node HTTP probe timeout.
pub async fn wait_for_grafana_http_nodeport(port: u16) -> Result<(), ClusterWaitError> {
    wait_for_grafana_http(&node_host(), port, node_http_probe_timeout()).await
}
/// Waits for Grafana to answer through a local port-forward.
pub async fn wait_for_grafana_http_port_forward(port: u16) -> Result<(), ClusterWaitError> {
    // Port-forwards are bound to loopback, so probe localhost directly.
    let timeout = node_http_timeout();
    wait_for_grafana_http("127.0.0.1", port, timeout).await
}
/// Polls Grafana's `/api/health` endpoint once per second until it returns a
/// 2xx response or the (whole-second) timeout budget is exhausted.
async fn wait_for_grafana_http(
    host: &str,
    port: u16,
    timeout: std::time::Duration,
) -> Result<(), ClusterWaitError> {
    let client = reqwest::Client::new();
    let url = format!("http://{host}:{port}/api/health");
    // One attempt per second of the timeout; request errors count as "not
    // ready yet".
    for _attempt in 0..timeout.as_secs() {
        let healthy = match client.get(&url).send().await {
            Ok(resp) => resp.status().is_success(),
            Err(_) => false,
        };
        if healthy {
            return Ok(());
        }
        sleep(GRAFANA_HTTP_POLL_INTERVAL).await;
    }
    Err(ClusterWaitError::GrafanaTimeout { port })
}

View File

@ -4,9 +4,7 @@ use kube::Error as KubeError;
use testing_framework_core::{
constants::{
DEFAULT_HTTP_POLL_INTERVAL, DEFAULT_K8S_DEPLOYMENT_TIMEOUT,
DEFAULT_NODE_HTTP_PROBE_TIMEOUT, DEFAULT_NODE_HTTP_TIMEOUT, DEFAULT_PROMETHEUS_HTTP_PORT,
DEFAULT_PROMETHEUS_HTTP_PROBE_TIMEOUT, DEFAULT_PROMETHEUS_HTTP_TIMEOUT,
DEFAULT_PROMETHEUS_SERVICE_NAME,
DEFAULT_NODE_HTTP_PROBE_TIMEOUT, DEFAULT_NODE_HTTP_TIMEOUT,
},
scenario::http_probe::NodeRole,
};
@ -14,11 +12,9 @@ use thiserror::Error;
mod deployment;
mod forwarding;
mod grafana;
mod http_probe;
mod orchestrator;
mod ports;
mod prometheus;
pub use forwarding::PortForwardHandle;
pub use orchestrator::wait_for_cluster_ready;
@ -44,15 +40,13 @@ pub struct HostPort {
pub port: u16,
}
/// All port assignments for the cluster plus Prometheus.
/// All port assignments for the cluster.
#[derive(Debug)]
pub struct ClusterPorts {
pub validators: Vec<NodePortAllocation>,
pub executors: Vec<NodePortAllocation>,
pub validator_host: String,
pub executor_host: String,
pub prometheus: Option<HostPort>,
pub grafana: Option<HostPort>,
}
/// Success result from waiting for the cluster: host ports and forward handles.
@ -96,10 +90,6 @@ pub enum ClusterWaitError {
port: u16,
timeout: Duration,
},
#[error("timeout waiting for prometheus readiness on NodePort {port}")]
PrometheusTimeout { port: u16 },
#[error("timeout waiting for grafana readiness on port {port}")]
GrafanaTimeout { port: u16 },
#[error("failed to start port-forward for service {service} port {port}: {source}")]
PortForward {
service: String,
@ -149,32 +139,6 @@ pub(crate) fn http_poll_interval() -> Duration {
*HTTP_POLL_INTERVAL
}
pub(crate) const PROMETHEUS_HTTP_PORT: u16 = DEFAULT_PROMETHEUS_HTTP_PORT;
static PROMETHEUS_HTTP_TIMEOUT: LazyLock<Duration> = LazyLock::new(|| {
env_duration_secs(
"K8S_RUNNER_PROMETHEUS_HTTP_TIMEOUT_SECS",
DEFAULT_PROMETHEUS_HTTP_TIMEOUT,
)
});
static PROMETHEUS_HTTP_PROBE_TIMEOUT: LazyLock<Duration> = LazyLock::new(|| {
env_duration_secs(
"K8S_RUNNER_PROMETHEUS_HTTP_PROBE_TIMEOUT_SECS",
DEFAULT_PROMETHEUS_HTTP_PROBE_TIMEOUT,
)
});
pub(crate) fn prometheus_http_timeout() -> Duration {
*PROMETHEUS_HTTP_TIMEOUT
}
pub(crate) fn prometheus_http_probe_timeout() -> Duration {
*PROMETHEUS_HTTP_PROBE_TIMEOUT
}
pub(crate) const PROMETHEUS_SERVICE_NAME: &str = DEFAULT_PROMETHEUS_SERVICE_NAME;
fn env_duration_secs(key: &str, default: Duration) -> Duration {
env::var(key)
.ok()

View File

@ -1,31 +1,20 @@
use kube::Client;
use testing_framework_core::scenario::http_probe::NodeRole;
use super::{
ClusterPorts, ClusterReady, ClusterWaitError, HostPort, NodeConfigPorts, PROMETHEUS_HTTP_PORT,
PROMETHEUS_SERVICE_NAME, prometheus_http_probe_timeout,
};
use super::{ClusterPorts, ClusterReady, ClusterWaitError, NodeConfigPorts};
use crate::lifecycle::wait::{
deployment::wait_for_deployment_ready,
forwarding::{
PortForwardHandle, PortForwardSpawn, kill_port_forwards, port_forward_group,
port_forward_service,
},
grafana::{wait_for_grafana_http_nodeport, wait_for_grafana_http_port_forward},
forwarding::{PortForwardHandle, kill_port_forwards, port_forward_group},
http_probe::{wait_for_node_http_nodeport, wait_for_node_http_port_forward},
ports::{discover_node_ports, find_node_port},
prometheus::{wait_for_prometheus_http_nodeport, wait_for_prometheus_http_port_forward},
ports::discover_node_ports,
};
const GRAFANA_HTTP_PORT: u16 = 3000;
pub async fn wait_for_cluster_ready(
client: &Client,
namespace: &str,
release: &str,
validator_ports: &[NodeConfigPorts],
executor_ports: &[NodeConfigPorts],
prometheus_enabled: bool,
) -> Result<ClusterReady, ClusterWaitError> {
if validator_ports.is_empty() {
return Err(ClusterWaitError::MissingValidator);
@ -108,75 +97,12 @@ pub async fn wait_for_cluster_ready(
}
}
let mut prometheus = None;
if prometheus_enabled {
let mut prometheus_port = find_node_port(
client,
namespace,
PROMETHEUS_SERVICE_NAME,
PROMETHEUS_HTTP_PORT,
)
.await?;
let mut prometheus_host = crate::host::node_host();
if wait_for_prometheus_http_nodeport(prometheus_port, prometheus_http_probe_timeout())
.await
.is_err()
{
let PortForwardSpawn { local_port, handle } =
port_forward_service(namespace, PROMETHEUS_SERVICE_NAME, PROMETHEUS_HTTP_PORT)
.map_err(|err| {
kill_port_forwards(&mut port_forwards);
err
})?;
prometheus_port = local_port;
prometheus_host = "127.0.0.1".to_owned();
port_forwards.push(handle);
if let Err(err) = wait_for_prometheus_http_port_forward(prometheus_port).await {
return Err(cleanup_port_forwards(&mut port_forwards, err));
}
}
prometheus = Some(HostPort {
host: prometheus_host,
port: prometheus_port,
});
}
let mut grafana = None;
let grafana_service = format!("{release}-grafana");
if let Ok(node_port) =
find_node_port(client, namespace, &grafana_service, GRAFANA_HTTP_PORT).await
{
let mut grafana_host = crate::host::node_host();
let mut grafana_port = node_port;
if wait_for_grafana_http_nodeport(grafana_port).await.is_err() {
let PortForwardSpawn { local_port, handle } =
port_forward_service(namespace, &grafana_service, GRAFANA_HTTP_PORT).map_err(
|err| {
kill_port_forwards(&mut port_forwards);
err
},
)?;
grafana_host = "127.0.0.1".to_owned();
grafana_port = local_port;
port_forwards.push(handle);
if let Err(err) = wait_for_grafana_http_port_forward(grafana_port).await {
return Err(cleanup_port_forwards(&mut port_forwards, err));
}
}
grafana = Some(HostPort {
host: grafana_host,
port: grafana_port,
});
}
Ok(ClusterReady {
ports: ClusterPorts {
validators: validator_allocations,
executors: executor_allocations,
validator_host,
executor_host,
prometheus,
grafana,
},
port_forwards,
})

View File

@ -1,39 +0,0 @@
use tokio::time::sleep;
use super::{ClusterWaitError, prometheus_http_timeout};
use crate::host::node_host;
const PROMETHEUS_HTTP_POLL_INTERVAL: std::time::Duration = std::time::Duration::from_secs(1);
/// Waits for Prometheus readiness on a NodePort, probing the cluster node's
/// externally reachable host with the caller-supplied timeout.
pub async fn wait_for_prometheus_http_nodeport(
    port: u16,
    timeout: std::time::Duration,
) -> Result<(), ClusterWaitError> {
    wait_for_prometheus_http(&node_host(), port, timeout).await
}
/// Waits for Prometheus readiness through a local port-forward.
pub async fn wait_for_prometheus_http_port_forward(port: u16) -> Result<(), ClusterWaitError> {
    // Port-forwards are bound to loopback, so probe localhost directly.
    let timeout = prometheus_http_timeout();
    wait_for_prometheus_http("127.0.0.1", port, timeout).await
}
/// Polls Prometheus' `/-/ready` endpoint once per second until it returns a
/// 2xx response or the (whole-second) timeout budget is exhausted.
async fn wait_for_prometheus_http(
    host: &str,
    port: u16,
    timeout: std::time::Duration,
) -> Result<(), ClusterWaitError> {
    let client = reqwest::Client::new();
    let url = format!("http://{host}:{port}/-/ready");
    // One attempt per second of the timeout; request errors count as "not
    // ready yet".
    for _attempt in 0..timeout.as_secs() {
        let ready = match client.get(&url).send().await {
            Ok(resp) => resp.status().is_success(),
            Err(_) => false,
        };
        if ready {
            return Ok(());
        }
        sleep(PROMETHEUS_HTTP_POLL_INTERVAL).await;
    }
    Err(ClusterWaitError::PrometheusTimeout { port })
}

View File

@ -132,6 +132,12 @@ pub trait ObservabilityBuilderExt: Sized {
url: &str,
) -> CoreScenarioBuilder<ObservabilityCapability>;
/// Optional Grafana base URL for printing/logging (human access).
fn with_grafana_url(self, url: reqwest::Url) -> CoreScenarioBuilder<ObservabilityCapability>;
/// Convenience wrapper that parses a URL string (panics if invalid).
fn with_grafana_url_str(self, url: &str) -> CoreScenarioBuilder<ObservabilityCapability>;
#[deprecated(note = "use with_metrics_query_url")]
fn with_external_prometheus(
self,
@ -190,6 +196,7 @@ impl ObservabilityBuilderExt for CoreScenarioBuilder<()> {
metrics_query_url: Some(url),
metrics_query_grafana_url: None,
metrics_otlp_ingest_url: None,
grafana_url: None,
})
}
@ -206,6 +213,7 @@ impl ObservabilityBuilderExt for CoreScenarioBuilder<()> {
metrics_query_url: None,
metrics_query_grafana_url: None,
metrics_otlp_ingest_url: Some(url),
grafana_url: None,
})
}
@ -225,6 +233,7 @@ impl ObservabilityBuilderExt for CoreScenarioBuilder<()> {
metrics_query_url: None,
metrics_query_grafana_url: Some(url),
metrics_otlp_ingest_url: None,
grafana_url: None,
})
}
@ -235,6 +244,20 @@ impl ObservabilityBuilderExt for CoreScenarioBuilder<()> {
let parsed = reqwest::Url::parse(url).expect("metrics query grafana url must be valid");
self.with_metrics_query_grafana_url(parsed)
}
// Record only the Grafana base URL on a fresh observability capability; all
// metrics URLs start unset.
fn with_grafana_url(self, url: reqwest::Url) -> CoreScenarioBuilder<ObservabilityCapability> {
    self.with_capabilities(ObservabilityCapability {
        metrics_query_url: None,
        metrics_query_grafana_url: None,
        metrics_otlp_ingest_url: None,
        grafana_url: Some(url),
    })
}
// String convenience wrapper; panics if the URL does not parse.
fn with_grafana_url_str(self, url: &str) -> CoreScenarioBuilder<ObservabilityCapability> {
    self.with_grafana_url(reqwest::Url::parse(url).expect("grafana url must be valid"))
}
}
impl ObservabilityBuilderExt for CoreScenarioBuilder<ObservabilityCapability> {
@ -282,6 +305,19 @@ impl ObservabilityBuilderExt for CoreScenarioBuilder<ObservabilityCapability> {
let parsed = reqwest::Url::parse(url).expect("metrics query grafana url must be valid");
self.with_metrics_query_grafana_url(parsed)
}
// Set (or replace) the Grafana base URL on the already-present observability
// capability, leaving the other URLs untouched.
fn with_grafana_url(
    mut self,
    url: reqwest::Url,
) -> CoreScenarioBuilder<ObservabilityCapability> {
    self.capabilities_mut().grafana_url = Some(url);
    self
}
// String convenience wrapper; panics if the URL does not parse.
fn with_grafana_url_str(self, url: &str) -> CoreScenarioBuilder<ObservabilityCapability> {
    self.with_grafana_url(reqwest::Url::parse(url).expect("grafana url must be valid"))
}
}
/// Builder for transaction workloads.