obs: drop grafana query url + simplify env vars

This commit is contained in:
andrussal 2025-12-18 09:23:39 +01:00
parent 3496945bd2
commit 29d9b40d72
21 changed files with 34 additions and 248 deletions

View File

@ -44,14 +44,17 @@ async fn main() {
&["NOMOS_DEMO_VALIDATORS", "COMPOSE_DEMO_VALIDATORS"],
DEFAULT_VALIDATORS,
);
let executors = read_env_any(
&["NOMOS_DEMO_EXECUTORS", "COMPOSE_DEMO_EXECUTORS"],
DEFAULT_EXECUTORS,
);
let run_secs = read_env_any(
&["NOMOS_DEMO_RUN_SECS", "COMPOSE_DEMO_RUN_SECS"],
DEFAULT_RUN_SECS,
);
info!(
validators,
executors, run_secs, "starting compose runner demo"
@ -75,18 +78,14 @@ async fn run_compose_case(
"building scenario plan"
);
let enable_chaos = env::var("NOMOS_DEMO_CHAOS")
.or_else(|_| env::var("COMPOSE_DEMO_CHAOS"))
.map(|value| value == "1" || value.eq_ignore_ascii_case("true"))
.unwrap_or(false);
let scenario = ScenarioBuilder::topology_with(|t| {
t.network_star().validators(validators).executors(executors)
})
.enable_node_control();
let scenario = if enable_chaos {
let (chaos_min_delay, chaos_max_delay, chaos_target_cooldown) = chaos_timings(run_duration);
let scenario = if let Some((chaos_min_delay, chaos_max_delay, chaos_target_cooldown)) =
chaos_timings(run_duration)
{
scenario.chaos_with(|c| {
c.restart()
.min_delay(chaos_min_delay)
@ -130,22 +129,24 @@ async fn run_compose_case(
Ok(())
}
fn chaos_timings(run_duration: Duration) -> (Duration, Duration, Duration) {
fn chaos_timings(run_duration: Duration) -> Option<(Duration, Duration, Duration)> {
let headroom = Duration::from_secs(CHAOS_DELAY_HEADROOM_SECS);
let chaos_min_delay = Duration::from_secs(CHAOS_MIN_DELAY_SECS).max(run_duration + headroom);
let chaos_max_delay = Duration::from_secs(CHAOS_MAX_DELAY_SECS).max(chaos_min_delay);
let chaos_target_cooldown = Duration::from_secs(CHAOS_COOLDOWN_SECS).max(chaos_min_delay);
let Some(max_allowed_delay) = run_duration.checked_sub(headroom) else {
return None;
};
(chaos_min_delay, chaos_max_delay, chaos_target_cooldown)
}
#[cfg(test)]
mod tests {
use super::*;
#[test]
fn chaos_cooldown_is_never_less_than_min_delay() {
let (min_delay, _max_delay, cooldown) = chaos_timings(Duration::from_secs(600));
assert!(cooldown >= min_delay);
let chaos_min_delay = Duration::from_secs(CHAOS_MIN_DELAY_SECS);
if max_allowed_delay <= chaos_min_delay {
return None;
}
let chaos_max_delay = Duration::from_secs(CHAOS_MAX_DELAY_SECS)
.min(max_allowed_delay)
.max(chaos_min_delay);
let chaos_target_cooldown = Duration::from_secs(CHAOS_COOLDOWN_SECS)
.min(max_allowed_delay)
.max(chaos_max_delay);
Some((chaos_min_delay, chaos_max_delay, chaos_target_cooldown))
}

View File

@ -4,5 +4,6 @@ use runner_examples::cucumber::{Mode, init_logging_defaults, init_tracing, run};
async fn main() {
init_logging_defaults();
init_tracing();
run(Mode::Host).await;
}

View File

@ -59,31 +59,13 @@ async fn run_k8s_case(validators: usize, executors: usize, run_duration: Duratio
.with_run_duration(run_duration)
.expect_consensus_liveness();
if let Ok(url) = env::var("K8S_RUNNER_METRICS_QUERY_URL")
.or_else(|_| env::var("NOMOS_METRICS_QUERY_URL"))
.or_else(|_| env::var("K8S_RUNNER_EXTERNAL_PROMETHEUS_URL"))
.or_else(|_| env::var("NOMOS_EXTERNAL_PROMETHEUS_URL"))
{
if let Ok(url) = env::var("NOMOS_METRICS_QUERY_URL") {
if !url.trim().is_empty() {
scenario = scenario.with_metrics_query_url_str(url.trim());
}
}
if let Ok(url) = env::var("K8S_RUNNER_METRICS_QUERY_GRAFANA_URL")
.or_else(|_| env::var("NOMOS_METRICS_QUERY_GRAFANA_URL"))
.or_else(|_| env::var("K8S_RUNNER_EXTERNAL_PROMETHEUS_GRAFANA_URL"))
.or_else(|_| env::var("NOMOS_EXTERNAL_PROMETHEUS_GRAFANA_URL"))
{
if !url.trim().is_empty() {
scenario = scenario.with_metrics_query_grafana_url_str(url.trim());
}
}
if let Ok(url) = env::var("K8S_RUNNER_METRICS_OTLP_INGEST_URL")
.or_else(|_| env::var("NOMOS_METRICS_OTLP_INGEST_URL"))
.or_else(|_| env::var("K8S_RUNNER_EXTERNAL_OTLP_METRICS_ENDPOINT"))
.or_else(|_| env::var("NOMOS_EXTERNAL_OTLP_METRICS_ENDPOINT"))
{
if let Ok(url) = env::var("NOMOS_METRICS_OTLP_INGEST_URL") {
if !url.trim().is_empty() {
scenario = scenario.with_metrics_otlp_ingest_url_str(url.trim());
}

View File

@ -39,14 +39,11 @@ Options:
-e, --executors N Number of executors (required)
--bundle PATH Convenience alias for setting NOMOS_BINARIES_TAR=PATH
--metrics-query-url URL PromQL base URL the runner process can query (optional)
--metrics-query-grafana-url URL PromQL base URL for a Grafana datasource (optional)
--metrics-otlp-ingest-url URL Full OTLP HTTP ingest URL for node metrics export (optional)
--grafana-url URL Grafana base URL for printing/logging (optional)
--external-prometheus URL Alias for --metrics-query-url
--external-prometheus-grafana-url URL Alias for --metrics-query-grafana-url
--external-otlp-metrics-endpoint URL Alias for --metrics-otlp-ingest-url
--local Use a local Docker image tag (default for docker-desktop k8s)
--ecr Use an ECR image reference (default for non-docker-desktop k8s)
--no-image-build Skip rebuilding the compose/k8s image (sets NOMOS_SKIP_IMAGE_BUILD=1)
Environment:
@ -59,19 +56,9 @@ Environment:
NOMOS_TESTNET_IMAGE_PULL_POLICY K8s imagePullPolicy (default ${DEFAULT_PULL_POLICY_LOCAL}; set to ${DEFAULT_PULL_POLICY_ECR} for --ecr)
NOMOS_BINARIES_TAR Path to prebuilt binaries/circuits tarball (default .tmp/nomos-binaries-<platform>-<version>.tar.gz)
NOMOS_SKIP_IMAGE_BUILD Set to 1 to skip rebuilding the compose/k8s image
K8S_RUNNER_METRICS_QUERY_URL PromQL base URL for the runner process
NOMOS_METRICS_QUERY_URL Alias for K8S_RUNNER_METRICS_QUERY_URL
K8S_RUNNER_METRICS_QUERY_GRAFANA_URL PromQL base URL for Grafana (cluster-reachable)
NOMOS_METRICS_QUERY_GRAFANA_URL Alias for K8S_RUNNER_METRICS_QUERY_GRAFANA_URL
K8S_RUNNER_METRICS_OTLP_INGEST_URL Full OTLP HTTP ingest URL for node metrics export
NOMOS_METRICS_OTLP_INGEST_URL Alias for K8S_RUNNER_METRICS_OTLP_INGEST_URL
K8S_RUNNER_GRAFANA_URL Grafana base URL for printing/logging (optional)
NOMOS_GRAFANA_URL Alias for K8S_RUNNER_GRAFANA_URL
Deprecated env vars (still supported):
K8S_RUNNER_EXTERNAL_PROMETHEUS_URL, NOMOS_EXTERNAL_PROMETHEUS_URL
K8S_RUNNER_EXTERNAL_PROMETHEUS_GRAFANA_URL, NOMOS_EXTERNAL_PROMETHEUS_GRAFANA_URL
K8S_RUNNER_EXTERNAL_OTLP_METRICS_ENDPOINT, NOMOS_EXTERNAL_OTLP_METRICS_ENDPOINT
NOMOS_METRICS_QUERY_URL PromQL base URL for the runner process (optional)
NOMOS_METRICS_OTLP_INGEST_URL Full OTLP HTTP ingest URL for node metrics export (optional)
NOMOS_GRAFANA_URL Grafana base URL for printing/logging (optional)
EOF
}
@ -116,7 +103,6 @@ run_examples::parse_args() {
DEMO_EXECUTORS=""
IMAGE_SELECTION_MODE="auto"
METRICS_QUERY_URL=""
METRICS_QUERY_GRAFANA_URL=""
METRICS_OTLP_INGEST_URL=""
GRAFANA_URL=""
@ -172,14 +158,6 @@ run_examples::parse_args() {
METRICS_QUERY_URL="${1#*=}"
shift
;;
--metrics-query-grafana-url)
METRICS_QUERY_GRAFANA_URL="${2:-}"
shift 2
;;
--metrics-query-grafana-url=*)
METRICS_QUERY_GRAFANA_URL="${1#*=}"
shift
;;
--metrics-otlp-ingest-url)
METRICS_OTLP_INGEST_URL="${2:-}"
shift 2
@ -204,14 +182,6 @@ run_examples::parse_args() {
METRICS_QUERY_URL="${1#*=}"
shift
;;
--external-prometheus-grafana-url)
METRICS_QUERY_GRAFANA_URL="${2:-}"
shift 2
;;
--external-prometheus-grafana-url=*)
METRICS_QUERY_GRAFANA_URL="${1#*=}"
shift
;;
--external-otlp-metrics-endpoint)
METRICS_OTLP_INGEST_URL="${2:-}"
shift 2
@ -221,19 +191,9 @@ run_examples::parse_args() {
shift
;;
--local)
if [ "${IMAGE_SELECTION_MODE}" = "ecr" ]; then
run_examples::fail_with_usage "--local and --ecr are mutually exclusive"
fi
IMAGE_SELECTION_MODE="local"
shift
;;
--ecr)
if [ "${IMAGE_SELECTION_MODE}" = "local" ]; then
run_examples::fail_with_usage "--local and --ecr are mutually exclusive"
fi
IMAGE_SELECTION_MODE="ecr"
shift
;;
--no-image-build)
NOMOS_SKIP_IMAGE_BUILD=1
export NOMOS_SKIP_IMAGE_BUILD
@ -584,19 +544,12 @@ run_examples::run() {
if [ -n "${METRICS_QUERY_URL}" ]; then
export NOMOS_METRICS_QUERY_URL="${METRICS_QUERY_URL}"
export K8S_RUNNER_METRICS_QUERY_URL="${METRICS_QUERY_URL}"
fi
if [ -n "${METRICS_QUERY_GRAFANA_URL}" ]; then
export NOMOS_METRICS_QUERY_GRAFANA_URL="${METRICS_QUERY_GRAFANA_URL}"
export K8S_RUNNER_METRICS_QUERY_GRAFANA_URL="${METRICS_QUERY_GRAFANA_URL}"
fi
if [ -n "${METRICS_OTLP_INGEST_URL}" ]; then
export NOMOS_METRICS_OTLP_INGEST_URL="${METRICS_OTLP_INGEST_URL}"
export K8S_RUNNER_METRICS_OTLP_INGEST_URL="${METRICS_OTLP_INGEST_URL}"
fi
if [ -n "${GRAFANA_URL}" ]; then
export NOMOS_GRAFANA_URL="${GRAFANA_URL}"
export K8S_RUNNER_GRAFANA_URL="${GRAFANA_URL}"
fi
echo "==> Running ${BIN} for ${RUN_SECS}s (mode=${MODE}, image=${IMAGE})"

View File

@ -1,4 +0,0 @@
#![allow(dead_code)]
/// Shared API client wrappers (placeholder).
pub struct SharedApiClient;

View File

@ -1,2 +0,0 @@
pub mod client;
pub mod proxy;

View File

@ -1,4 +0,0 @@
#![allow(dead_code)]
/// Shared API proxy helpers (placeholder).
pub struct ApiProxy;

View File

@ -1,3 +1,2 @@
pub mod injection;
pub mod paths;
pub mod validation;

View File

@ -1,4 +0,0 @@
#![allow(dead_code)]
/// Shared config validation helpers (placeholder).
pub struct ConfigValidator;

View File

@ -1,4 +0,0 @@
#![allow(dead_code)]
/// Shared cleanup helpers (placeholder).
pub struct CleanupManager;

View File

@ -1,5 +1,3 @@
#![allow(dead_code)]
use std::process::Child;
/// Shared cleanup helpers for child processes.

View File

@ -1,10 +0,0 @@
#![allow(dead_code)]
use std::process::Child;
use tracing::debug;
/// Shared lifecycle hooks (placeholder).
pub fn kill_child(child: &mut Child) {
debug!("killing child process");
let _ = child.kill();
}

View File

@ -1,4 +1,3 @@
pub mod cleanup;
pub mod kill;
pub mod monitor;
pub mod spawn;

View File

@ -1,5 +1,3 @@
#![allow(dead_code)]
use std::process::Child;
use tracing::debug;

View File

@ -1,5 +1,3 @@
#![allow(dead_code)]
use std::{
env,
fs::{self, File},

View File

@ -1,4 +1,3 @@
pub mod api;
pub mod binary;
pub mod config;
pub mod lifecycle;

View File

@ -14,10 +14,6 @@ pub struct ObservabilityCapability {
/// metrics (commonly a localhost port-forward, but can be any reachable
/// endpoint).
pub metrics_query_url: Option<Url>,
/// Optional Prometheus-compatible base URL used by the *Grafana pod* as its
/// datasource. This must be reachable from inside the cluster. If unset,
/// the k8s runner falls back to `metrics_query_url`.
pub metrics_query_grafana_url: Option<Url>,
/// Full OTLP HTTP metrics ingest endpoint used by *nodes* to export metrics
/// (backend-specific host and path).
pub metrics_otlp_ingest_url: Option<Url>,

View File

@ -13,9 +13,6 @@ pub struct ObservabilityInputs {
/// Prometheus-compatible base URL used by the runner process to query
/// metrics (PromQL API endpoints).
pub metrics_query_url: Option<Url>,
/// Prometheus-compatible base URL intended for an in-cluster Grafana
/// datasource.
pub metrics_query_grafana_url: Option<Url>,
/// Full OTLP HTTP metrics ingest endpoint used by nodes to export metrics
/// (backend-specific host and path).
pub metrics_otlp_ingest_url: Option<Url>,
@ -52,7 +49,6 @@ impl ObservabilityInputs {
pub fn from_capability(capabilities: &ObservabilityCapability) -> Self {
Self {
metrics_query_url: capabilities.metrics_query_url.clone(),
metrics_query_grafana_url: capabilities.metrics_query_grafana_url.clone(),
metrics_otlp_ingest_url: capabilities.metrics_otlp_ingest_url.clone(),
grafana_url: capabilities.grafana_url.clone(),
}
@ -64,52 +60,18 @@ impl ObservabilityInputs {
/// vars are also accepted as aliases for backwards compatibility.
pub fn from_env() -> Result<Self, MetricsError> {
Ok(Self {
metrics_query_url: read_url_var(&[
"NOMOS_METRICS_QUERY_URL",
"K8S_RUNNER_METRICS_QUERY_URL",
// Back-compat:
"K8S_RUNNER_EXTERNAL_PROMETHEUS_URL",
"NOMOS_EXTERNAL_PROMETHEUS_URL",
])?,
metrics_query_grafana_url: read_url_var(&[
"NOMOS_METRICS_QUERY_GRAFANA_URL",
"K8S_RUNNER_METRICS_QUERY_GRAFANA_URL",
// Back-compat:
"K8S_RUNNER_EXTERNAL_PROMETHEUS_GRAFANA_URL",
"NOMOS_EXTERNAL_PROMETHEUS_GRAFANA_URL",
])?,
metrics_otlp_ingest_url: read_url_var(&[
"NOMOS_METRICS_OTLP_INGEST_URL",
"K8S_RUNNER_METRICS_OTLP_INGEST_URL",
// Back-compat:
"K8S_RUNNER_EXTERNAL_OTLP_METRICS_ENDPOINT",
"NOMOS_EXTERNAL_OTLP_METRICS_ENDPOINT",
])?,
grafana_url: read_url_var(&["NOMOS_GRAFANA_URL", "K8S_RUNNER_GRAFANA_URL"])?,
metrics_query_url: read_url_var(&["NOMOS_METRICS_QUERY_URL"])?,
metrics_otlp_ingest_url: read_url_var(&["NOMOS_METRICS_OTLP_INGEST_URL"])?,
grafana_url: read_url_var(&["NOMOS_GRAFANA_URL"])?,
})
}
/// Apply defaults and fallbacks (pure function).
///
/// Currently, the only fallback is using `metrics_query_url` as the Grafana
/// datasource URL when `metrics_query_grafana_url` is unset.
#[must_use]
pub fn normalized(mut self) -> Self {
if self.metrics_query_grafana_url.is_none() {
self.metrics_query_grafana_url = self.metrics_query_url.clone();
}
self
}
/// Overlay non-empty values from `overrides` onto `self`.
#[must_use]
pub fn with_overrides(mut self, overrides: Self) -> Self {
if overrides.metrics_query_url.is_some() {
self.metrics_query_url = overrides.metrics_query_url;
}
if overrides.metrics_query_grafana_url.is_some() {
self.metrics_query_grafana_url = overrides.metrics_query_grafana_url;
}
if overrides.metrics_otlp_ingest_url.is_some() {
self.metrics_otlp_ingest_url = overrides.metrics_otlp_ingest_url;
}

View File

@ -48,7 +48,7 @@ impl DeploymentOrchestrator {
.observability_capability()
.map(ObservabilityInputs::from_capability)
.unwrap_or_default();
let observability = env_inputs.with_overrides(cap_inputs).normalized();
let observability = env_inputs.with_overrides(cap_inputs);
let DeploymentContext {
mut environment,

View File

@ -140,7 +140,7 @@ async fn deploy_with_observability<Caps>(
let cap_inputs = observability
.map(ObservabilityInputs::from_capability)
.unwrap_or_default();
let observability = env_inputs.with_overrides(cap_inputs).normalized();
let observability = env_inputs.with_overrides(cap_inputs);
let descriptors = scenario.topology().clone();
let validator_count = descriptors.validators().len();
@ -157,10 +157,6 @@ async fn deploy_with_observability<Caps>(
duration_secs = scenario.duration().as_secs(),
readiness_checks = deployer.readiness_checks,
metrics_query_url = observability.metrics_query_url.as_ref().map(|u| u.as_str()),
metrics_query_grafana_url = observability
.metrics_query_grafana_url
.as_ref()
.map(|u| u.as_str()),
metrics_otlp_ingest_url = observability
.metrics_otlp_ingest_url
.as_ref()

View File

@ -106,19 +106,6 @@ pub trait ObservabilityBuilderExt: Sized {
/// Convenience wrapper that parses a URL string (panics if invalid).
fn with_metrics_query_url_str(self, url: &str) -> CoreScenarioBuilder<ObservabilityCapability>;
/// Configure the Prometheus-compatible base URL for the k8s runner Grafana
/// datasource (must be reachable from inside the cluster).
fn with_metrics_query_grafana_url(
self,
url: reqwest::Url,
) -> CoreScenarioBuilder<ObservabilityCapability>;
/// Convenience wrapper that parses a URL string (panics if invalid).
fn with_metrics_query_grafana_url_str(
self,
url: &str,
) -> CoreScenarioBuilder<ObservabilityCapability>;
/// Configure the OTLP HTTP metrics ingest endpoint to which nodes should
/// export metrics (must be a full URL, including any required path).
fn with_metrics_otlp_ingest_url(
@ -154,22 +141,6 @@ pub trait ObservabilityBuilderExt: Sized {
self.with_metrics_query_url_str(url)
}
#[deprecated(note = "use with_metrics_query_grafana_url")]
fn with_external_prometheus_grafana_url(
self,
url: reqwest::Url,
) -> CoreScenarioBuilder<ObservabilityCapability> {
self.with_metrics_query_grafana_url(url)
}
#[deprecated(note = "use with_metrics_query_grafana_url_str")]
fn with_external_prometheus_grafana_url_str(
self,
url: &str,
) -> CoreScenarioBuilder<ObservabilityCapability> {
self.with_metrics_query_grafana_url_str(url)
}
#[deprecated(note = "use with_metrics_otlp_ingest_url")]
fn with_external_otlp_metrics_endpoint(
self,
@ -194,7 +165,6 @@ impl ObservabilityBuilderExt for CoreScenarioBuilder<()> {
) -> CoreScenarioBuilder<ObservabilityCapability> {
self.with_capabilities(ObservabilityCapability {
metrics_query_url: Some(url),
metrics_query_grafana_url: None,
metrics_otlp_ingest_url: None,
grafana_url: None,
})
@ -211,7 +181,6 @@ impl ObservabilityBuilderExt for CoreScenarioBuilder<()> {
) -> CoreScenarioBuilder<ObservabilityCapability> {
self.with_capabilities(ObservabilityCapability {
metrics_query_url: None,
metrics_query_grafana_url: None,
metrics_otlp_ingest_url: Some(url),
grafana_url: None,
})
@ -225,30 +194,9 @@ impl ObservabilityBuilderExt for CoreScenarioBuilder<()> {
self.with_metrics_otlp_ingest_url(parsed)
}
fn with_metrics_query_grafana_url(
self,
url: reqwest::Url,
) -> CoreScenarioBuilder<ObservabilityCapability> {
self.with_capabilities(ObservabilityCapability {
metrics_query_url: None,
metrics_query_grafana_url: Some(url),
metrics_otlp_ingest_url: None,
grafana_url: None,
})
}
fn with_metrics_query_grafana_url_str(
self,
url: &str,
) -> CoreScenarioBuilder<ObservabilityCapability> {
let parsed = reqwest::Url::parse(url).expect("metrics query grafana url must be valid");
self.with_metrics_query_grafana_url(parsed)
}
fn with_grafana_url(self, url: reqwest::Url) -> CoreScenarioBuilder<ObservabilityCapability> {
self.with_capabilities(ObservabilityCapability {
metrics_query_url: None,
metrics_query_grafana_url: None,
metrics_otlp_ingest_url: None,
grafana_url: Some(url),
})
@ -290,22 +238,6 @@ impl ObservabilityBuilderExt for CoreScenarioBuilder<ObservabilityCapability> {
self.with_metrics_otlp_ingest_url(parsed)
}
fn with_metrics_query_grafana_url(
mut self,
url: reqwest::Url,
) -> CoreScenarioBuilder<ObservabilityCapability> {
self.capabilities_mut().metrics_query_grafana_url = Some(url);
self
}
fn with_metrics_query_grafana_url_str(
self,
url: &str,
) -> CoreScenarioBuilder<ObservabilityCapability> {
let parsed = reqwest::Url::parse(url).expect("metrics query grafana url must be valid");
self.with_metrics_query_grafana_url(parsed)
}
fn with_grafana_url(
mut self,
url: reqwest::Url,