From d6c79ed1798316a1d4e46055b7016fa5fb792d6b Mon Sep 17 00:00:00 2001 From: andrussal Date: Wed, 17 Dec 2025 17:04:41 +0100 Subject: [PATCH] k8s runner: role-based metrics URLs --- examples/src/bin/k8s_runner.rs | 45 ++++- scripts/run-examples.sh | 95 ++++++++-- .../core/src/scenario/capabilities.rs | 12 +- .../k8s/src/deployer/orchestrator.rs | 75 ++++++-- .../k8s/src/infrastructure/assets.rs | 67 +++++-- .../workflows/src/builder/mod.rs | 173 +++++++++++++++++- 6 files changed, 416 insertions(+), 51 deletions(-) diff --git a/examples/src/bin/k8s_runner.rs b/examples/src/bin/k8s_runner.rs index 60611a6..150ff76 100644 --- a/examples/src/bin/k8s_runner.rs +++ b/examples/src/bin/k8s_runner.rs @@ -1,9 +1,12 @@ -use std::{process, time::Duration}; +use std::{env, process, time::Duration}; use anyhow::{Context as _, Result, ensure}; use runner_examples::{ScenarioBuilderExt as _, read_env_any}; -use testing_framework_core::scenario::{Deployer as _, Runner, ScenarioBuilder}; +use testing_framework_core::scenario::{ + Deployer as _, ObservabilityCapability, Runner, ScenarioBuilder, +}; use testing_framework_runner_k8s::{K8sDeployer, K8sRunnerError}; +use testing_framework_workflows::ObservabilityBuilderExt as _; use tracing::{info, warn}; const DEFAULT_RUN_SECS: u64 = 60; @@ -46,15 +49,47 @@ async fn run_k8s_case(validators: usize, executors: usize, run_duration: Duratio duration_secs = run_duration.as_secs(), "building scenario plan" ); - let mut plan = ScenarioBuilder::topology_with(|t| { + let mut scenario = ScenarioBuilder::topology_with(|t| { t.network_star().validators(validators).executors(executors) }) + .with_capabilities(ObservabilityCapability::default()) .wallets(TOTAL_WALLETS) .transactions_with(|txs| txs.rate(MIXED_TXS_PER_BLOCK).users(TRANSACTION_WALLETS)) .da_with(|da| da.blob_rate(DA_BLOB_RATE)) .with_run_duration(run_duration) - .expect_consensus_liveness() - .build(); + .expect_consensus_liveness(); + + if let Ok(url) = env::var("K8S_RUNNER_METRICS_QUERY_URL") + .or_else(|_| env::var("NOMOS_METRICS_QUERY_URL")) + .or_else(|_| env::var("K8S_RUNNER_EXTERNAL_PROMETHEUS_URL")) + .or_else(|_| env::var("NOMOS_EXTERNAL_PROMETHEUS_URL")) + { + if !url.trim().is_empty() { + scenario = scenario.with_metrics_query_url_str(url.trim()); + } + } + + if let Ok(url) = env::var("K8S_RUNNER_METRICS_QUERY_GRAFANA_URL") + .or_else(|_| env::var("NOMOS_METRICS_QUERY_GRAFANA_URL")) + .or_else(|_| env::var("K8S_RUNNER_EXTERNAL_PROMETHEUS_GRAFANA_URL")) + .or_else(|_| env::var("NOMOS_EXTERNAL_PROMETHEUS_GRAFANA_URL")) + { + if !url.trim().is_empty() { + scenario = scenario.with_metrics_query_grafana_url_str(url.trim()); + } + } + + if let Ok(url) = env::var("K8S_RUNNER_METRICS_OTLP_INGEST_URL") + .or_else(|_| env::var("NOMOS_METRICS_OTLP_INGEST_URL")) + .or_else(|_| env::var("K8S_RUNNER_EXTERNAL_OTLP_METRICS_ENDPOINT")) + .or_else(|_| env::var("NOMOS_EXTERNAL_OTLP_METRICS_ENDPOINT")) + { + if !url.trim().is_empty() { + scenario = scenario.with_metrics_otlp_ingest_url_str(url.trim()); + } + } + + let mut plan = scenario.build(); let deployer = K8sDeployer::new(); info!("deploying k8s stack"); diff --git a/scripts/run-examples.sh b/scripts/run-examples.sh index 2cb0c78..5c749d0 100755 --- a/scripts/run-examples.sh +++ b/scripts/run-examples.sh @@ -38,7 +38,12 @@ Options: -v, --validators N Number of validators (required) -e, --executors N Number of executors (required) --bundle PATH Convenience alias for setting NOMOS_BINARIES_TAR=PATH - --external-prometheus URL (k8s) Reuse existing Prometheus; skips Helm Prometheus + --metrics-query-url URL (k8s) PromQL base URL the runner process can query (often localhost port-forward) + --metrics-query-grafana-url URL (k8s) PromQL base URL reachable from inside the cluster (Grafana datasource) + --metrics-otlp-ingest-url URL (k8s) Full OTLP HTTP ingest URL for node metrics export + --external-prometheus URL (k8s) Alias for --metrics-query-url + --external-prometheus-grafana-url URL (k8s) Alias for --metrics-query-grafana-url + --external-otlp-metrics-endpoint URL (k8s) Alias for --metrics-otlp-ingest-url --local Use a local Docker image tag (default for docker-desktop k8s) --ecr Use an ECR image reference (default for non-docker-desktop k8s) --no-image-build Skip rebuilding the compose/k8s image (sets NOMOS_SKIP_IMAGE_BUILD=1) @@ -53,7 +58,17 @@ Environment: NOMOS_TESTNET_IMAGE_PULL_POLICY K8s imagePullPolicy (default ${DEFAULT_PULL_POLICY_LOCAL}; set to ${DEFAULT_PULL_POLICY_ECR} for --ecr) NOMOS_BINARIES_TAR Path to prebuilt binaries/circuits tarball (default .tmp/nomos-binaries--.tar.gz) NOMOS_SKIP_IMAGE_BUILD Set to 1 to skip rebuilding the compose/k8s image - K8S_RUNNER_EXTERNAL_PROMETHEUS_URL Reuse existing Prometheus; skips Helm Prometheus + K8S_RUNNER_METRICS_QUERY_URL PromQL base URL for the runner process + NOMOS_METRICS_QUERY_URL Alias for K8S_RUNNER_METRICS_QUERY_URL + K8S_RUNNER_METRICS_QUERY_GRAFANA_URL PromQL base URL for Grafana (cluster-reachable) + NOMOS_METRICS_QUERY_GRAFANA_URL Alias for K8S_RUNNER_METRICS_QUERY_GRAFANA_URL + K8S_RUNNER_METRICS_OTLP_INGEST_URL Full OTLP HTTP ingest URL for node metrics export + NOMOS_METRICS_OTLP_INGEST_URL Alias for K8S_RUNNER_METRICS_OTLP_INGEST_URL + +Deprecated env vars (still supported): + K8S_RUNNER_EXTERNAL_PROMETHEUS_URL, NOMOS_EXTERNAL_PROMETHEUS_URL + K8S_RUNNER_EXTERNAL_PROMETHEUS_GRAFANA_URL, NOMOS_EXTERNAL_PROMETHEUS_GRAFANA_URL + K8S_RUNNER_EXTERNAL_OTLP_METRICS_ENDPOINT, NOMOS_EXTERNAL_OTLP_METRICS_ENDPOINT EOF } @@ -97,7 +112,9 @@ run_examples::parse_args() { DEMO_VALIDATORS="" DEMO_EXECUTORS="" IMAGE_SELECTION_MODE="auto" - EXTERNAL_PROMETHEUS_URL="" + METRICS_QUERY_URL="" + METRICS_QUERY_GRAFANA_URL="" + METRICS_OTLP_INGEST_URL="" RUN_SECS_RAW_SPECIFIED="" @@ -143,12 +160,52 @@ run_examples::parse_args() { export NOMOS_BINARIES_TAR shift ;; + --metrics-query-url) + METRICS_QUERY_URL="${2:-}" + shift 2 + ;; + --metrics-query-url=*) + METRICS_QUERY_URL="${1#*=}" + shift + ;; + --metrics-query-grafana-url) + METRICS_QUERY_GRAFANA_URL="${2:-}" + shift 2 + ;; + --metrics-query-grafana-url=*) + METRICS_QUERY_GRAFANA_URL="${1#*=}" + shift + ;; + --metrics-otlp-ingest-url) + METRICS_OTLP_INGEST_URL="${2:-}" + shift 2 + ;; + --metrics-otlp-ingest-url=*) + METRICS_OTLP_INGEST_URL="${1#*=}" + shift + ;; --external-prometheus) - EXTERNAL_PROMETHEUS_URL="${2:-}" + METRICS_QUERY_URL="${2:-}" shift 2 ;; --external-prometheus=*) - EXTERNAL_PROMETHEUS_URL="${1#*=}" + METRICS_QUERY_URL="${1#*=}" + shift + ;; + --external-prometheus-grafana-url) + METRICS_QUERY_GRAFANA_URL="${2:-}" + shift 2 + ;; + --external-prometheus-grafana-url=*) + METRICS_QUERY_GRAFANA_URL="${1#*=}" + shift + ;; + --external-otlp-metrics-endpoint) + METRICS_OTLP_INGEST_URL="${2:-}" + shift 2 + ;; + --external-otlp-metrics-endpoint=*) + METRICS_OTLP_INGEST_URL="${1#*=}" shift ;; --local) @@ -205,9 +262,17 @@ run_examples::parse_args() { run_examples::fail_with_usage "executors must be a non-negative integer (pass -e/--executors)" fi - if [ -n "${EXTERNAL_PROMETHEUS_URL}" ] && [ "${MODE}" != "k8s" ]; then - echo "Warning: --external-prometheus is only used in k8s mode; ignoring." >&2 - EXTERNAL_PROMETHEUS_URL="" + if [ -n "${METRICS_QUERY_URL}" ] && [ "${MODE}" != "k8s" ]; then + echo "Warning: --metrics-query-url is only used in k8s mode; ignoring." >&2 + METRICS_QUERY_URL="" + fi + if [ -n "${METRICS_QUERY_GRAFANA_URL}" ] && [ "${MODE}" != "k8s" ]; then + echo "Warning: --metrics-query-grafana-url is only used in k8s mode; ignoring." >&2 + METRICS_QUERY_GRAFANA_URL="" + fi + if [ -n "${METRICS_OTLP_INGEST_URL}" ] && [ "${MODE}" != "k8s" ]; then + echo "Warning: --metrics-otlp-ingest-url is only used in k8s mode; ignoring." >&2 + METRICS_OTLP_INGEST_URL="" fi } @@ -517,9 +582,17 @@ run_examples::run() { export NOMOS_DEMO_VALIDATORS="${DEMO_VALIDATORS}" export NOMOS_DEMO_EXECUTORS="${DEMO_EXECUTORS}" - if [ "${MODE}" = "k8s" ] && [ -n "${EXTERNAL_PROMETHEUS_URL}" ]; then - export K8S_RUNNER_EXTERNAL_PROMETHEUS_URL="${EXTERNAL_PROMETHEUS_URL}" - export NOMOS_EXTERNAL_PROMETHEUS_URL="${EXTERNAL_PROMETHEUS_URL}" + if [ "${MODE}" = "k8s" ] && [ -n "${METRICS_QUERY_URL}" ]; then + export K8S_RUNNER_METRICS_QUERY_URL="${METRICS_QUERY_URL}" + export NOMOS_METRICS_QUERY_URL="${METRICS_QUERY_URL}" + fi + if [ "${MODE}" = "k8s" ] && [ -n "${METRICS_QUERY_GRAFANA_URL}" ]; then + export K8S_RUNNER_METRICS_QUERY_GRAFANA_URL="${METRICS_QUERY_GRAFANA_URL}" + export NOMOS_METRICS_QUERY_GRAFANA_URL="${METRICS_QUERY_GRAFANA_URL}" + fi + if [ "${MODE}" = "k8s" ] && [ -n "${METRICS_OTLP_INGEST_URL}" ]; then + export K8S_RUNNER_METRICS_OTLP_INGEST_URL="${METRICS_OTLP_INGEST_URL}" + export NOMOS_METRICS_OTLP_INGEST_URL="${METRICS_OTLP_INGEST_URL}" fi echo "==> Running ${BIN} for ${RUN_SECS}s (mode=${MODE}, image=${IMAGE})" diff --git a/testing-framework/core/src/scenario/capabilities.rs b/testing-framework/core/src/scenario/capabilities.rs index 6d4ef4e..9ecc867 100644 --- a/testing-framework/core/src/scenario/capabilities.rs +++ b/testing-framework/core/src/scenario/capabilities.rs @@ -13,7 +13,17 @@ pub struct NodeControlCapability; /// reuse an existing endpoint. #[derive(Clone, Debug, Default)] pub struct ObservabilityCapability { - pub external_prometheus: Option, + /// Prometheus-compatible base URL used by the *runner process* to query + /// metrics (commonly a localhost port-forward, but can be any reachable + /// endpoint). + pub metrics_query_url: Option, + /// Optional Prometheus-compatible base URL used by the *Grafana pod* as its + /// datasource. This must be reachable from inside the cluster. If unset, + /// the k8s runner falls back to `metrics_query_url`. + pub metrics_query_grafana_url: Option, + /// Full OTLP HTTP metrics ingest endpoint used by *nodes* to export metrics + /// (backend-specific host and path). + pub metrics_otlp_ingest_url: Option, } /// Trait implemented by scenario capability markers to signal whether node diff --git a/testing-framework/deployers/k8s/src/deployer/orchestrator.rs b/testing-framework/deployers/k8s/src/deployer/orchestrator.rs index a4f960c..c1873ca 100644 --- a/testing-framework/deployers/k8s/src/deployer/orchestrator.rs +++ b/testing-framework/deployers/k8s/src/deployer/orchestrator.rs @@ -93,7 +93,7 @@ impl Deployer for K8sDeployer { type Error = K8sRunnerError; async fn deploy(&self, scenario: &Scenario) -> Result { - deploy_with_prometheus(self, scenario, None).await + deploy_with_observability(self, scenario, None, None, None).await } } @@ -105,10 +105,12 @@ impl Deployer for K8sDeployer { &self, scenario: &Scenario, ) -> Result { - deploy_with_prometheus( + deploy_with_observability( self, scenario, - scenario.capabilities().external_prometheus.clone(), + scenario.capabilities().metrics_query_url.clone(), + scenario.capabilities().metrics_query_grafana_url.clone(), + scenario.capabilities().metrics_otlp_ingest_url.clone(), ) .await } @@ -152,19 +154,58 @@ fn ensure_supported_topology(descriptors: &GeneratedTopology) -> Result<(), K8sR Ok(()) } -async fn deploy_with_prometheus( +async fn deploy_with_observability( deployer: &K8sDeployer, scenario: &Scenario, - external_prometheus: Option, + metrics_query_url: Option, + metrics_query_grafana_url: Option, + metrics_otlp_ingest_url: Option, ) -> Result { - let external_prometheus = match external_prometheus { + let external_prometheus = match metrics_query_url { Some(url) => Some(url), - None => match std::env::var("K8S_RUNNER_EXTERNAL_PROMETHEUS_URL") + None => match std::env::var("K8S_RUNNER_METRICS_QUERY_URL") .ok() + .or_else(|| std::env::var("NOMOS_METRICS_QUERY_URL").ok()) + // Back-compat: + .or_else(|| std::env::var("K8S_RUNNER_EXTERNAL_PROMETHEUS_URL").ok()) .or_else(|| std::env::var("NOMOS_EXTERNAL_PROMETHEUS_URL").ok()) + { + Some(raw) if !raw.trim().is_empty() => { + Some(Url::parse(raw.trim()).map_err(|err| { + MetricsError::new(format!("invalid metrics query url: {err}")) + })?) + } + _ => None, + }, + }; + + let external_prometheus_grafana_url = match metrics_query_grafana_url { + Some(url) => Some(url), + None => match std::env::var("K8S_RUNNER_METRICS_QUERY_GRAFANA_URL") + .ok() + .or_else(|| std::env::var("NOMOS_METRICS_QUERY_GRAFANA_URL").ok()) + // Back-compat: + .or_else(|| std::env::var("K8S_RUNNER_EXTERNAL_PROMETHEUS_GRAFANA_URL").ok()) + .or_else(|| std::env::var("NOMOS_EXTERNAL_PROMETHEUS_GRAFANA_URL").ok()) { Some(raw) if !raw.trim().is_empty() => Some(Url::parse(raw.trim()).map_err(|err| { - MetricsError::new(format!("invalid external prometheus url: {err}")) + MetricsError::new(format!("invalid metrics query grafana url: {err}")) + })?), + _ => None, + }, + }; + + let external_otlp_metrics_endpoint = match metrics_otlp_ingest_url { + Some(url) => Some(url), + None => match std::env::var("K8S_RUNNER_METRICS_OTLP_INGEST_URL") + .ok() + .or_else(|| std::env::var("NOMOS_METRICS_OTLP_INGEST_URL").ok()) + // Back-compat: + .or_else(|| std::env::var("K8S_RUNNER_EXTERNAL_OTLP_METRICS_ENDPOINT").ok()) + .or_else(|| std::env::var("NOMOS_EXTERNAL_OTLP_METRICS_ENDPOINT").ok()) + { + Some(raw) if !raw.trim().is_empty() => Some(Url::parse(raw.trim()).map_err(|err| { + MetricsError::new(format!("invalid metrics OTLP ingest url: {err}")) })?), _ => None, }, @@ -178,12 +219,15 @@ async fn deploy_with_prometheus( let client = Client::try_default() .await .map_err(|source| K8sRunnerError::ClientInit { source })?; + info!( validators = validator_count, executors = executor_count, duration_secs = scenario.duration().as_secs(), readiness_checks = deployer.readiness_checks, - external_prometheus = external_prometheus.as_ref().map(|u| u.as_str()), + metrics_query_url = external_prometheus.as_ref().map(|u| u.as_str()), + metrics_query_grafana_url = external_prometheus_grafana_url.as_ref().map(|u| u.as_str()), + metrics_otlp_ingest_url = external_otlp_metrics_endpoint.as_ref().map(|u| u.as_str()), "starting k8s deployment" ); @@ -195,6 +239,8 @@ async fn deploy_with_prometheus( &descriptors, deployer.readiness_checks, external_prometheus.as_ref(), + external_prometheus_grafana_url.as_ref(), + external_otlp_metrics_endpoint.as_ref(), ) .await?, ); @@ -329,8 +375,15 @@ async fn setup_cluster( descriptors: &GeneratedTopology, readiness_checks: bool, external_prometheus: Option<&Url>, + external_prometheus_grafana_url: Option<&Url>, + external_otlp_metrics_endpoint: Option<&Url>, ) -> Result { - let assets = prepare_assets(descriptors, external_prometheus)?; + let assets = prepare_assets( + descriptors, + external_prometheus, + external_prometheus_grafana_url, + external_otlp_metrics_endpoint, + )?; let validators = descriptors.validators().len(); let executors = descriptors.executors().len(); @@ -346,7 +399,7 @@ async fn setup_cluster( &namespace, &release, specs, - external_prometheus.is_none(), + external_prometheus.is_none() && external_prometheus_grafana_url.is_none(), &mut cleanup_guard, ) .await?; diff --git a/testing-framework/deployers/k8s/src/infrastructure/assets.rs b/testing-framework/deployers/k8s/src/infrastructure/assets.rs index df180d5..fcc6060 100644 --- a/testing-framework/deployers/k8s/src/infrastructure/assets.rs +++ b/testing-framework/deployers/k8s/src/infrastructure/assets.rs @@ -93,6 +93,8 @@ fn kzg_mode() -> KzgMode { pub fn prepare_assets( topology: &GeneratedTopology, external_prometheus: Option<&Url>, + external_prometheus_grafana_url: Option<&Url>, + external_otlp_metrics_endpoint: Option<&Url>, ) -> Result { info!( validators = topology.validators().len(), @@ -102,7 +104,13 @@ pub fn prepare_assets( let root = workspace_root().map_err(|source| AssetsError::WorkspaceRoot { source })?; let kzg_mode = kzg_mode(); - let cfgsync_yaml = render_cfgsync_config(&root, topology, kzg_mode, external_prometheus)?; + let cfgsync_yaml = render_cfgsync_config( + &root, + topology, + kzg_mode, + external_prometheus, + external_otlp_metrics_endpoint, + )?; let tempdir = tempfile::Builder::new() .prefix("nomos-helm-") @@ -117,7 +125,11 @@ pub fn prepare_assets( }; let chart_path = helm_chart_path()?; sync_grafana_dashboards(&root, &chart_path)?; - let values_yaml = render_values_yaml(topology, external_prometheus)?; + let values_yaml = render_values_yaml( + topology, + external_prometheus, + external_prometheus_grafana_url, + )?; let values_file = write_temp_file(tempdir.path(), "values.yaml", values_yaml)?; let image = env::var("NOMOS_TESTNET_IMAGE") .unwrap_or_else(|_| String::from("public.ecr.aws/r4s5t9y4/logos/logos-blockchain:test")); @@ -226,33 +238,48 @@ fn render_cfgsync_config( topology: &GeneratedTopology, kzg_mode: KzgMode, external_prometheus: Option<&Url>, + external_otlp_metrics_endpoint: Option<&Url>, ) -> Result { let cfgsync_template_path = stack_assets_root(root).join("cfgsync.yaml"); debug!(path = %cfgsync_template_path.display(), "loading cfgsync template"); + let mut cfg = load_cfgsync_template(&cfgsync_template_path) .map_err(|source| AssetsError::Cfgsync { source })?; + apply_topology_overrides(&mut cfg, topology, kzg_mode == KzgMode::HostPath); + if kzg_mode == KzgMode::InImage { cfg.global_params_path = env::var("NOMOS_KZGRS_PARAMS_PATH") .ok() .unwrap_or_else(|| DEFAULT_IN_IMAGE_KZG_PARAMS_PATH.to_string()); } - if let Some(external_prometheus) = external_prometheus { - let base = external_prometheus.as_str().trim_end_matches('/'); - let otlp_metrics = format!("{base}/api/v1/otlp/v1/metrics"); - let endpoint = Url::parse(&otlp_metrics).map_err(|source| AssetsError::Cfgsync { - source: anyhow::anyhow!( - "invalid OTLP metrics endpoint derived from external Prometheus url '{base}': {source}" - ), - })?; + + let external_metrics_endpoint = match external_otlp_metrics_endpoint { + Some(endpoint) => Some(Ok(endpoint.clone())), + None => external_prometheus.map(derive_prometheus_otlp_metrics_endpoint), + }; + + if let Some(endpoint) = external_metrics_endpoint.transpose()? { if let MetricsLayer::Otlp(ref mut config) = cfg.tracing_settings.metrics { config.endpoint = endpoint; } } + cfg.timeout = cfg.timeout.max(CFGSYNC_K8S_TIMEOUT_SECS); + render_cfgsync_yaml(&cfg).map_err(|source| AssetsError::Cfgsync { source }) } +fn derive_prometheus_otlp_metrics_endpoint(base: &Url) -> Result { + let base = base.as_str().trim_end_matches('/'); + let otlp_metrics = format!("{base}/api/v1/otlp/v1/metrics"); + Url::parse(&otlp_metrics).map_err(|source| AssetsError::Cfgsync { + source: anyhow::anyhow!( + "invalid OTLP metrics endpoint derived from external Prometheus url '{base}': {source}" + ), + }) +} + struct ScriptPaths { run_cfgsync: PathBuf, run_shared: PathBuf, @@ -313,8 +340,13 @@ fn helm_chart_path() -> Result { fn render_values_yaml( topology: &GeneratedTopology, external_prometheus: Option<&Url>, + external_prometheus_grafana_url: Option<&Url>, ) -> Result { - let values = build_values(topology, external_prometheus); + let values = build_values( + topology, + external_prometheus, + external_prometheus_grafana_url, + ); serde_yaml::to_string(&values).map_err(|source| AssetsError::Values { source }) } @@ -422,7 +454,11 @@ struct GrafanaServiceValues { node_port: Option, } -fn build_values(topology: &GeneratedTopology, external_prometheus: Option<&Url>) -> HelmValues { +fn build_values( + topology: &GeneratedTopology, + external_prometheus: Option<&Url>, + external_prometheus_grafana_url: Option<&Url>, +) -> HelmValues { let cfgsync = CfgsyncValues { port: cfgsync_port(), }; @@ -449,9 +485,12 @@ fn build_values(topology: &GeneratedTopology, external_prometheus: Option<&Url>) node_port: grafana_node_port, }, }; + let prometheus_external_url = external_prometheus_grafana_url + .or(external_prometheus) + .map(|url| url.as_str().trim_end_matches('/').to_string()); let prometheus = PrometheusValues { - enabled: external_prometheus.is_none(), - external_url: external_prometheus.map(|url| url.as_str().trim_end_matches('/').to_string()), + enabled: prometheus_external_url.is_none(), + external_url: prometheus_external_url, }; debug!(pol_mode, "rendering Helm values for k8s stack"); let validators = topology diff --git a/testing-framework/workflows/src/builder/mod.rs b/testing-framework/workflows/src/builder/mod.rs index fd8d7b6..b9e233c 100644 --- a/testing-framework/workflows/src/builder/mod.rs +++ b/testing-framework/workflows/src/builder/mod.rs @@ -98,34 +98,189 @@ impl ScenarioBuilderExt for CoreScenarioBuilder { pub trait ObservabilityBuilderExt: Sized { /// Reuse an existing Prometheus endpoint instead of provisioning one (k8s /// runner). - fn with_external_prometheus( + fn with_metrics_query_url( self, url: reqwest::Url, ) -> CoreScenarioBuilder; /// Convenience wrapper that parses a URL string (panics if invalid). - fn with_external_prometheus_str( + fn with_metrics_query_url_str(self, url: &str) -> CoreScenarioBuilder; + + /// Configure the Prometheus-compatible base URL for the k8s runner Grafana + /// datasource (must be reachable from inside the cluster). + fn with_metrics_query_grafana_url( + self, + url: reqwest::Url, + ) -> CoreScenarioBuilder; + + /// Convenience wrapper that parses a URL string (panics if invalid). + fn with_metrics_query_grafana_url_str( self, url: &str, ) -> CoreScenarioBuilder; -} -impl ObservabilityBuilderExt for CoreScenarioBuilder<()> { + /// Configure the OTLP HTTP metrics ingest endpoint to which nodes should + /// export metrics (must be a full URL, including any required path). + fn with_metrics_otlp_ingest_url( + self, + url: reqwest::Url, + ) -> CoreScenarioBuilder; + + /// Convenience wrapper that parses a URL string (panics if invalid). + fn with_metrics_otlp_ingest_url_str( + self, + url: &str, + ) -> CoreScenarioBuilder; + + #[deprecated(note = "use with_metrics_query_url")] fn with_external_prometheus( self, url: reqwest::Url, ) -> CoreScenarioBuilder { - self.with_capabilities(ObservabilityCapability { - external_prometheus: Some(url), - }) + self.with_metrics_query_url(url) } + #[deprecated(note = "use with_metrics_query_url_str")] fn with_external_prometheus_str( self, url: &str, ) -> CoreScenarioBuilder { - let parsed = reqwest::Url::parse(url).expect("external prometheus url must be valid"); - self.with_external_prometheus(parsed) + self.with_metrics_query_url_str(url) + } + + #[deprecated(note = "use with_metrics_query_grafana_url")] + fn with_external_prometheus_grafana_url( + self, + url: reqwest::Url, + ) -> CoreScenarioBuilder { + self.with_metrics_query_grafana_url(url) + } + + #[deprecated(note = "use with_metrics_query_grafana_url_str")] + fn with_external_prometheus_grafana_url_str( + self, + url: &str, + ) -> CoreScenarioBuilder { + self.with_metrics_query_grafana_url_str(url) + } + + #[deprecated(note = "use with_metrics_otlp_ingest_url")] + fn with_external_otlp_metrics_endpoint( + self, + url: reqwest::Url, + ) -> CoreScenarioBuilder { + self.with_metrics_otlp_ingest_url(url) + } + + #[deprecated(note = "use with_metrics_otlp_ingest_url_str")] + fn with_external_otlp_metrics_endpoint_str( + self, + url: &str, + ) -> CoreScenarioBuilder { + self.with_metrics_otlp_ingest_url_str(url) + } +} + +impl ObservabilityBuilderExt for CoreScenarioBuilder<()> { + fn with_metrics_query_url( + self, + url: reqwest::Url, + ) -> CoreScenarioBuilder { + self.with_capabilities(ObservabilityCapability { + metrics_query_url: Some(url), + metrics_query_grafana_url: None, + metrics_otlp_ingest_url: None, + }) + } + + fn with_metrics_query_url_str(self, url: &str) -> CoreScenarioBuilder { + let parsed = reqwest::Url::parse(url).expect("metrics query url must be valid"); + self.with_metrics_query_url(parsed) + } + + fn with_metrics_otlp_ingest_url( + self, + url: reqwest::Url, + ) -> CoreScenarioBuilder { + self.with_capabilities(ObservabilityCapability { + metrics_query_url: None, + metrics_query_grafana_url: None, + metrics_otlp_ingest_url: Some(url), + }) + } + + fn with_metrics_otlp_ingest_url_str( + self, + url: &str, + ) -> CoreScenarioBuilder { + let parsed = reqwest::Url::parse(url).expect("metrics OTLP ingest url must be valid"); + self.with_metrics_otlp_ingest_url(parsed) + } + + fn with_metrics_query_grafana_url( + self, + url: reqwest::Url, + ) -> CoreScenarioBuilder { + self.with_capabilities(ObservabilityCapability { + metrics_query_url: None, + metrics_query_grafana_url: Some(url), + metrics_otlp_ingest_url: None, + }) + } + + fn with_metrics_query_grafana_url_str( + self, + url: &str, + ) -> CoreScenarioBuilder { + let parsed = reqwest::Url::parse(url).expect("metrics query grafana url must be valid"); + self.with_metrics_query_grafana_url(parsed) + } +} + +impl ObservabilityBuilderExt for CoreScenarioBuilder { + fn with_metrics_query_url( + mut self, + url: reqwest::Url, + ) -> CoreScenarioBuilder { + self.capabilities_mut().metrics_query_url = Some(url); + self + } + + fn with_metrics_query_url_str(self, url: &str) -> CoreScenarioBuilder { + let parsed = reqwest::Url::parse(url).expect("metrics query url must be valid"); + self.with_metrics_query_url(parsed) + } + + fn with_metrics_otlp_ingest_url( + mut self, + url: reqwest::Url, + ) -> CoreScenarioBuilder { + self.capabilities_mut().metrics_otlp_ingest_url = Some(url); + self + } + + fn with_metrics_otlp_ingest_url_str( + self, + url: &str, + ) -> CoreScenarioBuilder { + let parsed = reqwest::Url::parse(url).expect("metrics OTLP ingest url must be valid"); + self.with_metrics_otlp_ingest_url(parsed) + } + + fn with_metrics_query_grafana_url( + mut self, + url: reqwest::Url, + ) -> CoreScenarioBuilder { + self.capabilities_mut().metrics_query_grafana_url = Some(url); + self + } + + fn with_metrics_query_grafana_url_str( + self, + url: &str, + ) -> CoreScenarioBuilder { + let parsed = reqwest::Url::parse(url).expect("metrics query grafana url must be valid"); + self.with_metrics_query_grafana_url(parsed) } }