k8s: support in-image KZG and dynamic grafana NodePort

This commit is contained in:
andrussal 2025-12-16 11:36:43 +01:00
parent bf1aa47dca
commit a3aa416de5
4 changed files with 86 additions and 13 deletions

View File

@ -49,4 +49,4 @@ grafana:
adminPassword: admin
service:
type: NodePort
nodePort: 30030
nodePort: null

View File

@ -1,6 +1,7 @@
use anyhow::Error;
use async_trait::async_trait;
use kube::Client;
use k8s_openapi::api::core::v1::Service;
use kube::{Client, api::Api};
use testing_framework_core::{
scenario::{BlockFeedTask, CleanupGuard, Deployer, MetricsError, RunContext, Runner, Scenario},
topology::generation::GeneratedTopology,
@ -150,15 +151,45 @@ impl Deployer for K8sDeployer {
prometheus_url = %format!("http://{}:{}/", node_host, prometheus_port),
"prometheus endpoint available on host"
);
info!(
grafana_url = %format!("http://{}:{}/", node_host, 30030),
"grafana dashboard available via NodePort"
);
if let Some(grafana_port) = cluster_grafana_node_port(
&client,
cluster
.as_ref()
.expect("cluster must be available")
.namespace(),
cluster
.as_ref()
.expect("cluster must be available")
.release(),
)
.await
{
info!(
grafana_url = %format!("http://{}:{}/", node_host, grafana_port),
"grafana dashboard available via NodePort"
);
}
if std::env::var("TESTNET_PRINT_ENDPOINTS").is_ok() {
let grafana_port = cluster_grafana_node_port(
&client,
cluster
.as_ref()
.expect("cluster must be available")
.namespace(),
cluster
.as_ref()
.expect("cluster must be available")
.release(),
)
.await;
println!(
"TESTNET_ENDPOINTS prometheus=http://{}:{}/ grafana=http://{}:{}/",
node_host, prometheus_port, node_host, 30030
"TESTNET_ENDPOINTS prometheus=http://{}:{}/ grafana={}",
node_host,
prometheus_port,
grafana_port
.map(|port| format!("http://{}:{}/", node_host, port))
.unwrap_or_else(|| "<disabled>".to_string())
);
for (idx, client) in node_clients.validator_clients().iter().enumerate() {
@ -216,6 +247,21 @@ fn cluster_prometheus_port(cluster: &Option<ClusterEnvironment>) -> u16 {
.prometheus_port()
}
/// Discovers the NodePort assigned to the release's Grafana service.
///
/// Fetches the `{release}-grafana` `Service` from `namespace` and returns the first
/// port entry whose `node_port` is set and fits in a `u16`.
///
/// Returns `None` when the service cannot be fetched, has no spec, is not of type
/// `NodePort`, has no port list, or exposes no usable node port. Lookup errors are
/// intentionally discarded: callers treat a missing port as "no endpoint to report".
async fn cluster_grafana_node_port(client: &Client, namespace: &str, release: &str) -> Option<u16> {
    let api = Api::<Service>::namespaced(client.clone(), namespace);
    let grafana_service_name = format!("{release}-grafana");

    let grafana = match api.get(&grafana_service_name).await {
        Ok(found) => found,
        Err(_) => return None,
    };

    // Only a NodePort service has a host-reachable port worth reporting.
    let spec = grafana
        .spec
        .filter(|candidate| candidate.type_.as_deref() == Some("NodePort"))?;

    spec.ports?
        .into_iter()
        .filter_map(|entry| entry.node_port)
        .find_map(|raw| u16::try_from(raw).ok())
}
async fn fail_cluster(cluster: &mut Option<ClusterEnvironment>, reason: &str) {
if let Some(env) = cluster.as_mut() {
env.fail(reason).await;

View File

@ -94,7 +94,8 @@ pub fn prepare_assets(topology: &GeneratedTopology) -> Result<RunnerAssets, Asse
);
let root = workspace_root().map_err(|source| AssetsError::WorkspaceRoot { source })?;
let cfgsync_yaml = render_cfgsync_config(&root, topology)?;
let kzg_mode = kzg_mode();
let cfgsync_yaml = render_cfgsync_config(&root, topology, kzg_mode)?;
let tempdir = tempfile::Builder::new()
.prefix("nomos-helm-")
@ -103,7 +104,6 @@ pub fn prepare_assets(topology: &GeneratedTopology) -> Result<RunnerAssets, Asse
let cfgsync_file = write_temp_file(tempdir.path(), "cfgsync.yaml", cfgsync_yaml)?;
let scripts = validate_scripts(&root)?;
let kzg_mode = kzg_mode();
let kzg_path = match kzg_mode {
KzgMode::HostPath => Some(validate_kzg_params(&root)?),
KzgMode::InImage => None,
@ -145,13 +145,23 @@ pub fn prepare_assets(topology: &GeneratedTopology) -> Result<RunnerAssets, Asse
/// Lower bound applied to the cfgsync `timeout` when rendering the k8s config;
/// a larger value already present in the template is kept.
const CFGSYNC_K8S_TIMEOUT_SECS: u64 = 300;
/// Grafana NodePort used when KZG params come from the host path.
const DEFAULT_GRAFANA_NODE_PORT: u16 = 30030;
/// `global_params_path` used in in-image KZG mode when `NOMOS_KZGRS_PARAMS_PATH`
/// cannot be read from the environment.
const DEFAULT_IN_IMAGE_KZG_PARAMS_PATH: &str = "/opt/nomos/kzg-params/kzgrs_test_params";
fn render_cfgsync_config(root: &Path, topology: &GeneratedTopology) -> Result<String, AssetsError> {
/// Renders the cfgsync configuration YAML for a k8s run.
///
/// Loads the `cfgsync.yaml` template from the stack assets directory under `root`,
/// applies overrides derived from `topology`, and serialises the result to a string.
///
/// When `kzg_mode` is `KzgMode::InImage`, `global_params_path` is taken from the
/// `NOMOS_KZGRS_PARAMS_PATH` environment variable, falling back to
/// `DEFAULT_IN_IMAGE_KZG_PARAMS_PATH` when the variable cannot be read.
///
/// # Errors
///
/// Returns `AssetsError::Cfgsync` if the template cannot be loaded or the final
/// configuration cannot be rendered.
fn render_cfgsync_config(
root: &Path,
topology: &GeneratedTopology,
kzg_mode: KzgMode,
) -> Result<String, AssetsError> {
let cfgsync_template_path = stack_assets_root(root).join("cfgsync.yaml");
debug!(path = %cfgsync_template_path.display(), "loading cfgsync template");
let mut cfg = load_cfgsync_template(&cfgsync_template_path)
.map_err(|source| AssetsError::Cfgsync { source })?;
// NOTE(review): this call and the one directly below look like the removed and
// added sides of the same diff hunk — confirm that only the `kzg_mode`-aware call
// is present in the real file.
apply_topology_overrides(&mut cfg, topology, true);
// The flag is true only in host-path mode; presumably it toggles the host KZG
// path override — verify against `apply_topology_overrides`.
apply_topology_overrides(&mut cfg, topology, kzg_mode == KzgMode::HostPath);
if kzg_mode == KzgMode::InImage {
// In-image params: prefer the environment override, else the baked-in default.
cfg.global_params_path = env::var("NOMOS_KZGRS_PARAMS_PATH")
.ok()
.unwrap_or_else(|| DEFAULT_IN_IMAGE_KZG_PARAMS_PATH.to_string());
}
// Enforce the k8s minimum timeout without shrinking a larger template value.
cfg.timeout = cfg.timeout.max(CFGSYNC_K8S_TIMEOUT_SECS);
render_cfgsync_yaml(&cfg).map_err(|source| AssetsError::Cfgsync { source })
}
@ -321,6 +331,15 @@ fn build_values(topology: &GeneratedTopology) -> HelmValues {
let pol_mode = pol_proof_mode();
let image_pull_policy =
env::var("NOMOS_TESTNET_IMAGE_PULL_POLICY").unwrap_or_else(|_| "IfNotPresent".into());
let grafana_node_port = match kzg_mode() {
KzgMode::HostPath => Some(DEFAULT_GRAFANA_NODE_PORT),
KzgMode::InImage => env::var("NOMOS_GRAFANA_NODE_PORT").ok().and_then(|value| {
value
.parse::<u16>()
.ok()
.filter(|port| *port >= 30000 && *port <= 32767)
}),
};
let grafana = GrafanaValues {
enabled: true,
image: "grafana/grafana:10.4.1".into(),
@ -329,7 +348,7 @@ fn build_values(topology: &GeneratedTopology) -> HelmValues {
admin_password: "admin".into(),
service: GrafanaServiceValues {
type_field: "NodePort".into(),
node_port: Some(DEFAULT_GRAFANA_NODE_PORT),
node_port: grafana_node_port,
},
};
debug!(pol_mode, "rendering Helm values for k8s stack");

View File

@ -89,6 +89,14 @@ impl ClusterEnvironment {
)
}
/// Returns the namespace recorded for this cluster environment.
///
/// The deployer passes this value as the Kubernetes namespace when looking up
/// the Grafana service.
pub fn namespace(&self) -> &str {
&self.namespace
}
/// Returns the release name recorded for this cluster environment.
///
/// The deployer uses it as the prefix of the `{release}-grafana` service name.
pub fn release(&self) -> &str {
&self.release
}
/// Returns the Prometheus port stored on this cluster environment.
pub fn prometheus_port(&self) -> u16 {
self.prometheus_port
}