diff --git a/testing-framework/core/src/nodes/common/binary.rs b/testing-framework/core/src/nodes/common/binary.rs index 3313a6b..4efcb92 100644 --- a/testing-framework/core/src/nodes/common/binary.rs +++ b/testing-framework/core/src/nodes/common/binary.rs @@ -1,5 +1,7 @@ use std::{env, path::PathBuf}; +use tracing::{debug, info}; + pub struct BinaryConfig { pub env_var: &'static str, pub binary_name: &'static str, @@ -12,18 +14,43 @@ pub struct BinaryResolver; impl BinaryResolver { pub fn resolve_path(config: &BinaryConfig) -> PathBuf { if let Some(path) = env::var_os(config.env_var) { - return PathBuf::from(path); + let resolved = PathBuf::from(path); + + info!( + env = config.env_var, + binary = config.binary_name, + path = %resolved.display(), + "resolved binary from env override" + ); + return resolved; } if let Some(path) = Self::which_on_path(config.binary_name) { + info!( + binary = config.binary_name, + path = %path.display(), + "resolved binary from PATH" + ); return path; } let shared_bin = PathBuf::from(env!("CARGO_MANIFEST_DIR")).join(config.shared_bin_subpath); if shared_bin.exists() { + info!( + binary = config.binary_name, + path = %shared_bin.display(), + "resolved binary from shared assets" + ); return shared_bin; } - PathBuf::from(env!("CARGO_MANIFEST_DIR")) + let fallback = PathBuf::from(env!("CARGO_MANIFEST_DIR")) .join("../../") - .join(config.fallback_path) + .join(config.fallback_path); + + debug!( + binary = config.binary_name, + path = %fallback.display(), + "falling back to binary path" + ); + fallback } fn which_on_path(bin: &str) -> Option { diff --git a/testing-framework/core/src/nodes/common/lifecycle/lifecycle.rs b/testing-framework/core/src/nodes/common/lifecycle/lifecycle.rs index 15f049f..509024a 100644 --- a/testing-framework/core/src/nodes/common/lifecycle/lifecycle.rs +++ b/testing-framework/core/src/nodes/common/lifecycle/lifecycle.rs @@ -1,8 +1,10 @@ #![allow(dead_code)] use std::process::Child; +use tracing::debug; /// Shared lifecycle hooks (placeholder). pub fn kill_child(child: &mut Child) { + debug!("killing child process"); let _ = child.kill(); } diff --git a/testing-framework/core/src/nodes/common/lifecycle/monitor.rs b/testing-framework/core/src/nodes/common/lifecycle/monitor.rs index bc0afaa..464574e 100644 --- a/testing-framework/core/src/nodes/common/lifecycle/monitor.rs +++ b/testing-framework/core/src/nodes/common/lifecycle/monitor.rs @@ -2,10 +2,22 @@ use std::process::Child; +use tracing::debug; + /// Check if a child process is still running. pub fn is_running(child: &mut Child) -> bool { match child.try_wait() { - Ok(None) => true, - Ok(Some(_)) | Err(_) => false, + Ok(None) => { + debug!("process still running"); + true + } + Ok(Some(status)) => { + debug!(?status, "process exited"); + false + } + Err(err) => { + debug!(error = ?err, "process state check failed"); + false + } } } diff --git a/testing-framework/core/src/nodes/common/lifecycle/spawn.rs b/testing-framework/core/src/nodes/common/lifecycle/spawn.rs index abaa0c4..0e1f715 100644 --- a/testing-framework/core/src/nodes/common/lifecycle/spawn.rs +++ b/testing-framework/core/src/nodes/common/lifecycle/spawn.rs @@ -5,6 +5,7 @@ use std::{fs::File, io, path::Path}; use nomos_tracing::logging::local::FileConfig; use serde::Serialize; use serde_yaml::Value; +use tracing::debug; /// Configure tracing logger to write into `NOMOS_LOG_DIR` if set, else into the /// provided base dir. 
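Note on visibility: the `debug!`/`info!` events added above are no-ops unless a `tracing` subscriber is installed in the process running the tests. A minimal sketch of wiring one up at test startup, assuming the `tracing-subscriber` crate with its `env-filter` feature (not part of this diff; the framework may already provide this through `nomos_tracing`):

```rust
use tracing_subscriber::EnvFilter;

/// Install a global subscriber so the new `debug!`/`info!` calls show up.
/// Verbosity follows `RUST_LOG`, e.g. `RUST_LOG=testing_framework_core=debug`.
fn init_test_tracing() {
    let filter = EnvFilter::try_from_default_env()
        .unwrap_or_else(|_| EnvFilter::new("info"));
    // `try_init` tolerates a subscriber already installed by another test.
    let _ = tracing_subscriber::fmt()
        .with_env_filter(filter)
        .with_test_writer()
        .try_init();
}
```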
@@ -12,6 +13,7 @@ pub fn configure_logging(base_dir: &Path, prefix: &str, set_logger: F) where F: FnOnce(FileConfig), { + debug!(prefix, base_dir = %base_dir.display(), "configuring node logging"); if let Ok(env_dir) = std::env::var("NOMOS_LOG_DIR") { let log_dir = std::path::PathBuf::from(env_dir); let _ = std::fs::create_dir_all(&log_dir); @@ -34,6 +36,7 @@ where T: Serialize, F: FnOnce(&mut Value), { + debug!(path = %path.display(), "writing node config with injection"); let mut yaml_value = serde_yaml::to_value(config).map_err(|err| io::Error::new(io::ErrorKind::Other, err))?; inject(&mut yaml_value); diff --git a/testing-framework/core/src/nodes/common/node.rs b/testing-framework/core/src/nodes/common/node.rs index 65a4b02..a61cee8 100644 --- a/testing-framework/core/src/nodes/common/node.rs +++ b/testing-framework/core/src/nodes/common/node.rs @@ -10,6 +10,7 @@ use reqwest::Url; use serde::Serialize; use tempfile::TempDir; use tokio::time; +use tracing::{debug, info}; use super::lifecycle::monitor::is_running; use crate::nodes::{ @@ -87,6 +88,8 @@ pub fn prepare_node_config( ) -> (TempDir, T, SocketAddr, Option) { let dir = create_tempdir().expect("tempdir"); + debug!(dir = %dir.path().display(), log_prefix, enable_logging, "preparing node config"); + // Ensure recovery files/dirs exist so services that persist state do not fail // on startup. let _ = ensure_recovery_paths(dir.path()); @@ -100,6 +103,8 @@ pub fn prepare_node_config( config.set_paths(dir.path()); let (addr, testing_addr) = config.addresses(); + debug!(addr = %addr, testing_addr = ?testing_addr, "configured node addresses"); + (dir, config, addr, testing_addr) } @@ -121,6 +126,8 @@ where }) .expect("failed to write node config"); + debug!(config_file = %config_path.display(), binary = %binary_path.display(), "spawning node process"); + let child = Command::new(binary_path) .arg(&config_path) .current_dir(dir.path()) @@ -143,5 +150,6 @@ where }) .await?; + info!("node readiness confirmed via consensus_info"); Ok(handle) } diff --git a/testing-framework/core/src/nodes/executor.rs b/testing-framework/core/src/nodes/executor.rs index ccdae36..5903ed8 100644 --- a/testing-framework/core/src/nodes/executor.rs +++ b/testing-framework/core/src/nodes/executor.rs @@ -3,6 +3,7 @@ use std::{ops::Deref, path::PathBuf}; use nomos_executor::config::Config; use nomos_tracing_service::LoggerLayer; pub use testing_framework_config::nodes::executor::create_executor_config; +use tracing::{debug, info}; use super::{persist_tempdir, should_persist_tempdir}; use crate::{ @@ -11,7 +12,7 @@ use crate::{ LOGS_PREFIX, common::{ binary::{BinaryConfig, BinaryResolver}, - lifecycle::kill::kill_child, + lifecycle::{kill::kill_child, monitor::is_running}, node::{NodeConfigCommon, NodeHandle, spawn_node}, }, }, @@ -46,9 +47,10 @@ impl Drop for Executor { if should_persist_tempdir() && let Err(e) = persist_tempdir(&mut self.handle.tempdir, "nomos-executor") { - println!("failed to persist tempdir: {e}"); + debug!(error = ?e, "failed to persist executor tempdir"); } + debug!("stopping executor process"); kill_child(&mut self.handle.child); } } @@ -65,8 +67,20 @@ impl Executor { .await .expect("executor did not become ready"); + info!("executor spawned and ready"); + Self { handle } } + + /// Check if the executor process is still running + pub fn is_running(&mut self) -> bool { + is_running(&mut self.handle.child) + } + + /// Wait for the executor process to exit, with a timeout. 
+ pub async fn wait_for_exit(&mut self, timeout: std::time::Duration) -> bool { + self.handle.wait_for_exit(timeout).await + } } impl NodeConfigCommon for Config { diff --git a/testing-framework/core/src/nodes/validator.rs b/testing-framework/core/src/nodes/validator.rs index 6f09698..a1624e7 100644 --- a/testing-framework/core/src/nodes/validator.rs +++ b/testing-framework/core/src/nodes/validator.rs @@ -4,6 +4,7 @@ use nomos_node::Config; use nomos_tracing_service::LoggerLayer; pub use testing_framework_config::nodes::validator::create_validator_config; use tokio::time::error::Elapsed; +use tracing::{debug, info}; use super::{persist_tempdir, should_persist_tempdir}; use crate::{ @@ -12,7 +13,7 @@ use crate::{ LOGS_PREFIX, common::{ binary::{BinaryConfig, BinaryResolver}, - lifecycle::kill::kill_child, + lifecycle::{kill::kill_child, monitor::is_running}, node::{NodeConfigCommon, NodeHandle, spawn_node}, }, }, @@ -52,9 +53,10 @@ impl Drop for Validator { if should_persist_tempdir() && let Err(e) = persist_tempdir(&mut self.handle.tempdir, "nomos-node") { - println!("failed to persist tempdir: {e}"); + debug!(error = ?e, "failed to persist validator tempdir"); } + debug!("stopping validator process"); kill_child(&mut self.handle.child); } } @@ -62,7 +64,7 @@ impl Drop for Validator { impl Validator { /// Check if the validator process is still running pub fn is_running(&mut self) -> bool { - crate::nodes::common::lifecycle::monitor::is_running(&mut self.handle.child) + is_running(&mut self.handle.child) } /// Wait for the validator process to exit, with a timeout @@ -81,6 +83,8 @@ impl Validator { ) .await?; + info!("validator spawned and ready"); + Ok(Self { handle }) } } diff --git a/testing-framework/core/src/scenario/cfgsync.rs b/testing-framework/core/src/scenario/cfgsync.rs index 206e9ec..698e8f0 100644 --- a/testing-framework/core/src/scenario/cfgsync.rs +++ b/testing-framework/core/src/scenario/cfgsync.rs @@ -6,6 +6,7 @@ use nomos_tracing_service::TracingSettings; use nomos_utils::bounded_duration::{MinimalBoundedDuration, SECOND}; use serde::{Deserialize, Serialize}; use serde_with::serde_as; +use tracing::debug; use crate::{ constants::kzg_container_path, @@ -50,12 +51,14 @@ pub struct CfgSyncConfig { } pub fn load_cfgsync_template(path: &Path) -> Result { + debug!(path = %path.display(), "loading cfgsync template"); let file = File::open(path) .with_context(|| format!("opening cfgsync template at {}", path.display()))?; serde_yaml::from_reader(file).context("parsing cfgsync template") } pub fn write_cfgsync_template(path: &Path, cfg: &CfgSyncConfig) -> Result<()> { + debug!(path = %path.display(), "writing cfgsync template"); let file = File::create(path) .with_context(|| format!("writing cfgsync template to {}", path.display()))?; let serializable = SerializableCfgSyncConfig::from(cfg); @@ -63,6 +66,7 @@ pub fn write_cfgsync_template(path: &Path, cfg: &CfgSyncConfig) -> Result<()> { } pub fn render_cfgsync_yaml(cfg: &CfgSyncConfig) -> Result { + debug!("rendering cfgsync yaml"); let serializable = SerializableCfgSyncConfig::from(cfg); serde_yaml::to_string(&serializable).context("rendering cfgsync yaml") } @@ -72,6 +76,12 @@ pub fn apply_topology_overrides( topology: &GeneratedTopology, use_kzg_mount: bool, ) { + debug!( + validators = topology.validators().len(), + executors = topology.executors().len(), + use_kzg_mount, + "applying topology overrides to cfgsync config" + ); let hosts = topology.validators().len() + topology.executors().len(); cfg.n_hosts = hosts; diff --git 
a/testing-framework/core/src/scenario/definition.rs b/testing-framework/core/src/scenario/definition.rs index ee1633d..9a00466 100644 --- a/testing-framework/core/src/scenario/definition.rs +++ b/testing-framework/core/src/scenario/definition.rs @@ -1,5 +1,7 @@ use std::{num::NonZeroUsize, sync::Arc, time::Duration}; +use tracing::{debug, info}; + use super::{ NodeControlCapability, expectation::Expectation, runtime::context::RunMetrics, workload::Workload, @@ -227,6 +229,15 @@ impl Builder { let run_metrics = RunMetrics::from_topology(&generated, duration); initialize_components(&generated, &run_metrics, &mut workloads, &mut expectations); + info!( + validators = generated.validators().len(), + executors = generated.executors().len(), + duration_secs = duration.as_secs(), + workloads = workloads.len(), + expectations = expectations.len(), + "scenario built" + ); + Scenario::new(generated, workloads, expectations, duration, capabilities) } } @@ -307,6 +318,8 @@ fn initialize_workloads( for workload in workloads { let inner = Arc::get_mut(workload).expect("workload unexpectedly cloned before initialization"); + + debug!(workload = inner.name(), "initializing workload"); if let Err(err) = inner.init(descriptors, run_metrics) { panic!("workload '{}' failed to initialize: {err}", inner.name()); } @@ -319,6 +332,7 @@ fn initialize_expectations( expectations: &mut [Box], ) { for expectation in expectations { + debug!(expectation = expectation.name(), "initializing expectation"); if let Err(err) = expectation.init(descriptors, run_metrics) { panic!( "expectation '{}' failed to initialize: {err}", diff --git a/testing-framework/core/src/scenario/http_probe.rs b/testing-framework/core/src/scenario/http_probe.rs index 4e26186..13cc116 100644 --- a/testing-framework/core/src/scenario/http_probe.rs +++ b/testing-framework/core/src/scenario/http_probe.rs @@ -5,6 +5,7 @@ use nomos_http_api_common::paths; use reqwest::Client as ReqwestClient; use thiserror::Error; use tokio::time::{sleep, timeout}; +use tracing::{debug, info}; /// Role used for labelling readiness probes. 
#[derive(Clone, Copy, Debug, Eq, PartialEq)] @@ -80,6 +81,15 @@ pub async fn wait_for_http_ports_with_host( return Ok(()); } + info!( + role = role.label(), + ?ports, + host, + timeout_secs = timeout_duration.as_secs_f32(), + poll_ms = poll_interval.as_millis(), + "waiting for HTTP readiness" + ); + let client = ReqwestClient::new(); let probes = ports.iter().copied().map(|port| { wait_for_single_port( @@ -104,6 +114,7 @@ async fn wait_for_single_port( poll_interval: Duration, ) -> Result<(), HttpReadinessError> { let url = format!("http://{host}:{port}{}", paths::CRYPTARCHIA_INFO); + debug!(role = role.label(), %url, "probing HTTP endpoint"); let probe = async { loop { let is_ready = client diff --git a/testing-framework/runners/compose/src/deployer/clients.rs b/testing-framework/runners/compose/src/deployer/clients.rs index d61ec30..7e15186 100644 --- a/testing-framework/runners/compose/src/deployer/clients.rs +++ b/testing-framework/runners/compose/src/deployer/clients.rs @@ -33,6 +33,7 @@ impl ClientBuilder { environment .fail("failed to construct node api clients") .await; + tracing::warn!(error = ?err, host, "failed to build node clients"); Err(err.into()) } } @@ -50,6 +51,7 @@ impl ClientBuilder { } Err(err) => { environment.fail("failed to initialize block feed").await; + tracing::warn!(error = ?err, "block feed initialization failed"); Err(err) } } diff --git a/testing-framework/runners/compose/src/deployer/orchestrator.rs b/testing-framework/runners/compose/src/deployer/orchestrator.rs index 29d6f30..84877ec 100644 --- a/testing-framework/runners/compose/src/deployer/orchestrator.rs +++ b/testing-framework/runners/compose/src/deployer/orchestrator.rs @@ -52,6 +52,8 @@ impl DeploymentOrchestrator { "compose deployment starting" ); + let validator_count = descriptors.validators().len(); + let executor_count = descriptors.executors().len(); let host_ports = PortManager::prepare(&mut environment, &descriptors).await?; if self.deployer.readiness_checks { @@ -84,6 +86,15 @@ impl DeploymentOrchestrator { node_control, ); + info!( + validators = validator_count, + executors = executor_count, + duration_secs = scenario.duration().as_secs(), + readiness_checks = self.deployer.readiness_checks, + host, + "compose deployment ready; handing control to scenario runner" + ); + Ok(Runner::new(context, Some(cleanup_guard))) } diff --git a/testing-framework/runners/compose/src/deployer/readiness.rs b/testing-framework/runners/compose/src/deployer/readiness.rs index f3bae44..5907fb7 100644 --- a/testing-framework/runners/compose/src/deployer/readiness.rs +++ b/testing-framework/runners/compose/src/deployer/readiness.rs @@ -26,6 +26,7 @@ impl ReadinessChecker { ensure_validators_ready_with_ports(&host_ports.validator_api_ports()).await { environment.fail("validator readiness failed").await; + tracing::warn!(error = ?err, "validator readiness failed"); return Err(err.into()); } @@ -36,12 +37,14 @@ impl ReadinessChecker { if let Err(err) = ensure_executors_ready_with_ports(&host_ports.executor_api_ports()).await { environment.fail("executor readiness failed").await; + tracing::warn!(error = ?err, "executor readiness failed"); return Err(err.into()); } info!("waiting for remote service readiness"); if let Err(err) = ensure_remote_readiness_with_ports(descriptors, host_ports).await { environment.fail("remote readiness probe failed").await; + tracing::warn!(error = ?err, "remote readiness probe failed"); return Err(err.into()); } diff --git a/testing-framework/runners/compose/src/deployer/setup.rs 
b/testing-framework/runners/compose/src/deployer/setup.rs index 4b891f3..e137ba5 100644 --- a/testing-framework/runners/compose/src/deployer/setup.rs +++ b/testing-framework/runners/compose/src/deployer/setup.rs @@ -4,7 +4,7 @@ use std::{ }; use testing_framework_core::topology::generation::GeneratedTopology; -use tracing::info; +use tracing::{debug, info}; use crate::{ docker::ensure_docker_available, @@ -57,6 +57,10 @@ impl DeploymentSetup { .and_then(|port| reserve_port(port)) .or_else(|| allocate_prometheus_port()) .unwrap_or_else(|| PortReservation::new(DEFAULT_PROMETHEUS_PORT, None)); + debug!( + prometheus_port = prometheus_port.port(), + "selected prometheus port" + ); let environment = prepare_environment(&self.descriptors, prometheus_port, prometheus_env.is_some()) .await?; diff --git a/testing-framework/runners/compose/src/docker/commands.rs b/testing-framework/runners/compose/src/docker/commands.rs index b4d3733..9386754 100644 --- a/testing-framework/runners/compose/src/docker/commands.rs +++ b/testing-framework/runners/compose/src/docker/commands.rs @@ -2,6 +2,7 @@ use std::{io, path::Path, process, time::Duration}; use testing_framework_core::adjust_timeout; use tokio::{process::Command, time::timeout}; +use tracing::{debug, info, warn}; const COMPOSE_UP_TIMEOUT: Duration = Duration::from_secs(120); @@ -29,6 +30,7 @@ pub async fn run_docker_command( timeout_duration: Duration, description: &str, ) -> Result<(), ComposeCommandError> { + debug!(description, ?command, "running docker command"); let result = timeout(timeout_duration, command.status()).await; match result { Ok(status) => handle_compose_status(status, description), @@ -55,6 +57,13 @@ pub async fn compose_up( .arg("-d") .current_dir(root); + info!( + compose_file = %compose_path.display(), + project = project_name, + root = %root.display(), + "running docker compose up" + ); + run_compose_command(cmd, adjust_timeout(COMPOSE_UP_TIMEOUT), "docker compose up").await } @@ -74,6 +83,13 @@ pub async fn compose_down( .arg("--volumes") .current_dir(root); + info!( + compose_file = %compose_path.display(), + project = project_name, + root = %root.display(), + "running docker compose down" + ); + run_compose_command( cmd, adjust_timeout(COMPOSE_UP_TIMEOUT), @@ -96,21 +112,21 @@ pub async fn dump_compose_logs(compose_file: &Path, project: &str, root: &Path) match cmd.output().await { Ok(output) => print_logs(&output.stdout, &output.stderr), - Err(err) => eprintln!("[compose-runner] failed to collect docker compose logs: {err}"), + Err(err) => warn!(error = ?err, "failed to collect docker compose logs"), } } fn print_logs(stdout: &[u8], stderr: &[u8]) { if !stdout.is_empty() { - eprintln!( - "[compose-runner] docker compose logs:\n{}", - String::from_utf8_lossy(stdout) + warn!( + logs = %String::from_utf8_lossy(stdout), + "docker compose stdout" ); } if !stderr.is_empty() { - eprintln!( - "[compose-runner] docker compose errors:\n{}", - String::from_utf8_lossy(stderr) + warn!( + logs = %String::from_utf8_lossy(stderr), + "docker compose stderr" ); } } @@ -135,14 +151,23 @@ fn handle_compose_status( description: &str, ) -> Result<(), ComposeCommandError> { match status { - Ok(code) if code.success() => Ok(()), - Ok(code) => Err(ComposeCommandError::Failed { - command: description.to_owned(), - status: code, - }), - Err(err) => Err(ComposeCommandError::Spawn { - command: description.to_owned(), - source: err, - }), + Ok(code) if code.success() => { + debug!(description, "docker command succeeded"); + Ok(()) + } + Ok(code) => { + 
warn!(description, status = ?code, "docker command failed"); + Err(ComposeCommandError::Failed { + command: description.to_owned(), + status: code, + }) + } + Err(err) => { + warn!(description, error = ?err, "failed to spawn docker command"); + Err(ComposeCommandError::Spawn { + command: description.to_owned(), + source: err, + }) + } } } diff --git a/testing-framework/runners/compose/src/docker/control.rs b/testing-framework/runners/compose/src/docker/control.rs index 1da24b3..cf37a32 100644 --- a/testing-framework/runners/compose/src/docker/control.rs +++ b/testing-framework/runners/compose/src/docker/control.rs @@ -2,6 +2,7 @@ use std::path::{Path, PathBuf}; use testing_framework_core::scenario::{DynError, NodeControlHandle}; use tokio::process::Command; +use tracing::info; use crate::{docker::commands::run_docker_command, errors::ComposeRunnerError}; @@ -21,6 +22,7 @@ pub async fn restart_compose_service( .arg(service); let description = "docker compose restart"; + info!(service, project = project_name, compose_file = %compose_file.display(), "restarting compose service"); run_docker_command( command, testing_framework_core::adjust_timeout(std::time::Duration::from_secs(120)), diff --git a/testing-framework/runners/compose/src/docker/mod.rs b/testing-framework/runners/compose/src/docker/mod.rs index 54b9f6b..cf1f12c 100644 --- a/testing-framework/runners/compose/src/docker/mod.rs +++ b/testing-framework/runners/compose/src/docker/mod.rs @@ -6,6 +6,7 @@ pub mod workspace; use std::{env, process::Stdio, time::Duration}; use tokio::{process::Command, time::timeout}; +use tracing::{debug, info, warn}; use crate::{ docker::commands::ComposeCommandError, errors::ComposeRunnerError, @@ -35,8 +36,10 @@ pub async fn ensure_docker_available() -> Result<(), ComposeRunnerError> { .unwrap_or(false); if available { + debug!("docker info succeeded"); Ok(()) } else { + warn!("docker info failed or timed out; compose runner unavailable"); Err(ComposeRunnerError::DockerUnavailable) } } @@ -44,6 +47,7 @@ pub async fn ensure_docker_available() -> Result<(), ComposeRunnerError> { /// Ensure the configured compose image exists, building a local one if needed. pub async fn ensure_compose_image() -> Result<(), ComposeRunnerError> { let (image, platform) = crate::docker::platform::resolve_image(); + info!(image, platform = ?platform, "ensuring compose image is present"); ensure_image_present(&image, platform.as_deref()).await } @@ -53,6 +57,7 @@ pub async fn ensure_image_present( platform: Option<&str>, ) -> Result<(), ComposeRunnerError> { if docker_image_exists(image).await? 
{ + debug!(image, "docker image already present"); return Ok(()); } @@ -157,6 +162,7 @@ pub async fn build_local_image( ) .await .map_err(|_| { + warn!(image, timeout = ?IMAGE_BUILD_TIMEOUT, "docker build timed out"); ComposeRunnerError::Compose(ComposeCommandError::Timeout { command: String::from("docker build"), timeout: testing_framework_core::adjust_timeout(IMAGE_BUILD_TIMEOUT), @@ -164,12 +170,21 @@ pub async fn build_local_image( })?; match status { - Ok(code) if code.success() => Ok(()), - Ok(code) => Err(ComposeRunnerError::Compose(ComposeCommandError::Failed { - command: String::from("docker build"), - status: code, - })), - Err(err) => Err(ComposeRunnerError::ImageBuild { source: err.into() }), + Ok(code) if code.success() => { + info!(image, platform = ?platform, "docker build completed"); + Ok(()) + } + Ok(code) => { + warn!(image, status = ?code, "docker build failed"); + Err(ComposeRunnerError::Compose(ComposeCommandError::Failed { + command: String::from("docker build"), + status: code, + })) + } + Err(err) => { + warn!(image, error = ?err, "docker build spawn failed"); + Err(ComposeRunnerError::ImageBuild { source: err.into() }) + } } } diff --git a/testing-framework/runners/compose/src/docker/platform.rs b/testing-framework/runners/compose/src/docker/platform.rs index 683f631..c1ee108 100644 --- a/testing-framework/runners/compose/src/docker/platform.rs +++ b/testing-framework/runners/compose/src/docker/platform.rs @@ -1,11 +1,14 @@ use std::env; +use tracing::debug; + /// Select the compose image and optional platform, honoring /// NOMOS_TESTNET_IMAGE. pub fn resolve_image() -> (String, Option) { let image = env::var("NOMOS_TESTNET_IMAGE") .unwrap_or_else(|_| String::from("logos-blockchain-testing:local")); let platform = (image == "ghcr.io/logos-co/nomos:testnet").then(|| "linux/amd64".to_owned()); + debug!(image, platform = ?platform, "resolved compose image"); (image, platform) } diff --git a/testing-framework/runners/compose/src/docker/workspace.rs b/testing-framework/runners/compose/src/docker/workspace.rs index 4e35264..8200475 100644 --- a/testing-framework/runners/compose/src/docker/workspace.rs +++ b/testing-framework/runners/compose/src/docker/workspace.rs @@ -5,6 +5,7 @@ use std::{ use anyhow::{Context as _, Result}; use tempfile::TempDir; +use tracing::{debug, info}; /// Copy the repository stack assets into a scenario-specific temp dir. #[derive(Debug)] @@ -37,6 +38,11 @@ impl ComposeWorkspace { stack_source.display() ); } + debug!( + repo_root = %repo_root.display(), + stack_source = %stack_source.display(), + "copying stack assets into temporary workspace" + ); copy_dir_recursive(&stack_source, &temp.path().join("stack"))?; let scripts_source = stack_scripts_root(&repo_root); if scripts_source.exists() { @@ -86,6 +92,7 @@ impl ComposeWorkspace { ); } + info!(root = %temp.path().display(), "compose workspace created"); Ok(Self { root: temp }) } diff --git a/testing-framework/runners/compose/src/infrastructure/cfgsync.rs b/testing-framework/runners/compose/src/infrastructure/cfgsync.rs index cfb475c..224d4d7 100644 --- a/testing-framework/runners/compose/src/infrastructure/cfgsync.rs +++ b/testing-framework/runners/compose/src/infrastructure/cfgsync.rs @@ -4,6 +4,7 @@ use testing_framework_core::{ scenario::cfgsync::{apply_topology_overrides, load_cfgsync_template, write_cfgsync_template}, topology::generation::GeneratedTopology, }; +use tracing::{debug, info, warn}; /// Handle that tracks a cfgsync server started for compose runs. 
#[derive(Debug)] @@ -16,6 +17,7 @@ impl CfgsyncServerHandle { pub fn shutdown(&mut self) { match self { Self::Container { name, stopped } if !*stopped => { + info!(container = name, "stopping cfgsync container"); remove_container(name); *stopped = true; } @@ -31,12 +33,17 @@ fn remove_container(name: &str) { .arg(name) .status() { - Ok(status) if status.success() => {} + Ok(status) if status.success() => { + debug!(container = name, "removed cfgsync container"); + } Ok(status) => { - eprintln!("[compose-runner] failed to remove cfgsync container {name}: {status}"); + warn!(container = name, status = ?status, "failed to remove cfgsync container"); } Err(_) => { - eprintln!("[compose-runner] failed to spawn docker rm for cfgsync container {name}"); + warn!( + container = name, + "failed to spawn docker rm for cfgsync container" + ); } } } @@ -54,6 +61,14 @@ pub fn update_cfgsync_config( use_kzg_mount: bool, port: u16, ) -> anyhow::Result<()> { + debug!( + path = %path.display(), + use_kzg_mount, + port, + validators = topology.validators().len(), + executors = topology.executors().len(), + "updating cfgsync template" + ); let mut cfg = load_cfgsync_template(path)?; cfg.port = port; apply_topology_overrides(&mut cfg, topology, use_kzg_mount); diff --git a/testing-framework/runners/compose/src/infrastructure/environment.rs b/testing-framework/runners/compose/src/infrastructure/environment.rs index 8ad1cce..83bdd50 100644 --- a/testing-framework/runners/compose/src/infrastructure/environment.rs +++ b/testing-framework/runners/compose/src/infrastructure/environment.rs @@ -172,12 +172,21 @@ pub fn prepare_workspace_state() -> Result { let cfgsync_path = workspace.stack_dir().join("cfgsync.yaml"); let use_kzg = workspace.root_path().join("kzgrs_test_params").exists(); - Ok(WorkspaceState { + let state = WorkspaceState { workspace, root, cfgsync_path, use_kzg, - }) + }; + + debug!( + root = %state.root.display(), + cfgsync = %state.cfgsync_path.display(), + use_kzg = state.use_kzg, + "prepared compose workspace state" + ); + + Ok(state) } /// Log wrapper for `prepare_workspace_state`. @@ -204,6 +213,7 @@ pub async fn start_cfgsync_stage( ) -> Result { info!(cfgsync_port = cfgsync_port, "launching cfgsync server"); let handle = launch_cfgsync(&workspace.cfgsync_path, cfgsync_port).await?; + debug!(container = ?handle, "cfgsync server launched"); Ok(handle) } @@ -238,6 +248,7 @@ pub fn allocate_cfgsync_port() -> Result { source: source.into(), })? 
.port(); + debug!(port, "allocated cfgsync port"); Ok(port) } @@ -254,6 +265,13 @@ pub async fn launch_cfgsync( })?; let (image, _) = resolve_image(); let container_name = format!("nomos-cfgsync-{}", Uuid::new_v4()); + debug!( + container = %container_name, + image, + cfgsync = %cfgsync_path.display(), + port, + "starting cfgsync container" + ); let mut command = Command::new("docker"); command @@ -287,6 +305,8 @@ pub async fn launch_cfgsync( source: anyhow!(source), })?; + info!(container = %container_name, port, "cfgsync container started"); + Ok(CfgsyncServerHandle::Container { name: container_name, stopped: false, @@ -300,6 +320,12 @@ pub fn write_compose_artifacts( cfgsync_port: u16, prometheus_port: u16, ) -> Result { + debug!( + cfgsync_port, + prometheus_port, + workspace_root = %workspace.root.display(), + "building compose descriptor" + ); let descriptor = ComposeDescriptor::builder(descriptors) .with_kzg_mount(workspace.use_kzg) .with_cfgsync_port(cfgsync_port) @@ -310,6 +336,7 @@ pub fn write_compose_artifacts( let compose_path = workspace.root.join("compose.generated.yml"); write_compose_file(&descriptor, &compose_path) .map_err(|source| ConfigError::Template { source })?; + debug!(compose_file = %compose_path.display(), "rendered compose file"); Ok(compose_path) } @@ -339,6 +366,7 @@ pub async fn bring_up_stack( cfgsync_handle.shutdown(); return Err(ComposeRunnerError::Compose(err)); } + debug!(project = %project_name, "docker compose up completed"); Ok(()) } diff --git a/testing-framework/runners/compose/src/infrastructure/ports.rs b/testing-framework/runners/compose/src/infrastructure/ports.rs index a91154a..8a26566 100644 --- a/testing-framework/runners/compose/src/infrastructure/ports.rs +++ b/testing-framework/runners/compose/src/infrastructure/ports.rs @@ -8,6 +8,7 @@ use testing_framework_core::{ topology::generation::{GeneratedTopology, NodeRole as TopologyNodeRole}, }; use tokio::{process::Command, time::timeout}; +use tracing::{debug, info}; use url::ParseError; use crate::{ @@ -46,6 +47,13 @@ pub async fn discover_host_ports( environment: &StackEnvironment, descriptors: &GeneratedTopology, ) -> Result { + debug!( + compose_file = %environment.compose_path().display(), + project = environment.project_name(), + validators = descriptors.validators().len(), + executors = descriptors.executors().len(), + "resolving compose host ports" + ); let mut validators = Vec::new(); for node in descriptors.validators() { let service = node_identifier(TopologyNodeRole::Validator, node.index()); @@ -62,10 +70,18 @@ pub async fn discover_host_ports( executors.push(NodeHostPorts { api, testing }); } - Ok(HostPortMapping { + let mapping = HostPortMapping { validators, executors, - }) + }; + + info!( + validator_ports = ?mapping.validators, + executor_ports = ?mapping.executors, + "compose host ports resolved" + ); + + Ok(mapping) } async fn resolve_service_port( @@ -180,5 +196,7 @@ fn node_identifier(role: TopologyNodeRole, index: usize) -> String { } pub(crate) fn compose_runner_host() -> String { - std::env::var("COMPOSE_RUNNER_HOST").unwrap_or_else(|_| "127.0.0.1".to_string()) + let host = std::env::var("COMPOSE_RUNNER_HOST").unwrap_or_else(|_| "127.0.0.1".to_string()); + debug!(host, "compose runner host resolved for readiness URLs"); + host } diff --git a/testing-framework/runners/compose/src/infrastructure/template.rs b/testing-framework/runners/compose/src/infrastructure/template.rs index 5eed0ba..e913119 100644 --- 
a/testing-framework/runners/compose/src/infrastructure/template.rs +++ b/testing-framework/runners/compose/src/infrastructure/template.rs @@ -5,6 +5,7 @@ use std::{ use anyhow::Context as _; use tera::Context as TeraContext; +use tracing::{debug, info}; use crate::descriptor::ComposeDescriptor; @@ -49,6 +50,7 @@ pub fn write_compose_file( descriptor: &ComposeDescriptor, compose_path: &Path, ) -> Result<(), TemplateError> { + info!(file = %compose_path.display(), "writing compose file"); TemplateSource::load()?.write(descriptor, compose_path) } @@ -71,6 +73,7 @@ impl TemplateSource { } fn render(&self, descriptor: &ComposeDescriptor) -> Result { + debug!(template = %self.path.display(), "rendering compose template"); let context = TeraContext::from_serialize(descriptor) .map_err(|source| TemplateError::Serialize { source })?; diff --git a/testing-framework/runners/compose/src/lifecycle/block_feed.rs b/testing-framework/runners/compose/src/lifecycle/block_feed.rs index 19f9b58..2c27761 100644 --- a/testing-framework/runners/compose/src/lifecycle/block_feed.rs +++ b/testing-framework/runners/compose/src/lifecycle/block_feed.rs @@ -2,7 +2,7 @@ use std::time::Duration; use testing_framework_core::scenario::{BlockFeed, BlockFeedTask, NodeClients, spawn_block_feed}; use tokio::time::sleep; -use tracing::{info, warn}; +use tracing::{debug, info, warn}; use crate::errors::ComposeRunnerError; @@ -12,6 +12,12 @@ const BLOCK_FEED_RETRY_DELAY: Duration = Duration::from_secs(1); async fn spawn_block_feed_with( node_clients: &NodeClients, ) -> Result<(BlockFeed, BlockFeedTask), ComposeRunnerError> { + debug!( + validators = node_clients.validator_clients().len(), + executors = node_clients.executor_clients().len(), + "selecting validator client for block feed" + ); + let block_source_client = node_clients .random_validator() .cloned() diff --git a/testing-framework/runners/compose/src/lifecycle/cleanup.rs b/testing-framework/runners/compose/src/lifecycle/cleanup.rs index fd9b4e3..3df5de3 100644 --- a/testing-framework/runners/compose/src/lifecycle/cleanup.rs +++ b/testing-framework/runners/compose/src/lifecycle/cleanup.rs @@ -1,6 +1,7 @@ use std::{env, path::PathBuf, thread}; use testing_framework_core::scenario::CleanupGuard; +use tracing::{debug, info, warn}; use crate::{ docker::{ @@ -45,7 +46,7 @@ impl RunnerCleanup { if let Err(err) = run_compose_down_blocking(&self.compose_file, &self.project_name, &self.root) { - eprintln!("[compose-runner] docker compose down failed: {err}"); + warn!(error = ?err, "docker compose down failed"); } } } @@ -80,6 +81,13 @@ fn run_compose_down_blocking( } impl CleanupGuard for RunnerCleanup { fn cleanup(mut self: Box) { + debug!( + compose_file = %self.compose_file.display(), + project = %self.project_name, + root = %self.root.display(), + preserve = self.should_preserve(), + "compose cleanup started" + ); if self.should_preserve() { self.persist_workspace(); return; @@ -101,12 +109,9 @@ impl RunnerCleanup { fn persist_workspace(&mut self) { if let Some(workspace) = self.workspace.take() { let keep = workspace.into_inner().keep(); - eprintln!( - "[compose-runner] preserving docker state at {}", - keep.display() - ); + info!(path = %keep.display(), "preserving docker state"); } - eprintln!("[compose-runner] compose preserve flag set; skipping docker compose down"); + info!("compose preserve flag set; skipping docker compose down"); } } diff --git a/testing-framework/runners/compose/src/lifecycle/wait.rs b/testing-framework/runners/compose/src/lifecycle/wait.rs index 
17240af..9fe7668 100644 --- a/testing-framework/runners/compose/src/lifecycle/wait.rs +++ b/testing-framework/runners/compose/src/lifecycle/wait.rs @@ -4,6 +4,7 @@ use testing_framework_core::{ adjust_timeout, scenario::http_probe::{self, HttpReadinessError, NodeRole}, }; +use tracing::{debug, info}; const DEFAULT_WAIT: Duration = Duration::from_secs(180); const POLL_INTERVAL: Duration = Duration::from_millis(250); @@ -18,6 +19,7 @@ pub async fn wait_for_executors(ports: &[u16]) -> Result<(), HttpReadinessError> async fn wait_for_ports(ports: &[u16], role: NodeRole) -> Result<(), HttpReadinessError> { let host = compose_runner_host(); + info!(role = ?role, ports = ?ports, host, "waiting for compose HTTP readiness"); http_probe::wait_for_http_ports_with_host( ports, role, @@ -29,5 +31,7 @@ async fn wait_for_ports(ports: &[u16], role: NodeRole) -> Result<(), HttpReadine } fn compose_runner_host() -> String { - env::var("COMPOSE_RUNNER_HOST").unwrap_or_else(|_| "127.0.0.1".to_string()) + let host = env::var("COMPOSE_RUNNER_HOST").unwrap_or_else(|_| "127.0.0.1".to_string()); + debug!(host, "compose runner host resolved"); + host } diff --git a/testing-framework/runners/k8s/src/deployer/orchestrator.rs b/testing-framework/runners/k8s/src/deployer/orchestrator.rs index 19540a5..2d27c16 100644 --- a/testing-framework/runners/k8s/src/deployer/orchestrator.rs +++ b/testing-framework/runners/k8s/src/deployer/orchestrator.rs @@ -89,14 +89,16 @@ impl Deployer for K8sDeployer { async fn deploy(&self, scenario: &Scenario) -> Result { let descriptors = scenario.topology().clone(); + let validator_count = descriptors.validators().len(); + let executor_count = descriptors.executors().len(); ensure_supported_topology(&descriptors)?; let client = Client::try_default() .await .map_err(|source| K8sRunnerError::ClientInit { source })?; info!( - validators = descriptors.validators().len(), - executors = descriptors.executors().len(), + validators = validator_count, + executors = executor_count, duration_secs = scenario.duration().as_secs(), readiness_checks = self.readiness_checks, "starting k8s deployment" @@ -117,6 +119,7 @@ impl Deployer for K8sDeployer { if let Some(env) = cluster.as_mut() { env.fail("failed to construct node api clients").await; } + error!(error = ?err, "failed to build k8s node clients"); return Err(err.into()); } }; @@ -133,6 +136,7 @@ impl Deployer for K8sDeployer { env.fail("failed to configure prometheus metrics handle") .await; } + error!(error = ?err, "failed to configure prometheus metrics handle"); return Err(err.into()); } }; @@ -142,6 +146,7 @@ impl Deployer for K8sDeployer { if let Some(env) = cluster.as_mut() { env.fail("failed to initialize block feed").await; } + error!(error = ?err, "failed to initialize block feed"); return Err(err); } }; @@ -163,6 +168,12 @@ impl Deployer for K8sDeployer { block_feed, None, ); + info!( + validators = validator_count, + executors = executor_count, + duration_secs = scenario.duration().as_secs(), + "k8s deployment ready; handing control to scenario runner" + ); Ok(Runner::new(context, Some(cleanup_guard))) } } diff --git a/testing-framework/runners/k8s/src/host.rs b/testing-framework/runners/k8s/src/host.rs index 6bafd52..0401262 100644 --- a/testing-framework/runners/k8s/src/host.rs +++ b/testing-framework/runners/k8s/src/host.rs @@ -2,6 +2,7 @@ use std::env; const NODE_HOST_ENV: &str = "K8S_RUNNER_NODE_HOST"; const KUBE_SERVICE_HOST_ENV: &str = "KUBERNETES_SERVICE_HOST"; +use tracing::debug; /// Returns the hostname or IP used to reach 
`NodePorts` exposed by the cluster. /// Prefers `K8S_RUNNER_NODE_HOST`, then the standard `KUBERNETES_SERVICE_HOST` @@ -9,12 +10,19 @@ const KUBE_SERVICE_HOST_ENV: &str = "KUBERNETES_SERVICE_HOST"; /// back to `127.0.0.1`. pub fn node_host() -> String { if let Ok(host) = env::var(NODE_HOST_ENV) { + debug!(host, env = NODE_HOST_ENV, "using node host override"); return host; } if let Ok(host) = env::var(KUBE_SERVICE_HOST_ENV) && !host.is_empty() { + debug!( + host, + env = KUBE_SERVICE_HOST_ENV, + "using kubernetes service host" + ); return host; } + debug!("falling back to 127.0.0.1 for node host"); "127.0.0.1".to_owned() } diff --git a/testing-framework/runners/k8s/src/infrastructure/assets.rs b/testing-framework/runners/k8s/src/infrastructure/assets.rs index a1db9a2..7b45191 100644 --- a/testing-framework/runners/k8s/src/infrastructure/assets.rs +++ b/testing-framework/runners/k8s/src/infrastructure/assets.rs @@ -13,6 +13,7 @@ use testing_framework_core::{ topology::generation::GeneratedTopology, }; use thiserror::Error; +use tracing::{debug, info}; /// Paths and image metadata required to deploy the Helm chart. pub struct RunnerAssets { @@ -72,6 +73,12 @@ pub enum AssetsError { /// Render cfgsync config, Helm values, and locate scripts/KZG assets for a /// topology. pub fn prepare_assets(topology: &GeneratedTopology) -> Result { + info!( + validators = topology.validators().len(), + executors = topology.executors().len(), + "preparing k8s runner assets" + ); + let root = workspace_root().map_err(|source| AssetsError::WorkspaceRoot { source })?; let cfgsync_yaml = render_cfgsync_config(&root, topology)?; @@ -89,6 +96,15 @@ pub fn prepare_assets(topology: &GeneratedTopology) -> Result Result { let cfgsync_template_path = stack_assets_root(root).join("cfgsync.yaml"); + debug!(path = %cfgsync_template_path.display(), "loading cfgsync template"); let mut cfg = load_cfgsync_template(&cfgsync_template_path) .map_err(|source| AssetsError::Cfgsync { source })?; apply_topology_overrides(&mut cfg, topology, true); @@ -134,6 +151,14 @@ fn validate_scripts(root: &Path) -> Result { } } + debug!( + run_cfgsync = %run_cfgsync.display(), + run_shared = %run_shared.display(), + run_node = %run_node.display(), + run_executor = %run_executor.display(), + "validated runner scripts exist" + ); + Ok(ScriptPaths { run_cfgsync, run_shared, @@ -245,6 +270,7 @@ fn build_values(topology: &GeneratedTopology) -> HelmValues { port: cfgsync_port(), }; let pol_mode = pol_proof_mode(); + debug!(pol_mode, "rendering Helm values for k8s stack"); let validators = topology .validators() .iter() diff --git a/testing-framework/runners/k8s/src/infrastructure/cluster.rs b/testing-framework/runners/k8s/src/infrastructure/cluster.rs index 464a62b..9d1e74b 100644 --- a/testing-framework/runners/k8s/src/infrastructure/cluster.rs +++ b/testing-framework/runners/k8s/src/infrastructure/cluster.rs @@ -7,7 +7,7 @@ use testing_framework_core::{ scenario::{CleanupGuard, Metrics, MetricsError, NodeClients, http_probe::NodeRole}, topology::{generation::GeneratedTopology, readiness::ReadinessError}, }; -use tracing::info; +use tracing::{debug, info}; use url::ParseError; use uuid::Uuid; @@ -149,10 +149,18 @@ pub fn collect_port_specs(descriptors: &GeneratedTopology) -> PortSpecs { }) .collect(); - PortSpecs { + let specs = PortSpecs { validators, executors, - } + }; + + debug!( + validators = specs.validators.len(), + executors = specs.executors.len(), + "collected k8s port specs" + ); + + specs } pub fn build_node_clients(cluster: 
&ClusterEnvironment) -> Result { @@ -175,6 +183,12 @@ pub fn build_node_clients(cluster: &ClusterEnvironment) -> Result, _>>()?; + debug!( + validators = validators.len(), + executors = executors.len(), + "built k8s node clients" + ); + Ok(NodeClients::new(validators, executors)) } @@ -205,7 +219,15 @@ pub async fn ensure_cluster_readiness( Some(&executor_membership_urls), ) .await - .map_err(|source| RemoteReadinessError::Remote { source }) + .map_err(|source| RemoteReadinessError::Remote { source })?; + + info!( + validator_api_ports = ?validator_api, + executor_api_ports = ?executor_api, + "k8s remote readiness confirmed" + ); + + Ok(()) } pub fn cluster_identifiers() -> (String, String) { diff --git a/testing-framework/runners/k8s/src/infrastructure/helm.rs b/testing-framework/runners/k8s/src/infrastructure/helm.rs index c0ec10e..0c6fa41 100644 --- a/testing-framework/runners/k8s/src/infrastructure/helm.rs +++ b/testing-framework/runners/k8s/src/infrastructure/helm.rs @@ -2,6 +2,7 @@ use std::{io, process::Stdio}; use thiserror::Error; use tokio::process::Command; +use tracing::{debug, info}; use crate::infrastructure::assets::{RunnerAssets, cfgsync_port_value, workspace_root}; @@ -36,6 +37,17 @@ pub async fn install_release( } else { "File" }; + info!( + release, + namespace, + validators, + executors, + image = %assets.image, + cfgsync_port = cfgsync_port_value(), + kzg = %assets.kzg_path.display(), + values = %assets.values_file.display(), + "installing helm release" + ); let mut cmd = Command::new("helm"); cmd.arg("install") @@ -94,16 +106,11 @@ pub async fn install_release( let output = run_helm_command(cmd, &command).await?; if std::env::var("K8S_RUNNER_DEBUG").is_ok() { - println!( - "[k8s-runner] {command} stdout:\n{}", - String::from_utf8_lossy(&output.stdout) - ); - println!( - "[k8s-runner] {command} stderr:\n{}", - String::from_utf8_lossy(&output.stderr) - ); + debug!(command, stdout = %String::from_utf8_lossy(&output.stdout), "helm install stdout"); + debug!(command, stderr = %String::from_utf8_lossy(&output.stderr), "helm install stderr"); } + info!(release, namespace, "helm install completed"); Ok(()) } @@ -117,12 +124,9 @@ pub async fn uninstall_release(release: &str, namespace: &str) -> Result<(), Hel .stdout(Stdio::piped()) .stderr(Stdio::piped()); - println!("[k8s-runner] issuing `helm uninstall {release}` in namespace `{namespace}`"); - + info!(release, namespace, "issuing helm uninstall"); run_helm_command(cmd, &format!("helm uninstall {release}")).await?; - println!( - "[k8s-runner] helm uninstall {release} completed successfully (namespace `{namespace}`)" - ); + info!(release, namespace, "helm uninstall completed successfully"); Ok(()) } diff --git a/testing-framework/runners/k8s/src/lifecycle/block_feed.rs b/testing-framework/runners/k8s/src/lifecycle/block_feed.rs index a5a07f9..93b88a5 100644 --- a/testing-framework/runners/k8s/src/lifecycle/block_feed.rs +++ b/testing-framework/runners/k8s/src/lifecycle/block_feed.rs @@ -1,11 +1,17 @@ use testing_framework_core::scenario::{BlockFeed, BlockFeedTask, NodeClients, spawn_block_feed}; -use tracing::info; +use tracing::{debug, info}; use crate::deployer::K8sRunnerError; pub async fn spawn_block_feed_with( node_clients: &NodeClients, ) -> Result<(BlockFeed, BlockFeedTask), K8sRunnerError> { + debug!( + validators = node_clients.validator_clients().len(), + executors = node_clients.executor_clients().len(), + "selecting node client for block feed" + ); + let block_source_client = node_clients .any_client() .cloned() 
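The hunks above and below repeat the same two `tracing` field shorthands: `?value` records a field with its `Debug` impl and `%value` with its `Display` impl. A small self-contained illustration of the pattern (the `NodeError` type is hypothetical, standing in for the framework's own error types):

```rust
use std::path::PathBuf;
use tracing::{info, warn};

/// Hypothetical error type used only for illustration.
#[derive(Debug)]
struct NodeError(String);

fn log_field_shorthands() {
    let path = PathBuf::from("/tmp/nomos-node");
    let err = NodeError("connection refused".into());

    // `%` records via Display; `PathBuf` has no Display impl, hence `.display()`.
    info!(path = %path.display(), "resolved binary");

    // `?` records via Debug, convenient for errors and exit statuses.
    warn!(error = ?err, "failed to persist tempdir");
}
```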
diff --git a/testing-framework/runners/k8s/src/lifecycle/cleanup.rs b/testing-framework/runners/k8s/src/lifecycle/cleanup.rs index 545cf70..6552fc9 100644 --- a/testing-framework/runners/k8s/src/lifecycle/cleanup.rs +++ b/testing-framework/runners/k8s/src/lifecycle/cleanup.rs @@ -7,7 +7,7 @@ use tokio::{ process::Command, time::{Duration, sleep}, }; -use tracing::warn; +use tracing::{info, warn}; use crate::infrastructure::helm::uninstall_release; @@ -37,7 +37,11 @@ impl RunnerCleanup { async fn cleanup_async(&self) { if self.preserve { - print_preserve_notice(&self.release, &self.namespace); + info!( + release = %self.release, + namespace = %self.namespace, + "preserving k8s release and namespace" + ); return; } @@ -51,16 +55,12 @@ impl RunnerCleanup { }) { Ok(()) => true, Err(err) => { - warn!( - "[k8s-runner] cleanup timed out after 120s: {err}; falling back to background thread" - ); + warn!(error = ?err, "cleanup timed out after 120s; falling back to background thread"); false } }, Err(err) => { - warn!( - "[k8s-runner] unable to create cleanup runtime: {err}; falling back to background thread" - ); + warn!(error = ?err, "unable to create cleanup runtime; falling back to background thread"); false } } @@ -73,26 +73,22 @@ impl RunnerCleanup { { Ok(handle) => { if let Err(err) = handle.join() { - warn!("[k8s-runner] cleanup thread panicked: {err:?}"); + warn!(error = ?err, "cleanup thread panicked"); } } - Err(err) => warn!("[k8s-runner] failed to spawn cleanup thread: {err}"), + Err(err) => warn!(error = ?err, "failed to spawn cleanup thread"), } } } -fn print_preserve_notice(release: &str, namespace: &str) { - println!("[k8s-runner] preserving Helm release `{release}` in namespace `{namespace}`"); -} - async fn uninstall_release_and_namespace(client: &Client, release: &str, namespace: &str) { if let Err(err) = uninstall_release(release, namespace).await { - println!("[k8s-runner] helm uninstall {release} failed: {err}"); + warn!(release, namespace, error = ?err, "helm uninstall failed during cleanup"); } - println!("[k8s-runner] deleting namespace `{namespace}` via k8s API",); + info!(namespace, "deleting namespace via k8s API"); delete_namespace(client, namespace).await; - println!("[k8s-runner] delete request for namespace `{namespace}` finished",); + info!(namespace, "namespace delete request finished"); } fn run_background_cleanup(cleanup: Box) { @@ -119,12 +115,15 @@ async fn delete_namespace(client: &Client, namespace: &str) { if delete_namespace_via_cli(namespace).await { wait_for_namespace_termination(&namespaces, namespace).await; } else { - warn!("[k8s-runner] unable to delete namespace `{namespace}` using kubectl fallback"); + warn!( + namespace, + "unable to delete namespace using kubectl fallback" + ); } } async fn delete_namespace_via_api(namespaces: &Api, namespace: &str) -> bool { - println!("[k8s-runner] invoking kubernetes API to delete namespace `{namespace}`"); + info!(namespace, "invoking kubernetes API to delete namespace"); match tokio::time::timeout( Duration::from_secs(10), namespaces.delete(namespace, &DeleteParams::default()), @@ -132,19 +131,20 @@ async fn delete_namespace_via_api(namespaces: &Api, namespace: &str) .await { Ok(Ok(_)) => { - println!( - "[k8s-runner] delete request accepted for namespace `{namespace}`; waiting for termination" + info!( + namespace, + "delete request accepted; waiting for termination" ); true } Ok(Err(err)) => { - println!("[k8s-runner] failed to delete namespace `{namespace}` via API: {err}"); - warn!("[k8s-runner] api 
delete failed for namespace {namespace}: {err}"); + warn!(namespace, error = ?err, "failed to delete namespace via API"); false } Err(_) => { - println!( - "[k8s-runner] kubernetes API timed out deleting namespace `{namespace}`; falling back to kubectl" + warn!( + namespace, + "kubernetes API timed out deleting namespace; falling back to kubectl" ); false } @@ -152,7 +152,7 @@ async fn delete_namespace_via_api(namespaces: &Api, namespace: &str) } async fn delete_namespace_via_cli(namespace: &str) -> bool { - println!("[k8s-runner] invoking `kubectl delete namespace {namespace}` fallback"); + info!(namespace, "invoking kubectl delete namespace fallback"); let output = Command::new("kubectl") .arg("delete") .arg("namespace") @@ -163,19 +163,20 @@ async fn delete_namespace_via_cli(namespace: &str) -> bool { match output { Ok(result) if result.status.success() => { - println!("[k8s-runner] `kubectl delete namespace {namespace}` completed successfully"); + info!(namespace, "kubectl delete namespace completed successfully"); true } Ok(result) => { - println!( - "[k8s-runner] `kubectl delete namespace {namespace}` failed: {}\n{}", - String::from_utf8_lossy(&result.stderr), - String::from_utf8_lossy(&result.stdout) + warn!( + namespace, + stderr = %String::from_utf8_lossy(&result.stderr), + stdout = %String::from_utf8_lossy(&result.stdout), + "kubectl delete namespace failed" ); false } Err(err) => { - println!("[k8s-runner] failed to spawn kubectl for namespace `{namespace}`: {err}"); + warn!(namespace, error = ?err, "failed to spawn kubectl delete namespace"); false } } @@ -204,19 +205,16 @@ async fn namespace_deleted(namespaces: &Api, namespace: &str, attempt .as_ref() .and_then(|status| status.phase.clone()) .unwrap_or_else(|| "Unknown".into()); - println!( - "[k8s-runner] waiting for namespace `{}` to terminate (phase={phase:?})", - namespace - ); + info!(namespace, ?phase, "waiting for namespace to terminate"); } false } Ok(None) => { - println!("[k8s-runner] namespace `{namespace}` deleted"); + info!(namespace, "namespace deleted"); true } Err(err) => { - warn!("[k8s-runner] namespace `{namespace}` poll failed: {err}"); + warn!(namespace, error = ?err, "namespace poll failed"); true } } diff --git a/testing-framework/runners/k8s/src/lifecycle/logs.rs b/testing-framework/runners/k8s/src/lifecycle/logs.rs index eea910a..80b28fd 100644 --- a/testing-framework/runners/k8s/src/lifecycle/logs.rs +++ b/testing-framework/runners/k8s/src/lifecycle/logs.rs @@ -9,7 +9,7 @@ pub async fn dump_namespace_logs(client: &Client, namespace: &str) { let pod_names = match list_pod_names(client, namespace).await { Ok(names) => names, Err(err) => { - warn!("[k8s-runner] failed to list pods in namespace {namespace}: {err}"); + warn!(%namespace, error = ?err, "failed to list pods for log dump"); return; } }; @@ -38,7 +38,7 @@ async fn stream_pod_logs(client: &Client, namespace: &str, pod_name: &str) { }; match pods.logs(pod_name, ¶ms).await { - Ok(log) => info!("[k8s-runner] pod {pod_name} logs:\n{log}"), - Err(err) => warn!("[k8s-runner] failed to fetch logs for pod {pod_name}: {err}"), + Ok(log) => info!(pod = pod_name, "pod logs:\n{log}"), + Err(err) => warn!(pod = pod_name, error = ?err, "failed to fetch pod logs"), } } diff --git a/testing-framework/runners/local/src/runner.rs b/testing-framework/runners/local/src/runner.rs index 88decd6..c737054 100644 --- a/testing-framework/runners/local/src/runner.rs +++ b/testing-framework/runners/local/src/runner.rs @@ -7,7 +7,7 @@ use testing_framework_core::{ 
topology::{deployment::Topology, readiness::ReadinessError}, }; use thiserror::Error; -use tracing::info; +use tracing::{debug, info}; /// Spawns validators and executors as local processes, reusing the existing /// integration harness. @@ -96,11 +96,16 @@ impl LocalDeployer { membership_check: bool, ) -> Result { let descriptors = scenario.topology(); - info!("spawning local validators/executors"); + info!( + validators = descriptors.validators().len(), + executors = descriptors.executors().len(), + "spawning local validators/executors" + ); let topology = descriptors.clone().spawn_local().await; let skip_membership = !membership_check; if let Err(source) = wait_for_readiness(&topology, skip_membership).await { + debug!(error = ?source, "local readiness failed"); return Err(LocalDeployerError::ReadinessFailed { source }); } @@ -136,6 +141,12 @@ async fn wait_for_readiness( async fn spawn_block_feed_with( node_clients: &NodeClients, ) -> Result<(BlockFeed, BlockFeedTask), LocalDeployerError> { + debug!( + validators = node_clients.validator_clients().len(), + executors = node_clients.executor_clients().len(), + "selecting validator client for local block feed" + ); + let block_source_client = node_clients.random_validator().cloned().ok_or_else(|| { LocalDeployerError::WorkloadFailed { source: "block feed requires at least one validator".into(),
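The executor and validator `wait_for_exit` helpers added in this diff both delegate to `NodeHandle::wait_for_exit`, whose body is not shown here. A plausible shape for such a helper, combining the `try_wait` probe from `monitor.rs` with `tokio::time` polling; a sketch under assumptions, not the framework's actual implementation:

```rust
use std::{process::Child, time::Duration};
use tokio::time::{sleep, timeout};
use tracing::debug;

/// Poll a child process until it exits or `limit` elapses.
/// Returns `true` if the process exited within the timeout.
async fn wait_for_exit(child: &mut Child, limit: Duration) -> bool {
    let poll = async {
        loop {
            match child.try_wait() {
                Ok(Some(status)) => {
                    debug!(?status, "process exited");
                    break;
                }
                Ok(None) => sleep(Duration::from_millis(100)).await,
                Err(err) => {
                    debug!(error = ?err, "process state check failed");
                    break;
                }
            }
        }
    };
    timeout(limit, poll).await.is_ok()
}
```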