mirror of https://github.com/logos-blockchain/logos-blockchain-testing.git
synced 2026-01-02 13:23:13 +00:00

chore: add structured logging across runners and core

This commit is contained in:
parent 741c60497b
commit 30fe4aa0ff
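For context, the pattern rolled out by this commit is `tracing`'s structured key-value logging in place of `println!`/`eprintln!`. The snippet below is a minimal sketch, not part of the diff; it assumes the `tracing` and `tracing-subscriber` crates are available to the binary under test and shows how the new events are emitted and why a subscriber has to be installed for them to appear.

// Minimal sketch (not from this commit): emitting and surfacing structured events.
use std::path::PathBuf;

use tracing::{debug, info};

fn main() {
    // A subscriber must be installed once per process, otherwise the new
    // `debug!`/`info!` events added by this commit go nowhere.
    tracing_subscriber::fmt()
        .with_max_level(tracing::Level::DEBUG)
        .init();

    let resolved = PathBuf::from("/usr/local/bin/nomos-node");

    // Structured fields instead of formatted strings: `%` records the value
    // via its `Display` impl, plain `key = value` records primitives as-is.
    info!(binary = "nomos-node", path = %resolved.display(), "resolved binary");
    debug!(attempt = 1_u32, "probing readiness endpoint");
}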
@@ -1,5 +1,7 @@
use std::{env, path::PathBuf};

use tracing::{debug, info};

pub struct BinaryConfig {
    pub env_var: &'static str,
    pub binary_name: &'static str,
@@ -12,18 +14,43 @@ pub struct BinaryResolver;
impl BinaryResolver {
    pub fn resolve_path(config: &BinaryConfig) -> PathBuf {
        if let Some(path) = env::var_os(config.env_var) {
            return PathBuf::from(path);
            let resolved = PathBuf::from(path);

            info!(
                env = config.env_var,
                binary = config.binary_name,
                path = %resolved.display(),
                "resolved binary from env override"
            );
            return resolved;
        }
        if let Some(path) = Self::which_on_path(config.binary_name) {
            info!(
                binary = config.binary_name,
                path = %path.display(),
                "resolved binary from PATH"
            );
            return path;
        }
        let shared_bin = PathBuf::from(env!("CARGO_MANIFEST_DIR")).join(config.shared_bin_subpath);
        if shared_bin.exists() {
            info!(
                binary = config.binary_name,
                path = %shared_bin.display(),
                "resolved binary from shared assets"
            );
            return shared_bin;
        }
        PathBuf::from(env!("CARGO_MANIFEST_DIR"))
        let fallback = PathBuf::from(env!("CARGO_MANIFEST_DIR"))
            .join("../../")
            .join(config.fallback_path)
            .join(config.fallback_path);

        debug!(
            binary = config.binary_name,
            path = %fallback.display(),
            "falling back to binary path"
        );
        fallback
    }

    fn which_on_path(bin: &str) -> Option<PathBuf> {
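The resolver hunk above, like the rest of the diff, relies on `tracing`'s field-capture sigils. As a reference (a sketch, not from the commit): `%value` records through the value's `Display` impl, `?value` through `Debug`, and a bare identifier is shorthand for `name = name`.

// Sketch of the field-capture shorthand used throughout these hunks.
use std::path::Path;

use tracing::{debug, warn};

fn log_examples(path: &Path, err: &std::io::Error, host: &str) {
    debug!(path = %path.display(), "Display capture via `%`");
    warn!(error = ?err, "Debug capture via `?`");
    debug!(host, "bare identifier reuses the local variable as the field");
}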
@@ -1,8 +1,10 @@
#![allow(dead_code)]

use std::process::Child;
use tracing::debug;

/// Shared lifecycle hooks (placeholder).
pub fn kill_child(child: &mut Child) {
    debug!("killing child process");
    let _ = child.kill();
}
@@ -2,10 +2,22 @@

use std::process::Child;

use tracing::debug;

/// Check if a child process is still running.
pub fn is_running(child: &mut Child) -> bool {
    match child.try_wait() {
        Ok(None) => true,
        Ok(Some(_)) | Err(_) => false,
        Ok(None) => {
            debug!("process still running");
            true
        }
        Ok(Some(status)) => {
            debug!(?status, "process exited");
            false
        }
        Err(err) => {
            debug!(error = ?err, "process state check failed");
            false
        }
    }
}
@@ -5,6 +5,7 @@ use std::{fs::File, io, path::Path};
use nomos_tracing::logging::local::FileConfig;
use serde::Serialize;
use serde_yaml::Value;
use tracing::debug;

/// Configure tracing logger to write into `NOMOS_LOG_DIR` if set, else into the
/// provided base dir.
@@ -12,6 +13,7 @@ pub fn configure_logging<F>(base_dir: &Path, prefix: &str, set_logger: F)
where
    F: FnOnce(FileConfig),
{
    debug!(prefix, base_dir = %base_dir.display(), "configuring node logging");
    if let Ok(env_dir) = std::env::var("NOMOS_LOG_DIR") {
        let log_dir = std::path::PathBuf::from(env_dir);
        let _ = std::fs::create_dir_all(&log_dir);
@@ -34,6 +36,7 @@ where
    T: Serialize,
    F: FnOnce(&mut Value),
{
    debug!(path = %path.display(), "writing node config with injection");
    let mut yaml_value =
        serde_yaml::to_value(config).map_err(|err| io::Error::new(io::ErrorKind::Other, err))?;
    inject(&mut yaml_value);
@@ -10,6 +10,7 @@ use reqwest::Url;
use serde::Serialize;
use tempfile::TempDir;
use tokio::time;
use tracing::{debug, info};

use super::lifecycle::monitor::is_running;
use crate::nodes::{
@@ -87,6 +88,8 @@ pub fn prepare_node_config<T: NodeConfigCommon>(
) -> (TempDir, T, SocketAddr, Option<SocketAddr>) {
    let dir = create_tempdir().expect("tempdir");

    debug!(dir = %dir.path().display(), log_prefix, enable_logging, "preparing node config");

    // Ensure recovery files/dirs exist so services that persist state do not fail
    // on startup.
    let _ = ensure_recovery_paths(dir.path());
@@ -100,6 +103,8 @@ pub fn prepare_node_config<T: NodeConfigCommon>(
    config.set_paths(dir.path());
    let (addr, testing_addr) = config.addresses();

    debug!(addr = %addr, testing_addr = ?testing_addr, "configured node addresses");

    (dir, config, addr, testing_addr)
}

@@ -121,6 +126,8 @@ where
    })
    .expect("failed to write node config");

    debug!(config_file = %config_path.display(), binary = %binary_path.display(), "spawning node process");

    let child = Command::new(binary_path)
        .arg(&config_path)
        .current_dir(dir.path())
@@ -143,5 +150,6 @@ where
    })
    .await?;

    info!("node readiness confirmed via consensus_info");
    Ok(handle)
}
@@ -3,6 +3,7 @@ use std::{ops::Deref, path::PathBuf};
use nomos_executor::config::Config;
use nomos_tracing_service::LoggerLayer;
pub use testing_framework_config::nodes::executor::create_executor_config;
use tracing::{debug, info};

use super::{persist_tempdir, should_persist_tempdir};
use crate::{
@@ -11,7 +12,7 @@ use crate::{
    LOGS_PREFIX,
    common::{
        binary::{BinaryConfig, BinaryResolver},
        lifecycle::kill::kill_child,
        lifecycle::{kill::kill_child, monitor::is_running},
        node::{NodeConfigCommon, NodeHandle, spawn_node},
    },
},
@@ -46,9 +47,10 @@ impl Drop for Executor {
        if should_persist_tempdir()
            && let Err(e) = persist_tempdir(&mut self.handle.tempdir, "nomos-executor")
        {
            println!("failed to persist tempdir: {e}");
            debug!(error = ?e, "failed to persist executor tempdir");
        }

        debug!("stopping executor process");
        kill_child(&mut self.handle.child);
    }
}
@@ -65,8 +67,20 @@ impl Executor {
            .await
            .expect("executor did not become ready");

        info!("executor spawned and ready");

        Self { handle }
    }

    /// Check if the executor process is still running
    pub fn is_running(&mut self) -> bool {
        is_running(&mut self.handle.child)
    }

    /// Wait for the executor process to exit, with a timeout.
    pub async fn wait_for_exit(&mut self, timeout: std::time::Duration) -> bool {
        self.handle.wait_for_exit(timeout).await
    }
}

impl NodeConfigCommon for Config {
@@ -4,6 +4,7 @@ use nomos_node::Config;
use nomos_tracing_service::LoggerLayer;
pub use testing_framework_config::nodes::validator::create_validator_config;
use tokio::time::error::Elapsed;
use tracing::{debug, info};

use super::{persist_tempdir, should_persist_tempdir};
use crate::{
@@ -12,7 +13,7 @@ use crate::{
    LOGS_PREFIX,
    common::{
        binary::{BinaryConfig, BinaryResolver},
        lifecycle::kill::kill_child,
        lifecycle::{kill::kill_child, monitor::is_running},
        node::{NodeConfigCommon, NodeHandle, spawn_node},
    },
},
@@ -52,9 +53,10 @@ impl Drop for Validator {
        if should_persist_tempdir()
            && let Err(e) = persist_tempdir(&mut self.handle.tempdir, "nomos-node")
        {
            println!("failed to persist tempdir: {e}");
            debug!(error = ?e, "failed to persist validator tempdir");
        }

        debug!("stopping validator process");
        kill_child(&mut self.handle.child);
    }
}
@@ -62,7 +64,7 @@ impl Drop for Validator {
impl Validator {
    /// Check if the validator process is still running
    pub fn is_running(&mut self) -> bool {
        crate::nodes::common::lifecycle::monitor::is_running(&mut self.handle.child)
        is_running(&mut self.handle.child)
    }

    /// Wait for the validator process to exit, with a timeout
@@ -81,6 +83,8 @@ impl Validator {
        )
        .await?;

        info!("validator spawned and ready");

        Ok(Self { handle })
    }
}
@@ -6,6 +6,7 @@ use nomos_tracing_service::TracingSettings;
use nomos_utils::bounded_duration::{MinimalBoundedDuration, SECOND};
use serde::{Deserialize, Serialize};
use serde_with::serde_as;
use tracing::debug;

use crate::{
    constants::kzg_container_path,
@@ -50,12 +51,14 @@ pub struct CfgSyncConfig {
}

pub fn load_cfgsync_template(path: &Path) -> Result<CfgSyncConfig> {
    debug!(path = %path.display(), "loading cfgsync template");
    let file = File::open(path)
        .with_context(|| format!("opening cfgsync template at {}", path.display()))?;
    serde_yaml::from_reader(file).context("parsing cfgsync template")
}

pub fn write_cfgsync_template(path: &Path, cfg: &CfgSyncConfig) -> Result<()> {
    debug!(path = %path.display(), "writing cfgsync template");
    let file = File::create(path)
        .with_context(|| format!("writing cfgsync template to {}", path.display()))?;
    let serializable = SerializableCfgSyncConfig::from(cfg);
@@ -63,6 +66,7 @@ pub fn write_cfgsync_template(path: &Path, cfg: &CfgSyncConfig) -> Result<()> {
}

pub fn render_cfgsync_yaml(cfg: &CfgSyncConfig) -> Result<String> {
    debug!("rendering cfgsync yaml");
    let serializable = SerializableCfgSyncConfig::from(cfg);
    serde_yaml::to_string(&serializable).context("rendering cfgsync yaml")
}
@@ -72,6 +76,12 @@ pub fn apply_topology_overrides(
    topology: &GeneratedTopology,
    use_kzg_mount: bool,
) {
    debug!(
        validators = topology.validators().len(),
        executors = topology.executors().len(),
        use_kzg_mount,
        "applying topology overrides to cfgsync config"
    );
    let hosts = topology.validators().len() + topology.executors().len();
    cfg.n_hosts = hosts;
@@ -1,5 +1,7 @@
use std::{num::NonZeroUsize, sync::Arc, time::Duration};

use tracing::{debug, info};

use super::{
    NodeControlCapability, expectation::Expectation, runtime::context::RunMetrics,
    workload::Workload,
@@ -227,6 +229,15 @@ impl<Caps> Builder<Caps> {
        let run_metrics = RunMetrics::from_topology(&generated, duration);
        initialize_components(&generated, &run_metrics, &mut workloads, &mut expectations);

        info!(
            validators = generated.validators().len(),
            executors = generated.executors().len(),
            duration_secs = duration.as_secs(),
            workloads = workloads.len(),
            expectations = expectations.len(),
            "scenario built"
        );

        Scenario::new(generated, workloads, expectations, duration, capabilities)
    }
}
@@ -307,6 +318,8 @@ fn initialize_workloads(
    for workload in workloads {
        let inner =
            Arc::get_mut(workload).expect("workload unexpectedly cloned before initialization");

        debug!(workload = inner.name(), "initializing workload");
        if let Err(err) = inner.init(descriptors, run_metrics) {
            panic!("workload '{}' failed to initialize: {err}", inner.name());
        }
@@ -319,6 +332,7 @@ fn initialize_expectations(
    expectations: &mut [Box<dyn Expectation>],
) {
    for expectation in expectations {
        debug!(expectation = expectation.name(), "initializing expectation");
        if let Err(err) = expectation.init(descriptors, run_metrics) {
            panic!(
                "expectation '{}' failed to initialize: {err}",
@@ -5,6 +5,7 @@ use nomos_http_api_common::paths;
use reqwest::Client as ReqwestClient;
use thiserror::Error;
use tokio::time::{sleep, timeout};
use tracing::{debug, info};

/// Role used for labelling readiness probes.
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
@@ -80,6 +81,15 @@ pub async fn wait_for_http_ports_with_host(
        return Ok(());
    }

    info!(
        role = role.label(),
        ?ports,
        host,
        timeout_secs = timeout_duration.as_secs_f32(),
        poll_ms = poll_interval.as_millis(),
        "waiting for HTTP readiness"
    );

    let client = ReqwestClient::new();
    let probes = ports.iter().copied().map(|port| {
        wait_for_single_port(
@@ -104,6 +114,7 @@ async fn wait_for_single_port(
    poll_interval: Duration,
) -> Result<(), HttpReadinessError> {
    let url = format!("http://{host}:{port}{}", paths::CRYPTARCHIA_INFO);
    debug!(role = role.label(), %url, "probing HTTP endpoint");
    let probe = async {
        loop {
            let is_ready = client
@@ -33,6 +33,7 @@ impl ClientBuilder {
            environment
                .fail("failed to construct node api clients")
                .await;
            tracing::warn!(error = ?err, host, "failed to build node clients");
            Err(err.into())
        }
    }
@@ -50,6 +51,7 @@ impl ClientBuilder {
        }
        Err(err) => {
            environment.fail("failed to initialize block feed").await;
            tracing::warn!(error = ?err, "block feed initialization failed");
            Err(err)
        }
    }
@@ -52,6 +52,8 @@ impl DeploymentOrchestrator {
            "compose deployment starting"
        );

        let validator_count = descriptors.validators().len();
        let executor_count = descriptors.executors().len();
        let host_ports = PortManager::prepare(&mut environment, &descriptors).await?;

        if self.deployer.readiness_checks {
@@ -84,6 +86,15 @@ impl DeploymentOrchestrator {
            node_control,
        );

        info!(
            validators = validator_count,
            executors = executor_count,
            duration_secs = scenario.duration().as_secs(),
            readiness_checks = self.deployer.readiness_checks,
            host,
            "compose deployment ready; handing control to scenario runner"
        );

        Ok(Runner::new(context, Some(cleanup_guard)))
    }
@@ -26,6 +26,7 @@ impl ReadinessChecker {
            ensure_validators_ready_with_ports(&host_ports.validator_api_ports()).await
        {
            environment.fail("validator readiness failed").await;
            tracing::warn!(error = ?err, "validator readiness failed");
            return Err(err.into());
        }

@@ -36,12 +37,14 @@ impl ReadinessChecker {
        if let Err(err) = ensure_executors_ready_with_ports(&host_ports.executor_api_ports()).await
        {
            environment.fail("executor readiness failed").await;
            tracing::warn!(error = ?err, "executor readiness failed");
            return Err(err.into());
        }

        info!("waiting for remote service readiness");
        if let Err(err) = ensure_remote_readiness_with_ports(descriptors, host_ports).await {
            environment.fail("remote readiness probe failed").await;
            tracing::warn!(error = ?err, "remote readiness probe failed");
            return Err(err.into());
        }
@@ -4,7 +4,7 @@ use std::{
};

use testing_framework_core::topology::generation::GeneratedTopology;
use tracing::info;
use tracing::{debug, info};

use crate::{
    docker::ensure_docker_available,
@@ -57,6 +57,10 @@ impl DeploymentSetup {
            .and_then(|port| reserve_port(port))
            .or_else(|| allocate_prometheus_port())
            .unwrap_or_else(|| PortReservation::new(DEFAULT_PROMETHEUS_PORT, None));
        debug!(
            prometheus_port = prometheus_port.port(),
            "selected prometheus port"
        );
        let environment =
            prepare_environment(&self.descriptors, prometheus_port, prometheus_env.is_some())
                .await?;
@@ -2,6 +2,7 @@ use std::{io, path::Path, process, time::Duration};

use testing_framework_core::adjust_timeout;
use tokio::{process::Command, time::timeout};
use tracing::{debug, info, warn};

const COMPOSE_UP_TIMEOUT: Duration = Duration::from_secs(120);

@@ -29,6 +30,7 @@ pub async fn run_docker_command(
    timeout_duration: Duration,
    description: &str,
) -> Result<(), ComposeCommandError> {
    debug!(description, ?command, "running docker command");
    let result = timeout(timeout_duration, command.status()).await;
    match result {
        Ok(status) => handle_compose_status(status, description),
@@ -55,6 +57,13 @@ pub async fn compose_up(
        .arg("-d")
        .current_dir(root);

    info!(
        compose_file = %compose_path.display(),
        project = project_name,
        root = %root.display(),
        "running docker compose up"
    );

    run_compose_command(cmd, adjust_timeout(COMPOSE_UP_TIMEOUT), "docker compose up").await
}

@@ -74,6 +83,13 @@ pub async fn compose_down(
        .arg("--volumes")
        .current_dir(root);

    info!(
        compose_file = %compose_path.display(),
        project = project_name,
        root = %root.display(),
        "running docker compose down"
    );

    run_compose_command(
        cmd,
        adjust_timeout(COMPOSE_UP_TIMEOUT),
@@ -96,21 +112,21 @@ pub async fn dump_compose_logs(compose_file: &Path, project: &str, root: &Path)

    match cmd.output().await {
        Ok(output) => print_logs(&output.stdout, &output.stderr),
        Err(err) => eprintln!("[compose-runner] failed to collect docker compose logs: {err}"),
        Err(err) => warn!(error = ?err, "failed to collect docker compose logs"),
    }
}

fn print_logs(stdout: &[u8], stderr: &[u8]) {
    if !stdout.is_empty() {
        eprintln!(
            "[compose-runner] docker compose logs:\n{}",
            String::from_utf8_lossy(stdout)
        warn!(
            logs = %String::from_utf8_lossy(stdout),
            "docker compose stdout"
        );
    }
    if !stderr.is_empty() {
        eprintln!(
            "[compose-runner] docker compose errors:\n{}",
            String::from_utf8_lossy(stderr)
        warn!(
            logs = %String::from_utf8_lossy(stderr),
            "docker compose stderr"
        );
    }
}
@@ -135,14 +151,23 @@ fn handle_compose_status(
    description: &str,
) -> Result<(), ComposeCommandError> {
    match status {
        Ok(code) if code.success() => Ok(()),
        Ok(code) => Err(ComposeCommandError::Failed {
            command: description.to_owned(),
            status: code,
        }),
        Err(err) => Err(ComposeCommandError::Spawn {
            command: description.to_owned(),
            source: err,
        }),
        Ok(code) if code.success() => {
            debug!(description, "docker command succeeded");
            Ok(())
        }
        Ok(code) => {
            warn!(description, status = ?code, "docker command failed");
            Err(ComposeCommandError::Failed {
                command: description.to_owned(),
                status: code,
            })
        }
        Err(err) => {
            warn!(description, error = ?err, "failed to spawn docker command");
            Err(ComposeCommandError::Spawn {
                command: description.to_owned(),
                source: err,
            })
        }
    }
}
@@ -2,6 +2,7 @@ use std::path::{Path, PathBuf};

use testing_framework_core::scenario::{DynError, NodeControlHandle};
use tokio::process::Command;
use tracing::info;

use crate::{docker::commands::run_docker_command, errors::ComposeRunnerError};

@@ -21,6 +22,7 @@ pub async fn restart_compose_service(
        .arg(service);

    let description = "docker compose restart";
    info!(service, project = project_name, compose_file = %compose_file.display(), "restarting compose service");
    run_docker_command(
        command,
        testing_framework_core::adjust_timeout(std::time::Duration::from_secs(120)),
@@ -6,6 +6,7 @@ pub mod workspace;

use std::{env, process::Stdio, time::Duration};

use tokio::{process::Command, time::timeout};
use tracing::{debug, info, warn};

use crate::{
    docker::commands::ComposeCommandError, errors::ComposeRunnerError,
@@ -35,8 +36,10 @@ pub async fn ensure_docker_available() -> Result<(), ComposeRunnerError> {
        .unwrap_or(false);

    if available {
        debug!("docker info succeeded");
        Ok(())
    } else {
        warn!("docker info failed or timed out; compose runner unavailable");
        Err(ComposeRunnerError::DockerUnavailable)
    }
}
@@ -44,6 +47,7 @@ pub async fn ensure_docker_available() -> Result<(), ComposeRunnerError> {
/// Ensure the configured compose image exists, building a local one if needed.
pub async fn ensure_compose_image() -> Result<(), ComposeRunnerError> {
    let (image, platform) = crate::docker::platform::resolve_image();
    info!(image, platform = ?platform, "ensuring compose image is present");
    ensure_image_present(&image, platform.as_deref()).await
}

@@ -53,6 +57,7 @@ pub async fn ensure_image_present(
    platform: Option<&str>,
) -> Result<(), ComposeRunnerError> {
    if docker_image_exists(image).await? {
        debug!(image, "docker image already present");
        return Ok(());
    }

@@ -157,6 +162,7 @@ pub async fn build_local_image(
    )
    .await
    .map_err(|_| {
        warn!(image, timeout = ?IMAGE_BUILD_TIMEOUT, "docker build timed out");
        ComposeRunnerError::Compose(ComposeCommandError::Timeout {
            command: String::from("docker build"),
            timeout: testing_framework_core::adjust_timeout(IMAGE_BUILD_TIMEOUT),
@@ -164,12 +170,21 @@
    })?;

    match status {
        Ok(code) if code.success() => Ok(()),
        Ok(code) => Err(ComposeRunnerError::Compose(ComposeCommandError::Failed {
            command: String::from("docker build"),
            status: code,
        })),
        Err(err) => Err(ComposeRunnerError::ImageBuild { source: err.into() }),
        Ok(code) if code.success() => {
            info!(image, platform = ?platform, "docker build completed");
            Ok(())
        }
        Ok(code) => {
            warn!(image, status = ?code, "docker build failed");
            Err(ComposeRunnerError::Compose(ComposeCommandError::Failed {
                command: String::from("docker build"),
                status: code,
            }))
        }
        Err(err) => {
            warn!(image, error = ?err, "docker build spawn failed");
            Err(ComposeRunnerError::ImageBuild { source: err.into() })
        }
    }
}
@@ -1,11 +1,14 @@
use std::env;

use tracing::debug;

/// Select the compose image and optional platform, honoring
/// NOMOS_TESTNET_IMAGE.
pub fn resolve_image() -> (String, Option<String>) {
    let image = env::var("NOMOS_TESTNET_IMAGE")
        .unwrap_or_else(|_| String::from("logos-blockchain-testing:local"));
    let platform = (image == "ghcr.io/logos-co/nomos:testnet").then(|| "linux/amd64".to_owned());
    debug!(image, platform = ?platform, "resolved compose image");
    (image, platform)
}
@@ -5,6 +5,7 @@ use std::{

use anyhow::{Context as _, Result};
use tempfile::TempDir;
use tracing::{debug, info};

/// Copy the repository stack assets into a scenario-specific temp dir.
#[derive(Debug)]
@@ -37,6 +38,11 @@ impl ComposeWorkspace {
            stack_source.display()
        );
    }
    debug!(
        repo_root = %repo_root.display(),
        stack_source = %stack_source.display(),
        "copying stack assets into temporary workspace"
    );
    copy_dir_recursive(&stack_source, &temp.path().join("stack"))?;
    let scripts_source = stack_scripts_root(&repo_root);
    if scripts_source.exists() {
@@ -86,6 +92,7 @@ impl ComposeWorkspace {
        );
    }

    info!(root = %temp.path().display(), "compose workspace created");
    Ok(Self { root: temp })
}
@@ -4,6 +4,7 @@ use testing_framework_core::{
    scenario::cfgsync::{apply_topology_overrides, load_cfgsync_template, write_cfgsync_template},
    topology::generation::GeneratedTopology,
};
use tracing::{debug, info, warn};

/// Handle that tracks a cfgsync server started for compose runs.
#[derive(Debug)]
@@ -16,6 +17,7 @@ impl CfgsyncServerHandle {
    pub fn shutdown(&mut self) {
        match self {
            Self::Container { name, stopped } if !*stopped => {
                info!(container = name, "stopping cfgsync container");
                remove_container(name);
                *stopped = true;
            }
@@ -31,12 +33,17 @@ fn remove_container(name: &str) {
        .arg(name)
        .status()
    {
        Ok(status) if status.success() => {}
        Ok(status) if status.success() => {
            debug!(container = name, "removed cfgsync container");
        }
        Ok(status) => {
            eprintln!("[compose-runner] failed to remove cfgsync container {name}: {status}");
            warn!(container = name, status = ?status, "failed to remove cfgsync container");
        }
        Err(_) => {
            eprintln!("[compose-runner] failed to spawn docker rm for cfgsync container {name}");
            warn!(
                container = name,
                "failed to spawn docker rm for cfgsync container"
            );
        }
    }
}
@@ -54,6 +61,14 @@ pub fn update_cfgsync_config(
    use_kzg_mount: bool,
    port: u16,
) -> anyhow::Result<()> {
    debug!(
        path = %path.display(),
        use_kzg_mount,
        port,
        validators = topology.validators().len(),
        executors = topology.executors().len(),
        "updating cfgsync template"
    );
    let mut cfg = load_cfgsync_template(path)?;
    cfg.port = port;
    apply_topology_overrides(&mut cfg, topology, use_kzg_mount);

@@ -172,12 +172,21 @@ pub fn prepare_workspace_state() -> Result<WorkspaceState, WorkspaceError> {
    let cfgsync_path = workspace.stack_dir().join("cfgsync.yaml");
    let use_kzg = workspace.root_path().join("kzgrs_test_params").exists();

    Ok(WorkspaceState {
    let state = WorkspaceState {
        workspace,
        root,
        cfgsync_path,
        use_kzg,
    })
    };

    debug!(
        root = %state.root.display(),
        cfgsync = %state.cfgsync_path.display(),
        use_kzg = state.use_kzg,
        "prepared compose workspace state"
    );

    Ok(state)
}

/// Log wrapper for `prepare_workspace_state`.
@@ -204,6 +213,7 @@ pub async fn start_cfgsync_stage(
) -> Result<CfgsyncServerHandle, ComposeRunnerError> {
    info!(cfgsync_port = cfgsync_port, "launching cfgsync server");
    let handle = launch_cfgsync(&workspace.cfgsync_path, cfgsync_port).await?;
    debug!(container = ?handle, "cfgsync server launched");
    Ok(handle)
}

@@ -238,6 +248,7 @@ pub fn allocate_cfgsync_port() -> Result<u16, ConfigError> {
        source: source.into(),
    })?
    .port();
    debug!(port, "allocated cfgsync port");
    Ok(port)
}

@@ -254,6 +265,13 @@ pub async fn launch_cfgsync(
    })?;
    let (image, _) = resolve_image();
    let container_name = format!("nomos-cfgsync-{}", Uuid::new_v4());
    debug!(
        container = %container_name,
        image,
        cfgsync = %cfgsync_path.display(),
        port,
        "starting cfgsync container"
    );

    let mut command = Command::new("docker");
    command
@@ -287,6 +305,8 @@ pub async fn launch_cfgsync(
        source: anyhow!(source),
    })?;

    info!(container = %container_name, port, "cfgsync container started");

    Ok(CfgsyncServerHandle::Container {
        name: container_name,
        stopped: false,
@@ -300,6 +320,12 @@ pub fn write_compose_artifacts(
    cfgsync_port: u16,
    prometheus_port: u16,
) -> Result<PathBuf, ConfigError> {
    debug!(
        cfgsync_port,
        prometheus_port,
        workspace_root = %workspace.root.display(),
        "building compose descriptor"
    );
    let descriptor = ComposeDescriptor::builder(descriptors)
        .with_kzg_mount(workspace.use_kzg)
        .with_cfgsync_port(cfgsync_port)
@@ -310,6 +336,7 @@ pub fn write_compose_artifacts(
    let compose_path = workspace.root.join("compose.generated.yml");
    write_compose_file(&descriptor, &compose_path)
        .map_err(|source| ConfigError::Template { source })?;
    debug!(compose_file = %compose_path.display(), "rendered compose file");
    Ok(compose_path)
}

@@ -339,6 +366,7 @@ pub async fn bring_up_stack(
        cfgsync_handle.shutdown();
        return Err(ComposeRunnerError::Compose(err));
    }
    debug!(project = %project_name, "docker compose up completed");
    Ok(())
}
@@ -8,6 +8,7 @@ use testing_framework_core::{
    topology::generation::{GeneratedTopology, NodeRole as TopologyNodeRole},
};
use tokio::{process::Command, time::timeout};
use tracing::{debug, info};
use url::ParseError;

use crate::{
@@ -46,6 +47,13 @@ pub async fn discover_host_ports(
    environment: &StackEnvironment,
    descriptors: &GeneratedTopology,
) -> Result<HostPortMapping, ComposeRunnerError> {
    debug!(
        compose_file = %environment.compose_path().display(),
        project = environment.project_name(),
        validators = descriptors.validators().len(),
        executors = descriptors.executors().len(),
        "resolving compose host ports"
    );
    let mut validators = Vec::new();
    for node in descriptors.validators() {
        let service = node_identifier(TopologyNodeRole::Validator, node.index());
@@ -62,10 +70,18 @@ pub async fn discover_host_ports(
        executors.push(NodeHostPorts { api, testing });
    }

    Ok(HostPortMapping {
    let mapping = HostPortMapping {
        validators,
        executors,
    })
    };

    info!(
        validator_ports = ?mapping.validators,
        executor_ports = ?mapping.executors,
        "compose host ports resolved"
    );

    Ok(mapping)
}

async fn resolve_service_port(
@@ -180,5 +196,7 @@ fn node_identifier(role: TopologyNodeRole, index: usize) -> String {
}

pub(crate) fn compose_runner_host() -> String {
    std::env::var("COMPOSE_RUNNER_HOST").unwrap_or_else(|_| "127.0.0.1".to_string())
    let host = std::env::var("COMPOSE_RUNNER_HOST").unwrap_or_else(|_| "127.0.0.1".to_string());
    debug!(host, "compose runner host resolved for readiness URLs");
    host
}
@@ -5,6 +5,7 @@ use std::{

use anyhow::Context as _;
use tera::Context as TeraContext;
use tracing::{debug, info};

use crate::descriptor::ComposeDescriptor;

@@ -49,6 +50,7 @@ pub fn write_compose_file(
    descriptor: &ComposeDescriptor,
    compose_path: &Path,
) -> Result<(), TemplateError> {
    info!(file = %compose_path.display(), "writing compose file");
    TemplateSource::load()?.write(descriptor, compose_path)
}

@@ -71,6 +73,7 @@ impl TemplateSource {
}

fn render(&self, descriptor: &ComposeDescriptor) -> Result<String, TemplateError> {
    debug!(template = %self.path.display(), "rendering compose template");
    let context = TeraContext::from_serialize(descriptor)
        .map_err(|source| TemplateError::Serialize { source })?;
@@ -2,7 +2,7 @@ use std::time::Duration;

use testing_framework_core::scenario::{BlockFeed, BlockFeedTask, NodeClients, spawn_block_feed};
use tokio::time::sleep;
use tracing::{info, warn};
use tracing::{debug, info, warn};

use crate::errors::ComposeRunnerError;

@@ -12,6 +12,12 @@ const BLOCK_FEED_RETRY_DELAY: Duration = Duration::from_secs(1);
async fn spawn_block_feed_with(
    node_clients: &NodeClients,
) -> Result<(BlockFeed, BlockFeedTask), ComposeRunnerError> {
    debug!(
        validators = node_clients.validator_clients().len(),
        executors = node_clients.executor_clients().len(),
        "selecting validator client for block feed"
    );

    let block_source_client = node_clients
        .random_validator()
        .cloned()
@@ -1,6 +1,7 @@
use std::{env, path::PathBuf, thread};

use testing_framework_core::scenario::CleanupGuard;
use tracing::{debug, info, warn};

use crate::{
    docker::{
@@ -45,7 +46,7 @@ impl RunnerCleanup {
        if let Err(err) =
            run_compose_down_blocking(&self.compose_file, &self.project_name, &self.root)
        {
            eprintln!("[compose-runner] docker compose down failed: {err}");
            warn!(error = ?err, "docker compose down failed");
        }
    }
}
@@ -80,6 +81,13 @@ fn run_compose_down_blocking(
}
impl CleanupGuard for RunnerCleanup {
    fn cleanup(mut self: Box<Self>) {
        debug!(
            compose_file = %self.compose_file.display(),
            project = %self.project_name,
            root = %self.root.display(),
            preserve = self.should_preserve(),
            "compose cleanup started"
        );
        if self.should_preserve() {
            self.persist_workspace();
            return;
@@ -101,12 +109,9 @@ impl RunnerCleanup {
    fn persist_workspace(&mut self) {
        if let Some(workspace) = self.workspace.take() {
            let keep = workspace.into_inner().keep();
            eprintln!(
                "[compose-runner] preserving docker state at {}",
                keep.display()
            );
            info!(path = %keep.display(), "preserving docker state");
        }

        eprintln!("[compose-runner] compose preserve flag set; skipping docker compose down");
        info!("compose preserve flag set; skipping docker compose down");
    }
}
@@ -4,6 +4,7 @@ use testing_framework_core::{
    adjust_timeout,
    scenario::http_probe::{self, HttpReadinessError, NodeRole},
};
use tracing::{debug, info};

const DEFAULT_WAIT: Duration = Duration::from_secs(180);
const POLL_INTERVAL: Duration = Duration::from_millis(250);
@@ -18,6 +19,7 @@ pub async fn wait_for_executors(ports: &[u16]) -> Result<(), HttpReadinessError>

async fn wait_for_ports(ports: &[u16], role: NodeRole) -> Result<(), HttpReadinessError> {
    let host = compose_runner_host();
    info!(role = ?role, ports = ?ports, host, "waiting for compose HTTP readiness");
    http_probe::wait_for_http_ports_with_host(
        ports,
        role,
@@ -29,5 +31,7 @@ async fn wait_for_ports(ports: &[u16], role: NodeRole) -> Result<(), HttpReadine
}

fn compose_runner_host() -> String {
    env::var("COMPOSE_RUNNER_HOST").unwrap_or_else(|_| "127.0.0.1".to_string())
    let host = env::var("COMPOSE_RUNNER_HOST").unwrap_or_else(|_| "127.0.0.1".to_string());
    debug!(host, "compose runner host resolved");
    host
}
@@ -89,14 +89,16 @@ impl Deployer for K8sDeployer {

    async fn deploy(&self, scenario: &Scenario) -> Result<Runner, Self::Error> {
        let descriptors = scenario.topology().clone();
        let validator_count = descriptors.validators().len();
        let executor_count = descriptors.executors().len();
        ensure_supported_topology(&descriptors)?;

        let client = Client::try_default()
            .await
            .map_err(|source| K8sRunnerError::ClientInit { source })?;
        info!(
            validators = descriptors.validators().len(),
            executors = descriptors.executors().len(),
            validators = validator_count,
            executors = executor_count,
            duration_secs = scenario.duration().as_secs(),
            readiness_checks = self.readiness_checks,
            "starting k8s deployment"
@@ -117,6 +119,7 @@ impl Deployer for K8sDeployer {
                if let Some(env) = cluster.as_mut() {
                    env.fail("failed to construct node api clients").await;
                }
                error!(error = ?err, "failed to build k8s node clients");
                return Err(err.into());
            }
        };
@@ -133,6 +136,7 @@ impl Deployer for K8sDeployer {
                    env.fail("failed to configure prometheus metrics handle")
                        .await;
                }
                error!(error = ?err, "failed to configure prometheus metrics handle");
                return Err(err.into());
            }
        };
@@ -142,6 +146,7 @@ impl Deployer for K8sDeployer {
                if let Some(env) = cluster.as_mut() {
                    env.fail("failed to initialize block feed").await;
                }
                error!(error = ?err, "failed to initialize block feed");
                return Err(err);
            }
        };
@@ -163,6 +168,12 @@ impl Deployer for K8sDeployer {
            block_feed,
            None,
        );
        info!(
            validators = validator_count,
            executors = executor_count,
            duration_secs = scenario.duration().as_secs(),
            "k8s deployment ready; handing control to scenario runner"
        );
        Ok(Runner::new(context, Some(cleanup_guard)))
    }
}
@@ -2,6 +2,7 @@ use std::env;

const NODE_HOST_ENV: &str = "K8S_RUNNER_NODE_HOST";
const KUBE_SERVICE_HOST_ENV: &str = "KUBERNETES_SERVICE_HOST";
use tracing::debug;

/// Returns the hostname or IP used to reach `NodePorts` exposed by the cluster.
/// Prefers `K8S_RUNNER_NODE_HOST`, then the standard `KUBERNETES_SERVICE_HOST`
@@ -9,12 +10,19 @@ const KUBE_SERVICE_HOST_ENV: &str = "KUBERNETES_SERVICE_HOST";
/// back to `127.0.0.1`.
pub fn node_host() -> String {
    if let Ok(host) = env::var(NODE_HOST_ENV) {
        debug!(host, env = NODE_HOST_ENV, "using node host override");
        return host;
    }
    if let Ok(host) = env::var(KUBE_SERVICE_HOST_ENV)
        && !host.is_empty()
    {
        debug!(
            host,
            env = KUBE_SERVICE_HOST_ENV,
            "using kubernetes service host"
        );
        return host;
    }
    debug!("falling back to 127.0.0.1 for node host");
    "127.0.0.1".to_owned()
}
@@ -13,6 +13,7 @@ use testing_framework_core::{
    topology::generation::GeneratedTopology,
};
use thiserror::Error;
use tracing::{debug, info};

/// Paths and image metadata required to deploy the Helm chart.
pub struct RunnerAssets {
@@ -72,6 +73,12 @@ pub enum AssetsError {
/// Render cfgsync config, Helm values, and locate scripts/KZG assets for a
/// topology.
pub fn prepare_assets(topology: &GeneratedTopology) -> Result<RunnerAssets, AssetsError> {
    info!(
        validators = topology.validators().len(),
        executors = topology.executors().len(),
        "preparing k8s runner assets"
    );

    let root = workspace_root().map_err(|source| AssetsError::WorkspaceRoot { source })?;
    let cfgsync_yaml = render_cfgsync_config(&root, topology)?;

@@ -89,6 +96,15 @@ pub fn prepare_assets(topology: &GeneratedTopology) -> Result<RunnerAssets, Asse
    let image = env::var("NOMOS_TESTNET_IMAGE")
        .unwrap_or_else(|_| String::from("logos-blockchain-testing:local"));

    debug!(
        cfgsync = %cfgsync_file.display(),
        values = %values_file.display(),
        image,
        kzg = %kzg_path.display(),
        chart = %chart_path.display(),
        "k8s runner assets prepared"
    );

    Ok(RunnerAssets {
        image,
        kzg_path,
@@ -107,6 +123,7 @@ const CFGSYNC_K8S_TIMEOUT_SECS: u64 = 300;

fn render_cfgsync_config(root: &Path, topology: &GeneratedTopology) -> Result<String, AssetsError> {
    let cfgsync_template_path = stack_assets_root(root).join("cfgsync.yaml");
    debug!(path = %cfgsync_template_path.display(), "loading cfgsync template");
    let mut cfg = load_cfgsync_template(&cfgsync_template_path)
        .map_err(|source| AssetsError::Cfgsync { source })?;
    apply_topology_overrides(&mut cfg, topology, true);
@@ -134,6 +151,14 @@ fn validate_scripts(root: &Path) -> Result<ScriptPaths, AssetsError> {
        }
    }

    debug!(
        run_cfgsync = %run_cfgsync.display(),
        run_shared = %run_shared.display(),
        run_node = %run_node.display(),
        run_executor = %run_executor.display(),
        "validated runner scripts exist"
    );

    Ok(ScriptPaths {
        run_cfgsync,
        run_shared,
@@ -245,6 +270,7 @@ fn build_values(topology: &GeneratedTopology) -> HelmValues {
        port: cfgsync_port(),
    };
    let pol_mode = pol_proof_mode();
    debug!(pol_mode, "rendering Helm values for k8s stack");
    let validators = topology
        .validators()
        .iter()
@@ -7,7 +7,7 @@ use testing_framework_core::{
    scenario::{CleanupGuard, Metrics, MetricsError, NodeClients, http_probe::NodeRole},
    topology::{generation::GeneratedTopology, readiness::ReadinessError},
};
use tracing::info;
use tracing::{debug, info};
use url::ParseError;
use uuid::Uuid;

@@ -149,10 +149,18 @@ pub fn collect_port_specs(descriptors: &GeneratedTopology) -> PortSpecs {
        })
        .collect();

    PortSpecs {
    let specs = PortSpecs {
        validators,
        executors,
    }
    };

    debug!(
        validators = specs.validators.len(),
        executors = specs.executors.len(),
        "collected k8s port specs"
    );

    specs
}

pub fn build_node_clients(cluster: &ClusterEnvironment) -> Result<NodeClients, NodeClientError> {
@@ -175,6 +183,12 @@ pub fn build_node_clients(cluster: &ClusterEnvironment) -> Result<NodeClients, N
        })
        .collect::<Result<Vec<_>, _>>()?;

    debug!(
        validators = validators.len(),
        executors = executors.len(),
        "built k8s node clients"
    );

    Ok(NodeClients::new(validators, executors))
}

@@ -205,7 +219,15 @@ pub async fn ensure_cluster_readiness(
        Some(&executor_membership_urls),
    )
    .await
    .map_err(|source| RemoteReadinessError::Remote { source })
    .map_err(|source| RemoteReadinessError::Remote { source })?;

    info!(
        validator_api_ports = ?validator_api,
        executor_api_ports = ?executor_api,
        "k8s remote readiness confirmed"
    );

    Ok(())
}

pub fn cluster_identifiers() -> (String, String) {
@@ -2,6 +2,7 @@ use std::{io, process::Stdio};

use thiserror::Error;
use tokio::process::Command;
use tracing::{debug, info};

use crate::infrastructure::assets::{RunnerAssets, cfgsync_port_value, workspace_root};

@@ -36,6 +37,17 @@ pub async fn install_release(
    } else {
        "File"
    };
    info!(
        release,
        namespace,
        validators,
        executors,
        image = %assets.image,
        cfgsync_port = cfgsync_port_value(),
        kzg = %assets.kzg_path.display(),
        values = %assets.values_file.display(),
        "installing helm release"
    );

    let mut cmd = Command::new("helm");
    cmd.arg("install")
@@ -94,16 +106,11 @@ pub async fn install_release(
    let output = run_helm_command(cmd, &command).await?;

    if std::env::var("K8S_RUNNER_DEBUG").is_ok() {
        println!(
            "[k8s-runner] {command} stdout:\n{}",
            String::from_utf8_lossy(&output.stdout)
        );
        println!(
            "[k8s-runner] {command} stderr:\n{}",
            String::from_utf8_lossy(&output.stderr)
        );
        debug!(command, stdout = %String::from_utf8_lossy(&output.stdout), "helm install stdout");
        debug!(command, stderr = %String::from_utf8_lossy(&output.stderr), "helm install stderr");
    }

    info!(release, namespace, "helm install completed");
    Ok(())
}

@@ -117,12 +124,9 @@ pub async fn uninstall_release(release: &str, namespace: &str) -> Result<(), Hel
        .stdout(Stdio::piped())
        .stderr(Stdio::piped());

    println!("[k8s-runner] issuing `helm uninstall {release}` in namespace `{namespace}`");

    info!(release, namespace, "issuing helm uninstall");
    run_helm_command(cmd, &format!("helm uninstall {release}")).await?;
    println!(
        "[k8s-runner] helm uninstall {release} completed successfully (namespace `{namespace}`)"
    );
    info!(release, namespace, "helm uninstall completed successfully");
    Ok(())
}
@@ -1,11 +1,17 @@
use testing_framework_core::scenario::{BlockFeed, BlockFeedTask, NodeClients, spawn_block_feed};
use tracing::info;
use tracing::{debug, info};

use crate::deployer::K8sRunnerError;

pub async fn spawn_block_feed_with(
    node_clients: &NodeClients,
) -> Result<(BlockFeed, BlockFeedTask), K8sRunnerError> {
    debug!(
        validators = node_clients.validator_clients().len(),
        executors = node_clients.executor_clients().len(),
        "selecting node client for block feed"
    );

    let block_source_client = node_clients
        .any_client()
        .cloned()
@@ -7,7 +7,7 @@ use tokio::{
    process::Command,
    time::{Duration, sleep},
};
use tracing::warn;
use tracing::{info, warn};

use crate::infrastructure::helm::uninstall_release;

@@ -37,7 +37,11 @@ impl RunnerCleanup {

    async fn cleanup_async(&self) {
        if self.preserve {
            print_preserve_notice(&self.release, &self.namespace);
            info!(
                release = %self.release,
                namespace = %self.namespace,
                "preserving k8s release and namespace"
            );
            return;
        }

@@ -51,16 +55,12 @@ impl RunnerCleanup {
            }) {
                Ok(()) => true,
                Err(err) => {
                    warn!(
                        "[k8s-runner] cleanup timed out after 120s: {err}; falling back to background thread"
                    );
                    warn!(error = ?err, "cleanup timed out after 120s; falling back to background thread");
                    false
                }
            },
            Err(err) => {
                warn!(
                    "[k8s-runner] unable to create cleanup runtime: {err}; falling back to background thread"
                );
                warn!(error = ?err, "unable to create cleanup runtime; falling back to background thread");
                false
            }
        }
@@ -73,26 +73,22 @@ impl RunnerCleanup {
        {
            Ok(handle) => {
                if let Err(err) = handle.join() {
                    warn!("[k8s-runner] cleanup thread panicked: {err:?}");
                    warn!(error = ?err, "cleanup thread panicked");
                }
            }
            Err(err) => warn!("[k8s-runner] failed to spawn cleanup thread: {err}"),
            Err(err) => warn!(error = ?err, "failed to spawn cleanup thread"),
        }
    }
}

fn print_preserve_notice(release: &str, namespace: &str) {
    println!("[k8s-runner] preserving Helm release `{release}` in namespace `{namespace}`");
}

async fn uninstall_release_and_namespace(client: &Client, release: &str, namespace: &str) {
    if let Err(err) = uninstall_release(release, namespace).await {
        println!("[k8s-runner] helm uninstall {release} failed: {err}");
        warn!(release, namespace, error = ?err, "helm uninstall failed during cleanup");
    }

    println!("[k8s-runner] deleting namespace `{namespace}` via k8s API",);
    info!(namespace, "deleting namespace via k8s API");
    delete_namespace(client, namespace).await;
    println!("[k8s-runner] delete request for namespace `{namespace}` finished",);
    info!(namespace, "namespace delete request finished");
}

fn run_background_cleanup(cleanup: Box<RunnerCleanup>) {
@@ -119,12 +115,15 @@ async fn delete_namespace(client: &Client, namespace: &str) {
    if delete_namespace_via_cli(namespace).await {
        wait_for_namespace_termination(&namespaces, namespace).await;
    } else {
        warn!("[k8s-runner] unable to delete namespace `{namespace}` using kubectl fallback");
        warn!(
            namespace,
            "unable to delete namespace using kubectl fallback"
        );
    }
}

async fn delete_namespace_via_api(namespaces: &Api<Namespace>, namespace: &str) -> bool {
    println!("[k8s-runner] invoking kubernetes API to delete namespace `{namespace}`");
    info!(namespace, "invoking kubernetes API to delete namespace");
    match tokio::time::timeout(
        Duration::from_secs(10),
        namespaces.delete(namespace, &DeleteParams::default()),
@@ -132,19 +131,20 @@ async fn delete_namespace_via_api(namespaces: &Api<Namespace>, namespace: &str)
    .await
    {
        Ok(Ok(_)) => {
            println!(
                "[k8s-runner] delete request accepted for namespace `{namespace}`; waiting for termination"
            info!(
                namespace,
                "delete request accepted; waiting for termination"
            );
            true
        }
        Ok(Err(err)) => {
            println!("[k8s-runner] failed to delete namespace `{namespace}` via API: {err}");
            warn!("[k8s-runner] api delete failed for namespace {namespace}: {err}");
            warn!(namespace, error = ?err, "failed to delete namespace via API");
            false
        }
        Err(_) => {
            println!(
                "[k8s-runner] kubernetes API timed out deleting namespace `{namespace}`; falling back to kubectl"
            warn!(
                namespace,
                "kubernetes API timed out deleting namespace; falling back to kubectl"
            );
            false
        }
@@ -152,7 +152,7 @@ async fn delete_namespace_via_api(namespaces: &Api<Namespace>, namespace: &str)
}

async fn delete_namespace_via_cli(namespace: &str) -> bool {
    println!("[k8s-runner] invoking `kubectl delete namespace {namespace}` fallback");
    info!(namespace, "invoking kubectl delete namespace fallback");
    let output = Command::new("kubectl")
        .arg("delete")
        .arg("namespace")
@@ -163,19 +163,20 @@ async fn delete_namespace_via_cli(namespace: &str) -> bool {

    match output {
        Ok(result) if result.status.success() => {
            println!("[k8s-runner] `kubectl delete namespace {namespace}` completed successfully");
            info!(namespace, "kubectl delete namespace completed successfully");
            true
        }
        Ok(result) => {
            println!(
                "[k8s-runner] `kubectl delete namespace {namespace}` failed: {}\n{}",
                String::from_utf8_lossy(&result.stderr),
                String::from_utf8_lossy(&result.stdout)
            warn!(
                namespace,
                stderr = %String::from_utf8_lossy(&result.stderr),
                stdout = %String::from_utf8_lossy(&result.stdout),
                "kubectl delete namespace failed"
            );
            false
        }
        Err(err) => {
            println!("[k8s-runner] failed to spawn kubectl for namespace `{namespace}`: {err}");
            warn!(namespace, error = ?err, "failed to spawn kubectl delete namespace");
            false
        }
    }
@@ -204,19 +205,16 @@ async fn namespace_deleted(namespaces: &Api<Namespace>, namespace: &str, attempt
                .as_ref()
                .and_then(|status| status.phase.clone())
                .unwrap_or_else(|| "Unknown".into());
            println!(
                "[k8s-runner] waiting for namespace `{}` to terminate (phase={phase:?})",
                namespace
            );
            info!(namespace, ?phase, "waiting for namespace to terminate");
        }
        false
    }
    Ok(None) => {
        println!("[k8s-runner] namespace `{namespace}` deleted");
        info!(namespace, "namespace deleted");
        true
    }
    Err(err) => {
        warn!("[k8s-runner] namespace `{namespace}` poll failed: {err}");
        warn!(namespace, error = ?err, "namespace poll failed");
        true
    }
}
@@ -9,7 +9,7 @@ pub async fn dump_namespace_logs(client: &Client, namespace: &str) {
    let pod_names = match list_pod_names(client, namespace).await {
        Ok(names) => names,
        Err(err) => {
            warn!("[k8s-runner] failed to list pods in namespace {namespace}: {err}");
            warn!(%namespace, error = ?err, "failed to list pods for log dump");
            return;
        }
    };
@@ -38,7 +38,7 @@ async fn stream_pod_logs(client: &Client, namespace: &str, pod_name: &str) {
    };

    match pods.logs(pod_name, &params).await {
        Ok(log) => info!("[k8s-runner] pod {pod_name} logs:\n{log}"),
        Err(err) => warn!("[k8s-runner] failed to fetch logs for pod {pod_name}: {err}"),
        Ok(log) => info!(pod = pod_name, "pod logs:\n{log}"),
        Err(err) => warn!(pod = pod_name, error = ?err, "failed to fetch pod logs"),
    }
}
@@ -7,7 +7,7 @@ use testing_framework_core::{
    topology::{deployment::Topology, readiness::ReadinessError},
};
use thiserror::Error;
use tracing::info;
use tracing::{debug, info};

/// Spawns validators and executors as local processes, reusing the existing
/// integration harness.
@@ -96,11 +96,16 @@ impl LocalDeployer {
        membership_check: bool,
    ) -> Result<Topology, LocalDeployerError> {
        let descriptors = scenario.topology();
        info!("spawning local validators/executors");
        info!(
            validators = descriptors.validators().len(),
            executors = descriptors.executors().len(),
            "spawning local validators/executors"
        );
        let topology = descriptors.clone().spawn_local().await;

        let skip_membership = !membership_check;
        if let Err(source) = wait_for_readiness(&topology, skip_membership).await {
            debug!(error = ?source, "local readiness failed");
            return Err(LocalDeployerError::ReadinessFailed { source });
        }

@@ -136,6 +141,12 @@ async fn wait_for_readiness(
async fn spawn_block_feed_with(
    node_clients: &NodeClients,
) -> Result<(BlockFeed, BlockFeedTask), LocalDeployerError> {
    debug!(
        validators = node_clients.validator_clients().len(),
        executors = node_clients.executor_clients().len(),
        "selecting validator client for local block feed"
    );

    let block_source_client = node_clients.random_validator().cloned().ok_or_else(|| {
        LocalDeployerError::WorkloadFailed {
            source: "block feed requires at least one validator".into(),
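For reference, the added `debug!`/`info!` calls above are only visible when a subscriber with an appropriate filter is installed in the test binary. A minimal sketch (not part of this diff), assuming `tracing-subscriber` is built with its `env-filter` feature:

use tracing_subscriber::EnvFilter;

fn init_logging() {
    // e.g. RUST_LOG="info,testing_framework_core=debug" raises verbosity for
    // the core crate only (crate name taken from the imports in the hunks above).
    tracing_subscriber::fmt()
        .with_env_filter(
            EnvFilter::try_from_default_env().unwrap_or_else(|_| EnvFilter::new("info")),
        )
        .init();
}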