From 26f312cf6bc753423b97b3b95bb6ed4b8cb796fa Mon Sep 17 00:00:00 2001 From: andrussal Date: Wed, 28 Jan 2026 05:16:21 +0100 Subject: [PATCH] Add local stop-node support --- Cargo.lock | 1 + scripts/build/build-bundle.sh | 15 +++---- scripts/run/run-examples.sh | 11 ++++- testing-framework/core/src/nodes/node.rs | 10 +++++ testing-framework/deployers/local/Cargo.toml | 3 ++ .../deployers/local/src/manual/mod.rs | 8 ++++ .../deployers/local/src/node_control/mod.rs | 42 ++++++++++++++++++- .../deployers/local/src/runner.rs | 7 +++- .../deployers/local/tests/restart.rs | 20 +++++---- 9 files changed, 99 insertions(+), 18 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index c239e76..aaea87e 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -6569,6 +6569,7 @@ dependencies = [ "thiserror 2.0.18", "tokio", "tracing", + "tracing-subscriber 0.3.22", ] [[package]] diff --git a/scripts/build/build-bundle.sh b/scripts/build/build-bundle.sh index 4a32ede..87d25a7 100755 --- a/scripts/build/build-bundle.sh +++ b/scripts/build/build-bundle.sh @@ -309,11 +309,14 @@ build_bundle::prepare_circuits() { } build_bundle::build_binaries() { - BUILD_FEATURES_LABEL="all" + BUILD_FEATURES_LABEL="all,pol-dev-mode,verification-keys" echo "==> Building binaries (platform=${PLATFORM})" mkdir -p "${NODE_SRC}" ( cd "${NODE_SRC}" + if [ -d "${NODE_TARGET}" ]; then + rm -rf "${NODE_TARGET}" + fi if [ -n "${LOGOS_BLOCKCHAIN_NODE_PATH}" ]; then echo "Using local logos-blockchain-node checkout at ${NODE_SRC} (no fetch/checkout)" else @@ -326,18 +329,16 @@ build_bundle::build_binaries() { git clean -fdx fi - if [ -z "${LOGOS_BLOCKCHAIN_NODE_PATH}" ]; then - build_bundle::apply_nomos_node_patches "${NODE_SRC}" - fi - unset CARGO_FEATURE_BUILD_VERIFICATION_KEY if [ -n "${BUNDLE_RUSTUP_TOOLCHAIN}" ]; then - RUSTFLAGS='--cfg feature="pol-dev-mode"' \ + RUSTFLAGS='--cfg feature="pol-dev-mode" --cfg feature="build-verification-key"' \ + CARGO_FEATURE_BUILD_VERIFICATION_KEY=1 \ RUSTUP_TOOLCHAIN="${BUNDLE_RUSTUP_TOOLCHAIN}" \ cargo build --all-features \ -p logos-blockchain-node \ --target-dir "${NODE_TARGET}" else - RUSTFLAGS='--cfg feature="pol-dev-mode"' \ + RUSTFLAGS='--cfg feature="pol-dev-mode" --cfg feature="build-verification-key"' \ + CARGO_FEATURE_BUILD_VERIFICATION_KEY=1 \ cargo build --all-features \ -p logos-blockchain-node \ --target-dir "${NODE_TARGET}" diff --git a/scripts/run/run-examples.sh b/scripts/run/run-examples.sh index e4af1c0..f34af21 100755 --- a/scripts/run/run-examples.sh +++ b/scripts/run/run-examples.sh @@ -60,6 +60,7 @@ Environment: LOGOS_BLOCKCHAIN_TESTNET_IMAGE_PULL_POLICY K8s imagePullPolicy (default ${DEFAULT_PULL_POLICY_LOCAL}; set to ${DEFAULT_PULL_POLICY_ECR} for --ecr) LOGOS_BLOCKCHAIN_BINARIES_TAR Path to prebuilt binaries tarball (default .tmp/nomos-binaries--.tar.gz) LOGOS_BLOCKCHAIN_CIRCUITS Directory containing circuits assets (defaults to ~/.logos-blockchain-circuits) + CARGO_FEATURE_BUILD_VERIFICATION_KEY Build flag to embed Groth16 verification keys in node binaries (recommended for host) LOGOS_BLOCKCHAIN_SKIP_IMAGE_BUILD Set to 1 to skip rebuilding the compose/k8s image LOGOS_BLOCKCHAIN_FORCE_IMAGE_BUILD Set to 1 to force image rebuild even for k8s ECR mode LOGOS_BLOCKCHAIN_METRICS_QUERY_URL PromQL base URL for the runner process (optional) @@ -301,8 +302,9 @@ run_examples::bundle_matches_expected() { local tar_path="$1" [ -f "${tar_path}" ] || return 1 [ -z "${LOGOS_BLOCKCHAIN_NODE_REV:-}" ] && return 0 + local expected_features="${RUN_EXAMPLES_EXPECTED_BUNDLE_FEATURES:-all,pol-dev-mode,verification-keys}" - local meta tar_rev tar_head + local meta tar_rev tar_head tar_features meta="$(tar -xOzf "${tar_path}" artifacts/nomos-bundle-meta.env 2>/dev/null || true)" if [ -z "${meta}" ]; then echo "Bundle meta missing in ${tar_path}; treating as stale and rebuilding." >&2 @@ -310,6 +312,11 @@ run_examples::bundle_matches_expected() { fi tar_rev="$(echo "${meta}" | sed -n 's/^nomos_node_rev=//p' | head -n 1)" tar_head="$(echo "${meta}" | sed -n 's/^nomos_node_git_head=//p' | head -n 1)" + tar_features="$(echo "${meta}" | sed -n 's/^features=//p' | head -n 1)" + if [ -n "${expected_features}" ] && [ "${tar_features}" != "${expected_features}" ]; then + echo "Bundle ${tar_path} features '${tar_features}' do not match expected '${expected_features}'; rebuilding." >&2 + return 1 + fi if [ -n "${tar_rev}" ] && [ "${tar_rev}" != "${LOGOS_BLOCKCHAIN_NODE_REV}" ]; then echo "Bundle ${tar_path} is for logos-blockchain-node rev ${tar_rev}, expected ${LOGOS_BLOCKCHAIN_NODE_REV}; rebuilding." >&2 return 1 @@ -501,6 +508,8 @@ run_examples::run() { if [ "${MODE}" = "host" ]; then run_examples::ensure_circuits + # Ensure Groth16 verification keys are embedded when building local node binaries. + export CARGO_FEATURE_BUILD_VERIFICATION_KEY=1 fi echo "==> Running ${BIN} for ${RUN_SECS}s (mode=${MODE}, image=${IMAGE})" diff --git a/testing-framework/core/src/nodes/node.rs b/testing-framework/core/src/nodes/node.rs index d40609c..5fd9600 100644 --- a/testing-framework/core/src/nodes/node.rs +++ b/testing-framework/core/src/nodes/node.rs @@ -133,8 +133,18 @@ impl Node { old_pid, new_pid, "node restart readiness confirmed via consensus_info" ); + Ok(()) } + + /// Stop the node process without restarting it. + pub async fn stop(&mut self) { + let pid = self.pid(); + debug!(pid, "stopping node process"); + + kill_child(&mut self.handle.child); + let _ = self.wait_for_exit(RESTART_SHUTDOWN_TIMEOUT).await; + } } impl NodeConfigCommon for Config { diff --git a/testing-framework/deployers/local/Cargo.toml b/testing-framework/deployers/local/Cargo.toml index 46e9b56..32b895a 100644 --- a/testing-framework/deployers/local/Cargo.toml +++ b/testing-framework/deployers/local/Cargo.toml @@ -24,3 +24,6 @@ testing-framework-core = { path = "../../core" } thiserror = { workspace = true } tokio = { workspace = true } tracing = { workspace = true } + +[dev-dependencies] +tracing-subscriber = "0.3" diff --git a/testing-framework/deployers/local/src/manual/mod.rs b/testing-framework/deployers/local/src/manual/mod.rs index 98b4dd6..63df702 100644 --- a/testing-framework/deployers/local/src/manual/mod.rs +++ b/testing-framework/deployers/local/src/manual/mod.rs @@ -77,6 +77,10 @@ impl LocalManualCluster { Ok(self.nodes.restart_node(index).await?) } + pub async fn stop_node(&self, index: usize) -> Result<(), ManualClusterError> { + Ok(self.nodes.stop_node(index).await?) + } + pub async fn wait_network_ready(&self) -> Result<(), ReadinessError> { let nodes = self.nodes.readiness_nodes(); if self.is_singleton(&nodes) { @@ -110,6 +114,10 @@ impl NodeControlHandle for LocalManualCluster { .map_err(|err| err.into()) } + async fn stop_node(&self, index: usize) -> Result<(), DynError> { + self.nodes.stop_node(index).await.map_err(|err| err.into()) + } + async fn start_node(&self, name: &str) -> Result { self.start_node_with(name, StartNodeOptions::default()) .await diff --git a/testing-framework/deployers/local/src/node_control/mod.rs b/testing-framework/deployers/local/src/node_control/mod.rs index 6c6c2c3..9193a4b 100644 --- a/testing-framework/deployers/local/src/node_control/mod.rs +++ b/testing-framework/deployers/local/src/node_control/mod.rs @@ -170,12 +170,17 @@ impl LocalNodeManager { #[must_use] pub fn node_pid(&self, index: usize) -> Option { - let state = self + let mut state = self .state .lock() .unwrap_or_else(|poisoned| poisoned.into_inner()); - state.nodes.get(index).map(|node| node.pid()) + let node = state.nodes.get_mut(index)?; + if node.is_running() { + Some(node.pid()) + } else { + None + } } pub fn stop_all(&self) { @@ -361,6 +366,35 @@ impl LocalNodeManager { Ok(()) } + pub async fn stop_node(&self, index: usize) -> Result<(), LocalNodeManagerError> { + let mut node = { + let mut state = self + .state + .lock() + .unwrap_or_else(|poisoned| poisoned.into_inner()); + + if index >= state.nodes.len() { + return Err(LocalNodeManagerError::NodeIndex { index }); + } + + state.nodes.remove(index) + }; + + node.stop().await; + + let mut state = self + .state + .lock() + .unwrap_or_else(|poisoned| poisoned.into_inner()); + + if index <= state.nodes.len() { + state.nodes.insert(index, node); + } else { + state.nodes.push(node); + } + Ok(()) + } + async fn spawn_and_register_node( &self, node_name: &str, @@ -416,6 +450,10 @@ impl NodeControlHandle for LocalNodeManager { self.restart_node(index).await.map_err(|err| err.into()) } + async fn stop_node(&self, index: usize) -> Result<(), DynError> { + self.stop_node(index).await.map_err(|err| err.into()) + } + async fn start_node(&self, name: &str) -> Result { self.start_node_with(name, StartNodeOptions::default()) .await diff --git a/testing-framework/deployers/local/src/runner.rs b/testing-framework/deployers/local/src/runner.rs index 33fc560..cfbb7d9 100644 --- a/testing-framework/deployers/local/src/runner.rs +++ b/testing-framework/deployers/local/src/runner.rs @@ -2,6 +2,7 @@ use std::sync::Arc; use async_trait::async_trait; use testing_framework_core::{ + nodes::common::node::SpawnNodeError, scenario::{ BlockFeed, BlockFeedTask, Deployer, DynError, Metrics, NodeClients, NodeControlCapability, RunContext, Runner, Scenario, ScenarioError, spawn_block_feed, @@ -28,7 +29,7 @@ pub enum LocalDeployerError { #[error("failed to spawn local topology: {source}")] Spawn { #[source] - source: testing_framework_core::nodes::common::node::SpawnNodeError, + source: SpawnNodeError, }, #[error("readiness probe failed: {source}")] ReadinessFailed { @@ -105,11 +106,14 @@ impl Deployer for LocalDeployer { if self.membership_check { let topology = Topology::from_nodes(nodes); + wait_for_readiness(&topology).await.map_err(|source| { debug!(error = ?source, "local readiness failed"); LocalDeployerError::ReadinessFailed { source } })?; + nodes = topology.into_nodes(); + info!("local nodes are ready"); } else { info!("skipping local membership readiness checks"); @@ -120,6 +124,7 @@ impl Deployer for LocalDeployer { NodeClients::default(), LocalNodeManagerSeed::from_topology(scenario.topology()), )); + node_control.initialize_with_nodes(nodes); let node_clients = node_control.node_clients(); diff --git a/testing-framework/deployers/local/tests/restart.rs b/testing-framework/deployers/local/tests/restart.rs index bd2e464..98a19b6 100644 --- a/testing-framework/deployers/local/tests/restart.rs +++ b/testing-framework/deployers/local/tests/restart.rs @@ -5,10 +5,12 @@ use testing_framework_core::{ topology::config::TopologyConfig, }; use testing_framework_runner_local::LocalDeployer; +use tracing_subscriber::fmt::try_init; #[tokio::test] #[ignore = "requires local node binary and open ports"] async fn local_restart_node() -> Result<(), Box> { + let _ = try_init(); let mut scenario = ScenarioBuilder::topology_with(|t| t.nodes(1)) .enable_node_control() .with_run_duration(Duration::from_secs(1)) @@ -27,11 +29,11 @@ async fn local_restart_node() -> Result<(), Box Result<(), Box Result<(), Box> { + let _ = try_init(); let deployer = LocalDeployer::default(); let cluster = deployer.manual_cluster(TopologyConfig::with_node_numbers(1))?; @@ -53,8 +56,11 @@ async fn manual_cluster_restart_node() -> Result<(), Box