Add local stop-node support

andrussal 2026-01-28 05:16:21 +01:00
parent 160dbe1078
commit 26f312cf6b
9 changed files with 99 additions and 18 deletions

Cargo.lock (generated)

@@ -6569,6 +6569,7 @@ dependencies = [
"thiserror 2.0.18",
"tokio",
"tracing",
"tracing-subscriber 0.3.22",
]
[[package]]


@@ -309,11 +309,14 @@ build_bundle::prepare_circuits() {
}
build_bundle::build_binaries() {
BUILD_FEATURES_LABEL="all"
BUILD_FEATURES_LABEL="all,pol-dev-mode,verification-keys"
echo "==> Building binaries (platform=${PLATFORM})"
mkdir -p "${NODE_SRC}"
(
cd "${NODE_SRC}"
if [ -d "${NODE_TARGET}" ]; then
rm -rf "${NODE_TARGET}"
fi
if [ -n "${LOGOS_BLOCKCHAIN_NODE_PATH}" ]; then
echo "Using local logos-blockchain-node checkout at ${NODE_SRC} (no fetch/checkout)"
else
@@ -326,18 +329,16 @@ build_bundle::build_binaries() {
git clean -fdx
fi
if [ -z "${LOGOS_BLOCKCHAIN_NODE_PATH}" ]; then
build_bundle::apply_nomos_node_patches "${NODE_SRC}"
fi
unset CARGO_FEATURE_BUILD_VERIFICATION_KEY
if [ -n "${BUNDLE_RUSTUP_TOOLCHAIN}" ]; then
RUSTFLAGS='--cfg feature="pol-dev-mode"' \
RUSTFLAGS='--cfg feature="pol-dev-mode" --cfg feature="build-verification-key"' \
CARGO_FEATURE_BUILD_VERIFICATION_KEY=1 \
RUSTUP_TOOLCHAIN="${BUNDLE_RUSTUP_TOOLCHAIN}" \
cargo build --all-features \
-p logos-blockchain-node \
--target-dir "${NODE_TARGET}"
else
RUSTFLAGS='--cfg feature="pol-dev-mode"' \
RUSTFLAGS='--cfg feature="pol-dev-mode" --cfg feature="build-verification-key"' \
CARGO_FEATURE_BUILD_VERIFICATION_KEY=1 \
cargo build --all-features \
-p logos-blockchain-node \
--target-dir "${NODE_TARGET}"


@@ -60,6 +60,7 @@ Environment:
LOGOS_BLOCKCHAIN_TESTNET_IMAGE_PULL_POLICY K8s imagePullPolicy (default ${DEFAULT_PULL_POLICY_LOCAL}; set to ${DEFAULT_PULL_POLICY_ECR} for --ecr)
LOGOS_BLOCKCHAIN_BINARIES_TAR Path to prebuilt binaries tarball (default .tmp/nomos-binaries-<platform>-<version>.tar.gz)
LOGOS_BLOCKCHAIN_CIRCUITS Directory containing circuits assets (defaults to ~/.logos-blockchain-circuits)
CARGO_FEATURE_BUILD_VERIFICATION_KEY Set to 1 to embed Groth16 verification keys in node binaries (recommended for host mode)
LOGOS_BLOCKCHAIN_SKIP_IMAGE_BUILD Set to 1 to skip rebuilding the compose/k8s image
LOGOS_BLOCKCHAIN_FORCE_IMAGE_BUILD Set to 1 to force image rebuild even for k8s ECR mode
LOGOS_BLOCKCHAIN_METRICS_QUERY_URL PromQL base URL for the runner process (optional)
@@ -301,8 +302,9 @@ run_examples::bundle_matches_expected() {
local tar_path="$1"
[ -f "${tar_path}" ] || return 1
[ -z "${LOGOS_BLOCKCHAIN_NODE_REV:-}" ] && return 0
local expected_features="${RUN_EXAMPLES_EXPECTED_BUNDLE_FEATURES:-all,pol-dev-mode,verification-keys}"
local meta tar_rev tar_head
local meta tar_rev tar_head tar_features
meta="$(tar -xOzf "${tar_path}" artifacts/nomos-bundle-meta.env 2>/dev/null || true)"
if [ -z "${meta}" ]; then
echo "Bundle meta missing in ${tar_path}; treating as stale and rebuilding." >&2
@@ -310,6 +312,11 @@ run_examples::bundle_matches_expected() {
fi
tar_rev="$(echo "${meta}" | sed -n 's/^nomos_node_rev=//p' | head -n 1)"
tar_head="$(echo "${meta}" | sed -n 's/^nomos_node_git_head=//p' | head -n 1)"
tar_features="$(echo "${meta}" | sed -n 's/^features=//p' | head -n 1)"
if [ -n "${expected_features}" ] && [ "${tar_features}" != "${expected_features}" ]; then
echo "Bundle ${tar_path} features '${tar_features}' do not match expected '${expected_features}'; rebuilding." >&2
return 1
fi
if [ -n "${tar_rev}" ] && [ "${tar_rev}" != "${LOGOS_BLOCKCHAIN_NODE_REV}" ]; then
echo "Bundle ${tar_path} is for logos-blockchain-node rev ${tar_rev}, expected ${LOGOS_BLOCKCHAIN_NODE_REV}; rebuilding." >&2
return 1
@@ -501,6 +508,8 @@ run_examples::run() {
if [ "${MODE}" = "host" ]; then
run_examples::ensure_circuits
# Ensure Groth16 verification keys are embedded when building local node binaries.
export CARGO_FEATURE_BUILD_VERIFICATION_KEY=1
fi
echo "==> Running ${BIN} for ${RUN_SECS}s (mode=${MODE}, image=${IMAGE})"


@@ -133,8 +133,18 @@ impl Node {
old_pid,
new_pid, "node restart readiness confirmed via consensus_info"
);
Ok(())
}
/// Stop the node process without restarting it.
pub async fn stop(&mut self) {
let pid = self.pid();
debug!(pid, "stopping node process");
kill_child(&mut self.handle.child);
let _ = self.wait_for_exit(RESTART_SHUTDOWN_TIMEOUT).await;
}
}
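Node::stop is the first half of a restart: kill the child, then wait, bounded by the same RESTART_SHUTDOWN_TIMEOUT used for restarts, for the process to exit. A minimal standalone sketch of that sequence, using plain tokio in place of the repo's kill_child / wait_for_exit helpers (the function name and signature here are assumptions, not the repo's API):

```rust
use std::time::Duration;

use tokio::{process::Child, time::timeout};

// Sketch only: the real code goes through kill_child / wait_for_exit.
async fn stop_child(child: &mut Child, shutdown_timeout: Duration) {
    // Send the kill signal without awaiting the exit status.
    let _ = child.start_kill();
    // Bound the wait so a wedged process cannot hang the harness;
    // the exit status itself is deliberately ignored.
    let _ = timeout(shutdown_timeout, child.wait()).await;
}
```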
impl NodeConfigCommon for Config {


@@ -24,3 +24,6 @@ testing-framework-core = { path = "../../core" }
thiserror = { workspace = true }
tokio = { workspace = true }
tracing = { workspace = true }
[dev-dependencies]
tracing-subscriber = "0.3"


@@ -77,6 +77,10 @@ impl LocalManualCluster {
Ok(self.nodes.restart_node(index).await?)
}
pub async fn stop_node(&self, index: usize) -> Result<(), ManualClusterError> {
Ok(self.nodes.stop_node(index).await?)
}
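stop_node on the manual cluster is a thin delegation to the node manager, so manual-cluster drivers get the same stop semantics as scenario runs. A hypothetical usage sketch, mirroring the integration test at the end of this commit (import paths omitted and assumed):

```rust
// Hypothetical driver: assumes a LocalManualCluster with at least one
// spawned node (see the manual_cluster_restart_node test below).
async fn stop_first_node(cluster: &LocalManualCluster) -> Result<(), ManualClusterError> {
    cluster.stop_node(0).await?;
    // A stopped node no longer reports a PID (see node_pid below).
    assert!(cluster.node_pid(0).is_none(), "pid should be absent after stop");
    Ok(())
}
```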
pub async fn wait_network_ready(&self) -> Result<(), ReadinessError> {
let nodes = self.nodes.readiness_nodes();
if self.is_singleton(&nodes) {
@@ -110,6 +114,10 @@ impl NodeControlHandle for LocalManualCluster {
.map_err(|err| err.into())
}
async fn stop_node(&self, index: usize) -> Result<(), DynError> {
self.nodes.stop_node(index).await.map_err(|err| err.into())
}
async fn start_node(&self, name: &str) -> Result<StartedNode, DynError> {
self.start_node_with(name, StartNodeOptions::default())
.await


@@ -170,12 +170,17 @@ impl LocalNodeManager {
#[must_use]
pub fn node_pid(&self, index: usize) -> Option<u32> {
let state = self
let mut state = self
.state
.lock()
.unwrap_or_else(|poisoned| poisoned.into_inner());
state.nodes.get(index).map(|node| node.pid())
let node = state.nodes.get_mut(index)?;
if node.is_running() {
Some(node.pid())
} else {
None
}
}
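node_pid now takes the state lock mutably and consults the node's liveness, so a stopped node reports None rather than a stale PID. A tiny self-contained sketch of that contract, with hypothetical stand-ins for the real handle:

```rust
// Hypothetical stand-in; the real is_running() presumably polls or
// reaps the child process rather than reading a flag.
struct ManagedNode {
    pid: u32,
    running: bool,
}

impl ManagedNode {
    fn is_running(&mut self) -> bool {
        self.running
    }

    fn pid(&self) -> u32 {
        self.pid
    }
}

// Report a PID only while the process is alive, so tests can assert
// node_pid(i).is_none() after a stop.
fn pid_if_running(node: &mut ManagedNode) -> Option<u32> {
    node.is_running().then(|| node.pid())
}
```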
pub fn stop_all(&self) {
@@ -361,6 +366,35 @@ impl LocalNodeManager {
Ok(())
}
pub async fn stop_node(&self, index: usize) -> Result<(), LocalNodeManagerError> {
let mut node = {
let mut state = self
.state
.lock()
.unwrap_or_else(|poisoned| poisoned.into_inner());
if index >= state.nodes.len() {
return Err(LocalNodeManagerError::NodeIndex { index });
}
state.nodes.remove(index)
};
node.stop().await;
let mut state = self
.state
.lock()
.unwrap_or_else(|poisoned| poisoned.into_inner());
if index <= state.nodes.len() {
state.nodes.insert(index, node);
} else {
state.nodes.push(node);
}
Ok(())
}
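stop_node is deliberate about its std::sync::Mutex: the node is taken out of the vector inside one critical section, the async shutdown is awaited with the lock released (holding a guard across an .await would stall every other task touching the manager), and the node is then re-inserted at its old index so index-based lookups stay stable. A condensed sketch of the same pattern, with a hypothetical Node type:

```rust
use std::sync::{Arc, Mutex};

// Hypothetical stand-in with an async shutdown.
struct Node;

impl Node {
    async fn stop(&mut self) {}
}

async fn stop_at(nodes: Arc<Mutex<Vec<Node>>>, index: usize) -> Result<(), String> {
    // First critical section: take the node out.
    let mut node = {
        let mut guard = nodes.lock().unwrap_or_else(|p| p.into_inner());
        if index >= guard.len() {
            return Err(format!("no node at index {index}"));
        }
        guard.remove(index)
    }; // guard dropped here, before the await

    node.stop().await;

    // Second critical section: put the stopped node back where it was.
    let mut guard = nodes.lock().unwrap_or_else(|p| p.into_inner());
    let at = index.min(guard.len());
    guard.insert(at, node);
    Ok(())
}
```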
async fn spawn_and_register_node(
&self,
node_name: &str,
@@ -416,6 +450,10 @@ impl NodeControlHandle for LocalNodeManager {
self.restart_node(index).await.map_err(|err| err.into())
}
async fn stop_node(&self, index: usize) -> Result<(), DynError> {
self.stop_node(index).await.map_err(|err| err.into())
}
async fn start_node(&self, name: &str) -> Result<StartedNode, DynError> {
self.start_node_with(name, StartNodeOptions::default())
.await


@@ -2,6 +2,7 @@ use std::sync::Arc;
use async_trait::async_trait;
use testing_framework_core::{
nodes::common::node::SpawnNodeError,
scenario::{
BlockFeed, BlockFeedTask, Deployer, DynError, Metrics, NodeClients, NodeControlCapability,
RunContext, Runner, Scenario, ScenarioError, spawn_block_feed,
@@ -28,7 +29,7 @@ pub enum LocalDeployerError {
#[error("failed to spawn local topology: {source}")]
Spawn {
#[source]
source: testing_framework_core::nodes::common::node::SpawnNodeError,
source: SpawnNodeError,
},
#[error("readiness probe failed: {source}")]
ReadinessFailed {
@@ -105,11 +106,14 @@ impl Deployer<NodeControlCapability> for LocalDeployer {
if self.membership_check {
let topology = Topology::from_nodes(nodes);
wait_for_readiness(&topology).await.map_err(|source| {
debug!(error = ?source, "local readiness failed");
LocalDeployerError::ReadinessFailed { source }
})?;
nodes = topology.into_nodes();
info!("local nodes are ready");
} else {
info!("skipping local membership readiness checks");
@@ -120,6 +124,7 @@
NodeClients::default(),
LocalNodeManagerSeed::from_topology(scenario.topology()),
));
node_control.initialize_with_nodes(nodes);
let node_clients = node_control.node_clients();
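The deployer change is small but load-bearing: the freshly spawned nodes are handed to the manager up front via initialize_with_nodes, presumably so the index-based stop_node and restart_node calls above resolve already-running processes rather than an empty manager.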


@@ -5,10 +5,12 @@ use testing_framework_core::{
topology::config::TopologyConfig,
};
use testing_framework_runner_local::LocalDeployer;
use tracing_subscriber::fmt::try_init;
#[tokio::test]
#[ignore = "requires local node binary and open ports"]
async fn local_restart_node() -> Result<(), Box<dyn std::error::Error + Send + Sync>> {
let _ = try_init();
let mut scenario = ScenarioBuilder::topology_with(|t| t.nodes(1))
.enable_node_control()
.with_run_duration(Duration::from_secs(1))
@@ -27,11 +29,11 @@ async fn local_restart_node() -> Result<(), Box<dyn std::error::Error + Send + Sync>> {
let new_pid = control.node_pid(0).ok_or("missing node pid")?;
assert_ne!(old_pid, new_pid, "expected a new process after restart");
let client = context
.node_clients()
.any_client()
.ok_or("no node clients available")?;
client.consensus_info().await?;
control.stop_node(0).await?;
assert!(
control.node_pid(0).is_none(),
"expected node pid to be absent after stop"
);
let _handle = runner.run(&mut scenario).await?;
@@ -41,6 +43,7 @@ async fn local_restart_node() -> Result<(), Box<dyn std::error::Error + Send + Sync>> {
#[tokio::test]
#[ignore = "requires local node binary and open ports"]
async fn manual_cluster_restart_node() -> Result<(), Box<dyn std::error::Error + Send + Sync>> {
let _ = try_init();
let deployer = LocalDeployer::default();
let cluster = deployer.manual_cluster(TopologyConfig::with_node_numbers(1))?;
@@ -53,8 +56,11 @@ async fn manual_cluster_restart_node() -> Result<(), Box<dyn std::error::Error + Send + Sync>> {
let new_pid = cluster.node_pid(0).ok_or("missing node pid")?;
assert_ne!(old_pid, new_pid, "expected a new process after restart");
let client = cluster.node_client("node-a").ok_or("missing node client")?;
client.consensus_info().await?;
cluster.stop_node(0).await?;
assert!(
cluster.node_pid(0).is_none(),
"expected node pid to be absent after stop"
);
Ok(())
}
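Both tests stay #[ignore]d because they need a local node binary and open ports; they can be run explicitly with cargo test -- --ignored, and the try_init() calls surface tracing output when they are.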