diff --git a/examples/doc-snippets/src/node_control_accessing_control.rs b/examples/doc-snippets/src/node_control_accessing_control.rs index a7cc27d..f4404f3 100644 --- a/examples/doc-snippets/src/node_control_accessing_control.rs +++ b/examples/doc-snippets/src/node_control_accessing_control.rs @@ -11,8 +11,8 @@ impl Workload for RestartWorkload { async fn start(&self, ctx: &RunContext) -> Result<(), DynError> { if let Some(control) = ctx.node_control() { - // Restart the first node (index 0) if supported. - control.restart_node(0).await?; + // Restart the first node by name if supported. + control.restart_node("node-0").await?; } Ok(()) } diff --git a/examples/doc-snippets/src/node_control_trait.rs b/examples/doc-snippets/src/node_control_trait.rs index 98976ca..6064b80 100644 --- a/examples/doc-snippets/src/node_control_trait.rs +++ b/examples/doc-snippets/src/node_control_trait.rs @@ -3,5 +3,5 @@ use testing_framework_core::scenario::DynError; #[async_trait] pub trait NodeControlHandle: Send + Sync { - async fn restart_node(&self, index: usize) -> Result<(), DynError>; + async fn restart_node(&self, name: &str) -> Result<(), DynError>; } diff --git a/testing-framework/core/src/scenario/control.rs b/testing-framework/core/src/scenario/control.rs index 6ad88d6..682937d 100644 --- a/testing-framework/core/src/scenario/control.rs +++ b/testing-framework/core/src/scenario/control.rs @@ -8,7 +8,7 @@ use crate::{ /// Deployer-agnostic control surface for runtime node operations. #[async_trait] pub trait NodeControlHandle: Send + Sync { - async fn restart_node(&self, _index: usize) -> Result<(), DynError> { + async fn restart_node(&self, _name: &str) -> Result<(), DynError> { Err("restart_node not supported by this deployer".into()) } @@ -24,7 +24,7 @@ pub trait NodeControlHandle: Send + Sync { Err("start_node_with not supported by this deployer".into()) } - async fn stop_node(&self, _index: usize) -> Result<(), DynError> { + async fn stop_node(&self, _name: &str) -> Result<(), DynError> { Err("stop_node not supported by this deployer".into()) } @@ -32,7 +32,7 @@ pub trait NodeControlHandle: Send + Sync { None } - fn node_pid(&self, _index: usize) -> Option { + fn node_pid(&self, _name: &str) -> Option { None } } diff --git a/testing-framework/deployers/compose/src/docker/control.rs b/testing-framework/deployers/compose/src/docker/control.rs index 4a27e02..24d924f 100644 --- a/testing-framework/deployers/compose/src/docker/control.rs +++ b/testing-framework/deployers/compose/src/docker/control.rs @@ -45,13 +45,9 @@ pub struct ComposeNodeControl { #[async_trait::async_trait] impl NodeControlHandle for ComposeNodeControl { - async fn restart_node(&self, index: usize) -> Result<(), DynError> { - restart_compose_service( - &self.compose_file, - &self.project_name, - &format!("node-{index}"), - ) - .await - .map_err(|err| format!("node restart failed: {err}").into()) + async fn restart_node(&self, name: &str) -> Result<(), DynError> { + restart_compose_service(&self.compose_file, &self.project_name, name) + .await + .map_err(|err| format!("node restart failed: {err}").into()) } } diff --git a/testing-framework/deployers/local/src/manual/mod.rs b/testing-framework/deployers/local/src/manual/mod.rs index 63df702..51a75bf 100644 --- a/testing-framework/deployers/local/src/manual/mod.rs +++ b/testing-framework/deployers/local/src/manual/mod.rs @@ -50,8 +50,8 @@ impl LocalManualCluster { } #[must_use] - pub fn node_pid(&self, index: usize) -> Option { - self.nodes.node_pid(index) + pub fn node_pid(&self, name: &str) -> Option { + self.nodes.node_pid(name) } pub async fn start_node(&self, name: &str) -> Result { @@ -73,12 +73,12 @@ impl LocalManualCluster { self.nodes.stop_all(); } - pub async fn restart_node(&self, index: usize) -> Result<(), ManualClusterError> { - Ok(self.nodes.restart_node(index).await?) + pub async fn restart_node(&self, name: &str) -> Result<(), ManualClusterError> { + Ok(self.nodes.restart_node(name).await?) } - pub async fn stop_node(&self, index: usize) -> Result<(), ManualClusterError> { - Ok(self.nodes.stop_node(index).await?) + pub async fn stop_node(&self, name: &str) -> Result<(), ManualClusterError> { + Ok(self.nodes.stop_node(name).await?) } pub async fn wait_network_ready(&self) -> Result<(), ReadinessError> { @@ -107,15 +107,15 @@ impl Drop for LocalManualCluster { #[async_trait::async_trait] impl NodeControlHandle for LocalManualCluster { - async fn restart_node(&self, index: usize) -> Result<(), DynError> { + async fn restart_node(&self, name: &str) -> Result<(), DynError> { self.nodes - .restart_node(index) + .restart_node(name) .await .map_err(|err| err.into()) } - async fn stop_node(&self, index: usize) -> Result<(), DynError> { - self.nodes.stop_node(index).await.map_err(|err| err.into()) + async fn stop_node(&self, name: &str) -> Result<(), DynError> { + self.nodes.stop_node(name).await.map_err(|err| err.into()) } async fn start_node(&self, name: &str) -> Result { @@ -138,8 +138,8 @@ impl NodeControlHandle for LocalManualCluster { self.node_client(name) } - fn node_pid(&self, index: usize) -> Option { - self.node_pid(index) + fn node_pid(&self, name: &str) -> Option { + self.node_pid(name) } } diff --git a/testing-framework/deployers/local/src/node_control/mod.rs b/testing-framework/deployers/local/src/node_control/mod.rs index 9193a4b..3bb5ca9 100644 --- a/testing-framework/deployers/local/src/node_control/mod.rs +++ b/testing-framework/deployers/local/src/node_control/mod.rs @@ -44,8 +44,8 @@ pub enum LocalNodeManagerError { PortAllocation { message: String }, #[error("node config patch failed: {message}")] ConfigPatch { message: String }, - #[error("node index {index} is out of bounds")] - NodeIndex { index: usize }, + #[error("node name '{name}' is unknown")] + NodeName { name: String }, #[error("failed to restart node: {source}")] Restart { #[source] @@ -145,6 +145,7 @@ impl LocalNodeManager { peer_ports: seed.peer_ports.clone(), peer_ports_by_name: seed.peer_ports_by_name.clone(), clients_by_name: HashMap::new(), + indices_by_name: HashMap::new(), nodes: Vec::new(), }; @@ -169,12 +170,13 @@ impl LocalNodeManager { } #[must_use] - pub fn node_pid(&self, index: usize) -> Option { + pub fn node_pid(&self, name: &str) -> Option { let mut state = self .state .lock() .unwrap_or_else(|poisoned| poisoned.into_inner()); + let index = *state.indices_by_name.get(name)?; let node = state.nodes.get_mut(index)?; if node.is_running() { Some(node.pid()) @@ -195,6 +197,7 @@ impl LocalNodeManager { .peer_ports_by_name .clone_from(&self.seed.peer_ports_by_name); state.clients_by_name.clear(); + state.indices_by_name.clear(); state.node_count = self.seed.node_count; self.node_clients.clear(); } @@ -211,6 +214,7 @@ impl LocalNodeManager { state.peer_ports.clear(); state.peer_ports_by_name.clear(); state.clients_by_name.clear(); + state.indices_by_name.clear(); state.node_count = 0; for (idx, node) in nodes.into_iter().enumerate() { @@ -290,6 +294,8 @@ impl LocalNodeManager { let index = state.node_count; let label = if name.trim().is_empty() { Self::default_label(index) + } else if name.starts_with("node-") { + name.to_string() } else { format!("node-{name}") }; @@ -334,18 +340,27 @@ impl LocalNodeManager { }) } - pub async fn restart_node(&self, index: usize) -> Result<(), LocalNodeManagerError> { - let mut node = { + pub async fn restart_node(&self, name: &str) -> Result<(), LocalNodeManagerError> { + let (index, mut node) = { let mut state = self .state .lock() .unwrap_or_else(|poisoned| poisoned.into_inner()); + let Some(index) = state.indices_by_name.get(name).copied() else { + return Err(LocalNodeManagerError::NodeName { + name: name.to_string(), + }); + }; + if index >= state.nodes.len() { - return Err(LocalNodeManagerError::NodeIndex { index }); + return Err(LocalNodeManagerError::NodeName { + name: name.to_string(), + }); } - state.nodes.remove(index) + let node = state.nodes.remove(index); + (index, node) }; node.restart() @@ -366,18 +381,27 @@ impl LocalNodeManager { Ok(()) } - pub async fn stop_node(&self, index: usize) -> Result<(), LocalNodeManagerError> { - let mut node = { + pub async fn stop_node(&self, name: &str) -> Result<(), LocalNodeManagerError> { + let (index, mut node) = { let mut state = self .state .lock() .unwrap_or_else(|poisoned| poisoned.into_inner()); + let Some(index) = state.indices_by_name.get(name).copied() else { + return Err(LocalNodeManagerError::NodeName { + name: name.to_string(), + }); + }; + if index >= state.nodes.len() { - return Err(LocalNodeManagerError::NodeIndex { index }); + return Err(LocalNodeManagerError::NodeName { + name: name.to_string(), + }); } - state.nodes.remove(index) + let node = state.nodes.remove(index); + (index, node) }; node.stop().await; @@ -446,12 +470,12 @@ fn apply_patch_if_needed( #[async_trait::async_trait] impl NodeControlHandle for LocalNodeManager { - async fn restart_node(&self, index: usize) -> Result<(), DynError> { - self.restart_node(index).await.map_err(|err| err.into()) + async fn restart_node(&self, name: &str) -> Result<(), DynError> { + self.restart_node(name).await.map_err(|err| err.into()) } - async fn stop_node(&self, index: usize) -> Result<(), DynError> { - self.stop_node(index).await.map_err(|err| err.into()) + async fn stop_node(&self, name: &str) -> Result<(), DynError> { + self.stop_node(name).await.map_err(|err| err.into()) } async fn start_node(&self, name: &str) -> Result { @@ -474,7 +498,7 @@ impl NodeControlHandle for LocalNodeManager { self.node_client(name) } - fn node_pid(&self, index: usize) -> Option { - self.node_pid(index) + fn node_pid(&self, name: &str) -> Option { + self.node_pid(name) } } diff --git a/testing-framework/deployers/local/src/node_control/state.rs b/testing-framework/deployers/local/src/node_control/state.rs index bcacad4..81fa525 100644 --- a/testing-framework/deployers/local/src/node_control/state.rs +++ b/testing-framework/deployers/local/src/node_control/state.rs @@ -7,6 +7,7 @@ pub(crate) struct LocalNodeManagerState { pub(crate) peer_ports: Vec, pub(crate) peer_ports_by_name: HashMap, pub(crate) clients_by_name: HashMap, + pub(crate) indices_by_name: HashMap, pub(crate) nodes: Vec, } @@ -26,6 +27,8 @@ impl LocalNodeManagerState { node: Node, ) { self.register_common(node_name, network_port, client); + let index = self.nodes.len(); + self.indices_by_name.insert(node_name.to_string(), index); self.node_count += 1; self.nodes.push(node); } diff --git a/testing-framework/deployers/local/tests/restart.rs b/testing-framework/deployers/local/tests/restart.rs index 98a19b6..a16ee50 100644 --- a/testing-framework/deployers/local/tests/restart.rs +++ b/testing-framework/deployers/local/tests/restart.rs @@ -22,16 +22,17 @@ async fn local_restart_node() -> Result<(), Box Result<(), Box 1 { for index in 0..node_count { - targets.push(Target::Node(index)); + targets.push(Target::Node(format!("node-{index}"))); } } else if node_count == 1 { info!("chaos restart skipping nodes: only one node configured"); @@ -76,7 +76,7 @@ impl RandomRestartWorkload { let ready = now.checked_sub(self.target_cooldown).unwrap_or(now); targets .iter() - .copied() + .cloned() .map(|target| (target, ready)) .collect() } @@ -111,16 +111,16 @@ impl RandomRestartWorkload { let available: Vec = targets .iter() - .copied() + .cloned() .filter(|target| cooldowns.get(target).is_none_or(|ready| *ready <= now)) .collect(); - if let Some(choice) = available.choose(&mut thread_rng()).copied() { + if let Some(choice) = available.choose(&mut thread_rng()).cloned() { tracing::debug!(?choice, "chaos restart picked target"); return Ok(choice); } - if let Some(choice) = targets.choose(&mut thread_rng()).copied() { + if let Some(choice) = targets.choose(&mut thread_rng()).cloned() { return Ok(choice); } return Err("chaos restart workload has no eligible targets".into()); @@ -158,10 +158,10 @@ impl Workload for RandomRestartWorkload { let target = self.pick_target(&targets, &cooldowns).await?; match target { - Target::Node(index) => { - tracing::info!(index, "chaos restarting node"); + Target::Node(ref name) => { + tracing::info!(name, "chaos restarting node"); handle - .restart_node(index) + .restart_node(name) .await .map_err(|err| format!("node restart failed: {err}"))? } @@ -172,7 +172,7 @@ impl Workload for RandomRestartWorkload { } } -#[derive(Clone, Copy, PartialEq, Eq, Hash, Debug)] +#[derive(Clone, PartialEq, Eq, Hash, Debug)] enum Target { - Node(usize), + Node(String), }