Mirror of https://github.com/logos-blockchain/logos-blockchain-testing.git (synced 2026-02-18 04:03:06 +00:00)
Use node names for restart/stop control
parent 26f312cf6b
commit bb31c03cab
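
In short: runtime node control (restart, stop, pid lookup) is now addressed by node name instead of positional index, and the local node manager keeps an indices_by_name map so names resolve to slots internally. A rough, non-authoritative sketch of the resulting control surface, with method bodies mirroring the hunks below (the DynError alias, the NoopDeployer impl, and the tokio-driven main are assumptions added purely for illustration):

// Sketch of the name-based control surface introduced by this commit.
// `DynError` approximates the framework's boxed error alias; `NoopDeployer`
// and the tokio `main` are illustration only, not repository code.
use async_trait::async_trait;

type DynError = Box<dyn std::error::Error + Send + Sync>;

#[async_trait]
pub trait NodeControlHandle: Send + Sync {
    // Deployers that cannot restart/stop nodes keep the "not supported" defaults.
    async fn restart_node(&self, _name: &str) -> Result<(), DynError> {
        Err("restart_node not supported by this deployer".into())
    }

    async fn stop_node(&self, _name: &str) -> Result<(), DynError> {
        Err("stop_node not supported by this deployer".into())
    }

    // PID of a running node, if the deployer tracks one.
    fn node_pid(&self, _name: &str) -> Option<u32> {
        None
    }
}

struct NoopDeployer;

#[async_trait]
impl NodeControlHandle for NoopDeployer {}

#[tokio::main]
async fn main() {
    // Nodes are now addressed by label ("node-0", "node-1", ...) instead of index.
    let control: &dyn NodeControlHandle = &NoopDeployer;
    assert!(control.restart_node("node-0").await.is_err());
    assert!(control.node_pid("node-0").is_none());
}
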
@@ -11,8 +11,8 @@ impl Workload for RestartWorkload {

     async fn start(&self, ctx: &RunContext) -> Result<(), DynError> {
         if let Some(control) = ctx.node_control() {
-            // Restart the first node (index 0) if supported.
-            control.restart_node(0).await?;
+            // Restart the first node by name if supported.
+            control.restart_node("node-0").await?;
         }
         Ok(())
     }
@@ -3,5 +3,5 @@ use testing_framework_core::scenario::DynError;

 #[async_trait]
 pub trait NodeControlHandle: Send + Sync {
-    async fn restart_node(&self, index: usize) -> Result<(), DynError>;
+    async fn restart_node(&self, name: &str) -> Result<(), DynError>;
 }
@@ -8,7 +8,7 @@ use crate::{
 /// Deployer-agnostic control surface for runtime node operations.
 #[async_trait]
 pub trait NodeControlHandle: Send + Sync {
-    async fn restart_node(&self, _index: usize) -> Result<(), DynError> {
+    async fn restart_node(&self, _name: &str) -> Result<(), DynError> {
         Err("restart_node not supported by this deployer".into())
     }

@@ -24,7 +24,7 @@ pub trait NodeControlHandle: Send + Sync {
         Err("start_node_with not supported by this deployer".into())
     }

-    async fn stop_node(&self, _index: usize) -> Result<(), DynError> {
+    async fn stop_node(&self, _name: &str) -> Result<(), DynError> {
         Err("stop_node not supported by this deployer".into())
     }

@@ -32,7 +32,7 @@ pub trait NodeControlHandle: Send + Sync {
         None
     }

-    fn node_pid(&self, _index: usize) -> Option<u32> {
+    fn node_pid(&self, _name: &str) -> Option<u32> {
         None
     }
 }
@@ -45,13 +45,9 @@ pub struct ComposeNodeControl {

 #[async_trait::async_trait]
 impl NodeControlHandle for ComposeNodeControl {
-    async fn restart_node(&self, index: usize) -> Result<(), DynError> {
-        restart_compose_service(
-            &self.compose_file,
-            &self.project_name,
-            &format!("node-{index}"),
-        )
-        .await
-        .map_err(|err| format!("node restart failed: {err}").into())
+    async fn restart_node(&self, name: &str) -> Result<(), DynError> {
+        restart_compose_service(&self.compose_file, &self.project_name, name)
+            .await
+            .map_err(|err| format!("node restart failed: {err}").into())
     }
 }
@@ -50,8 +50,8 @@ impl LocalManualCluster {
     }

     #[must_use]
-    pub fn node_pid(&self, index: usize) -> Option<u32> {
-        self.nodes.node_pid(index)
+    pub fn node_pid(&self, name: &str) -> Option<u32> {
+        self.nodes.node_pid(name)
     }

     pub async fn start_node(&self, name: &str) -> Result<StartedNode, ManualClusterError> {
@@ -73,12 +73,12 @@ impl LocalManualCluster {
         self.nodes.stop_all();
     }

-    pub async fn restart_node(&self, index: usize) -> Result<(), ManualClusterError> {
-        Ok(self.nodes.restart_node(index).await?)
+    pub async fn restart_node(&self, name: &str) -> Result<(), ManualClusterError> {
+        Ok(self.nodes.restart_node(name).await?)
     }

-    pub async fn stop_node(&self, index: usize) -> Result<(), ManualClusterError> {
-        Ok(self.nodes.stop_node(index).await?)
+    pub async fn stop_node(&self, name: &str) -> Result<(), ManualClusterError> {
+        Ok(self.nodes.stop_node(name).await?)
     }

     pub async fn wait_network_ready(&self) -> Result<(), ReadinessError> {
@@ -107,15 +107,15 @@ impl Drop for LocalManualCluster {

 #[async_trait::async_trait]
 impl NodeControlHandle for LocalManualCluster {
-    async fn restart_node(&self, index: usize) -> Result<(), DynError> {
+    async fn restart_node(&self, name: &str) -> Result<(), DynError> {
         self.nodes
-            .restart_node(index)
+            .restart_node(name)
             .await
             .map_err(|err| err.into())
     }

-    async fn stop_node(&self, index: usize) -> Result<(), DynError> {
-        self.nodes.stop_node(index).await.map_err(|err| err.into())
+    async fn stop_node(&self, name: &str) -> Result<(), DynError> {
+        self.nodes.stop_node(name).await.map_err(|err| err.into())
     }

     async fn start_node(&self, name: &str) -> Result<StartedNode, DynError> {
@@ -138,8 +138,8 @@ impl NodeControlHandle for LocalManualCluster {
         self.node_client(name)
     }

-    fn node_pid(&self, index: usize) -> Option<u32> {
-        self.node_pid(index)
+    fn node_pid(&self, name: &str) -> Option<u32> {
+        self.node_pid(name)
     }
 }
@@ -44,8 +44,8 @@ pub enum LocalNodeManagerError {
     PortAllocation { message: String },
     #[error("node config patch failed: {message}")]
     ConfigPatch { message: String },
-    #[error("node index {index} is out of bounds")]
-    NodeIndex { index: usize },
+    #[error("node name '{name}' is unknown")]
+    NodeName { name: String },
     #[error("failed to restart node: {source}")]
     Restart {
         #[source]
@@ -145,6 +145,7 @@ impl LocalNodeManager {
             peer_ports: seed.peer_ports.clone(),
             peer_ports_by_name: seed.peer_ports_by_name.clone(),
             clients_by_name: HashMap::new(),
+            indices_by_name: HashMap::new(),
             nodes: Vec::new(),
         };

@@ -169,12 +170,13 @@ impl LocalNodeManager {
     }

     #[must_use]
-    pub fn node_pid(&self, index: usize) -> Option<u32> {
+    pub fn node_pid(&self, name: &str) -> Option<u32> {
         let mut state = self
             .state
             .lock()
             .unwrap_or_else(|poisoned| poisoned.into_inner());

+        let index = *state.indices_by_name.get(name)?;
         let node = state.nodes.get_mut(index)?;
         if node.is_running() {
             Some(node.pid())
@@ -195,6 +197,7 @@ impl LocalNodeManager {
             .peer_ports_by_name
             .clone_from(&self.seed.peer_ports_by_name);
         state.clients_by_name.clear();
+        state.indices_by_name.clear();
         state.node_count = self.seed.node_count;
         self.node_clients.clear();
     }
@@ -211,6 +214,7 @@ impl LocalNodeManager {
         state.peer_ports.clear();
         state.peer_ports_by_name.clear();
         state.clients_by_name.clear();
+        state.indices_by_name.clear();
         state.node_count = 0;

         for (idx, node) in nodes.into_iter().enumerate() {
@@ -290,6 +294,8 @@ impl LocalNodeManager {
         let index = state.node_count;
         let label = if name.trim().is_empty() {
             Self::default_label(index)
+        } else if name.starts_with("node-") {
+            name.to_string()
         } else {
             format!("node-{name}")
         };
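
As shown in the hunk above, node labels are now normalized when a node starts: an empty name falls back to the default label, a name already prefixed with "node-" is kept as-is, and anything else gets the "node-" prefix. A small standalone sketch of that rule (default_label here is a stand-in, not the repository's implementation):

// Sketch of the label normalization rule from the hunk above.
// `default_label` is an illustrative stand-in.
fn default_label(index: usize) -> String {
    format!("node-{index}")
}

fn label_for(name: &str, index: usize) -> String {
    if name.trim().is_empty() {
        default_label(index)
    } else if name.starts_with("node-") {
        name.to_string()
    } else {
        format!("node-{name}")
    }
}

fn main() {
    assert_eq!(label_for("", 0), "node-0");
    assert_eq!(label_for("node-3", 7), "node-3");
    assert_eq!(label_for("a", 1), "node-a");
}
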
@@ -334,18 +340,27 @@ impl LocalNodeManager {
         })
     }

-    pub async fn restart_node(&self, index: usize) -> Result<(), LocalNodeManagerError> {
-        let mut node = {
+    pub async fn restart_node(&self, name: &str) -> Result<(), LocalNodeManagerError> {
+        let (index, mut node) = {
             let mut state = self
                 .state
                 .lock()
                 .unwrap_or_else(|poisoned| poisoned.into_inner());

+            let Some(index) = state.indices_by_name.get(name).copied() else {
+                return Err(LocalNodeManagerError::NodeName {
+                    name: name.to_string(),
+                });
+            };
+
             if index >= state.nodes.len() {
-                return Err(LocalNodeManagerError::NodeIndex { index });
+                return Err(LocalNodeManagerError::NodeName {
+                    name: name.to_string(),
+                });
             }

-            state.nodes.remove(index)
+            let node = state.nodes.remove(index);
+            (index, node)
         };

         node.restart()
@@ -366,18 +381,27 @@ impl LocalNodeManager {
         Ok(())
     }

-    pub async fn stop_node(&self, index: usize) -> Result<(), LocalNodeManagerError> {
-        let mut node = {
+    pub async fn stop_node(&self, name: &str) -> Result<(), LocalNodeManagerError> {
+        let (index, mut node) = {
             let mut state = self
                 .state
                 .lock()
                 .unwrap_or_else(|poisoned| poisoned.into_inner());

+            let Some(index) = state.indices_by_name.get(name).copied() else {
+                return Err(LocalNodeManagerError::NodeName {
+                    name: name.to_string(),
+                });
+            };
+
             if index >= state.nodes.len() {
-                return Err(LocalNodeManagerError::NodeIndex { index });
+                return Err(LocalNodeManagerError::NodeName {
+                    name: name.to_string(),
+                });
             }

-            state.nodes.remove(index)
+            let node = state.nodes.remove(index);
+            (index, node)
         };

         node.stop().await;
@@ -446,12 +470,12 @@ fn apply_patch_if_needed(

 #[async_trait::async_trait]
 impl NodeControlHandle for LocalNodeManager {
-    async fn restart_node(&self, index: usize) -> Result<(), DynError> {
-        self.restart_node(index).await.map_err(|err| err.into())
+    async fn restart_node(&self, name: &str) -> Result<(), DynError> {
+        self.restart_node(name).await.map_err(|err| err.into())
     }

-    async fn stop_node(&self, index: usize) -> Result<(), DynError> {
-        self.stop_node(index).await.map_err(|err| err.into())
+    async fn stop_node(&self, name: &str) -> Result<(), DynError> {
+        self.stop_node(name).await.map_err(|err| err.into())
     }

     async fn start_node(&self, name: &str) -> Result<StartedNode, DynError> {
@@ -474,7 +498,7 @@ impl NodeControlHandle for LocalNodeManager {
         self.node_client(name)
     }

-    fn node_pid(&self, index: usize) -> Option<u32> {
-        self.node_pid(index)
+    fn node_pid(&self, name: &str) -> Option<u32> {
+        self.node_pid(name)
     }
 }
@@ -7,6 +7,7 @@ pub(crate) struct LocalNodeManagerState {
     pub(crate) peer_ports: Vec<u16>,
     pub(crate) peer_ports_by_name: HashMap<String, u16>,
     pub(crate) clients_by_name: HashMap<String, ApiClient>,
+    pub(crate) indices_by_name: HashMap<String, usize>,
     pub(crate) nodes: Vec<Node>,
 }

@@ -26,6 +27,8 @@ impl LocalNodeManagerState {
         node: Node,
     ) {
         self.register_common(node_name, network_port, client);
+        let index = self.nodes.len();
+        self.indices_by_name.insert(node_name.to_string(), index);
         self.node_count += 1;
         self.nodes.push(node);
     }
@@ -22,16 +22,17 @@ async fn local_restart_node() -> Result<(), Box<dyn std::error::Error + Send + S

     let control = context.node_control().ok_or("node control not available")?;

-    let old_pid = control.node_pid(0).ok_or("missing node pid")?;
+    let node_name = "node-0";
+    let old_pid = control.node_pid(node_name).ok_or("missing node pid")?;

-    control.restart_node(0).await?;
+    control.restart_node(node_name).await?;

-    let new_pid = control.node_pid(0).ok_or("missing node pid")?;
+    let new_pid = control.node_pid(node_name).ok_or("missing node pid")?;
     assert_ne!(old_pid, new_pid, "expected a new process after restart");

-    control.stop_node(0).await?;
+    control.stop_node(node_name).await?;
     assert!(
-        control.node_pid(0).is_none(),
+        control.node_pid(node_name).is_none(),
         "expected node pid to be absent after stop"
     );

@@ -47,18 +48,18 @@ async fn manual_cluster_restart_node() -> Result<(), Box<dyn std::error::Error +
     let deployer = LocalDeployer::default();
     let cluster = deployer.manual_cluster(TopologyConfig::with_node_numbers(1))?;

-    cluster.start_node("a").await?;
+    let node_name = cluster.start_node("a").await?.name;

-    let old_pid = cluster.node_pid(0).ok_or("missing node pid")?;
+    let old_pid = cluster.node_pid(&node_name).ok_or("missing node pid")?;

-    cluster.restart_node(0).await?;
+    cluster.restart_node(&node_name).await?;

-    let new_pid = cluster.node_pid(0).ok_or("missing node pid")?;
+    let new_pid = cluster.node_pid(&node_name).ok_or("missing node pid")?;
     assert_ne!(old_pid, new_pid, "expected a new process after restart");

-    cluster.stop_node(0).await?;
+    cluster.stop_node(&node_name).await?;
     assert!(
-        cluster.node_pid(0).is_none(),
+        cluster.node_pid(&node_name).is_none(),
         "expected node pid to be absent after stop"
     );
@@ -44,7 +44,7 @@ impl RandomRestartWorkload {
         if self.include_nodes {
             if node_count > 1 {
                 for index in 0..node_count {
-                    targets.push(Target::Node(index));
+                    targets.push(Target::Node(format!("node-{index}")));
                 }
             } else if node_count == 1 {
                 info!("chaos restart skipping nodes: only one node configured");
@@ -76,7 +76,7 @@ impl RandomRestartWorkload {
         let ready = now.checked_sub(self.target_cooldown).unwrap_or(now);
         targets
             .iter()
-            .copied()
+            .cloned()
             .map(|target| (target, ready))
             .collect()
     }
@@ -111,16 +111,16 @@ impl RandomRestartWorkload {

         let available: Vec<Target> = targets
             .iter()
-            .copied()
+            .cloned()
             .filter(|target| cooldowns.get(target).is_none_or(|ready| *ready <= now))
             .collect();

-        if let Some(choice) = available.choose(&mut thread_rng()).copied() {
+        if let Some(choice) = available.choose(&mut thread_rng()).cloned() {
             tracing::debug!(?choice, "chaos restart picked target");
             return Ok(choice);
         }

-        if let Some(choice) = targets.choose(&mut thread_rng()).copied() {
+        if let Some(choice) = targets.choose(&mut thread_rng()).cloned() {
             return Ok(choice);
         }
         return Err("chaos restart workload has no eligible targets".into());
@@ -158,10 +158,10 @@ impl Workload for RandomRestartWorkload {
         let target = self.pick_target(&targets, &cooldowns).await?;

         match target {
-            Target::Node(index) => {
-                tracing::info!(index, "chaos restarting node");
+            Target::Node(ref name) => {
+                tracing::info!(name, "chaos restarting node");
                 handle
-                    .restart_node(index)
+                    .restart_node(name)
                     .await
                     .map_err(|err| format!("node restart failed: {err}"))?
             }
@@ -172,7 +172,7 @@ impl Workload for RandomRestartWorkload {
     }
 }

-#[derive(Clone, Copy, PartialEq, Eq, Hash, Debug)]
+#[derive(Clone, PartialEq, Eq, Hash, Debug)]
 enum Target {
-    Node(usize),
+    Node(String),
 }
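
A note on the last hunks: Target::Node now carries an owned String, so the enum can no longer derive Copy, which is why the iterator and choose call sites change from .copied() to .cloned(). A minimal standalone illustration of that constraint (not repository code):

// Minimal illustration: once the variant stores a String, the enum cannot
// derive Copy, so collections of it must be cloned instead of copied.
#[derive(Clone, PartialEq, Eq, Hash, Debug)]
enum Target {
    Node(String),
}

fn main() {
    let targets = vec![Target::Node("node-0".to_string())];
    // `.copied()` requires Copy; with String payloads we clone instead.
    let duplicated: Vec<Target> = targets.iter().cloned().collect();
    assert_eq!(targets, duplicated);
}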