// Mirror of https://github.com/logos-blockchain/logos-blockchain-testing.git
// (synced 2026-03-31 16:23:08 +00:00).
use std::{
|
|
marker::PhantomData,
|
|
sync::{
|
|
Arc,
|
|
atomic::{AtomicUsize, Ordering},
|
|
},
|
|
time::Duration,
|
|
};
|
|
|
|
use async_trait::async_trait;
|
|
use testing_framework_core::{
|
|
scenario::{
|
|
Application, CleanupGuard, ClusterControlProfile, Deployer, DeploymentPolicy, DynError,
|
|
FeedHandle, FeedRuntime, HttpReadinessRequirement, Metrics, NodeClients,
|
|
NodeControlCapability, NodeControlHandle, RetryPolicy, Runner, RuntimeAssembly, Scenario,
|
|
ScenarioError, SourceOrchestrationPlan, build_source_orchestration_plan, spawn_feed,
|
|
},
|
|
topology::DeploymentDescriptor,
|
|
};
|
|
use thiserror::Error;
|
|
use tokio_retry::{
|
|
RetryIf,
|
|
strategy::{ExponentialBackoff, jitter},
|
|
};
|
|
use tracing::{debug, info, warn};
|
|
|
|
use crate::{
|
|
env::{LocalDeployerEnv, Node, wait_local_http_readiness},
|
|
external::build_external_client,
|
|
keep_tempdir_from_env,
|
|
manual::ManualCluster,
|
|
node_control::{NodeManager, NodeManagerSeed},
|
|
};
|
|
|
|
// Defaults for the local readiness retry loop, used when the deployment
// policy does not provide its own `RetryPolicy` (see `default_local_retry_policy`).
const READINESS_ATTEMPTS: usize = 3;
const READINESS_BACKOFF_BASE_MS: u64 = 250;
const READINESS_BACKOFF_MAX_SECS: u64 = 2;
|
|
|
|
/// Cleanup guard that owns the spawned local node processes and the feed task.
struct LocalProcessGuard<E: LocalDeployerEnv> {
    // Dropping a `Node` stops its local process (see `cleanup`).
    nodes: Vec<Node<E>>,
    // `Option` so `cleanup` can move the handle out of the boxed guard.
    feed_task: Option<FeedHandle>,
}
|
|
|
|
impl<E: LocalDeployerEnv> LocalProcessGuard<E> {
|
|
fn new(nodes: Vec<Node<E>>, feed_task: FeedHandle) -> Self {
|
|
Self {
|
|
nodes,
|
|
feed_task: Some(feed_task),
|
|
}
|
|
}
|
|
}
|
|
|
|
impl<E: LocalDeployerEnv> CleanupGuard for LocalProcessGuard<E> {
    /// Tear down the feed first, then the node processes.
    fn cleanup(mut self: Box<Self>) {
        if let Some(feed) = self.feed_task.take() {
            let guard: Box<dyn CleanupGuard> = Box::new(feed);
            guard.cleanup();
        }
        // Nodes own local processes; dropping them stops the processes.
        drop(self.nodes);
    }
}
|
|
/// Spawns nodes as local processes.
#[derive(Clone)]
pub struct ProcessDeployer<E: LocalDeployerEnv> {
    // Gates the local readiness checks together with the policy's
    // `readiness_enabled` flag (see `build_retry_execution_config`).
    membership_check: bool,
    // Ties the deployer to one local environment without storing a value of it.
    _env: PhantomData<E>,
}
|
|
|
|
/// Errors returned by the local deployer.
#[derive(Debug, Error)]
pub enum ProcessDeployerError {
    /// Spawning the local node processes failed.
    #[error("failed to spawn local topology: {source}")]
    Spawn {
        #[source]
        source: DynError,
    },
    /// Nodes spawned but did not become ready within the retry budget.
    #[error("readiness probe failed: {source}")]
    ReadinessFailed {
        #[source]
        source: DynError,
    },
    /// The scenario's topology cannot be realised by this deployer.
    #[error("scenario topology is not supported by the local deployer")]
    UnsupportedTopology,
    /// The workload (feed) failed to start or run.
    #[error("workload failed: {source}")]
    WorkloadFailed {
        #[source]
        source: DynError,
    },
    /// Runtime assembly was attempted with an empty node-client set.
    #[error("runtime preflight failed: no node clients available")]
    RuntimePreflight,
    /// Planning or wiring of external sources failed.
    #[error("source orchestration failed: {source}")]
    SourceOrchestration {
        #[source]
        source: DynError,
    },
    /// Scenario expectations could not be captured or were not met.
    #[error("expectations failed: {source}")]
    ExpectationsFailed {
        #[source]
        source: DynError,
    },
}
|
|
|
|
// Internal error type for a single spawn+readiness attempt; converted into
// `ProcessDeployerError` once the retry loop gives up.
#[derive(Debug, Error)]
enum RetryAttemptError {
    /// The node processes could not be spawned.
    #[error("failed to spawn local topology: {source}")]
    Spawn {
        #[source]
        source: DynError,
    },
    /// Nodes spawned but the readiness probe failed.
    #[error("readiness probe failed: {source}")]
    Readiness {
        #[source]
        source: DynError,
    },
}
|
|
|
|
impl From<RetryAttemptError> for ProcessDeployerError {
|
|
fn from(value: RetryAttemptError) -> Self {
|
|
match value {
|
|
RetryAttemptError::Spawn { source } => Self::Spawn { source },
|
|
RetryAttemptError::Readiness { source } => Self::ReadinessFailed { source },
|
|
}
|
|
}
|
|
}
|
|
|
|
// Per-attempt settings derived once from the deployment policy and shared by
// the retry closure (hence `Copy`).
#[derive(Clone, Copy)]
struct RetryExecutionConfig {
    // Upper bound on spawn attempts; always at least 1.
    max_attempts: usize,
    // Preserve node temp directories after shutdown.
    keep_tempdir: bool,
    // Whether to run the HTTP readiness probe at all.
    readiness_enabled: bool,
    // Requirement passed to `wait_local_http_readiness`.
    readiness_requirement: HttpReadinessRequirement,
}
|
|
|
|
impl From<ScenarioError> for ProcessDeployerError {
|
|
fn from(value: ScenarioError) -> Self {
|
|
match value {
|
|
ScenarioError::Workload(source) => Self::WorkloadFailed { source },
|
|
ScenarioError::ExpectationCapture(source)
|
|
| ScenarioError::ExpectationFailedDuringCapture(source)
|
|
| ScenarioError::Expectations(source) => Self::ExpectationsFailed { source },
|
|
}
|
|
}
|
|
}
|
|
|
|
// Capability-free deployment: no node-control handle is wired into the runner.
#[async_trait]
impl<E: LocalDeployerEnv> Deployer<E, ()> for ProcessDeployer<E> {
    type Error = ProcessDeployerError;

    async fn deploy(&self, scenario: &Scenario<E, ()>) -> Result<Runner<E>, Self::Error> {
        self.deploy_without_node_control(scenario).await
    }
}
|
|
|
|
// Node-control deployment: scenarios gain a `NodeManager` handle so they can
// stop/restart nodes at runtime.
#[async_trait]
impl<E: LocalDeployerEnv> Deployer<E, NodeControlCapability> for ProcessDeployer<E> {
    type Error = ProcessDeployerError;

    async fn deploy(
        &self,
        scenario: &Scenario<E, NodeControlCapability>,
    ) -> Result<Runner<E>, Self::Error> {
        self.deploy_with_node_control(scenario).await
    }
}
|
|
|
|
impl<E: LocalDeployerEnv> ProcessDeployer<E> {
    /// Construct a local deployer.
    #[must_use]
    pub fn new() -> Self {
        Self::default()
    }

    /// Enable or disable membership readiness checks.
    #[must_use]
    pub fn with_membership_check(mut self, enabled: bool) -> Self {
        self.membership_check = enabled;
        self
    }

    /// Build a manual cluster from a prepared topology descriptor.
    #[must_use]
    pub fn manual_cluster_from_descriptors(&self, descriptors: E::Deployment) -> ManualCluster<E> {
        ManualCluster::from_topology(descriptors)
    }

    /// Deploy without node control: nodes and the feed are owned by a
    /// `LocalProcessGuard` attached to the returned runner.
    async fn deploy_without_node_control(
        &self,
        scenario: &Scenario<E, ()>,
    ) -> Result<Runner<E>, ProcessDeployerError> {
        // Source planning is currently resolved here before node spawn/runtime setup.
        let source_plan = build_source_orchestration_plan(scenario).map_err(|source| {
            ProcessDeployerError::SourceOrchestration {
                source: source.into(),
            }
        })?;

        log_local_deploy_start(
            scenario.deployment().node_count(),
            scenario.deployment_policy(),
            false,
        );

        let nodes = Self::spawn_nodes_for_scenario(scenario, self.membership_check).await?;
        let node_clients = NodeClients::<E>::new(nodes.iter().map(|node| node.client()).collect());

        // External sources add extra clients on top of the spawned nodes' own.
        let node_clients = merge_source_clients_for_local::<E>(&source_plan, node_clients)
            .map_err(|source| ProcessDeployerError::SourceOrchestration { source })?;

        let runtime = run_context_for(
            scenario.deployment().clone(),
            node_clients,
            scenario.duration(),
            scenario.expectation_cooldown(),
            scenario.cluster_control_profile(),
            None,
        )
        .await?;

        // The guard owns the nodes: dropping it stops the local processes.
        let cleanup_guard: Box<dyn CleanupGuard> =
            Box::new(LocalProcessGuard::<E>::new(nodes, runtime.feed_task));

        Ok(runtime.assembly.build_runner(Some(cleanup_guard)))
    }

    /// Deploy with node control: node ownership moves to the `NodeManager`, so
    /// the runner's cleanup guard only covers the feed task.
    async fn deploy_with_node_control(
        &self,
        scenario: &Scenario<E, NodeControlCapability>,
    ) -> Result<Runner<E>, ProcessDeployerError> {
        // Source planning is currently resolved here before node spawn/runtime setup.
        let source_plan = build_source_orchestration_plan(scenario).map_err(|source| {
            ProcessDeployerError::SourceOrchestration {
                source: source.into(),
            }
        })?;

        log_local_deploy_start(
            scenario.deployment().node_count(),
            scenario.deployment_policy(),
            true,
        );

        let nodes = Self::spawn_nodes_for_scenario(scenario, self.membership_check).await?;
        let node_control = self.node_control_from(scenario, nodes);
        let node_clients =
            merge_source_clients_for_local::<E>(&source_plan, node_control.node_clients())
                .map_err(|source| ProcessDeployerError::SourceOrchestration { source })?;
        let runtime = run_context_for(
            scenario.deployment().clone(),
            node_clients,
            scenario.duration(),
            scenario.expectation_cooldown(),
            scenario.cluster_control_profile(),
            Some(node_control),
        )
        .await?;

        Ok(runtime
            .assembly
            .build_runner(Some(Box::new(runtime.feed_task))))
    }

    /// Build a `NodeManager` seeded with the already-spawned nodes.
    fn node_control_from(
        &self,
        scenario: &Scenario<E, NodeControlCapability>,
        nodes: Vec<Node<E>>,
    ) -> Arc<NodeManager<E>> {
        let node_control = Arc::new(NodeManager::new_with_seed(
            scenario.deployment().clone(),
            // Clients are registered via `initialize_with_nodes` below.
            NodeClients::default(),
            keep_tempdir(scenario.deployment_policy()),
            NodeManagerSeed::default(),
        ));
        node_control.initialize_with_nodes(nodes);
        node_control
    }

    /// Spawn the scenario's nodes; generic over capabilities since both deploy
    /// paths share the same spawn/readiness logic.
    async fn spawn_nodes_for_scenario<Caps>(
        scenario: &Scenario<E, Caps>,
        membership_check: bool,
    ) -> Result<Vec<Node<E>>, ProcessDeployerError> {
        info!(
            nodes = scenario.deployment().node_count(),
            "spawning local nodes"
        );
        Self::spawn_with_readiness_retry(
            scenario.deployment(),
            membership_check,
            scenario.deployment_policy(),
        )
        .await
    }

    /// Spawn nodes and probe readiness, retrying with jittered exponential
    /// backoff up to the policy's attempt budget.
    async fn spawn_with_readiness_retry(
        descriptors: &E::Deployment,
        membership_check: bool,
        deployment_policy: DeploymentPolicy,
    ) -> Result<Vec<Node<E>>, ProcessDeployerError> {
        let (retry_policy, execution) =
            build_retry_execution_config(deployment_policy, membership_check);
        // Shared attempt counter: incremented by each attempt, read by the
        // retry predicate to decide whether the budget is exhausted.
        let attempts = Arc::new(AtomicUsize::new(0));
        let strategy = retry_backoff_strategy(retry_policy, execution.max_attempts);
        let operation = {
            let attempts = Arc::clone(&attempts);
            move || {
                // Clone again per invocation: the closure is called once per attempt.
                let attempts = Arc::clone(&attempts);
                async move { run_retry_attempt::<E>(descriptors, execution, attempts).await }
            }
        };
        let should_retry = retry_decision(Arc::clone(&attempts), execution.max_attempts);

        let nodes = RetryIf::spawn(strategy, operation, should_retry).await?;
        Ok(nodes)
    }
}
|
|
|
|
/// Add a client for every external source in the plan to the node-client set.
///
/// Prefers the environment-specific client (`E::external_node_client`) and
/// falls back to the generic external client builder; the first hard failure
/// aborts the merge.
// NOTE(review): `node_clients` is not declared `mut`, yet `add_node` is called
// on it — presumably `add_node` takes `&self` via interior mutability; confirm
// against the `NodeClients` definition.
fn merge_source_clients_for_local<E: LocalDeployerEnv>(
    source_plan: &SourceOrchestrationPlan,
    node_clients: NodeClients<E>,
) -> Result<NodeClients<E>, DynError> {
    for source in source_plan.external_sources() {
        let client =
            E::external_node_client(source).or_else(|_| build_external_client::<E>(source))?;
        node_clients.add_node(client);
    }
    Ok(node_clients)
}
|
|
|
|
fn build_retry_execution_config(
|
|
deployment_policy: DeploymentPolicy,
|
|
membership_check: bool,
|
|
) -> (RetryPolicy, RetryExecutionConfig) {
|
|
let retry_policy = retry_policy_from(deployment_policy);
|
|
let execution = RetryExecutionConfig {
|
|
max_attempts: retry_policy.max_attempts.max(1),
|
|
keep_tempdir: keep_tempdir(deployment_policy),
|
|
readiness_enabled: deployment_policy.readiness_enabled && membership_check,
|
|
readiness_requirement: deployment_policy.readiness_requirement,
|
|
};
|
|
|
|
(retry_policy, execution)
|
|
}
|
|
|
|
async fn run_retry_attempt<E: LocalDeployerEnv>(
|
|
descriptors: &E::Deployment,
|
|
execution: RetryExecutionConfig,
|
|
attempts: Arc<AtomicUsize>,
|
|
) -> Result<Vec<Node<E>>, RetryAttemptError> {
|
|
let attempt = attempts.fetch_add(1, Ordering::Relaxed) + 1;
|
|
let nodes = spawn_nodes_for_attempt::<E>(descriptors, execution.keep_tempdir).await?;
|
|
run_readiness_for_attempt::<E>(attempt, nodes, execution).await
|
|
}
|
|
|
|
fn retry_policy_from(deployment_policy: DeploymentPolicy) -> RetryPolicy {
|
|
deployment_policy
|
|
.retry_policy
|
|
.unwrap_or_else(default_local_retry_policy)
|
|
}
|
|
|
|
fn retry_backoff_strategy(
|
|
retry_policy: RetryPolicy,
|
|
max_attempts: usize,
|
|
) -> impl Iterator<Item = Duration> {
|
|
ExponentialBackoff::from_millis(retry_policy.base_delay.as_millis() as u64)
|
|
.max_delay(retry_policy.max_delay)
|
|
.map(jitter)
|
|
.take(max_attempts.saturating_sub(1))
|
|
}
|
|
|
|
async fn spawn_nodes_for_attempt<E: LocalDeployerEnv>(
|
|
descriptors: &E::Deployment,
|
|
keep_tempdir: bool,
|
|
) -> Result<Vec<Node<E>>, RetryAttemptError> {
|
|
NodeManager::<E>::spawn_initial_nodes(descriptors, keep_tempdir)
|
|
.await
|
|
.map_err(|source| RetryAttemptError::Spawn {
|
|
source: source.into(),
|
|
})
|
|
}
|
|
|
|
async fn run_readiness_for_attempt<E: LocalDeployerEnv>(
|
|
attempt: usize,
|
|
nodes: Vec<Node<E>>,
|
|
execution: RetryExecutionConfig,
|
|
) -> Result<Vec<Node<E>>, RetryAttemptError> {
|
|
if !execution.readiness_enabled {
|
|
info!("skipping local readiness checks");
|
|
return Ok(nodes);
|
|
}
|
|
|
|
match wait_local_http_readiness::<E>(&nodes, execution.readiness_requirement).await {
|
|
Ok(()) => {
|
|
info!(attempt, "local nodes are ready");
|
|
Ok(nodes)
|
|
}
|
|
Err(source) => {
|
|
let error: DynError = source.into();
|
|
debug!(attempt, error = ?error, "local readiness failed");
|
|
drop(nodes);
|
|
Err(RetryAttemptError::Readiness { source: error })
|
|
}
|
|
}
|
|
}
|
|
|
|
fn retry_decision(
|
|
attempts: Arc<AtomicUsize>,
|
|
max_attempts: usize,
|
|
) -> impl FnMut(&RetryAttemptError) -> bool {
|
|
move |error: &RetryAttemptError| {
|
|
let attempt = attempts.load(Ordering::Relaxed);
|
|
if attempt < max_attempts {
|
|
warn!(
|
|
attempt,
|
|
max_attempts,
|
|
error = %error,
|
|
"local spawn/readiness failed; retrying with backoff"
|
|
);
|
|
true
|
|
} else {
|
|
false
|
|
}
|
|
}
|
|
}
|
|
|
|
impl<E: LocalDeployerEnv> Default for ProcessDeployer<E> {
    // Membership readiness checks are on by default; opt out via
    // `with_membership_check(false)`.
    fn default() -> Self {
        Self {
            membership_check: true,
            _env: PhantomData,
        }
    }
}
|
|
|
|
/// Retry policy used when the deployment policy does not specify one:
/// 3 attempts, 250 ms base delay, capped at 2 s.
const fn default_local_retry_policy() -> RetryPolicy {
    RetryPolicy::new(
        READINESS_ATTEMPTS,
        Duration::from_millis(READINESS_BACKOFF_BASE_MS),
        Duration::from_secs(READINESS_BACKOFF_MAX_SECS),
    )
}
|
|
|
|
/// Keep node temp directories when the cleanup policy preserves artifacts or
/// the environment override (`keep_tempdir_from_env`) requests it.
fn keep_tempdir(policy: DeploymentPolicy) -> bool {
    policy.cleanup_policy.preserve_artifacts || keep_tempdir_from_env()
}
|
|
|
|
async fn spawn_feed_with<E: Application>(
|
|
node_clients: &NodeClients<E>,
|
|
) -> Result<(<E::FeedRuntime as FeedRuntime>::Feed, FeedHandle), ProcessDeployerError> {
|
|
let node_count = node_clients.len();
|
|
debug!(nodes = node_count, "starting local feed");
|
|
|
|
if node_count == 0 {
|
|
return Err(ProcessDeployerError::WorkloadFailed {
|
|
source: "feed requires at least one node".into(),
|
|
});
|
|
}
|
|
|
|
info!("starting feed");
|
|
|
|
spawn_feed::<E>(node_clients.clone())
|
|
.await
|
|
.map_err(workload_error)
|
|
}
|
|
|
|
/// Wrap a dynamic error as a workload failure (handy with `map_err`).
fn workload_error(source: DynError) -> ProcessDeployerError {
    ProcessDeployerError::WorkloadFailed { source }
}
|
|
|
|
/// Emit one structured log line summarising the deployment about to start.
fn log_local_deploy_start(node_count: usize, policy: DeploymentPolicy, has_node_control: bool) {
    info!(
        nodes = node_count,
        node_control = has_node_control,
        readiness_enabled = policy.readiness_enabled,
        readiness_requirement = ?policy.readiness_requirement,
        "starting local deployment"
    );
}
|
|
|
|
// Intermediate bundle produced by `run_context_for`: the assembled runtime
// plus the feed handle the caller must attach to a cleanup guard.
struct RuntimeContext<E: Application> {
    assembly: RuntimeAssembly<E>,
    feed_task: FeedHandle,
}
|
|
|
|
async fn run_context_for<E: Application>(
|
|
descriptors: E::Deployment,
|
|
node_clients: NodeClients<E>,
|
|
duration: Duration,
|
|
expectation_cooldown: Duration,
|
|
cluster_control_profile: ClusterControlProfile,
|
|
node_control: Option<Arc<dyn NodeControlHandle<E>>>,
|
|
) -> Result<RuntimeContext<E>, ProcessDeployerError> {
|
|
if node_clients.is_empty() {
|
|
return Err(ProcessDeployerError::RuntimePreflight);
|
|
}
|
|
|
|
let (feed, feed_task) = spawn_feed_with::<E>(&node_clients).await?;
|
|
let mut assembly = RuntimeAssembly::new(
|
|
descriptors,
|
|
node_clients,
|
|
duration,
|
|
expectation_cooldown,
|
|
cluster_control_profile,
|
|
Metrics::empty(),
|
|
feed,
|
|
);
|
|
if let Some(node_control) = node_control {
|
|
assembly = assembly.with_node_control(node_control);
|
|
}
|
|
|
|
Ok(RuntimeContext {
|
|
assembly,
|
|
feed_task,
|
|
})
|
|
}
|