demo-apps: kvstore, queue, and openraft_kv

Andrus Salumets 2026-04-14 21:10:18 +07:00 committed by GitHub
commit 8700bd5a6c
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
102 changed files with 7454 additions and 74 deletions

View File

@ -6,7 +6,11 @@ exclude-dev = true
no-default-features = true
[advisories]
-ignore = []
+ignore = [
+# Existing workspace dependencies still resolve rand 0.8 via tera/tokio-retry.
+# Track removal when those upstream edges move to a fixed release.
+"RUSTSEC-2026-0097",
+]
yanked = "deny"
[bans]

Cargo.lock (generated): 766 changed lines
File diff suppressed because it is too large

View File

@ -4,6 +4,18 @@ members = [
"cfgsync/artifacts",
"cfgsync/core",
"cfgsync/runtime",
"examples/kvstore/examples",
"examples/kvstore/kvstore-node",
"examples/kvstore/testing/integration",
"examples/kvstore/testing/workloads",
"examples/openraft_kv/examples",
"examples/openraft_kv/openraft-kv-node",
"examples/openraft_kv/testing/integration",
"examples/openraft_kv/testing/workloads",
"examples/queue/examples",
"examples/queue/queue-node",
"examples/queue/testing/integration",
"examples/queue/testing/workloads",
"examples/metrics_counter/examples",
"examples/metrics_counter/metrics-counter-node",
"examples/metrics_counter/testing/integration",
@ -56,6 +68,8 @@ bytes = { default-features = false, version = "1.3" }
hex = { default-features = false, version = "0.4.3" }
libp2p = { default-features = false, version = "0.55" }
num-bigint = { default-features = false, version = "0.4" }
openraft = { default-features = true, features = ["serde", "type-alias"], version = "0.10.0-alpha.17" }
openraft-memstore = { default-features = true, version = "0.10.0-alpha.17" }
parking_lot = { default-features = false, version = "0.12" }
rand = { default-features = false, features = ["std", "std_rng"], version = "0.8" }
reqwest = { default-features = false, version = "0.12" }

View File

@ -0,0 +1,314 @@
# Observation Runtime Plan
## Why this work exists
TF is good at deployment plumbing. It is weak at continuous observation.
Today, the same problems are solved repeatedly with custom loops:
- TF block feed logic in Logos
- Cucumber manual-cluster polling loops
- ad hoc catch-up scans for wallet and chain state
- app-local state polling in expectations
That is the gap this work should close.
The goal is not a generic "distributed systems DSL".
The goal is one reusable observation runtime that:
- continuously collects data from dynamic sources
- keeps typed materialized state
- exposes both current snapshot and delta/history views
- fits naturally in TF scenarios and Cucumber manual-cluster code
## Constraints
### TF constraints
- TF abstractions must stay universal and simple.
- TF must not know app semantics like blocks, wallets, leaders, jobs, or topics.
- TF must remain useful for simple apps such as `openraft_kv`, not only Logos.
### App constraints
- Apps must be able to build richer abstractions on top of TF.
- Logos must be able to support:
- current block-feed replacement
- fork-aware chain state
- public-peer sync targets
- multi-wallet UTXO tracking
- Apps must be able to adopt this incrementally.
### Migration constraints
- We do not want a flag-day rewrite.
- Existing loops can coexist with the new runtime until replacements are proven.
## Non-goals
This work should not:
- put feed back onto the base `Application` trait
- build app-specific semantics into TF core
- replace filesystem blockchain snapshots used for startup/restore
- force every app to use continuous observation
- introduce a large public abstraction stack that nobody can explain
## Core idea
Introduce one TF-level observation runtime.
That runtime owns:
- source refresh
- scheduling
- polling/ingestion
- bounded history
- latest snapshot caching
- delta publication
- freshness/error tracking
- lifecycle hooks for TF and Cucumber
Apps own:
- source types
- raw observation logic
- materialized state
- snapshot shape
- delta/event shape
- higher-level projections such as wallet state
## Public TF surface
The TF public surface should stay small.
### `ObservedSource<S>`
A named source instance.
Used for:
- local node clients
- public peer endpoints
- any other app-owned source type
### `SourceProvider<S>`
Returns the current source set.
This must support dynamic source lists because:
- manual cluster nodes come and go
- Cucumber worlds may attach public peers
- node control may restart or replace sources during a run
### `Observer`
App-owned observation logic.
It defines:
- `Source`
- `State`
- `Snapshot`
- `Event`
And it implements:
- `init(...)`
- `poll(...)`
- `snapshot(...)`
The important boundary is:
- TF owns the runtime
- app code owns materialization
### `ObservationRuntime`
The engine that:
- starts the loop
- refreshes sources
- calls `poll(...)`
- stores history
- publishes deltas
- updates latest snapshot
- tracks last error and freshness
### `ObservationHandle`
The read-side interface for workloads, expectations, and Cucumber steps.
It should expose at least:
- latest snapshot
- delta subscription
- bounded history
- last error
## Intended shape
```rust
pub struct ObservedSource<S> {
pub name: String,
pub source: S,
}
#[async_trait]
pub trait SourceProvider<S>: Send + Sync + 'static {
async fn sources(&self) -> Vec<ObservedSource<S>>;
}
#[async_trait]
pub trait Observer: Send + Sync + 'static {
type Source: Clone + Send + Sync + 'static;
type State: Send + Sync + 'static;
type Snapshot: Clone + Send + Sync + 'static;
type Event: Clone + Send + Sync + 'static;
async fn init(
&self,
sources: &[ObservedSource<Self::Source>],
) -> Result<Self::State, DynError>;
async fn poll(
&self,
sources: &[ObservedSource<Self::Source>],
state: &mut Self::State,
) -> Result<Vec<Self::Event>, DynError>;
fn snapshot(&self, state: &Self::State) -> Self::Snapshot;
}
```
This is enough.
If more helper layers are needed, they should stay internal first.
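The intended-shape block stops before `ObservationHandle`. One plausible reading of its read side, assuming deltas ride on a tokio broadcast channel; nothing here is committed API:
```rust
use std::{
    collections::VecDeque,
    sync::{Arc, RwLock},
};

use tokio::sync::broadcast;

// Sketch only: every field and method name beyond the bullet list above is
// an assumption, not a committed API.
pub struct ObservationHandle<O: Observer> {
    latest: Arc<RwLock<Option<O::Snapshot>>>,
    history: Arc<RwLock<VecDeque<O::Event>>>, // bounded by the runtime
    last_error: Arc<RwLock<Option<String>>>,
    events: broadcast::Sender<O::Event>,
}

impl<O: Observer> ObservationHandle<O> {
    /// Latest materialized snapshot, if the observer has produced one.
    pub fn latest_snapshot(&self) -> Option<O::Snapshot> {
        self.latest.read().expect("observation lock poisoned").clone()
    }

    /// Subscribes to deltas published after this call.
    pub fn subscribe(&self) -> broadcast::Receiver<O::Event> {
        self.events.subscribe()
    }

    /// Bounded history of recent deltas.
    pub fn history(&self) -> Vec<O::Event> {
        self.history
            .read()
            .expect("observation lock poisoned")
            .iter()
            .cloned()
            .collect()
    }

    /// Most recent poll error, if any.
    pub fn last_error(&self) -> Option<String> {
        self.last_error.read().expect("observation lock poisoned").clone()
    }
}
```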
## How current use cases fit
### `openraft_kv`
Use one simple observer.
- sources: node clients
- state: latest per-node Raft state
- snapshot: sorted node-state view
- events: optional deltas, possibly empty at first
This is the simplest proving case.
It validates the runtime without dragging in Logos complexity.
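As a shape check, here is roughly what that observer could look like against the traits sketched above. `RaftNodeClient` and `RaftNodeState` are illustrative placeholders, not existing types:
```rust
use std::collections::BTreeMap;

use async_trait::async_trait;

// `Observer`, `ObservedSource`, and `DynError` refer to the intended-shape
// sketch above; the two types below are invented for illustration.
#[derive(Clone)]
pub struct RaftNodeClient; // wraps one node's HTTP endpoint

#[derive(Clone, Debug, PartialEq)]
pub struct RaftNodeState; // role, leader, membership, log progress

impl RaftNodeClient {
    /// Placeholder for a `GET /state` call against one node.
    pub async fn fetch_state(&self) -> Result<RaftNodeState, DynError> {
        Ok(RaftNodeState)
    }
}

pub struct OpenRaftKvObserver;

#[async_trait]
impl Observer for OpenRaftKvObserver {
    type Source = RaftNodeClient;
    type State = BTreeMap<String, RaftNodeState>;
    type Snapshot = Vec<(String, RaftNodeState)>;
    type Event = (); // no deltas at first, matching the plan

    async fn init(
        &self,
        _sources: &[ObservedSource<Self::Source>],
    ) -> Result<Self::State, DynError> {
        Ok(BTreeMap::new())
    }

    async fn poll(
        &self,
        sources: &[ObservedSource<Self::Source>],
        state: &mut Self::State,
    ) -> Result<Vec<Self::Event>, DynError> {
        for source in sources {
            // Overwrite the previous view of this node; the BTreeMap keeps
            // the "sorted node-state view" ordering for free.
            let node_state = source.source.fetch_state().await?;
            state.insert(source.name.clone(), node_state);
        }
        Ok(Vec::new())
    }

    fn snapshot(&self, state: &Self::State) -> Self::Snapshot {
        state
            .iter()
            .map(|(name, node_state)| (name.clone(), node_state.clone()))
            .collect()
    }
}
```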
### Logos block feed replacement
Use one shared chain observer.
- sources: local node clients
- state:
- node heads
- block graph
- heights
- seen headers
- recent history
- snapshot:
- current head/lib/graph summary
- events:
- newly discovered blocks
This covers both existing Logos feed use cases:
- current snapshot consumers
- delta/subscription consumers
### Cucumber manual-cluster sync
Use the same observer runtime with a different source set.
- sources:
- local manual-cluster node clients
- public peer endpoints
- state:
- local consensus views
- public consensus views
- derived majority public target
- snapshot:
- current local and public sync picture
This removes custom poll/sleep loops from steps.
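A step helper then reduces to a predicate over the handle's latest snapshot. A hedged sketch, reusing the `ObservationHandle` shape above; `SyncObserver` and its snapshot fields are invented for illustration:
```rust
use std::time::Duration;

// The generic wait loop lives once in shared code instead of being
// re-implemented in every step definition.
async fn wait_until_synced(
    handle: &ObservationHandle<SyncObserver>,
    timeout: Duration,
) -> Result<(), DynError> {
    let deadline = tokio::time::Instant::now() + timeout;
    while tokio::time::Instant::now() < deadline {
        if let Some(snapshot) = handle.latest_snapshot() {
            // The observer derives the majority public target; the step only
            // states the condition it is waiting for.
            if snapshot.local_height >= snapshot.public_target_height {
                return Ok(());
            }
        }
        tokio::time::sleep(Duration::from_millis(200)).await;
    }
    Err("local nodes did not reach the public sync target in time".into())
}
```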
### Multi-wallet fork-aware tracking
This should not be a TF concept.
It should be a Logos projection built on top of the shared chain observer.
- input: chain observer state
- output: per-header wallet state cache keyed by block header
- property: naturally fork-aware because it follows actual ancestry
That replaces repeated backward scans from tip with continuous maintained state.
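At signature level the projection is just a fold over newly observed blocks; every type below is a hypothetical Logos-side stand-in, not TF API:
```rust
use std::collections::HashMap;

#[derive(Clone, Copy, PartialEq, Eq, Hash)]
struct HeaderId(u64);

struct Block {
    header: HeaderId,
    parent: HeaderId,
    // transactions elided
}

#[derive(Clone, Default)]
struct WalletState; // per-wallet UTXO view, elided

impl WalletState {
    fn apply(&self, _block: &Block) -> WalletState {
        // Fold the block's transactions into this wallet view.
        self.clone()
    }
}

/// Folds newly observed blocks into a per-header wallet cache. Keying by
/// header and starting from the parent's entry makes the cache fork-aware:
/// divergent branches get divergent entries.
fn update_wallet_cache(cache: &mut HashMap<HeaderId, WalletState>, new_blocks: &[Block]) {
    for block in new_blocks {
        let parent_state = cache.get(&block.parent).cloned().unwrap_or_default();
        cache.insert(block.header, parent_state.apply(block));
    }
}
```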
## Logos layering
Logos should not put every concern into one giant impl.
Recommended layering:
1. **Chain source adapter**
- local node reads
- public peer reads
2. **Shared chain observer**
- catch-up
- continuous ingestion
- graph/history materialization
3. **Logos projections**
- head view
- public sync target
- fork graph queries
- wallet state
- tx inclusion helpers
TF provides the runtime.
Logos provides the domain model built on top.
## Adoption plan
### Phase 1: add TF observation runtime
- add `ObservedSource`, `SourceProvider`, `Observer`, `ObservationRuntime`, `ObservationHandle`
- keep the public API small
- no app migrations yet
### Phase 2: prove it on `openraft_kv`
- add one simple observer over `/state`
- migrate one expectation to use the observation handle
- validate local, compose, and k8s
### Phase 3: add Logos shared chain observer
- implement it alongside current feed/loops
- do not remove existing consumers yet
- prove snapshot and delta outputs are useful
### Phase 4: migrate one Logos consumer at a time
Suggested order:
1. fork/head snapshot consumer
2. tx inclusion consumer
3. Cucumber sync-to-public-chain logic
4. wallet/UTXO tracking
### Phase 5: delete old loops and feed paths
- only after the new runtime has replaced real consumers cleanly
## Validation gates
Each phase should have clear checks.
### Runtime-level
- crate-level `cargo check`
- targeted tests for runtime lifecycle and history retention
- explicit tests for dynamic source refresh
### App-level
- `openraft_kv`:
- local failover
- compose failover
- k8s failover
- Logos:
- one snapshot consumer migrated
- one delta consumer migrated
- Cucumber:
- one manual-cluster sync path migrated
## Open questions
These should stay open until implementation forces a decision:
- whether `ObservationHandle` should expose full history directly or only cursor/subscription access
- how much error/freshness metadata belongs in the generic runtime vs app snapshot types
- whether multiple observers should share one scheduler/runtime instance or simply run independently first
## Design guardrails
When implementing this work:
- keep TF public abstractions minimal
- keep app semantics out of TF core
- do not chase a generic testing DSL
- build from reusable blocks, not one-off mega impls
- keep migration incremental
- prefer simple, explainable runtime behavior over clever abstraction

View File

@ -0,0 +1,28 @@
# Build stage
FROM rustlang/rust:nightly-bookworm AS builder
WORKDIR /build
# Copy all workspace files required for workspace build.
COPY Cargo.toml Cargo.lock ./
COPY cfgsync/ ./cfgsync/
COPY examples/ ./examples/
COPY testing-framework/ ./testing-framework/
# Build kvstore-node in release mode.
RUN cargo build --release -p kvstore-node
# Runtime stage
FROM debian:bookworm-slim
RUN apt-get update && \
apt-get install -y ca-certificates && \
rm -rf /var/lib/apt/lists/*
COPY --from=builder /build/target/release/kvstore-node /usr/local/bin/kvstore-node
RUN mkdir -p /etc/kvstore
WORKDIR /app
ENTRYPOINT ["/usr/local/bin/kvstore-node"]
CMD ["--config", "/etc/kvstore/config.yaml"]

View File

@ -0,0 +1,64 @@
# KV Store Example
This example runs a small replicated key-value store. Replication is a simple
pull-based gossip: each node periodically fetches its peers' snapshots and
merges them with last-writer-wins semantics.
The usual scenario writes keys through one node and checks that the other nodes
eventually return the same values.
## How TF runs this
Each example follows the same pattern:
- TF starts a small deployment of kvstore nodes
- a workload writes keys through one node
- an expectation keeps reading from all nodes until they agree on the values
## Scenarios
- `basic_convergence` runs the convergence check locally
- `compose_convergence` runs the same check in Docker Compose
- `k8s_convergence` runs it on Kubernetes
- `k8s_manual_convergence` starts the nodes through the k8s manual cluster API, restarts one node, and checks convergence again
## API
Each node exposes:
- `PUT /kv/:key` to write a value
- `GET /kv/:key` to read a value
- `GET /internal/snapshot` to read the local replicated state (peers poll this endpoint during sync)
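The `kvstore-node` crate also exports a small `KvHttpClient` over these routes. A read sketch; the port is illustrative, and the response structs are redeclared locally (they mirror the server's types):
```rust
use kvstore_node::KvHttpClient;
use reqwest::Url;
use serde::Deserialize;

// Mirrors the node's GET /kv/:key response shape.
#[derive(Deserialize)]
struct ValueRecord {
    value: String,
    version: u64,
    origin: u64,
}

#[derive(Deserialize)]
struct GetResponse {
    key: String,
    record: Option<ValueRecord>,
}

async fn read_demo_key() -> anyhow::Result<()> {
    // Assumes a node is listening locally on port 8080.
    let client = KvHttpClient::new(Url::parse("http://127.0.0.1:8080")?);
    let response: GetResponse = client.get("/kv/demo-0").await?;
    if let Some(record) = response.record {
        println!(
            "{} = {} (version {}, origin node {})",
            response.key, record.value, record.version, record.origin
        );
    }
    Ok(())
}
```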
## Run locally
```bash
cargo run -p kvstore-examples --bin kvstore_basic_convergence
```
## Run with Docker Compose
```bash
cargo run -p kvstore-examples --bin kvstore_compose_convergence
```
Set `KVSTORE_IMAGE` to override the default compose image tag.
## Run with Kubernetes
```bash
docker build -t kvstore-node:local -f examples/kvstore/Dockerfile .
cargo run -p kvstore-examples --bin kvstore_k8s_convergence
```
Prerequisites:
- `kubectl` configured with a reachable cluster
- `helm` installed
Optional image override:
- `KVSTORE_K8S_IMAGE` (falls back to `KVSTORE_IMAGE`, then `kvstore-node:local`)
## Run with Kubernetes manual cluster
```bash
docker build -t kvstore-node:local -f examples/kvstore/Dockerfile .
cargo run -p kvstore-examples --bin kvstore_k8s_manual_convergence
```

View File

@ -0,0 +1,35 @@
[package]
edition.workspace = true
license.workspace = true
name = "kvstore-examples"
version.workspace = true
[[bin]]
name = "kvstore_basic_convergence"
path = "src/bin/basic_convergence.rs"
[[bin]]
name = "kvstore_compose_convergence"
path = "src/bin/compose_convergence.rs"
[[bin]]
name = "kvstore_k8s_convergence"
path = "src/bin/k8s_convergence.rs"
[[bin]]
name = "kvstore_k8s_manual_convergence"
path = "src/bin/k8s_manual_convergence.rs"
[dependencies]
kvstore-node = { path = "../kvstore-node" }
kvstore-runtime-ext = { path = "../testing/integration" }
kvstore-runtime-workloads = { path = "../testing/workloads" }
testing-framework-core = { workspace = true }
testing-framework-runner-compose = { workspace = true }
testing-framework-runner-k8s = { workspace = true }
anyhow = "1.0"
serde = { workspace = true }
tokio = { workspace = true, features = ["full"] }
tracing = { workspace = true }
tracing-subscriber = { version = "0.3", features = ["env-filter"] }

View File

@ -0,0 +1,31 @@
use std::time::Duration;
use kvstore_runtime_ext::KvLocalDeployer;
use kvstore_runtime_workloads::{
KvBuilderExt, KvConverges, KvScenarioBuilder, KvTopology, KvWriteWorkload,
};
use testing_framework_core::scenario::Deployer;
#[tokio::main]
async fn main() -> anyhow::Result<()> {
tracing_subscriber::fmt()
.with_env_filter(tracing_subscriber::EnvFilter::from_default_env())
.init();
let mut scenario = KvScenarioBuilder::deployment_with(|_| KvTopology::new(3))
.with_run_duration(Duration::from_secs(30))
.with_workload(
KvWriteWorkload::new()
.operations(300)
.key_count(30)
.rate_per_sec(30)
.key_prefix("demo"),
)
.with_expectation(KvConverges::new("demo", 30).timeout(Duration::from_secs(25)))
.build()?;
let deployer = KvLocalDeployer::default();
let runner = deployer.deploy(&scenario).await?;
runner.run(&mut scenario).await?;
Ok(())
}

View File

@ -0,0 +1,44 @@
use std::time::Duration;
use anyhow::{Context as _, Result};
use kvstore_runtime_workloads::{
KvBuilderExt, KvConverges, KvScenarioBuilder, KvTopology, KvWriteWorkload,
};
use testing_framework_core::scenario::Deployer;
use testing_framework_runner_compose::ComposeRunnerError;
use tracing::{info, warn};
#[tokio::main]
async fn main() -> Result<()> {
tracing_subscriber::fmt()
.with_env_filter(tracing_subscriber::EnvFilter::from_default_env())
.init();
let mut scenario = KvScenarioBuilder::deployment_with(|_| KvTopology::new(3))
.with_run_duration(Duration::from_secs(30))
.with_workload(
KvWriteWorkload::new()
.operations(200)
.key_count(20)
.rate_per_sec(20),
)
.with_expectation(KvConverges::new("kv-demo", 20).timeout(Duration::from_secs(25)))
.build()?;
let deployer = kvstore_runtime_ext::KvComposeDeployer::new();
let runner = match deployer.deploy(&scenario).await {
Ok(runner) => runner,
Err(ComposeRunnerError::DockerUnavailable) => {
warn!("docker unavailable; skipping compose kv run");
return Ok(());
}
Err(error) => return Err(anyhow::Error::new(error)).context("deploying kv compose stack"),
};
info!("running kv compose convergence scenario");
runner
.run(&mut scenario)
.await
.context("running kv compose scenario")?;
Ok(())
}

View File

@ -0,0 +1,58 @@
use std::time::Duration;
use anyhow::{Context as _, Result};
use kvstore_runtime_ext::KvK8sDeployer;
use kvstore_runtime_workloads::{
KvBuilderExt, KvConverges, KvScenarioBuilder, KvTopology, KvWriteWorkload,
};
use testing_framework_core::scenario::Deployer;
use testing_framework_runner_k8s::K8sRunnerError;
use tracing::{info, warn};
#[tokio::main]
async fn main() -> Result<()> {
tracing_subscriber::fmt()
.with_env_filter(tracing_subscriber::EnvFilter::from_default_env())
.init();
let mut scenario = KvScenarioBuilder::deployment_with(|_| KvTopology::new(3))
.with_run_duration(Duration::from_secs(30))
.with_workload(
KvWriteWorkload::new()
.operations(200)
.key_count(20)
.rate_per_sec(20),
)
.with_expectation(KvConverges::new("kv-demo", 20).timeout(Duration::from_secs(25)))
.build()?;
let deployer = KvK8sDeployer::new();
let runner = match deployer.deploy(&scenario).await {
Ok(runner) => runner,
Err(K8sRunnerError::ClientInit { source }) => {
warn!("k8s unavailable ({source}); skipping kv k8s run");
return Ok(());
}
Err(K8sRunnerError::InstallStack { source })
if k8s_cluster_unavailable(&source.to_string()) =>
{
warn!("k8s unavailable ({source}); skipping kv k8s run");
return Ok(());
}
Err(error) => return Err(anyhow::Error::new(error)).context("deploying kv k8s stack"),
};
info!("running kv k8s convergence scenario");
runner
.run(&mut scenario)
.await
.context("running kv k8s scenario")?;
Ok(())
}
fn k8s_cluster_unavailable(message: &str) -> bool {
message.contains("Unable to connect to the server")
|| message.contains("TLS handshake timeout")
|| message.contains("connection refused")
}

View File

@ -0,0 +1,155 @@
use std::time::Duration;
use anyhow::{Context as _, Result, anyhow};
use kvstore_node::KvHttpClient;
use kvstore_runtime_ext::{KvK8sDeployer, KvTopology};
use serde::{Deserialize, Serialize};
use testing_framework_runner_k8s::ManualClusterError;
use tracing::{info, warn};
#[derive(Serialize)]
struct PutRequest {
value: String,
expected_version: Option<u64>,
}
#[derive(Deserialize)]
struct PutResponse {
applied: bool,
}
#[derive(Clone, Debug, Deserialize, Eq, PartialEq)]
struct ValueRecord {
value: String,
version: u64,
origin: u64,
}
#[derive(Deserialize)]
struct GetResponse {
record: Option<ValueRecord>,
}
#[tokio::main]
async fn main() -> Result<()> {
tracing_subscriber::fmt()
.with_env_filter(tracing_subscriber::EnvFilter::from_default_env())
.init();
let deployer = KvK8sDeployer::new();
let cluster = match deployer
.manual_cluster_from_descriptors(KvTopology::new(3))
.await
{
Ok(cluster) => cluster,
Err(ManualClusterError::ClientInit { source }) => {
warn!("k8s unavailable ({source}); skipping kv k8s manual run");
return Ok(());
}
Err(ManualClusterError::InstallStack { source })
if k8s_cluster_unavailable(&source.to_string()) =>
{
warn!("k8s unavailable ({source}); skipping kv k8s manual run");
return Ok(());
}
Err(error) => {
return Err(anyhow::Error::new(error)).context("creating kv k8s manual cluster");
}
};
let node0 = cluster.start_node("node-0").await?.client;
let node1 = cluster.start_node("node-1").await?.client;
let node2 = cluster.start_node("node-2").await?.client;
cluster.wait_network_ready().await?;
write_keys(&node0, "kv-manual", 12).await?;
wait_for_convergence(
&[node0.clone(), node1.clone(), node2.clone()],
"kv-manual",
12,
)
.await?;
info!("restarting node-2 in manual cluster");
cluster.restart_node("node-2").await?;
cluster.wait_network_ready().await?;
let node2 = cluster
.node_client("node-2")
.ok_or_else(|| anyhow!("node-2 client missing after restart"))?;
wait_for_convergence(&[node0, node1, node2], "kv-manual", 12).await?;
cluster.stop_all();
Ok(())
}
async fn write_keys(client: &KvHttpClient, prefix: &str, key_count: usize) -> Result<()> {
for index in 0..key_count {
let key = format!("{prefix}-{index}");
let response: PutResponse = client
.put(
&format!("/kv/{key}"),
&PutRequest {
value: format!("value-{index}"),
expected_version: None,
},
)
.await
.map_err(|error| anyhow!(error.to_string()))
.with_context(|| format!("writing key {key}"))?;
if !response.applied {
return Err(anyhow!("write rejected for key {key}"));
}
}
Ok(())
}
async fn wait_for_convergence(
clients: &[KvHttpClient],
prefix: &str,
key_count: usize,
) -> Result<()> {
let deadline = tokio::time::Instant::now() + Duration::from_secs(30);
while tokio::time::Instant::now() < deadline {
if is_converged(clients, prefix, key_count).await? {
info!(key_count, "kv manual cluster converged");
return Ok(());
}
tokio::time::sleep(Duration::from_millis(500)).await;
}
Err(anyhow!("kv manual cluster did not converge within timeout"))
}
async fn is_converged(clients: &[KvHttpClient], prefix: &str, key_count: usize) -> Result<bool> {
for index in 0..key_count {
let key = format!("{prefix}-{index}");
let first = read_key(&clients[0], &key).await?;
for client in &clients[1..] {
if read_key(client, &key).await? != first {
return Ok(false);
}
}
}
Ok(true)
}
async fn read_key(client: &KvHttpClient, key: &str) -> Result<Option<ValueRecord>> {
let response: GetResponse = client
.get(&format!("/kv/{key}"))
.await
.map_err(|error| anyhow!(error.to_string()))
.with_context(|| format!("reading key {key}"))?;
Ok(response.record)
}
fn k8s_cluster_unavailable(message: &str) -> bool {
message.contains("Unable to connect to the server")
|| message.contains("TLS handshake timeout")
|| message.contains("connection refused")
}

View File

@ -0,0 +1,24 @@
[package]
edition.workspace = true
license.workspace = true
name = "kvstore-node"
version.workspace = true
[[bin]]
name = "kvstore-node"
path = "src/main.rs"
[dependencies]
axum = "0.7"
tower-http = { version = "0.6", features = ["trace"] }
serde = { workspace = true }
serde_yaml = { workspace = true }
tokio = { workspace = true, features = ["full"] }
tracing = { workspace = true }
tracing-subscriber = { version = "0.3", features = ["env-filter"] }
anyhow = "1.0"
clap = { version = "4.0", features = ["derive"] }
reqwest = { workspace = true, features = ["json"] }

View File

@ -0,0 +1,40 @@
use reqwest::Url;
use serde::Serialize;
/// Small HTTP client for the kv example node's public API.
#[derive(Clone)]
pub struct KvHttpClient {
base_url: Url,
client: reqwest::Client,
}
impl KvHttpClient {
#[must_use]
pub fn new(base_url: Url) -> Self {
Self {
base_url,
client: reqwest::Client::new(),
}
}
pub async fn get<T: serde::de::DeserializeOwned>(&self, path: &str) -> anyhow::Result<T> {
let url = self.base_url.join(path)?;
let response = self.client.get(url).send().await?.error_for_status()?;
Ok(response.json().await?)
}
pub async fn put<B: Serialize, T: serde::de::DeserializeOwned>(
&self,
path: &str,
body: &B,
) -> anyhow::Result<T> {
let url = self.base_url.join(path)?;
let response = self
.client
.put(url)
.json(body)
.send()
.await?
.error_for_status()?;
Ok(response.json().await?)
}
}

View File

@ -0,0 +1,30 @@
use std::{fs, path::Path};
use serde::{Deserialize, Serialize};
#[derive(Clone, Debug, Serialize, Deserialize)]
pub struct PeerInfo {
pub node_id: u64,
pub http_address: String,
}
/// Static node config written by TF for one kv node process.
#[derive(Clone, Debug, Serialize, Deserialize)]
pub struct KvConfig {
/// Stable node identifier, also recorded as the origin of local writes.
pub node_id: u64,
/// HTTP port bound by the node process.
pub http_port: u16,
/// Peers this node periodically pulls snapshots from.
pub peers: Vec<PeerInfo>,
/// Interval between sync rounds, in milliseconds.
#[serde(default = "default_sync_interval_ms")]
pub sync_interval_ms: u64,
}
impl KvConfig {
pub fn load(path: &Path) -> anyhow::Result<Self> {
let raw = fs::read_to_string(path)?;
let config = serde_yaml::from_str(&raw)?;
Ok(config)
}
}
const fn default_sync_interval_ms() -> u64 {
1000
}

View File

@ -0,0 +1,3 @@
pub mod client;
pub use client::KvHttpClient;

View File

@ -0,0 +1,36 @@
mod config;
mod server;
mod state;
mod sync;
use std::path::PathBuf;
use clap::Parser;
use tracing_subscriber::{layer::SubscriberExt, util::SubscriberInitExt};
use crate::{config::KvConfig, state::KvState, sync::SyncService};
#[derive(Parser, Debug)]
#[command(name = "kvstore-node")]
struct Args {
#[arg(short, long)]
config: PathBuf,
}
#[tokio::main]
async fn main() -> anyhow::Result<()> {
tracing_subscriber::registry()
.with(
tracing_subscriber::EnvFilter::try_from_default_env()
.unwrap_or_else(|_| "kvstore_node=info,tower_http=debug".into()),
)
.with(tracing_subscriber::fmt::layer())
.init();
let args = Args::parse();
let config = KvConfig::load(&args.config)?;
let state = KvState::new(config.node_id);
SyncService::new(config.clone(), state.clone()).start();
server::start_server(config, state).await
}

View File

@ -0,0 +1,112 @@
use std::net::SocketAddr;
use axum::{
Router,
extract::{Path, State},
http::StatusCode,
response::Json,
routing::get,
};
use serde::{Deserialize, Serialize};
use tower_http::trace::TraceLayer;
use crate::{
config::KvConfig,
state::{KvState, Snapshot, ValueRecord},
};
#[derive(Serialize)]
struct HealthResponse {
status: &'static str,
}
#[derive(Deserialize)]
struct PutRequest {
value: String,
expected_version: Option<u64>,
}
#[derive(Serialize)]
struct PutResponse {
applied: bool,
version: u64,
}
#[derive(Serialize)]
struct GetResponse {
key: String,
record: Option<ValueRecord>,
}
pub async fn start_server(config: KvConfig, state: KvState) -> anyhow::Result<()> {
let app = Router::new()
.route("/health/live", get(health_live))
.route("/health/ready", get(health_ready))
.route("/kv/:key", get(get_key).put(put_key))
.route("/internal/snapshot", get(get_snapshot))
.layer(TraceLayer::new_for_http())
.with_state(state.clone());
let addr = SocketAddr::from(([0, 0, 0, 0], config.http_port));
let listener = tokio::net::TcpListener::bind(addr).await?;
state.set_ready(true).await;
tracing::info!(node_id = state.node_id(), %addr, "kv node ready");
axum::serve(listener, app).await?;
Ok(())
}
async fn health_live() -> (StatusCode, Json<HealthResponse>) {
(StatusCode::OK, Json(HealthResponse { status: "alive" }))
}
async fn health_ready(State(state): State<KvState>) -> (StatusCode, Json<HealthResponse>) {
if state.is_ready().await {
(StatusCode::OK, Json(HealthResponse { status: "ready" }))
} else {
(
StatusCode::SERVICE_UNAVAILABLE,
Json(HealthResponse {
status: "not-ready",
}),
)
}
}
async fn get_key(Path(key): Path<String>, State(state): State<KvState>) -> Json<GetResponse> {
let record = state.get(&key).await;
Json(GetResponse { key, record })
}
async fn put_key(
Path(key): Path<String>,
State(state): State<KvState>,
Json(request): Json<PutRequest>,
) -> (StatusCode, Json<PutResponse>) {
let outcome = state
.put_local(key, request.value, request.expected_version)
.await;
if outcome.applied {
(
StatusCode::OK,
Json(PutResponse {
applied: true,
version: outcome.current_version,
}),
)
} else {
(
StatusCode::CONFLICT,
Json(PutResponse {
applied: false,
version: outcome.current_version,
}),
)
}
}
async fn get_snapshot(State(state): State<KvState>) -> Json<Snapshot> {
Json(state.snapshot().await)
}

View File

@ -0,0 +1,111 @@
use std::{collections::HashMap, sync::Arc};
use serde::{Deserialize, Serialize};
use tokio::sync::RwLock;
#[derive(Clone, Debug, Serialize, Deserialize, Eq, PartialEq)]
pub struct ValueRecord {
pub value: String,
pub version: u64,
pub origin: u64,
}
#[derive(Clone, Debug, Serialize, Deserialize)]
pub struct Snapshot {
pub node_id: u64,
pub entries: HashMap<String, ValueRecord>,
}
#[derive(Clone, Debug)]
pub struct PutOutcome {
pub applied: bool,
pub current_version: u64,
}
#[derive(Clone)]
pub struct KvState {
node_id: u64,
ready: Arc<RwLock<bool>>,
entries: Arc<RwLock<HashMap<String, ValueRecord>>>,
}
impl KvState {
pub fn new(node_id: u64) -> Self {
Self {
node_id,
ready: Arc::new(RwLock::new(false)),
entries: Arc::new(RwLock::new(HashMap::new())),
}
}
pub const fn node_id(&self) -> u64 {
self.node_id
}
pub async fn set_ready(&self, value: bool) {
*self.ready.write().await = value;
}
pub async fn is_ready(&self) -> bool {
*self.ready.read().await
}
pub async fn get(&self, key: &str) -> Option<ValueRecord> {
self.entries.read().await.get(key).cloned()
}
/// Applies a local write, optionally guarded by `expected_version`
/// (compare-and-set semantics).
pub async fn put_local(
&self,
key: String,
value: String,
expected_version: Option<u64>,
) -> PutOutcome {
let mut entries = self.entries.write().await;
let current_version = entries.get(&key).map_or(0, |record| record.version);
if expected_version.is_some_and(|expected| expected != current_version) {
return PutOutcome {
applied: false,
current_version,
};
}
let next_version = current_version.saturating_add(1);
entries.insert(
key,
ValueRecord {
value,
version: next_version,
origin: self.node_id,
},
);
PutOutcome {
applied: true,
current_version: next_version,
}
}
/// Merges a peer snapshot, keeping the newer record for each key.
pub async fn merge_snapshot(&self, snapshot: Snapshot) {
let mut local = self.entries.write().await;
for (key, incoming) in snapshot.entries {
match local.get(&key) {
Some(existing) if !is_newer_record(&incoming, existing) => {}
_ => {
local.insert(key, incoming);
}
}
}
}
pub async fn snapshot(&self) -> Snapshot {
Snapshot {
node_id: self.node_id,
entries: self.entries.read().await.clone(),
}
}
}
/// Last-writer-wins ordering: the higher version wins, with the origin node
/// id as a deterministic tie-breaker so every node converges on one record.
fn is_newer_record(candidate: &ValueRecord, existing: &ValueRecord) -> bool {
(candidate.version, candidate.origin) > (existing.version, existing.origin)
}

View File

@ -0,0 +1,103 @@
use std::{collections::HashMap, sync::Arc, time::Duration};
use reqwest::Client;
use tokio::sync::Mutex;
use tracing::{debug, warn};
use crate::{
config::KvConfig,
state::{KvState, Snapshot},
};
const WARN_AFTER_CONSECUTIVE_FAILURES: u32 = 5;
/// Pull-based anti-entropy loop: periodically fetches each peer's
/// `/internal/snapshot` and merges it into local state.
#[derive(Clone)]
pub struct SyncService {
config: Arc<KvConfig>,
state: KvState,
client: Client,
failures_by_peer: Arc<Mutex<HashMap<String, u32>>>,
}
impl SyncService {
pub fn new(config: KvConfig, state: KvState) -> Self {
Self {
config: Arc::new(config),
state,
client: Client::new(),
failures_by_peer: Arc::new(Mutex::new(HashMap::new())),
}
}
pub fn start(&self) {
let service = self.clone();
tokio::spawn(async move {
service.run().await;
});
}
async fn run(self) {
let interval = Duration::from_millis(self.config.sync_interval_ms.max(100));
loop {
self.sync_once().await;
tokio::time::sleep(interval).await;
}
}
async fn sync_once(&self) {
for peer in &self.config.peers {
match self.fetch_snapshot(&peer.http_address).await {
Ok(snapshot) => {
self.state.merge_snapshot(snapshot).await;
self.clear_failure_counter(&peer.http_address).await;
}
Err(error) => {
self.record_sync_failure(&peer.http_address, &error).await;
}
}
}
}
async fn fetch_snapshot(&self, peer_address: &str) -> anyhow::Result<Snapshot> {
let url = format!("http://{peer_address}/internal/snapshot");
let snapshot = self
.client
.get(url)
.send()
.await?
.error_for_status()?
.json()
.await?;
Ok(snapshot)
}
async fn clear_failure_counter(&self, peer_address: &str) {
let mut failures = self.failures_by_peer.lock().await;
failures.remove(peer_address);
}
async fn record_sync_failure(&self, peer_address: &str, error: &anyhow::Error) {
let consecutive_failures = {
let mut failures = self.failures_by_peer.lock().await;
let entry = failures.entry(peer_address.to_owned()).or_insert(0);
*entry += 1;
*entry
};
if consecutive_failures >= WARN_AFTER_CONSECUTIVE_FAILURES {
warn!(
peer = %peer_address,
%error,
consecutive_failures,
"kv sync repeatedly failing"
);
} else {
debug!(
peer = %peer_address,
%error,
consecutive_failures,
"kv sync failed"
);
}
}
}

View File

@ -0,0 +1,15 @@
[package]
edition.workspace = true
license.workspace = true
name = "kvstore-runtime-ext"
version.workspace = true
[dependencies]
testing-framework-core = { workspace = true }
testing-framework-runner-compose = { workspace = true }
testing-framework-runner-k8s = { workspace = true }
testing-framework-runner-local = { workspace = true }
async-trait = { workspace = true }
kvstore-node = { path = "../../kvstore-node" }
serde = { workspace = true }

View File

@ -0,0 +1,75 @@
use std::io::Error;
use async_trait::async_trait;
use kvstore_node::KvHttpClient;
use serde::{Deserialize, Serialize};
use testing_framework_core::scenario::{
Application, ClusterNodeConfigApplication, ClusterNodeView, ClusterPeerView, DynError,
NodeAccess, serialize_cluster_yaml_config,
};
pub type KvTopology = testing_framework_core::topology::ClusterTopology;
#[derive(Clone, Debug, Serialize, Deserialize)]
pub struct KvPeerInfo {
pub node_id: u64,
pub http_address: String,
}
#[derive(Clone, Debug, Serialize, Deserialize)]
pub struct KvNodeConfig {
pub node_id: u64,
pub http_port: u16,
pub peers: Vec<KvPeerInfo>,
pub sync_interval_ms: u64,
}
pub struct KvEnv;
#[async_trait]
impl Application for KvEnv {
type Deployment = KvTopology;
type NodeClient = KvHttpClient;
type NodeConfig = KvNodeConfig;
fn build_node_client(access: &NodeAccess) -> Result<Self::NodeClient, DynError> {
Ok(KvHttpClient::new(access.api_base_url()?))
}
fn node_readiness_path() -> &'static str {
"/health/ready"
}
}
impl ClusterNodeConfigApplication for KvEnv {
type ConfigError = Error;
fn static_network_port() -> u16 {
8080
}
fn build_cluster_node_config(
node: &ClusterNodeView,
peers: &[ClusterPeerView],
) -> Result<Self::NodeConfig, Self::ConfigError> {
let peers = peers
.iter()
.map(|peer| KvPeerInfo {
node_id: peer.index() as u64,
http_address: peer.authority(),
})
.collect::<Vec<_>>();
Ok(KvNodeConfig {
node_id: node.index() as u64,
http_port: node.network_port(),
peers,
sync_interval_ms: 500,
})
}
fn serialize_cluster_node_config(
config: &Self::NodeConfig,
) -> Result<String, Self::ConfigError> {
serialize_cluster_yaml_config(config).map_err(Error::other)
}
}

View File

@ -0,0 +1,15 @@
use testing_framework_runner_compose::{BinaryConfigNodeSpec, ComposeBinaryApp};
use crate::KvEnv;
const NODE_CONFIG_PATH: &str = "/etc/kvstore/config.yaml";
impl ComposeBinaryApp for KvEnv {
fn compose_node_spec() -> BinaryConfigNodeSpec {
BinaryConfigNodeSpec::conventional(
"/usr/local/bin/kvstore-node",
NODE_CONFIG_PATH,
vec![8080, 8081],
)
}
}

View File

@ -0,0 +1,21 @@
use testing_framework_runner_k8s::{BinaryConfigK8sSpec, K8sBinaryApp};
use crate::KvEnv;
const CONTAINER_CONFIG_PATH: &str = "/etc/kvstore/config.yaml";
const CONTAINER_HTTP_PORT: u16 = 8080;
const SERVICE_TESTING_PORT: u16 = 8081;
const NODE_NAME_PREFIX: &str = "kvstore-node";
impl K8sBinaryApp for KvEnv {
fn k8s_binary_spec() -> BinaryConfigK8sSpec {
BinaryConfigK8sSpec::conventional(
"kvstore",
NODE_NAME_PREFIX,
"/usr/local/bin/kvstore-node",
CONTAINER_CONFIG_PATH,
CONTAINER_HTTP_PORT,
SERVICE_TESTING_PORT,
)
}
}

View File

@ -0,0 +1,12 @@
mod app;
mod compose_env;
mod k8s_env;
mod local_env;
pub mod scenario;
pub use app::*;
pub use scenario::{KvBuilderExt, KvScenarioBuilder};
pub type KvLocalDeployer = testing_framework_runner_local::ProcessDeployer<KvEnv>;
pub type KvComposeDeployer = testing_framework_runner_compose::ComposeDeployer<KvEnv>;
pub type KvK8sDeployer = testing_framework_runner_k8s::K8sDeployer<KvEnv>;

View File

@ -0,0 +1,41 @@
use std::collections::HashMap;
use testing_framework_core::scenario::{DynError, StartNodeOptions};
use testing_framework_runner_local::{
LocalBinaryApp, LocalNodePorts, LocalPeerNode, LocalProcessSpec,
build_local_cluster_node_config, yaml_node_config,
};
use crate::{KvEnv, KvNodeConfig};
impl LocalBinaryApp for KvEnv {
fn initial_node_name_prefix() -> &'static str {
"kv-node"
}
fn build_local_node_config_with_peers(
_topology: &Self::Deployment,
index: usize,
ports: &LocalNodePorts,
peers: &[LocalPeerNode],
_peer_ports_by_name: &HashMap<String, u16>,
_options: &StartNodeOptions<Self>,
_template_config: Option<
&<Self as testing_framework_core::scenario::Application>::NodeConfig,
>,
) -> Result<<Self as testing_framework_core::scenario::Application>::NodeConfig, DynError> {
build_local_cluster_node_config::<Self>(index, ports, peers)
}
fn local_process_spec() -> LocalProcessSpec {
LocalProcessSpec::new("KVSTORE_NODE_BIN", "kvstore-node").with_rust_log("kvstore_node=info")
}
fn render_local_config(config: &KvNodeConfig) -> Result<Vec<u8>, DynError> {
yaml_node_config(config)
}
fn http_api_port(config: &KvNodeConfig) -> u16 {
config.http_port
}
}

View File

@ -0,0 +1,15 @@
use testing_framework_core::scenario::ScenarioBuilder;
use crate::{KvEnv, KvTopology};
pub type KvScenarioBuilder = ScenarioBuilder<KvEnv>;
pub trait KvBuilderExt: Sized {
fn deployment_with(f: impl FnOnce(KvTopology) -> KvTopology) -> Self;
}
impl KvBuilderExt for KvScenarioBuilder {
fn deployment_with(f: impl FnOnce(KvTopology) -> KvTopology) -> Self {
KvScenarioBuilder::with_deployment(f(KvTopology::new(3)))
}
}

View File

@ -0,0 +1,15 @@
[package]
edition.workspace = true
license.workspace = true
name = "kvstore-runtime-workloads"
version.workspace = true
[dependencies]
kvstore-node = { path = "../../kvstore-node" }
kvstore-runtime-ext = { path = "../integration" }
testing-framework-core = { workspace = true }
async-trait = { workspace = true }
serde = { workspace = true }
tokio = { workspace = true, features = ["full"] }
tracing = { workspace = true }

View File

@ -0,0 +1,100 @@
use std::time::Duration;
use async_trait::async_trait;
use kvstore_runtime_ext::KvEnv;
use serde::Deserialize;
use testing_framework_core::scenario::{DynError, Expectation, RunContext};
use tracing::info;
#[derive(Clone)]
pub struct KvConverges {
key_prefix: String,
key_count: usize,
timeout: Duration,
poll_interval: Duration,
}
#[derive(Deserialize, Clone, Debug, Eq, PartialEq)]
struct ValueRecord {
value: String,
version: u64,
origin: u64,
}
#[derive(Deserialize)]
struct GetResponse {
record: Option<ValueRecord>,
}
impl KvConverges {
#[must_use]
pub fn new(key_prefix: impl Into<String>, key_count: usize) -> Self {
Self {
key_prefix: key_prefix.into(),
key_count,
timeout: Duration::from_secs(20),
poll_interval: Duration::from_millis(500),
}
}
#[must_use]
pub const fn timeout(mut self, timeout: Duration) -> Self {
self.timeout = timeout;
self
}
}
#[async_trait]
impl Expectation<KvEnv> for KvConverges {
fn name(&self) -> &str {
"kv_converges"
}
async fn evaluate(&mut self, ctx: &RunContext<KvEnv>) -> Result<(), DynError> {
let clients = ctx.node_clients().snapshot();
if clients.is_empty() {
return Err("no kv node clients available".into());
}
let deadline = tokio::time::Instant::now() + self.timeout;
while tokio::time::Instant::now() < deadline {
if self.is_converged(&clients).await? {
info!(key_count = self.key_count, "kv convergence reached");
return Ok(());
}
tokio::time::sleep(self.poll_interval).await;
}
Err(format!(
"kv convergence not reached within {:?} for {} keys",
self.timeout, self.key_count
)
.into())
}
}
impl KvConverges {
async fn is_converged(&self, clients: &[kvstore_node::KvHttpClient]) -> Result<bool, DynError> {
for key_idx in 0..self.key_count {
let key = format!("{}-{key_idx}", self.key_prefix);
let first = read_key(clients, &key, 0).await?;
for node_idx in 1..clients.len() {
let current = read_key(clients, &key, node_idx).await?;
if current != first {
return Ok(false);
}
}
}
Ok(true)
}
}
async fn read_key(
clients: &[kvstore_node::KvHttpClient],
key: &str,
index: usize,
) -> Result<Option<ValueRecord>, DynError> {
let response: GetResponse = clients[index].get(&format!("/kv/{key}")).await?;
Ok(response.record)
}

View File

@ -0,0 +1,6 @@
mod expectations;
mod write;
pub use expectations::KvConverges;
pub use kvstore_runtime_ext::{KvBuilderExt, KvEnv, KvScenarioBuilder, KvTopology};
pub use write::KvWriteWorkload;

View File

@ -0,0 +1,135 @@
use std::time::Duration;
use async_trait::async_trait;
use kvstore_runtime_ext::KvEnv;
use serde::{Deserialize, Serialize};
use testing_framework_core::scenario::{DynError, RunContext, Workload};
use tracing::info;
#[derive(Clone)]
pub struct KvWriteWorkload {
operations: usize,
key_count: usize,
rate_per_sec: Option<usize>,
key_prefix: String,
}
#[derive(Serialize)]
struct PutRequest {
value: String,
expected_version: Option<u64>,
}
#[derive(Deserialize)]
struct PutResponse {
applied: bool,
version: u64,
}
impl KvWriteWorkload {
#[must_use]
pub fn new() -> Self {
Self {
operations: 200,
key_count: 20,
rate_per_sec: Some(25),
key_prefix: "kv-demo".to_owned(),
}
}
#[must_use]
pub const fn operations(mut self, value: usize) -> Self {
self.operations = value;
self
}
#[must_use]
pub const fn key_count(mut self, value: usize) -> Self {
self.key_count = value;
self
}
#[must_use]
pub const fn rate_per_sec(mut self, value: usize) -> Self {
self.rate_per_sec = Some(value);
self
}
#[must_use]
pub fn key_prefix(mut self, value: impl Into<String>) -> Self {
self.key_prefix = value.into();
self
}
}
impl Default for KvWriteWorkload {
fn default() -> Self {
Self::new()
}
}
#[async_trait]
impl Workload<KvEnv> for KvWriteWorkload {
fn name(&self) -> &str {
"kv_write_workload"
}
async fn start(&self, ctx: &RunContext<KvEnv>) -> Result<(), DynError> {
let clients = ctx.node_clients().snapshot();
let Some(leader) = clients.first() else {
return Err("no kv node clients available".into());
};
if self.key_count == 0 {
return Err("kv workload key_count must be > 0".into());
}
let interval = self.rate_per_sec.and_then(compute_interval);
info!(
operations = self.operations,
key_count = self.key_count,
rate_per_sec = ?self.rate_per_sec,
"starting kv write workload"
);
for idx in 0..self.operations {
let key = format!("{}-{}", self.key_prefix, idx % self.key_count);
let value = format!("value-{idx}");
let response: PutResponse = leader
.put(
&format!("/kv/{key}"),
&PutRequest {
value,
expected_version: None,
},
)
.await?;
if !response.applied {
return Err(format!("leader rejected write for key {key}").into());
}
if (idx + 1) % 25 == 0 {
info!(
completed = idx + 1,
version = response.version,
"kv write progress"
);
}
if let Some(delay) = interval {
tokio::time::sleep(delay).await;
}
}
Ok(())
}
}
/// Converts a writes-per-second rate into a per-write delay (1ms floor).
/// A zero rate means unthrottled, so no delay is returned.
fn compute_interval(rate_per_sec: usize) -> Option<Duration> {
if rate_per_sec == 0 {
return None;
}
Some(Duration::from_millis((1000 / rate_per_sec as u64).max(1)))
}

View File

@ -31,7 +31,7 @@ Each node exposes:
```bash
LOGOS_BLOCKCHAIN_METRICS_QUERY_URL=http://127.0.0.1:19091 \
-cargo run -p metrics-counter-examples --bin compose_prometheus_expectation
+cargo run -p metrics-counter-examples --bin metrics_counter_compose_prometheus_expectation
```
## Run with Kubernetes
@ -39,7 +39,7 @@ cargo run -p metrics-counter-examples --bin compose_prometheus_expectation
```bash
docker build -t metrics-counter-node:local -f examples/metrics_counter/Dockerfile .
LOGOS_BLOCKCHAIN_METRICS_QUERY_URL=http://127.0.0.1:30991 \
-cargo run -p metrics-counter-examples --bin k8s_prometheus_expectation
+cargo run -p metrics-counter-examples --bin metrics_counter_k8s_prometheus_expectation
```
Overrides:
@ -51,5 +51,5 @@ Overrides:
```bash
docker build -t metrics-counter-node:local -f examples/metrics_counter/Dockerfile .
LOGOS_BLOCKCHAIN_METRICS_QUERY_URL=http://127.0.0.1:30991 \
-cargo run -p metrics-counter-examples --bin k8s_manual_prometheus
+cargo run -p metrics-counter-examples --bin metrics_counter_k8s_manual_prometheus
```

View File

@ -4,6 +4,18 @@ license.workspace = true
name = "metrics-counter-examples"
version.workspace = true
[[bin]]
name = "metrics_counter_compose_prometheus_expectation"
path = "src/bin/compose_prometheus_expectation.rs"
[[bin]]
name = "metrics_counter_k8s_prometheus_expectation"
path = "src/bin/k8s_prometheus_expectation.rs"
[[bin]]
name = "metrics_counter_k8s_manual_prometheus"
path = "src/bin/k8s_manual_prometheus.rs"
[dependencies]
anyhow = "1.0"
metrics-counter-node = { path = "../metrics-counter-node" }

View File

@ -23,23 +23,23 @@ Each example follows the same pattern:
## Run locally
```bash
-cargo run -p nats-examples --bin basic_roundtrip
+cargo run -p nats-examples --bin nats_basic_roundtrip
```
If `nats-server` is not on `PATH`:
```bash
-NATS_SERVER_BIN=/path/to/nats-server cargo run -p nats-examples --bin basic_roundtrip
+NATS_SERVER_BIN=/path/to/nats-server cargo run -p nats-examples --bin nats_basic_roundtrip
```
## Run with Docker Compose
```bash
-cargo run -p nats-examples --bin compose_roundtrip
+cargo run -p nats-examples --bin nats_compose_roundtrip
```
## Run the parity check
```bash
-cargo run -p nats-examples --bin parity_check
+cargo run -p nats-examples --bin nats_parity_check
```

View File

@ -4,6 +4,18 @@ license.workspace = true
name = "nats-examples"
version.workspace = true
[[bin]]
name = "nats_basic_roundtrip"
path = "src/bin/basic_roundtrip.rs"
[[bin]]
name = "nats_compose_roundtrip"
path = "src/bin/compose_roundtrip.rs"
[[bin]]
name = "nats_parity_check"
path = "src/bin/parity_check.rs"
[dependencies]
anyhow = "1.0"
nats-runtime-ext = { path = "../testing/integration" }

View File

@ -0,0 +1,25 @@
# Build stage
FROM rustlang/rust:nightly-bookworm AS builder
WORKDIR /build
COPY Cargo.toml Cargo.lock ./
COPY cfgsync/ ./cfgsync/
COPY examples/ ./examples/
COPY testing-framework/ ./testing-framework/
RUN cargo build --release -p openraft-kv-node
FROM debian:bookworm-slim
RUN apt-get update && \
apt-get install -y ca-certificates && \
rm -rf /var/lib/apt/lists/*
COPY --from=builder /build/target/release/openraft-kv-node /usr/local/bin/openraft-kv-node
RUN mkdir -p /etc/openraft-kv
WORKDIR /app
ENTRYPOINT ["/usr/local/bin/openraft-kv-node"]
CMD ["--config", "/etc/openraft-kv/config.yaml"]

View File

@ -0,0 +1,87 @@
# OpenRaft KV Example
This example runs a small key-value service built on top of `OpenRaft`.
The main scenario does four things:
- bootstraps node 0 as a one-node cluster
- adds nodes 1 and 2 as learners and promotes them to voters
- writes one batch of keys through the current leader
- restarts that leader, waits for a new leader, writes again, and then checks
that all three nodes expose the same replicated state
## How TF runs this
- TF starts three OpenRaft nodes
- the workload bootstraps the cluster through the admin API
- the workload writes a first batch, restarts the current leader, waits for failover, and writes again
- the expectation checks that all three nodes converge on the same key/value state and membership
## Scenarios
- `basic_failover` runs the leader-restart flow locally
- `compose_failover` runs the same flow in Docker Compose
- `k8s_failover` runs the same flow against a manual Kubernetes cluster deployment
## API
Each node exposes:
- `GET /healthz` for readiness
- `GET /state` for current Raft role, leader, membership, log progress, and replicated key/value data
- `POST /kv/write` to submit a write through the local Raft node
- `POST /kv/read` to read a key from the local state machine
- `POST /admin/init` to initialize a single-node cluster
- `POST /admin/add-learner` to add a new Raft learner
- `POST /admin/change-membership` to promote learners into the voting set
The node also exposes internal Raft RPC endpoints used only for replication:
- `POST /raft/vote`
- `POST /raft/append`
- `POST /raft/snapshot`
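The `openraft-kv-node` crate exports `OpenRaftKvClient`, which wraps both the public and admin endpoints. A minimal bootstrap-and-write sketch; the port and the learner's advertised address are illustrative:
```rust
use openraft_kv_node::OpenRaftKvClient;
use reqwest::Url;

async fn demo() -> anyhow::Result<()> {
    // Assumes node 0 is listening locally; the port is illustrative.
    let node0 = OpenRaftKvClient::new(Url::parse("http://127.0.0.1:8080/")?);
    node0.init_self().await?; // POST /admin/init: one-node cluster
    // Advertised Raft address of node 1 (illustrative).
    node0.add_learner(1, "127.0.0.1:8081").await?;
    node0.change_membership([0, 1]).await?; // promote node 1 to voter
    node0.write("raft-key-0", "value-0", 0).await?; // serial 0
    let value = node0.read("raft-key-0").await?;
    assert_eq!(value.as_deref(), Some("value-0"));
    Ok(())
}
```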
## Run locally
```bash
OPENRAFT_KV_NODE_BIN="$(pwd)/target/debug/openraft-kv-node" \
cargo run -p openraft-kv-examples --bin openraft_kv_basic_failover
```
Build the node first if you have not done that yet:
```bash
cargo build -p openraft-kv-node
```
## Run with Docker Compose
Build the image first:
```bash
docker build -t openraft-kv-node:local -f examples/openraft_kv/Dockerfile .
```
Then run:
```bash
cargo run -p openraft-kv-examples --bin openraft_kv_compose_failover
```
Set `OPENRAFT_KV_IMAGE` to override the default compose image tag.
## Run on Kubernetes
Build the same image first:
```bash
docker build -t openraft-kv-node:local -f examples/openraft_kv/Dockerfile .
```
Then run:
```bash
cargo run -p openraft-kv-examples --bin openraft_kv_k8s_failover
```
If no cluster is available, the example exits early and prints a skip message.

View File

@ -0,0 +1,28 @@
[package]
edition.workspace = true
license.workspace = true
name = "openraft-kv-examples"
version.workspace = true
[[bin]]
name = "openraft_kv_basic_failover"
path = "src/bin/basic_failover.rs"
[[bin]]
name = "openraft_kv_compose_failover"
path = "src/bin/compose_failover.rs"
[[bin]]
name = "openraft_kv_k8s_failover"
path = "src/bin/k8s_failover.rs"
[dependencies]
anyhow = "1.0"
openraft-kv-node = { path = "../openraft-kv-node" }
openraft-kv-runtime-ext = { path = "../testing/integration" }
openraft-kv-runtime-workloads = { path = "../testing/workloads" }
testing-framework-core = { workspace = true }
testing-framework-runner-k8s = { workspace = true }
tokio = { workspace = true, features = ["full"] }
tracing = { workspace = true }
tracing-subscriber = { version = "0.3", features = ["env-filter"] }

View File

@ -0,0 +1,20 @@
use std::time::Duration;
use openraft_kv_examples::build_failover_scenario;
use openraft_kv_runtime_ext::OpenRaftKvLocalDeployer;
use testing_framework_core::scenario::Deployer;
#[tokio::main]
async fn main() -> anyhow::Result<()> {
tracing_subscriber::fmt()
.with_env_filter(tracing_subscriber::EnvFilter::from_default_env())
.init();
let mut scenario = build_failover_scenario(Duration::from_secs(45), Duration::from_secs(30))?;
let deployer = OpenRaftKvLocalDeployer::default();
let runner = deployer.deploy(&scenario).await?;
runner.run(&mut scenario).await?;
Ok(())
}

View File

@ -0,0 +1,20 @@
use std::time::Duration;
use openraft_kv_examples::build_failover_scenario;
use openraft_kv_runtime_ext::OpenRaftKvComposeDeployer;
use testing_framework_core::scenario::Deployer;
#[tokio::main]
async fn main() -> anyhow::Result<()> {
tracing_subscriber::fmt()
.with_env_filter(tracing_subscriber::EnvFilter::from_default_env())
.init();
let mut scenario = build_failover_scenario(Duration::from_secs(60), Duration::from_secs(40))?;
let deployer = OpenRaftKvComposeDeployer::new();
let runner = deployer.deploy(&scenario).await?;
runner.run(&mut scenario).await?;
Ok(())
}

View File

@ -0,0 +1,195 @@
use std::{sync::Arc, time::Duration};
use anyhow::{Context as _, Result, anyhow};
use openraft_kv_examples::{
INITIAL_WRITE_BATCH, RAFT_KEY_PREFIX, SECOND_WRITE_BATCH, TOTAL_WRITES,
};
use openraft_kv_node::OpenRaftKvClient;
use openraft_kv_runtime_ext::{
OpenRaftClusterObserver, OpenRaftKvEnv, OpenRaftKvK8sDeployer, OpenRaftKvTopology,
OpenRaftManualClusterSourceProvider,
};
use openraft_kv_runtime_workloads::{
OpenRaftMembership, expected_kv, wait_for_observed_leader, wait_for_observed_membership,
wait_for_observed_replication, write_batch,
};
use testing_framework_core::observation::{ObservationHandle, ObservationRuntime};
use testing_framework_runner_k8s::{ManualCluster, ManualClusterError};
use tracing::{info, warn};
#[tokio::main]
async fn main() -> Result<()> {
tracing_subscriber::fmt()
.with_env_filter(tracing_subscriber::EnvFilter::from_default_env())
.init();
let deployer = OpenRaftKvK8sDeployer::new();
let cluster = match deployer
.manual_cluster_from_descriptors(OpenRaftKvTopology::new(3))
.await
{
Ok(cluster) => cluster,
Err(ManualClusterError::ClientInit { source }) => {
warn!("k8s unavailable ({source}); skipping openraft k8s run");
return Ok(());
}
Err(ManualClusterError::InstallStack { source })
if k8s_cluster_unavailable(&source.to_string()) =>
{
warn!("k8s unavailable ({source}); skipping openraft k8s run");
return Ok(());
}
Err(error) => {
return Err(anyhow::Error::new(error)).context("creating openraft k8s cluster");
}
};
run_failover(Arc::new(cluster), Duration::from_secs(40)).await
}
async fn run_failover(cluster: Arc<ManualCluster<OpenRaftKvEnv>>, timeout: Duration) -> Result<()> {
start_cluster(cluster.as_ref()).await?;
let observation_runtime = start_observer(Arc::clone(&cluster)).await?;
let observer = observation_runtime.handle();
client_for_node(cluster.as_ref(), 0)?.init_self().await?;
let initial_leader = wait_for_observed_leader(&observer, timeout, None).await?;
let membership = current_membership(&observer)?;
add_learners_and_promote(
cluster.as_ref(),
&observer,
initial_leader,
&membership,
timeout,
)
.await?;
write_initial_batch(cluster.as_ref(), initial_leader).await?;
restart_leader(cluster.as_ref(), initial_leader).await?;
let new_leader = wait_for_observed_leader(&observer, timeout, Some(initial_leader)).await?;
write_second_batch(cluster.as_ref(), new_leader).await?;
let expected = expected_kv(RAFT_KEY_PREFIX, TOTAL_WRITES);
wait_for_observed_replication(&observer, &expected, timeout).await?;
cluster.stop_all();
Ok(())
}
async fn start_cluster(cluster: &ManualCluster<OpenRaftKvEnv>) -> Result<()> {
cluster.start_node("node-0").await?;
cluster.start_node("node-1").await?;
cluster.start_node("node-2").await?;
cluster.wait_network_ready().await?;
Ok(())
}
async fn start_observer(
cluster: Arc<ManualCluster<OpenRaftKvEnv>>,
) -> Result<ObservationRuntime<OpenRaftClusterObserver>> {
let provider = OpenRaftManualClusterSourceProvider::new(cluster, 3);
ObservationRuntime::start(
provider,
OpenRaftClusterObserver,
OpenRaftClusterObserver::config(),
)
.await
.map_err(anyhow::Error::new)
.context("starting openraft k8s observer")
}
async fn add_learners_and_promote(
cluster: &ManualCluster<OpenRaftKvEnv>,
observer: &ObservationHandle<OpenRaftClusterObserver>,
leader_id: u64,
membership: &OpenRaftMembership,
timeout: Duration,
) -> Result<()> {
let leader = client_for_node(cluster, leader_id)?;
for learner in membership.learner_targets(leader_id) {
info!(
target = learner.node_id,
addr = %learner.public_addr,
"adding learner"
);
leader
.add_learner(learner.node_id, &learner.public_addr)
.await?;
}
let voter_ids = membership.voter_ids();
leader.change_membership(voter_ids.iter().copied()).await?;
wait_for_observed_membership(observer, &voter_ids, timeout).await?;
Ok(())
}
async fn write_initial_batch(cluster: &ManualCluster<OpenRaftKvEnv>, leader_id: u64) -> Result<()> {
let leader = client_for_node(cluster, leader_id)?;
write_batch(&leader, RAFT_KEY_PREFIX, 0, INITIAL_WRITE_BATCH).await?;
Ok(())
}
async fn write_second_batch(cluster: &ManualCluster<OpenRaftKvEnv>, leader_id: u64) -> Result<()> {
let leader = client_for_node(cluster, leader_id)?;
write_batch(
&leader,
RAFT_KEY_PREFIX,
INITIAL_WRITE_BATCH,
SECOND_WRITE_BATCH,
)
.await?;
Ok(())
}
async fn restart_leader(cluster: &ManualCluster<OpenRaftKvEnv>, leader_id: u64) -> Result<()> {
let leader_name = format!("node-{leader_id}");
info!(%leader_name, "restarting current leader");
cluster.restart_node(&leader_name).await?;
cluster.wait_network_ready().await?;
Ok(())
}
fn current_membership(
observer: &ObservationHandle<OpenRaftClusterObserver>,
) -> Result<OpenRaftMembership> {
let snapshot = observer
.latest_snapshot()
.ok_or_else(|| anyhow!("openraft observer has not produced a snapshot yet"))?;
Ok(OpenRaftMembership::from_states(snapshot.value.states()))
}
fn client_for_node(
cluster: &ManualCluster<OpenRaftKvEnv>,
node_id: u64,
) -> Result<OpenRaftKvClient> {
cluster
.node_client(&format!("node-{node_id}"))
.ok_or_else(|| anyhow!("node-{node_id} client missing"))
}
fn k8s_cluster_unavailable(message: &str) -> bool {
message.contains("Unable to connect to the server")
|| message.contains("TLS handshake timeout")
|| message.contains("connection refused")
}

View File

@ -0,0 +1,41 @@
use std::time::Duration;
use openraft_kv_runtime_ext::{OpenRaftKvBuilderExt, OpenRaftKvEnv, OpenRaftKvScenarioBuilder};
use openraft_kv_runtime_workloads::{OpenRaftKvConverges, OpenRaftKvFailoverWorkload};
use testing_framework_core::scenario::{NodeControlCapability, Scenario};
/// Number of writes issued before the leader restart.
pub const INITIAL_WRITE_BATCH: usize = 8;
/// Number of writes issued after the leader restart.
pub const SECOND_WRITE_BATCH: usize = 8;
/// Total write count expected after the scenario completes.
pub const TOTAL_WRITES: usize = INITIAL_WRITE_BATCH + SECOND_WRITE_BATCH;
/// Key prefix shared by the failover workload and convergence expectation.
pub const RAFT_KEY_PREFIX: &str = "raft-key";
/// Builds the standard failover scenario used by the local and compose
/// binaries.
pub fn build_failover_scenario(
run_duration: Duration,
workload_timeout: Duration,
) -> anyhow::Result<Scenario<OpenRaftKvEnv, NodeControlCapability>> {
Ok(
OpenRaftKvScenarioBuilder::deployment_with(|deployment| deployment)
.with_cluster_observer()
.enable_node_control()
.with_run_duration(run_duration)
.with_workload(
OpenRaftKvFailoverWorkload::new()
.first_batch(INITIAL_WRITE_BATCH)
.second_batch(SECOND_WRITE_BATCH)
.timeout(workload_timeout)
.key_prefix(RAFT_KEY_PREFIX),
)
.with_expectation(
OpenRaftKvConverges::new(TOTAL_WRITES)
.timeout(run_duration)
.key_prefix(RAFT_KEY_PREFIX),
)
.build()?,
)
}

View File

@ -0,0 +1,23 @@
[package]
edition.workspace = true
license.workspace = true
name = "openraft-kv-node"
version.workspace = true
[[bin]]
name = "openraft-kv-node"
path = "src/main.rs"
[dependencies]
anyhow = "1.0"
axum = "0.7"
clap = { version = "4.0", features = ["derive"] }
openraft = { workspace = true }
openraft-memstore = { workspace = true }
reqwest = { workspace = true, features = ["json"] }
serde = { workspace = true }
serde_yaml = { workspace = true }
tokio = { workspace = true, features = ["full"] }
tower-http = { version = "0.6", features = ["trace"] }
tracing = { workspace = true }
tracing-subscriber = { version = "0.3", features = ["env-filter"] }

View File

@ -0,0 +1,136 @@
use std::{collections::BTreeSet, time::Duration};
use reqwest::Url;
use serde::{Serialize, de::DeserializeOwned};
use crate::types::{
AddLearnerRequest, AddLearnerResult, ChangeMembershipRequest, ChangeMembershipResult,
InitResult, OpenRaftKvReadRequest, OpenRaftKvReadResponse, OpenRaftKvState,
OpenRaftKvWriteRequest, OpenRaftKvWriteResponse,
};
/// Small HTTP client for the OpenRaft example node and its admin endpoints.
#[derive(Clone)]
pub struct OpenRaftKvClient {
base_url: Url,
client: reqwest::Client,
}
impl OpenRaftKvClient {
/// Builds a client for one node base URL.
#[must_use]
pub fn new(base_url: Url) -> Self {
Self {
base_url,
client: reqwest::Client::builder()
.timeout(Duration::from_secs(2))
.connect_timeout(Duration::from_secs(2))
.build()
.expect("openraft kv client timeout configuration is valid"),
}
}
/// Fetches the node's current Raft and application state.
pub async fn state(&self) -> anyhow::Result<OpenRaftKvState> {
self.get("state").await
}
/// Replicates one key/value write through the current leader.
pub async fn write(
&self,
key: &str,
value: &str,
serial: u64,
) -> anyhow::Result<OpenRaftKvWriteResponse> {
self.post_result(
"kv/write",
&OpenRaftKvWriteRequest {
key: key.to_owned(),
value: value.to_owned(),
serial,
},
)
.await
}
/// Reads one key from the replicated state machine.
pub async fn read(&self, key: &str) -> anyhow::Result<Option<String>> {
let response: OpenRaftKvReadResponse = self
.post_result(
"kv/read",
&OpenRaftKvReadRequest {
key: key.to_owned(),
},
)
.await?;
Ok(response.value)
}
/// Bootstraps a one-node cluster on this node.
///
/// The admin endpoints reply with `Result<(), String>`; the `Err` payload
/// (for example, an already-initialized cluster) is discarded by these
/// helpers, so repeated bootstrap calls return `Ok`.
pub async fn init_self(&self) -> anyhow::Result<()> {
let _: InitResult = self.post("admin/init", &()).await?;
Ok(())
}
/// Registers another node as a learner with the current leader.
pub async fn add_learner(&self, node_id: u64, addr: &str) -> anyhow::Result<()> {
let _: AddLearnerResult = self
.post(
"admin/add-learner",
&AddLearnerRequest {
node_id,
addr: addr.to_owned(),
},
)
.await?;
Ok(())
}
/// Promotes the cluster to the provided voter set.
pub async fn change_membership(
&self,
voters: impl IntoIterator<Item = u64>,
) -> anyhow::Result<()> {
let voters = normalize_voters(voters);
let request = ChangeMembershipRequest { voters };
let _: ChangeMembershipResult = self.post("admin/change-membership", &request).await?;
Ok(())
}
async fn get<T: DeserializeOwned>(&self, path: &str) -> anyhow::Result<T> {
let url = self.base_url.join(path)?;
let response = self.client.get(url).send().await?;
let response = response.error_for_status()?;
Ok(response.json().await?)
}
async fn post<B: Serialize, T: DeserializeOwned>(
&self,
path: &str,
body: &B,
) -> anyhow::Result<T> {
let url = self.base_url.join(path)?;
let response = self.client.post(url).json(body).send().await?;
let response = response.error_for_status()?;
Ok(response.json().await?)
}
async fn post_result<B: Serialize, T: DeserializeOwned>(
&self,
path: &str,
body: &B,
) -> anyhow::Result<T> {
let result: Result<T, String> = self.post(path, body).await?;
result.map_err(anyhow::Error::msg)
}
}
fn normalize_voters(voters: impl IntoIterator<Item = u64>) -> Vec<u64> {
let unique_voters = voters.into_iter().collect::<BTreeSet<_>>();
unique_voters.into_iter().collect()
}

View File

@ -0,0 +1,46 @@
use std::{collections::BTreeMap, fs, path::Path};
use serde::{Deserialize, Serialize};
/// Static node config written by TF for one OpenRaft node process.
#[derive(Clone, Debug, Serialize, Deserialize)]
pub struct OpenRaftKvNodeConfig {
/// Stable OpenRaft node identifier.
pub node_id: u64,
/// HTTP port bound by the node process.
pub http_port: u16,
/// Advertised Raft address for this node.
pub public_addr: String,
/// Advertised Raft addresses for the other known nodes.
#[serde(default)]
pub peer_addrs: BTreeMap<u64, String>,
/// Heartbeat interval passed to the OpenRaft config.
#[serde(default = "default_heartbeat_interval_ms")]
pub heartbeat_interval_ms: u64,
/// Lower election timeout bound passed to OpenRaft.
#[serde(default = "default_election_timeout_min_ms")]
pub election_timeout_min_ms: u64,
/// Upper election timeout bound passed to OpenRaft.
#[serde(default = "default_election_timeout_max_ms")]
pub election_timeout_max_ms: u64,
}
impl OpenRaftKvNodeConfig {
/// Loads one node config from YAML on disk.
pub fn load(path: &Path) -> anyhow::Result<Self> {
let raw = fs::read_to_string(path)?;
Ok(serde_yaml::from_str(&raw)?)
}
}
const fn default_heartbeat_interval_ms() -> u64 {
500
}
const fn default_election_timeout_min_ms() -> u64 {
1_500
}
const fn default_election_timeout_max_ms() -> u64 {
3_000
}

View File

@ -0,0 +1,25 @@
//! OpenRaft-backed key-value node used by the `examples-simple-clusters`
//! branch.
/// HTTP client for interacting with one OpenRaft node.
pub mod client;
/// YAML node configuration used by TF and the node binary.
pub mod config;
mod network;
/// Axum server bootstrap and request handlers for one node process.
pub mod server;
/// Shared request, response, and state payload types.
pub mod types;
/// Re-export of the node HTTP client.
pub use client::OpenRaftKvClient;
/// Re-export of the node YAML config type.
pub use config::OpenRaftKvNodeConfig;
/// Re-export of the public request and state payloads.
pub use types::{
AddLearnerRequest, ChangeMembershipRequest, OpenRaftKvReadRequest, OpenRaftKvReadResponse,
OpenRaftKvState, OpenRaftKvWriteRequest, OpenRaftKvWriteResponse,
};
/// OpenRaft type configuration shared by the in-memory log and state machine.
pub type TypeConfig = openraft_memstore::TypeConfig;

View File

@ -0,0 +1,24 @@
use std::path::PathBuf;
use clap::Parser;
use openraft_kv_node::{config::OpenRaftKvNodeConfig, server::run_server};
use tracing_subscriber::EnvFilter;
#[derive(Parser, Clone, Debug)]
#[command(author, version, about)]
struct Opt {
#[arg(long)]
config: PathBuf,
}
#[tokio::main]
async fn main() -> anyhow::Result<()> {
tracing_subscriber::fmt()
.with_env_filter(EnvFilter::from_default_env())
.with_ansi(false)
.init();
let options = Opt::parse();
let config = OpenRaftKvNodeConfig::load(&options.config)?;
run_server(config).await
}

View File

@ -0,0 +1,158 @@
//! HTTP transport used by OpenRaft to replicate between example nodes.
use std::{collections::BTreeMap, sync::Arc};
use openraft::{
RaftNetworkFactory, RaftNetworkV2,
alias::{SnapshotOf, VoteOf},
errors::{RPCError, StreamingError, Unreachable},
network::RPCOption,
};
use reqwest::Url;
use tokio::sync::RwLock;
use crate::{
TypeConfig,
types::{InstallFullSnapshotBody, SnapshotRpcResult},
};
/// Shared node-address book used by Raft RPC clients.
#[derive(Clone, Default)]
pub struct HttpNetworkFactory {
client: reqwest::Client,
known_nodes: Arc<RwLock<BTreeMap<u64, String>>>,
}
/// Per-target HTTP client used for Raft replication traffic.
pub struct HttpNetworkClient {
client: reqwest::Client,
target: u64,
target_addr: Option<String>,
}
impl HttpNetworkFactory {
/// Creates a network factory backed by one shared node-address map.
#[must_use]
pub fn new(known_nodes: Arc<RwLock<BTreeMap<u64, String>>>) -> Self {
Self {
client: reqwest::Client::new(),
known_nodes,
}
}
}
impl RaftNetworkFactory<TypeConfig> for HttpNetworkFactory {
type Network = HttpNetworkClient;
async fn new_client(&mut self, target: u64, _node: &()) -> Self::Network {
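// The target address is captured once when this client is built; later
// updates to the shared address book only affect clients created afterwards.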
let target_addr = self.known_nodes.read().await.get(&target).cloned();
HttpNetworkClient {
client: self.client.clone(),
target,
target_addr,
}
}
}
impl RaftNetworkV2<TypeConfig> for HttpNetworkClient {
async fn append_entries(
&mut self,
rpc: openraft::raft::AppendEntriesRequest<TypeConfig>,
_option: RPCOption,
) -> Result<openraft::raft::AppendEntriesResponse<TypeConfig>, RPCError<TypeConfig>> {
self.post_rpc("raft/append", &rpc).await
}
async fn vote(
&mut self,
rpc: openraft::raft::VoteRequest<TypeConfig>,
_option: RPCOption,
) -> Result<openraft::raft::VoteResponse<TypeConfig>, RPCError<TypeConfig>> {
self.post_rpc("raft/vote", &rpc).await
}
async fn full_snapshot(
&mut self,
vote: VoteOf<TypeConfig>,
snapshot: SnapshotOf<TypeConfig>,
_cancel: impl std::future::Future<Output = openraft::errors::ReplicationClosed>
+ openraft::OptionalSend
+ 'static,
_option: RPCOption,
) -> Result<openraft::raft::SnapshotResponse<TypeConfig>, StreamingError<TypeConfig>> {
let body = InstallFullSnapshotBody {
vote,
meta: snapshot.meta,
data: snapshot.snapshot.into_inner(),
};
self.post_snapshot("raft/snapshot", &body).await
}
}
impl HttpNetworkClient {
async fn post_rpc<B, T>(&self, path: &str, body: &B) -> Result<T, RPCError<TypeConfig>>
where
B: serde::Serialize,
T: serde::de::DeserializeOwned,
{
let url = self.endpoint_url(path)?;
let response = self
.client
.post(url)
.json(body)
.send()
.await
.map_err(|err| RPCError::Unreachable(Unreachable::new(&err)))?
.error_for_status()
.map_err(|err| RPCError::Unreachable(Unreachable::new(&err)))?;
let result: Result<T, String> = response
.json()
.await
.map_err(|err| RPCError::Unreachable(Unreachable::new(&err)))?;
result.map_err(|err| RPCError::Unreachable(Unreachable::from_string(err)))
}
async fn post_snapshot(
&self,
path: &str,
body: &InstallFullSnapshotBody,
) -> Result<openraft::raft::SnapshotResponse<TypeConfig>, StreamingError<TypeConfig>> {
let url = self
.endpoint_url(path)
.map_err(|err| StreamingError::Unreachable(Unreachable::new(&err)))?;
let response = self
.client
.post(url)
.json(body)
.send()
.await
.map_err(|err| StreamingError::Unreachable(Unreachable::new(&err)))?
.error_for_status()
.map_err(|err| StreamingError::Unreachable(Unreachable::new(&err)))?;
let result: SnapshotRpcResult = response
.json()
.await
.map_err(|err| StreamingError::Unreachable(Unreachable::new(&err)))?;
result.map_err(|err| StreamingError::Unreachable(Unreachable::from_string(err)))
}
fn endpoint_url(&self, path: &str) -> Result<Url, Unreachable<TypeConfig>> {
let Some(addr) = &self.target_addr else {
return Err(Unreachable::from_string(format!(
"target {} has no known address",
self.target
)));
};
let mut url =
Url::parse(&format!("http://{addr}/")).map_err(|err| Unreachable::new(&err))?;
url.set_path(path);
Ok(url)
}
}

View File

@ -0,0 +1,276 @@
//! Axum server that exposes the OpenRaft example node and its admin endpoints.
use std::{
collections::{BTreeMap, BTreeSet},
sync::Arc,
};
use axum::{
Json, Router,
extract::State,
http::StatusCode,
routing::{get, post},
};
use openraft::{Config, Raft, SnapshotPolicy, type_config::async_runtime::WatchReceiver};
use openraft_memstore::{ClientRequest, MemLogStore, MemStateMachine, new_mem_store};
use tokio::sync::RwLock;
use tower_http::trace::TraceLayer;
use tracing::info;
use crate::{
TypeConfig,
config::OpenRaftKvNodeConfig,
network::HttpNetworkFactory,
types::{
AddLearnerRequest, AddLearnerResult, AppendRpcResult, ChangeMembershipRequest,
ChangeMembershipResult, InitResult, InstallSnapshotBody, MetricsResult,
OpenRaftKvReadRequest, OpenRaftKvReadResponse, OpenRaftKvState, OpenRaftKvWriteRequest,
OpenRaftKvWriteResponse, SnapshotRpcResult, VoteRpcResult,
},
};
type KnownNodes = Arc<RwLock<BTreeMap<u64, String>>>;
/// Shared state used by the HTTP handlers exposed by one node.
#[derive(Clone)]
pub struct AppState {
config: OpenRaftKvNodeConfig,
raft: Raft<TypeConfig, Arc<MemStateMachine>>,
state_machine: Arc<MemStateMachine>,
known_nodes: KnownNodes,
}
impl AppState {
/// Builds the application state for one node process.
pub fn new(
config: OpenRaftKvNodeConfig,
raft: Raft<TypeConfig, Arc<MemStateMachine>>,
state_machine: Arc<MemStateMachine>,
known_nodes: KnownNodes,
) -> Self {
Self {
config,
raft,
state_machine,
known_nodes,
}
}
}
/// Starts one OpenRaft-backed HTTP node.
pub async fn run_server(config: OpenRaftKvNodeConfig) -> anyhow::Result<()> {
let raft_config = Arc::new(
Config {
cluster_name: "openraft-kv".to_owned(),
heartbeat_interval: config.heartbeat_interval_ms,
election_timeout_min: config.election_timeout_min_ms,
election_timeout_max: config.election_timeout_max_ms,
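// No automatic snapshot builds; the example keeps the whole log in memory
// and only transfers a full snapshot when OpenRaft explicitly requests one.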
snapshot_policy: SnapshotPolicy::Never,
..Default::default()
}
.validate()?,
);
let known_nodes = Arc::new(RwLock::new(known_nodes(&config)));
let (log_store, state_machine): (Arc<MemLogStore>, Arc<MemStateMachine>) = new_mem_store();
let network = HttpNetworkFactory::new(known_nodes.clone());
let raft = Raft::new(
config.node_id,
raft_config,
network,
log_store,
state_machine.clone(),
)
.await?;
let app_state = AppState::new(config.clone(), raft, state_machine, known_nodes);
let app = router(app_state);
let address = std::net::SocketAddr::from(([0, 0, 0, 0], config.http_port));
info!(
node_id = config.node_id,
public_addr = %config.public_addr,
peers = ?config.peer_addrs,
%address,
"starting openraft kv node"
);
let listener = tokio::net::TcpListener::bind(address).await?;
axum::serve(listener, app).await?;
Ok(())
}
fn router(app_state: AppState) -> Router {
let app_routes = Router::new()
.route("/healthz", get(healthz))
.route("/state", get(cluster_state))
.route("/kv/write", post(write))
.route("/kv/read", post(read));
let admin_routes = Router::new()
.route("/admin/init", post(init))
.route("/admin/add-learner", post(add_learner))
.route("/admin/change-membership", post(change_membership))
.route("/admin/metrics", get(metrics));
let raft_routes = Router::new()
.route("/raft/vote", post(vote))
.route("/raft/append", post(append))
.route("/raft/snapshot", post(snapshot));
app_routes
.merge(admin_routes)
.merge(raft_routes)
.layer(TraceLayer::new_for_http())
.with_state(app_state)
}
async fn healthz() -> &'static str {
"ok"
}
async fn cluster_state(State(app): State<AppState>) -> Result<Json<OpenRaftKvState>, StatusCode> {
let metrics = app.raft.metrics().borrow_watched().clone();
let sm = app.state_machine.get_state_machine().await;
let voters = metrics
.membership_config
.membership()
.voter_ids()
.collect::<Vec<_>>();
let kv = sm.client_status.into_iter().collect::<BTreeMap<_, _>>();
Ok(Json(OpenRaftKvState {
node_id: app.config.node_id,
public_addr: app.config.public_addr.clone(),
role: format!("{:?}", metrics.state),
current_leader: metrics.current_leader,
current_term: metrics.current_term,
last_log_index: metrics.last_log_index,
last_applied_index: metrics.last_applied.as_ref().map(|log_id| log_id.index()),
voters,
kv,
}))
}
async fn metrics(State(app): State<AppState>) -> Json<MetricsResult> {
Json(Ok(app.raft.metrics().borrow_watched().clone()))
}
async fn init(State(app): State<AppState>) -> Json<InitResult> {
let members = BTreeSet::from([app.config.node_id]);
Json(
app.raft
.initialize(members)
.await
.map_err(|err| err.to_string()),
)
}
async fn add_learner(
State(app): State<AppState>,
Json(request): Json<AddLearnerRequest>,
) -> Json<AddLearnerResult> {
let mut known_nodes = app.known_nodes.write().await;
known_nodes.insert(request.node_id, request.addr.clone());
drop(known_nodes);
Json(
app.raft
.add_learner(request.node_id, (), true)
.await
.map(|_| ())
.map_err(|err| err.to_string()),
)
}
async fn change_membership(
State(app): State<AppState>,
Json(request): Json<ChangeMembershipRequest>,
) -> Json<ChangeMembershipResult> {
Json(
app.raft
.change_membership(request.voters.into_iter().collect::<BTreeSet<_>>(), false)
.await
.map(|_| ())
.map_err(|err| err.to_string()),
)
}
async fn write(
State(app): State<AppState>,
Json(request): Json<OpenRaftKvWriteRequest>,
) -> Json<Result<OpenRaftKvWriteResponse, String>> {
let result = app
.raft
.client_write(ClientRequest {
client: request.key,
serial: request.serial,
status: request.value,
})
.await
.map(|response| OpenRaftKvWriteResponse {
previous: response.response().0.clone(),
})
.map_err(|err| err.to_string());
Json(result)
}
async fn read(
State(app): State<AppState>,
Json(request): Json<OpenRaftKvReadRequest>,
) -> Json<Result<OpenRaftKvReadResponse, String>> {
let sm = app.state_machine.get_state_machine().await;
Json(Ok(OpenRaftKvReadResponse {
value: sm.client_status.get(&request.key).cloned(),
}))
}
async fn vote(
State(app): State<AppState>,
Json(request): Json<openraft::raft::VoteRequest<TypeConfig>>,
) -> Json<VoteRpcResult> {
Json(app.raft.vote(request).await.map_err(|err| err.to_string()))
}
async fn append(
State(app): State<AppState>,
Json(request): Json<openraft::raft::AppendEntriesRequest<TypeConfig>>,
) -> Json<AppendRpcResult> {
Json(
app.raft
.append_entries(request)
.await
.map_err(|err| err.to_string()),
)
}
async fn snapshot(
State(app): State<AppState>,
Json(request): Json<InstallSnapshotBody>,
) -> Json<SnapshotRpcResult> {
let snapshot = openraft::alias::SnapshotOf::<TypeConfig> {
meta: request.meta,
snapshot: std::io::Cursor::new(request.data),
};
Json(
app.raft
.install_full_snapshot(request.vote, snapshot)
.await
.map_err(|err| err.to_string()),
)
}
fn known_nodes(config: &OpenRaftKvNodeConfig) -> BTreeMap<u64, String> {
let mut known_nodes = config.peer_addrs.clone();
known_nodes.insert(config.node_id, config.public_addr.clone());
known_nodes
}

View File

@ -0,0 +1,112 @@
use std::collections::BTreeMap;
use openraft::{
RaftMetrics,
alias::{SnapshotMetaOf, VoteOf},
raft::InstallSnapshotRequest,
};
use serde::{Deserialize, Serialize};
use crate::TypeConfig;
/// Result shape used by the simple admin endpoints in this example.
pub type OpenRaftResult<T> = Result<T, String>;
/// Request body for a replicated write submitted through the leader.
#[derive(Clone, Debug, Serialize, Deserialize)]
pub struct OpenRaftKvWriteRequest {
/// Application key to write.
pub key: String,
/// Value stored for the key.
pub value: String,
/// Client-side serial used by OpenRaft's example state machine.
pub serial: u64,
}
/// Response body returned after a replicated write is committed.
#[derive(Clone, Debug, Serialize, Deserialize)]
pub struct OpenRaftKvWriteResponse {
/// Previous value stored under the key, if any.
pub previous: Option<String>,
}
/// Request body for a key lookup.
#[derive(Clone, Debug, Serialize, Deserialize)]
pub struct OpenRaftKvReadRequest {
/// Application key to look up.
pub key: String,
}
/// Response body returned by a key lookup.
#[derive(Clone, Debug, Serialize, Deserialize)]
pub struct OpenRaftKvReadResponse {
/// Current value stored under the key, if any.
pub value: Option<String>,
}
/// Admin request used to register a learner in the current cluster.
#[derive(Clone, Debug, Serialize, Deserialize)]
pub struct AddLearnerRequest {
/// OpenRaft node identifier for the learner.
pub node_id: u64,
/// Advertised Raft address for the learner.
pub addr: String,
}
/// Admin request used to promote the cluster to a concrete voter set.
#[derive(Clone, Debug, Serialize, Deserialize)]
pub struct ChangeMembershipRequest {
/// Full voter set that should own the cluster after the change.
pub voters: Vec<u64>,
}
/// Snapshot of one node's externally visible Raft and application state.
#[derive(Clone, Debug, Serialize, Deserialize)]
pub struct OpenRaftKvState {
/// Stable OpenRaft node identifier.
pub node_id: u64,
/// Advertised Raft address for this node.
pub public_addr: String,
/// Current OpenRaft role rendered as text.
pub role: String,
/// Leader known by this node, if any.
pub current_leader: Option<u64>,
/// Current term reported by this node.
pub current_term: u64,
/// Highest log index stored locally.
pub last_log_index: Option<u64>,
/// Highest log index applied to the state machine.
pub last_applied_index: Option<u64>,
/// Current voter set reported by this node.
pub voters: Vec<u64>,
/// Application state machine contents.
pub kv: BTreeMap<String, String>,
}
/// JSON representation used for full-snapshot replication over HTTP.
#[derive(Clone, Debug, Serialize, Deserialize)]
pub struct InstallFullSnapshotBody {
/// Vote bundled with the snapshot transfer.
pub vote: VoteOf<TypeConfig>,
/// Snapshot metadata describing the transferred state.
pub meta: SnapshotMetaOf<TypeConfig>,
/// Serialized state machine bytes.
pub data: Vec<u8>,
}
/// Serialized result of a vote RPC.
pub type VoteRpcResult = OpenRaftResult<openraft::raft::VoteResponse<TypeConfig>>;
/// Serialized result of an append-entries RPC.
pub type AppendRpcResult = OpenRaftResult<openraft::raft::AppendEntriesResponse<TypeConfig>>;
/// Serialized result of a full-snapshot RPC.
pub type SnapshotRpcResult = OpenRaftResult<openraft::raft::SnapshotResponse<TypeConfig>>;
/// JSON payload returned by the metrics endpoint.
pub type MetricsResult = OpenRaftResult<RaftMetrics<TypeConfig>>;
/// JSON payload returned by `/admin/init`.
pub type InitResult = OpenRaftResult<()>;
/// JSON payload returned by `/admin/add-learner`.
pub type AddLearnerResult = OpenRaftResult<()>;
/// JSON payload returned by `/admin/change-membership`.
pub type ChangeMembershipResult = OpenRaftResult<()>;
/// Request type accepted by the snapshot endpoint.
pub type InstallSnapshotBody = InstallSnapshotRequest<TypeConfig>;

View File

@ -0,0 +1,14 @@
[package]
edition.workspace = true
license.workspace = true
name = "openraft-kv-runtime-ext"
version.workspace = true
[dependencies]
async-trait = { workspace = true }
openraft-kv-node = { path = "../../openraft-kv-node" }
reqwest = { workspace = true }
testing-framework-core = { workspace = true }
testing-framework-runner-compose = { workspace = true }
testing-framework-runner-k8s = { workspace = true }
testing-framework-runner-local = { workspace = true }

View File

@ -0,0 +1,59 @@
use std::io::Error;
use openraft_kv_node::{OpenRaftKvClient, OpenRaftKvNodeConfig};
use testing_framework_core::scenario::{
Application, ClusterNodeConfigApplication, ClusterNodeView, ClusterPeerView, DynError,
NodeAccess, serialize_cluster_yaml_config,
};
/// Cluster topology used by the OpenRaft example scenarios (three nodes by default).
pub type OpenRaftKvTopology = testing_framework_core::topology::ClusterTopology;
/// Application environment wiring for the OpenRaft-backed key-value example.
pub struct OpenRaftKvEnv;
impl Application for OpenRaftKvEnv {
type Deployment = OpenRaftKvTopology;
type NodeClient = OpenRaftKvClient;
type NodeConfig = OpenRaftKvNodeConfig;
fn build_node_client(access: &NodeAccess) -> Result<Self::NodeClient, DynError> {
Ok(OpenRaftKvClient::new(access.api_base_url()?))
}
fn node_readiness_path() -> &'static str {
"/healthz"
}
}
impl ClusterNodeConfigApplication for OpenRaftKvEnv {
type ConfigError = Error;
fn static_network_port() -> u16 {
8080
}
fn build_cluster_node_config(
node: &ClusterNodeView,
peers: &[ClusterPeerView],
) -> Result<Self::NodeConfig, Self::ConfigError> {
Ok(OpenRaftKvNodeConfig {
node_id: node.index() as u64,
http_port: node.network_port(),
public_addr: node.authority(),
peer_addrs: peers
.iter()
.map(|peer| (peer.index() as u64, peer.authority()))
.collect(),
heartbeat_interval_ms: 500,
election_timeout_min_ms: 1_500,
election_timeout_max_ms: 3_000,
})
}
fn serialize_cluster_node_config(
config: &Self::NodeConfig,
) -> Result<String, Self::ConfigError> {
serialize_cluster_yaml_config(config).map_err(Error::other)
}
}

View File

@ -0,0 +1,112 @@
use std::{fs, path::Path};
use testing_framework_core::{
cfgsync::StaticNodeConfigProvider,
scenario::{Application, DynError},
topology::DeploymentDescriptor,
};
use testing_framework_runner_compose::{
BinaryConfigNodeSpec, ComposeDeployEnv, ComposeDescriptor, NodeDescriptor,
binary_config_node_runtime_spec, node_identifier,
};
use crate::OpenRaftKvEnv;
const NODE_CONFIG_PATH: &str = "/etc/openraft-kv/config.yaml";
const COMPOSE_HTTP_PORT_BASE: u16 = 47_080;
fn compose_node_spec() -> BinaryConfigNodeSpec {
BinaryConfigNodeSpec::conventional(
"/usr/local/bin/openraft-kv-node",
NODE_CONFIG_PATH,
vec![8080],
)
}
fn fixed_loopback_port_binding(host_port: u16, container_port: u16) -> String {
format!("127.0.0.1:{host_port}:{container_port}")
}
impl ComposeDeployEnv for OpenRaftKvEnv {
fn prepare_compose_configs(
path: &Path,
topology: &<Self as Application>::Deployment,
_cfgsync_port: u16,
_metrics_otlp_ingest_url: Option<&reqwest::Url>,
) -> Result<(), DynError> {
let hostnames = Self::cfgsync_hostnames(topology);
let stack_dir = path
.parent()
.ok_or_else(|| std::io::Error::other("compose config path has no parent"))?;
let configs_dir = stack_dir.join("configs");
fs::create_dir_all(&configs_dir)?;
for index in 0..topology.node_count() {
let mut config = Self::build_node_config(topology, index)?;
Self::rewrite_for_hostnames(topology, index, &hostnames, &mut config)?;
let rendered = Self::serialize_node_config(&config)?;
fs::write(
configs_dir.join(Self::static_node_config_file_name(index)),
rendered,
)?;
}
Ok(())
}
fn static_node_config_file_name(index: usize) -> String {
format!("node-{index}.yaml")
}
fn binary_config_node_spec(
_topology: &<Self as Application>::Deployment,
_index: usize,
) -> Result<Option<BinaryConfigNodeSpec>, DynError> {
Ok(Some(compose_node_spec()))
}
fn compose_descriptor(
topology: &<Self as Application>::Deployment,
_cfgsync_port: u16,
) -> Result<ComposeDescriptor, DynError> {
let spec = compose_node_spec();
let nodes = (0..topology.node_count())
.map(|index| {
let runtime = binary_config_node_runtime_spec(index, &spec);
let file_name = Self::static_node_config_file_name(index);
let host_port = COMPOSE_HTTP_PORT_BASE + index as u16;
let ports = compose_node_ports(host_port, &runtime.container_ports);
NodeDescriptor::new(
node_identifier(index),
runtime.image,
runtime.entrypoint,
vec![format!(
"./stack/configs/{file_name}:{}:ro",
spec.config_container_path
)],
runtime.extra_hosts,
ports,
runtime.container_ports,
runtime.environment,
runtime.platform,
)
})
.collect();
Ok(ComposeDescriptor::new(nodes))
}
}
fn compose_node_ports(host_port: u16, container_ports: &[u16]) -> Vec<String> {
container_ports
.iter()
.map(|port| {
// OpenRaft failover restarts the leader. Fixed host ports keep TF
// clients stable across `docker compose restart`.
fixed_loopback_port_binding(host_port, *port)
})
.collect()
}

View File

@ -0,0 +1,21 @@
use testing_framework_runner_k8s::{BinaryConfigK8sSpec, K8sBinaryApp};
use crate::OpenRaftKvEnv;
const CONTAINER_CONFIG_PATH: &str = "/etc/openraft-kv/config.yaml";
const CONTAINER_HTTP_PORT: u16 = 8080;
const SERVICE_TESTING_PORT: u16 = 8081;
const NODE_NAME_PREFIX: &str = "openraft-kv-node";
impl K8sBinaryApp for OpenRaftKvEnv {
fn k8s_binary_spec() -> BinaryConfigK8sSpec {
BinaryConfigK8sSpec::conventional(
"openraft-kv",
NODE_NAME_PREFIX,
"/usr/local/bin/openraft-kv-node",
CONTAINER_CONFIG_PATH,
CONTAINER_HTTP_PORT,
SERVICE_TESTING_PORT,
)
}
}

View File

@ -0,0 +1,18 @@
mod app;
mod compose_env;
mod k8s_env;
mod local_env;
mod observation;
pub mod scenario;
pub use app::*;
pub use observation::*;
pub use scenario::{OpenRaftKvBuilderExt, OpenRaftKvScenarioBuilder};
/// Local process deployer for the OpenRaft example app.
pub type OpenRaftKvLocalDeployer = testing_framework_runner_local::ProcessDeployer<OpenRaftKvEnv>;
/// Docker Compose deployer for the OpenRaft example app.
pub type OpenRaftKvComposeDeployer =
testing_framework_runner_compose::ComposeDeployer<OpenRaftKvEnv>;
/// Kubernetes deployer for the OpenRaft example app.
pub type OpenRaftKvK8sDeployer = testing_framework_runner_k8s::K8sDeployer<OpenRaftKvEnv>;

View File

@ -0,0 +1,125 @@
use std::collections::{BTreeMap, HashMap};
use openraft_kv_node::OpenRaftKvNodeConfig;
use testing_framework_core::{
scenario::{DynError, StartNodeOptions},
topology::DeploymentDescriptor,
};
use testing_framework_runner_local::{
BuiltNodeConfig, LocalDeployerEnv, LocalNodePorts, LocalProcessSpec, NodeConfigEntry,
reserve_local_node_ports, yaml_node_config,
};
use crate::OpenRaftKvEnv;
impl LocalDeployerEnv for OpenRaftKvEnv {
fn build_node_config_from_template(
_topology: &Self::Deployment,
index: usize,
_peer_ports_by_name: &HashMap<String, u16>,
_options: &StartNodeOptions<Self>,
peer_ports: &[u16],
template_config: Option<&OpenRaftKvNodeConfig>,
) -> Result<BuiltNodeConfig<OpenRaftKvNodeConfig>, DynError> {
let mut reserved = reserve_local_node_ports(1, &[], "node")
.map_err(|source| -> DynError { source.into() })?;
let ports = reserved
.pop()
.ok_or_else(|| std::io::Error::other("failed to reserve local node ports"))?;
let mut config = template_config
.cloned()
.unwrap_or_else(|| local_node_config(index, ports.network_port(), BTreeMap::new()));
// OpenRaft peer config is index-sensitive, so local restarts must rebuild
// the full peer map from the current reserved port set.
let network_port = ports.network_port();
config.node_id = index as u64;
config.http_port = network_port;
config.public_addr = local_addr(network_port);
config.peer_addrs = peer_addrs_from_ports(peer_ports, index);
Ok(BuiltNodeConfig {
config,
network_port,
})
}
fn build_initial_node_configs(
topology: &Self::Deployment,
) -> Result<
Vec<NodeConfigEntry<OpenRaftKvNodeConfig>>,
testing_framework_runner_local::process::ProcessSpawnError,
> {
let reserved_ports = reserve_local_node_ports(topology.node_count(), &[], "node")?;
let peer_ports = reserved_ports
.iter()
.map(LocalNodePorts::network_port)
.collect::<Vec<_>>();
// Build every node from the same reserved port view so the initial
// cluster starts with a consistent peer list on all nodes.
Ok(reserved_ports
.iter()
.enumerate()
.map(|(index, ports)| NodeConfigEntry {
name: format!("node-{index}"),
config: local_node_config(
index,
ports.network_port(),
peer_addrs_from_ports(&peer_ports, index),
),
})
.collect())
}
fn initial_node_name_prefix() -> &'static str {
"node"
}
fn local_process_spec() -> Option<LocalProcessSpec> {
Some(
LocalProcessSpec::new("OPENRAFT_KV_NODE_BIN", "openraft-kv-node").with_rust_log("info"),
)
}
fn render_local_config(config: &OpenRaftKvNodeConfig) -> Result<Vec<u8>, DynError> {
yaml_node_config(config)
}
fn http_api_port(config: &OpenRaftKvNodeConfig) -> Option<u16> {
Some(config.http_port)
}
}
fn local_node_config(
index: usize,
network_port: u16,
peer_addrs: BTreeMap<u64, String>,
) -> OpenRaftKvNodeConfig {
OpenRaftKvNodeConfig {
node_id: index as u64,
http_port: network_port,
public_addr: local_addr(network_port),
peer_addrs,
heartbeat_interval_ms: 500,
election_timeout_min_ms: 1_500,
election_timeout_max_ms: 3_000,
}
}
fn peer_addrs_from_ports(peer_ports: &[u16], local_index: usize) -> BTreeMap<u64, String> {
peer_ports
.iter()
.enumerate()
.filter(|(peer_index, _)| *peer_index != local_index)
.map(|(peer_index, peer_port)| (peer_index as u64, local_addr(*peer_port)))
.collect()
}
fn local_addr(port: u16) -> String {
format!("127.0.0.1:{port}")
}

View File

@ -0,0 +1,262 @@
use std::{
collections::{BTreeMap, BTreeSet},
sync::Arc,
time::Duration,
};
use async_trait::async_trait;
use openraft_kv_node::{OpenRaftKvClient, OpenRaftKvState};
use testing_framework_core::{
observation::{
BoxedSourceProvider, ObservationConfig, ObservedSource, Observer, StaticSourceProvider,
},
scenario::{Application, DynError, NodeClients},
};
use testing_framework_runner_k8s::ManualCluster;
use crate::OpenRaftKvEnv;
const OBSERVATION_INTERVAL: Duration = Duration::from_millis(250);
const OBSERVATION_HISTORY_LIMIT: usize = 16;
/// Materialized OpenRaft cluster state built from the latest node polls.
#[derive(Clone, Debug, Default)]
pub struct OpenRaftClusterSnapshot {
states: Vec<OpenRaftKvState>,
failures: Vec<OpenRaftSourceFailure>,
}
impl OpenRaftClusterSnapshot {
/// Returns the successfully observed node states sorted by node id.
#[must_use]
pub fn states(&self) -> &[OpenRaftKvState] {
&self.states
}
/// Returns `true` when the snapshot contains no successful node states.
#[must_use]
pub fn is_empty(&self) -> bool {
self.states.is_empty()
}
/// Returns the unique observed leader when all responding nodes agree.
#[must_use]
pub fn agreed_leader(&self, different_from: Option<u64>) -> Option<u64> {
let observed = self
.states
.iter()
.filter_map(|state| state.current_leader)
.collect::<BTreeSet<_>>();
let leader = observed.iter().next().copied()?;
(observed.len() == 1 && different_from != Some(leader)).then_some(leader)
}
/// Returns `true` when every observed node reports the expected voter set.
#[must_use]
pub fn all_voters_match(&self, expected_voters: &BTreeSet<u64>) -> bool {
!self.states.is_empty()
&& self.failures.is_empty()
&& self.states.iter().all(|state| {
state.voters.iter().copied().collect::<BTreeSet<_>>() == *expected_voters
})
}
/// Returns `true` when every observed node exposes the expected replicated
/// key/value data.
#[must_use]
pub fn all_kv_match(
&self,
expected: &BTreeMap<String, String>,
full_voter_set: &[u64],
) -> bool {
!self.states.is_empty()
&& self.failures.is_empty()
&& self.states.iter().all(|state| {
state.current_leader.is_some()
&& state.voters == full_voter_set
&& expected
.iter()
.all(|(key, value)| state.kv.get(key) == Some(value))
})
}
/// Returns a concise summary for timeout and validation errors.
#[must_use]
pub fn summary(&self) -> String {
let mut lines = self
.states
.iter()
.map(|state| {
format!(
"node={} leader={:?} voters={:?} keys={}",
state.node_id,
state.current_leader,
state.voters,
state.kv.len()
)
})
.collect::<Vec<_>>();
lines.extend(self.failures.iter().map(OpenRaftSourceFailure::summary));
if lines.is_empty() {
return "no state observed yet".to_owned();
}
lines.join("; ")
}
}
/// One failed source read captured during an observation cycle.
#[derive(Clone, Debug, Eq, PartialEq)]
pub struct OpenRaftSourceFailure {
source_name: String,
message: String,
}
impl OpenRaftSourceFailure {
fn new(source_name: &str, message: &str) -> Self {
Self {
source_name: source_name.to_owned(),
message: message.to_owned(),
}
}
fn summary(&self) -> String {
format!("source={} error={}", self.source_name, self.message)
}
}
/// Observer that keeps the latest per-node OpenRaft state.
#[derive(Clone, Debug, Default)]
pub struct OpenRaftClusterObserver;
impl OpenRaftClusterObserver {
/// Default runtime configuration for the OpenRaft example observer.
#[must_use]
pub fn config() -> ObservationConfig {
ObservationConfig {
interval: OBSERVATION_INTERVAL,
history_limit: OBSERVATION_HISTORY_LIMIT,
}
}
}
/// Captures one best-effort OpenRaft cluster snapshot from the provided node
/// clients.
pub async fn capture_openraft_cluster_snapshot(
clients: &[OpenRaftKvClient],
) -> OpenRaftClusterSnapshot {
capture_cluster_snapshot(&named_sources(clients.to_vec())).await
}
#[async_trait]
impl Observer for OpenRaftClusterObserver {
type Source = OpenRaftKvClient;
type State = OpenRaftClusterSnapshot;
type Snapshot = OpenRaftClusterSnapshot;
type Event = ();
async fn init(
&self,
sources: &[ObservedSource<Self::Source>],
) -> Result<Self::State, DynError> {
Ok(capture_cluster_snapshot(sources).await)
}
async fn poll(
&self,
sources: &[ObservedSource<Self::Source>],
state: &mut Self::State,
) -> Result<Vec<Self::Event>, DynError> {
*state = capture_cluster_snapshot(sources).await;
Ok(Vec::new())
}
fn snapshot(&self, state: &Self::State) -> Self::Snapshot {
state.clone()
}
}
/// Builds the fixed source provider used by the scenario-based OpenRaft
/// examples.
pub fn openraft_cluster_source_provider(
_deployment: &<OpenRaftKvEnv as Application>::Deployment,
node_clients: NodeClients<OpenRaftKvEnv>,
) -> Result<BoxedSourceProvider<OpenRaftKvClient>, DynError> {
Ok(Box::new(StaticSourceProvider::new(named_sources(
node_clients.snapshot(),
))))
}
/// Dynamic source provider backed by a manual cluster.
///
/// This keeps observation aligned with the latest client handles after manual
/// node restarts.
#[derive(Clone)]
pub struct OpenRaftManualClusterSourceProvider {
cluster: Arc<ManualCluster<OpenRaftKvEnv>>,
node_names: Vec<String>,
}
impl OpenRaftManualClusterSourceProvider {
/// Builds a provider for the fixed node names used by the OpenRaft
/// examples.
#[must_use]
pub fn new(cluster: Arc<ManualCluster<OpenRaftKvEnv>>, node_count: usize) -> Self {
Self {
cluster,
node_names: (0..node_count)
.map(|index| format!("node-{index}"))
.collect(),
}
}
}
#[async_trait]
impl testing_framework_core::observation::SourceProvider<OpenRaftKvClient>
for OpenRaftManualClusterSourceProvider
{
async fn sources(&self) -> Result<Vec<ObservedSource<OpenRaftKvClient>>, DynError> {
Ok(self
.node_names
.iter()
.filter_map(|name| {
self.cluster
.node_client(name)
.map(|client| ObservedSource::new(name, client))
})
.collect())
}
}
fn named_sources(clients: Vec<OpenRaftKvClient>) -> Vec<ObservedSource<OpenRaftKvClient>> {
clients
.into_iter()
.enumerate()
.map(|(index, client)| ObservedSource::new(&format!("node-{index}"), client))
.collect()
}
async fn capture_cluster_snapshot(
sources: &[ObservedSource<OpenRaftKvClient>],
) -> OpenRaftClusterSnapshot {
let mut states = Vec::with_capacity(sources.len());
let mut failures = Vec::new();
for source in sources {
match source.source.state().await {
Ok(state) => states.push(state),
Err(error) => {
failures.push(OpenRaftSourceFailure::new(&source.name, &error.to_string()))
}
}
}
states.sort_by_key(|state| state.node_id);
OpenRaftClusterSnapshot { states, failures }
}

View File

@ -0,0 +1,32 @@
use testing_framework_core::scenario::{CoreBuilderExt, ScenarioBuilder};
use crate::{
OpenRaftClusterObserver, OpenRaftKvEnv, OpenRaftKvTopology, openraft_cluster_source_provider,
};
/// Scenario builder alias used by the OpenRaft example binaries.
pub type OpenRaftKvScenarioBuilder = ScenarioBuilder<OpenRaftKvEnv>;
/// Convenience helpers for constructing the fixed three-node OpenRaft topology.
pub trait OpenRaftKvBuilderExt: Sized {
/// Starts from the default three-node deployment and lets callers adjust
/// it.
fn deployment_with(f: impl FnOnce(OpenRaftKvTopology) -> OpenRaftKvTopology) -> Self;
/// Attaches the default OpenRaft cluster observer to the scenario.
fn with_cluster_observer(self) -> Self;
}
impl OpenRaftKvBuilderExt for OpenRaftKvScenarioBuilder {
fn deployment_with(f: impl FnOnce(OpenRaftKvTopology) -> OpenRaftKvTopology) -> Self {
OpenRaftKvScenarioBuilder::with_deployment(f(OpenRaftKvTopology::new(3)))
}
fn with_cluster_observer(self) -> Self {
self.with_observer(
OpenRaftClusterObserver,
openraft_cluster_source_provider,
OpenRaftClusterObserver::config(),
)
}
}

View File

@ -0,0 +1,15 @@
[package]
edition.workspace = true
license.workspace = true
name = "openraft-kv-runtime-workloads"
version.workspace = true
[dependencies]
anyhow = "1.0"
async-trait = { workspace = true }
openraft-kv-node = { path = "../../openraft-kv-node" }
openraft-kv-runtime-ext = { path = "../integration" }
testing-framework-core = { workspace = true }
thiserror = "2.0"
tokio = { workspace = true, features = ["full"] }
tracing = { workspace = true }

View File

@ -0,0 +1,61 @@
use std::time::Duration;
use async_trait::async_trait;
use openraft_kv_runtime_ext::{OpenRaftClusterObserver, OpenRaftKvEnv};
use testing_framework_core::{
observation::ObservationHandle,
scenario::{DynError, Expectation, RunContext},
};
use crate::support::{expected_kv, wait_for_observed_replication};
/// Expectation that waits for the full voter set and the writes from this run
/// to converge on every node.
#[derive(Clone)]
pub struct OpenRaftKvConverges {
total_writes: usize,
timeout: Duration,
key_prefix: String,
}
impl OpenRaftKvConverges {
/// Creates a convergence check for the given number of replicated writes.
#[must_use]
pub fn new(total_writes: usize) -> Self {
Self {
total_writes,
timeout: Duration::from_secs(30),
key_prefix: "raft-key".to_owned(),
}
}
/// Overrides the key prefix used to derive expected writes.
#[must_use]
pub fn key_prefix(mut self, value: &str) -> Self {
self.key_prefix = value.to_owned();
self
}
/// Overrides the convergence timeout.
#[must_use]
pub const fn timeout(mut self, value: Duration) -> Self {
self.timeout = value;
self
}
}
#[async_trait]
impl Expectation<OpenRaftKvEnv> for OpenRaftKvConverges {
fn name(&self) -> &str {
"openraft_kv_converges"
}
async fn evaluate(&mut self, ctx: &RunContext<OpenRaftKvEnv>) -> Result<(), DynError> {
let expected = expected_kv(&self.key_prefix, self.total_writes);
let observer = ctx.require_extension::<ObservationHandle<OpenRaftClusterObserver>>()?;
wait_for_observed_replication(&observer, &expected, self.timeout).await?;
Ok(())
}
}

View File

@ -0,0 +1,207 @@
use std::time::Duration;
use async_trait::async_trait;
use openraft_kv_node::OpenRaftKvClient;
use openraft_kv_runtime_ext::{OpenRaftClusterObserver, OpenRaftKvEnv};
use testing_framework_core::{
observation::ObservationHandle,
scenario::{DynError, RunContext, Workload},
};
use tracing::info;
use crate::support::{
OpenRaftMembership, ensure_cluster_size, resolve_client_for_node, wait_for_observed_leader,
wait_for_observed_membership, write_batch,
};
/// Workload that bootstraps the cluster, expands it to three voters, writes one
/// batch, restarts the leader, then writes a second batch through the new
/// leader.
#[derive(Clone)]
pub struct OpenRaftKvFailoverWorkload {
first_batch: usize,
second_batch: usize,
timeout: Duration,
key_prefix: String,
}
impl OpenRaftKvFailoverWorkload {
/// Creates the default failover workload configuration.
#[must_use]
pub fn new() -> Self {
Self {
first_batch: 8,
second_batch: 8,
timeout: Duration::from_secs(30),
key_prefix: "raft-key".to_owned(),
}
}
/// Sets the number of writes issued before the leader restart.
#[must_use]
pub const fn first_batch(mut self, value: usize) -> Self {
self.first_batch = value;
self
}
/// Sets the number of writes issued after the leader restart.
#[must_use]
pub const fn second_batch(mut self, value: usize) -> Self {
self.second_batch = value;
self
}
/// Overrides the key prefix used for generated writes.
#[must_use]
pub fn key_prefix(mut self, value: &str) -> Self {
self.key_prefix = value.to_owned();
self
}
/// Overrides the timeout used for leader and membership transitions.
#[must_use]
pub const fn timeout(mut self, value: Duration) -> Self {
self.timeout = value;
self
}
}
impl Default for OpenRaftKvFailoverWorkload {
fn default() -> Self {
Self::new()
}
}
#[async_trait]
impl Workload<OpenRaftKvEnv> for OpenRaftKvFailoverWorkload {
fn name(&self) -> &str {
"openraft_kv_failover_workload"
}
async fn start(&self, ctx: &RunContext<OpenRaftKvEnv>) -> Result<(), DynError> {
let clients = ctx.node_clients().snapshot();
let observer = ctx.require_extension::<ObservationHandle<OpenRaftClusterObserver>>()?;
ensure_cluster_size(&clients, 3)?;
self.bootstrap_cluster(&clients).await?;
let initial_leader = wait_for_observed_leader(&observer, self.timeout, None).await?;
let membership = OpenRaftMembership::discover(&clients).await?;
self.promote_cluster(&observer, &clients, initial_leader, &membership)
.await?;
self.write_initial_batch(&clients, initial_leader).await?;
let new_leader = self
.restart_leader_and_wait_for_failover(ctx, &observer, initial_leader)
.await?;
self.write_second_batch(&clients, new_leader).await?;
Ok(())
}
}
impl OpenRaftKvFailoverWorkload {
async fn bootstrap_cluster(&self, clients: &[OpenRaftKvClient]) -> Result<(), DynError> {
info!("initializing openraft cluster");
clients[0].init_self().await?;
Ok(())
}
async fn promote_cluster(
&self,
observer: &ObservationHandle<OpenRaftClusterObserver>,
clients: &[OpenRaftKvClient],
leader_id: u64,
membership: &OpenRaftMembership,
) -> Result<(), DynError> {
let leader = resolve_client_for_node(clients, leader_id, self.timeout).await?;
for learner in membership.learner_targets(leader_id) {
info!(
target = learner.node_id,
addr = %learner.public_addr,
"adding learner"
);
leader
.add_learner(learner.node_id, &learner.public_addr)
.await?;
}
let voter_ids = membership.voter_ids();
leader.change_membership(voter_ids.iter().copied()).await?;
wait_for_observed_membership(observer, &voter_ids, self.timeout).await?;
Ok(())
}
async fn write_initial_batch(
&self,
clients: &[OpenRaftKvClient],
leader_id: u64,
) -> Result<(), DynError> {
info!(
leader = leader_id,
writes = self.first_batch,
"writing initial batch"
);
let leader = resolve_client_for_node(clients, leader_id, self.timeout).await?;
write_batch(&leader, &self.key_prefix, 0, self.first_batch).await?;
Ok(())
}
async fn restart_leader_and_wait_for_failover(
&self,
ctx: &RunContext<OpenRaftKvEnv>,
observer: &ObservationHandle<OpenRaftClusterObserver>,
leader_id: u64,
) -> Result<u64, DynError> {
let Some(control) = ctx.node_control() else {
return Err("openraft failover workload requires node control".into());
};
let leader_name = format!("node-{leader_id}");
info!(%leader_name, "restarting current leader");
control.restart_node(&leader_name).await?;
let new_leader = wait_for_observed_leader(observer, self.timeout, Some(leader_id)).await?;
info!(
old_leader = leader_id,
new_leader, "leader changed after restart"
);
Ok(new_leader)
}
async fn write_second_batch(
&self,
clients: &[OpenRaftKvClient],
leader_id: u64,
) -> Result<(), DynError> {
info!(
leader = leader_id,
writes = self.second_batch,
"writing second batch"
);
let leader = resolve_client_for_node(clients, leader_id, self.timeout).await?;
write_batch(
&leader,
&self.key_prefix,
self.first_batch,
self.second_batch,
)
.await?;
Ok(())
}
}

View File

@ -0,0 +1,14 @@
mod convergence;
mod failover;
mod support;
/// Replication expectation used by the OpenRaft example binaries.
pub use convergence::OpenRaftKvConverges;
/// Failover workload used by the OpenRaft example binaries.
pub use failover::OpenRaftKvFailoverWorkload;
/// Shared cluster helpers used by the OpenRaft workload and manual k8s example.
pub use support::{
FULL_VOTER_SET, OpenRaftClusterError, OpenRaftMembership, ensure_cluster_size, expected_kv,
resolve_client_for_node, wait_for_leader, wait_for_membership, wait_for_observed_leader,
wait_for_observed_membership, wait_for_observed_replication, wait_for_replication, write_batch,
};

View File

@ -0,0 +1,328 @@
use std::{
collections::{BTreeMap, BTreeSet},
time::Duration,
};
use openraft_kv_node::{OpenRaftKvClient, OpenRaftKvState};
use openraft_kv_runtime_ext::{
OpenRaftClusterObserver, OpenRaftClusterSnapshot, capture_openraft_cluster_snapshot,
};
use testing_framework_core::observation::{ObservationHandle, ObservationSnapshot};
use thiserror::Error;
use tokio::time::{Instant, sleep};
const POLL_INTERVAL: Duration = Duration::from_millis(250);
const CLIENT_RESOLUTION_INTERVAL: Duration = Duration::from_millis(200);
/// Fixed voter set used by the example cluster.
pub const FULL_VOTER_SET: [u64; 3] = [0, 1, 2];
/// One learner candidate discovered from cluster state.
#[derive(Clone, Debug, Eq, PartialEq)]
pub struct LearnerTarget {
/// Node identifier used by OpenRaft membership.
pub node_id: u64,
/// Public address advertised for Raft traffic.
pub public_addr: String,
}
/// Membership view captured from the current node states.
#[derive(Clone, Debug)]
pub struct OpenRaftMembership {
states: Vec<OpenRaftKvState>,
}
impl OpenRaftMembership {
/// Builds a membership view from already observed node states.
#[must_use]
pub fn from_states(states: &[OpenRaftKvState]) -> Self {
let mut states = states.to_vec();
states.sort_by_key(|state| state.node_id);
Self { states }
}
/// Reads and sorts the current node states by id.
pub async fn discover(clients: &[OpenRaftKvClient]) -> Result<Self, OpenRaftClusterError> {
let mut states = Vec::with_capacity(clients.len());
for client in clients {
states.push(client.state().await.map_err(OpenRaftClusterError::Client)?);
}
Ok(Self::from_states(&states))
}
/// Returns the full voter set implied by the discovered nodes.
#[must_use]
pub fn voter_ids(&self) -> BTreeSet<u64> {
self.states.iter().map(|state| state.node_id).collect()
}
/// Returns every non-leader node as a learner target.
#[must_use]
pub fn learner_targets(&self, leader_id: u64) -> Vec<LearnerTarget> {
self.states
.iter()
.filter(|state| state.node_id != leader_id)
.map(|state| LearnerTarget {
node_id: state.node_id,
public_addr: state.public_addr.clone(),
})
.collect()
}
}
/// Errors raised by the OpenRaft example cluster helpers.
#[derive(Debug, Error)]
pub enum OpenRaftClusterError {
#[error("openraft example requires at least {expected} node clients, got {actual}")]
InsufficientClients { expected: usize, actual: usize },
#[error("failed to query openraft node state: {0}")]
Client(#[source] anyhow::Error),
#[error("openraft cluster observation is not available yet")]
MissingObservation,
#[error(
"timed out waiting for {action} after {timeout:?}; last observation: {last_observation}"
)]
Timeout {
action: &'static str,
timeout: Duration,
last_observation: String,
},
#[error("timed out resolving node client for {node_id} after {timeout:?}")]
ClientResolution { node_id: u64, timeout: Duration },
}
/// Ensures the example cluster has the expected number of node clients.
pub fn ensure_cluster_size(
clients: &[OpenRaftKvClient],
expected: usize,
) -> Result<(), OpenRaftClusterError> {
if clients.len() < expected {
return Err(OpenRaftClusterError::InsufficientClients {
expected,
actual: clients.len(),
});
}
Ok(())
}
/// Waits until the cluster converges on one leader.
pub async fn wait_for_leader(
clients: &[OpenRaftKvClient],
timeout: Duration,
different_from: Option<u64>,
) -> Result<u64, OpenRaftClusterError> {
let deadline = Instant::now() + timeout;
loop {
let last_observation = capture_openraft_cluster_snapshot(clients).await;
if let Some(leader) = last_observation.agreed_leader(different_from) {
return Ok(leader);
}
if Instant::now() >= deadline {
return Err(OpenRaftClusterError::Timeout {
action: "leader agreement",
timeout,
last_observation: last_observation.summary(),
});
}
sleep(POLL_INTERVAL).await;
}
}
/// Waits until every node reports the expected voter set.
pub async fn wait_for_membership(
clients: &[OpenRaftKvClient],
expected_voters: &BTreeSet<u64>,
timeout: Duration,
) -> Result<(), OpenRaftClusterError> {
let deadline = Instant::now() + timeout;
loop {
let last_observation = capture_openraft_cluster_snapshot(clients).await;
if last_observation.all_voters_match(expected_voters) {
return Ok(());
}
if Instant::now() >= deadline {
return Err(OpenRaftClusterError::Timeout {
action: "membership convergence",
timeout,
last_observation: last_observation.summary(),
});
}
sleep(POLL_INTERVAL).await;
}
}
/// Waits until every node reports the full replicated key set.
pub async fn wait_for_replication(
clients: &[OpenRaftKvClient],
expected: &BTreeMap<String, String>,
timeout: Duration,
) -> Result<(), OpenRaftClusterError> {
let deadline = Instant::now() + timeout;
loop {
let last_observation = capture_openraft_cluster_snapshot(clients).await;
if last_observation.all_kv_match(expected, &FULL_VOTER_SET) {
return Ok(());
}
if Instant::now() >= deadline {
return Err(OpenRaftClusterError::Timeout {
action: "replicated state convergence",
timeout,
last_observation: last_observation.summary(),
});
}
sleep(POLL_INTERVAL).await;
}
}
/// Waits until the observer reports one agreed leader.
pub async fn wait_for_observed_leader(
handle: &ObservationHandle<OpenRaftClusterObserver>,
timeout: Duration,
different_from: Option<u64>,
) -> Result<u64, OpenRaftClusterError> {
let snapshot =
wait_for_observed_snapshot(handle, timeout, "observed leader agreement", |snapshot| {
snapshot.agreed_leader(different_from).is_some()
})
.await?;
snapshot
.value
.agreed_leader(different_from)
.ok_or(OpenRaftClusterError::MissingObservation)
}
/// Waits until the observer reports the expected voter set on every node.
pub async fn wait_for_observed_membership(
handle: &ObservationHandle<OpenRaftClusterObserver>,
expected_voters: &BTreeSet<u64>,
timeout: Duration,
) -> Result<(), OpenRaftClusterError> {
wait_for_observed_snapshot(
handle,
timeout,
"observed membership convergence",
|snapshot| snapshot.all_voters_match(expected_voters),
)
.await?;
Ok(())
}
/// Waits until the observer reports the full replicated key set.
pub async fn wait_for_observed_replication(
handle: &ObservationHandle<OpenRaftClusterObserver>,
expected: &BTreeMap<String, String>,
timeout: Duration,
) -> Result<(), OpenRaftClusterError> {
wait_for_observed_snapshot(
handle,
timeout,
"observed replicated state convergence",
|snapshot| snapshot.all_kv_match(expected, &FULL_VOTER_SET),
)
.await?;
Ok(())
}
/// Resolves the client handle that currently identifies as `node_id`.
pub async fn resolve_client_for_node(
clients: &[OpenRaftKvClient],
node_id: u64,
timeout: Duration,
) -> Result<OpenRaftKvClient, OpenRaftClusterError> {
let deadline = Instant::now() + timeout;
loop {
for client in clients {
let Ok(state) = client.state().await else {
continue;
};
if state.node_id == node_id {
return Ok(client.clone());
}
}
if Instant::now() >= deadline {
return Err(OpenRaftClusterError::ClientResolution { node_id, timeout });
}
sleep(CLIENT_RESOLUTION_INTERVAL).await;
}
}
/// Issues a contiguous batch of writes through the current leader.
pub async fn write_batch(
leader: &OpenRaftKvClient,
prefix: &str,
start: usize,
count: usize,
) -> Result<(), OpenRaftClusterError> {
for index in start..(start + count) {
let key = format!("{prefix}-{index}");
let value = format!("value-{index}");
leader
.write(&key, &value, index as u64 + 1)
.await
.map_err(OpenRaftClusterError::Client)?;
}
Ok(())
}
/// Builds the replicated key/value map expected after the workload completes.
#[must_use]
pub fn expected_kv(prefix: &str, total_writes: usize) -> BTreeMap<String, String> {
(0..total_writes)
.map(|index| (format!("{prefix}-{index}"), format!("value-{index}")))
.collect()
}
async fn wait_for_observed_snapshot(
handle: &ObservationHandle<OpenRaftClusterObserver>,
timeout: Duration,
action: &'static str,
matches: impl Fn(&OpenRaftClusterSnapshot) -> bool,
) -> Result<ObservationSnapshot<OpenRaftClusterSnapshot>, OpenRaftClusterError> {
let deadline = Instant::now() + timeout;
let mut last_summary = "no state observed yet".to_owned();
loop {
if let Some(snapshot) = handle.latest_snapshot() {
last_summary = snapshot.value.summary();
if matches(&snapshot.value) {
return Ok(snapshot);
}
}
if Instant::now() >= deadline {
return Err(OpenRaftClusterError::Timeout {
action,
timeout,
last_observation: last_summary,
});
}
sleep(POLL_INTERVAL).await;
}
}

View File

@ -26,15 +26,15 @@ Each example follows the same pattern:
## Run locally
```bash
cargo run -p pubsub-examples --bin basic_ws_roundtrip
cargo run -p pubsub-examples --bin basic_ws_reconnect
cargo run -p pubsub-examples --bin pubsub_basic_ws_roundtrip
cargo run -p pubsub-examples --bin pubsub_basic_ws_reconnect
```
## Run with Docker Compose
```bash
cargo run -p pubsub-examples --bin compose_ws_roundtrip
cargo run -p pubsub-examples --bin compose_ws_reconnect
cargo run -p pubsub-examples --bin pubsub_compose_ws_roundtrip
cargo run -p pubsub-examples --bin pubsub_compose_ws_reconnect
```
Set `PUBSUB_IMAGE` to override the default compose image tag.
@ -43,7 +43,7 @@ Set `PUBSUB_IMAGE` to override the default compose image tag.
```bash
docker build -t pubsub-node:local -f examples/pubsub/Dockerfile .
cargo run -p pubsub-examples --bin k8s_ws_roundtrip
cargo run -p pubsub-examples --bin pubsub_k8s_ws_roundtrip
```
Prerequisites:
@ -57,5 +57,5 @@ Optional image override:
```bash
docker build -t pubsub-node:local -f examples/pubsub/Dockerfile .
cargo run -p pubsub-examples --bin k8s_manual_ws_roundtrip
cargo run -p pubsub-examples --bin pubsub_k8s_manual_ws_roundtrip
```

View File

@ -4,6 +4,30 @@ license.workspace = true
name = "pubsub-examples"
version.workspace = true
[[bin]]
name = "pubsub_basic_ws_roundtrip"
path = "src/bin/basic_ws_roundtrip.rs"
[[bin]]
name = "pubsub_basic_ws_reconnect"
path = "src/bin/basic_ws_reconnect.rs"
[[bin]]
name = "pubsub_compose_ws_roundtrip"
path = "src/bin/compose_ws_roundtrip.rs"
[[bin]]
name = "pubsub_compose_ws_reconnect"
path = "src/bin/compose_ws_reconnect.rs"
[[bin]]
name = "pubsub_k8s_ws_roundtrip"
path = "src/bin/k8s_ws_roundtrip.rs"
[[bin]]
name = "pubsub_k8s_manual_ws_roundtrip"
path = "src/bin/k8s_manual_ws_roundtrip.rs"
[dependencies]
anyhow = "1.0"
pubsub-node = { path = "../pubsub-node" }

examples/queue/Dockerfile Normal file
View File

@ -0,0 +1,24 @@
FROM rustlang/rust:nightly-bookworm AS builder
WORKDIR /build
COPY Cargo.toml Cargo.lock ./
COPY cfgsync/ ./cfgsync/
COPY examples/ ./examples/
COPY testing-framework/ ./testing-framework/
RUN cargo build --release -p queue-node
FROM debian:bookworm-slim
RUN apt-get update && \
apt-get install -y ca-certificates && \
rm -rf /var/lib/apt/lists/*
COPY --from=builder /build/target/release/queue-node /usr/local/bin/queue-node
RUN mkdir -p /etc/queue
WORKDIR /app
ENTRYPOINT ["/usr/local/bin/queue-node"]
CMD ["--config", "/etc/queue/config.yaml"]

examples/queue/README.md Normal file
View File

@ -0,0 +1,47 @@
# Queue Example
This example runs a small replicated FIFO queue.
The scenarios enqueue messages, dequeue them again, and check that queue state
either converges or drains as expected.
## How TF runs this
Each example follows the same pattern:
- TF starts a small deployment of queue nodes
- a workload produces messages, or produces and consumes them
- an expectation checks either that queue state converges or that the queue drains
## Scenarios
- `basic_convergence` produces messages and checks that queue state converges locally
- `basic_roundtrip` produces and consumes messages locally until the queue drains
- `basic_restart_chaos` injects controlled, rotating local node restarts during the run
- `compose_convergence` and `compose_roundtrip` run the same checks in Docker Compose
## API
Each node exposes the following endpoints; a `curl` sketch follows the list:
- `POST /queue/enqueue` to add a message
- `POST /queue/dequeue` to remove a message
- `GET /queue/state` to inspect the current queue state
- `GET /internal/snapshot` to read the local replicated state
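A minimal manual smoke test against one node, sketched with `curl`; the port and the JSON body shape are illustrative assumptions, not taken from the node's request types:
```bash
# Enqueue one message (the JSON field name is an assumption).
curl -s -X POST http://127.0.0.1:8080/queue/enqueue \
  -H 'content-type: application/json' \
  -d '{"payload": "demo-0"}'
# Dequeue the oldest message.
curl -s -X POST http://127.0.0.1:8080/queue/dequeue
# Inspect the queue state and the local replicated snapshot.
curl -s http://127.0.0.1:8080/queue/state
curl -s http://127.0.0.1:8080/internal/snapshot
```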
## Run locally
```bash
cargo run -p queue-examples --bin queue_basic_convergence
cargo run -p queue-examples --bin queue_basic_roundtrip
cargo run -p queue-examples --bin queue_basic_restart_chaos
```
## Run with Docker Compose
```bash
cargo run -p queue-examples --bin queue_compose_convergence
cargo run -p queue-examples --bin queue_compose_roundtrip
```
Set `QUEUE_IMAGE` to override the default compose image tag.
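For example, to build a local image and run the compose convergence scenario against it (the `queue-node:local` tag is just an illustration):
```bash
docker build -t queue-node:local -f examples/queue/Dockerfile .
QUEUE_IMAGE=queue-node:local cargo run -p queue-examples --bin queue_compose_convergence
```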

View File

@ -0,0 +1,36 @@
[package]
edition.workspace = true
license.workspace = true
name = "queue-examples"
version.workspace = true
[[bin]]
name = "queue_basic_convergence"
path = "src/bin/basic_convergence.rs"
[[bin]]
name = "queue_basic_restart_chaos"
path = "src/bin/basic_restart_chaos.rs"
[[bin]]
name = "queue_basic_roundtrip"
path = "src/bin/basic_roundtrip.rs"
[[bin]]
name = "queue_compose_convergence"
path = "src/bin/compose_convergence.rs"
[[bin]]
name = "queue_compose_roundtrip"
path = "src/bin/compose_roundtrip.rs"
[dependencies]
anyhow = "1.0"
async-trait = { workspace = true }
queue-runtime-ext = { path = "../testing/integration" }
queue-runtime-workloads = { path = "../testing/workloads" }
testing-framework-core = { workspace = true }
testing-framework-runner-compose = { workspace = true }
tokio = { workspace = true, features = ["full"] }
tracing = { workspace = true }
tracing-subscriber = { version = "0.3", features = ["env-filter"] }

View File

@ -0,0 +1,32 @@
use std::time::Duration;
use queue_runtime_ext::QueueLocalDeployer;
use queue_runtime_workloads::{
QueueBuilderExt, QueueConverges, QueueProduceWorkload, QueueScenarioBuilder, QueueTopology,
};
use testing_framework_core::scenario::Deployer;
#[tokio::main]
async fn main() -> anyhow::Result<()> {
tracing_subscriber::fmt()
.with_env_filter(tracing_subscriber::EnvFilter::from_default_env())
.init();
let operations = 300;
let mut scenario = QueueScenarioBuilder::deployment_with(|_| QueueTopology::new(3))
.with_run_duration(Duration::from_secs(30))
.with_workload(
QueueProduceWorkload::new()
.operations(operations)
.rate_per_sec(30)
.payload_prefix("demo"),
)
.with_expectation(QueueConverges::new(operations).timeout(Duration::from_secs(25)))
.build()?;
let deployer = QueueLocalDeployer::default();
let runner = deployer.deploy(&scenario).await?;
runner.run(&mut scenario).await?;
Ok(())
}

View File

@ -0,0 +1,84 @@
use std::time::Duration;
use async_trait::async_trait;
use queue_runtime_ext::QueueLocalDeployer;
use queue_runtime_workloads::{
QueueBuilderExt, QueueConverges, QueueProduceWorkload, QueueScenarioBuilder, QueueTopology,
};
use testing_framework_core::{
scenario::{Deployer, DynError, RunContext, Workload},
topology::DeploymentDescriptor,
};
use tracing::info;
#[derive(Clone)]
struct FixedRestartChaosWorkload {
restarts: usize,
delay: Duration,
}
impl FixedRestartChaosWorkload {
const fn new(restarts: usize, delay: Duration) -> Self {
Self { restarts, delay }
}
}
#[async_trait]
impl Workload<queue_runtime_workloads::QueueEnv> for FixedRestartChaosWorkload {
fn name(&self) -> &str {
"fixed_restart_chaos"
}
async fn start(
&self,
ctx: &RunContext<queue_runtime_workloads::QueueEnv>,
) -> Result<(), DynError> {
let Some(control) = ctx.node_control() else {
return Err("fixed restart chaos requires node control".into());
};
let node_count = ctx.descriptors().node_count();
if node_count == 0 {
return Err("fixed restart chaos requires at least one node".into());
}
for step in 0..self.restarts {
tokio::time::sleep(self.delay).await;
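// Round-robin over nodes 1..node_count so node-0, which the other
// workloads talk to, stays out of the restart rotation.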
let target_index = if node_count > 1 {
(step % (node_count - 1)) + 1
} else {
0
};
let target = format!("node-{target_index}");
info!(step, %target, "triggering controlled chaos restart");
control.restart_node(&target).await?;
}
Ok(())
}
}
#[tokio::main]
async fn main() -> anyhow::Result<()> {
tracing_subscriber::fmt()
.with_env_filter(tracing_subscriber::EnvFilter::from_default_env())
.init();
let mut scenario = QueueScenarioBuilder::deployment_with(|_| QueueTopology::new(3))
.enable_node_control()
.with_workload(FixedRestartChaosWorkload::new(3, Duration::from_secs(8)))
.with_run_duration(Duration::from_secs(30))
.with_workload(
QueueProduceWorkload::new()
.operations(400)
.rate_per_sec(40)
.payload_prefix("queue-chaos"),
)
.with_expectation(QueueConverges::new(200).timeout(Duration::from_secs(30)))
.build()?;
let deployer = QueueLocalDeployer::default();
let runner = deployer.deploy(&scenario).await?;
runner.run(&mut scenario).await?;
Ok(())
}

View File

@ -0,0 +1,31 @@
use std::time::Duration;
use queue_runtime_ext::QueueLocalDeployer;
use queue_runtime_workloads::{
QueueBuilderExt, QueueDrained, QueueRoundTripWorkload, QueueScenarioBuilder, QueueTopology,
};
use testing_framework_core::scenario::Deployer;
#[tokio::main]
async fn main() -> anyhow::Result<()> {
tracing_subscriber::fmt()
.with_env_filter(tracing_subscriber::EnvFilter::from_default_env())
.init();
let operations = 200;
let mut scenario = QueueScenarioBuilder::deployment_with(|_| QueueTopology::new(3))
.with_run_duration(Duration::from_secs(30))
.with_workload(
QueueRoundTripWorkload::new()
.operations(operations)
.rate_per_sec(25),
)
.with_expectation(QueueDrained::new().timeout(Duration::from_secs(25)))
.build()?;
let deployer = QueueLocalDeployer::default();
let runner = deployer.deploy(&scenario).await?;
runner.run(&mut scenario).await?;
Ok(())
}

View File

@ -0,0 +1,47 @@
use std::time::Duration;
use anyhow::{Context as _, Result};
use queue_runtime_workloads::{
QueueBuilderExt, QueueConverges, QueueProduceWorkload, QueueScenarioBuilder, QueueTopology,
};
use testing_framework_core::scenario::Deployer;
use testing_framework_runner_compose::ComposeRunnerError;
use tracing::{info, warn};
#[tokio::main]
async fn main() -> Result<()> {
tracing_subscriber::fmt()
.with_env_filter(tracing_subscriber::EnvFilter::from_default_env())
.init();
let operations = 200;
let mut scenario = QueueScenarioBuilder::deployment_with(|_| QueueTopology::new(3))
.with_run_duration(Duration::from_secs(30))
.with_workload(
QueueProduceWorkload::new()
.operations(operations)
.rate_per_sec(20),
)
.with_expectation(QueueConverges::new(operations).timeout(Duration::from_secs(25)))
.build()?;
let deployer = queue_runtime_ext::QueueComposeDeployer::new();
let runner = match deployer.deploy(&scenario).await {
Ok(runner) => runner,
Err(ComposeRunnerError::DockerUnavailable) => {
warn!("docker unavailable; skipping compose queue run");
return Ok(());
}
Err(error) => {
return Err(anyhow::Error::new(error)).context("deploying queue compose stack");
}
};
info!("running queue compose convergence scenario");
runner
.run(&mut scenario)
.await
.context("running queue compose scenario")?;
Ok(())
}

View File

@ -0,0 +1,48 @@
use std::time::Duration;
use anyhow::{Context as _, Result};
use queue_runtime_workloads::{
QueueBuilderExt, QueueDrained, QueueRoundTripWorkload, QueueScenarioBuilder, QueueTopology,
};
use testing_framework_core::scenario::Deployer;
use testing_framework_runner_compose::ComposeRunnerError;
use tracing::{info, warn};
#[tokio::main]
async fn main() -> Result<()> {
tracing_subscriber::fmt()
.with_env_filter(tracing_subscriber::EnvFilter::from_default_env())
.init();
let operations = 200;
let mut scenario = QueueScenarioBuilder::deployment_with(|_| QueueTopology::new(3))
.with_run_duration(Duration::from_secs(30))
.with_workload(
QueueRoundTripWorkload::new()
.operations(operations)
.rate_per_sec(20),
)
.with_expectation(QueueDrained::new().timeout(Duration::from_secs(25)))
.build()?;
let deployer = queue_runtime_ext::QueueComposeDeployer::new();
let runner = match deployer.deploy(&scenario).await {
Ok(runner) => runner,
Err(ComposeRunnerError::DockerUnavailable) => {
warn!("docker unavailable; skipping compose queue roundtrip run");
return Ok(());
}
Err(error) => {
return Err(anyhow::Error::new(error))
.context("deploying queue compose roundtrip stack");
}
};
info!("running queue compose roundtrip scenario");
runner
.run(&mut scenario)
.await
.context("running queue compose roundtrip scenario")?;
Ok(())
}

View File

@ -0,0 +1,21 @@
[package]
edition.workspace = true
license.workspace = true
name = "queue-node"
version.workspace = true
[[bin]]
name = "queue-node"
path = "src/main.rs"
[dependencies]
anyhow = "1.0"
axum = "0.7"
clap = { version = "4.0", features = ["derive"] }
reqwest = { workspace = true, features = ["json"] }
serde = { workspace = true }
serde_yaml = { workspace = true }
tokio = { workspace = true, features = ["full"] }
tower-http = { version = "0.6", features = ["trace"] }
tracing = { workspace = true }
tracing-subscriber = { version = "0.3", features = ["env-filter"] }

View File

@ -0,0 +1,40 @@
use reqwest::Url;
use serde::Serialize;
#[derive(Clone)]
pub struct QueueHttpClient {
base_url: Url,
client: reqwest::Client,
}
impl QueueHttpClient {
#[must_use]
pub fn new(base_url: Url) -> Self {
Self {
base_url,
client: reqwest::Client::new(),
}
}
pub async fn get<T: serde::de::DeserializeOwned>(&self, path: &str) -> anyhow::Result<T> {
let url = self.base_url.join(path)?;
let response = self.client.get(url).send().await?.error_for_status()?;
Ok(response.json().await?)
}
pub async fn post<B: Serialize, T: serde::de::DeserializeOwned>(
&self,
path: &str,
body: &B,
) -> anyhow::Result<T> {
let url = self.base_url.join(path)?;
let response = self
.client
.post(url)
.json(body)
.send()
.await?
.error_for_status()?;
Ok(response.json().await?)
}
}

View File

@ -0,0 +1,29 @@
use std::{fs, path::Path};
use serde::{Deserialize, Serialize};
#[derive(Clone, Debug, Serialize, Deserialize)]
pub struct PeerInfo {
pub node_id: u64,
pub http_address: String,
}
#[derive(Clone, Debug, Serialize, Deserialize)]
pub struct QueueConfig {
pub node_id: u64,
pub http_port: u16,
pub peers: Vec<PeerInfo>,
#[serde(default = "default_sync_interval_ms")]
pub sync_interval_ms: u64,
}
impl QueueConfig {
pub fn load(path: &Path) -> anyhow::Result<Self> {
let raw = fs::read_to_string(path)?;
Ok(serde_yaml::from_str(&raw)?)
}
}
const fn default_sync_interval_ms() -> u64 {
1000
}
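#[cfg(test)]
mod tests {
    use super::QueueConfig;

    // Hedged sketch, not shipped in this commit, of the YAML shape the node
    // consumes; `sync_interval_ms` may be omitted and defaults to 1000.
    #[test]
    fn parses_minimal_yaml() {
        let raw = r#"
node_id: 0
http_port: 8080
peers:
  - node_id: 1
    http_address: "127.0.0.1:8081"
"#;
        let config: QueueConfig = serde_yaml::from_str(raw).expect("config should parse");
        assert_eq!(config.node_id, 0);
        assert_eq!(config.peers.len(), 1);
        assert_eq!(config.sync_interval_ms, 1000);
    }
}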

View File

@ -0,0 +1,3 @@
pub mod client;
pub use client::QueueHttpClient;

View File

@ -0,0 +1,36 @@
mod config;
mod server;
mod state;
mod sync;
use std::path::PathBuf;
use clap::Parser;
use tracing_subscriber::{layer::SubscriberExt, util::SubscriberInitExt};
use crate::{config::QueueConfig, state::QueueState, sync::SyncService};
#[derive(Parser, Debug)]
#[command(name = "queue-node")]
struct Args {
#[arg(short, long)]
config: PathBuf,
}
#[tokio::main]
async fn main() -> anyhow::Result<()> {
tracing_subscriber::registry()
.with(
tracing_subscriber::EnvFilter::try_from_default_env()
.unwrap_or_else(|_| "queue_node=info,tower_http=debug".into()),
)
.with(tracing_subscriber::fmt::layer())
.init();
let args = Args::parse();
let config = QueueConfig::load(&args.config)?;
let state = QueueState::new(config.node_id);
SyncService::new(config.clone(), state.clone()).start();
server::start_server(config, state).await
}

View File

@ -0,0 +1,115 @@
use std::net::SocketAddr;
use axum::{
Router,
extract::State,
http::StatusCode,
response::Json,
routing::{get, post},
};
use serde::{Deserialize, Serialize};
use tower_http::trace::TraceLayer;
use crate::{
config::QueueConfig,
state::{QueueMessage, QueueRevision, QueueState, QueueStateView, Snapshot},
};
#[derive(Serialize)]
struct HealthResponse {
status: &'static str,
}
#[derive(Deserialize)]
struct EnqueueRequest {
payload: String,
}
#[derive(Serialize)]
struct EnqueueResponse {
accepted: bool,
id: u64,
queue_len: usize,
revision: QueueRevision,
}
#[derive(Serialize)]
struct DequeueResponse {
message: Option<QueueMessage>,
queue_len: usize,
revision: QueueRevision,
}
pub async fn start_server(config: QueueConfig, state: QueueState) -> anyhow::Result<()> {
let app = Router::new()
.route("/health/live", get(health_live))
.route("/health/ready", get(health_ready))
.route("/queue/enqueue", post(enqueue))
.route("/queue/dequeue", post(dequeue))
.route("/queue/state", get(queue_state))
.route("/internal/snapshot", get(get_snapshot))
.layer(TraceLayer::new_for_http())
.with_state(state.clone());
let addr = SocketAddr::from(([0, 0, 0, 0], config.http_port));
let listener = tokio::net::TcpListener::bind(addr).await?;
state.set_ready(true).await;
tracing::info!(node_id = state.node_id(), %addr, "queue node ready");
axum::serve(listener, app).await?;
Ok(())
}
async fn health_live() -> (StatusCode, Json<HealthResponse>) {
(StatusCode::OK, Json(HealthResponse { status: "alive" }))
}
async fn health_ready(State(state): State<QueueState>) -> (StatusCode, Json<HealthResponse>) {
if state.is_ready().await {
(StatusCode::OK, Json(HealthResponse { status: "ready" }))
} else {
(
StatusCode::SERVICE_UNAVAILABLE,
Json(HealthResponse {
status: "not-ready",
}),
)
}
}
async fn enqueue(
State(state): State<QueueState>,
Json(request): Json<EnqueueRequest>,
) -> (StatusCode, Json<EnqueueResponse>) {
let outcome = state.enqueue_local(request.payload).await;
(
StatusCode::OK,
Json(EnqueueResponse {
accepted: outcome.accepted,
id: outcome.id,
queue_len: outcome.queue_len,
revision: outcome.revision,
}),
)
}
async fn dequeue(State(state): State<QueueState>) -> (StatusCode, Json<DequeueResponse>) {
let outcome = state.dequeue_local().await;
(
StatusCode::OK,
Json(DequeueResponse {
message: outcome.message,
queue_len: outcome.queue_len,
revision: outcome.revision,
}),
)
}
async fn queue_state(State(state): State<QueueState>) -> Json<QueueStateView> {
Json(state.queue_state().await)
}
async fn get_snapshot(State(state): State<QueueState>) -> Json<Snapshot> {
Json(state.snapshot().await)
}

View File

@ -0,0 +1,151 @@
use std::{collections::VecDeque, sync::Arc};
use serde::{Deserialize, Serialize};
use tokio::sync::RwLock;
#[derive(Clone, Copy, Debug, Default, Eq, PartialEq, Serialize, Deserialize)]
pub struct QueueRevision {
pub version: u64,
pub origin: u64,
}
#[derive(Clone, Debug, Eq, PartialEq, Serialize, Deserialize)]
pub struct QueueMessage {
pub id: u64,
pub payload: String,
}
#[derive(Clone, Debug, Serialize, Deserialize)]
pub struct Snapshot {
pub node_id: u64,
pub revision: QueueRevision,
pub messages: Vec<QueueMessage>,
}
#[derive(Clone, Debug, Eq, PartialEq, Serialize)]
pub struct QueueStateView {
pub revision: QueueRevision,
pub queue_len: usize,
pub head_id: Option<u64>,
pub tail_id: Option<u64>,
}
#[derive(Clone, Debug)]
pub struct EnqueueOutcome {
pub accepted: bool,
pub id: u64,
pub queue_len: usize,
pub revision: QueueRevision,
}
#[derive(Clone, Debug)]
pub struct DequeueOutcome {
pub message: Option<QueueMessage>,
pub queue_len: usize,
pub revision: QueueRevision,
}
#[derive(Debug, Default)]
struct QueueData {
revision: QueueRevision,
messages: VecDeque<QueueMessage>,
}
#[derive(Clone)]
pub struct QueueState {
node_id: u64,
ready: Arc<RwLock<bool>>,
data: Arc<RwLock<QueueData>>,
}
impl QueueState {
pub fn new(node_id: u64) -> Self {
Self {
node_id,
ready: Arc::new(RwLock::new(false)),
data: Arc::new(RwLock::new(QueueData::default())),
}
}
pub const fn node_id(&self) -> u64 {
self.node_id
}
pub async fn set_ready(&self, value: bool) {
*self.ready.write().await = value;
}
pub async fn is_ready(&self) -> bool {
*self.ready.read().await
}
pub async fn enqueue_local(&self, payload: String) -> EnqueueOutcome {
let mut data = self.data.write().await;
let id = next_message_id(&data.messages);
data.messages.push_back(QueueMessage { id, payload });
bump_revision(&mut data.revision, self.node_id);
EnqueueOutcome {
accepted: true,
id,
queue_len: data.messages.len(),
revision: data.revision,
}
}
pub async fn dequeue_local(&self) -> DequeueOutcome {
let mut data = self.data.write().await;
let message = data.messages.pop_front();
if message.is_some() {
bump_revision(&mut data.revision, self.node_id);
}
DequeueOutcome {
message,
queue_len: data.messages.len(),
revision: data.revision,
}
}
pub async fn queue_state(&self) -> QueueStateView {
let data = self.data.read().await;
QueueStateView {
revision: data.revision,
queue_len: data.messages.len(),
head_id: data.messages.front().map(|message| message.id),
tail_id: data.messages.back().map(|message| message.id),
}
}
pub async fn merge_snapshot(&self, snapshot: Snapshot) {
let mut data = self.data.write().await;
if is_newer_revision(snapshot.revision, data.revision) {
data.revision = snapshot.revision;
data.messages = snapshot.messages.into();
}
}
pub async fn snapshot(&self) -> Snapshot {
let data = self.data.read().await;
Snapshot {
node_id: self.node_id,
revision: data.revision,
messages: data.messages.iter().cloned().collect(),
}
}
}
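/// Derives the next id from the current tail, so ids restart at 1 once the
/// queue fully drains; uniqueness therefore only holds within a single
/// produce/consume pass, which is all the demo workloads rely on.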
fn next_message_id(messages: &VecDeque<QueueMessage>) -> u64 {
messages
.back()
.map_or(1, |message| message.id.saturating_add(1))
}
fn bump_revision(revision: &mut QueueRevision, node_id: u64) {
revision.version = revision.version.saturating_add(1);
revision.origin = node_id;
}
fn is_newer_revision(candidate: QueueRevision, existing: QueueRevision) -> bool {
(candidate.version, candidate.origin) > (existing.version, existing.origin)
}
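#[cfg(test)]
mod revision_tests {
    use super::{QueueRevision, is_newer_revision};

    // Hedged illustration, not in the original commit, of the merge rule:
    // a higher `version` wins, and `origin` breaks ties so two nodes that
    // bump to the same version still converge on one deterministic winner.
    #[test]
    fn version_then_origin_orders_revisions() {
        let local = QueueRevision { version: 3, origin: 2 };
        let remote = QueueRevision { version: 4, origin: 1 };
        assert!(is_newer_revision(remote, local));

        let a = QueueRevision { version: 4, origin: 1 };
        let b = QueueRevision { version: 4, origin: 2 };
        assert!(is_newer_revision(b, a));
        assert!(!is_newer_revision(a, b));
    }
}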

View File

@ -0,0 +1,103 @@
use std::{collections::HashMap, sync::Arc, time::Duration};
use reqwest::Client;
use tokio::sync::Mutex;
use tracing::{debug, warn};
use crate::{
config::QueueConfig,
state::{QueueState, Snapshot},
};
const WARN_AFTER_CONSECUTIVE_FAILURES: u32 = 5;
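/// Pull-based anti-entropy: on a fixed interval each node fetches its peers'
/// `/internal/snapshot` and, through `QueueState::merge_snapshot`, adopts any
/// snapshot that carries a newer revision wholesale.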
#[derive(Clone)]
pub struct SyncService {
config: Arc<QueueConfig>,
state: QueueState,
client: Client,
failures_by_peer: Arc<Mutex<HashMap<String, u32>>>,
}
impl SyncService {
pub fn new(config: QueueConfig, state: QueueState) -> Self {
Self {
config: Arc::new(config),
state,
client: Client::new(),
failures_by_peer: Arc::new(Mutex::new(HashMap::new())),
}
}
pub fn start(&self) {
let service = self.clone();
tokio::spawn(async move {
service.run().await;
});
}
async fn run(self) {
let interval = Duration::from_millis(self.config.sync_interval_ms.max(100));
loop {
self.sync_once().await;
tokio::time::sleep(interval).await;
}
}
async fn sync_once(&self) {
for peer in &self.config.peers {
match self.fetch_snapshot(&peer.http_address).await {
Ok(snapshot) => {
self.state.merge_snapshot(snapshot).await;
self.clear_failure_counter(&peer.http_address).await;
}
Err(error) => {
self.record_sync_failure(&peer.http_address, &error).await;
}
}
}
}
async fn fetch_snapshot(&self, peer_address: &str) -> anyhow::Result<Snapshot> {
let url = format!("http://{peer_address}/internal/snapshot");
let snapshot = self
.client
.get(url)
.send()
.await?
.error_for_status()?
.json()
.await?;
Ok(snapshot)
}
async fn clear_failure_counter(&self, peer_address: &str) {
let mut failures = self.failures_by_peer.lock().await;
failures.remove(peer_address);
}
async fn record_sync_failure(&self, peer_address: &str, error: &anyhow::Error) {
let consecutive_failures = {
let mut failures = self.failures_by_peer.lock().await;
let entry = failures.entry(peer_address.to_owned()).or_insert(0);
*entry += 1;
*entry
};
if consecutive_failures >= WARN_AFTER_CONSECUTIVE_FAILURES {
warn!(
peer = %peer_address,
%error,
consecutive_failures,
"queue sync repeatedly failing"
);
} else {
debug!(
peer = %peer_address,
%error,
consecutive_failures,
"queue sync failed"
);
}
}
}

View File

@ -0,0 +1,13 @@
[package]
edition.workspace = true
license.workspace = true
name = "queue-runtime-ext"
version.workspace = true
[dependencies]
async-trait = { workspace = true }
queue-node = { path = "../../queue-node" }
serde = { workspace = true }
testing-framework-core = { workspace = true }
testing-framework-runner-compose = { workspace = true }
testing-framework-runner-local = { workspace = true }

View File

@ -0,0 +1,75 @@
use std::io::Error;
use async_trait::async_trait;
use queue_node::QueueHttpClient;
use serde::{Deserialize, Serialize};
use testing_framework_core::scenario::{
Application, ClusterNodeConfigApplication, ClusterNodeView, ClusterPeerView, DynError,
NodeAccess, serialize_cluster_yaml_config,
};
pub type QueueTopology = testing_framework_core::topology::ClusterTopology;
#[derive(Clone, Debug, Serialize, Deserialize)]
pub struct QueuePeerInfo {
pub node_id: u64,
pub http_address: String,
}
#[derive(Clone, Debug, Serialize, Deserialize)]
pub struct QueueNodeConfig {
pub node_id: u64,
pub http_port: u16,
pub peers: Vec<QueuePeerInfo>,
pub sync_interval_ms: u64,
}
pub struct QueueEnv;
#[async_trait]
impl Application for QueueEnv {
type Deployment = QueueTopology;
type NodeClient = QueueHttpClient;
type NodeConfig = QueueNodeConfig;
fn build_node_client(access: &NodeAccess) -> Result<Self::NodeClient, DynError> {
Ok(QueueHttpClient::new(access.api_base_url()?))
}
fn node_readiness_path() -> &'static str {
"/health/ready"
}
}
impl ClusterNodeConfigApplication for QueueEnv {
type ConfigError = Error;
fn static_network_port() -> u16 {
8080
}
fn build_cluster_node_config(
node: &ClusterNodeView,
peers: &[ClusterPeerView],
) -> Result<Self::NodeConfig, Self::ConfigError> {
let peers = peers
.iter()
.map(|peer| QueuePeerInfo {
node_id: peer.index() as u64,
http_address: peer.authority(),
})
.collect::<Vec<_>>();
Ok(QueueNodeConfig {
node_id: node.index() as u64,
http_port: node.network_port(),
peers,
sync_interval_ms: 500,
})
}
fn serialize_cluster_node_config(
config: &Self::NodeConfig,
) -> Result<String, Self::ConfigError> {
serialize_cluster_yaml_config(config).map_err(Error::other)
}
}

View File

@ -0,0 +1,15 @@
use testing_framework_runner_compose::{BinaryConfigNodeSpec, ComposeBinaryApp};
use crate::QueueEnv;
const NODE_CONFIG_PATH: &str = "/etc/queue/config.yaml";
impl ComposeBinaryApp for QueueEnv {
fn compose_node_spec() -> BinaryConfigNodeSpec {
BinaryConfigNodeSpec::conventional(
"/usr/local/bin/queue-node",
NODE_CONFIG_PATH,
vec![8080, 8081],
)
}
}

View File

@ -0,0 +1,10 @@
mod app;
mod compose_env;
mod local_env;
pub mod scenario;
pub use app::*;
pub use scenario::{QueueBuilderExt, QueueScenarioBuilder};
pub type QueueLocalDeployer = testing_framework_runner_local::ProcessDeployer<QueueEnv>;
pub type QueueComposeDeployer = testing_framework_runner_compose::ComposeDeployer<QueueEnv>;

View File

@ -0,0 +1,41 @@
use std::collections::HashMap;
use testing_framework_core::scenario::{DynError, StartNodeOptions};
use testing_framework_runner_local::{
LocalBinaryApp, LocalNodePorts, LocalPeerNode, LocalProcessSpec,
build_local_cluster_node_config, yaml_node_config,
};
use crate::{QueueEnv, QueueNodeConfig};
impl LocalBinaryApp for QueueEnv {
fn initial_node_name_prefix() -> &'static str {
"queue-node"
}
fn build_local_node_config_with_peers(
_topology: &Self::Deployment,
index: usize,
ports: &LocalNodePorts,
peers: &[LocalPeerNode],
_peer_ports_by_name: &HashMap<String, u16>,
_options: &StartNodeOptions<Self>,
_template_config: Option<
&<Self as testing_framework_core::scenario::Application>::NodeConfig,
>,
) -> Result<<Self as testing_framework_core::scenario::Application>::NodeConfig, DynError> {
build_local_cluster_node_config::<Self>(index, ports, peers)
}
fn local_process_spec() -> LocalProcessSpec {
LocalProcessSpec::new("QUEUE_NODE_BIN", "queue-node").with_rust_log("queue_node=info")
}
fn render_local_config(config: &QueueNodeConfig) -> Result<Vec<u8>, DynError> {
yaml_node_config(config)
}
fn http_api_port(config: &QueueNodeConfig) -> u16 {
config.http_port
}
}

View File

@ -0,0 +1,15 @@
use testing_framework_core::scenario::ScenarioBuilder;
use crate::{QueueEnv, QueueTopology};
pub type QueueScenarioBuilder = ScenarioBuilder<QueueEnv>;
pub trait QueueBuilderExt: Sized {
fn deployment_with(f: impl FnOnce(QueueTopology) -> QueueTopology) -> Self;
}
impl QueueBuilderExt for QueueScenarioBuilder {
fn deployment_with(f: impl FnOnce(QueueTopology) -> QueueTopology) -> Self {
QueueScenarioBuilder::with_deployment(f(QueueTopology::new(3)))
}
}

View File

@ -0,0 +1,14 @@
[package]
edition.workspace = true
license.workspace = true
name = "queue-runtime-workloads"
version.workspace = true
[dependencies]
async-trait = { workspace = true }
queue-node = { path = "../../queue-node" }
queue-runtime-ext = { path = "../integration" }
serde = { workspace = true }
testing-framework-core = { workspace = true }
tokio = { workspace = true, features = ["full"] }
tracing = { workspace = true }

View File

@ -0,0 +1,104 @@
use std::time::Duration;
use async_trait::async_trait;
use queue_runtime_ext::QueueEnv;
use serde::Deserialize;
use testing_framework_core::scenario::{DynError, Expectation, RunContext};
use tracing::info;
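/// Passes once every node reports an identical, fully drained `/queue/state`
/// view within the timeout.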
#[derive(Clone)]
pub struct QueueDrained {
timeout: Duration,
poll_interval: Duration,
}
#[derive(Clone, Debug, Deserialize, Eq, PartialEq)]
struct QueueRevision {
version: u64,
origin: u64,
}
#[derive(Clone, Debug, Deserialize, Eq, PartialEq)]
struct QueueStateResponse {
revision: QueueRevision,
queue_len: usize,
head_id: Option<u64>,
tail_id: Option<u64>,
}
impl QueueDrained {
#[must_use]
pub fn new() -> Self {
Self {
timeout: Duration::from_secs(20),
poll_interval: Duration::from_millis(500),
}
}
#[must_use]
pub const fn timeout(mut self, timeout: Duration) -> Self {
self.timeout = timeout;
self
}
}
impl Default for QueueDrained {
fn default() -> Self {
Self::new()
}
}
#[async_trait]
impl Expectation<QueueEnv> for QueueDrained {
fn name(&self) -> &str {
"queue_drained"
}
async fn evaluate(&mut self, ctx: &RunContext<QueueEnv>) -> Result<(), DynError> {
let clients = ctx.node_clients().snapshot();
if clients.is_empty() {
return Err("no queue node clients available".into());
}
let deadline = tokio::time::Instant::now() + self.timeout;
while tokio::time::Instant::now() < deadline {
if is_drained_and_converged(&clients).await? {
info!("queue drained and converged");
return Ok(());
}
tokio::time::sleep(self.poll_interval).await;
}
Err(format!("queue not drained within {:?}", self.timeout).into())
}
}
async fn is_drained_and_converged(
clients: &[queue_node::QueueHttpClient],
) -> Result<bool, DynError> {
let Some((first, rest)) = clients.split_first() else {
return Ok(false);
};
let baseline = read_state(first).await?;
if !is_drained(&baseline) {
return Ok(false);
}
for client in rest {
let current = read_state(client).await?;
if current != baseline {
return Ok(false);
}
}
Ok(true)
}
fn is_drained(state: &QueueStateResponse) -> bool {
state.queue_len == 0 && state.head_id.is_none() && state.tail_id.is_none()
}
async fn read_state(client: &queue_node::QueueHttpClient) -> Result<QueueStateResponse, DynError> {
Ok(client.get("/queue/state").await?)
}

View File

@ -0,0 +1,106 @@
use std::time::Duration;
use async_trait::async_trait;
use queue_runtime_ext::QueueEnv;
use serde::Deserialize;
use testing_framework_core::scenario::{DynError, Expectation, RunContext};
use tracing::info;
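/// Passes once every node reports the same `/queue/state` view holding at
/// least `min_queue_len` messages.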
#[derive(Clone)]
pub struct QueueConverges {
min_queue_len: usize,
timeout: Duration,
poll_interval: Duration,
}
#[derive(Clone, Debug, Deserialize, Eq, PartialEq)]
struct QueueRevision {
version: u64,
origin: u64,
}
#[derive(Clone, Debug, Deserialize, Eq, PartialEq)]
struct QueueStateResponse {
revision: QueueRevision,
queue_len: usize,
head_id: Option<u64>,
tail_id: Option<u64>,
}
impl QueueConverges {
#[must_use]
pub fn new(min_queue_len: usize) -> Self {
Self {
min_queue_len,
timeout: Duration::from_secs(20),
poll_interval: Duration::from_millis(500),
}
}
#[must_use]
pub const fn timeout(mut self, timeout: Duration) -> Self {
self.timeout = timeout;
self
}
}
#[async_trait]
impl Expectation<QueueEnv> for QueueConverges {
fn name(&self) -> &str {
"queue_converges"
}
async fn evaluate(&mut self, ctx: &RunContext<QueueEnv>) -> Result<(), DynError> {
let clients = ctx.node_clients().snapshot();
if clients.is_empty() {
return Err("no queue node clients available".into());
}
let deadline = tokio::time::Instant::now() + self.timeout;
while tokio::time::Instant::now() < deadline {
if self.is_converged(&clients).await? {
info!(
min_queue_len = self.min_queue_len,
"queue convergence reached"
);
return Ok(());
}
tokio::time::sleep(self.poll_interval).await;
}
Err(format!(
"queue convergence not reached within {:?} (min_queue_len={})",
self.timeout, self.min_queue_len
)
.into())
}
}
impl QueueConverges {
async fn is_converged(
&self,
clients: &[queue_node::QueueHttpClient],
) -> Result<bool, DynError> {
let Some((first, rest)) = clients.split_first() else {
return Ok(false);
};
let baseline = read_state(first).await?;
if baseline.queue_len < self.min_queue_len {
return Ok(false);
}
for client in rest {
let current = read_state(client).await?;
if current != baseline {
return Ok(false);
}
}
Ok(true)
}
}
async fn read_state(client: &queue_node::QueueHttpClient) -> Result<QueueStateResponse, DynError> {
Ok(client.get("/queue/state").await?)
}

View File

@ -0,0 +1,10 @@
mod drained;
mod expectations;
mod produce;
mod roundtrip;
pub use drained::QueueDrained;
pub use expectations::QueueConverges;
pub use produce::QueueProduceWorkload;
pub use queue_runtime_ext::{QueueBuilderExt, QueueEnv, QueueScenarioBuilder, QueueTopology};
pub use roundtrip::QueueRoundTripWorkload;

View File

@ -0,0 +1,116 @@
use std::time::Duration;
use async_trait::async_trait;
use queue_runtime_ext::QueueEnv;
use serde::{Deserialize, Serialize};
use testing_framework_core::scenario::{DynError, RunContext, Workload};
use tracing::info;
#[derive(Clone)]
pub struct QueueProduceWorkload {
operations: usize,
rate_per_sec: Option<usize>,
payload_prefix: String,
}
#[derive(Serialize)]
struct EnqueueRequest {
payload: String,
}
#[derive(Deserialize)]
struct EnqueueResponse {
accepted: bool,
id: u64,
queue_len: usize,
}
impl QueueProduceWorkload {
#[must_use]
pub fn new() -> Self {
Self {
operations: 200,
rate_per_sec: Some(25),
payload_prefix: "queue-demo".to_owned(),
}
}
#[must_use]
pub const fn operations(mut self, value: usize) -> Self {
self.operations = value;
self
}
#[must_use]
pub const fn rate_per_sec(mut self, value: usize) -> Self {
self.rate_per_sec = Some(value);
self
}
#[must_use]
pub fn payload_prefix(mut self, value: impl Into<String>) -> Self {
self.payload_prefix = value.into();
self
}
}
impl Default for QueueProduceWorkload {
fn default() -> Self {
Self::new()
}
}
#[async_trait]
impl Workload<QueueEnv> for QueueProduceWorkload {
fn name(&self) -> &str {
"queue_produce_workload"
}
async fn start(&self, ctx: &RunContext<QueueEnv>) -> Result<(), DynError> {
let clients = ctx.node_clients().snapshot();
let Some(producer) = clients.first() else {
return Err("no queue node clients available".into());
};
let interval = self.rate_per_sec.and_then(compute_interval);
info!(
operations = self.operations,
rate_per_sec = ?self.rate_per_sec,
"starting queue produce workload"
);
for idx in 0..self.operations {
let payload = format!("{}-{idx}", self.payload_prefix);
let response: EnqueueResponse = producer
.post("/queue/enqueue", &EnqueueRequest { payload })
.await?;
if !response.accepted {
return Err(format!("node rejected enqueue at operation {idx}").into());
}
if (idx + 1) % 25 == 0 {
info!(
completed = idx + 1,
last_id = response.id,
queue_len = response.queue_len,
"queue produce progress"
);
}
if let Some(delay) = interval {
tokio::time::sleep(delay).await;
}
}
Ok(())
}
}
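// Maps a target ops/sec onto a per-operation delay; a rate of 0 disables
// pacing (the workload enqueues as fast as responses return).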
fn compute_interval(rate_per_sec: usize) -> Option<Duration> {
if rate_per_sec == 0 {
return None;
}
Some(Duration::from_millis((1000 / rate_per_sec as u64).max(1)))
}

View File

@ -0,0 +1,179 @@
use std::{collections::HashSet, time::Duration};
use async_trait::async_trait;
use queue_runtime_ext::QueueEnv;
use serde::{Deserialize, Serialize};
use testing_framework_core::scenario::{DynError, RunContext, Workload};
use tokio::time::{Instant, sleep};
use tracing::info;
#[derive(Clone)]
pub struct QueueRoundTripWorkload {
operations: usize,
rate_per_sec: Option<usize>,
payload_prefix: String,
drain_timeout: Duration,
empty_poll_interval: Duration,
}
#[derive(Serialize)]
struct EnqueueRequest {
payload: String,
}
#[derive(Deserialize)]
struct EnqueueResponse {
accepted: bool,
id: u64,
}
#[derive(Serialize)]
struct DequeueRequest {}
#[derive(Deserialize)]
struct QueueMessage {
id: u64,
payload: String,
}
#[derive(Deserialize)]
struct DequeueResponse {
message: Option<QueueMessage>,
}
impl QueueRoundTripWorkload {
#[must_use]
pub fn new() -> Self {
Self {
operations: 200,
rate_per_sec: Some(25),
payload_prefix: "queue-roundtrip".to_owned(),
drain_timeout: Duration::from_secs(20),
empty_poll_interval: Duration::from_millis(100),
}
}
#[must_use]
pub const fn operations(mut self, value: usize) -> Self {
self.operations = value;
self
}
#[must_use]
pub const fn rate_per_sec(mut self, value: usize) -> Self {
self.rate_per_sec = Some(value);
self
}
#[must_use]
pub fn payload_prefix(mut self, value: impl Into<String>) -> Self {
self.payload_prefix = value.into();
self
}
#[must_use]
pub const fn drain_timeout(mut self, value: Duration) -> Self {
self.drain_timeout = value;
self
}
}
impl Default for QueueRoundTripWorkload {
fn default() -> Self {
Self::new()
}
}
#[async_trait]
impl Workload<QueueEnv> for QueueRoundTripWorkload {
fn name(&self) -> &str {
"queue_roundtrip_workload"
}
async fn start(&self, ctx: &RunContext<QueueEnv>) -> Result<(), DynError> {
let clients = ctx.node_clients().snapshot();
let Some(driver) = clients.first() else {
return Err("no queue node clients available".into());
};
let interval = self.rate_per_sec.and_then(compute_interval);
let mut produced_ids = HashSet::with_capacity(self.operations);
info!(
operations = self.operations,
"queue roundtrip: produce phase"
);
for idx in 0..self.operations {
let payload = format!("{}-{idx}", self.payload_prefix);
let response: EnqueueResponse = driver
.post("/queue/enqueue", &EnqueueRequest { payload })
.await?;
if !response.accepted {
return Err(format!("enqueue rejected at operation {idx}").into());
}
if !produced_ids.insert(response.id) {
return Err(format!("duplicate enqueue id observed: {}", response.id).into());
}
if let Some(delay) = interval {
sleep(delay).await;
}
}
info!(
operations = self.operations,
"queue roundtrip: consume phase"
);
let mut consumed = 0usize;
let deadline = Instant::now() + self.drain_timeout;
while consumed < self.operations && Instant::now() < deadline {
let response: DequeueResponse =
driver.post("/queue/dequeue", &DequeueRequest {}).await?;
match response.message {
Some(message) => {
if !message.payload.starts_with(&self.payload_prefix) {
return Err(format!("unexpected payload: {}", message.payload).into());
}
if !produced_ids.remove(&message.id) {
return Err(
format!("unknown or duplicate dequeue id: {}", message.id).into()
);
}
consumed += 1;
}
None => sleep(self.empty_poll_interval).await,
}
}
if consumed != self.operations {
return Err(format!(
"queue roundtrip timed out: consumed {consumed}/{} messages",
self.operations
)
.into());
}
if !produced_ids.is_empty() {
return Err(format!(
"queue roundtrip ended with {} undrained produced ids",
produced_ids.len()
)
.into());
}
info!(operations = self.operations, "queue roundtrip finished");
Ok(())
}
}
fn compute_interval(rate_per_sec: usize) -> Option<Duration> {
if rate_per_sec == 0 {
return None;
}
Some(Duration::from_millis((1000 / rate_per_sec as u64).max(1)))
}

View File

@ -25,11 +25,11 @@ Each example follows the same pattern:
## Run with Docker Compose
```bash
cargo run -p redis-streams-examples --bin compose_roundtrip
cargo run -p redis-streams-examples --bin redis_streams_compose_roundtrip
```
## Run the reclaim scenario
```bash
cargo run -p redis-streams-examples --bin compose_failover
cargo run -p redis-streams-examples --bin redis_streams_compose_failover
```

View File

@ -4,6 +4,14 @@ license.workspace = true
name = "redis-streams-examples"
version.workspace = true
[[bin]]
name = "redis_streams_compose_roundtrip"
path = "src/bin/compose_roundtrip.rs"
[[bin]]
name = "redis_streams_compose_failover"
path = "src/bin/compose_failover.rs"
[dependencies]
anyhow = "1.0"
redis-streams-runtime-ext = { path = "../testing/integration" }

View File

@ -29,5 +29,5 @@ reqwest = { features = ["json"], workspace = true }
serde = { workspace = true }
serde_yaml = { workspace = true }
thiserror = { workspace = true }
tokio = { features = ["macros", "process", "rt-multi-thread", "time"], workspace = true }
tokio = { features = ["macros", "process", "rt-multi-thread", "sync", "time"], workspace = true }
tracing = { workspace = true }

View File

@ -1,5 +1,6 @@
pub mod cfgsync;
pub mod env;
pub mod observation;
pub mod runtime;
pub mod scenario;
pub mod topology;

View File

@ -0,0 +1,161 @@
use std::{marker::PhantomData, sync::Arc};
use async_trait::async_trait;
use super::{
ObservationConfig, ObservationHandle, ObservationRuntime, ObservedSource, Observer,
SourceProvider,
};
use crate::scenario::{
Application, DynError, NodeClients, PreparedRuntimeExtension, RuntimeExtensionFactory,
};
/// Boxed source provider used by observation factories.
pub type BoxedSourceProvider<S> = Box<dyn SourceProvider<S>>;
/// Builds an observation source provider once node clients are available.
pub trait SourceProviderFactory<E: Application, S>: Send + Sync + 'static {
/// Builds the source provider for one scenario run.
fn build_source_provider(
&self,
deployment: &E::Deployment,
node_clients: NodeClients<E>,
) -> Result<BoxedSourceProvider<S>, DynError>;
}
impl<E, S, F> SourceProviderFactory<E, S> for F
where
E: Application,
S: Clone + Send + Sync + 'static,
F: Fn(&E::Deployment, NodeClients<E>) -> Result<BoxedSourceProvider<S>, DynError>
+ Send
+ Sync
+ 'static,
{
fn build_source_provider(
&self,
deployment: &E::Deployment,
node_clients: NodeClients<E>,
) -> Result<BoxedSourceProvider<S>, DynError> {
self(deployment, node_clients)
}
}
/// Fixed source provider for scenario runs with a stable source set.
#[derive(Clone, Debug)]
pub struct StaticSourceProvider<S> {
sources: Vec<ObservedSource<S>>,
}
impl<S> StaticSourceProvider<S> {
/// Builds a provider from a fixed source list.
#[must_use]
pub fn new(sources: Vec<ObservedSource<S>>) -> Self {
Self { sources }
}
}
#[async_trait]
impl<S> SourceProvider<S> for StaticSourceProvider<S>
where
S: Clone + Send + Sync + 'static,
{
async fn sources(&self) -> Result<Vec<ObservedSource<S>>, DynError> {
Ok(self.sources.clone())
}
}
/// Runtime extension factory that starts one observer and stores its handle in
/// `RunContext`.
pub struct ObservationExtensionFactory<E: Application, O: Observer> {
observer_builder: Arc<dyn Fn() -> O + Send + Sync>,
source_provider_factory: Arc<dyn SourceProviderFactory<E, O::Source>>,
config: ObservationConfig,
env_marker: PhantomData<E>,
}
impl<E: Application, O: Observer> ObservationExtensionFactory<E, O> {
/// Builds an observation extension factory from builders.
#[must_use]
pub fn from_parts(
observer_builder: impl Fn() -> O + Send + Sync + 'static,
source_provider_factory: impl SourceProviderFactory<E, O::Source>,
config: ObservationConfig,
) -> Self {
Self {
observer_builder: Arc::new(observer_builder),
source_provider_factory: Arc::new(source_provider_factory),
config,
env_marker: PhantomData,
}
}
}
impl<E, O> ObservationExtensionFactory<E, O>
where
E: Application,
O: Observer + Clone,
{
/// Builds an observation extension factory from one clonable observer.
#[must_use]
pub fn new(
observer: O,
source_provider_factory: impl SourceProviderFactory<E, O::Source>,
config: ObservationConfig,
) -> Self {
Self::from_parts(move || observer.clone(), source_provider_factory, config)
}
}
#[async_trait]
impl<E, O> RuntimeExtensionFactory<E> for ObservationExtensionFactory<E, O>
where
E: Application,
O: Observer,
{
async fn prepare(
&self,
deployment: &E::Deployment,
node_clients: NodeClients<E>,
) -> Result<PreparedRuntimeExtension, DynError> {
let source_provider = self
.source_provider_factory
.build_source_provider(deployment, node_clients)?;
let observer = (self.observer_builder)();
let runtime =
ObservationRuntime::start(source_provider, observer, self.config.clone()).await?;
let (handle, task) = runtime.into_parts();
Ok(PreparedRuntimeExtension::from_task(handle, task))
}
}
#[async_trait]
impl<S, P> SourceProvider<S> for Box<P>
where
S: Clone + Send + Sync + 'static,
P: SourceProvider<S> + ?Sized,
{
async fn sources(&self) -> Result<Vec<ObservedSource<S>>, DynError> {
(**self).sources().await
}
}
#[async_trait]
impl<S, P> SourceProvider<S> for Arc<P>
where
S: Clone + Send + Sync + 'static,
P: SourceProvider<S> + ?Sized,
{
async fn sources(&self) -> Result<Vec<ObservedSource<S>>, DynError> {
(**self).sources().await
}
}
impl<O: Observer> From<ObservationHandle<O>> for PreparedRuntimeExtension {
fn from(handle: ObservationHandle<O>) -> Self {
PreparedRuntimeExtension::new(handle)
}
}
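#[cfg(test)]
mod static_provider_tests {
    use super::{ObservedSource, SourceProvider, StaticSourceProvider};

    // Hedged sketch, not part of the original commit: a static provider
    // hands back the same fixed source set on every refresh cycle.
    #[tokio::test]
    async fn returns_fixed_sources() {
        let provider = StaticSourceProvider::new(vec![ObservedSource::new("node-0", 7u64)]);
        let sources = provider.sources().await.expect("static sources");
        assert_eq!(sources.len(), 1);
        assert_eq!(sources[0].name, "node-0");
        assert_eq!(sources[0].source, 7);
    }
}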

View File

@ -0,0 +1,503 @@
//! Generic continuous observation runtime.
//!
//! This module provides the reusable runtime needed by both TF scenarios and
//! manual-cluster consumers such as Cucumber worlds. It does not know any app
//! semantics. Apps provide source types, observation logic, materialized state,
//! snapshots, and delta events.
mod factory;
use std::{
any::type_name,
collections::VecDeque,
sync::Arc,
time::{Duration, SystemTime},
};
use async_trait::async_trait;
pub use factory::{
BoxedSourceProvider, ObservationExtensionFactory, SourceProviderFactory, StaticSourceProvider,
};
use parking_lot::Mutex;
use tokio::{
sync::broadcast,
task::JoinHandle,
time::{MissedTickBehavior, interval},
};
use tracing::{debug, info, warn};
use crate::scenario::DynError;
/// Configuration for a background observation runtime.
#[derive(Clone, Debug)]
pub struct ObservationConfig {
/// Time between observation cycles.
pub interval: Duration,
/// Maximum number of non-empty event batches retained in memory.
pub history_limit: usize,
}
impl Default for ObservationConfig {
fn default() -> Self {
Self {
interval: Duration::from_secs(1),
history_limit: 64,
}
}
}
/// One named observation source.
#[derive(Clone, Debug)]
pub struct ObservedSource<S> {
/// Human-readable source name used in logs and app-level reporting.
pub name: String,
/// App-owned source handle.
pub source: S,
}
impl<S> ObservedSource<S> {
/// Builds one named observation source.
#[must_use]
pub fn new(name: &str, source: S) -> Self {
Self {
name: name.to_owned(),
source,
}
}
}
/// Supplies the current observation source set.
#[async_trait]
pub trait SourceProvider<S>: Send + Sync + 'static {
/// Returns the current source set for the next observation cycle.
async fn sources(&self) -> Result<Vec<ObservedSource<S>>, DynError>;
}
/// App-owned observation logic.
#[async_trait]
pub trait Observer: Send + Sync + 'static {
/// App-owned source type.
type Source: Clone + Send + Sync + 'static;
/// App-owned retained materialized state.
type State: Send + Sync + 'static;
/// App-owned current snapshot view.
type Snapshot: Clone + Send + Sync + 'static;
/// App-owned delta event type emitted per cycle.
type Event: Clone + Send + Sync + 'static;
/// Builds the initial retained state from the current source set.
async fn init(&self, sources: &[ObservedSource<Self::Source>])
-> Result<Self::State, DynError>;
/// Advances retained state by one cycle and returns any new delta events.
async fn poll(
&self,
sources: &[ObservedSource<Self::Source>],
state: &mut Self::State,
) -> Result<Vec<Self::Event>, DynError>;
/// Builds the current snapshot view from retained state.
fn snapshot(&self, state: &Self::State) -> Self::Snapshot;
}
/// One materialized snapshot emitted by the runtime.
#[derive(Clone, Debug)]
pub struct ObservationSnapshot<S> {
/// Monotonic cycle number.
pub cycle: u64,
/// Capture timestamp.
pub observed_at: SystemTime,
/// Number of sources used for this snapshot.
pub source_count: usize,
/// App-owned snapshot payload.
pub value: S,
}
/// One delta batch emitted by a successful observation cycle.
#[derive(Clone, Debug)]
pub struct ObservationBatch<E> {
/// Monotonic cycle number.
pub cycle: u64,
/// Capture timestamp.
pub observed_at: SystemTime,
/// Number of sources used for this batch.
pub source_count: usize,
/// App-owned delta events discovered in this cycle.
pub events: Vec<E>,
}
/// Observation runtime failure stage.
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
pub enum ObservationFailureStage {
/// Source refresh failed before a poll could run.
SourceRefresh,
/// Observer poll failed after sources were refreshed.
Poll,
}
/// Last failed observation cycle.
#[derive(Clone, Debug)]
pub struct ObservationFailure {
/// Monotonic cycle number.
pub cycle: u64,
/// Failure timestamp.
pub observed_at: SystemTime,
/// Number of sources involved in the failed cycle.
pub source_count: usize,
/// Runtime stage that failed.
pub stage: ObservationFailureStage,
/// Human-readable failure message.
pub message: String,
}
/// Errors returned while starting an observation runtime.
#[derive(Debug, thiserror::Error)]
pub enum ObservationRuntimeError {
/// The configured interval is invalid.
#[error("observation interval must be greater than zero")]
InvalidInterval,
/// Source discovery failed during runtime startup.
#[error("failed to refresh observation sources during startup: {source}")]
SourceRefresh {
#[source]
source: DynError,
},
/// Observer state initialization failed during runtime startup.
#[error("failed to initialize observation state: {source}")]
ObserverInit {
#[source]
source: DynError,
},
}
/// Read-side handle for one running observer.
pub struct ObservationHandle<O: Observer> {
shared: Arc<Mutex<SharedObservationState<O>>>,
batches: broadcast::Sender<Arc<ObservationBatch<O::Event>>>,
}
impl<O: Observer> Clone for ObservationHandle<O> {
fn clone(&self) -> Self {
Self {
shared: Arc::clone(&self.shared),
batches: self.batches.clone(),
}
}
}
impl<O: Observer> ObservationHandle<O> {
/// Returns the latest successful snapshot, if one has been produced.
#[must_use]
pub fn latest_snapshot(&self) -> Option<ObservationSnapshot<O::Snapshot>> {
self.shared.lock().latest_snapshot.clone()
}
/// Returns retained non-empty event batches.
#[must_use]
pub fn history(&self) -> Vec<Arc<ObservationBatch<O::Event>>> {
self.shared.lock().history.iter().cloned().collect()
}
/// Returns the most recent cycle failure, if any.
#[must_use]
pub fn last_error(&self) -> Option<ObservationFailure> {
self.shared.lock().last_error.clone()
}
/// Subscribes to future non-empty event batches.
#[must_use]
pub fn subscribe(&self) -> broadcast::Receiver<Arc<ObservationBatch<O::Event>>> {
self.batches.subscribe()
}
}
/// Lifecycle owner for one background observation runtime.
pub struct ObservationRuntime<O: Observer> {
handle: ObservationHandle<O>,
task: Option<JoinHandle<()>>,
}
impl<O: Observer> ObservationRuntime<O> {
/// Starts one background observation runtime.
pub async fn start<P>(
provider: P,
observer: O,
config: ObservationConfig,
) -> Result<Self, ObservationRuntimeError>
where
P: SourceProvider<O::Source>,
{
ensure_positive_interval(config.interval)?;
let sources = provider
.sources()
.await
.map_err(|source| ObservationRuntimeError::SourceRefresh { source })?;
let source_count = sources.len();
let state = observer
.init(&sources)
.await
.map_err(|source| ObservationRuntimeError::ObserverInit { source })?;
let snapshot = build_snapshot(0, source_count, &observer, &state);
let batches = broadcast::channel(config.history_limit.max(1)).0;
let shared = Arc::new(Mutex::new(SharedObservationState::new(snapshot)));
let handle = ObservationHandle {
shared: Arc::clone(&shared),
batches,
};
info!(
observer = type_name::<O>(),
interval_ms = config.interval.as_millis(),
history_limit = config.history_limit,
source_count,
"starting observation runtime"
);
let runtime_handle = handle.clone();
let task = tokio::spawn(run_observation_loop(
provider,
observer,
config,
shared,
runtime_handle.batches.clone(),
state,
));
Ok(Self {
handle: runtime_handle,
task: Some(task),
})
}
/// Returns a read-side handle for the running observer.
#[must_use]
pub fn handle(&self) -> ObservationHandle<O> {
self.handle.clone()
}
/// Splits the runtime into its handle and background task.
#[must_use]
pub fn into_parts(mut self) -> (ObservationHandle<O>, JoinHandle<()>) {
let task = self
.task
.take()
.expect("observation runtime task is always present before into_parts");
(self.handle.clone(), task)
}
/// Aborts the background task.
pub fn abort(&mut self) {
if let Some(task) = self.task.take() {
task.abort();
}
}
}
impl<O: Observer> Drop for ObservationRuntime<O> {
fn drop(&mut self) {
self.abort();
}
}
struct SharedObservationState<O: Observer> {
latest_snapshot: Option<ObservationSnapshot<O::Snapshot>>,
history: VecDeque<Arc<ObservationBatch<O::Event>>>,
last_error: Option<ObservationFailure>,
}
impl<O: Observer> SharedObservationState<O> {
fn new(snapshot: ObservationSnapshot<O::Snapshot>) -> Self {
Self {
latest_snapshot: Some(snapshot),
history: VecDeque::new(),
last_error: None,
}
}
}
async fn run_observation_loop<O, P>(
provider: P,
observer: O,
config: ObservationConfig,
shared: Arc<Mutex<SharedObservationState<O>>>,
batches: broadcast::Sender<Arc<ObservationBatch<O::Event>>>,
mut state: O::State,
) where
O: Observer,
P: SourceProvider<O::Source>,
{
let mut ticker = build_interval(config.interval);
let mut cycle = 1u64;
ticker.tick().await;
loop {
ticker.tick().await;
let cycle_outcome = observe_cycle(&provider, &observer, cycle, &mut state).await;
match cycle_outcome {
Ok(success) => record_cycle_success(&shared, &batches, &config, success),
Err(failure) => record_cycle_failure(&shared, failure),
}
cycle += 1;
}
}
struct CycleSuccess<O: Observer> {
snapshot: ObservationSnapshot<O::Snapshot>,
batch: Option<Arc<ObservationBatch<O::Event>>>,
}
async fn observe_cycle<O, P>(
provider: &P,
observer: &O,
cycle: u64,
state: &mut O::State,
) -> Result<CycleSuccess<O>, ObservationFailure>
where
O: Observer,
P: SourceProvider<O::Source>,
{
let sources = provider.sources().await.map_err(|source| {
build_failure(cycle, 0, ObservationFailureStage::SourceRefresh, source)
})?;
let source_count = sources.len();
let events = observer.poll(&sources, state).await.map_err(|source| {
build_failure(cycle, source_count, ObservationFailureStage::Poll, source)
})?;
let snapshot = build_snapshot(cycle, source_count, observer, state);
let batch = build_batch(cycle, source_count, events);
Ok(CycleSuccess { snapshot, batch })
}
fn record_cycle_success<O: Observer>(
shared: &Arc<Mutex<SharedObservationState<O>>>,
batches: &broadcast::Sender<Arc<ObservationBatch<O::Event>>>,
config: &ObservationConfig,
success: CycleSuccess<O>,
) {
debug!(
observer = type_name::<O>(),
cycle = success.snapshot.cycle,
source_count = success.snapshot.source_count,
event_count = success.batch.as_ref().map_or(0, |batch| batch.events.len()),
"observation cycle completed"
);
let mut state = shared.lock();
state.latest_snapshot = Some(success.snapshot);
state.last_error = None;
let Some(batch) = success.batch else {
return;
};
push_history(&mut state.history, Arc::clone(&batch), config.history_limit);
drop(state);
let _ = batches.send(batch);
}
fn record_cycle_failure<O: Observer>(
shared: &Arc<Mutex<SharedObservationState<O>>>,
failure: ObservationFailure,
) {
warn!(
observer = type_name::<O>(),
cycle = failure.cycle,
source_count = failure.source_count,
stage = ?failure.stage,
message = %failure.message,
"observation cycle failed"
);
shared.lock().last_error = Some(failure);
}
fn ensure_positive_interval(interval: Duration) -> Result<(), ObservationRuntimeError> {
if interval.is_zero() {
return Err(ObservationRuntimeError::InvalidInterval);
}
Ok(())
}
fn build_interval(period: Duration) -> tokio::time::Interval {
let mut ticker = interval(period);
ticker.set_missed_tick_behavior(MissedTickBehavior::Delay);
ticker
}
fn build_snapshot<O: Observer>(
cycle: u64,
source_count: usize,
observer: &O,
state: &O::State,
) -> ObservationSnapshot<O::Snapshot> {
ObservationSnapshot {
cycle,
observed_at: SystemTime::now(),
source_count,
value: observer.snapshot(state),
}
}
fn build_batch<E>(
cycle: u64,
source_count: usize,
events: Vec<E>,
) -> Option<Arc<ObservationBatch<E>>> {
if events.is_empty() {
return None;
}
Some(Arc::new(ObservationBatch {
cycle,
observed_at: SystemTime::now(),
source_count,
events,
}))
}
fn build_failure(
cycle: u64,
source_count: usize,
stage: ObservationFailureStage,
source: DynError,
) -> ObservationFailure {
ObservationFailure {
cycle,
observed_at: SystemTime::now(),
source_count,
stage,
message: source.to_string(),
}
}
fn push_history<E>(
history: &mut VecDeque<Arc<ObservationBatch<E>>>,
batch: Arc<ObservationBatch<E>>,
history_limit: usize,
) {
if history_limit == 0 {
return;
}
history.push_back(batch);
while history.len() > history_limit {
history.pop_front();
}
}
#[cfg(test)]
mod tests;
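#[cfg(test)]
mod usage_sketch {
    //! Hedged end-to-end sketch of the public API, compiled as a test. The
    //! observer below is illustrative only: its snapshot is just the current
    //! source count, and it never emits delta events.
    use std::time::Duration;

    use async_trait::async_trait;

    use super::{
        ObservationConfig, ObservationRuntime, ObservedSource, Observer, StaticSourceProvider,
    };
    use crate::scenario::DynError;

    struct SourceCountObserver;

    #[async_trait]
    impl Observer for SourceCountObserver {
        type Source = ();
        type State = usize;
        type Snapshot = usize;
        type Event = usize;

        async fn init(
            &self,
            sources: &[ObservedSource<Self::Source>],
        ) -> Result<Self::State, DynError> {
            Ok(sources.len())
        }

        async fn poll(
            &self,
            sources: &[ObservedSource<Self::Source>],
            state: &mut Self::State,
        ) -> Result<Vec<Self::Event>, DynError> {
            *state = sources.len();
            Ok(Vec::new()) // empty batches are neither retained nor broadcast
        }

        fn snapshot(&self, state: &Self::State) -> Self::Snapshot {
            *state
        }
    }

    #[tokio::test]
    async fn startup_snapshot_is_available_immediately() {
        let provider = StaticSourceProvider::new(vec![ObservedSource::new("node-0", ())]);
        // A long interval keeps the background loop idle for the duration of
        // this test, so only the startup snapshot (cycle 0) is observable.
        let runtime = ObservationRuntime::start(
            provider,
            SourceCountObserver,
            ObservationConfig {
                interval: Duration::from_secs(60),
                history_limit: 4,
            },
        )
        .await
        .expect("runtime should start");
        let snapshot = runtime
            .handle()
            .latest_snapshot()
            .expect("startup snapshot should exist");
        assert_eq!(snapshot.cycle, 0);
        assert_eq!(snapshot.source_count, 1);
        assert_eq!(snapshot.value, 1);
    }
}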

View File

@ -0,0 +1,250 @@
use std::{
sync::{
Arc,
atomic::{AtomicUsize, Ordering},
},
time::Duration,
};
use async_trait::async_trait;
use parking_lot::Mutex;
use tokio::time::{Instant, sleep};
use super::{
ObservationConfig, ObservationFailureStage, ObservationRuntime, ObservedSource, Observer,
SourceProvider,
};
use crate::scenario::DynError;
#[derive(Clone)]
struct TestSourceProvider {
sources: Arc<Mutex<Vec<ObservedSource<u64>>>>,
fail_refreshes: Arc<AtomicUsize>,
}
impl TestSourceProvider {
fn new(sources: Vec<ObservedSource<u64>>) -> Self {
Self {
sources: Arc::new(Mutex::new(sources)),
fail_refreshes: Arc::new(AtomicUsize::new(0)),
}
}
fn replace_sources(&self, sources: Vec<ObservedSource<u64>>) {
*self.sources.lock() = sources;
}
fn fail_next_refresh(&self) {
self.fail_refreshes.store(1, Ordering::SeqCst);
}
}
#[async_trait]
impl SourceProvider<u64> for TestSourceProvider {
async fn sources(&self) -> Result<Vec<ObservedSource<u64>>, DynError> {
if self.fail_refreshes.swap(0, Ordering::SeqCst) == 1 {
return Err("refresh failed".into());
}
Ok(self.sources.lock().clone())
}
}
#[derive(Clone, Debug, Eq, PartialEq)]
struct TestSnapshot {
total_sources_seen: u64,
last_source_count: usize,
}
#[derive(Clone, Debug, Eq, PartialEq)]
struct TestEvent {
total_sources_seen: u64,
}
#[derive(Default)]
struct TestState {
total_sources_seen: u64,
last_source_count: usize,
}
struct CountingObserver;
#[async_trait]
impl Observer for CountingObserver {
type Source = u64;
type State = TestState;
type Snapshot = TestSnapshot;
type Event = TestEvent;
async fn init(
&self,
sources: &[ObservedSource<Self::Source>],
) -> Result<Self::State, DynError> {
Ok(TestState {
total_sources_seen: sources.iter().map(|source| source.source).sum(),
last_source_count: sources.len(),
})
}
async fn poll(
&self,
sources: &[ObservedSource<Self::Source>],
state: &mut Self::State,
) -> Result<Vec<Self::Event>, DynError> {
state.total_sources_seen += sources.iter().map(|source| source.source).sum::<u64>();
state.last_source_count = sources.len();
Ok(vec![TestEvent {
total_sources_seen: state.total_sources_seen,
}])
}
fn snapshot(&self, state: &Self::State) -> Self::Snapshot {
TestSnapshot {
total_sources_seen: state.total_sources_seen,
last_source_count: state.last_source_count,
}
}
}
#[tokio::test]
async fn runtime_updates_snapshot_and_history() {
let provider = TestSourceProvider::new(vec![ObservedSource::new("node-0", 2)]);
let runtime = ObservationRuntime::start(
provider,
CountingObserver,
ObservationConfig {
interval: Duration::from_millis(25),
history_limit: 2,
},
)
.await
.expect("runtime should start");
let handle = runtime.handle();
wait_for_cycle(&handle, 2).await;
let snapshot = handle.latest_snapshot().expect("snapshot should exist");
assert!(snapshot.cycle >= 2);
assert_eq!(snapshot.source_count, 1);
assert_eq!(snapshot.value.last_source_count, 1);
assert!(snapshot.value.total_sources_seen >= 6);
let history = handle.history();
assert_eq!(history.len(), 2);
assert!(history.iter().all(|batch| !batch.events.is_empty()));
}
#[tokio::test]
async fn runtime_refreshes_sources_each_cycle() {
let provider = TestSourceProvider::new(vec![ObservedSource::new("node-0", 1)]);
let runtime = ObservationRuntime::start(
provider.clone(),
CountingObserver,
ObservationConfig {
interval: Duration::from_millis(25),
history_limit: 4,
},
)
.await
.expect("runtime should start");
let handle = runtime.handle();
wait_for_cycle(&handle, 1).await;
provider.replace_sources(vec![
ObservedSource::new("node-0", 1),
ObservedSource::new("node-1", 3),
]);
wait_for_snapshot_source_count(&handle, 2).await;
let snapshot = handle.latest_snapshot().expect("snapshot should exist");
assert_eq!(snapshot.source_count, 2);
assert_eq!(snapshot.value.last_source_count, 2);
}
#[tokio::test]
async fn runtime_records_cycle_failures() {
let provider = TestSourceProvider::new(vec![ObservedSource::new("node-0", 1)]);
let runtime = ObservationRuntime::start(
provider.clone(),
CountingObserver,
ObservationConfig {
interval: Duration::from_millis(25),
history_limit: 2,
},
)
.await
.expect("runtime should start");
let handle = runtime.handle();
provider.fail_next_refresh();
wait_for_failure(&handle).await;
let failure = handle.last_error().expect("failure should exist");
assert_eq!(failure.stage, ObservationFailureStage::SourceRefresh);
assert_eq!(failure.message, "refresh failed");
}
async fn wait_for_cycle(handle: &super::ObservationHandle<CountingObserver>, cycle: u64) {
let deadline = Instant::now() + Duration::from_secs(2);
loop {
let Some(snapshot) = handle.latest_snapshot() else {
sleep(Duration::from_millis(10)).await;
continue;
};
if snapshot.cycle >= cycle {
return;
}
assert!(
Instant::now() < deadline,
"timed out waiting for cycle {cycle}"
);
sleep(Duration::from_millis(10)).await;
}
}
async fn wait_for_snapshot_source_count(
handle: &super::ObservationHandle<CountingObserver>,
source_count: usize,
) {
let deadline = Instant::now() + Duration::from_secs(2);
loop {
let Some(snapshot) = handle.latest_snapshot() else {
sleep(Duration::from_millis(10)).await;
continue;
};
if snapshot.source_count == source_count {
return;
}
assert!(
Instant::now() < deadline,
"timed out waiting for source_count {source_count}"
);
sleep(Duration::from_millis(10)).await;
}
}
async fn wait_for_failure(handle: &super::ObservationHandle<CountingObserver>) {
let deadline = Instant::now() + Duration::from_secs(2);
loop {
if handle.last_error().is_some() {
return;
}
assert!(Instant::now() < deadline, "timed out waiting for failure");
sleep(Duration::from_millis(10)).await;
}
}

View File

@ -4,7 +4,12 @@ use super::{
Application, CleanupPolicy, DeploymentPolicy, Expectation, HttpReadinessRequirement,
RetryPolicy, RuntimeExtensionFactory, Workload, internal::CoreBuilderAccess,
};
use crate::topology::{DeploymentProvider, DeploymentSeed};
use crate::{
observation::{
ObservationConfig, ObservationExtensionFactory, Observer, SourceProviderFactory,
},
topology::{DeploymentProvider, DeploymentSeed},
};
type DeploymentProviderHandle<E> = Box<dyn DeploymentProvider<<E as Application>::Deployment>>;
@ -60,6 +65,48 @@ pub trait CoreBuilderExt: CoreBuilderAccess + Sized {
self.map_core_builder(|builder| builder.with_runtime_extension_factory(extension))
}
/// Registers one clonable observer as a runtime extension.
#[must_use]
fn with_observer<O>(
self,
observer: O,
source_provider_factory: impl SourceProviderFactory<Self::Env, O::Source>,
config: ObservationConfig,
) -> Self
where
O: Observer + Clone,
Self::Env: Application,
{
let extension = ObservationExtensionFactory::<Self::Env, O>::new(
observer,
source_provider_factory,
config,
);
self.with_runtime_extension_factory(Box::new(extension))
}
/// Registers one observer built lazily per run as a runtime extension.
#[must_use]
fn with_observer_factory<O>(
self,
observer_builder: impl Fn() -> O + Send + Sync + 'static,
source_provider_factory: impl SourceProviderFactory<Self::Env, O::Source>,
config: ObservationConfig,
) -> Self
where
O: Observer,
Self::Env: Application,
{
let extension = ObservationExtensionFactory::<Self::Env, O>::from_parts(
observer_builder,
source_provider_factory,
config,
);
self.with_runtime_extension_factory(Box::new(extension))
}
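    // Hedged usage sketch; `MyEnv`, `my_observer`, and `my_source_factory`
    // are stand-ins for app-defined items, not part of this crate:
    //
    //     ScenarioBuilder::<MyEnv>::with_deployment(topology)
    //         .with_observer(my_observer, my_source_factory, ObservationConfig::default())
    //         .with_run_duration(Duration::from_secs(30))
    //         .build()?;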
#[must_use]
fn with_run_duration(self, duration: Duration) -> Self {
self.map_core_builder(|builder| builder.with_run_duration(duration))

Some files were not shown because too many files have changed in this diff.