mirror of
https://github.com/logos-blockchain/logos-blockchain-testing.git
synced 2026-04-15 07:33:13 +00:00
demo-apps: kvstore, queue, and openraft_kv
This commit is contained in:
commit
8700bd5a6c
@ -6,7 +6,11 @@ exclude-dev = true
|
||||
no-default-features = true
|
||||
|
||||
[advisories]
|
||||
ignore = []
|
||||
ignore = [
|
||||
# Existing workspace dependencies still resolve rand 0.8 via tera/tokio-retry.
|
||||
# Track removal when those upstream edges move to a fixed release.
|
||||
"RUSTSEC-2026-0097",
|
||||
]
|
||||
yanked = "deny"
|
||||
|
||||
[bans]
|
||||
|
||||
766
Cargo.lock
generated
766
Cargo.lock
generated
File diff suppressed because it is too large
Load Diff
14
Cargo.toml
14
Cargo.toml
@ -4,6 +4,18 @@ members = [
|
||||
"cfgsync/artifacts",
|
||||
"cfgsync/core",
|
||||
"cfgsync/runtime",
|
||||
"examples/kvstore/examples",
|
||||
"examples/kvstore/kvstore-node",
|
||||
"examples/kvstore/testing/integration",
|
||||
"examples/kvstore/testing/workloads",
|
||||
"examples/openraft_kv/examples",
|
||||
"examples/openraft_kv/openraft-kv-node",
|
||||
"examples/openraft_kv/testing/integration",
|
||||
"examples/openraft_kv/testing/workloads",
|
||||
"examples/queue/examples",
|
||||
"examples/queue/queue-node",
|
||||
"examples/queue/testing/integration",
|
||||
"examples/queue/testing/workloads",
|
||||
"examples/metrics_counter/examples",
|
||||
"examples/metrics_counter/metrics-counter-node",
|
||||
"examples/metrics_counter/testing/integration",
|
||||
@ -56,6 +68,8 @@ bytes = { default-features = false, version = "1.3" }
|
||||
hex = { default-features = false, version = "0.4.3" }
|
||||
libp2p = { default-features = false, version = "0.55" }
|
||||
num-bigint = { default-features = false, version = "0.4" }
|
||||
openraft = { default-features = true, features = ["serde", "type-alias"], version = "0.10.0-alpha.17" }
|
||||
openraft-memstore = { default-features = true, version = "0.10.0-alpha.17" }
|
||||
parking_lot = { default-features = false, version = "0.12" }
|
||||
rand = { default-features = false, features = ["std", "std_rng"], version = "0.8" }
|
||||
reqwest = { default-features = false, version = "0.12" }
|
||||
|
||||
314
docs/observation-runtime-plan.md
Normal file
314
docs/observation-runtime-plan.md
Normal file
@ -0,0 +1,314 @@
|
||||
# Observation Runtime Plan
|
||||
|
||||
## Why this work exists
|
||||
|
||||
TF is good at deployment plumbing. It is weak at continuous observation.
|
||||
|
||||
Today, the same problems are solved repeatedly with custom loops:
|
||||
- TF block feed logic in Logos
|
||||
- Cucumber manual-cluster polling loops
|
||||
- ad hoc catch-up scans for wallet and chain state
|
||||
- app-local state polling in expectations
|
||||
|
||||
That is the gap this work should close.
|
||||
|
||||
The goal is not a generic "distributed systems DSL".
|
||||
The goal is one reusable observation runtime that:
|
||||
- continuously collects data from dynamic sources
|
||||
- keeps typed materialized state
|
||||
- exposes both current snapshot and delta/history views
|
||||
- fits naturally in TF scenarios and Cucumber manual-cluster code
|
||||
|
||||
## Constraints
|
||||
|
||||
### TF constraints
|
||||
- TF abstractions must stay universal and simple.
|
||||
- TF must not know app semantics like blocks, wallets, leaders, jobs, or topics.
|
||||
- TF must remain useful for simple apps such as `openraft_kv`, not only Logos.
|
||||
|
||||
### App constraints
|
||||
- Apps must be able to build richer abstractions on top of TF.
|
||||
- Logos must be able to support:
|
||||
- current block-feed replacement
|
||||
- fork-aware chain state
|
||||
- public-peer sync targets
|
||||
- multi-wallet UTXO tracking
|
||||
- Apps must be able to adopt this incrementally.
|
||||
|
||||
### Migration constraints
|
||||
- We do not want a flag-day rewrite.
|
||||
- Existing loops can coexist with the new runtime until replacements are proven.
|
||||
|
||||
## Non-goals
|
||||
|
||||
This work should not:
|
||||
- put feed back onto the base `Application` trait
|
||||
- build app-specific semantics into TF core
|
||||
- replace filesystem blockchain snapshots used for startup/restore
|
||||
- force every app to use continuous observation
|
||||
- introduce a large public abstraction stack that nobody can explain
|
||||
|
||||
## Core idea
|
||||
|
||||
Introduce one TF-level observation runtime.
|
||||
|
||||
That runtime owns:
|
||||
- source refresh
|
||||
- scheduling
|
||||
- polling/ingestion
|
||||
- bounded history
|
||||
- latest snapshot caching
|
||||
- delta publication
|
||||
- freshness/error tracking
|
||||
- lifecycle hooks for TF and Cucumber
|
||||
|
||||
Apps own:
|
||||
- source types
|
||||
- raw observation logic
|
||||
- materialized state
|
||||
- snapshot shape
|
||||
- delta/event shape
|
||||
- higher-level projections such as wallet state
|
||||
|
||||
## Public TF surface
|
||||
|
||||
The TF public surface should stay small.
|
||||
|
||||
### `ObservedSource<S>`
|
||||
A named source instance.
|
||||
|
||||
Used for:
|
||||
- local node clients
|
||||
- public peer endpoints
|
||||
- any other app-owned source type
|
||||
|
||||
### `SourceProvider<S>`
|
||||
Returns the current source set.
|
||||
|
||||
This must support dynamic source lists because:
|
||||
- manual cluster nodes come and go
|
||||
- Cucumber worlds may attach public peers
|
||||
- node control may restart or replace sources during a run
|
||||
|
||||
### `Observer`
|
||||
App-owned observation logic.
|
||||
|
||||
It defines:
|
||||
- `Source`
|
||||
- `State`
|
||||
- `Snapshot`
|
||||
- `Event`
|
||||
|
||||
And it implements:
|
||||
- `init(...)`
|
||||
- `poll(...)`
|
||||
- `snapshot(...)`
|
||||
|
||||
The important boundary is:
|
||||
- TF owns the runtime
|
||||
- app code owns materialization
|
||||
|
||||
### `ObservationRuntime`
|
||||
The engine that:
|
||||
- starts the loop
|
||||
- refreshes sources
|
||||
- calls `poll(...)`
|
||||
- stores history
|
||||
- publishes deltas
|
||||
- updates latest snapshot
|
||||
- tracks last error and freshness
|
||||
|
||||
### `ObservationHandle`
|
||||
The read-side interface for workloads, expectations, and Cucumber steps.
|
||||
|
||||
It should expose at least:
|
||||
- latest snapshot
|
||||
- delta subscription
|
||||
- bounded history
|
||||
- last error
|
||||
|
||||
## Intended shape
|
||||
|
||||
```rust
|
||||
pub struct ObservedSource<S> {
|
||||
pub name: String,
|
||||
pub source: S,
|
||||
}
|
||||
|
||||
#[async_trait]
|
||||
pub trait SourceProvider<S>: Send + Sync + 'static {
|
||||
async fn sources(&self) -> Vec<ObservedSource<S>>;
|
||||
}
|
||||
|
||||
#[async_trait]
|
||||
pub trait Observer: Send + Sync + 'static {
|
||||
type Source: Clone + Send + Sync + 'static;
|
||||
type State: Send + Sync + 'static;
|
||||
type Snapshot: Clone + Send + Sync + 'static;
|
||||
type Event: Clone + Send + Sync + 'static;
|
||||
|
||||
async fn init(
|
||||
&self,
|
||||
sources: &[ObservedSource<Self::Source>],
|
||||
) -> Result<Self::State, DynError>;
|
||||
|
||||
async fn poll(
|
||||
&self,
|
||||
sources: &[ObservedSource<Self::Source>],
|
||||
state: &mut Self::State,
|
||||
) -> Result<Vec<Self::Event>, DynError>;
|
||||
|
||||
fn snapshot(&self, state: &Self::State) -> Self::Snapshot;
|
||||
}
|
||||
```
|
||||
|
||||
This is enough.
|
||||
|
||||
If more helper layers are needed, they should stay internal first.
|
||||
|
||||
## How current use cases fit
|
||||
|
||||
### `openraft_kv`
|
||||
Use one simple observer.
|
||||
|
||||
- sources: node clients
|
||||
- state: latest per-node Raft state
|
||||
- snapshot: sorted node-state view
|
||||
- events: optional deltas, possibly empty at first
|
||||
|
||||
This is the simplest proving case.
|
||||
It validates the runtime without dragging in Logos complexity.
|
||||
|
||||
### Logos block feed replacement
|
||||
Use one shared chain observer.
|
||||
|
||||
- sources: local node clients
|
||||
- state:
|
||||
- node heads
|
||||
- block graph
|
||||
- heights
|
||||
- seen headers
|
||||
- recent history
|
||||
- snapshot:
|
||||
- current head/lib/graph summary
|
||||
- events:
|
||||
- newly discovered blocks
|
||||
|
||||
This covers both existing Logos feed use cases:
|
||||
- current snapshot consumers
|
||||
- delta/subscription consumers
|
||||
|
||||
### Cucumber manual-cluster sync
|
||||
Use the same observer runtime with a different source set.
|
||||
|
||||
- sources:
|
||||
- local manual-cluster node clients
|
||||
- public peer endpoints
|
||||
- state:
|
||||
- local consensus views
|
||||
- public consensus views
|
||||
- derived majority public target
|
||||
- snapshot:
|
||||
- current local and public sync picture
|
||||
|
||||
This removes custom poll/sleep loops from steps.
|
||||
|
||||
### Multi-wallet fork-aware tracking
|
||||
This should not be a TF concept.
|
||||
|
||||
It should be a Logos projection built on top of the shared chain observer.
|
||||
|
||||
- input: chain observer state
|
||||
- output: per-header wallet state cache keyed by block header
|
||||
- property: naturally fork-aware because it follows actual ancestry
|
||||
|
||||
That replaces repeated backward scans from tip with continuous maintained state.
|
||||
|
||||
## Logos layering
|
||||
|
||||
Logos should not put every concern into one giant impl.
|
||||
|
||||
Recommended layering:
|
||||
|
||||
1. **Chain source adapter**
|
||||
- local node reads
|
||||
- public peer reads
|
||||
|
||||
2. **Shared chain observer**
|
||||
- catch-up
|
||||
- continuous ingestion
|
||||
- graph/history materialization
|
||||
|
||||
3. **Logos projections**
|
||||
- head view
|
||||
- public sync target
|
||||
- fork graph queries
|
||||
- wallet state
|
||||
- tx inclusion helpers
|
||||
|
||||
TF provides the runtime.
|
||||
Logos provides the domain model built on top.
|
||||
|
||||
## Adoption plan
|
||||
|
||||
### Phase 1: add TF observation runtime
|
||||
- add `ObservedSource`, `SourceProvider`, `Observer`, `ObservationRuntime`, `ObservationHandle`
|
||||
- keep the public API small
|
||||
- no app migrations yet
|
||||
|
||||
### Phase 2: prove it on `openraft_kv`
|
||||
- add one simple observer over `/state`
|
||||
- migrate one expectation to use the observation handle
|
||||
- validate local, compose, and k8s
|
||||
|
||||
### Phase 3: add Logos shared chain observer
|
||||
- implement it alongside current feed/loops
|
||||
- do not remove existing consumers yet
|
||||
- prove snapshot and delta outputs are useful
|
||||
|
||||
### Phase 4: migrate one Logos consumer at a time
|
||||
Suggested order:
|
||||
1. fork/head snapshot consumer
|
||||
2. tx inclusion consumer
|
||||
3. Cucumber sync-to-public-chain logic
|
||||
4. wallet/UTXO tracking
|
||||
|
||||
### Phase 5: delete old loops and feed paths
|
||||
- only after the new runtime has replaced real consumers cleanly
|
||||
|
||||
## Validation gates
|
||||
|
||||
Each phase should have clear checks.
|
||||
|
||||
### Runtime-level
|
||||
- crate-level `cargo check`
|
||||
- targeted tests for runtime lifecycle and history retention
|
||||
- explicit tests for dynamic source refresh
|
||||
|
||||
### App-level
|
||||
- `openraft_kv`:
|
||||
- local failover
|
||||
- compose failover
|
||||
- k8s failover
|
||||
- Logos:
|
||||
- one snapshot consumer migrated
|
||||
- one delta consumer migrated
|
||||
- Cucumber:
|
||||
- one manual-cluster sync path migrated
|
||||
|
||||
## Open questions
|
||||
|
||||
These should stay open until implementation forces a decision:
|
||||
- whether `ObservationHandle` should expose full history directly or only cursor/subscription access
|
||||
- how much error/freshness metadata belongs in the generic runtime vs app snapshot types
|
||||
- whether multiple observers should share one scheduler/runtime instance or simply run independently first
|
||||
|
||||
## Design guardrails
|
||||
|
||||
When implementing this work:
|
||||
- keep TF public abstractions minimal
|
||||
- keep app semantics out of TF core
|
||||
- do not chase a generic testing DSL
|
||||
- build from reusable blocks, not one-off mega impls
|
||||
- keep migration incremental
|
||||
- prefer simple, explainable runtime behavior over clever abstraction
|
||||
28
examples/kvstore/Dockerfile
Normal file
28
examples/kvstore/Dockerfile
Normal file
@ -0,0 +1,28 @@
|
||||
# Build stage
|
||||
FROM rustlang/rust:nightly-bookworm AS builder
|
||||
|
||||
WORKDIR /build
|
||||
|
||||
# Copy all workspace files required for workspace build.
|
||||
COPY Cargo.toml Cargo.lock ./
|
||||
COPY cfgsync/ ./cfgsync/
|
||||
COPY examples/ ./examples/
|
||||
COPY testing-framework/ ./testing-framework/
|
||||
|
||||
# Build kvstore-node in release mode.
|
||||
RUN cargo build --release -p kvstore-node
|
||||
|
||||
# Runtime stage
|
||||
FROM debian:bookworm-slim
|
||||
|
||||
RUN apt-get update && \
|
||||
apt-get install -y ca-certificates && \
|
||||
rm -rf /var/lib/apt/lists/*
|
||||
|
||||
COPY --from=builder /build/target/release/kvstore-node /usr/local/bin/kvstore-node
|
||||
|
||||
RUN mkdir -p /etc/kvstore
|
||||
WORKDIR /app
|
||||
|
||||
ENTRYPOINT ["/usr/local/bin/kvstore-node"]
|
||||
CMD ["--config", "/etc/kvstore/config.yaml"]
|
||||
64
examples/kvstore/README.md
Normal file
64
examples/kvstore/README.md
Normal file
@ -0,0 +1,64 @@
|
||||
# KV Store Example
|
||||
|
||||
This example runs a small replicated key-value store.
|
||||
|
||||
The usual scenario writes keys through one node and checks that the other nodes
|
||||
eventually return the same values.
|
||||
|
||||
## How TF runs this
|
||||
|
||||
Each example follows the same pattern:
|
||||
|
||||
- TF starts a small deployment of kvstore nodes
|
||||
- a workload writes keys through one node
|
||||
- an expectation keeps reading from all nodes until they agree on the values
|
||||
|
||||
## Scenarios
|
||||
|
||||
- `basic_convergence` runs the convergence check locally
|
||||
- `compose_convergence` runs the same check in Docker Compose
|
||||
- `k8s_convergence` runs it on Kubernetes
|
||||
- `k8s_manual_convergence` starts the nodes through the k8s manual cluster API, restarts one node, and checks convergence again
|
||||
|
||||
## API
|
||||
|
||||
Each node exposes:
|
||||
|
||||
- `PUT /kv/:key` to write a value
|
||||
- `GET /kv/:key` to read a value
|
||||
- `GET /internal/snapshot` to read the local replicated state
|
||||
|
||||
## Run locally
|
||||
|
||||
```bash
|
||||
cargo run -p kvstore-examples --bin kvstore_basic_convergence
|
||||
```
|
||||
|
||||
## Run with Docker Compose
|
||||
|
||||
```bash
|
||||
cargo run -p kvstore-examples --bin kvstore_compose_convergence
|
||||
```
|
||||
|
||||
Set `KVSTORE_IMAGE` to override the default compose image tag.
|
||||
|
||||
## Run with Kubernetes
|
||||
|
||||
```bash
|
||||
docker build -t kvstore-node:local -f examples/kvstore/Dockerfile .
|
||||
cargo run -p kvstore-examples --bin kvstore_k8s_convergence
|
||||
```
|
||||
|
||||
Prerequisites:
|
||||
- `kubectl` configured with a reachable cluster
|
||||
- `helm` installed
|
||||
|
||||
Optional image override:
|
||||
- `KVSTORE_K8S_IMAGE` (falls back to `KVSTORE_IMAGE`, then `kvstore-node:local`)
|
||||
|
||||
## Run with Kubernetes manual cluster
|
||||
|
||||
```bash
|
||||
docker build -t kvstore-node:local -f examples/kvstore/Dockerfile .
|
||||
cargo run -p kvstore-examples --bin kvstore_k8s_manual_convergence
|
||||
```
|
||||
35
examples/kvstore/examples/Cargo.toml
Normal file
35
examples/kvstore/examples/Cargo.toml
Normal file
@ -0,0 +1,35 @@
|
||||
[package]
|
||||
edition.workspace = true
|
||||
license.workspace = true
|
||||
name = "kvstore-examples"
|
||||
version.workspace = true
|
||||
|
||||
[[bin]]
|
||||
name = "kvstore_basic_convergence"
|
||||
path = "src/bin/basic_convergence.rs"
|
||||
|
||||
[[bin]]
|
||||
name = "kvstore_compose_convergence"
|
||||
path = "src/bin/compose_convergence.rs"
|
||||
|
||||
[[bin]]
|
||||
name = "kvstore_k8s_convergence"
|
||||
path = "src/bin/k8s_convergence.rs"
|
||||
|
||||
[[bin]]
|
||||
name = "kvstore_k8s_manual_convergence"
|
||||
path = "src/bin/k8s_manual_convergence.rs"
|
||||
|
||||
[dependencies]
|
||||
kvstore-node = { path = "../kvstore-node" }
|
||||
kvstore-runtime-ext = { path = "../testing/integration" }
|
||||
kvstore-runtime-workloads = { path = "../testing/workloads" }
|
||||
testing-framework-core = { workspace = true }
|
||||
testing-framework-runner-compose = { workspace = true }
|
||||
testing-framework-runner-k8s = { workspace = true }
|
||||
|
||||
anyhow = "1.0"
|
||||
serde = { workspace = true }
|
||||
tokio = { workspace = true, features = ["full"] }
|
||||
tracing = { workspace = true }
|
||||
tracing-subscriber = { version = "0.3", features = ["env-filter"] }
|
||||
31
examples/kvstore/examples/src/bin/basic_convergence.rs
Normal file
31
examples/kvstore/examples/src/bin/basic_convergence.rs
Normal file
@ -0,0 +1,31 @@
|
||||
use std::time::Duration;
|
||||
|
||||
use kvstore_runtime_ext::KvLocalDeployer;
|
||||
use kvstore_runtime_workloads::{
|
||||
KvBuilderExt, KvConverges, KvScenarioBuilder, KvTopology, KvWriteWorkload,
|
||||
};
|
||||
use testing_framework_core::scenario::Deployer;
|
||||
|
||||
#[tokio::main]
|
||||
async fn main() -> anyhow::Result<()> {
|
||||
tracing_subscriber::fmt()
|
||||
.with_env_filter(tracing_subscriber::EnvFilter::from_default_env())
|
||||
.init();
|
||||
|
||||
let mut scenario = KvScenarioBuilder::deployment_with(|_| KvTopology::new(3))
|
||||
.with_run_duration(Duration::from_secs(30))
|
||||
.with_workload(
|
||||
KvWriteWorkload::new()
|
||||
.operations(300)
|
||||
.key_count(30)
|
||||
.rate_per_sec(30)
|
||||
.key_prefix("demo"),
|
||||
)
|
||||
.with_expectation(KvConverges::new("demo", 30).timeout(Duration::from_secs(25)))
|
||||
.build()?;
|
||||
|
||||
let deployer = KvLocalDeployer::default();
|
||||
let runner = deployer.deploy(&scenario).await?;
|
||||
runner.run(&mut scenario).await?;
|
||||
Ok(())
|
||||
}
|
||||
44
examples/kvstore/examples/src/bin/compose_convergence.rs
Normal file
44
examples/kvstore/examples/src/bin/compose_convergence.rs
Normal file
@ -0,0 +1,44 @@
|
||||
use std::time::Duration;
|
||||
|
||||
use anyhow::{Context as _, Result};
|
||||
use kvstore_runtime_workloads::{
|
||||
KvBuilderExt, KvConverges, KvScenarioBuilder, KvTopology, KvWriteWorkload,
|
||||
};
|
||||
use testing_framework_core::scenario::Deployer;
|
||||
use testing_framework_runner_compose::ComposeRunnerError;
|
||||
use tracing::{info, warn};
|
||||
|
||||
#[tokio::main]
|
||||
async fn main() -> Result<()> {
|
||||
tracing_subscriber::fmt()
|
||||
.with_env_filter(tracing_subscriber::EnvFilter::from_default_env())
|
||||
.init();
|
||||
|
||||
let mut scenario = KvScenarioBuilder::deployment_with(|_| KvTopology::new(3))
|
||||
.with_run_duration(Duration::from_secs(30))
|
||||
.with_workload(
|
||||
KvWriteWorkload::new()
|
||||
.operations(200)
|
||||
.key_count(20)
|
||||
.rate_per_sec(20),
|
||||
)
|
||||
.with_expectation(KvConverges::new("kv-demo", 20).timeout(Duration::from_secs(25)))
|
||||
.build()?;
|
||||
|
||||
let deployer = kvstore_runtime_ext::KvComposeDeployer::new();
|
||||
let runner = match deployer.deploy(&scenario).await {
|
||||
Ok(runner) => runner,
|
||||
Err(ComposeRunnerError::DockerUnavailable) => {
|
||||
warn!("docker unavailable; skipping compose kv run");
|
||||
return Ok(());
|
||||
}
|
||||
Err(error) => return Err(anyhow::Error::new(error)).context("deploying kv compose stack"),
|
||||
};
|
||||
|
||||
info!("running kv compose convergence scenario");
|
||||
runner
|
||||
.run(&mut scenario)
|
||||
.await
|
||||
.context("running kv compose scenario")?;
|
||||
Ok(())
|
||||
}
|
||||
58
examples/kvstore/examples/src/bin/k8s_convergence.rs
Normal file
58
examples/kvstore/examples/src/bin/k8s_convergence.rs
Normal file
@ -0,0 +1,58 @@
|
||||
use std::time::Duration;
|
||||
|
||||
use anyhow::{Context as _, Result};
|
||||
use kvstore_runtime_ext::KvK8sDeployer;
|
||||
use kvstore_runtime_workloads::{
|
||||
KvBuilderExt, KvConverges, KvScenarioBuilder, KvTopology, KvWriteWorkload,
|
||||
};
|
||||
use testing_framework_core::scenario::Deployer;
|
||||
use testing_framework_runner_k8s::K8sRunnerError;
|
||||
use tracing::{info, warn};
|
||||
|
||||
#[tokio::main]
|
||||
async fn main() -> Result<()> {
|
||||
tracing_subscriber::fmt()
|
||||
.with_env_filter(tracing_subscriber::EnvFilter::from_default_env())
|
||||
.init();
|
||||
|
||||
let mut scenario = KvScenarioBuilder::deployment_with(|_| KvTopology::new(3))
|
||||
.with_run_duration(Duration::from_secs(30))
|
||||
.with_workload(
|
||||
KvWriteWorkload::new()
|
||||
.operations(200)
|
||||
.key_count(20)
|
||||
.rate_per_sec(20),
|
||||
)
|
||||
.with_expectation(KvConverges::new("kv-demo", 20).timeout(Duration::from_secs(25)))
|
||||
.build()?;
|
||||
|
||||
let deployer = KvK8sDeployer::new();
|
||||
let runner = match deployer.deploy(&scenario).await {
|
||||
Ok(runner) => runner,
|
||||
Err(K8sRunnerError::ClientInit { source }) => {
|
||||
warn!("k8s unavailable ({source}); skipping kv k8s run");
|
||||
return Ok(());
|
||||
}
|
||||
Err(K8sRunnerError::InstallStack { source })
|
||||
if k8s_cluster_unavailable(&source.to_string()) =>
|
||||
{
|
||||
warn!("k8s unavailable ({source}); skipping kv k8s run");
|
||||
return Ok(());
|
||||
}
|
||||
Err(error) => return Err(anyhow::Error::new(error)).context("deploying kv k8s stack"),
|
||||
};
|
||||
|
||||
info!("running kv k8s convergence scenario");
|
||||
runner
|
||||
.run(&mut scenario)
|
||||
.await
|
||||
.context("running kv k8s scenario")?;
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
/// Heuristic: does `message` look like "the Kubernetes API server is
/// unreachable"?
///
/// Matches the substrings kubectl/helm-style tooling emits when no cluster is
/// available, so callers can skip the run instead of failing it.
fn k8s_cluster_unavailable(message: &str) -> bool {
    const UNAVAILABLE_MARKERS: [&str; 3] = [
        "Unable to connect to the server",
        "TLS handshake timeout",
        "connection refused",
    ];
    UNAVAILABLE_MARKERS
        .iter()
        .any(|marker| message.contains(marker))
}
|
||||
155
examples/kvstore/examples/src/bin/k8s_manual_convergence.rs
Normal file
155
examples/kvstore/examples/src/bin/k8s_manual_convergence.rs
Normal file
@ -0,0 +1,155 @@
|
||||
use std::time::Duration;
|
||||
|
||||
use anyhow::{Context as _, Result, anyhow};
|
||||
use kvstore_node::KvHttpClient;
|
||||
use kvstore_runtime_ext::{KvK8sDeployer, KvTopology};
|
||||
use serde::{Deserialize, Serialize};
|
||||
use testing_framework_runner_k8s::ManualClusterError;
|
||||
use tracing::{info, warn};
|
||||
|
||||
#[derive(Serialize)]
|
||||
struct PutRequest {
|
||||
value: String,
|
||||
expected_version: Option<u64>,
|
||||
}
|
||||
|
||||
#[derive(Deserialize)]
|
||||
struct PutResponse {
|
||||
applied: bool,
|
||||
}
|
||||
|
||||
#[derive(Clone, Debug, Deserialize, Eq, PartialEq)]
|
||||
struct ValueRecord {
|
||||
value: String,
|
||||
version: u64,
|
||||
origin: u64,
|
||||
}
|
||||
|
||||
#[derive(Deserialize)]
|
||||
struct GetResponse {
|
||||
record: Option<ValueRecord>,
|
||||
}
|
||||
|
||||
#[tokio::main]
|
||||
async fn main() -> Result<()> {
|
||||
tracing_subscriber::fmt()
|
||||
.with_env_filter(tracing_subscriber::EnvFilter::from_default_env())
|
||||
.init();
|
||||
|
||||
let deployer = KvK8sDeployer::new();
|
||||
let cluster = match deployer
|
||||
.manual_cluster_from_descriptors(KvTopology::new(3))
|
||||
.await
|
||||
{
|
||||
Ok(cluster) => cluster,
|
||||
Err(ManualClusterError::ClientInit { source }) => {
|
||||
warn!("k8s unavailable ({source}); skipping kv k8s manual run");
|
||||
return Ok(());
|
||||
}
|
||||
Err(ManualClusterError::InstallStack { source })
|
||||
if k8s_cluster_unavailable(&source.to_string()) =>
|
||||
{
|
||||
warn!("k8s unavailable ({source}); skipping kv k8s manual run");
|
||||
return Ok(());
|
||||
}
|
||||
Err(error) => {
|
||||
return Err(anyhow::Error::new(error)).context("creating kv k8s manual cluster");
|
||||
}
|
||||
};
|
||||
|
||||
let node0 = cluster.start_node("node-0").await?.client;
|
||||
let node1 = cluster.start_node("node-1").await?.client;
|
||||
let node2 = cluster.start_node("node-2").await?.client;
|
||||
|
||||
cluster.wait_network_ready().await?;
|
||||
|
||||
write_keys(&node0, "kv-manual", 12).await?;
|
||||
wait_for_convergence(
|
||||
&[node0.clone(), node1.clone(), node2.clone()],
|
||||
"kv-manual",
|
||||
12,
|
||||
)
|
||||
.await?;
|
||||
|
||||
info!("restarting node-2 in manual cluster");
|
||||
cluster.restart_node("node-2").await?;
|
||||
cluster.wait_network_ready().await?;
|
||||
|
||||
let node2 = cluster
|
||||
.node_client("node-2")
|
||||
.ok_or_else(|| anyhow!("node-2 client missing after restart"))?;
|
||||
wait_for_convergence(&[node0, node1, node2], "kv-manual", 12).await?;
|
||||
|
||||
cluster.stop_all();
|
||||
Ok(())
|
||||
}
|
||||
|
||||
async fn write_keys(client: &KvHttpClient, prefix: &str, key_count: usize) -> Result<()> {
|
||||
for index in 0..key_count {
|
||||
let key = format!("{prefix}-{index}");
|
||||
let response: PutResponse = client
|
||||
.put(
|
||||
&format!("/kv/{key}"),
|
||||
&PutRequest {
|
||||
value: format!("value-{index}"),
|
||||
expected_version: None,
|
||||
},
|
||||
)
|
||||
.await
|
||||
.map_err(|error| anyhow!(error.to_string()))
|
||||
.with_context(|| format!("writing key {key}"))?;
|
||||
|
||||
if !response.applied {
|
||||
return Err(anyhow!("write rejected for key {key}"));
|
||||
}
|
||||
}
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
async fn wait_for_convergence(
|
||||
clients: &[KvHttpClient],
|
||||
prefix: &str,
|
||||
key_count: usize,
|
||||
) -> Result<()> {
|
||||
let deadline = tokio::time::Instant::now() + Duration::from_secs(30);
|
||||
|
||||
while tokio::time::Instant::now() < deadline {
|
||||
if is_converged(clients, prefix, key_count).await? {
|
||||
info!(key_count, "kv manual cluster converged");
|
||||
return Ok(());
|
||||
}
|
||||
tokio::time::sleep(Duration::from_millis(500)).await;
|
||||
}
|
||||
|
||||
Err(anyhow!("kv manual cluster did not converge within timeout"))
|
||||
}
|
||||
|
||||
async fn is_converged(clients: &[KvHttpClient], prefix: &str, key_count: usize) -> Result<bool> {
|
||||
for index in 0..key_count {
|
||||
let key = format!("{prefix}-{index}");
|
||||
let first = read_key(&clients[0], &key).await?;
|
||||
for client in &clients[1..] {
|
||||
if read_key(client, &key).await? != first {
|
||||
return Ok(false);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
Ok(true)
|
||||
}
|
||||
|
||||
async fn read_key(client: &KvHttpClient, key: &str) -> Result<Option<ValueRecord>> {
|
||||
let response: GetResponse = client
|
||||
.get(&format!("/kv/{key}"))
|
||||
.await
|
||||
.map_err(|error| anyhow!(error.to_string()))
|
||||
.with_context(|| format!("reading key {key}"))?;
|
||||
Ok(response.record)
|
||||
}
|
||||
|
||||
/// Heuristic: does `message` look like "the Kubernetes API server is
/// unreachable"?
///
/// Duplicated in k8s_convergence.rs — keep the marker list in sync if either
/// copy changes.
fn k8s_cluster_unavailable(message: &str) -> bool {
    const UNAVAILABLE_MARKERS: [&str; 3] = [
        "Unable to connect to the server",
        "TLS handshake timeout",
        "connection refused",
    ];
    UNAVAILABLE_MARKERS
        .iter()
        .any(|marker| message.contains(marker))
}
|
||||
24
examples/kvstore/kvstore-node/Cargo.toml
Normal file
24
examples/kvstore/kvstore-node/Cargo.toml
Normal file
@ -0,0 +1,24 @@
|
||||
[package]
|
||||
edition.workspace = true
|
||||
license.workspace = true
|
||||
name = "kvstore-node"
|
||||
version.workspace = true
|
||||
|
||||
[[bin]]
|
||||
name = "kvstore-node"
|
||||
path = "src/main.rs"
|
||||
|
||||
[dependencies]
|
||||
axum = "0.7"
|
||||
tower-http = { version = "0.6", features = ["trace"] }
|
||||
|
||||
serde = { workspace = true }
|
||||
serde_yaml = { workspace = true }
|
||||
|
||||
tokio = { workspace = true, features = ["full"] }
|
||||
tracing = { workspace = true }
|
||||
tracing-subscriber = { version = "0.3", features = ["env-filter"] }
|
||||
|
||||
anyhow = "1.0"
|
||||
clap = { version = "4.0", features = ["derive"] }
|
||||
reqwest = { workspace = true, features = ["json"] }
|
||||
40
examples/kvstore/kvstore-node/src/client.rs
Normal file
40
examples/kvstore/kvstore-node/src/client.rs
Normal file
@ -0,0 +1,40 @@
|
||||
use reqwest::Url;
|
||||
use serde::Serialize;
|
||||
|
||||
#[derive(Clone)]
|
||||
pub struct KvHttpClient {
|
||||
base_url: Url,
|
||||
client: reqwest::Client,
|
||||
}
|
||||
|
||||
impl KvHttpClient {
|
||||
#[must_use]
|
||||
pub fn new(base_url: Url) -> Self {
|
||||
Self {
|
||||
base_url,
|
||||
client: reqwest::Client::new(),
|
||||
}
|
||||
}
|
||||
|
||||
pub async fn get<T: serde::de::DeserializeOwned>(&self, path: &str) -> anyhow::Result<T> {
|
||||
let url = self.base_url.join(path)?;
|
||||
let response = self.client.get(url).send().await?.error_for_status()?;
|
||||
Ok(response.json().await?)
|
||||
}
|
||||
|
||||
pub async fn put<B: Serialize, T: serde::de::DeserializeOwned>(
|
||||
&self,
|
||||
path: &str,
|
||||
body: &B,
|
||||
) -> anyhow::Result<T> {
|
||||
let url = self.base_url.join(path)?;
|
||||
let response = self
|
||||
.client
|
||||
.put(url)
|
||||
.json(body)
|
||||
.send()
|
||||
.await?
|
||||
.error_for_status()?;
|
||||
Ok(response.json().await?)
|
||||
}
|
||||
}
|
||||
30
examples/kvstore/kvstore-node/src/config.rs
Normal file
30
examples/kvstore/kvstore-node/src/config.rs
Normal file
@ -0,0 +1,30 @@
|
||||
use std::{fs, path::Path};
|
||||
|
||||
use serde::{Deserialize, Serialize};
|
||||
|
||||
#[derive(Clone, Debug, Serialize, Deserialize)]
|
||||
pub struct PeerInfo {
|
||||
pub node_id: u64,
|
||||
pub http_address: String,
|
||||
}
|
||||
|
||||
#[derive(Clone, Debug, Serialize, Deserialize)]
|
||||
pub struct KvConfig {
|
||||
pub node_id: u64,
|
||||
pub http_port: u16,
|
||||
pub peers: Vec<PeerInfo>,
|
||||
#[serde(default = "default_sync_interval_ms")]
|
||||
pub sync_interval_ms: u64,
|
||||
}
|
||||
|
||||
impl KvConfig {
|
||||
pub fn load(path: &Path) -> anyhow::Result<Self> {
|
||||
let raw = fs::read_to_string(path)?;
|
||||
let config = serde_yaml::from_str(&raw)?;
|
||||
Ok(config)
|
||||
}
|
||||
}
|
||||
|
||||
const fn default_sync_interval_ms() -> u64 {
|
||||
1000
|
||||
}
|
||||
3
examples/kvstore/kvstore-node/src/lib.rs
Normal file
3
examples/kvstore/kvstore-node/src/lib.rs
Normal file
@ -0,0 +1,3 @@
|
||||
pub mod client;
|
||||
|
||||
pub use client::KvHttpClient;
|
||||
36
examples/kvstore/kvstore-node/src/main.rs
Normal file
36
examples/kvstore/kvstore-node/src/main.rs
Normal file
@ -0,0 +1,36 @@
|
||||
mod config;
|
||||
mod server;
|
||||
mod state;
|
||||
mod sync;
|
||||
|
||||
use std::path::PathBuf;
|
||||
|
||||
use clap::Parser;
|
||||
use tracing_subscriber::{layer::SubscriberExt, util::SubscriberInitExt};
|
||||
|
||||
use crate::{config::KvConfig, state::KvState, sync::SyncService};
|
||||
|
||||
#[derive(Parser, Debug)]
|
||||
#[command(name = "kvstore-node")]
|
||||
struct Args {
|
||||
#[arg(short, long)]
|
||||
config: PathBuf,
|
||||
}
|
||||
|
||||
#[tokio::main]
|
||||
async fn main() -> anyhow::Result<()> {
|
||||
tracing_subscriber::registry()
|
||||
.with(
|
||||
tracing_subscriber::EnvFilter::try_from_default_env()
|
||||
.unwrap_or_else(|_| "kvstore_node=info,tower_http=debug".into()),
|
||||
)
|
||||
.with(tracing_subscriber::fmt::layer())
|
||||
.init();
|
||||
|
||||
let args = Args::parse();
|
||||
let config = KvConfig::load(&args.config)?;
|
||||
|
||||
let state = KvState::new(config.node_id);
|
||||
SyncService::new(config.clone(), state.clone()).start();
|
||||
server::start_server(config, state).await
|
||||
}
|
||||
112
examples/kvstore/kvstore-node/src/server.rs
Normal file
112
examples/kvstore/kvstore-node/src/server.rs
Normal file
@ -0,0 +1,112 @@
|
||||
use std::net::SocketAddr;
|
||||
|
||||
use axum::{
|
||||
Router,
|
||||
extract::{Path, State},
|
||||
http::StatusCode,
|
||||
response::Json,
|
||||
routing::get,
|
||||
};
|
||||
use serde::{Deserialize, Serialize};
|
||||
use tower_http::trace::TraceLayer;
|
||||
|
||||
use crate::{
|
||||
config::KvConfig,
|
||||
state::{KvState, Snapshot, ValueRecord},
|
||||
};
|
||||
|
||||
#[derive(Serialize)]
|
||||
struct HealthResponse {
|
||||
status: &'static str,
|
||||
}
|
||||
|
||||
#[derive(Deserialize)]
|
||||
struct PutRequest {
|
||||
value: String,
|
||||
expected_version: Option<u64>,
|
||||
}
|
||||
|
||||
#[derive(Serialize)]
|
||||
struct PutResponse {
|
||||
applied: bool,
|
||||
version: u64,
|
||||
}
|
||||
|
||||
#[derive(Serialize)]
|
||||
struct GetResponse {
|
||||
key: String,
|
||||
record: Option<ValueRecord>,
|
||||
}
|
||||
|
||||
pub async fn start_server(config: KvConfig, state: KvState) -> anyhow::Result<()> {
|
||||
let app = Router::new()
|
||||
.route("/health/live", get(health_live))
|
||||
.route("/health/ready", get(health_ready))
|
||||
.route("/kv/:key", get(get_key).put(put_key))
|
||||
.route("/internal/snapshot", get(get_snapshot))
|
||||
.layer(TraceLayer::new_for_http())
|
||||
.with_state(state.clone());
|
||||
|
||||
let addr = SocketAddr::from(([0, 0, 0, 0], config.http_port));
|
||||
let listener = tokio::net::TcpListener::bind(addr).await?;
|
||||
|
||||
state.set_ready(true).await;
|
||||
tracing::info!(node_id = state.node_id(), %addr, "kv node ready");
|
||||
|
||||
axum::serve(listener, app).await?;
|
||||
Ok(())
|
||||
}
|
||||
|
||||
async fn health_live() -> (StatusCode, Json<HealthResponse>) {
|
||||
(StatusCode::OK, Json(HealthResponse { status: "alive" }))
|
||||
}
|
||||
|
||||
async fn health_ready(State(state): State<KvState>) -> (StatusCode, Json<HealthResponse>) {
|
||||
if state.is_ready().await {
|
||||
(StatusCode::OK, Json(HealthResponse { status: "ready" }))
|
||||
} else {
|
||||
(
|
||||
StatusCode::SERVICE_UNAVAILABLE,
|
||||
Json(HealthResponse {
|
||||
status: "not-ready",
|
||||
}),
|
||||
)
|
||||
}
|
||||
}
|
||||
|
||||
async fn get_key(Path(key): Path<String>, State(state): State<KvState>) -> Json<GetResponse> {
|
||||
let record = state.get(&key).await;
|
||||
Json(GetResponse { key, record })
|
||||
}
|
||||
|
||||
async fn put_key(
|
||||
Path(key): Path<String>,
|
||||
State(state): State<KvState>,
|
||||
Json(request): Json<PutRequest>,
|
||||
) -> (StatusCode, Json<PutResponse>) {
|
||||
let outcome = state
|
||||
.put_local(key, request.value, request.expected_version)
|
||||
.await;
|
||||
|
||||
if outcome.applied {
|
||||
(
|
||||
StatusCode::OK,
|
||||
Json(PutResponse {
|
||||
applied: true,
|
||||
version: outcome.current_version,
|
||||
}),
|
||||
)
|
||||
} else {
|
||||
(
|
||||
StatusCode::CONFLICT,
|
||||
Json(PutResponse {
|
||||
applied: false,
|
||||
version: outcome.current_version,
|
||||
}),
|
||||
)
|
||||
}
|
||||
}
|
||||
|
||||
async fn get_snapshot(State(state): State<KvState>) -> Json<Snapshot> {
|
||||
Json(state.snapshot().await)
|
||||
}
|
||||
111
examples/kvstore/kvstore-node/src/state.rs
Normal file
111
examples/kvstore/kvstore-node/src/state.rs
Normal file
@ -0,0 +1,111 @@
|
||||
use std::{collections::HashMap, sync::Arc};
|
||||
|
||||
use serde::{Deserialize, Serialize};
|
||||
use tokio::sync::RwLock;
|
||||
|
||||
#[derive(Clone, Debug, Serialize, Deserialize, Eq, PartialEq)]
|
||||
pub struct ValueRecord {
|
||||
pub value: String,
|
||||
pub version: u64,
|
||||
pub origin: u64,
|
||||
}
|
||||
|
||||
#[derive(Clone, Debug, Serialize, Deserialize)]
|
||||
pub struct Snapshot {
|
||||
pub node_id: u64,
|
||||
pub entries: HashMap<String, ValueRecord>,
|
||||
}
|
||||
|
||||
#[derive(Clone, Debug)]
|
||||
pub struct PutOutcome {
|
||||
pub applied: bool,
|
||||
pub current_version: u64,
|
||||
}
|
||||
|
||||
#[derive(Clone)]
|
||||
pub struct KvState {
|
||||
node_id: u64,
|
||||
ready: Arc<RwLock<bool>>,
|
||||
entries: Arc<RwLock<HashMap<String, ValueRecord>>>,
|
||||
}
|
||||
|
||||
impl KvState {
|
||||
pub fn new(node_id: u64) -> Self {
|
||||
Self {
|
||||
node_id,
|
||||
ready: Arc::new(RwLock::new(false)),
|
||||
entries: Arc::new(RwLock::new(HashMap::new())),
|
||||
}
|
||||
}
|
||||
|
||||
pub const fn node_id(&self) -> u64 {
|
||||
self.node_id
|
||||
}
|
||||
|
||||
pub async fn set_ready(&self, value: bool) {
|
||||
*self.ready.write().await = value;
|
||||
}
|
||||
|
||||
pub async fn is_ready(&self) -> bool {
|
||||
*self.ready.read().await
|
||||
}
|
||||
|
||||
pub async fn get(&self, key: &str) -> Option<ValueRecord> {
|
||||
self.entries.read().await.get(key).cloned()
|
||||
}
|
||||
|
||||
pub async fn put_local(
|
||||
&self,
|
||||
key: String,
|
||||
value: String,
|
||||
expected_version: Option<u64>,
|
||||
) -> PutOutcome {
|
||||
let mut entries = self.entries.write().await;
|
||||
let current_version = entries.get(&key).map_or(0, |record| record.version);
|
||||
|
||||
if expected_version.is_some_and(|expected| expected != current_version) {
|
||||
return PutOutcome {
|
||||
applied: false,
|
||||
current_version,
|
||||
};
|
||||
}
|
||||
|
||||
let next_version = current_version.saturating_add(1);
|
||||
entries.insert(
|
||||
key,
|
||||
ValueRecord {
|
||||
value,
|
||||
version: next_version,
|
||||
origin: self.node_id,
|
||||
},
|
||||
);
|
||||
|
||||
PutOutcome {
|
||||
applied: true,
|
||||
current_version: next_version,
|
||||
}
|
||||
}
|
||||
|
||||
pub async fn merge_snapshot(&self, snapshot: Snapshot) {
|
||||
let mut local = self.entries.write().await;
|
||||
for (key, incoming) in snapshot.entries {
|
||||
match local.get(&key) {
|
||||
Some(existing) if !is_newer_record(&incoming, existing) => {}
|
||||
_ => {
|
||||
local.insert(key, incoming);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
pub async fn snapshot(&self) -> Snapshot {
|
||||
Snapshot {
|
||||
node_id: self.node_id,
|
||||
entries: self.entries.read().await.clone(),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
fn is_newer_record(candidate: &ValueRecord, existing: &ValueRecord) -> bool {
|
||||
(candidate.version, candidate.origin) > (existing.version, existing.origin)
|
||||
}
|
||||
103
examples/kvstore/kvstore-node/src/sync.rs
Normal file
103
examples/kvstore/kvstore-node/src/sync.rs
Normal file
@ -0,0 +1,103 @@
|
||||
use std::{collections::HashMap, sync::Arc, time::Duration};
|
||||
|
||||
use reqwest::Client;
|
||||
use tokio::sync::Mutex;
|
||||
use tracing::{debug, warn};
|
||||
|
||||
use crate::{
|
||||
config::KvConfig,
|
||||
state::{KvState, Snapshot},
|
||||
};
|
||||
|
||||
const WARN_AFTER_CONSECUTIVE_FAILURES: u32 = 5;
|
||||
|
||||
#[derive(Clone)]
|
||||
pub struct SyncService {
|
||||
config: Arc<KvConfig>,
|
||||
state: KvState,
|
||||
client: Client,
|
||||
failures_by_peer: Arc<Mutex<HashMap<String, u32>>>,
|
||||
}
|
||||
|
||||
impl SyncService {
|
||||
pub fn new(config: KvConfig, state: KvState) -> Self {
|
||||
Self {
|
||||
config: Arc::new(config),
|
||||
state,
|
||||
client: Client::new(),
|
||||
failures_by_peer: Arc::new(Mutex::new(HashMap::new())),
|
||||
}
|
||||
}
|
||||
|
||||
pub fn start(&self) {
|
||||
let service = self.clone();
|
||||
tokio::spawn(async move {
|
||||
service.run().await;
|
||||
});
|
||||
}
|
||||
|
||||
async fn run(self) {
|
||||
let interval = Duration::from_millis(self.config.sync_interval_ms.max(100));
|
||||
loop {
|
||||
self.sync_once().await;
|
||||
tokio::time::sleep(interval).await;
|
||||
}
|
||||
}
|
||||
|
||||
async fn sync_once(&self) {
|
||||
for peer in &self.config.peers {
|
||||
match self.fetch_snapshot(&peer.http_address).await {
|
||||
Ok(snapshot) => {
|
||||
self.state.merge_snapshot(snapshot).await;
|
||||
self.clear_failure_counter(&peer.http_address).await;
|
||||
}
|
||||
Err(error) => {
|
||||
self.record_sync_failure(&peer.http_address, &error).await;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
async fn fetch_snapshot(&self, peer_address: &str) -> anyhow::Result<Snapshot> {
|
||||
let url = format!("http://{peer_address}/internal/snapshot");
|
||||
let snapshot = self
|
||||
.client
|
||||
.get(url)
|
||||
.send()
|
||||
.await?
|
||||
.error_for_status()?
|
||||
.json()
|
||||
.await?;
|
||||
Ok(snapshot)
|
||||
}
|
||||
|
||||
async fn clear_failure_counter(&self, peer_address: &str) {
|
||||
let mut failures = self.failures_by_peer.lock().await;
|
||||
failures.remove(peer_address);
|
||||
}
|
||||
|
||||
async fn record_sync_failure(&self, peer_address: &str, error: &anyhow::Error) {
|
||||
let consecutive_failures = {
|
||||
let mut failures = self.failures_by_peer.lock().await;
|
||||
let entry = failures.entry(peer_address.to_owned()).or_insert(0);
|
||||
*entry += 1;
|
||||
*entry
|
||||
};
|
||||
|
||||
if consecutive_failures >= WARN_AFTER_CONSECUTIVE_FAILURES {
|
||||
warn!(
|
||||
peer = %peer_address,
|
||||
%error,
|
||||
consecutive_failures,
|
||||
"kv sync repeatedly failing"
|
||||
);
|
||||
} else {
|
||||
debug!(
|
||||
peer = %peer_address,
|
||||
%error,
|
||||
consecutive_failures,
|
||||
"kv sync failed"
|
||||
);
|
||||
}
|
||||
}
|
||||
}
|
||||
15
examples/kvstore/testing/integration/Cargo.toml
Normal file
15
examples/kvstore/testing/integration/Cargo.toml
Normal file
@ -0,0 +1,15 @@
|
||||
[package]
|
||||
edition.workspace = true
|
||||
license.workspace = true
|
||||
name = "kvstore-runtime-ext"
|
||||
version.workspace = true
|
||||
|
||||
[dependencies]
|
||||
testing-framework-core = { workspace = true }
|
||||
testing-framework-runner-compose = { workspace = true }
|
||||
testing-framework-runner-k8s = { workspace = true }
|
||||
testing-framework-runner-local = { workspace = true }
|
||||
|
||||
async-trait = { workspace = true }
|
||||
kvstore-node = { path = "../../kvstore-node" }
|
||||
serde = { workspace = true }
|
||||
75
examples/kvstore/testing/integration/src/app.rs
Normal file
75
examples/kvstore/testing/integration/src/app.rs
Normal file
@ -0,0 +1,75 @@
|
||||
use std::io::Error;
|
||||
|
||||
use async_trait::async_trait;
|
||||
use kvstore_node::KvHttpClient;
|
||||
use serde::{Deserialize, Serialize};
|
||||
use testing_framework_core::scenario::{
|
||||
Application, ClusterNodeConfigApplication, ClusterNodeView, ClusterPeerView, DynError,
|
||||
NodeAccess, serialize_cluster_yaml_config,
|
||||
};
|
||||
|
||||
pub type KvTopology = testing_framework_core::topology::ClusterTopology;
|
||||
|
||||
#[derive(Clone, Debug, Serialize, Deserialize)]
|
||||
pub struct KvPeerInfo {
|
||||
pub node_id: u64,
|
||||
pub http_address: String,
|
||||
}
|
||||
|
||||
#[derive(Clone, Debug, Serialize, Deserialize)]
|
||||
pub struct KvNodeConfig {
|
||||
pub node_id: u64,
|
||||
pub http_port: u16,
|
||||
pub peers: Vec<KvPeerInfo>,
|
||||
pub sync_interval_ms: u64,
|
||||
}
|
||||
|
||||
pub struct KvEnv;
|
||||
|
||||
#[async_trait]
|
||||
impl Application for KvEnv {
|
||||
type Deployment = KvTopology;
|
||||
type NodeClient = KvHttpClient;
|
||||
type NodeConfig = KvNodeConfig;
|
||||
fn build_node_client(access: &NodeAccess) -> Result<Self::NodeClient, DynError> {
|
||||
Ok(KvHttpClient::new(access.api_base_url()?))
|
||||
}
|
||||
|
||||
fn node_readiness_path() -> &'static str {
|
||||
"/health/ready"
|
||||
}
|
||||
}
|
||||
|
||||
impl ClusterNodeConfigApplication for KvEnv {
|
||||
type ConfigError = Error;
|
||||
|
||||
fn static_network_port() -> u16 {
|
||||
8080
|
||||
}
|
||||
|
||||
fn build_cluster_node_config(
|
||||
node: &ClusterNodeView,
|
||||
peers: &[ClusterPeerView],
|
||||
) -> Result<Self::NodeConfig, Self::ConfigError> {
|
||||
let peers = peers
|
||||
.iter()
|
||||
.map(|peer| KvPeerInfo {
|
||||
node_id: peer.index() as u64,
|
||||
http_address: peer.authority(),
|
||||
})
|
||||
.collect::<Vec<_>>();
|
||||
|
||||
Ok(KvNodeConfig {
|
||||
node_id: node.index() as u64,
|
||||
http_port: node.network_port(),
|
||||
peers,
|
||||
sync_interval_ms: 500,
|
||||
})
|
||||
}
|
||||
|
||||
fn serialize_cluster_node_config(
|
||||
config: &Self::NodeConfig,
|
||||
) -> Result<String, Self::ConfigError> {
|
||||
serialize_cluster_yaml_config(config).map_err(Error::other)
|
||||
}
|
||||
}
|
||||
15
examples/kvstore/testing/integration/src/compose_env.rs
Normal file
15
examples/kvstore/testing/integration/src/compose_env.rs
Normal file
@ -0,0 +1,15 @@
|
||||
use testing_framework_runner_compose::{BinaryConfigNodeSpec, ComposeBinaryApp};
|
||||
|
||||
use crate::KvEnv;
|
||||
|
||||
const NODE_CONFIG_PATH: &str = "/etc/kvstore/config.yaml";
|
||||
|
||||
impl ComposeBinaryApp for KvEnv {
|
||||
fn compose_node_spec() -> BinaryConfigNodeSpec {
|
||||
BinaryConfigNodeSpec::conventional(
|
||||
"/usr/local/bin/kvstore-node",
|
||||
NODE_CONFIG_PATH,
|
||||
vec![8080, 8081],
|
||||
)
|
||||
}
|
||||
}
|
||||
21
examples/kvstore/testing/integration/src/k8s_env.rs
Normal file
21
examples/kvstore/testing/integration/src/k8s_env.rs
Normal file
@ -0,0 +1,21 @@
|
||||
use testing_framework_runner_k8s::{BinaryConfigK8sSpec, K8sBinaryApp};
|
||||
|
||||
use crate::KvEnv;
|
||||
|
||||
const CONTAINER_CONFIG_PATH: &str = "/etc/kvstore/config.yaml";
|
||||
const CONTAINER_HTTP_PORT: u16 = 8080;
|
||||
const SERVICE_TESTING_PORT: u16 = 8081;
|
||||
const NODE_NAME_PREFIX: &str = "kvstore-node";
|
||||
|
||||
impl K8sBinaryApp for KvEnv {
|
||||
fn k8s_binary_spec() -> BinaryConfigK8sSpec {
|
||||
BinaryConfigK8sSpec::conventional(
|
||||
"kvstore",
|
||||
NODE_NAME_PREFIX,
|
||||
"/usr/local/bin/kvstore-node",
|
||||
CONTAINER_CONFIG_PATH,
|
||||
CONTAINER_HTTP_PORT,
|
||||
SERVICE_TESTING_PORT,
|
||||
)
|
||||
}
|
||||
}
|
||||
12
examples/kvstore/testing/integration/src/lib.rs
Normal file
12
examples/kvstore/testing/integration/src/lib.rs
Normal file
@ -0,0 +1,12 @@
|
||||
mod app;
|
||||
mod compose_env;
|
||||
mod k8s_env;
|
||||
mod local_env;
|
||||
pub mod scenario;
|
||||
|
||||
pub use app::*;
|
||||
pub use scenario::{KvBuilderExt, KvScenarioBuilder};
|
||||
|
||||
pub type KvLocalDeployer = testing_framework_runner_local::ProcessDeployer<KvEnv>;
|
||||
pub type KvComposeDeployer = testing_framework_runner_compose::ComposeDeployer<KvEnv>;
|
||||
pub type KvK8sDeployer = testing_framework_runner_k8s::K8sDeployer<KvEnv>;
|
||||
41
examples/kvstore/testing/integration/src/local_env.rs
Normal file
41
examples/kvstore/testing/integration/src/local_env.rs
Normal file
@ -0,0 +1,41 @@
|
||||
use std::collections::HashMap;
|
||||
|
||||
use testing_framework_core::scenario::{DynError, StartNodeOptions};
|
||||
use testing_framework_runner_local::{
|
||||
LocalBinaryApp, LocalNodePorts, LocalPeerNode, LocalProcessSpec,
|
||||
build_local_cluster_node_config, yaml_node_config,
|
||||
};
|
||||
|
||||
use crate::{KvEnv, KvNodeConfig};
|
||||
|
||||
impl LocalBinaryApp for KvEnv {
|
||||
fn initial_node_name_prefix() -> &'static str {
|
||||
"kv-node"
|
||||
}
|
||||
|
||||
fn build_local_node_config_with_peers(
|
||||
_topology: &Self::Deployment,
|
||||
index: usize,
|
||||
ports: &LocalNodePorts,
|
||||
peers: &[LocalPeerNode],
|
||||
_peer_ports_by_name: &HashMap<String, u16>,
|
||||
_options: &StartNodeOptions<Self>,
|
||||
_template_config: Option<
|
||||
&<Self as testing_framework_core::scenario::Application>::NodeConfig,
|
||||
>,
|
||||
) -> Result<<Self as testing_framework_core::scenario::Application>::NodeConfig, DynError> {
|
||||
build_local_cluster_node_config::<Self>(index, ports, peers)
|
||||
}
|
||||
|
||||
fn local_process_spec() -> LocalProcessSpec {
|
||||
LocalProcessSpec::new("KVSTORE_NODE_BIN", "kvstore-node").with_rust_log("kvstore_node=info")
|
||||
}
|
||||
|
||||
fn render_local_config(config: &KvNodeConfig) -> Result<Vec<u8>, DynError> {
|
||||
yaml_node_config(config)
|
||||
}
|
||||
|
||||
fn http_api_port(config: &KvNodeConfig) -> u16 {
|
||||
config.http_port
|
||||
}
|
||||
}
|
||||
15
examples/kvstore/testing/integration/src/scenario.rs
Normal file
15
examples/kvstore/testing/integration/src/scenario.rs
Normal file
@ -0,0 +1,15 @@
|
||||
use testing_framework_core::scenario::ScenarioBuilder;
|
||||
|
||||
use crate::{KvEnv, KvTopology};
|
||||
|
||||
pub type KvScenarioBuilder = ScenarioBuilder<KvEnv>;
|
||||
|
||||
pub trait KvBuilderExt: Sized {
|
||||
fn deployment_with(f: impl FnOnce(KvTopology) -> KvTopology) -> Self;
|
||||
}
|
||||
|
||||
impl KvBuilderExt for KvScenarioBuilder {
|
||||
fn deployment_with(f: impl FnOnce(KvTopology) -> KvTopology) -> Self {
|
||||
KvScenarioBuilder::with_deployment(f(KvTopology::new(3)))
|
||||
}
|
||||
}
|
||||
15
examples/kvstore/testing/workloads/Cargo.toml
Normal file
15
examples/kvstore/testing/workloads/Cargo.toml
Normal file
@ -0,0 +1,15 @@
|
||||
[package]
|
||||
edition.workspace = true
|
||||
license.workspace = true
|
||||
name = "kvstore-runtime-workloads"
|
||||
version.workspace = true
|
||||
|
||||
[dependencies]
|
||||
kvstore-node = { path = "../../kvstore-node" }
|
||||
kvstore-runtime-ext = { path = "../integration" }
|
||||
testing-framework-core = { workspace = true }
|
||||
|
||||
async-trait = { workspace = true }
|
||||
serde = { workspace = true }
|
||||
tokio = { workspace = true, features = ["full"] }
|
||||
tracing = { workspace = true }
|
||||
100
examples/kvstore/testing/workloads/src/expectations.rs
Normal file
100
examples/kvstore/testing/workloads/src/expectations.rs
Normal file
@ -0,0 +1,100 @@
|
||||
use std::time::Duration;
|
||||
|
||||
use async_trait::async_trait;
|
||||
use kvstore_runtime_ext::KvEnv;
|
||||
use serde::Deserialize;
|
||||
use testing_framework_core::scenario::{DynError, Expectation, RunContext};
|
||||
use tracing::info;
|
||||
|
||||
#[derive(Clone)]
|
||||
pub struct KvConverges {
|
||||
key_prefix: String,
|
||||
key_count: usize,
|
||||
timeout: Duration,
|
||||
poll_interval: Duration,
|
||||
}
|
||||
|
||||
#[derive(Deserialize, Clone, Debug, Eq, PartialEq)]
|
||||
struct ValueRecord {
|
||||
value: String,
|
||||
version: u64,
|
||||
origin: u64,
|
||||
}
|
||||
|
||||
#[derive(Deserialize)]
|
||||
struct GetResponse {
|
||||
record: Option<ValueRecord>,
|
||||
}
|
||||
|
||||
impl KvConverges {
|
||||
#[must_use]
|
||||
pub fn new(key_prefix: impl Into<String>, key_count: usize) -> Self {
|
||||
Self {
|
||||
key_prefix: key_prefix.into(),
|
||||
key_count,
|
||||
timeout: Duration::from_secs(20),
|
||||
poll_interval: Duration::from_millis(500),
|
||||
}
|
||||
}
|
||||
|
||||
#[must_use]
|
||||
pub const fn timeout(mut self, timeout: Duration) -> Self {
|
||||
self.timeout = timeout;
|
||||
self
|
||||
}
|
||||
}
|
||||
|
||||
#[async_trait]
|
||||
impl Expectation<KvEnv> for KvConverges {
|
||||
fn name(&self) -> &str {
|
||||
"kv_converges"
|
||||
}
|
||||
|
||||
async fn evaluate(&mut self, ctx: &RunContext<KvEnv>) -> Result<(), DynError> {
|
||||
let clients = ctx.node_clients().snapshot();
|
||||
if clients.is_empty() {
|
||||
return Err("no kv node clients available".into());
|
||||
}
|
||||
|
||||
let deadline = tokio::time::Instant::now() + self.timeout;
|
||||
while tokio::time::Instant::now() < deadline {
|
||||
if self.is_converged(&clients).await? {
|
||||
info!(key_count = self.key_count, "kv convergence reached");
|
||||
return Ok(());
|
||||
}
|
||||
tokio::time::sleep(self.poll_interval).await;
|
||||
}
|
||||
|
||||
Err(format!(
|
||||
"kv convergence not reached within {:?} for {} keys",
|
||||
self.timeout, self.key_count
|
||||
)
|
||||
.into())
|
||||
}
|
||||
}
|
||||
|
||||
impl KvConverges {
|
||||
async fn is_converged(&self, clients: &[kvstore_node::KvHttpClient]) -> Result<bool, DynError> {
|
||||
for key_idx in 0..self.key_count {
|
||||
let key = format!("{}-{key_idx}", self.key_prefix);
|
||||
let first = read_key(clients, &key, 0).await?;
|
||||
for node_idx in 1..clients.len() {
|
||||
let current = read_key(clients, &key, node_idx).await?;
|
||||
if current != first {
|
||||
return Ok(false);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
Ok(true)
|
||||
}
|
||||
}
|
||||
|
||||
async fn read_key(
|
||||
clients: &[kvstore_node::KvHttpClient],
|
||||
key: &str,
|
||||
index: usize,
|
||||
) -> Result<Option<ValueRecord>, DynError> {
|
||||
let response: GetResponse = clients[index].get(&format!("/kv/{key}")).await?;
|
||||
Ok(response.record)
|
||||
}
|
||||
6
examples/kvstore/testing/workloads/src/lib.rs
Normal file
6
examples/kvstore/testing/workloads/src/lib.rs
Normal file
@ -0,0 +1,6 @@
|
||||
mod expectations;
|
||||
mod write;
|
||||
|
||||
pub use expectations::KvConverges;
|
||||
pub use kvstore_runtime_ext::{KvBuilderExt, KvEnv, KvScenarioBuilder, KvTopology};
|
||||
pub use write::KvWriteWorkload;
|
||||
135
examples/kvstore/testing/workloads/src/write.rs
Normal file
135
examples/kvstore/testing/workloads/src/write.rs
Normal file
@ -0,0 +1,135 @@
|
||||
use std::time::Duration;
|
||||
|
||||
use async_trait::async_trait;
|
||||
use kvstore_runtime_ext::KvEnv;
|
||||
use serde::{Deserialize, Serialize};
|
||||
use testing_framework_core::scenario::{DynError, RunContext, Workload};
|
||||
use tracing::info;
|
||||
|
||||
#[derive(Clone)]
|
||||
pub struct KvWriteWorkload {
|
||||
operations: usize,
|
||||
key_count: usize,
|
||||
rate_per_sec: Option<usize>,
|
||||
key_prefix: String,
|
||||
}
|
||||
|
||||
#[derive(Serialize)]
|
||||
struct PutRequest {
|
||||
value: String,
|
||||
expected_version: Option<u64>,
|
||||
}
|
||||
|
||||
#[derive(Deserialize)]
|
||||
struct PutResponse {
|
||||
applied: bool,
|
||||
version: u64,
|
||||
}
|
||||
|
||||
impl KvWriteWorkload {
|
||||
#[must_use]
|
||||
pub fn new() -> Self {
|
||||
Self {
|
||||
operations: 200,
|
||||
key_count: 20,
|
||||
rate_per_sec: Some(25),
|
||||
key_prefix: "kv-demo".to_owned(),
|
||||
}
|
||||
}
|
||||
|
||||
#[must_use]
|
||||
pub const fn operations(mut self, value: usize) -> Self {
|
||||
self.operations = value;
|
||||
self
|
||||
}
|
||||
|
||||
#[must_use]
|
||||
pub const fn key_count(mut self, value: usize) -> Self {
|
||||
self.key_count = value;
|
||||
self
|
||||
}
|
||||
|
||||
#[must_use]
|
||||
pub const fn rate_per_sec(mut self, value: usize) -> Self {
|
||||
self.rate_per_sec = Some(value);
|
||||
self
|
||||
}
|
||||
|
||||
#[must_use]
|
||||
pub fn key_prefix(mut self, value: impl Into<String>) -> Self {
|
||||
self.key_prefix = value.into();
|
||||
self
|
||||
}
|
||||
}
|
||||
|
||||
impl Default for KvWriteWorkload {
|
||||
fn default() -> Self {
|
||||
Self::new()
|
||||
}
|
||||
}
|
||||
|
||||
#[async_trait]
|
||||
impl Workload<KvEnv> for KvWriteWorkload {
|
||||
fn name(&self) -> &str {
|
||||
"kv_write_workload"
|
||||
}
|
||||
|
||||
async fn start(&self, ctx: &RunContext<KvEnv>) -> Result<(), DynError> {
|
||||
let clients = ctx.node_clients().snapshot();
|
||||
let Some(leader) = clients.first() else {
|
||||
return Err("no kv node clients available".into());
|
||||
};
|
||||
|
||||
if self.key_count == 0 {
|
||||
return Err("kv workload key_count must be > 0".into());
|
||||
}
|
||||
|
||||
let interval = self.rate_per_sec.and_then(compute_interval);
|
||||
info!(
|
||||
operations = self.operations,
|
||||
key_count = self.key_count,
|
||||
rate_per_sec = ?self.rate_per_sec,
|
||||
"starting kv write workload"
|
||||
);
|
||||
|
||||
for idx in 0..self.operations {
|
||||
let key = format!("{}-{}", self.key_prefix, idx % self.key_count);
|
||||
let value = format!("value-{idx}");
|
||||
let response: PutResponse = leader
|
||||
.put(
|
||||
&format!("/kv/{key}"),
|
||||
&PutRequest {
|
||||
value,
|
||||
expected_version: None,
|
||||
},
|
||||
)
|
||||
.await?;
|
||||
|
||||
if !response.applied {
|
||||
return Err(format!("leader rejected write for key {key}").into());
|
||||
}
|
||||
|
||||
if (idx + 1) % 25 == 0 {
|
||||
info!(
|
||||
completed = idx + 1,
|
||||
version = response.version,
|
||||
"kv write progress"
|
||||
);
|
||||
}
|
||||
|
||||
if let Some(delay) = interval {
|
||||
tokio::time::sleep(delay).await;
|
||||
}
|
||||
}
|
||||
|
||||
Ok(())
|
||||
}
|
||||
}
|
||||
|
||||
fn compute_interval(rate_per_sec: usize) -> Option<Duration> {
|
||||
if rate_per_sec == 0 {
|
||||
return None;
|
||||
}
|
||||
|
||||
Some(Duration::from_millis((1000 / rate_per_sec as u64).max(1)))
|
||||
}
|
||||
@ -31,7 +31,7 @@ Each node exposes:
|
||||
|
||||
```bash
|
||||
LOGOS_BLOCKCHAIN_METRICS_QUERY_URL=http://127.0.0.1:19091 \
|
||||
cargo run -p metrics-counter-examples --bin compose_prometheus_expectation
|
||||
cargo run -p metrics-counter-examples --bin metrics_counter_compose_prometheus_expectation
|
||||
```
|
||||
|
||||
## Run with Kubernetes
|
||||
@ -39,7 +39,7 @@ cargo run -p metrics-counter-examples --bin compose_prometheus_expectation
|
||||
```bash
|
||||
docker build -t metrics-counter-node:local -f examples/metrics_counter/Dockerfile .
|
||||
LOGOS_BLOCKCHAIN_METRICS_QUERY_URL=http://127.0.0.1:30991 \
|
||||
cargo run -p metrics-counter-examples --bin k8s_prometheus_expectation
|
||||
cargo run -p metrics-counter-examples --bin metrics_counter_k8s_prometheus_expectation
|
||||
```
|
||||
|
||||
Overrides:
|
||||
@ -51,5 +51,5 @@ Overrides:
|
||||
```bash
|
||||
docker build -t metrics-counter-node:local -f examples/metrics_counter/Dockerfile .
|
||||
LOGOS_BLOCKCHAIN_METRICS_QUERY_URL=http://127.0.0.1:30991 \
|
||||
cargo run -p metrics-counter-examples --bin k8s_manual_prometheus
|
||||
cargo run -p metrics-counter-examples --bin metrics_counter_k8s_manual_prometheus
|
||||
```
|
||||
|
||||
@ -4,6 +4,18 @@ license.workspace = true
|
||||
name = "metrics-counter-examples"
|
||||
version.workspace = true
|
||||
|
||||
[[bin]]
|
||||
name = "metrics_counter_compose_prometheus_expectation"
|
||||
path = "src/bin/compose_prometheus_expectation.rs"
|
||||
|
||||
[[bin]]
|
||||
name = "metrics_counter_k8s_prometheus_expectation"
|
||||
path = "src/bin/k8s_prometheus_expectation.rs"
|
||||
|
||||
[[bin]]
|
||||
name = "metrics_counter_k8s_manual_prometheus"
|
||||
path = "src/bin/k8s_manual_prometheus.rs"
|
||||
|
||||
[dependencies]
|
||||
anyhow = "1.0"
|
||||
metrics-counter-node = { path = "../metrics-counter-node" }
|
||||
|
||||
@ -23,23 +23,23 @@ Each example follows the same pattern:
|
||||
## Run locally
|
||||
|
||||
```bash
|
||||
cargo run -p nats-examples --bin basic_roundtrip
|
||||
cargo run -p nats-examples --bin nats_basic_roundtrip
|
||||
```
|
||||
|
||||
If `nats-server` is not on `PATH`:
|
||||
|
||||
```bash
|
||||
NATS_SERVER_BIN=/path/to/nats-server cargo run -p nats-examples --bin basic_roundtrip
|
||||
NATS_SERVER_BIN=/path/to/nats-server cargo run -p nats-examples --bin nats_basic_roundtrip
|
||||
```
|
||||
|
||||
## Run with Docker Compose
|
||||
|
||||
```bash
|
||||
cargo run -p nats-examples --bin compose_roundtrip
|
||||
cargo run -p nats-examples --bin nats_compose_roundtrip
|
||||
```
|
||||
|
||||
## Run the parity check
|
||||
|
||||
```bash
|
||||
cargo run -p nats-examples --bin parity_check
|
||||
cargo run -p nats-examples --bin nats_parity_check
|
||||
```
|
||||
|
||||
@ -4,6 +4,18 @@ license.workspace = true
|
||||
name = "nats-examples"
|
||||
version.workspace = true
|
||||
|
||||
[[bin]]
|
||||
name = "nats_basic_roundtrip"
|
||||
path = "src/bin/basic_roundtrip.rs"
|
||||
|
||||
[[bin]]
|
||||
name = "nats_compose_roundtrip"
|
||||
path = "src/bin/compose_roundtrip.rs"
|
||||
|
||||
[[bin]]
|
||||
name = "nats_parity_check"
|
||||
path = "src/bin/parity_check.rs"
|
||||
|
||||
[dependencies]
|
||||
anyhow = "1.0"
|
||||
nats-runtime-ext = { path = "../testing/integration" }
|
||||
|
||||
25
examples/openraft_kv/Dockerfile
Normal file
25
examples/openraft_kv/Dockerfile
Normal file
@ -0,0 +1,25 @@
|
||||
# Build stage
|
||||
FROM rustlang/rust:nightly-bookworm AS builder
|
||||
|
||||
WORKDIR /build
|
||||
|
||||
COPY Cargo.toml Cargo.lock ./
|
||||
COPY cfgsync/ ./cfgsync/
|
||||
COPY examples/ ./examples/
|
||||
COPY testing-framework/ ./testing-framework/
|
||||
|
||||
RUN cargo build --release -p openraft-kv-node
|
||||
|
||||
FROM debian:bookworm-slim
|
||||
|
||||
RUN apt-get update && \
|
||||
apt-get install -y ca-certificates && \
|
||||
rm -rf /var/lib/apt/lists/*
|
||||
|
||||
COPY --from=builder /build/target/release/openraft-kv-node /usr/local/bin/openraft-kv-node
|
||||
|
||||
RUN mkdir -p /etc/openraft-kv
|
||||
WORKDIR /app
|
||||
|
||||
ENTRYPOINT ["/usr/local/bin/openraft-kv-node"]
|
||||
CMD ["--config", "/etc/openraft-kv/config.yaml"]
|
||||
87
examples/openraft_kv/README.md
Normal file
87
examples/openraft_kv/README.md
Normal file
@ -0,0 +1,87 @@
|
||||
# OpenRaft KV Example
|
||||
|
||||
This example runs a small key-value service built on top of `OpenRaft`.
|
||||
|
||||
The main scenario does four things:
|
||||
|
||||
- bootstraps node 0 as a one-node cluster
|
||||
- adds nodes 1 and 2 as learners and promotes them to voters
|
||||
- writes one batch of keys through the current leader
|
||||
- restarts that leader, waits for a new leader, writes again, and then checks
|
||||
that all three nodes expose the same replicated state
|
||||
|
||||
## How TF runs this
|
||||
|
||||
- TF starts three OpenRaft nodes
|
||||
- the workload bootstraps the cluster through the admin API
|
||||
- the workload writes a first batch, restarts the current leader, waits for failover, and writes again
|
||||
- the expectation checks that all three nodes converge on the same key/value state and membership
|
||||
|
||||
## Scenario
|
||||
|
||||
- `basic_failover` runs the leader-restart flow locally
|
||||
- `compose_failover` runs the same flow in Docker Compose
|
||||
- `k8s_failover` runs the same flow against a manual Kubernetes cluster deployment
|
||||
|
||||
## API
|
||||
|
||||
Each node exposes:
|
||||
|
||||
- `GET /healthz` for readiness
|
||||
- `GET /state` for current Raft role, leader, membership, log progress, and replicated key/value data
|
||||
- `POST /kv/write` to submit a write through the local Raft node
|
||||
- `POST /kv/read` to read a key from the local state machine
|
||||
- `POST /admin/init` to initialize a single-node cluster
|
||||
- `POST /admin/add-learner` to add a new Raft learner
|
||||
- `POST /admin/change-membership` to promote learners into the voting set
|
||||
|
||||
The node also exposes internal Raft RPC endpoints used only for replication:
|
||||
|
||||
- `POST /raft/vote`
|
||||
- `POST /raft/append`
|
||||
- `POST /raft/snapshot`
|
||||
|
||||
## Run locally
|
||||
|
||||
```bash
|
||||
OPENRAFT_KV_NODE_BIN="$(pwd)/target/debug/openraft-kv-node" \
|
||||
cargo run -p openraft-kv-examples --bin openraft_kv_basic_failover
|
||||
```
|
||||
|
||||
Build the node first if you have not done that yet:
|
||||
|
||||
```bash
|
||||
cargo build -p openraft-kv-node
|
||||
```
|
||||
|
||||
## Run with Docker Compose
|
||||
|
||||
Build the image first:
|
||||
|
||||
```bash
|
||||
docker build -t openraft-kv-node:local -f examples/openraft_kv/Dockerfile .
|
||||
```
|
||||
|
||||
Then run:
|
||||
|
||||
```bash
|
||||
cargo run -p openraft-kv-examples --bin openraft_kv_compose_failover
|
||||
```
|
||||
|
||||
Set `OPENRAFT_KV_IMAGE` to override the default compose image tag.
|
||||
|
||||
## Run on Kubernetes
|
||||
|
||||
Build the same image first:
|
||||
|
||||
```bash
|
||||
docker build -t openraft-kv-node:local -f examples/openraft_kv/Dockerfile .
|
||||
```
|
||||
|
||||
Then run:
|
||||
|
||||
```bash
|
||||
cargo run -p openraft-kv-examples --bin openraft_kv_k8s_failover
|
||||
```
|
||||
|
||||
If no cluster is available, the example exits early and prints a skip message.
|
||||
28
examples/openraft_kv/examples/Cargo.toml
Normal file
28
examples/openraft_kv/examples/Cargo.toml
Normal file
@ -0,0 +1,28 @@
|
||||
[package]
|
||||
edition.workspace = true
|
||||
license.workspace = true
|
||||
name = "openraft-kv-examples"
|
||||
version.workspace = true
|
||||
|
||||
[[bin]]
|
||||
name = "openraft_kv_basic_failover"
|
||||
path = "src/bin/basic_failover.rs"
|
||||
|
||||
[[bin]]
|
||||
name = "openraft_kv_compose_failover"
|
||||
path = "src/bin/compose_failover.rs"
|
||||
|
||||
[[bin]]
|
||||
name = "openraft_kv_k8s_failover"
|
||||
path = "src/bin/k8s_failover.rs"
|
||||
|
||||
[dependencies]
|
||||
anyhow = "1.0"
|
||||
openraft-kv-node = { path = "../openraft-kv-node" }
|
||||
openraft-kv-runtime-ext = { path = "../testing/integration" }
|
||||
openraft-kv-runtime-workloads = { path = "../testing/workloads" }
|
||||
testing-framework-core = { workspace = true }
|
||||
testing-framework-runner-k8s = { workspace = true }
|
||||
tokio = { workspace = true, features = ["full"] }
|
||||
tracing = { workspace = true }
|
||||
tracing-subscriber = { version = "0.3", features = ["env-filter"] }
|
||||
20
examples/openraft_kv/examples/src/bin/basic_failover.rs
Normal file
20
examples/openraft_kv/examples/src/bin/basic_failover.rs
Normal file
@ -0,0 +1,20 @@
|
||||
use std::time::Duration;
|
||||
|
||||
use openraft_kv_examples::build_failover_scenario;
|
||||
use openraft_kv_runtime_ext::OpenRaftKvLocalDeployer;
|
||||
use testing_framework_core::scenario::Deployer;
|
||||
|
||||
#[tokio::main]
|
||||
async fn main() -> anyhow::Result<()> {
|
||||
tracing_subscriber::fmt()
|
||||
.with_env_filter(tracing_subscriber::EnvFilter::from_default_env())
|
||||
.init();
|
||||
|
||||
let mut scenario = build_failover_scenario(Duration::from_secs(45), Duration::from_secs(30))?;
|
||||
|
||||
let deployer = OpenRaftKvLocalDeployer::default();
|
||||
let runner = deployer.deploy(&scenario).await?;
|
||||
runner.run(&mut scenario).await?;
|
||||
|
||||
Ok(())
|
||||
}
|
||||
20
examples/openraft_kv/examples/src/bin/compose_failover.rs
Normal file
20
examples/openraft_kv/examples/src/bin/compose_failover.rs
Normal file
@ -0,0 +1,20 @@
|
||||
use std::time::Duration;
|
||||
|
||||
use openraft_kv_examples::build_failover_scenario;
|
||||
use openraft_kv_runtime_ext::OpenRaftKvComposeDeployer;
|
||||
use testing_framework_core::scenario::Deployer;
|
||||
|
||||
#[tokio::main]
|
||||
async fn main() -> anyhow::Result<()> {
|
||||
tracing_subscriber::fmt()
|
||||
.with_env_filter(tracing_subscriber::EnvFilter::from_default_env())
|
||||
.init();
|
||||
|
||||
let mut scenario = build_failover_scenario(Duration::from_secs(60), Duration::from_secs(40))?;
|
||||
|
||||
let deployer = OpenRaftKvComposeDeployer::new();
|
||||
let runner = deployer.deploy(&scenario).await?;
|
||||
runner.run(&mut scenario).await?;
|
||||
|
||||
Ok(())
|
||||
}
|
||||
195
examples/openraft_kv/examples/src/bin/k8s_failover.rs
Normal file
195
examples/openraft_kv/examples/src/bin/k8s_failover.rs
Normal file
@ -0,0 +1,195 @@
|
||||
use std::{sync::Arc, time::Duration};

use anyhow::{Context as _, Result, anyhow};
use openraft_kv_examples::{
    INITIAL_WRITE_BATCH, RAFT_KEY_PREFIX, SECOND_WRITE_BATCH, TOTAL_WRITES,
};
use openraft_kv_node::OpenRaftKvClient;
use openraft_kv_runtime_ext::{
    OpenRaftClusterObserver, OpenRaftKvEnv, OpenRaftKvK8sDeployer, OpenRaftKvTopology,
    OpenRaftManualClusterSourceProvider,
};
use openraft_kv_runtime_workloads::{
    OpenRaftMembership, expected_kv, wait_for_observed_leader, wait_for_observed_membership,
    wait_for_observed_replication, write_batch,
};
use testing_framework_core::observation::{ObservationHandle, ObservationRuntime};
use testing_framework_runner_k8s::{ManualCluster, ManualClusterError};
use tracing::{info, warn};

/// Provisions a 3-node OpenRaft cluster on Kubernetes and drives the
/// failover flow, exiting successfully with a warning when no cluster is
/// reachable (so CI without k8s does not fail).
#[tokio::main]
async fn main() -> Result<()> {
    tracing_subscriber::fmt()
        .with_env_filter(tracing_subscriber::EnvFilter::from_default_env())
        .init();

    let deployer = OpenRaftKvK8sDeployer::new();
    let cluster = match deployer
        .manual_cluster_from_descriptors(OpenRaftKvTopology::new(3))
        .await
    {
        Ok(cluster) => cluster,
        // Client initialization failing means no usable kubeconfig/apiserver:
        // treat as "no cluster available" and skip.
        Err(ManualClusterError::ClientInit { source }) => {
            warn!("k8s unavailable ({source}); skipping openraft k8s run");

            return Ok(());
        }
        // Stack installation can also fail with connectivity-shaped errors;
        // only those messages count as a skip (see k8s_cluster_unavailable).
        Err(ManualClusterError::InstallStack { source })
            if k8s_cluster_unavailable(&source.to_string()) =>
        {
            warn!("k8s unavailable ({source}); skipping openraft k8s run");

            return Ok(());
        }
        // Anything else is a genuine deployment error and aborts the run.
        Err(error) => {
            return Err(anyhow::Error::new(error)).context("creating openraft k8s cluster");
        }
    };

    run_failover(Arc::new(cluster), Duration::from_secs(40)).await
}
|
||||
|
||||
/// Drives the full failover flow: boot, membership growth, a batch of
/// writes, a leader restart, a second batch, and replication convergence.
async fn run_failover(cluster: Arc<ManualCluster<OpenRaftKvEnv>>, timeout: Duration) -> Result<()> {
    start_cluster(cluster.as_ref()).await?;

    // The observation runtime produces cluster snapshots; `observer` is the
    // query handle the wait_* helpers poll.
    let observation_runtime = start_observer(Arc::clone(&cluster)).await?;
    let observer = observation_runtime.handle();

    // Bootstrap a single-node cluster on node-0 before growing membership.
    client_for_node(cluster.as_ref(), 0)?.init_self().await?;

    let initial_leader = wait_for_observed_leader(&observer, timeout, None).await?;
    let membership = current_membership(&observer)?;

    add_learners_and_promote(
        cluster.as_ref(),
        &observer,
        initial_leader,
        &membership,
        timeout,
    )
    .await?;
    write_initial_batch(cluster.as_ref(), initial_leader).await?;

    restart_leader(cluster.as_ref(), initial_leader).await?;

    // Exclude the restarted node so this waits for an actual re-election.
    let new_leader = wait_for_observed_leader(&observer, timeout, Some(initial_leader)).await?;
    write_second_batch(cluster.as_ref(), new_leader).await?;

    // All writes, from before and after the restart, must be visible.
    let expected = expected_kv(RAFT_KEY_PREFIX, TOTAL_WRITES);
    wait_for_observed_replication(&observer, &expected, timeout).await?;

    cluster.stop_all();

    Ok(())
}
|
||||
|
||||
async fn start_cluster(cluster: &ManualCluster<OpenRaftKvEnv>) -> Result<()> {
|
||||
cluster.start_node("node-0").await?;
|
||||
cluster.start_node("node-1").await?;
|
||||
cluster.start_node("node-2").await?;
|
||||
|
||||
cluster.wait_network_ready().await?;
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
/// Starts the cluster observer over the manual cluster.
///
/// The `3` matches the topology size requested in `main`; the provider
/// feeds per-node state sources into the observation runtime.
async fn start_observer(
    cluster: Arc<ManualCluster<OpenRaftKvEnv>>,
) -> Result<ObservationRuntime<OpenRaftClusterObserver>> {
    let provider = OpenRaftManualClusterSourceProvider::new(cluster, 3);

    ObservationRuntime::start(
        provider,
        OpenRaftClusterObserver,
        OpenRaftClusterObserver::config(),
    )
    .await
    .map_err(anyhow::Error::new)
    .context("starting openraft k8s observer")
}
|
||||
|
||||
/// Registers every non-leader node as a learner with the leader, promotes
/// the full voter set, and waits until the observer reports the new
/// membership.
async fn add_learners_and_promote(
    cluster: &ManualCluster<OpenRaftKvEnv>,
    observer: &ObservationHandle<OpenRaftClusterObserver>,
    leader_id: u64,
    membership: &OpenRaftMembership,
    timeout: Duration,
) -> Result<()> {
    let leader = client_for_node(cluster, leader_id)?;

    // Learners must be known to the leader before they can become voters.
    for learner in membership.learner_targets(leader_id) {
        info!(
            target = learner.node_id,
            addr = %learner.public_addr,
            "adding learner"
        );

        leader
            .add_learner(learner.node_id, &learner.public_addr)
            .await?;
    }

    let voter_ids = membership.voter_ids();
    leader.change_membership(voter_ids.iter().copied()).await?;

    // Don't proceed until the membership change is externally observable.
    wait_for_observed_membership(observer, &voter_ids, timeout).await?;

    Ok(())
}
|
||||
|
||||
async fn write_initial_batch(cluster: &ManualCluster<OpenRaftKvEnv>, leader_id: u64) -> Result<()> {
|
||||
let leader = client_for_node(cluster, leader_id)?;
|
||||
|
||||
write_batch(&leader, RAFT_KEY_PREFIX, 0, INITIAL_WRITE_BATCH).await?;
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
async fn write_second_batch(cluster: &ManualCluster<OpenRaftKvEnv>, leader_id: u64) -> Result<()> {
|
||||
let leader = client_for_node(cluster, leader_id)?;
|
||||
|
||||
write_batch(
|
||||
&leader,
|
||||
RAFT_KEY_PREFIX,
|
||||
INITIAL_WRITE_BATCH,
|
||||
SECOND_WRITE_BATCH,
|
||||
)
|
||||
.await?;
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
/// Restarts the node currently acting as leader, then waits for the
/// network to report ready again before returning.
async fn restart_leader(cluster: &ManualCluster<OpenRaftKvEnv>, leader_id: u64) -> Result<()> {
    let leader_name = format!("node-{leader_id}");
    info!(%leader_name, "restarting current leader");

    cluster.restart_node(&leader_name).await?;
    cluster.wait_network_ready().await?;

    Ok(())
}
|
||||
|
||||
fn current_membership(
|
||||
observer: &ObservationHandle<OpenRaftClusterObserver>,
|
||||
) -> Result<OpenRaftMembership> {
|
||||
let snapshot = observer
|
||||
.latest_snapshot()
|
||||
.ok_or_else(|| anyhow!("openraft observer has not produced a snapshot yet"))?;
|
||||
|
||||
Ok(OpenRaftMembership::from_states(snapshot.value.states()))
|
||||
}
|
||||
|
||||
fn client_for_node(
|
||||
cluster: &ManualCluster<OpenRaftKvEnv>,
|
||||
node_id: u64,
|
||||
) -> Result<OpenRaftKvClient> {
|
||||
cluster
|
||||
.node_client(&format!("node-{node_id}"))
|
||||
.ok_or_else(|| anyhow!("node-{node_id} client missing"))
|
||||
}
|
||||
|
||||
/// Returns `true` when `message` looks like a "no reachable Kubernetes
/// cluster" connectivity failure rather than a genuine deployment error.
///
/// Matching is case-insensitive because the same condition surfaces with
/// different casing depending on the error source (e.g. Go net errors use
/// lowercase "connection refused", while kubectl/client output may
/// capitalize phrases like "Connection refused" or "TLS handshake timeout").
fn k8s_cluster_unavailable(message: &str) -> bool {
    let message = message.to_ascii_lowercase();

    [
        "unable to connect to the server",
        "tls handshake timeout",
        "connection refused",
    ]
    .iter()
    .any(|needle| message.contains(needle))
}
|
||||
41
examples/openraft_kv/examples/src/lib.rs
Normal file
41
examples/openraft_kv/examples/src/lib.rs
Normal file
@ -0,0 +1,41 @@
|
||||
use std::time::Duration;

use openraft_kv_runtime_ext::{OpenRaftKvBuilderExt, OpenRaftKvEnv, OpenRaftKvScenarioBuilder};
use openraft_kv_runtime_workloads::{OpenRaftKvConverges, OpenRaftKvFailoverWorkload};
use testing_framework_core::scenario::{NodeControlCapability, Scenario};

/// Number of writes issued before the leader restart.
pub const INITIAL_WRITE_BATCH: usize = 8;
/// Number of writes issued after the leader restart.
pub const SECOND_WRITE_BATCH: usize = 8;
/// Total write count expected after the scenario completes.
pub const TOTAL_WRITES: usize = INITIAL_WRITE_BATCH + SECOND_WRITE_BATCH;
/// Key prefix shared by the failover workload and convergence expectation.
pub const RAFT_KEY_PREFIX: &str = "raft-key";

/// Builds the standard failover scenario used by the local and compose
/// binaries.
///
/// `run_duration` bounds the scenario run and is reused as the convergence
/// expectation's timeout; `workload_timeout` bounds the failover workload
/// itself.
pub fn build_failover_scenario(
    run_duration: Duration,
    workload_timeout: Duration,
) -> anyhow::Result<Scenario<OpenRaftKvEnv, NodeControlCapability>> {
    Ok(
        // Default deployment (identity closure); node control is enabled so
        // the workload can restart nodes during the run.
        OpenRaftKvScenarioBuilder::deployment_with(|deployment| deployment)
            .with_cluster_observer()
            .enable_node_control()
            .with_run_duration(run_duration)
            .with_workload(
                OpenRaftKvFailoverWorkload::new()
                    .first_batch(INITIAL_WRITE_BATCH)
                    .second_batch(SECOND_WRITE_BATCH)
                    .timeout(workload_timeout)
                    .key_prefix(RAFT_KEY_PREFIX),
            )
            .with_expectation(
                OpenRaftKvConverges::new(TOTAL_WRITES)
                    .timeout(run_duration)
                    .key_prefix(RAFT_KEY_PREFIX),
            )
            .build()?,
    )
}
|
||||
23
examples/openraft_kv/openraft-kv-node/Cargo.toml
Normal file
23
examples/openraft_kv/openraft-kv-node/Cargo.toml
Normal file
@ -0,0 +1,23 @@
|
||||
[package]
|
||||
edition.workspace = true
|
||||
license.workspace = true
|
||||
name = "openraft-kv-node"
|
||||
version.workspace = true
|
||||
|
||||
[[bin]]
|
||||
name = "openraft-kv-node"
|
||||
path = "src/main.rs"
|
||||
|
||||
[dependencies]
|
||||
anyhow = "1.0"
|
||||
axum = "0.7"
|
||||
clap = { version = "4.0", features = ["derive"] }
|
||||
openraft = { workspace = true }
|
||||
openraft-memstore = { workspace = true }
|
||||
reqwest = { workspace = true, features = ["json"] }
|
||||
serde = { workspace = true }
|
||||
serde_yaml = { workspace = true }
|
||||
tokio = { workspace = true, features = ["full"] }
|
||||
tower-http = { version = "0.6", features = ["trace"] }
|
||||
tracing = { workspace = true }
|
||||
tracing-subscriber = { version = "0.3", features = ["env-filter"] }
|
||||
136
examples/openraft_kv/openraft-kv-node/src/client.rs
Normal file
136
examples/openraft_kv/openraft-kv-node/src/client.rs
Normal file
@ -0,0 +1,136 @@
|
||||
use std::{collections::BTreeSet, time::Duration};

use reqwest::Url;
use serde::{Serialize, de::DeserializeOwned};

use crate::types::{
    AddLearnerRequest, AddLearnerResult, ChangeMembershipRequest, ChangeMembershipResult,
    InitResult, OpenRaftKvReadRequest, OpenRaftKvReadResponse, OpenRaftKvState,
    OpenRaftKvWriteRequest, OpenRaftKvWriteResponse,
};

/// Small HTTP client for the OpenRaft example node and its admin endpoints.
///
/// NOTE(review): endpoint paths are resolved with `Url::join`, which drops
/// the base URL's final path segment unless it ends with `/` — callers
/// presumably pass a root URL like `http://host:port/`; confirm.
#[derive(Clone)]
pub struct OpenRaftKvClient {
    // Root URL of one node's HTTP server.
    base_url: Url,
    // Shared reqwest client with short timeouts (see `new`).
    client: reqwest::Client,
}

impl OpenRaftKvClient {
    /// Builds a client for one node base URL.
    ///
    /// Request and connect timeouts are both 2 seconds, so polling a downed
    /// node fails quickly instead of hanging failover tests.
    #[must_use]
    pub fn new(base_url: Url) -> Self {
        Self {
            base_url,
            client: reqwest::Client::builder()
                .timeout(Duration::from_secs(2))
                .connect_timeout(Duration::from_secs(2))
                .build()
                .expect("openraft kv client timeout configuration is valid"),
        }
    }

    /// Fetches the node's current Raft and application state.
    pub async fn state(&self) -> anyhow::Result<OpenRaftKvState> {
        self.get("state").await
    }

    /// Replicates one key/value write through the current leader.
    ///
    /// `serial` is forwarded verbatim as the write's serial number.
    pub async fn write(
        &self,
        key: &str,
        value: &str,
        serial: u64,
    ) -> anyhow::Result<OpenRaftKvWriteResponse> {
        self.post_result(
            "kv/write",
            &OpenRaftKvWriteRequest {
                key: key.to_owned(),
                value: value.to_owned(),
                serial,
            },
        )
        .await
    }

    /// Reads one key from the replicated state machine.
    ///
    /// Returns `Ok(None)` when the node reports no value for the key.
    pub async fn read(&self, key: &str) -> anyhow::Result<Option<String>> {
        let response: OpenRaftKvReadResponse = self
            .post_result(
                "kv/read",
                &OpenRaftKvReadRequest {
                    key: key.to_owned(),
                },
            )
            .await?;
        Ok(response.value)
    }

    /// Bootstraps a one-node cluster on this node.
    ///
    /// NOTE(review): the application-level `InitResult` payload is
    /// discarded, so an `Err` body (e.g. already initialized) is silently
    /// ignored — only transport/HTTP-status failures surface. Confirm this
    /// best-effort behavior is intended.
    pub async fn init_self(&self) -> anyhow::Result<()> {
        let _: InitResult = self.post("admin/init", &()).await?;
        Ok(())
    }

    /// Registers another node as a learner with the current leader.
    ///
    /// NOTE(review): like `init_self`, the application-level result payload
    /// is discarded here.
    pub async fn add_learner(&self, node_id: u64, addr: &str) -> anyhow::Result<()> {
        let _: AddLearnerResult = self
            .post(
                "admin/add-learner",
                &AddLearnerRequest {
                    node_id,
                    addr: addr.to_owned(),
                },
            )
            .await?;
        Ok(())
    }

    /// Promotes the cluster to the provided voter set.
    ///
    /// Duplicate ids are removed (and the set sorted) before sending.
    pub async fn change_membership(
        &self,
        voters: impl IntoIterator<Item = u64>,
    ) -> anyhow::Result<()> {
        let voters = normalize_voters(voters);
        let request = ChangeMembershipRequest { voters };

        let _: ChangeMembershipResult = self.post("admin/change-membership", &request).await?;
        Ok(())
    }

    // GETs `path` and deserializes the JSON body; non-2xx statuses error.
    async fn get<T: DeserializeOwned>(&self, path: &str) -> anyhow::Result<T> {
        let url = self.base_url.join(path)?;
        let response = self.client.get(url).send().await?;
        let response = response.error_for_status()?;

        Ok(response.json().await?)
    }

    // POSTs `body` as JSON to `path` and deserializes the JSON response;
    // non-2xx statuses error.
    async fn post<B: Serialize, T: DeserializeOwned>(
        &self,
        path: &str,
        body: &B,
    ) -> anyhow::Result<T> {
        let url = self.base_url.join(path)?;

        let response = self.client.post(url).json(body).send().await?;

        let response = response.error_for_status()?;

        Ok(response.json().await?)
    }

    // POST that unwraps the node's `Result<T, String>` envelope, turning an
    // application-level `Err(String)` into an `anyhow` error.
    async fn post_result<B: Serialize, T: DeserializeOwned>(
        &self,
        path: &str,
        body: &B,
    ) -> anyhow::Result<T> {
        let result: Result<T, String> = self.post(path, body).await?;
        result.map_err(anyhow::Error::msg)
    }
}
|
||||
|
||||
/// Deduplicates the voter ids and returns them in ascending order.
fn normalize_voters(voters: impl IntoIterator<Item = u64>) -> Vec<u64> {
    let mut normalized: Vec<u64> = voters.into_iter().collect();
    normalized.sort_unstable();
    normalized.dedup();
    normalized
}
|
||||
46
examples/openraft_kv/openraft-kv-node/src/config.rs
Normal file
46
examples/openraft_kv/openraft-kv-node/src/config.rs
Normal file
@ -0,0 +1,46 @@
|
||||
use std::{collections::BTreeMap, fs, path::Path};

use serde::{Deserialize, Serialize};

/// Static node config written by TF for one OpenRaft node process.
#[derive(Clone, Debug, Serialize, Deserialize)]
pub struct OpenRaftKvNodeConfig {
    /// Stable OpenRaft node identifier.
    pub node_id: u64,
    /// HTTP port bound by the node process.
    pub http_port: u16,
    /// Advertised Raft address for this node.
    pub public_addr: String,
    /// Advertised Raft addresses for the other known nodes.
    ///
    /// Defaults to empty so a single-node config can omit the field.
    #[serde(default)]
    pub peer_addrs: BTreeMap<u64, String>,
    /// Heartbeat interval passed to the OpenRaft config.
    #[serde(default = "default_heartbeat_interval_ms")]
    pub heartbeat_interval_ms: u64,
    /// Lower election timeout bound passed to OpenRaft.
    #[serde(default = "default_election_timeout_min_ms")]
    pub election_timeout_min_ms: u64,
    /// Upper election timeout bound passed to OpenRaft.
    #[serde(default = "default_election_timeout_max_ms")]
    pub election_timeout_max_ms: u64,
}

impl OpenRaftKvNodeConfig {
    /// Loads one node config from YAML on disk.
    ///
    /// # Errors
    /// Fails when the file cannot be read or does not parse as YAML
    /// matching this struct.
    pub fn load(path: &Path) -> anyhow::Result<Self> {
        let raw = fs::read_to_string(path)?;
        Ok(serde_yaml::from_str(&raw)?)
    }
}
|
||||
|
||||
/// Default Raft heartbeat interval (ms) when the YAML config omits it.
const fn default_heartbeat_interval_ms() -> u64 {
    500
}

/// Default lower election-timeout bound (ms) when the config omits it.
const fn default_election_timeout_min_ms() -> u64 {
    1500
}

/// Default upper election-timeout bound (ms) when the config omits it.
const fn default_election_timeout_max_ms() -> u64 {
    3000
}
|
||||
25
examples/openraft_kv/openraft-kv-node/src/lib.rs
Normal file
25
examples/openraft_kv/openraft-kv-node/src/lib.rs
Normal file
@ -0,0 +1,25 @@
|
||||
//! OpenRaft-backed key-value node used by the `examples-simple-clusters`
|
||||
//! branch.
|
||||
|
||||
/// HTTP client for interacting with one OpenRaft node.
|
||||
pub mod client;
|
||||
/// YAML node configuration used by TF and the node binary.
|
||||
pub mod config;
|
||||
mod network;
|
||||
/// Axum server bootstrap and request handlers for one node process.
|
||||
pub mod server;
|
||||
/// Shared request, response, and state payload types.
|
||||
pub mod types;
|
||||
|
||||
/// Re-export of the node HTTP client.
|
||||
pub use client::OpenRaftKvClient;
|
||||
/// Re-export of the node YAML config type.
|
||||
pub use config::OpenRaftKvNodeConfig;
|
||||
/// Re-export of the public request and state payloads.
|
||||
pub use types::{
|
||||
AddLearnerRequest, ChangeMembershipRequest, OpenRaftKvReadRequest, OpenRaftKvReadResponse,
|
||||
OpenRaftKvState, OpenRaftKvWriteRequest, OpenRaftKvWriteResponse,
|
||||
};
|
||||
|
||||
/// OpenRaft type configuration shared by the in-memory log and state machine.
|
||||
pub type TypeConfig = openraft_memstore::TypeConfig;
|
||||
24
examples/openraft_kv/openraft-kv-node/src/main.rs
Normal file
24
examples/openraft_kv/openraft-kv-node/src/main.rs
Normal file
@ -0,0 +1,24 @@
|
||||
use std::path::PathBuf;

use clap::Parser;
use openraft_kv_node::{config::OpenRaftKvNodeConfig, server::run_server};
use tracing_subscriber::EnvFilter;

// CLI options for the node binary. Plain `//` comments are used here on
// purpose: clap turns doc comments into --help text, which would change
// the binary's observable output.
#[derive(Parser, Clone, Debug)]
#[command(author, version, about)]
struct Opt {
    // Path to the YAML node config (see OpenRaftKvNodeConfig).
    #[arg(long)]
    config: PathBuf,
}

// Parses the CLI, loads the YAML config, and runs the HTTP server until it
// exits or fails.
#[tokio::main]
async fn main() -> anyhow::Result<()> {
    // ANSI disabled so logs stay clean when captured by container runtimes.
    tracing_subscriber::fmt()
        .with_env_filter(EnvFilter::from_default_env())
        .with_ansi(false)
        .init();

    let options = Opt::parse();
    let config = OpenRaftKvNodeConfig::load(&options.config)?;
    run_server(config).await
}
|
||||
158
examples/openraft_kv/openraft-kv-node/src/network.rs
Normal file
158
examples/openraft_kv/openraft-kv-node/src/network.rs
Normal file
@ -0,0 +1,158 @@
|
||||
//! HTTP transport used by OpenRaft to replicate between example nodes.

use std::{collections::BTreeMap, sync::Arc};

use openraft::{
    RaftNetworkFactory, RaftNetworkV2,
    alias::{SnapshotOf, VoteOf},
    errors::{RPCError, StreamingError, Unreachable},
    network::RPCOption,
};
use reqwest::Url;
use tokio::sync::RwLock;

use crate::{
    TypeConfig,
    types::{InstallFullSnapshotBody, SnapshotRpcResult},
};

/// Shared node-address book used by Raft RPC clients.
///
/// One connection-pooling `reqwest::Client` is shared across all per-target
/// clients. The address map is shared with the server, which inserts new
/// peers when learners are added at runtime.
#[derive(Clone, Default)]
pub struct HttpNetworkFactory {
    client: reqwest::Client,
    // node_id -> advertised "host:port" Raft address.
    known_nodes: Arc<RwLock<BTreeMap<u64, String>>>,
}

/// Per-target HTTP client used for Raft replication traffic.
pub struct HttpNetworkClient {
    client: reqwest::Client,
    // Raft node id this client talks to; used for error messages.
    target: u64,
    // Address snapshotted at client creation; `None` when the target was
    // unknown at that time, in which case every RPC fails as `Unreachable`.
    target_addr: Option<String>,
}

impl HttpNetworkFactory {
    /// Creates a network factory backed by one shared node-address map.
    #[must_use]
    pub fn new(known_nodes: Arc<RwLock<BTreeMap<u64, String>>>) -> Self {
        Self {
            client: reqwest::Client::new(),
            known_nodes,
        }
    }
}

impl RaftNetworkFactory<TypeConfig> for HttpNetworkFactory {
    type Network = HttpNetworkClient;

    async fn new_client(&mut self, target: u64, _node: &()) -> Self::Network {
        // The target's address is read once here and not refreshed for the
        // lifetime of this client.
        let target_addr = self.known_nodes.read().await.get(&target).cloned();

        HttpNetworkClient {
            client: self.client.clone(),
            target,
            target_addr,
        }
    }
}
|
||||
|
||||
impl RaftNetworkV2<TypeConfig> for HttpNetworkClient {
    /// Forwards an append-entries RPC to the target over HTTP.
    async fn append_entries(
        &mut self,
        rpc: openraft::raft::AppendEntriesRequest<TypeConfig>,
        _option: RPCOption,
    ) -> Result<openraft::raft::AppendEntriesResponse<TypeConfig>, RPCError<TypeConfig>> {
        self.post_rpc("raft/append", &rpc).await
    }

    /// Forwards a vote RPC to the target over HTTP.
    async fn vote(
        &mut self,
        rpc: openraft::raft::VoteRequest<TypeConfig>,
        _option: RPCOption,
    ) -> Result<openraft::raft::VoteResponse<TypeConfig>, RPCError<TypeConfig>> {
        self.post_rpc("raft/vote", &rpc).await
    }

    /// Ships the full snapshot in a single HTTP request body.
    ///
    /// The `_cancel` future is ignored, so an in-flight transfer cannot be
    /// aborted mid-request; acceptable for this in-memory example.
    async fn full_snapshot(
        &mut self,
        vote: VoteOf<TypeConfig>,
        snapshot: SnapshotOf<TypeConfig>,
        _cancel: impl std::future::Future<Output = openraft::errors::ReplicationClosed>
        + openraft::OptionalSend
        + 'static,
        _option: RPCOption,
    ) -> Result<openraft::raft::SnapshotResponse<TypeConfig>, StreamingError<TypeConfig>> {
        let body = InstallFullSnapshotBody {
            vote,
            meta: snapshot.meta,
            data: snapshot.snapshot.into_inner(),
        };

        self.post_snapshot("raft/snapshot", &body).await
    }
}

impl HttpNetworkClient {
    // POSTs `body` as JSON and unwraps the node's `Result<T, String>`
    // envelope. Transport, HTTP-status, and decode failures are all
    // surfaced to OpenRaft as `Unreachable`.
    async fn post_rpc<B, T>(&self, path: &str, body: &B) -> Result<T, RPCError<TypeConfig>>
    where
        B: serde::Serialize,
        T: serde::de::DeserializeOwned,
    {
        let url = self.endpoint_url(path)?;
        let response = self
            .client
            .post(url)
            .json(body)
            .send()
            .await
            .map_err(|err| RPCError::Unreachable(Unreachable::new(&err)))?
            .error_for_status()
            .map_err(|err| RPCError::Unreachable(Unreachable::new(&err)))?;

        let result: Result<T, String> = response
            .json()
            .await
            .map_err(|err| RPCError::Unreachable(Unreachable::new(&err)))?;

        result.map_err(|err| RPCError::Unreachable(Unreachable::from_string(err)))
    }

    // Snapshot variant of `post_rpc`: same flow, but failures map into
    // `StreamingError` as required by the snapshot RPC signature.
    async fn post_snapshot(
        &self,
        path: &str,
        body: &InstallFullSnapshotBody,
    ) -> Result<openraft::raft::SnapshotResponse<TypeConfig>, StreamingError<TypeConfig>> {
        let url = self
            .endpoint_url(path)
            .map_err(|err| StreamingError::Unreachable(Unreachable::new(&err)))?;
        let response = self
            .client
            .post(url)
            .json(body)
            .send()
            .await
            .map_err(|err| StreamingError::Unreachable(Unreachable::new(&err)))?
            .error_for_status()
            .map_err(|err| StreamingError::Unreachable(Unreachable::new(&err)))?;

        let result: SnapshotRpcResult = response
            .json()
            .await
            .map_err(|err| StreamingError::Unreachable(Unreachable::new(&err)))?;

        result.map_err(|err| StreamingError::Unreachable(Unreachable::from_string(err)))
    }

    // Resolves `path` against the target's advertised address; fails as
    // `Unreachable` when no address was known at client-creation time.
    fn endpoint_url(&self, path: &str) -> Result<Url, Unreachable<TypeConfig>> {
        let Some(addr) = &self.target_addr else {
            return Err(Unreachable::from_string(format!(
                "target {} has no known address",
                self.target
            )));
        };

        let mut url =
            Url::parse(&format!("http://{addr}/")).map_err(|err| Unreachable::new(&err))?;
        url.set_path(path);
        Ok(url)
    }
}
|
||||
276
examples/openraft_kv/openraft-kv-node/src/server.rs
Normal file
276
examples/openraft_kv/openraft-kv-node/src/server.rs
Normal file
@ -0,0 +1,276 @@
|
||||
//! Axum server that exposes the OpenRaft example node and its admin endpoints.
|
||||
|
||||
use std::{
|
||||
collections::{BTreeMap, BTreeSet},
|
||||
sync::Arc,
|
||||
};
|
||||
|
||||
use axum::{
|
||||
Json, Router,
|
||||
extract::State,
|
||||
http::StatusCode,
|
||||
routing::{get, post},
|
||||
};
|
||||
use openraft::{Config, Raft, SnapshotPolicy, type_config::async_runtime::WatchReceiver};
|
||||
use openraft_memstore::{ClientRequest, MemLogStore, MemStateMachine, new_mem_store};
|
||||
use tokio::sync::RwLock;
|
||||
use tower_http::trace::TraceLayer;
|
||||
use tracing::info;
|
||||
|
||||
use crate::{
|
||||
TypeConfig,
|
||||
config::OpenRaftKvNodeConfig,
|
||||
network::HttpNetworkFactory,
|
||||
types::{
|
||||
AddLearnerRequest, AppendRpcResult, ChangeMembershipRequest, InitResult,
|
||||
InstallSnapshotBody, MetricsResult, OpenRaftKvReadRequest, OpenRaftKvReadResponse,
|
||||
OpenRaftKvState, OpenRaftKvWriteRequest, OpenRaftKvWriteResponse, SnapshotRpcResult,
|
||||
VoteRpcResult,
|
||||
},
|
||||
};
|
||||
|
||||
// Mutable node-id -> advertised-address book shared between the HTTP
// handlers and the Raft network factory.
type KnownNodes = Arc<RwLock<BTreeMap<u64, String>>>;

/// Shared state used by the HTTP handlers exposed by one node.
#[derive(Clone)]
pub struct AppState {
    // Static YAML config for this node (id, addresses, timeouts).
    config: OpenRaftKvNodeConfig,
    // Raft instance driving replication for this node.
    raft: Raft<TypeConfig, Arc<MemStateMachine>>,
    // Direct state-machine handle for local (non-Raft) reads.
    state_machine: Arc<MemStateMachine>,
    // See `KnownNodes`; admin endpoints insert new peers here.
    known_nodes: KnownNodes,
}

impl AppState {
    /// Builds the application state for one node process.
    pub fn new(
        config: OpenRaftKvNodeConfig,
        raft: Raft<TypeConfig, Arc<MemStateMachine>>,
        state_machine: Arc<MemStateMachine>,
        known_nodes: KnownNodes,
    ) -> Self {
        Self {
            config,
            raft,
            state_machine,
            known_nodes,
        }
    }
}
|
||||
|
||||
/// Starts one OpenRaft-backed HTTP node.
///
/// Builds the validated Raft config from the YAML node config, wires the
/// in-memory log store, state machine, and HTTP transport together, and
/// serves the app/admin/raft routes on `0.0.0.0:{http_port}` until the
/// server exits.
pub async fn run_server(config: OpenRaftKvNodeConfig) -> anyhow::Result<()> {
    let raft_config = Arc::new(
        Config {
            cluster_name: "openraft-kv".to_owned(),
            heartbeat_interval: config.heartbeat_interval_ms,
            election_timeout_min: config.election_timeout_min_ms,
            election_timeout_max: config.election_timeout_max_ms,
            // Snapshotting is disabled: the example keeps the full log in
            // memory, and snapshots are only installed when shipped by a
            // peer via the raft/snapshot endpoint.
            snapshot_policy: SnapshotPolicy::Never,
            ..Default::default()
        }
        .validate()?,
    );

    // Seed the address book with configured peers plus this node itself.
    let known_nodes = Arc::new(RwLock::new(known_nodes(&config)));

    let (log_store, state_machine): (Arc<MemLogStore>, Arc<MemStateMachine>) = new_mem_store();
    let network = HttpNetworkFactory::new(known_nodes.clone());

    let raft = Raft::new(
        config.node_id,
        raft_config,
        network,
        log_store,
        state_machine.clone(),
    )
    .await?;

    let app_state = AppState::new(config.clone(), raft, state_machine, known_nodes);
    let app = router(app_state);
    let address = std::net::SocketAddr::from(([0, 0, 0, 0], config.http_port));

    info!(
        node_id = config.node_id,
        public_addr = %config.public_addr,
        peers = ?config.peer_addrs,
        %address,
        "starting openraft kv node"
    );

    let listener = tokio::net::TcpListener::bind(address).await?;
    axum::serve(listener, app).await?;
    Ok(())
}
|
||||
|
||||
/// Assembles the node's routes behind HTTP request tracing: application KV
/// endpoints, operator admin endpoints, and the internal Raft RPC
/// endpoints called by peer nodes.
fn router(app_state: AppState) -> Router {
    let app_routes = Router::new()
        .route("/healthz", get(healthz))
        .route("/state", get(cluster_state))
        .route("/kv/write", post(write))
        .route("/kv/read", post(read));

    let admin_routes = Router::new()
        .route("/admin/init", post(init))
        .route("/admin/add-learner", post(add_learner))
        .route("/admin/change-membership", post(change_membership))
        .route("/admin/metrics", get(metrics));

    let raft_routes = Router::new()
        .route("/raft/vote", post(vote))
        .route("/raft/append", post(append))
        .route("/raft/snapshot", post(snapshot));

    app_routes
        .merge(admin_routes)
        .merge(raft_routes)
        .layer(TraceLayer::new_for_http())
        .with_state(app_state)
}
|
||||
|
||||
/// Liveness probe endpoint; always responds with the literal "ok".
async fn healthz() -> &'static str {
    "ok"
}
|
||||
|
||||
/// Reports this node's Raft role, leader/term, log indices, voter set, and
/// the full KV contents.
///
/// Served from local metrics and the local state machine without going
/// through Raft, so a follower's view may lag the leader's.
async fn cluster_state(State(app): State<AppState>) -> Result<Json<OpenRaftKvState>, StatusCode> {
    let metrics = app.raft.metrics().borrow_watched().clone();

    let sm = app.state_machine.get_state_machine().await;

    let voters = metrics
        .membership_config
        .membership()
        .voter_ids()
        .collect::<Vec<_>>();

    // The memstore's per-client status map is (re)used as the KV store:
    // client -> key, status -> value (see the write handler).
    let kv = sm.client_status.into_iter().collect::<BTreeMap<_, _>>();

    Ok(Json(OpenRaftKvState {
        node_id: app.config.node_id,
        public_addr: app.config.public_addr.clone(),
        role: format!("{:?}", metrics.state),
        current_leader: metrics.current_leader,
        current_term: metrics.current_term,
        last_log_index: metrics.last_log_index,
        last_applied_index: metrics.last_applied.as_ref().map(|log_id| log_id.index()),
        voters,
        kv,
    }))
}
|
||||
|
||||
/// Returns this node's raw OpenRaft metrics, wrapped in the shared
/// `Result` envelope (always `Ok` here).
async fn metrics(State(app): State<AppState>) -> Json<MetricsResult> {
    Json(Ok(app.raft.metrics().borrow_watched().clone()))
}

/// Bootstraps a single-node cluster whose only member is this node.
async fn init(State(app): State<AppState>) -> Json<InitResult> {
    let members = BTreeSet::from([app.config.node_id]);

    Json(
        app.raft
            .initialize(members)
            .await
            .map_err(|err| err.to_string()),
    )
}

/// Records the learner's address, then asks Raft to add it as a learner.
///
/// The address is inserted into the shared book *before* `add_learner` so
/// the replication client created for the new target can resolve it.
async fn add_learner(
    State(app): State<AppState>,
    Json(request): Json<AddLearnerRequest>,
) -> Json<InitResult> {
    let mut known_nodes = app.known_nodes.write().await;
    known_nodes.insert(request.node_id, request.addr.clone());
    // Release the write lock before awaiting the Raft call.
    drop(known_nodes);

    Json(
        app.raft
            .add_learner(request.node_id, (), true)
            .await
            .map(|_| ())
            .map_err(|err| err.to_string()),
    )
}

/// Changes the voter set to exactly the requested ids.
///
/// NOTE(review): the second `change_membership` argument is `false` —
/// presumably openraft's retain-removed-voters-as-learners flag; confirm
/// against the openraft 0.10 API docs.
async fn change_membership(
    State(app): State<AppState>,
    Json(request): Json<ChangeMembershipRequest>,
) -> Json<InitResult> {
    Json(
        app.raft
            .change_membership(request.voters.into_iter().collect::<BTreeSet<_>>(), false)
            .await
            .map(|_| ())
            .map_err(|err| err.to_string()),
    )
}
|
||||
|
||||
async fn write(
|
||||
State(app): State<AppState>,
|
||||
Json(request): Json<OpenRaftKvWriteRequest>,
|
||||
) -> Json<Result<OpenRaftKvWriteResponse, String>> {
|
||||
let result = app
|
||||
.raft
|
||||
.client_write(ClientRequest {
|
||||
client: request.key,
|
||||
serial: request.serial,
|
||||
status: request.value,
|
||||
})
|
||||
.await
|
||||
.map(|response| OpenRaftKvWriteResponse {
|
||||
previous: response.response().0.clone(),
|
||||
})
|
||||
.map_err(|err| err.to_string());
|
||||
|
||||
Json(result)
|
||||
}
|
||||
|
||||
async fn read(
|
||||
State(app): State<AppState>,
|
||||
Json(request): Json<OpenRaftKvReadRequest>,
|
||||
) -> Json<Result<OpenRaftKvReadResponse, String>> {
|
||||
let sm = app.state_machine.get_state_machine().await;
|
||||
|
||||
Json(Ok(OpenRaftKvReadResponse {
|
||||
value: sm.client_status.get(&request.key).cloned(),
|
||||
}))
|
||||
}
|
||||
|
||||
async fn vote(
|
||||
State(app): State<AppState>,
|
||||
Json(request): Json<openraft::raft::VoteRequest<TypeConfig>>,
|
||||
) -> Json<VoteRpcResult> {
|
||||
Json(app.raft.vote(request).await.map_err(|err| err.to_string()))
|
||||
}
|
||||
|
||||
async fn append(
|
||||
State(app): State<AppState>,
|
||||
Json(request): Json<openraft::raft::AppendEntriesRequest<TypeConfig>>,
|
||||
) -> Json<AppendRpcResult> {
|
||||
Json(
|
||||
app.raft
|
||||
.append_entries(request)
|
||||
.await
|
||||
.map_err(|err| err.to_string()),
|
||||
)
|
||||
}
|
||||
|
||||
/// Raft RPC endpoint: installs a full snapshot received from the leader.
async fn snapshot(
    State(app): State<AppState>,
    Json(request): Json<InstallSnapshotBody>,
) -> Json<SnapshotRpcResult> {
    // Rebuild the snapshot handle from the raw bytes; the state machine's
    // snapshot data type is an in-memory `Cursor`.
    let snapshot = openraft::alias::SnapshotOf::<TypeConfig> {
        meta: request.meta,
        snapshot: std::io::Cursor::new(request.data),
    };

    Json(
        app.raft
            .install_full_snapshot(request.vote, snapshot)
            .await
            .map_err(|err| err.to_string()),
    )
}
|
||||
|
||||
fn known_nodes(config: &OpenRaftKvNodeConfig) -> BTreeMap<u64, String> {
|
||||
let mut known_nodes = config.peer_addrs.clone();
|
||||
known_nodes.insert(config.node_id, config.public_addr.clone());
|
||||
known_nodes
|
||||
}
|
||||
112
examples/openraft_kv/openraft-kv-node/src/types.rs
Normal file
112
examples/openraft_kv/openraft-kv-node/src/types.rs
Normal file
@ -0,0 +1,112 @@
|
||||
use std::collections::BTreeMap;

use openraft::{
    RaftMetrics,
    alias::{SnapshotMetaOf, VoteOf},
    raft::InstallSnapshotRequest,
};
use serde::{Deserialize, Serialize};

use crate::TypeConfig;

/// Result shape used by the simple admin endpoints in this example.
pub type OpenRaftResult<T> = Result<T, String>;

/// Request body for a replicated write submitted through the leader.
#[derive(Clone, Debug, Serialize, Deserialize)]
pub struct OpenRaftKvWriteRequest {
    /// Application key to write.
    pub key: String,
    /// Value stored for the key.
    pub value: String,
    /// Client-side serial used by OpenRaft's example state machine.
    pub serial: u64,
}

/// Response body returned after a replicated write is committed.
#[derive(Clone, Debug, Serialize, Deserialize)]
pub struct OpenRaftKvWriteResponse {
    /// Previous value stored under the key, if any.
    pub previous: Option<String>,
}

/// Request body for a key lookup.
#[derive(Clone, Debug, Serialize, Deserialize)]
pub struct OpenRaftKvReadRequest {
    /// Application key to look up.
    pub key: String,
}

/// Response body returned by a key lookup.
#[derive(Clone, Debug, Serialize, Deserialize)]
pub struct OpenRaftKvReadResponse {
    /// Current value stored under the key, if any.
    pub value: Option<String>,
}

/// Admin request used to register a learner in the current cluster.
#[derive(Clone, Debug, Serialize, Deserialize)]
pub struct AddLearnerRequest {
    /// OpenRaft node identifier for the learner.
    pub node_id: u64,
    /// Advertised Raft address for the learner.
    pub addr: String,
}

/// Admin request used to promote the cluster to a concrete voter set.
#[derive(Clone, Debug, Serialize, Deserialize)]
pub struct ChangeMembershipRequest {
    /// Full voter set that should own the cluster after the change.
    pub voters: Vec<u64>,
}

/// Snapshot of one node's externally visible Raft and application state.
#[derive(Clone, Debug, Serialize, Deserialize)]
pub struct OpenRaftKvState {
    /// Stable OpenRaft node identifier.
    pub node_id: u64,
    /// Advertised Raft address for this node.
    pub public_addr: String,
    /// Current OpenRaft role rendered as text.
    pub role: String,
    /// Leader known by this node, if any.
    pub current_leader: Option<u64>,
    /// Current term reported by this node.
    pub current_term: u64,
    /// Highest log index stored locally.
    pub last_log_index: Option<u64>,
    /// Highest log index applied to the state machine.
    pub last_applied_index: Option<u64>,
    /// Current voter set reported by this node.
    pub voters: Vec<u64>,
    /// Application state machine contents.
    pub kv: BTreeMap<String, String>,
}

/// JSON representation used for full-snapshot replication over HTTP.
// NOTE(review): the snapshot endpoint currently deserializes OpenRaft's own
// `InstallSnapshotRequest` (see `InstallSnapshotBody` below) rather than this
// type — confirm whether this body shape is still needed.
#[derive(Clone, Debug, Serialize, Deserialize)]
pub struct InstallFullSnapshotBody {
    /// Vote bundled with the snapshot transfer.
    pub vote: VoteOf<TypeConfig>,
    /// Snapshot metadata describing the transferred state.
    pub meta: SnapshotMetaOf<TypeConfig>,
    /// Serialized state machine bytes.
    pub data: Vec<u8>,
}

/// Serialized result of a vote RPC.
pub type VoteRpcResult = Result<openraft::raft::VoteResponse<TypeConfig>, String>;
/// Serialized result of an append-entries RPC.
pub type AppendRpcResult = Result<openraft::raft::AppendEntriesResponse<TypeConfig>, String>;
/// Serialized result of a full-snapshot RPC.
pub type SnapshotRpcResult = Result<openraft::raft::SnapshotResponse<TypeConfig>, String>;
/// JSON payload returned by the metrics endpoint.
pub type MetricsResult = Result<RaftMetrics<TypeConfig>, String>;
/// JSON payload returned by `/admin/init`.
pub type InitResult = Result<(), String>;
/// JSON payload returned by `/admin/add-learner`.
pub type AddLearnerResult = Result<(), String>;
/// JSON payload returned by `/admin/change-membership`.
pub type ChangeMembershipResult = Result<(), String>;
/// Request type accepted by the snapshot endpoint.
pub type InstallSnapshotBody = InstallSnapshotRequest<TypeConfig>;
|
||||
14
examples/openraft_kv/testing/integration/Cargo.toml
Normal file
14
examples/openraft_kv/testing/integration/Cargo.toml
Normal file
@ -0,0 +1,14 @@
|
||||
# Integration/runtime extensions for the OpenRaft key-value example: wires
# the example node into the local, compose, and k8s test runners.
[package]
edition.workspace = true
license.workspace = true
name = "openraft-kv-runtime-ext"
version.workspace = true

[dependencies]
async-trait = { workspace = true }
openraft-kv-node = { path = "../../openraft-kv-node" }
reqwest = { workspace = true }
testing-framework-core = { workspace = true }
testing-framework-runner-compose = { workspace = true }
testing-framework-runner-k8s = { workspace = true }
testing-framework-runner-local = { workspace = true }
|
||||
59
examples/openraft_kv/testing/integration/src/app.rs
Normal file
59
examples/openraft_kv/testing/integration/src/app.rs
Normal file
@ -0,0 +1,59 @@
|
||||
use std::io::Error;
|
||||
|
||||
use openraft_kv_node::{OpenRaftKvClient, OpenRaftKvNodeConfig};
|
||||
use testing_framework_core::scenario::{
|
||||
Application, ClusterNodeConfigApplication, ClusterNodeView, ClusterPeerView, DynError,
|
||||
NodeAccess, serialize_cluster_yaml_config,
|
||||
};
|
||||
|
||||
/// Three-node topology used by the OpenRaft example scenarios.
pub type OpenRaftKvTopology = testing_framework_core::topology::ClusterTopology;

/// Application environment wiring for the OpenRaft-backed key-value example.
///
/// Zero-sized marker type; all behavior lives in the trait impls below and in
/// the per-runner modules.
pub struct OpenRaftKvEnv;
|
||||
|
||||
impl Application for OpenRaftKvEnv {
    type Deployment = OpenRaftKvTopology;
    type NodeClient = OpenRaftKvClient;
    type NodeConfig = OpenRaftKvNodeConfig;

    /// Builds an HTTP client pointed at one node's API base URL.
    fn build_node_client(access: &NodeAccess) -> Result<Self::NodeClient, DynError> {
        Ok(OpenRaftKvClient::new(access.api_base_url()?))
    }

    /// Path the runner polls to decide when a node is ready to serve.
    fn node_readiness_path() -> &'static str {
        "/healthz"
    }
}
|
||||
|
||||
impl ClusterNodeConfigApplication for OpenRaftKvEnv {
    type ConfigError = Error;

    /// Fixed in-container HTTP port shared by every node.
    fn static_network_port() -> u16 {
        8080
    }

    /// Derives one node's config from its topology slot and the peer views.
    fn build_cluster_node_config(
        node: &ClusterNodeView,
        peers: &[ClusterPeerView],
    ) -> Result<Self::NodeConfig, Self::ConfigError> {
        Ok(OpenRaftKvNodeConfig {
            // Node ids mirror topology indices so admin calls can address
            // nodes deterministically.
            node_id: node.index() as u64,
            http_port: node.network_port(),
            public_addr: node.authority(),
            peer_addrs: peers
                .iter()
                .map(|peer| (peer.index() as u64, peer.authority()))
                .collect(),
            // Raft timing: elections fire at 3x the heartbeat interval and
            // above, favoring test stability over failover latency.
            heartbeat_interval_ms: 500,
            election_timeout_min_ms: 1_500,
            election_timeout_max_ms: 3_000,
        })
    }

    /// Renders the node config as YAML for distribution.
    fn serialize_cluster_node_config(
        config: &Self::NodeConfig,
    ) -> Result<String, Self::ConfigError> {
        serialize_cluster_yaml_config(config).map_err(Error::other)
    }
}
|
||||
112
examples/openraft_kv/testing/integration/src/compose_env.rs
Normal file
112
examples/openraft_kv/testing/integration/src/compose_env.rs
Normal file
@ -0,0 +1,112 @@
|
||||
use std::{fs, path::Path};
|
||||
|
||||
use testing_framework_core::{
|
||||
cfgsync::StaticNodeConfigProvider,
|
||||
scenario::{Application, DynError},
|
||||
topology::DeploymentDescriptor,
|
||||
};
|
||||
use testing_framework_runner_compose::{
|
||||
BinaryConfigNodeSpec, ComposeDeployEnv, ComposeDescriptor, NodeDescriptor,
|
||||
binary_config_node_runtime_spec, node_identifier,
|
||||
};
|
||||
|
||||
use crate::OpenRaftKvEnv;
|
||||
|
||||
/// In-container path where each node reads its YAML config.
const NODE_CONFIG_PATH: &str = "/etc/openraft-kv/config.yaml";
/// First published host port; node `i` is exposed at base + `i`.
const COMPOSE_HTTP_PORT_BASE: u16 = 47_080;

/// Shared runtime spec for every compose node: binary path, config mount
/// point, and the single in-container HTTP port (8080).
fn compose_node_spec() -> BinaryConfigNodeSpec {
    BinaryConfigNodeSpec::conventional(
        "/usr/local/bin/openraft-kv-node",
        NODE_CONFIG_PATH,
        vec![8080],
    )
}
|
||||
|
||||
/// Renders a compose port binding pinned to the loopback interface, so the
/// published port is reachable only from the host itself.
fn fixed_loopback_port_binding(host_port: u16, container_port: u16) -> String {
    [
        "127.0.0.1".to_owned(),
        host_port.to_string(),
        container_port.to_string(),
    ]
    .join(":")
}
|
||||
|
||||
impl ComposeDeployEnv for OpenRaftKvEnv {
    /// Renders one YAML config per node into the stack's `configs` directory
    /// before the compose stack starts.
    fn prepare_compose_configs(
        path: &Path,
        topology: &<Self as Application>::Deployment,
        _cfgsync_port: u16,
        _metrics_otlp_ingest_url: Option<&reqwest::Url>,
    ) -> Result<(), DynError> {
        let hostnames = Self::cfgsync_hostnames(topology);
        // `path` is assumed to be a file inside the stack directory; node
        // configs are written next to it under `configs/`.
        let stack_dir = path
            .parent()
            .ok_or_else(|| std::io::Error::other("compose config path has no parent"))?;
        let configs_dir = stack_dir.join("configs");
        fs::create_dir_all(&configs_dir)?;

        for index in 0..topology.node_count() {
            let mut config = Self::build_node_config(topology, index)?;
            // Peer addresses must use compose service hostnames rather than
            // the loopback addresses used by the local runner.
            Self::rewrite_for_hostnames(topology, index, &hostnames, &mut config)?;
            let rendered = Self::serialize_node_config(&config)?;
            fs::write(
                configs_dir.join(Self::static_node_config_file_name(index)),
                rendered,
            )?;
        }

        Ok(())
    }

    /// Per-node config file name inside the stack's `configs` directory.
    fn static_node_config_file_name(index: usize) -> String {
        format!("node-{index}.yaml")
    }

    /// All nodes share the same binary/config runtime spec.
    fn binary_config_node_spec(
        _topology: &<Self as Application>::Deployment,
        _index: usize,
    ) -> Result<Option<BinaryConfigNodeSpec>, DynError> {
        Ok(Some(compose_node_spec()))
    }

    /// Builds the compose service descriptor for every node in the topology.
    fn compose_descriptor(
        topology: &<Self as Application>::Deployment,
        _cfgsync_port: u16,
    ) -> Result<ComposeDescriptor, DynError> {
        let spec = compose_node_spec();

        let nodes = (0..topology.node_count())
            .map(|index| {
                let runtime = binary_config_node_runtime_spec(index, &spec);
                let file_name = Self::static_node_config_file_name(index);

                // Each node gets a distinct, fixed host port derived from its
                // index (see `COMPOSE_HTTP_PORT_BASE`).
                let host_port = COMPOSE_HTTP_PORT_BASE + index as u16;
                let ports = compose_node_ports(host_port, &runtime.container_ports);

                NodeDescriptor::new(
                    node_identifier(index),
                    runtime.image,
                    runtime.entrypoint,
                    // Mount the rendered config read-only at the path the
                    // node binary expects.
                    vec![format!(
                        "./stack/configs/{file_name}:{}:ro",
                        spec.config_container_path
                    )],
                    runtime.extra_hosts,
                    ports,
                    runtime.container_ports,
                    runtime.environment,
                    runtime.platform,
                )
            })
            .collect();

        Ok(ComposeDescriptor::new(nodes))
    }
}
|
||||
|
||||
fn compose_node_ports(host_port: u16, container_ports: &[u16]) -> Vec<String> {
|
||||
container_ports
|
||||
.iter()
|
||||
.map(|port| {
|
||||
// OpenRaft failover restarts the leader. Fixed host ports keep TF
|
||||
// clients stable across `docker compose restart`.
|
||||
fixed_loopback_port_binding(host_port, *port)
|
||||
})
|
||||
.collect()
|
||||
}
|
||||
21
examples/openraft_kv/testing/integration/src/k8s_env.rs
Normal file
21
examples/openraft_kv/testing/integration/src/k8s_env.rs
Normal file
@ -0,0 +1,21 @@
|
||||
use testing_framework_runner_k8s::{BinaryConfigK8sSpec, K8sBinaryApp};
|
||||
|
||||
use crate::OpenRaftKvEnv;
|
||||
|
||||
/// In-container path where the node reads its YAML config.
const CONTAINER_CONFIG_PATH: &str = "/etc/openraft-kv/config.yaml";
/// HTTP API port exposed by the node inside its container.
const CONTAINER_HTTP_PORT: u16 = 8080;
/// Secondary service port; per its name, reserved for testing-framework
/// access (see `BinaryConfigK8sSpec::conventional`).
const SERVICE_TESTING_PORT: u16 = 8081;
/// Prefix used for node pod/service names.
const NODE_NAME_PREFIX: &str = "openraft-kv-node";

impl K8sBinaryApp for OpenRaftKvEnv {
    /// Describes how the k8s runner launches the node binary using its
    /// conventional single-binary layout.
    fn k8s_binary_spec() -> BinaryConfigK8sSpec {
        BinaryConfigK8sSpec::conventional(
            "openraft-kv",
            NODE_NAME_PREFIX,
            "/usr/local/bin/openraft-kv-node",
            CONTAINER_CONFIG_PATH,
            CONTAINER_HTTP_PORT,
            SERVICE_TESTING_PORT,
        )
    }
}
|
||||
18
examples/openraft_kv/testing/integration/src/lib.rs
Normal file
18
examples/openraft_kv/testing/integration/src/lib.rs
Normal file
@ -0,0 +1,18 @@
|
||||
//! Runner integration for the OpenRaft key-value example: application
//! wiring, per-runner environments, observation, and scenario helpers.

mod app;
mod compose_env;
mod k8s_env;
mod local_env;
mod observation;
pub mod scenario;

pub use app::*;
pub use observation::*;
pub use scenario::{OpenRaftKvBuilderExt, OpenRaftKvScenarioBuilder};

/// Local process deployer for the OpenRaft example app.
pub type OpenRaftKvLocalDeployer = testing_framework_runner_local::ProcessDeployer<OpenRaftKvEnv>;
/// Docker Compose deployer for the OpenRaft example app.
pub type OpenRaftKvComposeDeployer =
    testing_framework_runner_compose::ComposeDeployer<OpenRaftKvEnv>;
/// Kubernetes deployer for the OpenRaft example app.
pub type OpenRaftKvK8sDeployer = testing_framework_runner_k8s::K8sDeployer<OpenRaftKvEnv>;
|
||||
125
examples/openraft_kv/testing/integration/src/local_env.rs
Normal file
125
examples/openraft_kv/testing/integration/src/local_env.rs
Normal file
@ -0,0 +1,125 @@
|
||||
use std::collections::{BTreeMap, HashMap};
|
||||
|
||||
use openraft_kv_node::OpenRaftKvNodeConfig;
|
||||
use testing_framework_core::{
|
||||
scenario::{DynError, StartNodeOptions},
|
||||
topology::DeploymentDescriptor,
|
||||
};
|
||||
use testing_framework_runner_local::{
|
||||
BuiltNodeConfig, LocalDeployerEnv, LocalNodePorts, LocalProcessSpec, NodeConfigEntry,
|
||||
reserve_local_node_ports, yaml_node_config,
|
||||
};
|
||||
|
||||
use crate::OpenRaftKvEnv;
|
||||
|
||||
impl LocalDeployerEnv for OpenRaftKvEnv {
    /// Builds the config for a single (re)started local node, reserving a
    /// fresh network port for it.
    fn build_node_config_from_template(
        _topology: &Self::Deployment,
        index: usize,
        _peer_ports_by_name: &HashMap<String, u16>,
        _options: &StartNodeOptions<Self>,
        peer_ports: &[u16],
        template_config: Option<&OpenRaftKvNodeConfig>,
    ) -> Result<BuiltNodeConfig<OpenRaftKvNodeConfig>, DynError> {
        let mut reserved = reserve_local_node_ports(1, &[], "node")
            .map_err(|source| -> DynError { source.into() })?;

        let ports = reserved
            .pop()
            .ok_or_else(|| std::io::Error::other("failed to reserve local node ports"))?;

        // Start from the template when given; otherwise synthesize a fresh
        // config with an empty peer map (overwritten below).
        let mut config = template_config
            .cloned()
            .unwrap_or_else(|| local_node_config(index, ports.network_port(), BTreeMap::new()));

        // OpenRaft peer config is index-sensitive, so local restarts must rebuild
        // the full peer map from the current reserved port set.
        let network_port = ports.network_port();
        config.node_id = index as u64;
        config.http_port = network_port;
        config.public_addr = local_addr(network_port);
        config.peer_addrs = peer_addrs_from_ports(peer_ports, index);

        Ok(BuiltNodeConfig {
            config,
            network_port,
        })
    }

    /// Reserves ports for the whole topology up front and derives every
    /// node's initial config from the same port view.
    fn build_initial_node_configs(
        topology: &Self::Deployment,
    ) -> Result<
        Vec<NodeConfigEntry<OpenRaftKvNodeConfig>>,
        testing_framework_runner_local::process::ProcessSpawnError,
    > {
        let reserved_ports = reserve_local_node_ports(topology.node_count(), &[], "node")?;

        let peer_ports = reserved_ports
            .iter()
            .map(LocalNodePorts::network_port)
            .collect::<Vec<_>>();

        // Build every node from the same reserved port view so the initial
        // cluster starts with a consistent peer list on all nodes.
        Ok(reserved_ports
            .iter()
            .enumerate()
            .map(|(index, ports)| NodeConfigEntry {
                name: format!("node-{index}"),
                config: local_node_config(
                    index,
                    ports.network_port(),
                    peer_addrs_from_ports(&peer_ports, index),
                ),
            })
            .collect())
    }

    /// Name prefix used for the initially spawned nodes.
    fn initial_node_name_prefix() -> &'static str {
        "node"
    }

    /// Process spec used to spawn each local node.
    // NOTE(review): "OPENRAFT_KV_NODE_BIN" appears to be an env override for
    // the binary path — confirm against `LocalProcessSpec::new`.
    fn local_process_spec() -> Option<LocalProcessSpec> {
        Some(
            LocalProcessSpec::new("OPENRAFT_KV_NODE_BIN", "openraft-kv-node").with_rust_log("info"),
        )
    }

    /// Serializes a node config to the bytes written to its config file.
    fn render_local_config(config: &OpenRaftKvNodeConfig) -> Result<Vec<u8>, DynError> {
        yaml_node_config(config)
    }

    /// HTTP API port the runner should use to reach this node.
    fn http_api_port(config: &OpenRaftKvNodeConfig) -> Option<u16> {
        Some(config.http_port)
    }
}
|
||||
|
||||
/// Builds a locally runnable node config for the given topology slot.
///
/// `peer_addrs` may be empty when the caller rebuilds it afterwards (see
/// `build_node_config_from_template`).
fn local_node_config(
    index: usize,
    network_port: u16,
    peer_addrs: BTreeMap<u64, String>,
) -> OpenRaftKvNodeConfig {
    OpenRaftKvNodeConfig {
        node_id: index as u64,
        http_port: network_port,
        public_addr: local_addr(network_port),
        peer_addrs,

        // Same Raft timing as the cluster-runner config: elections at 3x the
        // heartbeat interval and above, favoring test stability.
        heartbeat_interval_ms: 500,
        election_timeout_min_ms: 1_500,
        election_timeout_max_ms: 3_000,
    }
}
|
||||
|
||||
fn peer_addrs_from_ports(peer_ports: &[u16], local_index: usize) -> BTreeMap<u64, String> {
|
||||
peer_ports
|
||||
.iter()
|
||||
.enumerate()
|
||||
.filter(|(peer_index, _)| *peer_index != local_index)
|
||||
.map(|(peer_index, peer_port)| (peer_index as u64, local_addr(*peer_port)))
|
||||
.collect()
|
||||
}
|
||||
|
||||
/// Renders a loopback `host:port` authority for a locally spawned node.
fn local_addr(port: u16) -> String {
    let mut addr = String::from("127.0.0.1:");
    addr.push_str(&port.to_string());
    addr
}
|
||||
262
examples/openraft_kv/testing/integration/src/observation.rs
Normal file
262
examples/openraft_kv/testing/integration/src/observation.rs
Normal file
@ -0,0 +1,262 @@
|
||||
use std::{
|
||||
collections::{BTreeMap, BTreeSet},
|
||||
sync::Arc,
|
||||
time::Duration,
|
||||
};
|
||||
|
||||
use async_trait::async_trait;
|
||||
use openraft_kv_node::{OpenRaftKvClient, OpenRaftKvState};
|
||||
use testing_framework_core::{
|
||||
observation::{
|
||||
BoxedSourceProvider, ObservationConfig, ObservedSource, Observer, StaticSourceProvider,
|
||||
},
|
||||
scenario::{Application, DynError, NodeClients},
|
||||
};
|
||||
use testing_framework_runner_k8s::ManualCluster;
|
||||
|
||||
use crate::OpenRaftKvEnv;
|
||||
|
||||
/// How often the observer polls every node for its state.
const OBSERVATION_INTERVAL: Duration = Duration::from_millis(250);
/// How many historical snapshots the observation runtime retains.
const OBSERVATION_HISTORY_LIMIT: usize = 16;

/// Materialized OpenRaft cluster state built from the latest node polls.
#[derive(Clone, Debug, Default)]
pub struct OpenRaftClusterSnapshot {
    // Successful per-node states, sorted by node id.
    states: Vec<OpenRaftKvState>,
    // Nodes whose state read failed in this cycle.
    failures: Vec<OpenRaftSourceFailure>,
}
|
||||
|
||||
impl OpenRaftClusterSnapshot {
    /// Returns the successfully observed node states sorted by node id.
    #[must_use]
    pub fn states(&self) -> &[OpenRaftKvState] {
        &self.states
    }

    /// Returns `true` when the snapshot contains no successful node states.
    #[must_use]
    pub fn is_empty(&self) -> bool {
        self.states.is_empty()
    }

    /// Returns the unique observed leader when all responding nodes agree.
    ///
    /// `different_from` excludes a previous leader, letting callers wait for
    /// an actual failover instead of re-observing the old leader.
    #[must_use]
    pub fn agreed_leader(&self, different_from: Option<u64>) -> Option<u64> {
        let observed = self
            .states
            .iter()
            .filter_map(|state| state.current_leader)
            .collect::<BTreeSet<_>>();

        // Candidate: the single element of the set (if any).
        let leader = observed.iter().next().copied()?;

        // Agreement requires exactly one distinct leader, and it must differ
        // from the excluded one.
        (observed.len() == 1 && different_from != Some(leader)).then_some(leader)
    }

    /// Returns `true` when every observed node reports the expected voter set.
    ///
    /// Any failed node read makes this `false`: partial views don't count.
    #[must_use]
    pub fn all_voters_match(&self, expected_voters: &BTreeSet<u64>) -> bool {
        !self.states.is_empty()
            && self.failures.is_empty()
            && self.states.iter().all(|state| {
                state.voters.iter().copied().collect::<BTreeSet<_>>() == *expected_voters
            })
    }

    /// Returns `true` when every observed node exposes the expected replicated
    /// key/value data.
    ///
    /// Also requires each node to see a leader and the full voter set, and —
    /// like `all_voters_match` — tolerates no failed node reads.
    #[must_use]
    pub fn all_kv_match(
        &self,
        expected: &BTreeMap<String, String>,
        full_voter_set: &[u64],
    ) -> bool {
        !self.states.is_empty()
            && self.failures.is_empty()
            && self.states.iter().all(|state| {
                state.current_leader.is_some()
                    && state.voters == full_voter_set
                    && expected
                        .iter()
                        .all(|(key, value)| state.kv.get(key) == Some(value))
            })
    }

    /// Returns a concise summary for timeout and validation errors.
    #[must_use]
    pub fn summary(&self) -> String {
        let mut lines = self
            .states
            .iter()
            .map(|state| {
                format!(
                    "node={} leader={:?} voters={:?} keys={}",
                    state.node_id,
                    state.current_leader,
                    state.voters,
                    state.kv.len()
                )
            })
            .collect::<Vec<_>>();

        // Failed reads are appended after the successful states.
        lines.extend(self.failures.iter().map(OpenRaftSourceFailure::summary));

        if lines.is_empty() {
            return "no state observed yet".to_owned();
        }

        lines.join("; ")
    }
}
|
||||
|
||||
/// One failed source read captured during an observation cycle.
#[derive(Clone, Debug, Eq, PartialEq)]
pub struct OpenRaftSourceFailure {
    source_name: String,
    message: String,
}

impl OpenRaftSourceFailure {
    /// Captures a failure reported by the named source.
    fn new(source_name: &str, message: &str) -> Self {
        Self {
            source_name: String::from(source_name),
            message: String::from(message),
        }
    }

    /// Renders a compact `source=<name> error=<message>` line for summaries.
    fn summary(&self) -> String {
        let mut line =
            String::with_capacity(14 + self.source_name.len() + self.message.len());
        line.push_str("source=");
        line.push_str(&self.source_name);
        line.push_str(" error=");
        line.push_str(&self.message);
        line
    }
}
|
||||
|
||||
/// Observer that keeps the latest per-node OpenRaft state.
#[derive(Clone, Debug, Default)]
pub struct OpenRaftClusterObserver;

impl OpenRaftClusterObserver {
    /// Default runtime configuration for the OpenRaft example observer.
    #[must_use]
    pub fn config() -> ObservationConfig {
        ObservationConfig {
            interval: OBSERVATION_INTERVAL,
            history_limit: OBSERVATION_HISTORY_LIMIT,
        }
    }
}

/// Captures one best-effort OpenRaft cluster snapshot from the provided node
/// clients.
///
/// Unreachable nodes become `failures` in the snapshot rather than errors.
pub async fn capture_openraft_cluster_snapshot(
    clients: &[OpenRaftKvClient],
) -> OpenRaftClusterSnapshot {
    capture_cluster_snapshot(&named_sources(clients.to_vec())).await
}
|
||||
|
||||
#[async_trait]
impl Observer for OpenRaftClusterObserver {
    type Source = OpenRaftKvClient;
    type State = OpenRaftClusterSnapshot;
    type Snapshot = OpenRaftClusterSnapshot;
    // This observer produces no discrete events; state is snapshot-only.
    type Event = ();

    /// Seeds the state with an initial snapshot of all sources.
    async fn init(
        &self,
        sources: &[ObservedSource<Self::Source>],
    ) -> Result<Self::State, DynError> {
        Ok(capture_cluster_snapshot(sources).await)
    }

    /// Replaces the state wholesale on every poll; the latest snapshot is
    /// the only thing callers inspect.
    async fn poll(
        &self,
        sources: &[ObservedSource<Self::Source>],
        state: &mut Self::State,
    ) -> Result<Vec<Self::Event>, DynError> {
        *state = capture_cluster_snapshot(sources).await;

        Ok(Vec::new())
    }

    fn snapshot(&self, state: &Self::State) -> Self::Snapshot {
        state.clone()
    }
}
|
||||
|
||||
/// Builds the fixed source provider used by the scenario-based OpenRaft
/// examples.
pub fn openraft_cluster_source_provider(
    _deployment: &<OpenRaftKvEnv as Application>::Deployment,
    node_clients: NodeClients<OpenRaftKvEnv>,
) -> Result<BoxedSourceProvider<OpenRaftKvClient>, DynError> {
    // A static provider: the client set is captured once and never changes.
    Ok(Box::new(StaticSourceProvider::new(named_sources(
        node_clients.snapshot(),
    ))))
}

/// Dynamic source provider backed by a manual cluster.
///
/// This keeps observation aligned with the latest client handles after manual
/// node restarts.
#[derive(Clone)]
pub struct OpenRaftManualClusterSourceProvider {
    cluster: Arc<ManualCluster<OpenRaftKvEnv>>,
    // Fixed "node-{i}" names looked up against the cluster on every poll.
    node_names: Vec<String>,
}

impl OpenRaftManualClusterSourceProvider {
    /// Builds a provider for the fixed node names used by the OpenRaft
    /// examples.
    #[must_use]
    pub fn new(cluster: Arc<ManualCluster<OpenRaftKvEnv>>, node_count: usize) -> Self {
        Self {
            cluster,
            node_names: (0..node_count)
                .map(|index| format!("node-{index}"))
                .collect(),
        }
    }
}

#[async_trait]
impl testing_framework_core::observation::SourceProvider<OpenRaftKvClient>
    for OpenRaftManualClusterSourceProvider
{
    /// Re-resolves each node's client on every call; nodes that are
    /// currently down are simply omitted.
    async fn sources(&self) -> Result<Vec<ObservedSource<OpenRaftKvClient>>, DynError> {
        Ok(self
            .node_names
            .iter()
            .filter_map(|name| {
                self.cluster
                    .node_client(name)
                    .map(|client| ObservedSource::new(name, client))
            })
            .collect())
    }
}
|
||||
|
||||
/// Wraps clients as observed sources named "node-{index}" by position.
fn named_sources(clients: Vec<OpenRaftKvClient>) -> Vec<ObservedSource<OpenRaftKvClient>> {
    clients
        .into_iter()
        .enumerate()
        .map(|(index, client)| ObservedSource::new(&format!("node-{index}"), client))
        .collect()
}

/// Polls every source once and folds the results into one snapshot.
///
/// Failed reads are recorded as failures instead of aborting the capture.
async fn capture_cluster_snapshot(
    sources: &[ObservedSource<OpenRaftKvClient>],
) -> OpenRaftClusterSnapshot {
    let mut states = Vec::with_capacity(sources.len());
    let mut failures = Vec::new();

    for source in sources {
        match source.source.state().await {
            Ok(state) => states.push(state),
            Err(error) => {
                failures.push(OpenRaftSourceFailure::new(&source.name, &error.to_string()))
            }
        }
    }

    // Sort so `states()` is deterministic regardless of response order.
    states.sort_by_key(|state| state.node_id);

    OpenRaftClusterSnapshot { states, failures }
}
|
||||
32
examples/openraft_kv/testing/integration/src/scenario.rs
Normal file
32
examples/openraft_kv/testing/integration/src/scenario.rs
Normal file
@ -0,0 +1,32 @@
|
||||
use testing_framework_core::scenario::{CoreBuilderExt, ScenarioBuilder};
|
||||
|
||||
use crate::{
|
||||
OpenRaftClusterObserver, OpenRaftKvEnv, OpenRaftKvTopology, openraft_cluster_source_provider,
|
||||
};
|
||||
|
||||
/// Scenario builder alias used by the OpenRaft example binaries.
|
||||
pub type OpenRaftKvScenarioBuilder = ScenarioBuilder<OpenRaftKvEnv>;
|
||||
|
||||
/// Convenience helpers for constructing the fixed three-node OpenRaft topology.
|
||||
pub trait OpenRaftKvBuilderExt: Sized {
|
||||
/// Starts from the default three-node deployment and lets callers adjust
|
||||
/// it.
|
||||
fn deployment_with(f: impl FnOnce(OpenRaftKvTopology) -> OpenRaftKvTopology) -> Self;
|
||||
|
||||
/// Attaches the default OpenRaft cluster observer to the scenario.
|
||||
fn with_cluster_observer(self) -> Self;
|
||||
}
|
||||
|
||||
impl OpenRaftKvBuilderExt for OpenRaftKvScenarioBuilder {
|
||||
fn deployment_with(f: impl FnOnce(OpenRaftKvTopology) -> OpenRaftKvTopology) -> Self {
|
||||
OpenRaftKvScenarioBuilder::with_deployment(f(OpenRaftKvTopology::new(3)))
|
||||
}
|
||||
|
||||
fn with_cluster_observer(self) -> Self {
|
||||
self.with_observer(
|
||||
OpenRaftClusterObserver,
|
||||
openraft_cluster_source_provider,
|
||||
OpenRaftClusterObserver::config(),
|
||||
)
|
||||
}
|
||||
}
|
||||
15
examples/openraft_kv/testing/workloads/Cargo.toml
Normal file
15
examples/openraft_kv/testing/workloads/Cargo.toml
Normal file
@ -0,0 +1,15 @@
|
||||
# Workloads and expectations for the OpenRaft key-value example scenarios.
[package]
edition.workspace = true
license.workspace = true
name = "openraft-kv-runtime-workloads"
version.workspace = true

[dependencies]
# NOTE(review): anyhow/thiserror pin versions directly instead of inheriting
# from the workspace — confirm whether workspace entries exist for them.
anyhow = "1.0"
async-trait = { workspace = true }
openraft-kv-node = { path = "../../openraft-kv-node" }
openraft-kv-runtime-ext = { path = "../integration" }
testing-framework-core = { workspace = true }
thiserror = "2.0"
tokio = { workspace = true, features = ["full"] }
tracing = { workspace = true }
|
||||
61
examples/openraft_kv/testing/workloads/src/convergence.rs
Normal file
61
examples/openraft_kv/testing/workloads/src/convergence.rs
Normal file
@ -0,0 +1,61 @@
|
||||
use std::time::Duration;
|
||||
|
||||
use async_trait::async_trait;
|
||||
use openraft_kv_runtime_ext::{OpenRaftClusterObserver, OpenRaftKvEnv};
|
||||
use testing_framework_core::{
|
||||
observation::ObservationHandle,
|
||||
scenario::{DynError, Expectation, RunContext},
|
||||
};
|
||||
|
||||
use crate::support::{expected_kv, wait_for_observed_replication};
|
||||
|
||||
/// Expectation that waits for the full voter set and the writes from this run
/// to converge on every node.
#[derive(Clone)]
pub struct OpenRaftKvConverges {
    total_writes: usize,
    timeout: Duration,
    key_prefix: String,
}

impl OpenRaftKvConverges {
    /// Creates a convergence check for the given number of replicated writes.
    #[must_use]
    pub fn new(total_writes: usize) -> Self {
        // Defaults: a 30-second budget and the "raft-key" prefix shared with
        // the workload side.
        Self {
            key_prefix: "raft-key".to_owned(),
            timeout: Duration::from_secs(30),
            total_writes,
        }
    }

    /// Overrides the key prefix used to derive expected writes.
    #[must_use]
    pub fn key_prefix(mut self, value: &str) -> Self {
        self.key_prefix = String::from(value);
        self
    }

    /// Overrides the convergence timeout.
    #[must_use]
    pub const fn timeout(mut self, value: Duration) -> Self {
        self.timeout = value;
        self
    }
}
|
||||
|
||||
#[async_trait]
impl Expectation<OpenRaftKvEnv> for OpenRaftKvConverges {
    fn name(&self) -> &str {
        "openraft_kv_converges"
    }

    /// Waits (up to `self.timeout`) until the cluster observer reports the
    /// expected replicated key/value pairs on every node.
    async fn evaluate(&mut self, ctx: &RunContext<OpenRaftKvEnv>) -> Result<(), DynError> {
        // `expected_kv` derives pairs from the prefix and write count —
        // presumably mirroring the workload's write scheme; confirm there.
        let expected = expected_kv(&self.key_prefix, self.total_writes);
        // Requires the scenario to have attached the cluster observer.
        let observer = ctx.require_extension::<ObservationHandle<OpenRaftClusterObserver>>()?;

        wait_for_observed_replication(&observer, &expected, self.timeout).await?;

        Ok(())
    }
}
|
||||
207
examples/openraft_kv/testing/workloads/src/failover.rs
Normal file
207
examples/openraft_kv/testing/workloads/src/failover.rs
Normal file
@ -0,0 +1,207 @@
|
||||
use std::time::Duration;
|
||||
|
||||
use async_trait::async_trait;
|
||||
use openraft_kv_node::OpenRaftKvClient;
|
||||
use openraft_kv_runtime_ext::{OpenRaftClusterObserver, OpenRaftKvEnv};
|
||||
use testing_framework_core::{
|
||||
observation::ObservationHandle,
|
||||
scenario::{DynError, RunContext, Workload},
|
||||
};
|
||||
use tracing::info;
|
||||
|
||||
use crate::support::{
|
||||
OpenRaftMembership, ensure_cluster_size, resolve_client_for_node, wait_for_observed_leader,
|
||||
wait_for_observed_membership, write_batch,
|
||||
};
|
||||
|
||||
/// Workload that bootstraps the cluster, expands it to three voters, writes one
/// batch, restarts the leader, then writes a second batch through the new
/// leader.
#[derive(Clone)]
pub struct OpenRaftKvFailoverWorkload {
    /// Writes issued before the leader restart.
    first_batch: usize,
    /// Writes issued after the leader restart.
    second_batch: usize,
    /// Bound on leader/membership transitions and client resolution.
    timeout: Duration,
    /// Prefix for all generated keys.
    key_prefix: String,
}
|
||||
|
||||
impl OpenRaftKvFailoverWorkload {
|
||||
/// Creates the default failover workload configuration.
|
||||
#[must_use]
|
||||
pub fn new() -> Self {
|
||||
Self {
|
||||
first_batch: 8,
|
||||
second_batch: 8,
|
||||
timeout: Duration::from_secs(30),
|
||||
key_prefix: "raft-key".to_owned(),
|
||||
}
|
||||
}
|
||||
|
||||
/// Sets the number of writes issued before the leader restart.
|
||||
#[must_use]
|
||||
pub const fn first_batch(mut self, value: usize) -> Self {
|
||||
self.first_batch = value;
|
||||
self
|
||||
}
|
||||
|
||||
/// Sets the number of writes issued after the leader restart.
|
||||
#[must_use]
|
||||
pub const fn second_batch(mut self, value: usize) -> Self {
|
||||
self.second_batch = value;
|
||||
self
|
||||
}
|
||||
|
||||
/// Overrides the key prefix used for generated writes.
|
||||
#[must_use]
|
||||
pub fn key_prefix(mut self, value: &str) -> Self {
|
||||
self.key_prefix = value.to_owned();
|
||||
self
|
||||
}
|
||||
|
||||
/// Overrides the timeout used for leader and membership transitions.
|
||||
#[must_use]
|
||||
pub const fn timeout(mut self, value: Duration) -> Self {
|
||||
self.timeout = value;
|
||||
self
|
||||
}
|
||||
}
|
||||
|
||||
impl Default for OpenRaftKvFailoverWorkload {
|
||||
fn default() -> Self {
|
||||
Self::new()
|
||||
}
|
||||
}
|
||||
|
||||
#[async_trait]
|
||||
impl Workload<OpenRaftKvEnv> for OpenRaftKvFailoverWorkload {
|
||||
fn name(&self) -> &str {
|
||||
"openraft_kv_failover_workload"
|
||||
}
|
||||
|
||||
async fn start(&self, ctx: &RunContext<OpenRaftKvEnv>) -> Result<(), DynError> {
|
||||
let clients = ctx.node_clients().snapshot();
|
||||
let observer = ctx.require_extension::<ObservationHandle<OpenRaftClusterObserver>>()?;
|
||||
|
||||
ensure_cluster_size(&clients, 3)?;
|
||||
|
||||
self.bootstrap_cluster(&clients).await?;
|
||||
|
||||
let initial_leader = wait_for_observed_leader(&observer, self.timeout, None).await?;
|
||||
let membership = OpenRaftMembership::discover(&clients).await?;
|
||||
|
||||
self.promote_cluster(&observer, &clients, initial_leader, &membership)
|
||||
.await?;
|
||||
self.write_initial_batch(&clients, initial_leader).await?;
|
||||
|
||||
let new_leader = self
|
||||
.restart_leader_and_wait_for_failover(ctx, &observer, initial_leader)
|
||||
.await?;
|
||||
self.write_second_batch(&clients, new_leader).await?;
|
||||
|
||||
Ok(())
|
||||
}
|
||||
}
|
||||
|
||||
impl OpenRaftKvFailoverWorkload {
|
||||
async fn bootstrap_cluster(&self, clients: &[OpenRaftKvClient]) -> Result<(), DynError> {
|
||||
info!("initializing openraft cluster");
|
||||
|
||||
clients[0].init_self().await?;
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
async fn promote_cluster(
|
||||
&self,
|
||||
observer: &ObservationHandle<OpenRaftClusterObserver>,
|
||||
clients: &[OpenRaftKvClient],
|
||||
leader_id: u64,
|
||||
membership: &OpenRaftMembership,
|
||||
) -> Result<(), DynError> {
|
||||
let leader = resolve_client_for_node(clients, leader_id, self.timeout).await?;
|
||||
|
||||
for learner in membership.learner_targets(leader_id) {
|
||||
info!(
|
||||
target = learner.node_id,
|
||||
addr = %learner.public_addr,
|
||||
"adding learner"
|
||||
);
|
||||
|
||||
leader
|
||||
.add_learner(learner.node_id, &learner.public_addr)
|
||||
.await?;
|
||||
}
|
||||
|
||||
let voter_ids = membership.voter_ids();
|
||||
leader.change_membership(voter_ids.iter().copied()).await?;
|
||||
|
||||
wait_for_observed_membership(observer, &voter_ids, self.timeout).await?;
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
async fn write_initial_batch(
|
||||
&self,
|
||||
clients: &[OpenRaftKvClient],
|
||||
leader_id: u64,
|
||||
) -> Result<(), DynError> {
|
||||
info!(
|
||||
leader = leader_id,
|
||||
writes = self.first_batch,
|
||||
"writing initial batch"
|
||||
);
|
||||
|
||||
let leader = resolve_client_for_node(clients, leader_id, self.timeout).await?;
|
||||
write_batch(&leader, &self.key_prefix, 0, self.first_batch).await?;
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
async fn restart_leader_and_wait_for_failover(
|
||||
&self,
|
||||
ctx: &RunContext<OpenRaftKvEnv>,
|
||||
observer: &ObservationHandle<OpenRaftClusterObserver>,
|
||||
leader_id: u64,
|
||||
) -> Result<u64, DynError> {
|
||||
let Some(control) = ctx.node_control() else {
|
||||
return Err("openraft failover workload requires node control".into());
|
||||
};
|
||||
|
||||
let leader_name = format!("node-{leader_id}");
|
||||
info!(%leader_name, "restarting current leader");
|
||||
|
||||
control.restart_node(&leader_name).await?;
|
||||
|
||||
let new_leader = wait_for_observed_leader(observer, self.timeout, Some(leader_id)).await?;
|
||||
|
||||
info!(
|
||||
old_leader = leader_id,
|
||||
new_leader, "leader changed after restart"
|
||||
);
|
||||
|
||||
Ok(new_leader)
|
||||
}
|
||||
|
||||
async fn write_second_batch(
|
||||
&self,
|
||||
clients: &[OpenRaftKvClient],
|
||||
leader_id: u64,
|
||||
) -> Result<(), DynError> {
|
||||
info!(
|
||||
leader = leader_id,
|
||||
writes = self.second_batch,
|
||||
"writing second batch"
|
||||
);
|
||||
|
||||
let leader = resolve_client_for_node(clients, leader_id, self.timeout).await?;
|
||||
write_batch(
|
||||
&leader,
|
||||
&self.key_prefix,
|
||||
self.first_batch,
|
||||
self.second_batch,
|
||||
)
|
||||
.await?;
|
||||
|
||||
Ok(())
|
||||
}
|
||||
}
|
||||
14
examples/openraft_kv/testing/workloads/src/lib.rs
Normal file
14
examples/openraft_kv/testing/workloads/src/lib.rs
Normal file
@ -0,0 +1,14 @@
|
||||
mod convergence;
|
||||
mod failover;
|
||||
mod support;
|
||||
|
||||
/// Replication expectation used by the OpenRaft example binaries.
|
||||
pub use convergence::OpenRaftKvConverges;
|
||||
/// Failover workload used by the OpenRaft example binaries.
|
||||
pub use failover::OpenRaftKvFailoverWorkload;
|
||||
/// Shared cluster helpers used by the OpenRaft workload and manual k8s example.
|
||||
pub use support::{
|
||||
FULL_VOTER_SET, OpenRaftClusterError, OpenRaftMembership, ensure_cluster_size, expected_kv,
|
||||
resolve_client_for_node, wait_for_leader, wait_for_membership, wait_for_observed_leader,
|
||||
wait_for_observed_membership, wait_for_observed_replication, wait_for_replication, write_batch,
|
||||
};
|
||||
328
examples/openraft_kv/testing/workloads/src/support.rs
Normal file
328
examples/openraft_kv/testing/workloads/src/support.rs
Normal file
@ -0,0 +1,328 @@
|
||||
use std::{
|
||||
collections::{BTreeMap, BTreeSet},
|
||||
time::Duration,
|
||||
};
|
||||
|
||||
use openraft_kv_node::{OpenRaftKvClient, OpenRaftKvState};
|
||||
use openraft_kv_runtime_ext::{
|
||||
OpenRaftClusterObserver, OpenRaftClusterSnapshot, capture_openraft_cluster_snapshot,
|
||||
};
|
||||
use testing_framework_core::observation::{ObservationHandle, ObservationSnapshot};
|
||||
use thiserror::Error;
|
||||
use tokio::time::{Instant, sleep};
|
||||
|
||||
/// Fixed voter set used by the example cluster.
pub const FULL_VOTER_SET: [u64; 3] = [0, 1, 2];

/// Delay between successive cluster-state polls.
const POLL_INTERVAL: Duration = Duration::from_millis(250);
/// Delay between successive attempts to match a client to a node id.
const CLIENT_RESOLUTION_INTERVAL: Duration = Duration::from_millis(200);
|
||||
|
||||
/// One learner candidate discovered from cluster state.
#[derive(Clone, Debug, Eq, PartialEq)]
pub struct LearnerTarget {
    /// Node identifier used by OpenRaft membership.
    pub node_id: u64,
    /// Public address advertised for Raft traffic.
    pub public_addr: String,
}
|
||||
|
||||
/// Membership view captured from the current node states.
|
||||
#[derive(Clone, Debug)]
|
||||
pub struct OpenRaftMembership {
|
||||
states: Vec<OpenRaftKvState>,
|
||||
}
|
||||
|
||||
impl OpenRaftMembership {
|
||||
/// Builds a membership view from already observed node states.
|
||||
#[must_use]
|
||||
pub fn from_states(states: &[OpenRaftKvState]) -> Self {
|
||||
let mut states = states.to_vec();
|
||||
states.sort_by_key(|state| state.node_id);
|
||||
|
||||
Self { states }
|
||||
}
|
||||
|
||||
/// Reads and sorts the current node states by id.
|
||||
pub async fn discover(clients: &[OpenRaftKvClient]) -> Result<Self, OpenRaftClusterError> {
|
||||
let mut states = Vec::with_capacity(clients.len());
|
||||
|
||||
for client in clients {
|
||||
states.push(client.state().await.map_err(OpenRaftClusterError::Client)?);
|
||||
}
|
||||
|
||||
Ok(Self::from_states(&states))
|
||||
}
|
||||
|
||||
/// Returns the full voter set implied by the discovered nodes.
|
||||
#[must_use]
|
||||
pub fn voter_ids(&self) -> BTreeSet<u64> {
|
||||
self.states.iter().map(|state| state.node_id).collect()
|
||||
}
|
||||
|
||||
/// Returns every non-leader node as a learner target.
|
||||
#[must_use]
|
||||
pub fn learner_targets(&self, leader_id: u64) -> Vec<LearnerTarget> {
|
||||
self.states
|
||||
.iter()
|
||||
.filter(|state| state.node_id != leader_id)
|
||||
.map(|state| LearnerTarget {
|
||||
node_id: state.node_id,
|
||||
public_addr: state.public_addr.clone(),
|
||||
})
|
||||
.collect()
|
||||
}
|
||||
}
|
||||
|
||||
/// Errors raised by the OpenRaft example cluster helpers.
|
||||
#[derive(Debug, Error)]
|
||||
pub enum OpenRaftClusterError {
|
||||
#[error("openraft example requires at least {expected} node clients, got {actual}")]
|
||||
InsufficientClients { expected: usize, actual: usize },
|
||||
#[error("failed to query openraft node state: {0}")]
|
||||
Client(#[source] anyhow::Error),
|
||||
#[error("openraft cluster observation is not available yet")]
|
||||
MissingObservation,
|
||||
#[error(
|
||||
"timed out waiting for {action} after {timeout:?}; last observation: {last_observation}"
|
||||
)]
|
||||
Timeout {
|
||||
action: &'static str,
|
||||
timeout: Duration,
|
||||
last_observation: String,
|
||||
},
|
||||
#[error("timed out resolving node client for {node_id} after {timeout:?}")]
|
||||
ClientResolution { node_id: u64, timeout: Duration },
|
||||
}
|
||||
|
||||
/// Ensures the example cluster has the expected number of node clients.
|
||||
pub fn ensure_cluster_size(
|
||||
clients: &[OpenRaftKvClient],
|
||||
expected: usize,
|
||||
) -> Result<(), OpenRaftClusterError> {
|
||||
if clients.len() < expected {
|
||||
return Err(OpenRaftClusterError::InsufficientClients {
|
||||
expected,
|
||||
actual: clients.len(),
|
||||
});
|
||||
}
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
/// Waits until the cluster converges on one leader.
|
||||
pub async fn wait_for_leader(
|
||||
clients: &[OpenRaftKvClient],
|
||||
timeout: Duration,
|
||||
different_from: Option<u64>,
|
||||
) -> Result<u64, OpenRaftClusterError> {
|
||||
let deadline = Instant::now() + timeout;
|
||||
|
||||
loop {
|
||||
let last_observation = capture_openraft_cluster_snapshot(clients).await;
|
||||
|
||||
if let Some(leader) = last_observation.agreed_leader(different_from) {
|
||||
return Ok(leader);
|
||||
}
|
||||
|
||||
if Instant::now() >= deadline {
|
||||
return Err(OpenRaftClusterError::Timeout {
|
||||
action: "leader agreement",
|
||||
timeout,
|
||||
last_observation: last_observation.summary(),
|
||||
});
|
||||
}
|
||||
|
||||
sleep(POLL_INTERVAL).await;
|
||||
}
|
||||
}
|
||||
|
||||
/// Waits until every node reports the expected voter set.
|
||||
pub async fn wait_for_membership(
|
||||
clients: &[OpenRaftKvClient],
|
||||
expected_voters: &BTreeSet<u64>,
|
||||
timeout: Duration,
|
||||
) -> Result<(), OpenRaftClusterError> {
|
||||
let deadline = Instant::now() + timeout;
|
||||
|
||||
loop {
|
||||
let last_observation = capture_openraft_cluster_snapshot(clients).await;
|
||||
|
||||
if last_observation.all_voters_match(expected_voters) {
|
||||
return Ok(());
|
||||
}
|
||||
|
||||
if Instant::now() >= deadline {
|
||||
return Err(OpenRaftClusterError::Timeout {
|
||||
action: "membership convergence",
|
||||
timeout,
|
||||
last_observation: last_observation.summary(),
|
||||
});
|
||||
}
|
||||
|
||||
sleep(POLL_INTERVAL).await;
|
||||
}
|
||||
}
|
||||
|
||||
/// Waits until every node reports the full replicated key set.
|
||||
pub async fn wait_for_replication(
|
||||
clients: &[OpenRaftKvClient],
|
||||
expected: &BTreeMap<String, String>,
|
||||
timeout: Duration,
|
||||
) -> Result<(), OpenRaftClusterError> {
|
||||
let deadline = Instant::now() + timeout;
|
||||
|
||||
loop {
|
||||
let last_observation = capture_openraft_cluster_snapshot(clients).await;
|
||||
|
||||
if last_observation.all_kv_match(expected, &FULL_VOTER_SET) {
|
||||
return Ok(());
|
||||
}
|
||||
|
||||
if Instant::now() >= deadline {
|
||||
return Err(OpenRaftClusterError::Timeout {
|
||||
action: "replicated state convergence",
|
||||
timeout,
|
||||
last_observation: last_observation.summary(),
|
||||
});
|
||||
}
|
||||
|
||||
sleep(POLL_INTERVAL).await;
|
||||
}
|
||||
}
|
||||
|
||||
/// Waits until the observer reports one agreed leader.
|
||||
pub async fn wait_for_observed_leader(
|
||||
handle: &ObservationHandle<OpenRaftClusterObserver>,
|
||||
timeout: Duration,
|
||||
different_from: Option<u64>,
|
||||
) -> Result<u64, OpenRaftClusterError> {
|
||||
let snapshot =
|
||||
wait_for_observed_snapshot(handle, timeout, "observed leader agreement", |snapshot| {
|
||||
snapshot.agreed_leader(different_from).is_some()
|
||||
})
|
||||
.await?;
|
||||
|
||||
snapshot
|
||||
.value
|
||||
.agreed_leader(different_from)
|
||||
.ok_or(OpenRaftClusterError::MissingObservation)
|
||||
}
|
||||
|
||||
/// Waits until the observer reports the expected voter set on every node.
|
||||
pub async fn wait_for_observed_membership(
|
||||
handle: &ObservationHandle<OpenRaftClusterObserver>,
|
||||
expected_voters: &BTreeSet<u64>,
|
||||
timeout: Duration,
|
||||
) -> Result<(), OpenRaftClusterError> {
|
||||
wait_for_observed_snapshot(
|
||||
handle,
|
||||
timeout,
|
||||
"observed membership convergence",
|
||||
|snapshot| snapshot.all_voters_match(expected_voters),
|
||||
)
|
||||
.await?;
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
/// Waits until the observer reports the full replicated key set.
|
||||
pub async fn wait_for_observed_replication(
|
||||
handle: &ObservationHandle<OpenRaftClusterObserver>,
|
||||
expected: &BTreeMap<String, String>,
|
||||
timeout: Duration,
|
||||
) -> Result<(), OpenRaftClusterError> {
|
||||
wait_for_observed_snapshot(
|
||||
handle,
|
||||
timeout,
|
||||
"observed replicated state convergence",
|
||||
|snapshot| snapshot.all_kv_match(expected, &FULL_VOTER_SET),
|
||||
)
|
||||
.await?;
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
/// Resolves the client handle that currently identifies as `node_id`.
|
||||
pub async fn resolve_client_for_node(
|
||||
clients: &[OpenRaftKvClient],
|
||||
node_id: u64,
|
||||
timeout: Duration,
|
||||
) -> Result<OpenRaftKvClient, OpenRaftClusterError> {
|
||||
let deadline = Instant::now() + timeout;
|
||||
|
||||
loop {
|
||||
for client in clients {
|
||||
let Ok(state) = client.state().await else {
|
||||
continue;
|
||||
};
|
||||
|
||||
if state.node_id == node_id {
|
||||
return Ok(client.clone());
|
||||
}
|
||||
}
|
||||
|
||||
if Instant::now() >= deadline {
|
||||
return Err(OpenRaftClusterError::ClientResolution { node_id, timeout });
|
||||
}
|
||||
|
||||
sleep(CLIENT_RESOLUTION_INTERVAL).await;
|
||||
}
|
||||
}
|
||||
|
||||
/// Issues a contiguous batch of writes through the current leader.
|
||||
pub async fn write_batch(
|
||||
leader: &OpenRaftKvClient,
|
||||
prefix: &str,
|
||||
start: usize,
|
||||
count: usize,
|
||||
) -> Result<(), OpenRaftClusterError> {
|
||||
for index in start..(start + count) {
|
||||
let key = format!("{prefix}-{index}");
|
||||
let value = format!("value-{index}");
|
||||
|
||||
leader
|
||||
.write(&key, &value, index as u64 + 1)
|
||||
.await
|
||||
.map_err(OpenRaftClusterError::Client)?;
|
||||
}
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
/// Builds the replicated key/value map expected after the workload completes.
///
/// Keys are `{prefix}-{i}` and values `value-{i}` for `i` in `0..total_writes`.
#[must_use]
pub fn expected_kv(prefix: &str, total_writes: usize) -> BTreeMap<String, String> {
    let mut expected = BTreeMap::new();

    for index in 0..total_writes {
        expected.insert(format!("{prefix}-{index}"), format!("value-{index}"));
    }

    expected
}
|
||||
|
||||
async fn wait_for_observed_snapshot(
|
||||
handle: &ObservationHandle<OpenRaftClusterObserver>,
|
||||
timeout: Duration,
|
||||
action: &'static str,
|
||||
matches: impl Fn(&OpenRaftClusterSnapshot) -> bool,
|
||||
) -> Result<ObservationSnapshot<OpenRaftClusterSnapshot>, OpenRaftClusterError> {
|
||||
let deadline = Instant::now() + timeout;
|
||||
let mut last_summary = "no state observed yet".to_owned();
|
||||
|
||||
loop {
|
||||
if let Some(snapshot) = handle.latest_snapshot() {
|
||||
last_summary = snapshot.value.summary();
|
||||
|
||||
if matches(&snapshot.value) {
|
||||
return Ok(snapshot);
|
||||
}
|
||||
}
|
||||
|
||||
if Instant::now() >= deadline {
|
||||
return Err(OpenRaftClusterError::Timeout {
|
||||
action,
|
||||
timeout,
|
||||
last_observation: last_summary,
|
||||
});
|
||||
}
|
||||
|
||||
sleep(POLL_INTERVAL).await;
|
||||
}
|
||||
}
|
||||
@ -26,15 +26,15 @@ Each example follows the same pattern:
|
||||
## Run locally
|
||||
|
||||
```bash
|
||||
cargo run -p pubsub-examples --bin basic_ws_roundtrip
|
||||
cargo run -p pubsub-examples --bin basic_ws_reconnect
|
||||
cargo run -p pubsub-examples --bin pubsub_basic_ws_roundtrip
|
||||
cargo run -p pubsub-examples --bin pubsub_basic_ws_reconnect
|
||||
```
|
||||
|
||||
## Run with Docker Compose
|
||||
|
||||
```bash
|
||||
cargo run -p pubsub-examples --bin compose_ws_roundtrip
|
||||
cargo run -p pubsub-examples --bin compose_ws_reconnect
|
||||
cargo run -p pubsub-examples --bin pubsub_compose_ws_roundtrip
|
||||
cargo run -p pubsub-examples --bin pubsub_compose_ws_reconnect
|
||||
```
|
||||
|
||||
Set `PUBSUB_IMAGE` to override the default compose image tag.
|
||||
@ -43,7 +43,7 @@ Set `PUBSUB_IMAGE` to override the default compose image tag.
|
||||
|
||||
```bash
|
||||
docker build -t pubsub-node:local -f examples/pubsub/Dockerfile .
|
||||
cargo run -p pubsub-examples --bin k8s_ws_roundtrip
|
||||
cargo run -p pubsub-examples --bin pubsub_k8s_ws_roundtrip
|
||||
```
|
||||
|
||||
Prerequisites:
|
||||
@ -57,5 +57,5 @@ Optional image override:
|
||||
|
||||
```bash
|
||||
docker build -t pubsub-node:local -f examples/pubsub/Dockerfile .
|
||||
cargo run -p pubsub-examples --bin k8s_manual_ws_roundtrip
|
||||
cargo run -p pubsub-examples --bin pubsub_k8s_manual_ws_roundtrip
|
||||
```
|
||||
|
||||
@ -4,6 +4,30 @@ license.workspace = true
|
||||
name = "pubsub-examples"
|
||||
version.workspace = true
|
||||
|
||||
[[bin]]
|
||||
name = "pubsub_basic_ws_roundtrip"
|
||||
path = "src/bin/basic_ws_roundtrip.rs"
|
||||
|
||||
[[bin]]
|
||||
name = "pubsub_basic_ws_reconnect"
|
||||
path = "src/bin/basic_ws_reconnect.rs"
|
||||
|
||||
[[bin]]
|
||||
name = "pubsub_compose_ws_roundtrip"
|
||||
path = "src/bin/compose_ws_roundtrip.rs"
|
||||
|
||||
[[bin]]
|
||||
name = "pubsub_compose_ws_reconnect"
|
||||
path = "src/bin/compose_ws_reconnect.rs"
|
||||
|
||||
[[bin]]
|
||||
name = "pubsub_k8s_ws_roundtrip"
|
||||
path = "src/bin/k8s_ws_roundtrip.rs"
|
||||
|
||||
[[bin]]
|
||||
name = "pubsub_k8s_manual_ws_roundtrip"
|
||||
path = "src/bin/k8s_manual_ws_roundtrip.rs"
|
||||
|
||||
[dependencies]
|
||||
anyhow = "1.0"
|
||||
pubsub-node = { path = "../pubsub-node" }
|
||||
|
||||
24
examples/queue/Dockerfile
Normal file
24
examples/queue/Dockerfile
Normal file
@ -0,0 +1,24 @@
|
||||
FROM rustlang/rust:nightly-bookworm AS builder

WORKDIR /build

# Copy the workspace members the queue-node build needs.
COPY Cargo.toml Cargo.lock ./
COPY cfgsync/ ./cfgsync/
COPY examples/ ./examples/
COPY testing-framework/ ./testing-framework/

RUN cargo build --release -p queue-node

FROM debian:bookworm-slim

# --no-install-recommends keeps the runtime image minimal; only the CA bundle
# is required for outbound TLS.
RUN apt-get update && \
    apt-get install -y --no-install-recommends ca-certificates && \
    rm -rf /var/lib/apt/lists/*

COPY --from=builder /build/target/release/queue-node /usr/local/bin/queue-node

RUN mkdir -p /etc/queue
WORKDIR /app

ENTRYPOINT ["/usr/local/bin/queue-node"]
CMD ["--config", "/etc/queue/config.yaml"]
|
||||
47
examples/queue/README.md
Normal file
47
examples/queue/README.md
Normal file
@ -0,0 +1,47 @@
|
||||
# Queue Example
|
||||
|
||||
This example runs a small replicated FIFO queue.
|
||||
|
||||
The scenarios enqueue messages, dequeue them again, and check that queue state
|
||||
either converges or drains as expected.
|
||||
|
||||
## How TF runs this
|
||||
|
||||
Each example follows the same pattern:
|
||||
|
||||
- TF starts a small deployment of queue nodes
|
||||
- a workload produces messages, or produces and consumes them
|
||||
- an expectation checks either that queue state converges or that the queue drains
|
||||
|
||||
## Scenarios
|
||||
|
||||
- `basic_convergence` produces messages and checks that queue state converges locally
|
||||
- `basic_roundtrip` produces and consumes messages locally until the queue drains
|
||||
- `basic_restart_chaos` injects random local node restarts during the run
|
||||
- `compose_convergence` and `compose_roundtrip` run the same checks in Docker Compose
|
||||
|
||||
## API
|
||||
|
||||
Each node exposes:
|
||||
|
||||
- `POST /queue/enqueue` to add a message
|
||||
- `POST /queue/dequeue` to remove a message
|
||||
- `GET /queue/state` to inspect the current queue state
|
||||
- `GET /internal/snapshot` to read the local replicated state
|
||||
|
||||
## Run locally
|
||||
|
||||
```bash
|
||||
cargo run -p queue-examples --bin queue_basic_convergence
|
||||
cargo run -p queue-examples --bin queue_basic_roundtrip
|
||||
cargo run -p queue-examples --bin queue_basic_restart_chaos
|
||||
```
|
||||
|
||||
## Run with Docker Compose
|
||||
|
||||
```bash
|
||||
cargo run -p queue-examples --bin queue_compose_convergence
|
||||
cargo run -p queue-examples --bin queue_compose_roundtrip
|
||||
```
|
||||
|
||||
Set `QUEUE_IMAGE` to override the default compose image tag.
|
||||
36
examples/queue/examples/Cargo.toml
Normal file
36
examples/queue/examples/Cargo.toml
Normal file
@ -0,0 +1,36 @@
|
||||
[package]
|
||||
edition.workspace = true
|
||||
license.workspace = true
|
||||
name = "queue-examples"
|
||||
version.workspace = true
|
||||
|
||||
[[bin]]
|
||||
name = "queue_basic_convergence"
|
||||
path = "src/bin/basic_convergence.rs"
|
||||
|
||||
[[bin]]
|
||||
name = "queue_basic_restart_chaos"
|
||||
path = "src/bin/basic_restart_chaos.rs"
|
||||
|
||||
[[bin]]
|
||||
name = "queue_basic_roundtrip"
|
||||
path = "src/bin/basic_roundtrip.rs"
|
||||
|
||||
[[bin]]
|
||||
name = "queue_compose_convergence"
|
||||
path = "src/bin/compose_convergence.rs"
|
||||
|
||||
[[bin]]
|
||||
name = "queue_compose_roundtrip"
|
||||
path = "src/bin/compose_roundtrip.rs"
|
||||
|
||||
[dependencies]
|
||||
anyhow = "1.0"
|
||||
async-trait = { workspace = true }
|
||||
queue-runtime-ext = { path = "../testing/integration" }
|
||||
queue-runtime-workloads = { path = "../testing/workloads" }
|
||||
testing-framework-core = { workspace = true }
|
||||
testing-framework-runner-compose = { workspace = true }
|
||||
tokio = { workspace = true, features = ["full"] }
|
||||
tracing = { workspace = true }
|
||||
tracing-subscriber = { version = "0.3", features = ["env-filter"] }
|
||||
32
examples/queue/examples/src/bin/basic_convergence.rs
Normal file
32
examples/queue/examples/src/bin/basic_convergence.rs
Normal file
@ -0,0 +1,32 @@
|
||||
use std::time::Duration;
|
||||
|
||||
use queue_runtime_ext::QueueLocalDeployer;
|
||||
use queue_runtime_workloads::{
|
||||
QueueBuilderExt, QueueConverges, QueueProduceWorkload, QueueScenarioBuilder, QueueTopology,
|
||||
};
|
||||
use testing_framework_core::scenario::Deployer;
|
||||
|
||||
#[tokio::main]
|
||||
async fn main() -> anyhow::Result<()> {
|
||||
tracing_subscriber::fmt()
|
||||
.with_env_filter(tracing_subscriber::EnvFilter::from_default_env())
|
||||
.init();
|
||||
|
||||
let operations = 300;
|
||||
|
||||
let mut scenario = QueueScenarioBuilder::deployment_with(|_| QueueTopology::new(3))
|
||||
.with_run_duration(Duration::from_secs(30))
|
||||
.with_workload(
|
||||
QueueProduceWorkload::new()
|
||||
.operations(operations)
|
||||
.rate_per_sec(30)
|
||||
.payload_prefix("demo"),
|
||||
)
|
||||
.with_expectation(QueueConverges::new(operations).timeout(Duration::from_secs(25)))
|
||||
.build()?;
|
||||
|
||||
let deployer = QueueLocalDeployer::default();
|
||||
let runner = deployer.deploy(&scenario).await?;
|
||||
runner.run(&mut scenario).await?;
|
||||
Ok(())
|
||||
}
|
||||
84
examples/queue/examples/src/bin/basic_restart_chaos.rs
Normal file
84
examples/queue/examples/src/bin/basic_restart_chaos.rs
Normal file
@ -0,0 +1,84 @@
|
||||
use std::time::Duration;
|
||||
|
||||
use async_trait::async_trait;
|
||||
use queue_runtime_ext::QueueLocalDeployer;
|
||||
use queue_runtime_workloads::{
|
||||
QueueBuilderExt, QueueConverges, QueueProduceWorkload, QueueScenarioBuilder, QueueTopology,
|
||||
};
|
||||
use testing_framework_core::{
|
||||
scenario::{Deployer, DynError, RunContext, Workload},
|
||||
topology::DeploymentDescriptor,
|
||||
};
|
||||
use tracing::info;
|
||||
|
||||
/// Workload that restarts nodes on a fixed cadence to inject controlled chaos.
#[derive(Clone)]
struct FixedRestartChaosWorkload {
    /// Number of restart operations to perform.
    restarts: usize,
    /// Pause before each restart.
    delay: Duration,
}

impl FixedRestartChaosWorkload {
    /// Creates a workload that performs `restarts` restarts, sleeping `delay`
    /// before each one.
    const fn new(restarts: usize, delay: Duration) -> Self {
        Self { restarts, delay }
    }
}
|
||||
|
||||
#[async_trait]
|
||||
impl Workload<queue_runtime_workloads::QueueEnv> for FixedRestartChaosWorkload {
|
||||
fn name(&self) -> &str {
|
||||
"fixed_restart_chaos"
|
||||
}
|
||||
|
||||
async fn start(
|
||||
&self,
|
||||
ctx: &RunContext<queue_runtime_workloads::QueueEnv>,
|
||||
) -> Result<(), DynError> {
|
||||
let Some(control) = ctx.node_control() else {
|
||||
return Err("fixed restart chaos requires node control".into());
|
||||
};
|
||||
|
||||
let node_count = ctx.descriptors().node_count();
|
||||
if node_count == 0 {
|
||||
return Err("fixed restart chaos requires at least one node".into());
|
||||
}
|
||||
|
||||
for step in 0..self.restarts {
|
||||
tokio::time::sleep(self.delay).await;
|
||||
let target_index = if node_count > 1 {
|
||||
(step % (node_count - 1)) + 1
|
||||
} else {
|
||||
0
|
||||
};
|
||||
let target = format!("node-{target_index}");
|
||||
info!(step, %target, "triggering controlled chaos restart");
|
||||
control.restart_node(&target).await?;
|
||||
}
|
||||
|
||||
Ok(())
|
||||
}
|
||||
}
|
||||
|
||||
#[tokio::main]
|
||||
async fn main() -> anyhow::Result<()> {
|
||||
tracing_subscriber::fmt()
|
||||
.with_env_filter(tracing_subscriber::EnvFilter::from_default_env())
|
||||
.init();
|
||||
|
||||
let mut scenario = QueueScenarioBuilder::deployment_with(|_| QueueTopology::new(3))
|
||||
.enable_node_control()
|
||||
.with_workload(FixedRestartChaosWorkload::new(3, Duration::from_secs(8)))
|
||||
.with_run_duration(Duration::from_secs(30))
|
||||
.with_workload(
|
||||
QueueProduceWorkload::new()
|
||||
.operations(400)
|
||||
.rate_per_sec(40)
|
||||
.payload_prefix("queue-chaos"),
|
||||
)
|
||||
.with_expectation(QueueConverges::new(200).timeout(Duration::from_secs(30)))
|
||||
.build()?;
|
||||
|
||||
let deployer = QueueLocalDeployer::default();
|
||||
let runner = deployer.deploy(&scenario).await?;
|
||||
runner.run(&mut scenario).await?;
|
||||
Ok(())
|
||||
}
|
||||
31
examples/queue/examples/src/bin/basic_roundtrip.rs
Normal file
31
examples/queue/examples/src/bin/basic_roundtrip.rs
Normal file
@ -0,0 +1,31 @@
|
||||
use std::time::Duration;
|
||||
|
||||
use queue_runtime_ext::QueueLocalDeployer;
|
||||
use queue_runtime_workloads::{
|
||||
QueueBuilderExt, QueueDrained, QueueRoundTripWorkload, QueueScenarioBuilder, QueueTopology,
|
||||
};
|
||||
use testing_framework_core::scenario::Deployer;
|
||||
|
||||
#[tokio::main]
|
||||
async fn main() -> anyhow::Result<()> {
|
||||
tracing_subscriber::fmt()
|
||||
.with_env_filter(tracing_subscriber::EnvFilter::from_default_env())
|
||||
.init();
|
||||
|
||||
let operations = 200;
|
||||
|
||||
let mut scenario = QueueScenarioBuilder::deployment_with(|_| QueueTopology::new(3))
|
||||
.with_run_duration(Duration::from_secs(30))
|
||||
.with_workload(
|
||||
QueueRoundTripWorkload::new()
|
||||
.operations(operations)
|
||||
.rate_per_sec(25),
|
||||
)
|
||||
.with_expectation(QueueDrained::new().timeout(Duration::from_secs(25)))
|
||||
.build()?;
|
||||
|
||||
let deployer = QueueLocalDeployer::default();
|
||||
let runner = deployer.deploy(&scenario).await?;
|
||||
runner.run(&mut scenario).await?;
|
||||
Ok(())
|
||||
}
|
||||
47
examples/queue/examples/src/bin/compose_convergence.rs
Normal file
47
examples/queue/examples/src/bin/compose_convergence.rs
Normal file
@ -0,0 +1,47 @@
|
||||
use std::time::Duration;
|
||||
|
||||
use anyhow::{Context as _, Result};
|
||||
use queue_runtime_workloads::{
|
||||
QueueBuilderExt, QueueConverges, QueueProduceWorkload, QueueScenarioBuilder, QueueTopology,
|
||||
};
|
||||
use testing_framework_core::scenario::Deployer;
|
||||
use testing_framework_runner_compose::ComposeRunnerError;
|
||||
use tracing::{info, warn};
|
||||
|
||||
#[tokio::main]
|
||||
async fn main() -> Result<()> {
|
||||
tracing_subscriber::fmt()
|
||||
.with_env_filter(tracing_subscriber::EnvFilter::from_default_env())
|
||||
.init();
|
||||
|
||||
let operations = 200;
|
||||
|
||||
let mut scenario = QueueScenarioBuilder::deployment_with(|_| QueueTopology::new(3))
|
||||
.with_run_duration(Duration::from_secs(30))
|
||||
.with_workload(
|
||||
QueueProduceWorkload::new()
|
||||
.operations(operations)
|
||||
.rate_per_sec(20),
|
||||
)
|
||||
.with_expectation(QueueConverges::new(operations).timeout(Duration::from_secs(25)))
|
||||
.build()?;
|
||||
|
||||
let deployer = queue_runtime_ext::QueueComposeDeployer::new();
|
||||
let runner = match deployer.deploy(&scenario).await {
|
||||
Ok(runner) => runner,
|
||||
Err(ComposeRunnerError::DockerUnavailable) => {
|
||||
warn!("docker unavailable; skipping compose queue run");
|
||||
return Ok(());
|
||||
}
|
||||
Err(error) => {
|
||||
return Err(anyhow::Error::new(error)).context("deploying queue compose stack");
|
||||
}
|
||||
};
|
||||
|
||||
info!("running queue compose convergence scenario");
|
||||
runner
|
||||
.run(&mut scenario)
|
||||
.await
|
||||
.context("running queue compose scenario")?;
|
||||
Ok(())
|
||||
}
|
||||
48
examples/queue/examples/src/bin/compose_roundtrip.rs
Normal file
48
examples/queue/examples/src/bin/compose_roundtrip.rs
Normal file
@ -0,0 +1,48 @@
|
||||
use std::time::Duration;
|
||||
|
||||
use anyhow::{Context as _, Result};
|
||||
use queue_runtime_workloads::{
|
||||
QueueBuilderExt, QueueDrained, QueueRoundTripWorkload, QueueScenarioBuilder, QueueTopology,
|
||||
};
|
||||
use testing_framework_core::scenario::Deployer;
|
||||
use testing_framework_runner_compose::ComposeRunnerError;
|
||||
use tracing::{info, warn};
|
||||
|
||||
#[tokio::main]
|
||||
async fn main() -> Result<()> {
|
||||
tracing_subscriber::fmt()
|
||||
.with_env_filter(tracing_subscriber::EnvFilter::from_default_env())
|
||||
.init();
|
||||
|
||||
let operations = 200;
|
||||
|
||||
let mut scenario = QueueScenarioBuilder::deployment_with(|_| QueueTopology::new(3))
|
||||
.with_run_duration(Duration::from_secs(30))
|
||||
.with_workload(
|
||||
QueueRoundTripWorkload::new()
|
||||
.operations(operations)
|
||||
.rate_per_sec(20),
|
||||
)
|
||||
.with_expectation(QueueDrained::new().timeout(Duration::from_secs(25)))
|
||||
.build()?;
|
||||
|
||||
let deployer = queue_runtime_ext::QueueComposeDeployer::new();
|
||||
let runner = match deployer.deploy(&scenario).await {
|
||||
Ok(runner) => runner,
|
||||
Err(ComposeRunnerError::DockerUnavailable) => {
|
||||
warn!("docker unavailable; skipping compose queue roundtrip run");
|
||||
return Ok(());
|
||||
}
|
||||
Err(error) => {
|
||||
return Err(anyhow::Error::new(error))
|
||||
.context("deploying queue compose roundtrip stack");
|
||||
}
|
||||
};
|
||||
|
||||
info!("running queue compose roundtrip scenario");
|
||||
runner
|
||||
.run(&mut scenario)
|
||||
.await
|
||||
.context("running queue compose roundtrip scenario")?;
|
||||
Ok(())
|
||||
}
|
||||
21
examples/queue/queue-node/Cargo.toml
Normal file
21
examples/queue/queue-node/Cargo.toml
Normal file
@ -0,0 +1,21 @@
|
||||
[package]
|
||||
edition.workspace = true
|
||||
license.workspace = true
|
||||
name = "queue-node"
|
||||
version.workspace = true
|
||||
|
||||
[[bin]]
|
||||
name = "queue-node"
|
||||
path = "src/main.rs"
|
||||
|
||||
[dependencies]
|
||||
anyhow = "1.0"
|
||||
axum = "0.7"
|
||||
clap = { version = "4.0", features = ["derive"] }
|
||||
reqwest = { workspace = true, features = ["json"] }
|
||||
serde = { workspace = true }
|
||||
serde_yaml = { workspace = true }
|
||||
tokio = { workspace = true, features = ["full"] }
|
||||
tower-http = { version = "0.6", features = ["trace"] }
|
||||
tracing = { workspace = true }
|
||||
tracing-subscriber = { version = "0.3", features = ["env-filter"] }
|
||||
40
examples/queue/queue-node/src/client.rs
Normal file
40
examples/queue/queue-node/src/client.rs
Normal file
@ -0,0 +1,40 @@
|
||||
use reqwest::Url;
|
||||
use serde::Serialize;
|
||||
|
||||
/// Thin JSON-over-HTTP client for one queue node's API.
///
/// Cheap to clone: `reqwest::Client` is internally reference-counted.
#[derive(Clone)]
pub struct QueueHttpClient {
    /// Base URL requests are joined onto (e.g. `http://host:port/`).
    base_url: Url,
    client: reqwest::Client,
}
|
||||
|
||||
impl QueueHttpClient {
|
||||
#[must_use]
|
||||
pub fn new(base_url: Url) -> Self {
|
||||
Self {
|
||||
base_url,
|
||||
client: reqwest::Client::new(),
|
||||
}
|
||||
}
|
||||
|
||||
pub async fn get<T: serde::de::DeserializeOwned>(&self, path: &str) -> anyhow::Result<T> {
|
||||
let url = self.base_url.join(path)?;
|
||||
let response = self.client.get(url).send().await?.error_for_status()?;
|
||||
Ok(response.json().await?)
|
||||
}
|
||||
|
||||
pub async fn post<B: Serialize, T: serde::de::DeserializeOwned>(
|
||||
&self,
|
||||
path: &str,
|
||||
body: &B,
|
||||
) -> anyhow::Result<T> {
|
||||
let url = self.base_url.join(path)?;
|
||||
let response = self
|
||||
.client
|
||||
.post(url)
|
||||
.json(body)
|
||||
.send()
|
||||
.await?
|
||||
.error_for_status()?;
|
||||
Ok(response.json().await?)
|
||||
}
|
||||
}
|
||||
29
examples/queue/queue-node/src/config.rs
Normal file
29
examples/queue/queue-node/src/config.rs
Normal file
@ -0,0 +1,29 @@
|
||||
use std::{fs, path::Path};
|
||||
|
||||
use serde::{Deserialize, Serialize};
|
||||
|
||||
/// One replication peer as listed in this node's YAML configuration.
#[derive(Clone, Debug, Serialize, Deserialize)]
pub struct PeerInfo {
    /// The peer's numeric node id.
    pub node_id: u64,
    /// Host:port authority of the peer's HTTP API (no scheme; the sync
    /// service prepends `http://`).
    pub http_address: String,
}
|
||||
|
||||
/// On-disk node configuration, loaded from YAML (see [`QueueConfig::load`]).
#[derive(Clone, Debug, Serialize, Deserialize)]
pub struct QueueConfig {
    /// This node's id; stamped as `origin` on revisions it produces.
    pub node_id: u64,
    /// Port the HTTP API binds on (all interfaces).
    pub http_port: u16,
    /// Peers this node pulls snapshots from.
    pub peers: Vec<PeerInfo>,
    /// Snapshot-poll period in milliseconds; defaults to 1000 when the key
    /// is absent from the YAML.
    #[serde(default = "default_sync_interval_ms")]
    pub sync_interval_ms: u64,
}
|
||||
|
||||
impl QueueConfig {
    /// Read and parse the YAML configuration file at `path`.
    ///
    /// # Errors
    /// Fails if the file cannot be read or its contents are not valid YAML
    /// for this schema.
    pub fn load(path: &Path) -> anyhow::Result<Self> {
        let raw = fs::read_to_string(path)?;
        Ok(serde_yaml::from_str(&raw)?)
    }
}
|
||||
|
||||
/// Serde fallback for `QueueConfig::sync_interval_ms` when the YAML omits it.
const fn default_sync_interval_ms() -> u64 {
    1_000
}
|
||||
3
examples/queue/queue-node/src/lib.rs
Normal file
3
examples/queue/queue-node/src/lib.rs
Normal file
@ -0,0 +1,3 @@
|
||||
pub mod client;
|
||||
|
||||
pub use client::QueueHttpClient;
|
||||
36
examples/queue/queue-node/src/main.rs
Normal file
36
examples/queue/queue-node/src/main.rs
Normal file
@ -0,0 +1,36 @@
|
||||
mod config;
|
||||
mod server;
|
||||
mod state;
|
||||
mod sync;
|
||||
|
||||
use std::path::PathBuf;
|
||||
|
||||
use clap::Parser;
|
||||
use tracing_subscriber::{layer::SubscriberExt, util::SubscriberInitExt};
|
||||
|
||||
use crate::{config::QueueConfig, state::QueueState, sync::SyncService};
|
||||
|
||||
// Command-line arguments for the queue node binary.
// (Plain `//` comments on purpose: clap turns `///` doc comments into
// user-visible help text, which would change runtime output.)
#[derive(Parser, Debug)]
#[command(name = "queue-node")]
struct Args {
    // Path to the YAML configuration file (-c / --config).
    #[arg(short, long)]
    config: PathBuf,
}
|
||||
|
||||
/// Process entry point: init tracing, load config, start the background
/// sync loop, then serve the HTTP API until the process exits.
#[tokio::main]
async fn main() -> anyhow::Result<()> {
    // Env-filterable logging; when RUST_LOG is unset, keep node logs at
    // info and tower-http request traces at debug.
    tracing_subscriber::registry()
        .with(
            tracing_subscriber::EnvFilter::try_from_default_env()
                .unwrap_or_else(|_| "queue_node=info,tower_http=debug".into()),
        )
        .with(tracing_subscriber::fmt::layer())
        .init();

    let args = Args::parse();
    let config = QueueConfig::load(&args.config)?;

    let state = QueueState::new(config.node_id);
    // Fire-and-forget background replication; runs for the process lifetime.
    SyncService::new(config.clone(), state.clone()).start();
    // Blocks serving HTTP; readiness is flipped inside once bound.
    server::start_server(config, state).await
}
|
||||
115
examples/queue/queue-node/src/server.rs
Normal file
115
examples/queue/queue-node/src/server.rs
Normal file
@ -0,0 +1,115 @@
|
||||
use std::net::SocketAddr;
|
||||
|
||||
use axum::{
|
||||
Router,
|
||||
extract::State,
|
||||
http::StatusCode,
|
||||
response::Json,
|
||||
routing::{get, post},
|
||||
};
|
||||
use serde::{Deserialize, Serialize};
|
||||
use tower_http::trace::TraceLayer;
|
||||
|
||||
use crate::{
|
||||
config::QueueConfig,
|
||||
state::{QueueMessage, QueueRevision, QueueState, QueueStateView, Snapshot},
|
||||
};
|
||||
|
||||
/// Body for `/health/live` and `/health/ready` responses.
#[derive(Serialize)]
struct HealthResponse {
    status: &'static str,
}

/// Request body for `POST /queue/enqueue`.
#[derive(Deserialize)]
struct EnqueueRequest {
    payload: String,
}

/// Response body for `POST /queue/enqueue`.
#[derive(Serialize)]
struct EnqueueResponse {
    accepted: bool,
    /// Id assigned to the enqueued message.
    id: u64,
    /// Queue length immediately after the enqueue.
    queue_len: usize,
    /// Replication revision after the enqueue.
    revision: QueueRevision,
}

/// Response body for `POST /queue/dequeue`.
#[derive(Serialize)]
struct DequeueResponse {
    /// `None` when the queue was empty.
    message: Option<QueueMessage>,
    queue_len: usize,
    revision: QueueRevision,
}
|
||||
|
||||
/// Bind the HTTP API on `0.0.0.0:<http_port>`, mark the node ready, and
/// serve until the process exits.
///
/// Readiness is flipped only after the listener is successfully bound, so a
/// 200 from `/health/ready` implies the port is actually accepting
/// connections.
pub async fn start_server(config: QueueConfig, state: QueueState) -> anyhow::Result<()> {
    let app = Router::new()
        .route("/health/live", get(health_live))
        .route("/health/ready", get(health_ready))
        .route("/queue/enqueue", post(enqueue))
        .route("/queue/dequeue", post(dequeue))
        .route("/queue/state", get(queue_state))
        // Consumed by peers' SyncService, not by external clients.
        .route("/internal/snapshot", get(get_snapshot))
        .layer(TraceLayer::new_for_http())
        .with_state(state.clone());

    let addr = SocketAddr::from(([0, 0, 0, 0], config.http_port));
    let listener = tokio::net::TcpListener::bind(addr).await?;

    state.set_ready(true).await;
    tracing::info!(node_id = state.node_id(), %addr, "queue node ready");

    axum::serve(listener, app).await?;
    Ok(())
}
|
||||
|
||||
/// Liveness probe: always 200 once the process is serving requests.
async fn health_live() -> (StatusCode, Json<HealthResponse>) {
    (StatusCode::OK, Json(HealthResponse { status: "alive" }))
}

/// Readiness probe: 200 once `start_server` has bound its listener and set
/// the ready flag, 503 before that.
async fn health_ready(State(state): State<QueueState>) -> (StatusCode, Json<HealthResponse>) {
    if state.is_ready().await {
        (StatusCode::OK, Json(HealthResponse { status: "ready" }))
    } else {
        (
            StatusCode::SERVICE_UNAVAILABLE,
            Json(HealthResponse {
                status: "not-ready",
            }),
        )
    }
}
|
||||
|
||||
/// `POST /queue/enqueue`: append a message to this node's local queue.
async fn enqueue(
    State(state): State<QueueState>,
    Json(request): Json<EnqueueRequest>,
) -> (StatusCode, Json<EnqueueResponse>) {
    let outcome = state.enqueue_local(request.payload).await;
    (
        StatusCode::OK,
        Json(EnqueueResponse {
            accepted: outcome.accepted,
            id: outcome.id,
            queue_len: outcome.queue_len,
            revision: outcome.revision,
        }),
    )
}

/// `POST /queue/dequeue`: pop the head of the local queue.
/// Returns 200 with `message: null` when the queue is empty.
async fn dequeue(State(state): State<QueueState>) -> (StatusCode, Json<DequeueResponse>) {
    let outcome = state.dequeue_local().await;
    (
        StatusCode::OK,
        Json(DequeueResponse {
            message: outcome.message,
            queue_len: outcome.queue_len,
            revision: outcome.revision,
        }),
    )
}

/// `GET /queue/state`: compact view (revision, length, head/tail ids) used
/// by test expectations to compare nodes without transferring payloads.
async fn queue_state(State(state): State<QueueState>) -> Json<QueueStateView> {
    Json(state.queue_state().await)
}

/// `GET /internal/snapshot`: full queue contents for peer synchronization.
async fn get_snapshot(State(state): State<QueueState>) -> Json<Snapshot> {
    Json(state.snapshot().await)
}
|
||||
151
examples/queue/queue-node/src/state.rs
Normal file
151
examples/queue/queue-node/src/state.rs
Normal file
@ -0,0 +1,151 @@
|
||||
use std::{collections::VecDeque, sync::Arc};
|
||||
|
||||
use serde::{Deserialize, Serialize};
|
||||
use tokio::sync::RwLock;
|
||||
|
||||
/// Logical clock for queue replication: `(version, origin)` pairs are
/// compared lexicographically (see `is_newer_revision`), so higher versions
/// win and the writing node's id breaks ties deterministically.
#[derive(Clone, Copy, Debug, Default, Eq, PartialEq, Serialize, Deserialize)]
pub struct QueueRevision {
    /// Change counter, bumped on every local mutation.
    pub version: u64,
    /// Id of the node that performed the last mutation.
    pub origin: u64,
}

/// One queued entry.
#[derive(Clone, Debug, Eq, PartialEq, Serialize, Deserialize)]
pub struct QueueMessage {
    pub id: u64,
    pub payload: String,
}

/// Full queue contents exchanged between peers via `/internal/snapshot`.
#[derive(Clone, Debug, Serialize, Deserialize)]
pub struct Snapshot {
    pub node_id: u64,
    pub revision: QueueRevision,
    pub messages: Vec<QueueMessage>,
}

/// Compact summary served by `/queue/state`.
#[derive(Clone, Debug, Eq, PartialEq, Serialize)]
pub struct QueueStateView {
    pub revision: QueueRevision,
    pub queue_len: usize,
    pub head_id: Option<u64>,
    pub tail_id: Option<u64>,
}

/// Result of a local enqueue.
#[derive(Clone, Debug)]
pub struct EnqueueOutcome {
    /// Always `true` in the current implementation (see `enqueue_local`);
    /// kept so the API can later reject writes.
    pub accepted: bool,
    pub id: u64,
    pub queue_len: usize,
    pub revision: QueueRevision,
}

/// Result of a local dequeue; `message` is `None` on an empty queue.
#[derive(Clone, Debug)]
pub struct DequeueOutcome {
    pub message: Option<QueueMessage>,
    pub queue_len: usize,
    pub revision: QueueRevision,
}

/// Mutable core guarded by the `RwLock` inside `QueueState`.
#[derive(Debug, Default)]
struct QueueData {
    revision: QueueRevision,
    messages: VecDeque<QueueMessage>,
}

/// Shared, cheaply clonable handle to one node's queue state.
#[derive(Clone)]
pub struct QueueState {
    node_id: u64,
    /// Flipped to `true` by the server once its listener is bound.
    ready: Arc<RwLock<bool>>,
    data: Arc<RwLock<QueueData>>,
}
|
||||
|
||||
impl QueueState {
    /// Fresh, empty, not-ready state for `node_id`.
    pub fn new(node_id: u64) -> Self {
        Self {
            node_id,
            ready: Arc::new(RwLock::new(false)),
            data: Arc::new(RwLock::new(QueueData::default())),
        }
    }

    pub const fn node_id(&self) -> u64 {
        self.node_id
    }

    /// Set the readiness flag reported by `/health/ready`.
    pub async fn set_ready(&self, value: bool) {
        *self.ready.write().await = value;
    }

    pub async fn is_ready(&self) -> bool {
        *self.ready.read().await
    }

    /// Append a message locally, assign it the next id, and bump the revision.
    ///
    /// NOTE(review): ids are derived from the current tail, so they restart
    /// from 1 after the queue fully drains — see `next_message_id`.
    pub async fn enqueue_local(&self, payload: String) -> EnqueueOutcome {
        let mut data = self.data.write().await;
        let id = next_message_id(&data.messages);
        data.messages.push_back(QueueMessage { id, payload });
        bump_revision(&mut data.revision, self.node_id);

        EnqueueOutcome {
            accepted: true,
            id,
            queue_len: data.messages.len(),
            revision: data.revision,
        }
    }

    /// Pop the head locally. The revision is bumped only when something was
    /// actually removed, so a dequeue on an empty queue is not a state change.
    pub async fn dequeue_local(&self) -> DequeueOutcome {
        let mut data = self.data.write().await;
        let message = data.messages.pop_front();
        if message.is_some() {
            bump_revision(&mut data.revision, self.node_id);
        }

        DequeueOutcome {
            message,
            queue_len: data.messages.len(),
            revision: data.revision,
        }
    }

    /// Cheap summary for `/queue/state`.
    pub async fn queue_state(&self) -> QueueStateView {
        let data = self.data.read().await;
        QueueStateView {
            revision: data.revision,
            queue_len: data.messages.len(),
            head_id: data.messages.front().map(|message| message.id),
            tail_id: data.messages.back().map(|message| message.id),
        }
    }

    /// Last-writer-wins merge: when the snapshot's revision is strictly
    /// newer, the local queue is replaced wholesale. Local messages enqueued
    /// since the winning snapshot was taken are therefore discarded —
    /// acceptable for this demo's convergence semantics, not for a durable
    /// queue.
    pub async fn merge_snapshot(&self, snapshot: Snapshot) {
        let mut data = self.data.write().await;
        if is_newer_revision(snapshot.revision, data.revision) {
            data.revision = snapshot.revision;
            data.messages = snapshot.messages.into();
        }
    }

    /// Full copy of the queue for peers (`/internal/snapshot`).
    pub async fn snapshot(&self) -> Snapshot {
        let data = self.data.read().await;
        Snapshot {
            node_id: self.node_id,
            revision: data.revision,
            messages: data.messages.iter().cloned().collect(),
        }
    }
}
|
||||
|
||||
fn next_message_id(messages: &VecDeque<QueueMessage>) -> u64 {
|
||||
messages
|
||||
.back()
|
||||
.map_or(1, |message| message.id.saturating_add(1))
|
||||
}
|
||||
|
||||
fn bump_revision(revision: &mut QueueRevision, node_id: u64) {
|
||||
revision.version = revision.version.saturating_add(1);
|
||||
revision.origin = node_id;
|
||||
}
|
||||
|
||||
fn is_newer_revision(candidate: QueueRevision, existing: QueueRevision) -> bool {
|
||||
(candidate.version, candidate.origin) > (existing.version, existing.origin)
|
||||
}
|
||||
103
examples/queue/queue-node/src/sync.rs
Normal file
103
examples/queue/queue-node/src/sync.rs
Normal file
@ -0,0 +1,103 @@
|
||||
use std::{collections::HashMap, sync::Arc, time::Duration};
|
||||
|
||||
use reqwest::Client;
|
||||
use tokio::sync::Mutex;
|
||||
use tracing::{debug, warn};
|
||||
|
||||
use crate::{
|
||||
config::QueueConfig,
|
||||
state::{QueueState, Snapshot},
|
||||
};
|
||||
|
||||
/// After this many consecutive failures against one peer, escalate the
/// sync-failure log from `debug` to `warn`.
const WARN_AFTER_CONSECUTIVE_FAILURES: u32 = 5;

/// Background task that periodically pulls full snapshots from every
/// configured peer and merges them into local state (pull-based replication).
#[derive(Clone)]
pub struct SyncService {
    config: Arc<QueueConfig>,
    state: QueueState,
    client: Client,
    /// Consecutive failure count per peer address; cleared on success.
    failures_by_peer: Arc<Mutex<HashMap<String, u32>>>,
}
|
||||
|
||||
impl SyncService {
|
||||
pub fn new(config: QueueConfig, state: QueueState) -> Self {
|
||||
Self {
|
||||
config: Arc::new(config),
|
||||
state,
|
||||
client: Client::new(),
|
||||
failures_by_peer: Arc::new(Mutex::new(HashMap::new())),
|
||||
}
|
||||
}
|
||||
|
||||
pub fn start(&self) {
|
||||
let service = self.clone();
|
||||
tokio::spawn(async move {
|
||||
service.run().await;
|
||||
});
|
||||
}
|
||||
|
||||
async fn run(self) {
|
||||
let interval = Duration::from_millis(self.config.sync_interval_ms.max(100));
|
||||
loop {
|
||||
self.sync_once().await;
|
||||
tokio::time::sleep(interval).await;
|
||||
}
|
||||
}
|
||||
|
||||
async fn sync_once(&self) {
|
||||
for peer in &self.config.peers {
|
||||
match self.fetch_snapshot(&peer.http_address).await {
|
||||
Ok(snapshot) => {
|
||||
self.state.merge_snapshot(snapshot).await;
|
||||
self.clear_failure_counter(&peer.http_address).await;
|
||||
}
|
||||
Err(error) => {
|
||||
self.record_sync_failure(&peer.http_address, &error).await;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
async fn fetch_snapshot(&self, peer_address: &str) -> anyhow::Result<Snapshot> {
|
||||
let url = format!("http://{peer_address}/internal/snapshot");
|
||||
let snapshot = self
|
||||
.client
|
||||
.get(url)
|
||||
.send()
|
||||
.await?
|
||||
.error_for_status()?
|
||||
.json()
|
||||
.await?;
|
||||
Ok(snapshot)
|
||||
}
|
||||
|
||||
async fn clear_failure_counter(&self, peer_address: &str) {
|
||||
let mut failures = self.failures_by_peer.lock().await;
|
||||
failures.remove(peer_address);
|
||||
}
|
||||
|
||||
async fn record_sync_failure(&self, peer_address: &str, error: &anyhow::Error) {
|
||||
let consecutive_failures = {
|
||||
let mut failures = self.failures_by_peer.lock().await;
|
||||
let entry = failures.entry(peer_address.to_owned()).or_insert(0);
|
||||
*entry += 1;
|
||||
*entry
|
||||
};
|
||||
|
||||
if consecutive_failures >= WARN_AFTER_CONSECUTIVE_FAILURES {
|
||||
warn!(
|
||||
peer = %peer_address,
|
||||
%error,
|
||||
consecutive_failures,
|
||||
"queue sync repeatedly failing"
|
||||
);
|
||||
} else {
|
||||
debug!(
|
||||
peer = %peer_address,
|
||||
%error,
|
||||
consecutive_failures,
|
||||
"queue sync failed"
|
||||
);
|
||||
}
|
||||
}
|
||||
}
|
||||
13
examples/queue/testing/integration/Cargo.toml
Normal file
13
examples/queue/testing/integration/Cargo.toml
Normal file
@ -0,0 +1,13 @@
|
||||
[package]
|
||||
edition.workspace = true
|
||||
license.workspace = true
|
||||
name = "queue-runtime-ext"
|
||||
version.workspace = true
|
||||
|
||||
[dependencies]
|
||||
async-trait = { workspace = true }
|
||||
queue-node = { path = "../../queue-node" }
|
||||
serde = { workspace = true }
|
||||
testing-framework-core = { workspace = true }
|
||||
testing-framework-runner-compose = { workspace = true }
|
||||
testing-framework-runner-local = { workspace = true }
|
||||
75
examples/queue/testing/integration/src/app.rs
Normal file
75
examples/queue/testing/integration/src/app.rs
Normal file
@ -0,0 +1,75 @@
|
||||
use std::io::Error;
|
||||
|
||||
use async_trait::async_trait;
|
||||
use queue_node::QueueHttpClient;
|
||||
use serde::{Deserialize, Serialize};
|
||||
use testing_framework_core::scenario::{
|
||||
Application, ClusterNodeConfigApplication, ClusterNodeView, ClusterPeerView, DynError,
|
||||
NodeAccess, serialize_cluster_yaml_config,
|
||||
};
|
||||
|
||||
/// The queue demo reuses the framework's generic cluster topology.
pub type QueueTopology = testing_framework_core::topology::ClusterTopology;

/// Peer entry rendered into each node's YAML config.
///
/// Mirrors `queue-node`'s `PeerInfo` field-for-field; the two must stay in
/// sync because the node deserializes exactly what this crate serializes.
#[derive(Clone, Debug, Serialize, Deserialize)]
pub struct QueuePeerInfo {
    pub node_id: u64,
    pub http_address: String,
}

/// Full node configuration rendered to YAML for `queue-node`.
/// Mirrors `queue-node`'s `QueueConfig`.
#[derive(Clone, Debug, Serialize, Deserialize)]
pub struct QueueNodeConfig {
    pub node_id: u64,
    pub http_port: u16,
    pub peers: Vec<QueuePeerInfo>,
    pub sync_interval_ms: u64,
}
|
||||
|
||||
/// Marker type wiring the queue demo into the testing framework.
pub struct QueueEnv;

#[async_trait]
impl Application for QueueEnv {
    type Deployment = QueueTopology;
    type NodeClient = QueueHttpClient;
    type NodeConfig = QueueNodeConfig;
    /// Build an HTTP client pointed at the node's API base URL.
    fn build_node_client(access: &NodeAccess) -> Result<Self::NodeClient, DynError> {
        Ok(QueueHttpClient::new(access.api_base_url()?))
    }

    /// Endpoint the framework polls until the node reports ready
    /// (the node returns 503 here until its listener is bound).
    fn node_readiness_path() -> &'static str {
        "/health/ready"
    }
}
|
||||
|
||||
impl ClusterNodeConfigApplication for QueueEnv {
    type ConfigError = Error;

    /// Port every queue node listens on inside its network namespace.
    fn static_network_port() -> u16 {
        8080
    }

    /// Derive one node's config from its cluster position. Node ids are the
    /// 0-based node/peer indices, and each peer is addressed by the
    /// authority string the framework reports for it.
    /// NOTE(review): assumes the framework's peer view excludes the node
    /// itself — confirm against `ClusterPeerView`'s contract.
    fn build_cluster_node_config(
        node: &ClusterNodeView,
        peers: &[ClusterPeerView],
    ) -> Result<Self::NodeConfig, Self::ConfigError> {
        let peers = peers
            .iter()
            .map(|peer| QueuePeerInfo {
                node_id: peer.index() as u64,
                http_address: peer.authority(),
            })
            .collect::<Vec<_>>();

        Ok(QueueNodeConfig {
            node_id: node.index() as u64,
            http_port: node.network_port(),
            peers,
            // Faster than the node's 1000 ms default so tests converge sooner.
            sync_interval_ms: 500,
        })
    }

    /// Render the config as YAML — the format `queue-node` loads at startup.
    fn serialize_cluster_node_config(
        config: &Self::NodeConfig,
    ) -> Result<String, Self::ConfigError> {
        serialize_cluster_yaml_config(config).map_err(Error::other)
    }
}
|
||||
15
examples/queue/testing/integration/src/compose_env.rs
Normal file
15
examples/queue/testing/integration/src/compose_env.rs
Normal file
@ -0,0 +1,15 @@
|
||||
use testing_framework_runner_compose::{BinaryConfigNodeSpec, ComposeBinaryApp};
|
||||
|
||||
use crate::QueueEnv;
|
||||
|
||||
/// Where the rendered YAML config is mounted inside each compose container.
const NODE_CONFIG_PATH: &str = "/etc/queue/config.yaml";

impl ComposeBinaryApp for QueueEnv {
    /// Compose node spec: run the installed `queue-node` binary against the
    /// mounted config, with ports 8080/8081 listed (presumably the exposed
    /// container ports — confirm against `BinaryConfigNodeSpec::conventional`).
    fn compose_node_spec() -> BinaryConfigNodeSpec {
        BinaryConfigNodeSpec::conventional(
            "/usr/local/bin/queue-node",
            NODE_CONFIG_PATH,
            vec![8080, 8081],
        )
    }
}
|
||||
10
examples/queue/testing/integration/src/lib.rs
Normal file
10
examples/queue/testing/integration/src/lib.rs
Normal file
@ -0,0 +1,10 @@
|
||||
mod app;
|
||||
mod compose_env;
|
||||
mod local_env;
|
||||
pub mod scenario;
|
||||
|
||||
pub use app::*;
|
||||
pub use scenario::{QueueBuilderExt, QueueScenarioBuilder};
|
||||
|
||||
pub type QueueLocalDeployer = testing_framework_runner_local::ProcessDeployer<QueueEnv>;
|
||||
pub type QueueComposeDeployer = testing_framework_runner_compose::ComposeDeployer<QueueEnv>;
|
||||
41
examples/queue/testing/integration/src/local_env.rs
Normal file
41
examples/queue/testing/integration/src/local_env.rs
Normal file
@ -0,0 +1,41 @@
|
||||
use std::collections::HashMap;
|
||||
|
||||
use testing_framework_core::scenario::{DynError, StartNodeOptions};
|
||||
use testing_framework_runner_local::{
|
||||
LocalBinaryApp, LocalNodePorts, LocalPeerNode, LocalProcessSpec,
|
||||
build_local_cluster_node_config, yaml_node_config,
|
||||
};
|
||||
|
||||
use crate::{QueueEnv, QueueNodeConfig};
|
||||
|
||||
impl LocalBinaryApp for QueueEnv {
    /// Prefix used when naming locally spawned node processes.
    fn initial_node_name_prefix() -> &'static str {
        "queue-node"
    }

    /// Delegate config construction to the framework's generic cluster
    /// helper; topology, options, and template config are unused because
    /// `build_cluster_node_config` only needs index, ports, and peers.
    fn build_local_node_config_with_peers(
        _topology: &Self::Deployment,
        index: usize,
        ports: &LocalNodePorts,
        peers: &[LocalPeerNode],
        _peer_ports_by_name: &HashMap<String, u16>,
        _options: &StartNodeOptions<Self>,
        _template_config: Option<
            &<Self as testing_framework_core::scenario::Application>::NodeConfig,
        >,
    ) -> Result<<Self as testing_framework_core::scenario::Application>::NodeConfig, DynError> {
        build_local_cluster_node_config::<Self>(index, ports, peers)
    }

    /// Process spec for a locally spawned node: binary located via the
    /// `QUEUE_NODE_BIN` env var with `queue-node` as the fallback name
    /// (presumably — confirm against `LocalProcessSpec::new`), logging at
    /// `queue_node=info`.
    fn local_process_spec() -> LocalProcessSpec {
        LocalProcessSpec::new("QUEUE_NODE_BIN", "queue-node").with_rust_log("queue_node=info")
    }

    /// Serialize the node config as YAML, matching what `queue-node` loads.
    fn render_local_config(config: &QueueNodeConfig) -> Result<Vec<u8>, DynError> {
        yaml_node_config(config)
    }

    /// The node's HTTP API listens on the configured `http_port`.
    fn http_api_port(config: &QueueNodeConfig) -> u16 {
        config.http_port
    }
}
|
||||
15
examples/queue/testing/integration/src/scenario.rs
Normal file
15
examples/queue/testing/integration/src/scenario.rs
Normal file
@ -0,0 +1,15 @@
|
||||
use testing_framework_core::scenario::ScenarioBuilder;
|
||||
|
||||
use crate::{QueueEnv, QueueTopology};
|
||||
|
||||
/// Scenario builder specialized for the queue application.
pub type QueueScenarioBuilder = ScenarioBuilder<QueueEnv>;

/// Convenience constructor so examples can write
/// `QueueScenarioBuilder::deployment_with(|t| ...)`.
pub trait QueueBuilderExt: Sized {
    /// Build a scenario whose topology is derived from the 3-node default.
    fn deployment_with(f: impl FnOnce(QueueTopology) -> QueueTopology) -> Self;
}

impl QueueBuilderExt for QueueScenarioBuilder {
    fn deployment_with(f: impl FnOnce(QueueTopology) -> QueueTopology) -> Self {
        // The closure receives a 3-node topology as a seed and may adjust or
        // replace it entirely.
        QueueScenarioBuilder::with_deployment(f(QueueTopology::new(3)))
    }
}
|
||||
14
examples/queue/testing/workloads/Cargo.toml
Normal file
14
examples/queue/testing/workloads/Cargo.toml
Normal file
@ -0,0 +1,14 @@
|
||||
[package]
|
||||
edition.workspace = true
|
||||
license.workspace = true
|
||||
name = "queue-runtime-workloads"
|
||||
version.workspace = true
|
||||
|
||||
[dependencies]
|
||||
async-trait = { workspace = true }
|
||||
queue-node = { path = "../../queue-node" }
|
||||
queue-runtime-ext = { path = "../integration" }
|
||||
serde = { workspace = true }
|
||||
testing-framework-core = { workspace = true }
|
||||
tokio = { workspace = true, features = ["full"] }
|
||||
tracing = { workspace = true }
|
||||
104
examples/queue/testing/workloads/src/drained.rs
Normal file
104
examples/queue/testing/workloads/src/drained.rs
Normal file
@ -0,0 +1,104 @@
|
||||
use std::time::Duration;
|
||||
|
||||
use async_trait::async_trait;
|
||||
use queue_runtime_ext::QueueEnv;
|
||||
use serde::Deserialize;
|
||||
use testing_framework_core::scenario::{DynError, Expectation, RunContext};
|
||||
use tracing::info;
|
||||
|
||||
/// Expectation that every node ends with an EMPTY queue and that all nodes
/// agree on the exact same final state.
#[derive(Clone)]
pub struct QueueDrained {
    /// Overall deadline for the drain check.
    timeout: Duration,
    /// Delay between successive cluster-wide checks.
    poll_interval: Duration,
}

/// Local mirror of the node's revision JSON (deserialize-only).
#[derive(Clone, Debug, Deserialize, Eq, PartialEq)]
struct QueueRevision {
    version: u64,
    origin: u64,
}

/// Local mirror of the node's `/queue/state` response.
#[derive(Clone, Debug, Deserialize, Eq, PartialEq)]
struct QueueStateResponse {
    revision: QueueRevision,
    queue_len: usize,
    head_id: Option<u64>,
    tail_id: Option<u64>,
}
|
||||
|
||||
impl QueueDrained {
|
||||
#[must_use]
|
||||
pub fn new() -> Self {
|
||||
Self {
|
||||
timeout: Duration::from_secs(20),
|
||||
poll_interval: Duration::from_millis(500),
|
||||
}
|
||||
}
|
||||
|
||||
#[must_use]
|
||||
pub const fn timeout(mut self, timeout: Duration) -> Self {
|
||||
self.timeout = timeout;
|
||||
self
|
||||
}
|
||||
}
|
||||
|
||||
impl Default for QueueDrained {
|
||||
fn default() -> Self {
|
||||
Self::new()
|
||||
}
|
||||
}
|
||||
|
||||
#[async_trait]
impl Expectation<QueueEnv> for QueueDrained {
    fn name(&self) -> &str {
        "queue_drained"
    }

    /// Poll all nodes until every queue is empty and all nodes report
    /// identical state, failing once `timeout` elapses.
    async fn evaluate(&mut self, ctx: &RunContext<QueueEnv>) -> Result<(), DynError> {
        let clients = ctx.node_clients().snapshot();
        if clients.is_empty() {
            return Err("no queue node clients available".into());
        }

        let deadline = tokio::time::Instant::now() + self.timeout;
        while tokio::time::Instant::now() < deadline {
            if is_drained_and_converged(&clients).await? {
                info!("queue drained and converged");
                return Ok(());
            }
            tokio::time::sleep(self.poll_interval).await;
        }

        Err(format!("queue not drained within {:?}", self.timeout).into())
    }
}
|
||||
|
||||
/// True when the first node reports an empty queue AND every other node
/// reports exactly the same state (revision included).
async fn is_drained_and_converged(
    clients: &[queue_node::QueueHttpClient],
) -> Result<bool, DynError> {
    let Some((first, rest)) = clients.split_first() else {
        // No clients: nothing to verify yet.
        return Ok(false);
    };

    let baseline = read_state(first).await?;
    if !is_drained(&baseline) {
        return Ok(false);
    }

    for client in rest {
        let current = read_state(client).await?;
        if current != baseline {
            return Ok(false);
        }
    }

    Ok(true)
}

/// A node counts as drained when its queue is empty with no head/tail ids.
fn is_drained(state: &QueueStateResponse) -> bool {
    state.queue_len == 0 && state.head_id.is_none() && state.tail_id.is_none()
}

/// Fetch `/queue/state` from one node.
async fn read_state(client: &queue_node::QueueHttpClient) -> Result<QueueStateResponse, DynError> {
    Ok(client.get("/queue/state").await?)
}
|
||||
106
examples/queue/testing/workloads/src/expectations.rs
Normal file
106
examples/queue/testing/workloads/src/expectations.rs
Normal file
@ -0,0 +1,106 @@
|
||||
use std::time::Duration;
|
||||
|
||||
use async_trait::async_trait;
|
||||
use queue_runtime_ext::QueueEnv;
|
||||
use serde::Deserialize;
|
||||
use testing_framework_core::scenario::{DynError, Expectation, RunContext};
|
||||
use tracing::info;
|
||||
|
||||
/// Expectation that all nodes converge on identical queue state holding at
/// least `min_queue_len` entries.
#[derive(Clone)]
pub struct QueueConverges {
    /// Minimum queue length the converged state must reach.
    min_queue_len: usize,
    /// Overall deadline for convergence.
    timeout: Duration,
    /// Delay between successive cluster-wide checks.
    poll_interval: Duration,
}

/// Local mirror of the node's revision JSON (deserialize-only).
#[derive(Clone, Debug, Deserialize, Eq, PartialEq)]
struct QueueRevision {
    version: u64,
    origin: u64,
}

/// Local mirror of the node's `/queue/state` response.
#[derive(Clone, Debug, Deserialize, Eq, PartialEq)]
struct QueueStateResponse {
    revision: QueueRevision,
    queue_len: usize,
    head_id: Option<u64>,
    tail_id: Option<u64>,
}
|
||||
|
||||
impl QueueConverges {
|
||||
#[must_use]
|
||||
pub fn new(min_queue_len: usize) -> Self {
|
||||
Self {
|
||||
min_queue_len,
|
||||
timeout: Duration::from_secs(20),
|
||||
poll_interval: Duration::from_millis(500),
|
||||
}
|
||||
}
|
||||
|
||||
#[must_use]
|
||||
pub const fn timeout(mut self, timeout: Duration) -> Self {
|
||||
self.timeout = timeout;
|
||||
self
|
||||
}
|
||||
}
|
||||
|
||||
#[async_trait]
impl Expectation<QueueEnv> for QueueConverges {
    fn name(&self) -> &str {
        "queue_converges"
    }

    /// Poll all nodes until they agree on a queue of at least
    /// `min_queue_len` entries, failing once `timeout` elapses.
    async fn evaluate(&mut self, ctx: &RunContext<QueueEnv>) -> Result<(), DynError> {
        let clients = ctx.node_clients().snapshot();
        if clients.is_empty() {
            return Err("no queue node clients available".into());
        }

        let deadline = tokio::time::Instant::now() + self.timeout;
        while tokio::time::Instant::now() < deadline {
            if self.is_converged(&clients).await? {
                info!(
                    min_queue_len = self.min_queue_len,
                    "queue convergence reached"
                );
                return Ok(());
            }
            tokio::time::sleep(self.poll_interval).await;
        }

        Err(format!(
            "queue convergence not reached within {:?} (min_queue_len={})",
            self.timeout, self.min_queue_len
        )
        .into())
    }
}
|
||||
|
||||
impl QueueConverges {
    /// True when the first node holds at least `min_queue_len` entries and
    /// every other node reports exactly the same state (same revision,
    /// length, and head/tail ids).
    async fn is_converged(
        &self,
        clients: &[queue_node::QueueHttpClient],
    ) -> Result<bool, DynError> {
        let Some((first, rest)) = clients.split_first() else {
            // No clients: nothing to compare yet.
            return Ok(false);
        };

        let baseline = read_state(first).await?;
        if baseline.queue_len < self.min_queue_len {
            return Ok(false);
        }

        for client in rest {
            let current = read_state(client).await?;
            if current != baseline {
                return Ok(false);
            }
        }

        Ok(true)
    }
}

/// Fetch `/queue/state` from one node.
async fn read_state(client: &queue_node::QueueHttpClient) -> Result<QueueStateResponse, DynError> {
    Ok(client.get("/queue/state").await?)
}
|
||||
10
examples/queue/testing/workloads/src/lib.rs
Normal file
10
examples/queue/testing/workloads/src/lib.rs
Normal file
@ -0,0 +1,10 @@
|
||||
mod drained;
|
||||
mod expectations;
|
||||
mod produce;
|
||||
mod roundtrip;
|
||||
|
||||
pub use drained::QueueDrained;
|
||||
pub use expectations::QueueConverges;
|
||||
pub use produce::QueueProduceWorkload;
|
||||
pub use queue_runtime_ext::{QueueBuilderExt, QueueEnv, QueueScenarioBuilder, QueueTopology};
|
||||
pub use roundtrip::QueueRoundTripWorkload;
|
||||
116
examples/queue/testing/workloads/src/produce.rs
Normal file
116
examples/queue/testing/workloads/src/produce.rs
Normal file
@ -0,0 +1,116 @@
|
||||
use std::time::Duration;
|
||||
|
||||
use async_trait::async_trait;
|
||||
use queue_runtime_ext::QueueEnv;
|
||||
use serde::{Deserialize, Serialize};
|
||||
use testing_framework_core::scenario::{DynError, RunContext, Workload};
|
||||
use tracing::info;
|
||||
|
||||
/// Workload that enqueues a fixed number of messages against the first node.
#[derive(Clone)]
pub struct QueueProduceWorkload {
    /// Total number of enqueue operations to perform.
    operations: usize,
    /// Target pacing; `None` disables rate limiting.
    rate_per_sec: Option<usize>,
    /// Payloads are formatted as `"{payload_prefix}-{index}"`.
    payload_prefix: String,
}

/// Body sent to `POST /queue/enqueue`.
#[derive(Serialize)]
struct EnqueueRequest {
    payload: String,
}

/// Subset of the node's enqueue response this workload reads; the node also
/// returns a `revision` field, which serde ignores here by default.
#[derive(Deserialize)]
struct EnqueueResponse {
    accepted: bool,
    id: u64,
    queue_len: usize,
}
|
||||
|
||||
impl QueueProduceWorkload {
    /// Defaults: 200 operations at 25/s with payload prefix "queue-demo".
    #[must_use]
    pub fn new() -> Self {
        Self {
            operations: 200,
            rate_per_sec: Some(25),
            payload_prefix: "queue-demo".to_owned(),
        }
    }

    /// Set the total number of enqueue operations.
    #[must_use]
    pub const fn operations(mut self, value: usize) -> Self {
        self.operations = value;
        self
    }

    /// Set the target enqueue rate; 0 effectively disables pacing
    /// (see `compute_interval`).
    #[must_use]
    pub const fn rate_per_sec(mut self, value: usize) -> Self {
        self.rate_per_sec = Some(value);
        self
    }

    /// Set the prefix used when formatting message payloads.
    #[must_use]
    pub fn payload_prefix(mut self, value: impl Into<String>) -> Self {
        self.payload_prefix = value.into();
        self
    }
}

impl Default for QueueProduceWorkload {
    fn default() -> Self {
        Self::new()
    }
}
|
||||
|
||||
#[async_trait]
|
||||
impl Workload<QueueEnv> for QueueProduceWorkload {
|
||||
fn name(&self) -> &str {
|
||||
"queue_produce_workload"
|
||||
}
|
||||
|
||||
async fn start(&self, ctx: &RunContext<QueueEnv>) -> Result<(), DynError> {
|
||||
let clients = ctx.node_clients().snapshot();
|
||||
let Some(producer) = clients.first() else {
|
||||
return Err("no queue node clients available".into());
|
||||
};
|
||||
|
||||
let interval = self.rate_per_sec.and_then(compute_interval);
|
||||
info!(
|
||||
operations = self.operations,
|
||||
rate_per_sec = ?self.rate_per_sec,
|
||||
"starting queue produce workload"
|
||||
);
|
||||
|
||||
for idx in 0..self.operations {
|
||||
let payload = format!("{}-{idx}", self.payload_prefix);
|
||||
let response: EnqueueResponse = producer
|
||||
.post("/queue/enqueue", &EnqueueRequest { payload })
|
||||
.await?;
|
||||
|
||||
if !response.accepted {
|
||||
return Err(format!("node rejected enqueue at operation {idx}").into());
|
||||
}
|
||||
|
||||
if (idx + 1) % 25 == 0 {
|
||||
info!(
|
||||
completed = idx + 1,
|
||||
last_id = response.id,
|
||||
queue_len = response.queue_len,
|
||||
"queue produce progress"
|
||||
);
|
||||
}
|
||||
|
||||
if let Some(delay) = interval {
|
||||
tokio::time::sleep(delay).await;
|
||||
}
|
||||
}
|
||||
|
||||
Ok(())
|
||||
}
|
||||
}
|
||||
|
||||
/// Converts a requested operations-per-second rate into a per-operation delay.
///
/// Returns `None` for a zero rate, which callers treat as "unthrottled".
/// Uses microsecond resolution so non-divisor rates (e.g. 3/s) are not
/// distorted by integer-millisecond truncation; the delay is clamped to at
/// least 1µs so extremely high rates still produce a non-zero pause.
fn compute_interval(rate_per_sec: usize) -> Option<Duration> {
    if rate_per_sec == 0 {
        return None;
    }

    Some(Duration::from_micros((1_000_000 / rate_per_sec as u64).max(1)))
}
|
||||
179
examples/queue/testing/workloads/src/roundtrip.rs
Normal file
179
examples/queue/testing/workloads/src/roundtrip.rs
Normal file
@ -0,0 +1,179 @@
|
||||
use std::{collections::HashSet, time::Duration};
|
||||
|
||||
use async_trait::async_trait;
|
||||
use queue_runtime_ext::QueueEnv;
|
||||
use serde::{Deserialize, Serialize};
|
||||
use testing_framework_core::scenario::{DynError, RunContext, Workload};
|
||||
use tokio::time::{Instant, sleep};
|
||||
use tracing::info;
|
||||
|
||||
/// Produce-then-consume workload that checks every enqueued message is
/// dequeued exactly once.
#[derive(Clone)]
pub struct QueueRoundTripWorkload {
    // Number of messages to enqueue (and later expect to dequeue).
    operations: usize,
    // Optional produce-phase throttle; `None` (or 0) means unthrottled.
    rate_per_sec: Option<usize>,
    // Prefix for generated payloads; the operation index is appended.
    payload_prefix: String,
    // Upper bound on the consume phase before the workload fails.
    drain_timeout: Duration,
    // Back-off between dequeue attempts while the queue reports empty.
    empty_poll_interval: Duration,
}

/// Request body for `POST /queue/enqueue`.
#[derive(Serialize)]
struct EnqueueRequest {
    payload: String,
}

/// Response body for `POST /queue/enqueue`.
#[derive(Deserialize)]
struct EnqueueResponse {
    accepted: bool,
    id: u64,
}

/// Empty request body for `POST /queue/dequeue`.
#[derive(Serialize)]
struct DequeueRequest {}

/// One message as returned by the dequeue endpoint.
#[derive(Deserialize)]
struct QueueMessage {
    id: u64,
    payload: String,
}

/// Response body for `POST /queue/dequeue`; `message` is `None` when the
/// queue is currently empty.
#[derive(Deserialize)]
struct DequeueResponse {
    message: Option<QueueMessage>,
}
|
||||
|
||||
impl QueueRoundTripWorkload {
|
||||
#[must_use]
|
||||
pub fn new() -> Self {
|
||||
Self {
|
||||
operations: 200,
|
||||
rate_per_sec: Some(25),
|
||||
payload_prefix: "queue-roundtrip".to_owned(),
|
||||
drain_timeout: Duration::from_secs(20),
|
||||
empty_poll_interval: Duration::from_millis(100),
|
||||
}
|
||||
}
|
||||
|
||||
#[must_use]
|
||||
pub const fn operations(mut self, value: usize) -> Self {
|
||||
self.operations = value;
|
||||
self
|
||||
}
|
||||
|
||||
#[must_use]
|
||||
pub const fn rate_per_sec(mut self, value: usize) -> Self {
|
||||
self.rate_per_sec = Some(value);
|
||||
self
|
||||
}
|
||||
|
||||
#[must_use]
|
||||
pub fn payload_prefix(mut self, value: impl Into<String>) -> Self {
|
||||
self.payload_prefix = value.into();
|
||||
self
|
||||
}
|
||||
|
||||
#[must_use]
|
||||
pub const fn drain_timeout(mut self, value: Duration) -> Self {
|
||||
self.drain_timeout = value;
|
||||
self
|
||||
}
|
||||
}
|
||||
|
||||
impl Default for QueueRoundTripWorkload {
    /// Equivalent to [`QueueRoundTripWorkload::new`].
    fn default() -> Self {
        Self::new()
    }
}
|
||||
|
||||
#[async_trait]
impl Workload<QueueEnv> for QueueRoundTripWorkload {
    fn name(&self) -> &str {
        "queue_roundtrip_workload"
    }

    /// Runs two phases against the first node client:
    /// 1. produce: enqueue `operations` messages, recording every server id;
    /// 2. consume: dequeue until every recorded id is seen exactly once or
    ///    `drain_timeout` elapses.
    async fn start(&self, ctx: &RunContext<QueueEnv>) -> Result<(), DynError> {
        let clients = ctx.node_clients().snapshot();
        let Some(driver) = clients.first() else {
            return Err("no queue node clients available".into());
        };

        // A zero/absent rate disables produce-phase pacing.
        let interval = self.rate_per_sec.and_then(compute_interval);
        // Ids still awaiting a matching dequeue; also detects duplicates.
        let mut produced_ids = HashSet::with_capacity(self.operations);

        info!(
            operations = self.operations,
            "queue roundtrip: produce phase"
        );
        for idx in 0..self.operations {
            let payload = format!("{}-{idx}", self.payload_prefix);
            let response: EnqueueResponse = driver
                .post("/queue/enqueue", &EnqueueRequest { payload })
                .await?;

            if !response.accepted {
                return Err(format!("enqueue rejected at operation {idx}").into());
            }

            // The server must hand out unique ids; a repeat is a node bug.
            if !produced_ids.insert(response.id) {
                return Err(format!("duplicate enqueue id observed: {}", response.id).into());
            }

            if let Some(delay) = interval {
                sleep(delay).await;
            }
        }

        info!(
            operations = self.operations,
            "queue roundtrip: consume phase"
        );
        let mut consumed = 0usize;
        let deadline = Instant::now() + self.drain_timeout;

        // Poll until everything is drained or the deadline passes.
        while consumed < self.operations && Instant::now() < deadline {
            let response: DequeueResponse =
                driver.post("/queue/dequeue", &DequeueRequest {}).await?;

            match response.message {
                Some(message) => {
                    if !message.payload.starts_with(&self.payload_prefix) {
                        return Err(format!("unexpected payload: {}", message.payload).into());
                    }
                    // `remove` enforces exactly-once delivery: a miss means the
                    // id was never produced or was already consumed.
                    if !produced_ids.remove(&message.id) {
                        return Err(
                            format!("unknown or duplicate dequeue id: {}", message.id).into()
                        );
                    }
                    consumed += 1;
                }
                // Empty queue: back off briefly before polling again.
                None => sleep(self.empty_poll_interval).await,
            }
        }

        if consumed != self.operations {
            return Err(format!(
                "queue roundtrip timed out: consumed {consumed}/{} messages",
                self.operations
            )
            .into());
        }

        // Defensive: with the counters above this set should always be empty.
        if !produced_ids.is_empty() {
            return Err(format!(
                "queue roundtrip ended with {} undrained produced ids",
                produced_ids.len()
            )
            .into());
        }

        info!(operations = self.operations, "queue roundtrip finished");
        Ok(())
    }
}
|
||||
|
||||
/// Converts a requested operations-per-second rate into a per-operation delay.
///
/// Returns `None` for a zero rate, which callers treat as "unthrottled".
/// Uses microsecond resolution so non-divisor rates (e.g. 3/s) are not
/// distorted by integer-millisecond truncation; the delay is clamped to at
/// least 1µs so extremely high rates still produce a non-zero pause.
fn compute_interval(rate_per_sec: usize) -> Option<Duration> {
    if rate_per_sec == 0 {
        return None;
    }

    Some(Duration::from_micros((1_000_000 / rate_per_sec as u64).max(1)))
}
|
||||
@ -25,11 +25,11 @@ Each example follows the same pattern:
|
||||
## Run with Docker Compose
|
||||
|
||||
```bash
|
||||
cargo run -p redis-streams-examples --bin compose_roundtrip
|
||||
cargo run -p redis-streams-examples --bin redis_streams_compose_roundtrip
|
||||
```
|
||||
|
||||
## Run the reclaim scenario
|
||||
|
||||
```bash
|
||||
cargo run -p redis-streams-examples --bin compose_failover
|
||||
cargo run -p redis-streams-examples --bin redis_streams_compose_failover
|
||||
```
|
||||
|
||||
@ -4,6 +4,14 @@ license.workspace = true
|
||||
name = "redis-streams-examples"
|
||||
version.workspace = true
|
||||
|
||||
[[bin]]
|
||||
name = "redis_streams_compose_roundtrip"
|
||||
path = "src/bin/compose_roundtrip.rs"
|
||||
|
||||
[[bin]]
|
||||
name = "redis_streams_compose_failover"
|
||||
path = "src/bin/compose_failover.rs"
|
||||
|
||||
[dependencies]
|
||||
anyhow = "1.0"
|
||||
redis-streams-runtime-ext = { path = "../testing/integration" }
|
||||
|
||||
@ -29,5 +29,5 @@ reqwest = { features = ["json"], workspace = true }
|
||||
serde = { workspace = true }
|
||||
serde_yaml = { workspace = true }
|
||||
thiserror = { workspace = true }
|
||||
tokio = { features = ["macros", "process", "rt-multi-thread", "time"], workspace = true }
|
||||
tokio = { features = ["macros", "process", "rt-multi-thread", "sync", "time"], workspace = true }
|
||||
tracing = { workspace = true }
|
||||
|
||||
@ -1,5 +1,6 @@
|
||||
pub mod cfgsync;
pub mod env;
// Generic continuous observation runtime; see the module docs for the model.
pub mod observation;
pub mod runtime;
pub mod scenario;
pub mod topology;
|
||||
|
||||
161
testing-framework/core/src/observation/factory.rs
Normal file
161
testing-framework/core/src/observation/factory.rs
Normal file
@ -0,0 +1,161 @@
|
||||
use std::{marker::PhantomData, sync::Arc};
|
||||
|
||||
use async_trait::async_trait;
|
||||
|
||||
use super::{
|
||||
ObservationConfig, ObservationHandle, ObservationRuntime, ObservedSource, Observer,
|
||||
SourceProvider,
|
||||
};
|
||||
use crate::scenario::{
|
||||
Application, DynError, NodeClients, PreparedRuntimeExtension, RuntimeExtensionFactory,
|
||||
};
|
||||
|
||||
/// Boxed source provider used by observation factories.
pub type BoxedSourceProvider<S> = Box<dyn SourceProvider<S>>;

/// Builds an observation source provider once node clients are available.
pub trait SourceProviderFactory<E: Application, S>: Send + Sync + 'static {
    /// Builds the source provider for one scenario run.
    ///
    /// `deployment` describes the deployed cluster; `node_clients` gives the
    /// factory access to the running nodes.
    fn build_source_provider(
        &self,
        deployment: &E::Deployment,
        node_clients: NodeClients<E>,
    ) -> Result<BoxedSourceProvider<S>, DynError>;
}
|
||||
|
||||
// Blanket impl: any closure with the matching signature can be passed
// wherever a `SourceProviderFactory` is expected.
impl<E, S, F> SourceProviderFactory<E, S> for F
where
    E: Application,
    S: Clone + Send + Sync + 'static,
    F: Fn(&E::Deployment, NodeClients<E>) -> Result<BoxedSourceProvider<S>, DynError>
        + Send
        + Sync
        + 'static,
{
    fn build_source_provider(
        &self,
        deployment: &E::Deployment,
        node_clients: NodeClients<E>,
    ) -> Result<BoxedSourceProvider<S>, DynError> {
        // Delegate straight to the closure.
        self(deployment, node_clients)
    }
}
|
||||
|
||||
/// Fixed source provider for scenario runs with a stable source set.
#[derive(Clone, Debug)]
pub struct StaticSourceProvider<S> {
    // Returned verbatim on every cycle.
    sources: Vec<ObservedSource<S>>,
}

impl<S> StaticSourceProvider<S> {
    /// Builds a provider from a fixed source list.
    #[must_use]
    pub fn new(sources: Vec<ObservedSource<S>>) -> Self {
        Self { sources }
    }
}

#[async_trait]
impl<S> SourceProvider<S> for StaticSourceProvider<S>
where
    S: Clone + Send + Sync + 'static,
{
    async fn sources(&self) -> Result<Vec<ObservedSource<S>>, DynError> {
        // Clone the stored set; the caller takes ownership per cycle.
        Ok(self.sources.clone())
    }
}
|
||||
|
||||
/// Runtime extension factory that starts one observer and stores its handle in
/// `RunContext`.
pub struct ObservationExtensionFactory<E: Application, O: Observer> {
    // Builds a fresh observer instance for each scenario run.
    observer_builder: Arc<dyn Fn() -> O + Send + Sync>,
    // Builds the source provider once node clients exist.
    source_provider_factory: Arc<dyn SourceProviderFactory<E, O::Source>>,
    // Runtime configuration shared by every run.
    config: ObservationConfig,
    // Ties the factory to one application environment without storing it.
    env_marker: PhantomData<E>,
}

impl<E: Application, O: Observer> ObservationExtensionFactory<E, O> {
    /// Builds an observation extension factory from builders.
    #[must_use]
    pub fn from_parts(
        observer_builder: impl Fn() -> O + Send + Sync + 'static,
        source_provider_factory: impl SourceProviderFactory<E, O::Source>,
        config: ObservationConfig,
    ) -> Self {
        Self {
            observer_builder: Arc::new(observer_builder),
            source_provider_factory: Arc::new(source_provider_factory),
            config,
            env_marker: PhantomData,
        }
    }
}
|
||||
|
||||
impl<E, O> ObservationExtensionFactory<E, O>
where
    E: Application,
    O: Observer + Clone,
{
    /// Builds an observation extension factory from one clonable observer.
    ///
    /// Each scenario run receives its own clone of `observer`.
    #[must_use]
    pub fn new(
        observer: O,
        source_provider_factory: impl SourceProviderFactory<E, O::Source>,
        config: ObservationConfig,
    ) -> Self {
        Self::from_parts(move || observer.clone(), source_provider_factory, config)
    }
}
|
||||
|
||||
#[async_trait]
impl<E, O> RuntimeExtensionFactory<E> for ObservationExtensionFactory<E, O>
where
    E: Application,
    O: Observer,
{
    async fn prepare(
        &self,
        deployment: &E::Deployment,
        node_clients: NodeClients<E>,
    ) -> Result<PreparedRuntimeExtension, DynError> {
        // Sources can only be discovered once node clients exist.
        let source_provider = self
            .source_provider_factory
            .build_source_provider(deployment, node_clients)?;

        let observer = (self.observer_builder)();
        let runtime =
            ObservationRuntime::start(source_provider, observer, self.config.clone()).await?;

        // Transfer the background task to the framework so it is supervised
        // alongside the scenario run.
        let (handle, task) = runtime.into_parts();

        Ok(PreparedRuntimeExtension::from_task(handle, task))
    }
}
|
||||
|
||||
// Forwarding impls so boxed and shared providers are themselves providers.
#[async_trait]
impl<S, P> SourceProvider<S> for Box<P>
where
    S: Clone + Send + Sync + 'static,
    P: SourceProvider<S> + ?Sized,
{
    async fn sources(&self) -> Result<Vec<ObservedSource<S>>, DynError> {
        (**self).sources().await
    }
}

#[async_trait]
impl<S, P> SourceProvider<S> for Arc<P>
where
    S: Clone + Send + Sync + 'static,
    P: SourceProvider<S> + ?Sized,
{
    async fn sources(&self) -> Result<Vec<ObservedSource<S>>, DynError> {
        (**self).sources().await
    }
}

// Lets a standalone observation handle be registered as a prepared extension.
impl<O: Observer> From<ObservationHandle<O>> for PreparedRuntimeExtension {
    fn from(handle: ObservationHandle<O>) -> Self {
        PreparedRuntimeExtension::new(handle)
    }
}
|
||||
503
testing-framework/core/src/observation/mod.rs
Normal file
503
testing-framework/core/src/observation/mod.rs
Normal file
@ -0,0 +1,503 @@
|
||||
//! Generic continuous observation runtime.
|
||||
//!
|
||||
//! This module provides the reusable runtime needed by both TF scenarios and
|
||||
//! manual-cluster consumers such as Cucumber worlds. It does not know any app
|
||||
//! semantics. Apps provide source types, observation logic, materialized state,
|
||||
//! snapshots, and delta events.
|
||||
|
||||
mod factory;
|
||||
|
||||
use std::{
|
||||
any::type_name,
|
||||
collections::VecDeque,
|
||||
sync::Arc,
|
||||
time::{Duration, SystemTime},
|
||||
};
|
||||
|
||||
use async_trait::async_trait;
|
||||
pub use factory::{
|
||||
BoxedSourceProvider, ObservationExtensionFactory, SourceProviderFactory, StaticSourceProvider,
|
||||
};
|
||||
use parking_lot::Mutex;
|
||||
use tokio::{
|
||||
sync::broadcast,
|
||||
task::JoinHandle,
|
||||
time::{MissedTickBehavior, interval},
|
||||
};
|
||||
use tracing::{debug, info, warn};
|
||||
|
||||
use crate::scenario::DynError;
|
||||
|
||||
/// Configuration for a background observation runtime.
#[derive(Clone, Debug)]
pub struct ObservationConfig {
    /// Time between observation cycles. Must be non-zero.
    pub interval: Duration,
    /// Maximum number of non-empty event batches retained in memory.
    /// A limit of zero disables history retention.
    pub history_limit: usize,
}

impl Default for ObservationConfig {
    /// One-second cycles, retaining the 64 most recent batches.
    fn default() -> Self {
        Self {
            interval: Duration::from_secs(1),
            history_limit: 64,
        }
    }
}
|
||||
|
||||
/// One named observation source.
#[derive(Clone, Debug)]
pub struct ObservedSource<S> {
    /// Human-readable source name used in logs and app-level reporting.
    pub name: String,
    /// App-owned source handle.
    pub source: S,
}

impl<S> ObservedSource<S> {
    /// Builds one named observation source.
    ///
    /// Accepts anything string-like for `name` (`&str`, `String`, `&String`),
    /// so callers that already own a `String` need not borrow it explicitly.
    #[must_use]
    pub fn new(name: impl AsRef<str>, source: S) -> Self {
        Self {
            name: name.as_ref().to_owned(),
            source,
        }
    }
}
|
||||
|
||||
/// Supplies the current observation source set.
#[async_trait]
pub trait SourceProvider<S>: Send + Sync + 'static {
    /// Returns the current source set for the next observation cycle.
    ///
    /// Called once per cycle, so implementations may return a different set
    /// each time (e.g. after membership changes).
    async fn sources(&self) -> Result<Vec<ObservedSource<S>>, DynError>;
}

/// App-owned observation logic.
#[async_trait]
pub trait Observer: Send + Sync + 'static {
    /// App-owned source type.
    type Source: Clone + Send + Sync + 'static;
    /// App-owned retained materialized state.
    type State: Send + Sync + 'static;
    /// App-owned current snapshot view.
    type Snapshot: Clone + Send + Sync + 'static;
    /// App-owned delta event type emitted per cycle.
    type Event: Clone + Send + Sync + 'static;

    /// Builds the initial retained state from the current source set.
    async fn init(&self, sources: &[ObservedSource<Self::Source>])
    -> Result<Self::State, DynError>;

    /// Advances retained state by one cycle and returns any new delta events.
    /// An empty vector means the cycle produced no batch.
    async fn poll(
        &self,
        sources: &[ObservedSource<Self::Source>],
        state: &mut Self::State,
    ) -> Result<Vec<Self::Event>, DynError>;

    /// Builds the current snapshot view from retained state.
    fn snapshot(&self, state: &Self::State) -> Self::Snapshot;
}
|
||||
|
||||
/// One materialized snapshot emitted by the runtime.
///
/// Each successful cycle replaces the previous snapshot.
#[derive(Clone, Debug)]
pub struct ObservationSnapshot<S> {
    /// Monotonic cycle number.
    pub cycle: u64,
    /// Capture timestamp.
    pub observed_at: SystemTime,
    /// Number of sources used for this snapshot.
    pub source_count: usize,
    /// App-owned snapshot payload.
    pub value: S,
}

/// One delta batch emitted by a successful observation cycle.
///
/// Batches are only created for cycles that produced at least one event.
#[derive(Clone, Debug)]
pub struct ObservationBatch<E> {
    /// Monotonic cycle number.
    pub cycle: u64,
    /// Capture timestamp.
    pub observed_at: SystemTime,
    /// Number of sources used for this batch.
    pub source_count: usize,
    /// App-owned delta events discovered in this cycle.
    pub events: Vec<E>,
}

/// Observation runtime failure stage.
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
pub enum ObservationFailureStage {
    /// Source refresh failed before a poll could run.
    SourceRefresh,
    /// Observer poll failed after sources were refreshed.
    Poll,
}

/// Last failed observation cycle.
///
/// Only the most recent failure is kept; a later successful cycle clears it.
#[derive(Clone, Debug)]
pub struct ObservationFailure {
    /// Monotonic cycle number.
    pub cycle: u64,
    /// Failure timestamp.
    pub observed_at: SystemTime,
    /// Number of sources involved in the failed cycle.
    pub source_count: usize,
    /// Runtime stage that failed.
    pub stage: ObservationFailureStage,
    /// Human-readable failure message.
    pub message: String,
}
|
||||
|
||||
/// Errors returned while starting an observation runtime.
///
/// Failures after startup are reported through [`ObservationFailure`] on the
/// handle instead of through this type.
#[derive(Debug, thiserror::Error)]
pub enum ObservationRuntimeError {
    /// The configured interval is invalid.
    #[error("observation interval must be greater than zero")]
    InvalidInterval,
    /// Source discovery failed during runtime startup.
    #[error("failed to refresh observation sources during startup: {source}")]
    SourceRefresh {
        #[source]
        source: DynError,
    },
    /// Observer state initialization failed during runtime startup.
    #[error("failed to initialize observation state: {source}")]
    ObserverInit {
        #[source]
        source: DynError,
    },
}
|
||||
|
||||
/// Read-side handle for one running observer.
///
/// Cheap to clone; all clones observe the same shared state.
pub struct ObservationHandle<O: Observer> {
    // Snapshot, history, and last-error state shared with the runtime task.
    shared: Arc<Mutex<SharedObservationState<O>>>,
    // Broadcasts each non-empty batch to subscribers.
    batches: broadcast::Sender<Arc<ObservationBatch<O::Event>>>,
}

// Manual impl: deriving `Clone` would wrongly require `O: Clone`.
impl<O: Observer> Clone for ObservationHandle<O> {
    fn clone(&self) -> Self {
        Self {
            shared: Arc::clone(&self.shared),
            batches: self.batches.clone(),
        }
    }
}

impl<O: Observer> ObservationHandle<O> {
    /// Returns the latest successful snapshot, if one has been produced.
    #[must_use]
    pub fn latest_snapshot(&self) -> Option<ObservationSnapshot<O::Snapshot>> {
        self.shared.lock().latest_snapshot.clone()
    }

    /// Returns retained non-empty event batches, oldest first.
    #[must_use]
    pub fn history(&self) -> Vec<Arc<ObservationBatch<O::Event>>> {
        self.shared.lock().history.iter().cloned().collect()
    }

    /// Returns the most recent cycle failure, if any.
    ///
    /// Cleared again by the next successful cycle.
    #[must_use]
    pub fn last_error(&self) -> Option<ObservationFailure> {
        self.shared.lock().last_error.clone()
    }

    /// Subscribes to future non-empty event batches.
    #[must_use]
    pub fn subscribe(&self) -> broadcast::Receiver<Arc<ObservationBatch<O::Event>>> {
        self.batches.subscribe()
    }
}
|
||||
|
||||
/// Lifecycle owner for one background observation runtime.
///
/// Dropping the runtime aborts the background task unless ownership of the
/// task was transferred out via `into_parts`.
pub struct ObservationRuntime<O: Observer> {
    handle: ObservationHandle<O>,
    // `None` once the task was moved out (`into_parts`) or aborted.
    task: Option<JoinHandle<()>>,
}
|
||||
|
||||
impl<O: Observer> ObservationRuntime<O> {
    /// Starts one background observation runtime.
    ///
    /// Source discovery and observer-state initialization run up front, so
    /// startup errors surface here rather than inside the background task.
    /// The initial snapshot is published as cycle 0 before the loop begins.
    ///
    /// # Errors
    ///
    /// Returns [`ObservationRuntimeError`] when the interval is zero, source
    /// discovery fails, or observer initialization fails.
    pub async fn start<P>(
        provider: P,
        observer: O,
        config: ObservationConfig,
    ) -> Result<Self, ObservationRuntimeError>
    where
        P: SourceProvider<O::Source>,
    {
        ensure_positive_interval(config.interval)?;

        let sources = provider
            .sources()
            .await
            .map_err(|source| ObservationRuntimeError::SourceRefresh { source })?;

        let source_count = sources.len();
        let state = observer
            .init(&sources)
            .await
            .map_err(|source| ObservationRuntimeError::ObserverInit { source })?;

        let snapshot = build_snapshot(0, source_count, &observer, &state);
        // Broadcast capacity must be at least 1 even when history is disabled.
        let batches = broadcast::channel(config.history_limit.max(1)).0;
        let shared = Arc::new(Mutex::new(SharedObservationState::new(snapshot)));
        let handle = ObservationHandle {
            shared: Arc::clone(&shared),
            batches,
        };

        info!(
            observer = type_name::<O>(),
            interval_ms = config.interval.as_millis(),
            history_limit = config.history_limit,
            source_count,
            "starting observation runtime"
        );

        let runtime_handle = handle.clone();
        let task = tokio::spawn(run_observation_loop(
            provider,
            observer,
            config,
            shared,
            runtime_handle.batches.clone(),
            state,
        ));

        Ok(Self {
            handle: runtime_handle,
            task: Some(task),
        })
    }

    /// Returns a read-side handle for the running observer.
    #[must_use]
    pub fn handle(&self) -> ObservationHandle<O> {
        self.handle.clone()
    }

    /// Splits the runtime into its handle and background task.
    ///
    /// After this call the caller owns the task; dropping the runtime no
    /// longer aborts it.
    #[must_use]
    pub fn into_parts(mut self) -> (ObservationHandle<O>, JoinHandle<()>) {
        let task = self
            .task
            .take()
            .expect("observation runtime task is always present before into_parts");

        (self.handle.clone(), task)
    }

    /// Aborts the background task.
    ///
    /// Idempotent: subsequent calls (and `Drop`) are no-ops.
    pub fn abort(&mut self) {
        if let Some(task) = self.task.take() {
            task.abort();
        }
    }
}
|
||||
|
||||
impl<O: Observer> Drop for ObservationRuntime<O> {
    fn drop(&mut self) {
        // Stop the background task when its owner goes away; harmless if the
        // task was already taken via `into_parts`.
        self.abort();
    }
}
|
||||
|
||||
// State shared between the runtime task and every `ObservationHandle`.
struct SharedObservationState<O: Observer> {
    // Most recent successful snapshot; always present after startup.
    latest_snapshot: Option<ObservationSnapshot<O::Snapshot>>,
    // Bounded queue of retained non-empty batches, oldest first.
    history: VecDeque<Arc<ObservationBatch<O::Event>>>,
    // Most recent cycle failure; cleared by the next successful cycle.
    last_error: Option<ObservationFailure>,
}

impl<O: Observer> SharedObservationState<O> {
    fn new(snapshot: ObservationSnapshot<O::Snapshot>) -> Self {
        Self {
            latest_snapshot: Some(snapshot),
            history: VecDeque::new(),
            last_error: None,
        }
    }
}
|
||||
|
||||
// Background task body: runs one observation cycle per tick, forever.
// The task is stopped externally (abort / runtime drop), not by returning.
async fn run_observation_loop<O, P>(
    provider: P,
    observer: O,
    config: ObservationConfig,
    shared: Arc<Mutex<SharedObservationState<O>>>,
    batches: broadcast::Sender<Arc<ObservationBatch<O::Event>>>,
    mut state: O::State,
) where
    O: Observer,
    P: SourceProvider<O::Source>,
{
    let mut ticker = build_interval(config.interval);
    // Cycle 0 was the startup snapshot; the loop continues from 1.
    let mut cycle = 1u64;

    // A tokio interval's first tick completes immediately; consume it so the
    // first polled cycle happens one full interval after startup.
    ticker.tick().await;

    loop {
        ticker.tick().await;

        let cycle_outcome = observe_cycle(&provider, &observer, cycle, &mut state).await;

        match cycle_outcome {
            Ok(success) => record_cycle_success(&shared, &batches, &config, success),
            Err(failure) => record_cycle_failure(&shared, failure),
        }

        // Failed cycles still consume a cycle number.
        cycle += 1;
    }
}
|
||||
|
||||
// Result of one successful cycle: the fresh snapshot plus an optional
// non-empty event batch.
struct CycleSuccess<O: Observer> {
    snapshot: ObservationSnapshot<O::Snapshot>,
    // `None` when the cycle produced no events.
    batch: Option<Arc<ObservationBatch<O::Event>>>,
}

// Runs one cycle: refresh sources, poll the observer, materialize outputs.
async fn observe_cycle<O, P>(
    provider: &P,
    observer: &O,
    cycle: u64,
    state: &mut O::State,
) -> Result<CycleSuccess<O>, ObservationFailure>
where
    O: Observer,
    P: SourceProvider<O::Source>,
{
    // Refresh failures report a source count of 0: the set was never obtained.
    let sources = provider.sources().await.map_err(|source| {
        build_failure(cycle, 0, ObservationFailureStage::SourceRefresh, source)
    })?;

    let source_count = sources.len();
    let events = observer.poll(&sources, state).await.map_err(|source| {
        build_failure(cycle, source_count, ObservationFailureStage::Poll, source)
    })?;

    let snapshot = build_snapshot(cycle, source_count, observer, state);
    let batch = build_batch(cycle, source_count, events);

    Ok(CycleSuccess { snapshot, batch })
}
|
||||
|
||||
// Publishes a successful cycle: replaces the snapshot, clears any previous
// error, retains the batch, and notifies subscribers.
fn record_cycle_success<O: Observer>(
    shared: &Arc<Mutex<SharedObservationState<O>>>,
    batches: &broadcast::Sender<Arc<ObservationBatch<O::Event>>>,
    config: &ObservationConfig,
    success: CycleSuccess<O>,
) {
    debug!(
        observer = type_name::<O>(),
        cycle = success.snapshot.cycle,
        source_count = success.snapshot.source_count,
        event_count = success.batch.as_ref().map_or(0, |batch| batch.events.len()),
        "observation cycle completed"
    );

    let mut state = shared.lock();
    state.latest_snapshot = Some(success.snapshot);
    state.last_error = None;

    // Empty cycles update the snapshot only; nothing to retain or broadcast.
    let Some(batch) = success.batch else {
        return;
    };

    push_history(&mut state.history, Arc::clone(&batch), config.history_limit);
    // Release the lock before broadcasting to subscribers.
    drop(state);

    // A send error only means there are currently no subscribers.
    let _ = batches.send(batch);
}
|
||||
|
||||
// Records a failed cycle; the previous snapshot and history stay intact.
fn record_cycle_failure<O: Observer>(
    shared: &Arc<Mutex<SharedObservationState<O>>>,
    failure: ObservationFailure,
) {
    warn!(
        observer = type_name::<O>(),
        cycle = failure.cycle,
        source_count = failure.source_count,
        stage = ?failure.stage,
        message = %failure.message,
        "observation cycle failed"
    );

    shared.lock().last_error = Some(failure);
}
|
||||
|
||||
// Rejects a zero interval, which would make the observation loop spin.
fn ensure_positive_interval(interval: Duration) -> Result<(), ObservationRuntimeError> {
    if interval.is_zero() {
        return Err(ObservationRuntimeError::InvalidInterval);
    }

    Ok(())
}

// Builds the cycle ticker; delayed ticks are not bursted to catch up.
fn build_interval(period: Duration) -> tokio::time::Interval {
    let mut ticker = interval(period);
    ticker.set_missed_tick_behavior(MissedTickBehavior::Delay);
    ticker
}
|
||||
|
||||
// Materializes the observer's current snapshot with cycle metadata attached.
fn build_snapshot<O: Observer>(
    cycle: u64,
    source_count: usize,
    observer: &O,
    state: &O::State,
) -> ObservationSnapshot<O::Snapshot> {
    ObservationSnapshot {
        cycle,
        observed_at: SystemTime::now(),
        source_count,
        value: observer.snapshot(state),
    }
}
|
||||
|
||||
fn build_batch<E>(
|
||||
cycle: u64,
|
||||
source_count: usize,
|
||||
events: Vec<E>,
|
||||
) -> Option<Arc<ObservationBatch<E>>> {
|
||||
if events.is_empty() {
|
||||
return None;
|
||||
}
|
||||
|
||||
Some(Arc::new(ObservationBatch {
|
||||
cycle,
|
||||
observed_at: SystemTime::now(),
|
||||
source_count,
|
||||
events,
|
||||
}))
|
||||
}
|
||||
|
||||
// Builds the failure record for one failed cycle, stringifying the cause so
// the error need not be kept alive.
fn build_failure(
    cycle: u64,
    source_count: usize,
    stage: ObservationFailureStage,
    source: DynError,
) -> ObservationFailure {
    ObservationFailure {
        cycle,
        observed_at: SystemTime::now(),
        source_count,
        stage,
        message: source.to_string(),
    }
}
|
||||
|
||||
fn push_history<E>(
|
||||
history: &mut VecDeque<Arc<ObservationBatch<E>>>,
|
||||
batch: Arc<ObservationBatch<E>>,
|
||||
history_limit: usize,
|
||||
) {
|
||||
if history_limit == 0 {
|
||||
return;
|
||||
}
|
||||
|
||||
history.push_back(batch);
|
||||
|
||||
while history.len() > history_limit {
|
||||
history.pop_front();
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests;
|
||||
250
testing-framework/core/src/observation/tests.rs
Normal file
250
testing-framework/core/src/observation/tests.rs
Normal file
@ -0,0 +1,250 @@
|
||||
use std::{
|
||||
sync::{
|
||||
Arc,
|
||||
atomic::{AtomicUsize, Ordering},
|
||||
},
|
||||
time::Duration,
|
||||
};
|
||||
|
||||
use async_trait::async_trait;
|
||||
use parking_lot::Mutex;
|
||||
use tokio::time::{Instant, sleep};
|
||||
|
||||
use super::{
|
||||
ObservationConfig, ObservationFailureStage, ObservationRuntime, ObservedSource, Observer,
|
||||
SourceProvider,
|
||||
};
|
||||
use crate::scenario::DynError;
|
||||
|
||||
// Mutable, cloneable source provider used to drive the runtime in tests.
#[derive(Clone)]
struct TestSourceProvider {
    // Current source set, swappable while the runtime is running.
    sources: Arc<Mutex<Vec<ObservedSource<u64>>>>,
    // Non-zero => the next `sources()` call fails exactly once.
    fail_refreshes: Arc<AtomicUsize>,
}

impl TestSourceProvider {
    fn new(sources: Vec<ObservedSource<u64>>) -> Self {
        Self {
            sources: Arc::new(Mutex::new(sources)),
            fail_refreshes: Arc::new(AtomicUsize::new(0)),
        }
    }

    // Replaces the source set observed by subsequent cycles.
    fn replace_sources(&self, sources: Vec<ObservedSource<u64>>) {
        *self.sources.lock() = sources;
    }

    // Arms a one-shot refresh failure.
    fn fail_next_refresh(&self) {
        self.fail_refreshes.store(1, Ordering::SeqCst);
    }
}
|
||||
|
||||
#[async_trait]
impl SourceProvider<u64> for TestSourceProvider {
    async fn sources(&self) -> Result<Vec<ObservedSource<u64>>, DynError> {
        // `swap` consumes the armed failure so only one cycle is affected.
        if self.fail_refreshes.swap(0, Ordering::SeqCst) == 1 {
            return Err("refresh failed".into());
        }

        Ok(self.sources.lock().clone())
    }
}
|
||||
|
||||
// Snapshot view exposed by `CountingObserver`.
#[derive(Clone, Debug, Eq, PartialEq)]
struct TestSnapshot {
    total_sources_seen: u64,
    last_source_count: usize,
}

// Delta event emitted on every poll.
#[derive(Clone, Debug, Eq, PartialEq)]
struct TestEvent {
    total_sources_seen: u64,
}

// Retained state: running sum of source values plus the last set size.
#[derive(Default)]
struct TestState {
    total_sources_seen: u64,
    last_source_count: usize,
}

// Observer that sums the numeric source handles each cycle.
struct CountingObserver;
|
||||
|
||||
#[async_trait]
impl Observer for CountingObserver {
    type Source = u64;
    type State = TestState;
    type Snapshot = TestSnapshot;
    type Event = TestEvent;

    async fn init(
        &self,
        sources: &[ObservedSource<Self::Source>],
    ) -> Result<Self::State, DynError> {
        // Seed the running sum with the initial source values.
        Ok(TestState {
            total_sources_seen: sources.iter().map(|source| source.source).sum(),
            last_source_count: sources.len(),
        })
    }

    async fn poll(
        &self,
        sources: &[ObservedSource<Self::Source>],
        state: &mut Self::State,
    ) -> Result<Vec<Self::Event>, DynError> {
        // Accumulate so tests can infer how many cycles have completed.
        state.total_sources_seen += sources.iter().map(|source| source.source).sum::<u64>();
        state.last_source_count = sources.len();

        // Always emit one event, so every cycle yields a non-empty batch.
        Ok(vec![TestEvent {
            total_sources_seen: state.total_sources_seen,
        }])
    }

    fn snapshot(&self, state: &Self::State) -> Self::Snapshot {
        TestSnapshot {
            total_sources_seen: state.total_sources_seen,
            last_source_count: state.last_source_count,
        }
    }
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn runtime_updates_snapshot_and_history() {
|
||||
let provider = TestSourceProvider::new(vec![ObservedSource::new("node-0", 2)]);
|
||||
let runtime = ObservationRuntime::start(
|
||||
provider,
|
||||
CountingObserver,
|
||||
ObservationConfig {
|
||||
interval: Duration::from_millis(25),
|
||||
history_limit: 2,
|
||||
},
|
||||
)
|
||||
.await
|
||||
.expect("runtime should start");
|
||||
|
||||
let handle = runtime.handle();
|
||||
wait_for_cycle(&handle, 2).await;
|
||||
|
||||
let snapshot = handle.latest_snapshot().expect("snapshot should exist");
|
||||
assert!(snapshot.cycle >= 2);
|
||||
assert_eq!(snapshot.source_count, 1);
|
||||
assert_eq!(snapshot.value.last_source_count, 1);
|
||||
assert!(snapshot.value.total_sources_seen >= 6);
|
||||
|
||||
let history = handle.history();
|
||||
assert_eq!(history.len(), 2);
|
||||
assert!(history.iter().all(|batch| !batch.events.is_empty()));
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn runtime_refreshes_sources_each_cycle() {
|
||||
let provider = TestSourceProvider::new(vec![ObservedSource::new("node-0", 1)]);
|
||||
let runtime = ObservationRuntime::start(
|
||||
provider.clone(),
|
||||
CountingObserver,
|
||||
ObservationConfig {
|
||||
interval: Duration::from_millis(25),
|
||||
history_limit: 4,
|
||||
},
|
||||
)
|
||||
.await
|
||||
.expect("runtime should start");
|
||||
|
||||
let handle = runtime.handle();
|
||||
wait_for_cycle(&handle, 1).await;
|
||||
|
||||
provider.replace_sources(vec![
|
||||
ObservedSource::new("node-0", 1),
|
||||
ObservedSource::new("node-1", 3),
|
||||
]);
|
||||
|
||||
wait_for_snapshot_source_count(&handle, 2).await;
|
||||
|
||||
let snapshot = handle.latest_snapshot().expect("snapshot should exist");
|
||||
assert_eq!(snapshot.source_count, 2);
|
||||
assert_eq!(snapshot.value.last_source_count, 2);
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn runtime_records_cycle_failures() {
|
||||
let provider = TestSourceProvider::new(vec![ObservedSource::new("node-0", 1)]);
|
||||
let runtime = ObservationRuntime::start(
|
||||
provider.clone(),
|
||||
CountingObserver,
|
||||
ObservationConfig {
|
||||
interval: Duration::from_millis(25),
|
||||
history_limit: 2,
|
||||
},
|
||||
)
|
||||
.await
|
||||
.expect("runtime should start");
|
||||
|
||||
let handle = runtime.handle();
|
||||
provider.fail_next_refresh();
|
||||
|
||||
wait_for_failure(&handle).await;
|
||||
|
||||
let failure = handle.last_error().expect("failure should exist");
|
||||
assert_eq!(failure.stage, ObservationFailureStage::SourceRefresh);
|
||||
assert_eq!(failure.message, "refresh failed");
|
||||
}
|
||||
|
||||
async fn wait_for_cycle(handle: &super::ObservationHandle<CountingObserver>, cycle: u64) {
|
||||
let deadline = Instant::now() + Duration::from_secs(2);
|
||||
|
||||
loop {
|
||||
let Some(snapshot) = handle.latest_snapshot() else {
|
||||
sleep(Duration::from_millis(10)).await;
|
||||
continue;
|
||||
};
|
||||
|
||||
if snapshot.cycle >= cycle {
|
||||
return;
|
||||
}
|
||||
|
||||
assert!(
|
||||
Instant::now() < deadline,
|
||||
"timed out waiting for cycle {cycle}"
|
||||
);
|
||||
|
||||
sleep(Duration::from_millis(10)).await;
|
||||
}
|
||||
}
|
||||
|
||||
async fn wait_for_snapshot_source_count(
|
||||
handle: &super::ObservationHandle<CountingObserver>,
|
||||
source_count: usize,
|
||||
) {
|
||||
let deadline = Instant::now() + Duration::from_secs(2);
|
||||
|
||||
loop {
|
||||
let Some(snapshot) = handle.latest_snapshot() else {
|
||||
sleep(Duration::from_millis(10)).await;
|
||||
continue;
|
||||
};
|
||||
|
||||
if snapshot.source_count == source_count {
|
||||
return;
|
||||
}
|
||||
|
||||
assert!(
|
||||
Instant::now() < deadline,
|
||||
"timed out waiting for source_count {source_count}"
|
||||
);
|
||||
|
||||
sleep(Duration::from_millis(10)).await;
|
||||
}
|
||||
}
|
||||
|
||||
async fn wait_for_failure(handle: &super::ObservationHandle<CountingObserver>) {
|
||||
let deadline = Instant::now() + Duration::from_secs(2);
|
||||
|
||||
loop {
|
||||
if handle.last_error().is_some() {
|
||||
return;
|
||||
}
|
||||
|
||||
assert!(Instant::now() < deadline, "timed out waiting for failure");
|
||||
|
||||
sleep(Duration::from_millis(10)).await;
|
||||
}
|
||||
}
|
||||
@ -4,7 +4,12 @@ use super::{
|
||||
Application, CleanupPolicy, DeploymentPolicy, Expectation, HttpReadinessRequirement,
|
||||
RetryPolicy, RuntimeExtensionFactory, Workload, internal::CoreBuilderAccess,
|
||||
};
|
||||
use crate::topology::{DeploymentProvider, DeploymentSeed};
|
||||
use crate::{
|
||||
observation::{
|
||||
ObservationConfig, ObservationExtensionFactory, Observer, SourceProviderFactory,
|
||||
},
|
||||
topology::{DeploymentProvider, DeploymentSeed},
|
||||
};
|
||||
|
||||
type DeploymentProviderHandle<E> = Box<dyn DeploymentProvider<<E as Application>::Deployment>>;
|
||||
|
||||
@ -60,6 +65,48 @@ pub trait CoreBuilderExt: CoreBuilderAccess + Sized {
|
||||
self.map_core_builder(|builder| builder.with_runtime_extension_factory(extension))
|
||||
}
|
||||
|
||||
/// Registers one clonable observer as a runtime extension.
|
||||
#[must_use]
|
||||
fn with_observer<O>(
|
||||
self,
|
||||
observer: O,
|
||||
source_provider_factory: impl SourceProviderFactory<Self::Env, O::Source>,
|
||||
config: ObservationConfig,
|
||||
) -> Self
|
||||
where
|
||||
O: Observer + Clone,
|
||||
Self::Env: Application,
|
||||
{
|
||||
let extension = ObservationExtensionFactory::<Self::Env, O>::new(
|
||||
observer,
|
||||
source_provider_factory,
|
||||
config,
|
||||
);
|
||||
|
||||
self.with_runtime_extension_factory(Box::new(extension))
|
||||
}
|
||||
|
||||
/// Registers one observer built lazily per run as a runtime extension.
|
||||
#[must_use]
|
||||
fn with_observer_factory<O>(
|
||||
self,
|
||||
observer_builder: impl Fn() -> O + Send + Sync + 'static,
|
||||
source_provider_factory: impl SourceProviderFactory<Self::Env, O::Source>,
|
||||
config: ObservationConfig,
|
||||
) -> Self
|
||||
where
|
||||
O: Observer,
|
||||
Self::Env: Application,
|
||||
{
|
||||
let extension = ObservationExtensionFactory::<Self::Env, O>::from_parts(
|
||||
observer_builder,
|
||||
source_provider_factory,
|
||||
config,
|
||||
);
|
||||
|
||||
self.with_runtime_extension_factory(Box::new(extension))
|
||||
}
|
||||
|
||||
#[must_use]
|
||||
fn with_run_duration(self, duration: Duration) -> Self {
|
||||
self.map_core_builder(|builder| builder.with_run_duration(duration))
|
||||
|
||||
Some files were not shown because too many files have changed in this diff Show More
Loading…
x
Reference in New Issue
Block a user