mirror of
https://github.com/logos-blockchain/logos-blockchain-testing.git
synced 2026-06-06 17:09:27 +00:00
demo-apps: kvstore, queue, and openraft_kv
This commit is contained in:
commit
8700bd5a6c
@ -6,7 +6,11 @@ exclude-dev = true
|
|||||||
no-default-features = true
|
no-default-features = true
|
||||||
|
|
||||||
[advisories]
|
[advisories]
|
||||||
ignore = []
|
ignore = [
|
||||||
|
# Existing workspace dependencies still resolve rand 0.8 via tera/tokio-retry.
|
||||||
|
# Track removal when those upstream edges move to a fixed release.
|
||||||
|
"RUSTSEC-2026-0097",
|
||||||
|
]
|
||||||
yanked = "deny"
|
yanked = "deny"
|
||||||
|
|
||||||
[bans]
|
[bans]
|
||||||
|
|||||||
766
Cargo.lock
generated
766
Cargo.lock
generated
File diff suppressed because it is too large
Load Diff
14
Cargo.toml
14
Cargo.toml
@ -4,6 +4,18 @@ members = [
|
|||||||
"cfgsync/artifacts",
|
"cfgsync/artifacts",
|
||||||
"cfgsync/core",
|
"cfgsync/core",
|
||||||
"cfgsync/runtime",
|
"cfgsync/runtime",
|
||||||
|
"examples/kvstore/examples",
|
||||||
|
"examples/kvstore/kvstore-node",
|
||||||
|
"examples/kvstore/testing/integration",
|
||||||
|
"examples/kvstore/testing/workloads",
|
||||||
|
"examples/openraft_kv/examples",
|
||||||
|
"examples/openraft_kv/openraft-kv-node",
|
||||||
|
"examples/openraft_kv/testing/integration",
|
||||||
|
"examples/openraft_kv/testing/workloads",
|
||||||
|
"examples/queue/examples",
|
||||||
|
"examples/queue/queue-node",
|
||||||
|
"examples/queue/testing/integration",
|
||||||
|
"examples/queue/testing/workloads",
|
||||||
"examples/metrics_counter/examples",
|
"examples/metrics_counter/examples",
|
||||||
"examples/metrics_counter/metrics-counter-node",
|
"examples/metrics_counter/metrics-counter-node",
|
||||||
"examples/metrics_counter/testing/integration",
|
"examples/metrics_counter/testing/integration",
|
||||||
@ -56,6 +68,8 @@ bytes = { default-features = false, version = "1.3" }
|
|||||||
hex = { default-features = false, version = "0.4.3" }
|
hex = { default-features = false, version = "0.4.3" }
|
||||||
libp2p = { default-features = false, version = "0.55" }
|
libp2p = { default-features = false, version = "0.55" }
|
||||||
num-bigint = { default-features = false, version = "0.4" }
|
num-bigint = { default-features = false, version = "0.4" }
|
||||||
|
openraft = { default-features = true, features = ["serde", "type-alias"], version = "0.10.0-alpha.17" }
|
||||||
|
openraft-memstore = { default-features = true, version = "0.10.0-alpha.17" }
|
||||||
parking_lot = { default-features = false, version = "0.12" }
|
parking_lot = { default-features = false, version = "0.12" }
|
||||||
rand = { default-features = false, features = ["std", "std_rng"], version = "0.8" }
|
rand = { default-features = false, features = ["std", "std_rng"], version = "0.8" }
|
||||||
reqwest = { default-features = false, version = "0.12" }
|
reqwest = { default-features = false, version = "0.12" }
|
||||||
|
|||||||
314
docs/observation-runtime-plan.md
Normal file
314
docs/observation-runtime-plan.md
Normal file
@ -0,0 +1,314 @@
|
|||||||
|
# Observation Runtime Plan
|
||||||
|
|
||||||
|
## Why this work exists
|
||||||
|
|
||||||
|
TF (the testing framework) is good at deployment plumbing. It is weak at continuous observation.
|
||||||
|
|
||||||
|
Today, the same problems are solved repeatedly with custom loops:
|
||||||
|
- TF block feed logic in Logos
|
||||||
|
- Cucumber manual-cluster polling loops
|
||||||
|
- ad hoc catch-up scans for wallet and chain state
|
||||||
|
- app-local state polling in expectations
|
||||||
|
|
||||||
|
That is the gap this work should close.
|
||||||
|
|
||||||
|
The goal is not a generic "distributed systems DSL".
|
||||||
|
The goal is one reusable observation runtime that:
|
||||||
|
- continuously collects data from dynamic sources
|
||||||
|
- keeps typed materialized state
|
||||||
|
- exposes both current snapshot and delta/history views
|
||||||
|
- fits naturally in TF scenarios and Cucumber manual-cluster code
|
||||||
|
|
||||||
|
## Constraints
|
||||||
|
|
||||||
|
### TF constraints
|
||||||
|
- TF abstractions must stay universal and simple.
|
||||||
|
- TF must not know app semantics like blocks, wallets, leaders, jobs, or topics.
|
||||||
|
- TF must remain useful for simple apps such as `openraft_kv`, not only Logos.
|
||||||
|
|
||||||
|
### App constraints
|
||||||
|
- Apps must be able to build richer abstractions on top of TF.
|
||||||
|
- Logos must be able to support:
|
||||||
|
- current block-feed replacement
|
||||||
|
- fork-aware chain state
|
||||||
|
- public-peer sync targets
|
||||||
|
- multi-wallet UTXO tracking
|
||||||
|
- Apps must be able to adopt this incrementally.
|
||||||
|
|
||||||
|
### Migration constraints
|
||||||
|
- We do not want a flag-day rewrite.
|
||||||
|
- Existing loops can coexist with the new runtime until replacements are proven.
|
||||||
|
|
||||||
|
## Non-goals
|
||||||
|
|
||||||
|
This work should not:
|
||||||
|
- put feed back onto the base `Application` trait
|
||||||
|
- build app-specific semantics into TF core
|
||||||
|
- replace filesystem blockchain snapshots used for startup/restore
|
||||||
|
- force every app to use continuous observation
|
||||||
|
- introduce a large public abstraction stack that nobody can explain
|
||||||
|
|
||||||
|
## Core idea
|
||||||
|
|
||||||
|
Introduce one TF-level observation runtime.
|
||||||
|
|
||||||
|
That runtime owns:
|
||||||
|
- source refresh
|
||||||
|
- scheduling
|
||||||
|
- polling/ingestion
|
||||||
|
- bounded history
|
||||||
|
- latest snapshot caching
|
||||||
|
- delta publication
|
||||||
|
- freshness/error tracking
|
||||||
|
- lifecycle hooks for TF and Cucumber
|
||||||
|
|
||||||
|
Apps own:
|
||||||
|
- source types
|
||||||
|
- raw observation logic
|
||||||
|
- materialized state
|
||||||
|
- snapshot shape
|
||||||
|
- delta/event shape
|
||||||
|
- higher-level projections such as wallet state
|
||||||
|
|
||||||
|
## Public TF surface
|
||||||
|
|
||||||
|
The TF public surface should stay small.
|
||||||
|
|
||||||
|
### `ObservedSource<S>`
|
||||||
|
A named source instance.
|
||||||
|
|
||||||
|
Used for:
|
||||||
|
- local node clients
|
||||||
|
- public peer endpoints
|
||||||
|
- any other app-owned source type
|
||||||
|
|
||||||
|
### `SourceProvider<S>`
|
||||||
|
Returns the current source set.
|
||||||
|
|
||||||
|
This must support dynamic source lists because:
|
||||||
|
- manual cluster nodes come and go
|
||||||
|
- Cucumber worlds may attach public peers
|
||||||
|
- node control may restart or replace sources during a run
|
||||||
|
|
||||||
|
### `Observer`
|
||||||
|
App-owned observation logic.
|
||||||
|
|
||||||
|
It defines:
|
||||||
|
- `Source`
|
||||||
|
- `State`
|
||||||
|
- `Snapshot`
|
||||||
|
- `Event`
|
||||||
|
|
||||||
|
And it implements:
|
||||||
|
- `init(...)`
|
||||||
|
- `poll(...)`
|
||||||
|
- `snapshot(...)`
|
||||||
|
|
||||||
|
The important boundary is:
|
||||||
|
- TF owns the runtime
|
||||||
|
- app code owns materialization
|
||||||
|
|
||||||
|
### `ObservationRuntime`
|
||||||
|
The engine that:
|
||||||
|
- starts the loop
|
||||||
|
- refreshes sources
|
||||||
|
- calls `poll(...)`
|
||||||
|
- stores history
|
||||||
|
- publishes deltas
|
||||||
|
- updates latest snapshot
|
||||||
|
- tracks last error and freshness
|
||||||
|
|
||||||
|
### `ObservationHandle`
|
||||||
|
The read-side interface for workloads, expectations, and Cucumber steps.
|
||||||
|
|
||||||
|
It should expose at least:
|
||||||
|
- latest snapshot
|
||||||
|
- delta subscription
|
||||||
|
- bounded history
|
||||||
|
- last error
|
||||||
|
|
||||||
|
## Intended shape
|
||||||
|
|
||||||
|
```rust
|
||||||
|
pub struct ObservedSource<S> {
|
||||||
|
pub name: String,
|
||||||
|
pub source: S,
|
||||||
|
}
|
||||||
|
|
||||||
|
#[async_trait]
|
||||||
|
pub trait SourceProvider<S>: Send + Sync + 'static {
|
||||||
|
async fn sources(&self) -> Vec<ObservedSource<S>>;
|
||||||
|
}
|
||||||
|
|
||||||
|
#[async_trait]
|
||||||
|
pub trait Observer: Send + Sync + 'static {
|
||||||
|
type Source: Clone + Send + Sync + 'static;
|
||||||
|
type State: Send + Sync + 'static;
|
||||||
|
type Snapshot: Clone + Send + Sync + 'static;
|
||||||
|
type Event: Clone + Send + Sync + 'static;
|
||||||
|
|
||||||
|
async fn init(
|
||||||
|
&self,
|
||||||
|
sources: &[ObservedSource<Self::Source>],
|
||||||
|
) -> Result<Self::State, DynError>;
|
||||||
|
|
||||||
|
async fn poll(
|
||||||
|
&self,
|
||||||
|
sources: &[ObservedSource<Self::Source>],
|
||||||
|
state: &mut Self::State,
|
||||||
|
) -> Result<Vec<Self::Event>, DynError>;
|
||||||
|
|
||||||
|
fn snapshot(&self, state: &Self::State) -> Self::Snapshot;
|
||||||
|
}
|
||||||
|
```
|
||||||
|
|
||||||
|
This is enough.
|
||||||
|
|
||||||
|
If more helper layers are needed, they should stay internal first.
|
||||||
|
|
||||||
|
## How current use cases fit
|
||||||
|
|
||||||
|
### `openraft_kv`
|
||||||
|
Use one simple observer.
|
||||||
|
|
||||||
|
- sources: node clients
|
||||||
|
- state: latest per-node Raft state
|
||||||
|
- snapshot: sorted node-state view
|
||||||
|
- events: optional deltas, possibly empty at first
|
||||||
|
|
||||||
|
This is the simplest proving case.
|
||||||
|
It validates the runtime without dragging in Logos complexity.
|
||||||
|
|
||||||
|
### Logos block feed replacement
|
||||||
|
Use one shared chain observer.
|
||||||
|
|
||||||
|
- sources: local node clients
|
||||||
|
- state:
|
||||||
|
- node heads
|
||||||
|
- block graph
|
||||||
|
- heights
|
||||||
|
- seen headers
|
||||||
|
- recent history
|
||||||
|
- snapshot:
|
||||||
|
- current head/lib/graph summary
|
||||||
|
- events:
|
||||||
|
- newly discovered blocks
|
||||||
|
|
||||||
|
This covers both existing Logos feed use cases:
|
||||||
|
- current snapshot consumers
|
||||||
|
- delta/subscription consumers
|
||||||
|
|
||||||
|
### Cucumber manual-cluster sync
|
||||||
|
Use the same observer runtime with a different source set.
|
||||||
|
|
||||||
|
- sources:
|
||||||
|
- local manual-cluster node clients
|
||||||
|
- public peer endpoints
|
||||||
|
- state:
|
||||||
|
- local consensus views
|
||||||
|
- public consensus views
|
||||||
|
- derived majority public target
|
||||||
|
- snapshot:
|
||||||
|
- current local and public sync picture
|
||||||
|
|
||||||
|
This removes custom poll/sleep loops from steps.
|
||||||
|
|
||||||
|
### Multi-wallet fork-aware tracking
|
||||||
|
This should not be a TF concept.
|
||||||
|
|
||||||
|
It should be a Logos projection built on top of the shared chain observer.
|
||||||
|
|
||||||
|
- input: chain observer state
|
||||||
|
- output: per-header wallet state cache keyed by block header
|
||||||
|
- property: naturally fork-aware because it follows actual ancestry
|
||||||
|
|
||||||
|
That replaces repeated backward scans from tip with continuous maintained state.
|
||||||
|
|
||||||
|
## Logos layering
|
||||||
|
|
||||||
|
Logos should not put every concern into one giant impl.
|
||||||
|
|
||||||
|
Recommended layering:
|
||||||
|
|
||||||
|
1. **Chain source adapter**
|
||||||
|
- local node reads
|
||||||
|
- public peer reads
|
||||||
|
|
||||||
|
2. **Shared chain observer**
|
||||||
|
- catch-up
|
||||||
|
- continuous ingestion
|
||||||
|
- graph/history materialization
|
||||||
|
|
||||||
|
3. **Logos projections**
|
||||||
|
- head view
|
||||||
|
- public sync target
|
||||||
|
- fork graph queries
|
||||||
|
- wallet state
|
||||||
|
- tx inclusion helpers
|
||||||
|
|
||||||
|
TF provides the runtime.
|
||||||
|
Logos provides the domain model built on top.
|
||||||
|
|
||||||
|
## Adoption plan
|
||||||
|
|
||||||
|
### Phase 1: add TF observation runtime
|
||||||
|
- add `ObservedSource`, `SourceProvider`, `Observer`, `ObservationRuntime`, `ObservationHandle`
|
||||||
|
- keep the public API small
|
||||||
|
- no app migrations yet
|
||||||
|
|
||||||
|
### Phase 2: prove it on `openraft_kv`
|
||||||
|
- add one simple observer over `/state`
|
||||||
|
- migrate one expectation to use the observation handle
|
||||||
|
- validate local, compose, and k8s
|
||||||
|
|
||||||
|
### Phase 3: add Logos shared chain observer
|
||||||
|
- implement it alongside current feed/loops
|
||||||
|
- do not remove existing consumers yet
|
||||||
|
- prove snapshot and delta outputs are useful
|
||||||
|
|
||||||
|
### Phase 4: migrate one Logos consumer at a time
|
||||||
|
Suggested order:
|
||||||
|
1. fork/head snapshot consumer
|
||||||
|
2. tx inclusion consumer
|
||||||
|
3. Cucumber sync-to-public-chain logic
|
||||||
|
4. wallet/UTXO tracking
|
||||||
|
|
||||||
|
### Phase 5: delete old loops and feed paths
|
||||||
|
- only after the new runtime has replaced real consumers cleanly
|
||||||
|
|
||||||
|
## Validation gates
|
||||||
|
|
||||||
|
Each phase should have clear checks.
|
||||||
|
|
||||||
|
### Runtime-level
|
||||||
|
- crate-level `cargo check`
|
||||||
|
- targeted tests for runtime lifecycle and history retention
|
||||||
|
- explicit tests for dynamic source refresh
|
||||||
|
|
||||||
|
### App-level
|
||||||
|
- `openraft_kv`:
|
||||||
|
- local failover
|
||||||
|
- compose failover
|
||||||
|
- k8s failover
|
||||||
|
- Logos:
|
||||||
|
- one snapshot consumer migrated
|
||||||
|
- one delta consumer migrated
|
||||||
|
- Cucumber:
|
||||||
|
- one manual-cluster sync path migrated
|
||||||
|
|
||||||
|
## Open questions
|
||||||
|
|
||||||
|
These should stay open until implementation forces a decision:
|
||||||
|
- whether `ObservationHandle` should expose full history directly or only cursor/subscription access
|
||||||
|
- how much error/freshness metadata belongs in the generic runtime vs app snapshot types
|
||||||
|
- whether multiple observers should share one scheduler/runtime instance or simply run independently first
|
||||||
|
|
||||||
|
## Design guardrails
|
||||||
|
|
||||||
|
When implementing this work:
|
||||||
|
- keep TF public abstractions minimal
|
||||||
|
- keep app semantics out of TF core
|
||||||
|
- do not chase a generic testing DSL
|
||||||
|
- build from reusable blocks, not one-off mega impls
|
||||||
|
- keep migration incremental
|
||||||
|
- prefer simple, explainable runtime behavior over clever abstraction
|
||||||
28
examples/kvstore/Dockerfile
Normal file
28
examples/kvstore/Dockerfile
Normal file
@ -0,0 +1,28 @@
|
|||||||
|
# Build stage: compiles kvstore-node from the full workspace.
FROM rustlang/rust:nightly-bookworm AS builder

WORKDIR /build

# Copy all workspace files required for workspace build.
COPY Cargo.toml Cargo.lock ./
COPY cfgsync/ ./cfgsync/
COPY examples/ ./examples/
COPY testing-framework/ ./testing-framework/

# Build kvstore-node in release mode.
RUN cargo build --release -p kvstore-node

# Runtime stage: slim Debian image carrying only the node binary.
FROM debian:bookworm-slim

# Only the CA bundle is needed at runtime; --no-install-recommends stops apt
# from pulling in optional packages, and the list cache is removed in the same
# layer so it never ends up in the image.
RUN apt-get update && \
    apt-get install -y --no-install-recommends ca-certificates && \
    rm -rf /var/lib/apt/lists/*

COPY --from=builder /build/target/release/kvstore-node /usr/local/bin/kvstore-node

# Directory that holds the config file referenced by the default CMD below.
RUN mkdir -p /etc/kvstore
WORKDIR /app

ENTRYPOINT ["/usr/local/bin/kvstore-node"]
CMD ["--config", "/etc/kvstore/config.yaml"]
|
||||||
64
examples/kvstore/README.md
Normal file
64
examples/kvstore/README.md
Normal file
@ -0,0 +1,64 @@
|
|||||||
|
# KV Store Example
|
||||||
|
|
||||||
|
This example runs a small replicated key-value store.
|
||||||
|
|
||||||
|
The usual scenario writes keys through one node and checks that the other nodes
|
||||||
|
eventually return the same values.
|
||||||
|
|
||||||
|
## How TF runs this
|
||||||
|
|
||||||
|
Each scenario follows the same pattern:
|
||||||
|
|
||||||
|
- TF starts a small deployment of kvstore nodes
|
||||||
|
- a workload writes keys through one node
|
||||||
|
- an expectation keeps reading from all nodes until they agree on the values
|
||||||
|
|
||||||
|
## Scenarios
|
||||||
|
|
||||||
|
- `basic_convergence` runs the convergence check locally
|
||||||
|
- `compose_convergence` runs the same check in Docker Compose
|
||||||
|
- `k8s_convergence` runs it on Kubernetes
|
||||||
|
- `k8s_manual_convergence` starts the nodes through the k8s manual cluster API, restarts one node, and checks convergence again
|
||||||
|
|
||||||
|
## API
|
||||||
|
|
||||||
|
Each node exposes:
|
||||||
|
|
||||||
|
- `PUT /kv/:key` to write a value
|
||||||
|
- `GET /kv/:key` to read a value
|
||||||
|
- `GET /internal/snapshot` to read the local replicated state
|
||||||
|
|
||||||
|
## Run locally
|
||||||
|
|
||||||
|
```bash
|
||||||
|
cargo run -p kvstore-examples --bin kvstore_basic_convergence
|
||||||
|
```
|
||||||
|
|
||||||
|
## Run with Docker Compose
|
||||||
|
|
||||||
|
```bash
|
||||||
|
cargo run -p kvstore-examples --bin kvstore_compose_convergence
|
||||||
|
```
|
||||||
|
|
||||||
|
Set `KVSTORE_IMAGE` to override the default compose image tag.
|
||||||
|
|
||||||
|
## Run with Kubernetes
|
||||||
|
|
||||||
|
```bash
|
||||||
|
docker build -t kvstore-node:local -f examples/kvstore/Dockerfile .
|
||||||
|
cargo run -p kvstore-examples --bin kvstore_k8s_convergence
|
||||||
|
```
|
||||||
|
|
||||||
|
Prerequisites:
|
||||||
|
- `kubectl` configured with a reachable cluster
|
||||||
|
- `helm` installed
|
||||||
|
|
||||||
|
Optional image override:
|
||||||
|
- `KVSTORE_K8S_IMAGE` (falls back to `KVSTORE_IMAGE`, then `kvstore-node:local`)
|
||||||
|
|
||||||
|
## Run with Kubernetes manual cluster
|
||||||
|
|
||||||
|
```bash
|
||||||
|
docker build -t kvstore-node:local -f examples/kvstore/Dockerfile .
|
||||||
|
cargo run -p kvstore-examples --bin kvstore_k8s_manual_convergence
|
||||||
|
```
|
||||||
35
examples/kvstore/examples/Cargo.toml
Normal file
35
examples/kvstore/examples/Cargo.toml
Normal file
@ -0,0 +1,35 @@
|
|||||||
|
[package]
|
||||||
|
edition.workspace = true
|
||||||
|
license.workspace = true
|
||||||
|
name = "kvstore-examples"
|
||||||
|
version.workspace = true
|
||||||
|
|
||||||
|
[[bin]]
|
||||||
|
name = "kvstore_basic_convergence"
|
||||||
|
path = "src/bin/basic_convergence.rs"
|
||||||
|
|
||||||
|
[[bin]]
|
||||||
|
name = "kvstore_compose_convergence"
|
||||||
|
path = "src/bin/compose_convergence.rs"
|
||||||
|
|
||||||
|
[[bin]]
|
||||||
|
name = "kvstore_k8s_convergence"
|
||||||
|
path = "src/bin/k8s_convergence.rs"
|
||||||
|
|
||||||
|
[[bin]]
|
||||||
|
name = "kvstore_k8s_manual_convergence"
|
||||||
|
path = "src/bin/k8s_manual_convergence.rs"
|
||||||
|
|
||||||
|
[dependencies]
|
||||||
|
kvstore-node = { path = "../kvstore-node" }
|
||||||
|
kvstore-runtime-ext = { path = "../testing/integration" }
|
||||||
|
kvstore-runtime-workloads = { path = "../testing/workloads" }
|
||||||
|
testing-framework-core = { workspace = true }
|
||||||
|
testing-framework-runner-compose = { workspace = true }
|
||||||
|
testing-framework-runner-k8s = { workspace = true }
|
||||||
|
|
||||||
|
anyhow = "1.0"
|
||||||
|
serde = { workspace = true }
|
||||||
|
tokio = { workspace = true, features = ["full"] }
|
||||||
|
tracing = { workspace = true }
|
||||||
|
tracing-subscriber = { version = "0.3", features = ["env-filter"] }
|
||||||
31
examples/kvstore/examples/src/bin/basic_convergence.rs
Normal file
31
examples/kvstore/examples/src/bin/basic_convergence.rs
Normal file
@ -0,0 +1,31 @@
|
|||||||
|
use std::time::Duration;
|
||||||
|
|
||||||
|
use kvstore_runtime_ext::KvLocalDeployer;
|
||||||
|
use kvstore_runtime_workloads::{
|
||||||
|
KvBuilderExt, KvConverges, KvScenarioBuilder, KvTopology, KvWriteWorkload,
|
||||||
|
};
|
||||||
|
use testing_framework_core::scenario::Deployer;
|
||||||
|
|
||||||
|
#[tokio::main]
|
||||||
|
async fn main() -> anyhow::Result<()> {
|
||||||
|
tracing_subscriber::fmt()
|
||||||
|
.with_env_filter(tracing_subscriber::EnvFilter::from_default_env())
|
||||||
|
.init();
|
||||||
|
|
||||||
|
let mut scenario = KvScenarioBuilder::deployment_with(|_| KvTopology::new(3))
|
||||||
|
.with_run_duration(Duration::from_secs(30))
|
||||||
|
.with_workload(
|
||||||
|
KvWriteWorkload::new()
|
||||||
|
.operations(300)
|
||||||
|
.key_count(30)
|
||||||
|
.rate_per_sec(30)
|
||||||
|
.key_prefix("demo"),
|
||||||
|
)
|
||||||
|
.with_expectation(KvConverges::new("demo", 30).timeout(Duration::from_secs(25)))
|
||||||
|
.build()?;
|
||||||
|
|
||||||
|
let deployer = KvLocalDeployer::default();
|
||||||
|
let runner = deployer.deploy(&scenario).await?;
|
||||||
|
runner.run(&mut scenario).await?;
|
||||||
|
Ok(())
|
||||||
|
}
|
||||||
44
examples/kvstore/examples/src/bin/compose_convergence.rs
Normal file
44
examples/kvstore/examples/src/bin/compose_convergence.rs
Normal file
@ -0,0 +1,44 @@
|
|||||||
|
use std::time::Duration;
|
||||||
|
|
||||||
|
use anyhow::{Context as _, Result};
|
||||||
|
use kvstore_runtime_workloads::{
|
||||||
|
KvBuilderExt, KvConverges, KvScenarioBuilder, KvTopology, KvWriteWorkload,
|
||||||
|
};
|
||||||
|
use testing_framework_core::scenario::Deployer;
|
||||||
|
use testing_framework_runner_compose::ComposeRunnerError;
|
||||||
|
use tracing::{info, warn};
|
||||||
|
|
||||||
|
#[tokio::main]
|
||||||
|
async fn main() -> Result<()> {
|
||||||
|
tracing_subscriber::fmt()
|
||||||
|
.with_env_filter(tracing_subscriber::EnvFilter::from_default_env())
|
||||||
|
.init();
|
||||||
|
|
||||||
|
let mut scenario = KvScenarioBuilder::deployment_with(|_| KvTopology::new(3))
|
||||||
|
.with_run_duration(Duration::from_secs(30))
|
||||||
|
.with_workload(
|
||||||
|
KvWriteWorkload::new()
|
||||||
|
.operations(200)
|
||||||
|
.key_count(20)
|
||||||
|
.rate_per_sec(20),
|
||||||
|
)
|
||||||
|
.with_expectation(KvConverges::new("kv-demo", 20).timeout(Duration::from_secs(25)))
|
||||||
|
.build()?;
|
||||||
|
|
||||||
|
let deployer = kvstore_runtime_ext::KvComposeDeployer::new();
|
||||||
|
let runner = match deployer.deploy(&scenario).await {
|
||||||
|
Ok(runner) => runner,
|
||||||
|
Err(ComposeRunnerError::DockerUnavailable) => {
|
||||||
|
warn!("docker unavailable; skipping compose kv run");
|
||||||
|
return Ok(());
|
||||||
|
}
|
||||||
|
Err(error) => return Err(anyhow::Error::new(error)).context("deploying kv compose stack"),
|
||||||
|
};
|
||||||
|
|
||||||
|
info!("running kv compose convergence scenario");
|
||||||
|
runner
|
||||||
|
.run(&mut scenario)
|
||||||
|
.await
|
||||||
|
.context("running kv compose scenario")?;
|
||||||
|
Ok(())
|
||||||
|
}
|
||||||
58
examples/kvstore/examples/src/bin/k8s_convergence.rs
Normal file
58
examples/kvstore/examples/src/bin/k8s_convergence.rs
Normal file
@ -0,0 +1,58 @@
|
|||||||
|
use std::time::Duration;
|
||||||
|
|
||||||
|
use anyhow::{Context as _, Result};
|
||||||
|
use kvstore_runtime_ext::KvK8sDeployer;
|
||||||
|
use kvstore_runtime_workloads::{
|
||||||
|
KvBuilderExt, KvConverges, KvScenarioBuilder, KvTopology, KvWriteWorkload,
|
||||||
|
};
|
||||||
|
use testing_framework_core::scenario::Deployer;
|
||||||
|
use testing_framework_runner_k8s::K8sRunnerError;
|
||||||
|
use tracing::{info, warn};
|
||||||
|
|
||||||
|
#[tokio::main]
|
||||||
|
async fn main() -> Result<()> {
|
||||||
|
tracing_subscriber::fmt()
|
||||||
|
.with_env_filter(tracing_subscriber::EnvFilter::from_default_env())
|
||||||
|
.init();
|
||||||
|
|
||||||
|
let mut scenario = KvScenarioBuilder::deployment_with(|_| KvTopology::new(3))
|
||||||
|
.with_run_duration(Duration::from_secs(30))
|
||||||
|
.with_workload(
|
||||||
|
KvWriteWorkload::new()
|
||||||
|
.operations(200)
|
||||||
|
.key_count(20)
|
||||||
|
.rate_per_sec(20),
|
||||||
|
)
|
||||||
|
.with_expectation(KvConverges::new("kv-demo", 20).timeout(Duration::from_secs(25)))
|
||||||
|
.build()?;
|
||||||
|
|
||||||
|
let deployer = KvK8sDeployer::new();
|
||||||
|
let runner = match deployer.deploy(&scenario).await {
|
||||||
|
Ok(runner) => runner,
|
||||||
|
Err(K8sRunnerError::ClientInit { source }) => {
|
||||||
|
warn!("k8s unavailable ({source}); skipping kv k8s run");
|
||||||
|
return Ok(());
|
||||||
|
}
|
||||||
|
Err(K8sRunnerError::InstallStack { source })
|
||||||
|
if k8s_cluster_unavailable(&source.to_string()) =>
|
||||||
|
{
|
||||||
|
warn!("k8s unavailable ({source}); skipping kv k8s run");
|
||||||
|
return Ok(());
|
||||||
|
}
|
||||||
|
Err(error) => return Err(anyhow::Error::new(error)).context("deploying kv k8s stack"),
|
||||||
|
};
|
||||||
|
|
||||||
|
info!("running kv k8s convergence scenario");
|
||||||
|
runner
|
||||||
|
.run(&mut scenario)
|
||||||
|
.await
|
||||||
|
.context("running kv k8s scenario")?;
|
||||||
|
|
||||||
|
Ok(())
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Returns `true` when `message` contains one of the known "cluster could not
/// be reached" fragments.
///
/// Matching is a plain, case-sensitive substring search.
fn k8s_cluster_unavailable(message: &str) -> bool {
    const UNREACHABLE_MARKERS: [&str; 3] = [
        "Unable to connect to the server",
        "TLS handshake timeout",
        "connection refused",
    ];

    UNREACHABLE_MARKERS
        .iter()
        .any(|marker| message.contains(*marker))
}
|
||||||
155
examples/kvstore/examples/src/bin/k8s_manual_convergence.rs
Normal file
155
examples/kvstore/examples/src/bin/k8s_manual_convergence.rs
Normal file
@ -0,0 +1,155 @@
|
|||||||
|
use std::time::Duration;
|
||||||
|
|
||||||
|
use anyhow::{Context as _, Result, anyhow};
|
||||||
|
use kvstore_node::KvHttpClient;
|
||||||
|
use kvstore_runtime_ext::{KvK8sDeployer, KvTopology};
|
||||||
|
use serde::{Deserialize, Serialize};
|
||||||
|
use testing_framework_runner_k8s::ManualClusterError;
|
||||||
|
use tracing::{info, warn};
|
||||||
|
|
||||||
|
/// Request body serialized for `PUT /kv/{key}` calls made by `write_keys`.
#[derive(Serialize)]
struct PutRequest {
    /// Value to store under the key.
    value: String,
    /// NOTE(review): presumably a compare-and-set guard on the stored version;
    /// this binary always sends `None` — confirm against the node handler.
    expected_version: Option<u64>,
}
|
||||||
|
|
||||||
|
/// Response body deserialized from a `PUT /kv/{key}` call.
#[derive(Deserialize)]
struct PutResponse {
    /// `false` is treated by `write_keys` as a rejected write.
    applied: bool,
}
|
||||||
|
|
||||||
|
/// A stored record as returned by a node; compared for equality across nodes
/// to decide convergence.
#[derive(Clone, Debug, Deserialize, Eq, PartialEq)]
struct ValueRecord {
    /// The stored value.
    value: String,
    /// NOTE(review): presumably a per-key version counter — confirm against the node.
    version: u64,
    /// NOTE(review): presumably the id of the node that accepted the write — confirm.
    origin: u64,
}
|
||||||
|
|
||||||
|
/// Response body deserialized from a `GET /kv/{key}` call.
#[derive(Deserialize)]
struct GetResponse {
    /// The record for the key. NOTE(review): presumably `None` when the node
    /// has no value for the key — confirm against the node handler.
    record: Option<ValueRecord>,
}
|
||||||
|
|
||||||
|
#[tokio::main]
|
||||||
|
async fn main() -> Result<()> {
|
||||||
|
tracing_subscriber::fmt()
|
||||||
|
.with_env_filter(tracing_subscriber::EnvFilter::from_default_env())
|
||||||
|
.init();
|
||||||
|
|
||||||
|
let deployer = KvK8sDeployer::new();
|
||||||
|
let cluster = match deployer
|
||||||
|
.manual_cluster_from_descriptors(KvTopology::new(3))
|
||||||
|
.await
|
||||||
|
{
|
||||||
|
Ok(cluster) => cluster,
|
||||||
|
Err(ManualClusterError::ClientInit { source }) => {
|
||||||
|
warn!("k8s unavailable ({source}); skipping kv k8s manual run");
|
||||||
|
return Ok(());
|
||||||
|
}
|
||||||
|
Err(ManualClusterError::InstallStack { source })
|
||||||
|
if k8s_cluster_unavailable(&source.to_string()) =>
|
||||||
|
{
|
||||||
|
warn!("k8s unavailable ({source}); skipping kv k8s manual run");
|
||||||
|
return Ok(());
|
||||||
|
}
|
||||||
|
Err(error) => {
|
||||||
|
return Err(anyhow::Error::new(error)).context("creating kv k8s manual cluster");
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
let node0 = cluster.start_node("node-0").await?.client;
|
||||||
|
let node1 = cluster.start_node("node-1").await?.client;
|
||||||
|
let node2 = cluster.start_node("node-2").await?.client;
|
||||||
|
|
||||||
|
cluster.wait_network_ready().await?;
|
||||||
|
|
||||||
|
write_keys(&node0, "kv-manual", 12).await?;
|
||||||
|
wait_for_convergence(
|
||||||
|
&[node0.clone(), node1.clone(), node2.clone()],
|
||||||
|
"kv-manual",
|
||||||
|
12,
|
||||||
|
)
|
||||||
|
.await?;
|
||||||
|
|
||||||
|
info!("restarting node-2 in manual cluster");
|
||||||
|
cluster.restart_node("node-2").await?;
|
||||||
|
cluster.wait_network_ready().await?;
|
||||||
|
|
||||||
|
let node2 = cluster
|
||||||
|
.node_client("node-2")
|
||||||
|
.ok_or_else(|| anyhow!("node-2 client missing after restart"))?;
|
||||||
|
wait_for_convergence(&[node0, node1, node2], "kv-manual", 12).await?;
|
||||||
|
|
||||||
|
cluster.stop_all();
|
||||||
|
Ok(())
|
||||||
|
}
|
||||||
|
|
||||||
|
async fn write_keys(client: &KvHttpClient, prefix: &str, key_count: usize) -> Result<()> {
|
||||||
|
for index in 0..key_count {
|
||||||
|
let key = format!("{prefix}-{index}");
|
||||||
|
let response: PutResponse = client
|
||||||
|
.put(
|
||||||
|
&format!("/kv/{key}"),
|
||||||
|
&PutRequest {
|
||||||
|
value: format!("value-{index}"),
|
||||||
|
expected_version: None,
|
||||||
|
},
|
||||||
|
)
|
||||||
|
.await
|
||||||
|
.map_err(|error| anyhow!(error.to_string()))
|
||||||
|
.with_context(|| format!("writing key {key}"))?;
|
||||||
|
|
||||||
|
if !response.applied {
|
||||||
|
return Err(anyhow!("write rejected for key {key}"));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
Ok(())
|
||||||
|
}
|
||||||
|
|
||||||
|
async fn wait_for_convergence(
|
||||||
|
clients: &[KvHttpClient],
|
||||||
|
prefix: &str,
|
||||||
|
key_count: usize,
|
||||||
|
) -> Result<()> {
|
||||||
|
let deadline = tokio::time::Instant::now() + Duration::from_secs(30);
|
||||||
|
|
||||||
|
while tokio::time::Instant::now() < deadline {
|
||||||
|
if is_converged(clients, prefix, key_count).await? {
|
||||||
|
info!(key_count, "kv manual cluster converged");
|
||||||
|
return Ok(());
|
||||||
|
}
|
||||||
|
tokio::time::sleep(Duration::from_millis(500)).await;
|
||||||
|
}
|
||||||
|
|
||||||
|
Err(anyhow!("kv manual cluster did not converge within timeout"))
|
||||||
|
}
|
||||||
|
|
||||||
|
async fn is_converged(clients: &[KvHttpClient], prefix: &str, key_count: usize) -> Result<bool> {
|
||||||
|
for index in 0..key_count {
|
||||||
|
let key = format!("{prefix}-{index}");
|
||||||
|
let first = read_key(&clients[0], &key).await?;
|
||||||
|
for client in &clients[1..] {
|
||||||
|
if read_key(client, &key).await? != first {
|
||||||
|
return Ok(false);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
Ok(true)
|
||||||
|
}
|
||||||
|
|
||||||
|
async fn read_key(client: &KvHttpClient, key: &str) -> Result<Option<ValueRecord>> {
|
||||||
|
let response: GetResponse = client
|
||||||
|
.get(&format!("/kv/{key}"))
|
||||||
|
.await
|
||||||
|
.map_err(|error| anyhow!(error.to_string()))
|
||||||
|
.with_context(|| format!("reading key {key}"))?;
|
||||||
|
Ok(response.record)
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Returns `true` when `message` contains one of the known "cluster could not
/// be reached" fragments.
///
/// Matching is a plain, case-sensitive substring search.
fn k8s_cluster_unavailable(message: &str) -> bool {
    let unreachable_markers = [
        "Unable to connect to the server",
        "TLS handshake timeout",
        "connection refused",
    ];

    for marker in unreachable_markers {
        if message.contains(marker) {
            return true;
        }
    }

    false
}
|
||||||
24
examples/kvstore/kvstore-node/Cargo.toml
Normal file
24
examples/kvstore/kvstore-node/Cargo.toml
Normal file
@ -0,0 +1,24 @@
|
|||||||
|
[package]
|
||||||
|
edition.workspace = true
|
||||||
|
license.workspace = true
|
||||||
|
name = "kvstore-node"
|
||||||
|
version.workspace = true
|
||||||
|
|
||||||
|
[[bin]]
|
||||||
|
name = "kvstore-node"
|
||||||
|
path = "src/main.rs"
|
||||||
|
|
||||||
|
[dependencies]
|
||||||
|
axum = "0.7"
|
||||||
|
tower-http = { version = "0.6", features = ["trace"] }
|
||||||
|
|
||||||
|
serde = { workspace = true }
|
||||||
|
serde_yaml = { workspace = true }
|
||||||
|
|
||||||
|
tokio = { workspace = true, features = ["full"] }
|
||||||
|
tracing = { workspace = true }
|
||||||
|
tracing-subscriber = { version = "0.3", features = ["env-filter"] }
|
||||||
|
|
||||||
|
anyhow = "1.0"
|
||||||
|
clap = { version = "4.0", features = ["derive"] }
|
||||||
|
reqwest = { workspace = true, features = ["json"] }
|
||||||
40
examples/kvstore/kvstore-node/src/client.rs
Normal file
40
examples/kvstore/kvstore-node/src/client.rs
Normal file
@ -0,0 +1,40 @@
|
|||||||
|
use reqwest::Url;
|
||||||
|
use serde::Serialize;
|
||||||
|
|
||||||
|
/// HTTP client for a single kvstore node; request paths are resolved against
/// `base_url`.
#[derive(Clone)]
pub struct KvHttpClient {
    /// URL that request paths are joined onto.
    base_url: Url,
    /// Underlying `reqwest` client used for every request.
    client: reqwest::Client,
}
|
||||||
|
|
||||||
|
impl KvHttpClient {
|
||||||
|
#[must_use]
|
||||||
|
pub fn new(base_url: Url) -> Self {
|
||||||
|
Self {
|
||||||
|
base_url,
|
||||||
|
client: reqwest::Client::new(),
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
pub async fn get<T: serde::de::DeserializeOwned>(&self, path: &str) -> anyhow::Result<T> {
|
||||||
|
let url = self.base_url.join(path)?;
|
||||||
|
let response = self.client.get(url).send().await?.error_for_status()?;
|
||||||
|
Ok(response.json().await?)
|
||||||
|
}
|
||||||
|
|
||||||
|
pub async fn put<B: Serialize, T: serde::de::DeserializeOwned>(
|
||||||
|
&self,
|
||||||
|
path: &str,
|
||||||
|
body: &B,
|
||||||
|
) -> anyhow::Result<T> {
|
||||||
|
let url = self.base_url.join(path)?;
|
||||||
|
let response = self
|
||||||
|
.client
|
||||||
|
.put(url)
|
||||||
|
.json(body)
|
||||||
|
.send()
|
||||||
|
.await?
|
||||||
|
.error_for_status()?;
|
||||||
|
Ok(response.json().await?)
|
||||||
|
}
|
||||||
|
}
|
||||||
30
examples/kvstore/kvstore-node/src/config.rs
Normal file
30
examples/kvstore/kvstore-node/src/config.rs
Normal file
@ -0,0 +1,30 @@
|
|||||||
|
use std::{fs, path::Path};
|
||||||
|
|
||||||
|
use serde::{Deserialize, Serialize};
|
||||||
|
|
||||||
|
#[derive(Clone, Debug, Serialize, Deserialize)]
|
||||||
|
pub struct PeerInfo {
|
||||||
|
pub node_id: u64,
|
||||||
|
pub http_address: String,
|
||||||
|
}
|
||||||
|
|
||||||
|
#[derive(Clone, Debug, Serialize, Deserialize)]
|
||||||
|
pub struct KvConfig {
|
||||||
|
pub node_id: u64,
|
||||||
|
pub http_port: u16,
|
||||||
|
pub peers: Vec<PeerInfo>,
|
||||||
|
#[serde(default = "default_sync_interval_ms")]
|
||||||
|
pub sync_interval_ms: u64,
|
||||||
|
}
|
||||||
|
|
||||||
|
impl KvConfig {
|
||||||
|
pub fn load(path: &Path) -> anyhow::Result<Self> {
|
||||||
|
let raw = fs::read_to_string(path)?;
|
||||||
|
let config = serde_yaml::from_str(&raw)?;
|
||||||
|
Ok(config)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Serde default for the `sync_interval_ms` config field: one second,
/// expressed in milliseconds.
const fn default_sync_interval_ms() -> u64 {
    1_000
}
|
||||||
3
examples/kvstore/kvstore-node/src/lib.rs
Normal file
3
examples/kvstore/kvstore-node/src/lib.rs
Normal file
@ -0,0 +1,3 @@
|
|||||||
|
pub mod client;
|
||||||
|
|
||||||
|
pub use client::KvHttpClient;
|
||||||
36
examples/kvstore/kvstore-node/src/main.rs
Normal file
36
examples/kvstore/kvstore-node/src/main.rs
Normal file
@ -0,0 +1,36 @@
|
|||||||
|
mod config;
|
||||||
|
mod server;
|
||||||
|
mod state;
|
||||||
|
mod sync;
|
||||||
|
|
||||||
|
use std::path::PathBuf;
|
||||||
|
|
||||||
|
use clap::Parser;
|
||||||
|
use tracing_subscriber::{layer::SubscriberExt, util::SubscriberInitExt};
|
||||||
|
|
||||||
|
use crate::{config::KvConfig, state::KvState, sync::SyncService};
|
||||||
|
|
||||||
|
#[derive(Parser, Debug)]
|
||||||
|
#[command(name = "kvstore-node")]
|
||||||
|
struct Args {
|
||||||
|
#[arg(short, long)]
|
||||||
|
config: PathBuf,
|
||||||
|
}
|
||||||
|
|
||||||
|
#[tokio::main]
|
||||||
|
async fn main() -> anyhow::Result<()> {
|
||||||
|
tracing_subscriber::registry()
|
||||||
|
.with(
|
||||||
|
tracing_subscriber::EnvFilter::try_from_default_env()
|
||||||
|
.unwrap_or_else(|_| "kvstore_node=info,tower_http=debug".into()),
|
||||||
|
)
|
||||||
|
.with(tracing_subscriber::fmt::layer())
|
||||||
|
.init();
|
||||||
|
|
||||||
|
let args = Args::parse();
|
||||||
|
let config = KvConfig::load(&args.config)?;
|
||||||
|
|
||||||
|
let state = KvState::new(config.node_id);
|
||||||
|
SyncService::new(config.clone(), state.clone()).start();
|
||||||
|
server::start_server(config, state).await
|
||||||
|
}
|
||||||
112
examples/kvstore/kvstore-node/src/server.rs
Normal file
112
examples/kvstore/kvstore-node/src/server.rs
Normal file
@ -0,0 +1,112 @@
|
|||||||
|
use std::net::SocketAddr;
|
||||||
|
|
||||||
|
use axum::{
|
||||||
|
Router,
|
||||||
|
extract::{Path, State},
|
||||||
|
http::StatusCode,
|
||||||
|
response::Json,
|
||||||
|
routing::get,
|
||||||
|
};
|
||||||
|
use serde::{Deserialize, Serialize};
|
||||||
|
use tower_http::trace::TraceLayer;
|
||||||
|
|
||||||
|
use crate::{
|
||||||
|
config::KvConfig,
|
||||||
|
state::{KvState, Snapshot, ValueRecord},
|
||||||
|
};
|
||||||
|
|
||||||
|
#[derive(Serialize)]
|
||||||
|
struct HealthResponse {
|
||||||
|
status: &'static str,
|
||||||
|
}
|
||||||
|
|
||||||
|
#[derive(Deserialize)]
|
||||||
|
struct PutRequest {
|
||||||
|
value: String,
|
||||||
|
expected_version: Option<u64>,
|
||||||
|
}
|
||||||
|
|
||||||
|
#[derive(Serialize)]
|
||||||
|
struct PutResponse {
|
||||||
|
applied: bool,
|
||||||
|
version: u64,
|
||||||
|
}
|
||||||
|
|
||||||
|
#[derive(Serialize)]
|
||||||
|
struct GetResponse {
|
||||||
|
key: String,
|
||||||
|
record: Option<ValueRecord>,
|
||||||
|
}
|
||||||
|
|
||||||
|
pub async fn start_server(config: KvConfig, state: KvState) -> anyhow::Result<()> {
|
||||||
|
let app = Router::new()
|
||||||
|
.route("/health/live", get(health_live))
|
||||||
|
.route("/health/ready", get(health_ready))
|
||||||
|
.route("/kv/:key", get(get_key).put(put_key))
|
||||||
|
.route("/internal/snapshot", get(get_snapshot))
|
||||||
|
.layer(TraceLayer::new_for_http())
|
||||||
|
.with_state(state.clone());
|
||||||
|
|
||||||
|
let addr = SocketAddr::from(([0, 0, 0, 0], config.http_port));
|
||||||
|
let listener = tokio::net::TcpListener::bind(addr).await?;
|
||||||
|
|
||||||
|
state.set_ready(true).await;
|
||||||
|
tracing::info!(node_id = state.node_id(), %addr, "kv node ready");
|
||||||
|
|
||||||
|
axum::serve(listener, app).await?;
|
||||||
|
Ok(())
|
||||||
|
}
|
||||||
|
|
||||||
|
async fn health_live() -> (StatusCode, Json<HealthResponse>) {
|
||||||
|
(StatusCode::OK, Json(HealthResponse { status: "alive" }))
|
||||||
|
}
|
||||||
|
|
||||||
|
async fn health_ready(State(state): State<KvState>) -> (StatusCode, Json<HealthResponse>) {
|
||||||
|
if state.is_ready().await {
|
||||||
|
(StatusCode::OK, Json(HealthResponse { status: "ready" }))
|
||||||
|
} else {
|
||||||
|
(
|
||||||
|
StatusCode::SERVICE_UNAVAILABLE,
|
||||||
|
Json(HealthResponse {
|
||||||
|
status: "not-ready",
|
||||||
|
}),
|
||||||
|
)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
async fn get_key(Path(key): Path<String>, State(state): State<KvState>) -> Json<GetResponse> {
|
||||||
|
let record = state.get(&key).await;
|
||||||
|
Json(GetResponse { key, record })
|
||||||
|
}
|
||||||
|
|
||||||
|
async fn put_key(
|
||||||
|
Path(key): Path<String>,
|
||||||
|
State(state): State<KvState>,
|
||||||
|
Json(request): Json<PutRequest>,
|
||||||
|
) -> (StatusCode, Json<PutResponse>) {
|
||||||
|
let outcome = state
|
||||||
|
.put_local(key, request.value, request.expected_version)
|
||||||
|
.await;
|
||||||
|
|
||||||
|
if outcome.applied {
|
||||||
|
(
|
||||||
|
StatusCode::OK,
|
||||||
|
Json(PutResponse {
|
||||||
|
applied: true,
|
||||||
|
version: outcome.current_version,
|
||||||
|
}),
|
||||||
|
)
|
||||||
|
} else {
|
||||||
|
(
|
||||||
|
StatusCode::CONFLICT,
|
||||||
|
Json(PutResponse {
|
||||||
|
applied: false,
|
||||||
|
version: outcome.current_version,
|
||||||
|
}),
|
||||||
|
)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
async fn get_snapshot(State(state): State<KvState>) -> Json<Snapshot> {
|
||||||
|
Json(state.snapshot().await)
|
||||||
|
}
|
||||||
111
examples/kvstore/kvstore-node/src/state.rs
Normal file
111
examples/kvstore/kvstore-node/src/state.rs
Normal file
@ -0,0 +1,111 @@
|
|||||||
|
use std::{collections::HashMap, sync::Arc};
|
||||||
|
|
||||||
|
use serde::{Deserialize, Serialize};
|
||||||
|
use tokio::sync::RwLock;
|
||||||
|
|
||||||
|
#[derive(Clone, Debug, Serialize, Deserialize, Eq, PartialEq)]
|
||||||
|
pub struct ValueRecord {
|
||||||
|
pub value: String,
|
||||||
|
pub version: u64,
|
||||||
|
pub origin: u64,
|
||||||
|
}
|
||||||
|
|
||||||
|
#[derive(Clone, Debug, Serialize, Deserialize)]
|
||||||
|
pub struct Snapshot {
|
||||||
|
pub node_id: u64,
|
||||||
|
pub entries: HashMap<String, ValueRecord>,
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Result of a local write attempt.
#[derive(Debug, Clone)]
pub struct PutOutcome {
    /// `true` when the value was stored, `false` when the expected-version
    /// check rejected the write.
    pub applied: bool,
    /// Version of the key after the attempt (unchanged when rejected).
    pub current_version: u64,
}
|
||||||
|
|
||||||
|
#[derive(Clone)]
|
||||||
|
pub struct KvState {
|
||||||
|
node_id: u64,
|
||||||
|
ready: Arc<RwLock<bool>>,
|
||||||
|
entries: Arc<RwLock<HashMap<String, ValueRecord>>>,
|
||||||
|
}
|
||||||
|
|
||||||
|
impl KvState {
|
||||||
|
pub fn new(node_id: u64) -> Self {
|
||||||
|
Self {
|
||||||
|
node_id,
|
||||||
|
ready: Arc::new(RwLock::new(false)),
|
||||||
|
entries: Arc::new(RwLock::new(HashMap::new())),
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
pub const fn node_id(&self) -> u64 {
|
||||||
|
self.node_id
|
||||||
|
}
|
||||||
|
|
||||||
|
pub async fn set_ready(&self, value: bool) {
|
||||||
|
*self.ready.write().await = value;
|
||||||
|
}
|
||||||
|
|
||||||
|
pub async fn is_ready(&self) -> bool {
|
||||||
|
*self.ready.read().await
|
||||||
|
}
|
||||||
|
|
||||||
|
pub async fn get(&self, key: &str) -> Option<ValueRecord> {
|
||||||
|
self.entries.read().await.get(key).cloned()
|
||||||
|
}
|
||||||
|
|
||||||
|
pub async fn put_local(
|
||||||
|
&self,
|
||||||
|
key: String,
|
||||||
|
value: String,
|
||||||
|
expected_version: Option<u64>,
|
||||||
|
) -> PutOutcome {
|
||||||
|
let mut entries = self.entries.write().await;
|
||||||
|
let current_version = entries.get(&key).map_or(0, |record| record.version);
|
||||||
|
|
||||||
|
if expected_version.is_some_and(|expected| expected != current_version) {
|
||||||
|
return PutOutcome {
|
||||||
|
applied: false,
|
||||||
|
current_version,
|
||||||
|
};
|
||||||
|
}
|
||||||
|
|
||||||
|
let next_version = current_version.saturating_add(1);
|
||||||
|
entries.insert(
|
||||||
|
key,
|
||||||
|
ValueRecord {
|
||||||
|
value,
|
||||||
|
version: next_version,
|
||||||
|
origin: self.node_id,
|
||||||
|
},
|
||||||
|
);
|
||||||
|
|
||||||
|
PutOutcome {
|
||||||
|
applied: true,
|
||||||
|
current_version: next_version,
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
pub async fn merge_snapshot(&self, snapshot: Snapshot) {
|
||||||
|
let mut local = self.entries.write().await;
|
||||||
|
for (key, incoming) in snapshot.entries {
|
||||||
|
match local.get(&key) {
|
||||||
|
Some(existing) if !is_newer_record(&incoming, existing) => {}
|
||||||
|
_ => {
|
||||||
|
local.insert(key, incoming);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
pub async fn snapshot(&self) -> Snapshot {
|
||||||
|
Snapshot {
|
||||||
|
node_id: self.node_id,
|
||||||
|
entries: self.entries.read().await.clone(),
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Returns `true` when `candidate` should replace `existing`.
///
/// Records are ranked by version first; equal versions are ordered by
/// origin id, so the comparison is strict and identical records are not newer.
fn is_newer_record(candidate: &ValueRecord, existing: &ValueRecord) -> bool {
    let candidate_rank = (candidate.version, candidate.origin);
    let existing_rank = (existing.version, existing.origin);
    candidate_rank > existing_rank
}
|
||||||
103
examples/kvstore/kvstore-node/src/sync.rs
Normal file
103
examples/kvstore/kvstore-node/src/sync.rs
Normal file
@ -0,0 +1,103 @@
|
|||||||
|
use std::{collections::HashMap, sync::Arc, time::Duration};
|
||||||
|
|
||||||
|
use reqwest::Client;
|
||||||
|
use tokio::sync::Mutex;
|
||||||
|
use tracing::{debug, warn};
|
||||||
|
|
||||||
|
use crate::{
|
||||||
|
config::KvConfig,
|
||||||
|
state::{KvState, Snapshot},
|
||||||
|
};
|
||||||
|
|
||||||
|
/// Number of consecutive failed syncs with one peer at which failures are
/// logged at `warn` level instead of `debug`.
const WARN_AFTER_CONSECUTIVE_FAILURES: u32 = 5;
|
||||||
|
|
||||||
|
#[derive(Clone)]
|
||||||
|
pub struct SyncService {
|
||||||
|
config: Arc<KvConfig>,
|
||||||
|
state: KvState,
|
||||||
|
client: Client,
|
||||||
|
failures_by_peer: Arc<Mutex<HashMap<String, u32>>>,
|
||||||
|
}
|
||||||
|
|
||||||
|
impl SyncService {
|
||||||
|
pub fn new(config: KvConfig, state: KvState) -> Self {
|
||||||
|
Self {
|
||||||
|
config: Arc::new(config),
|
||||||
|
state,
|
||||||
|
client: Client::new(),
|
||||||
|
failures_by_peer: Arc::new(Mutex::new(HashMap::new())),
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
pub fn start(&self) {
|
||||||
|
let service = self.clone();
|
||||||
|
tokio::spawn(async move {
|
||||||
|
service.run().await;
|
||||||
|
});
|
||||||
|
}
|
||||||
|
|
||||||
|
async fn run(self) {
|
||||||
|
let interval = Duration::from_millis(self.config.sync_interval_ms.max(100));
|
||||||
|
loop {
|
||||||
|
self.sync_once().await;
|
||||||
|
tokio::time::sleep(interval).await;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
async fn sync_once(&self) {
|
||||||
|
for peer in &self.config.peers {
|
||||||
|
match self.fetch_snapshot(&peer.http_address).await {
|
||||||
|
Ok(snapshot) => {
|
||||||
|
self.state.merge_snapshot(snapshot).await;
|
||||||
|
self.clear_failure_counter(&peer.http_address).await;
|
||||||
|
}
|
||||||
|
Err(error) => {
|
||||||
|
self.record_sync_failure(&peer.http_address, &error).await;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
async fn fetch_snapshot(&self, peer_address: &str) -> anyhow::Result<Snapshot> {
|
||||||
|
let url = format!("http://{peer_address}/internal/snapshot");
|
||||||
|
let snapshot = self
|
||||||
|
.client
|
||||||
|
.get(url)
|
||||||
|
.send()
|
||||||
|
.await?
|
||||||
|
.error_for_status()?
|
||||||
|
.json()
|
||||||
|
.await?;
|
||||||
|
Ok(snapshot)
|
||||||
|
}
|
||||||
|
|
||||||
|
async fn clear_failure_counter(&self, peer_address: &str) {
|
||||||
|
let mut failures = self.failures_by_peer.lock().await;
|
||||||
|
failures.remove(peer_address);
|
||||||
|
}
|
||||||
|
|
||||||
|
async fn record_sync_failure(&self, peer_address: &str, error: &anyhow::Error) {
|
||||||
|
let consecutive_failures = {
|
||||||
|
let mut failures = self.failures_by_peer.lock().await;
|
||||||
|
let entry = failures.entry(peer_address.to_owned()).or_insert(0);
|
||||||
|
*entry += 1;
|
||||||
|
*entry
|
||||||
|
};
|
||||||
|
|
||||||
|
if consecutive_failures >= WARN_AFTER_CONSECUTIVE_FAILURES {
|
||||||
|
warn!(
|
||||||
|
peer = %peer_address,
|
||||||
|
%error,
|
||||||
|
consecutive_failures,
|
||||||
|
"kv sync repeatedly failing"
|
||||||
|
);
|
||||||
|
} else {
|
||||||
|
debug!(
|
||||||
|
peer = %peer_address,
|
||||||
|
%error,
|
||||||
|
consecutive_failures,
|
||||||
|
"kv sync failed"
|
||||||
|
);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
15
examples/kvstore/testing/integration/Cargo.toml
Normal file
15
examples/kvstore/testing/integration/Cargo.toml
Normal file
@ -0,0 +1,15 @@
|
|||||||
|
[package]
|
||||||
|
edition.workspace = true
|
||||||
|
license.workspace = true
|
||||||
|
name = "kvstore-runtime-ext"
|
||||||
|
version.workspace = true
|
||||||
|
|
||||||
|
[dependencies]
|
||||||
|
testing-framework-core = { workspace = true }
|
||||||
|
testing-framework-runner-compose = { workspace = true }
|
||||||
|
testing-framework-runner-k8s = { workspace = true }
|
||||||
|
testing-framework-runner-local = { workspace = true }
|
||||||
|
|
||||||
|
async-trait = { workspace = true }
|
||||||
|
kvstore-node = { path = "../../kvstore-node" }
|
||||||
|
serde = { workspace = true }
|
||||||
75
examples/kvstore/testing/integration/src/app.rs
Normal file
75
examples/kvstore/testing/integration/src/app.rs
Normal file
@ -0,0 +1,75 @@
|
|||||||
|
use std::io::Error;
|
||||||
|
|
||||||
|
use async_trait::async_trait;
|
||||||
|
use kvstore_node::KvHttpClient;
|
||||||
|
use serde::{Deserialize, Serialize};
|
||||||
|
use testing_framework_core::scenario::{
|
||||||
|
Application, ClusterNodeConfigApplication, ClusterNodeView, ClusterPeerView, DynError,
|
||||||
|
NodeAccess, serialize_cluster_yaml_config,
|
||||||
|
};
|
||||||
|
|
||||||
|
/// Deployment description used by the kvstore demo: the testing framework's
/// generic cluster topology.
pub type KvTopology = testing_framework_core::topology::ClusterTopology;
|
||||||
|
|
||||||
|
#[derive(Clone, Debug, Serialize, Deserialize)]
|
||||||
|
pub struct KvPeerInfo {
|
||||||
|
pub node_id: u64,
|
||||||
|
pub http_address: String,
|
||||||
|
}
|
||||||
|
|
||||||
|
#[derive(Clone, Debug, Serialize, Deserialize)]
|
||||||
|
pub struct KvNodeConfig {
|
||||||
|
pub node_id: u64,
|
||||||
|
pub http_port: u16,
|
||||||
|
pub peers: Vec<KvPeerInfo>,
|
||||||
|
pub sync_interval_ms: u64,
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Marker type that carries the testing-framework trait implementations for
/// the kvstore demo application.
pub struct KvEnv;
|
||||||
|
|
||||||
|
#[async_trait]
|
||||||
|
impl Application for KvEnv {
|
||||||
|
type Deployment = KvTopology;
|
||||||
|
type NodeClient = KvHttpClient;
|
||||||
|
type NodeConfig = KvNodeConfig;
|
||||||
|
fn build_node_client(access: &NodeAccess) -> Result<Self::NodeClient, DynError> {
|
||||||
|
Ok(KvHttpClient::new(access.api_base_url()?))
|
||||||
|
}
|
||||||
|
|
||||||
|
fn node_readiness_path() -> &'static str {
|
||||||
|
"/health/ready"
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
impl ClusterNodeConfigApplication for KvEnv {
|
||||||
|
type ConfigError = Error;
|
||||||
|
|
||||||
|
fn static_network_port() -> u16 {
|
||||||
|
8080
|
||||||
|
}
|
||||||
|
|
||||||
|
fn build_cluster_node_config(
|
||||||
|
node: &ClusterNodeView,
|
||||||
|
peers: &[ClusterPeerView],
|
||||||
|
) -> Result<Self::NodeConfig, Self::ConfigError> {
|
||||||
|
let peers = peers
|
||||||
|
.iter()
|
||||||
|
.map(|peer| KvPeerInfo {
|
||||||
|
node_id: peer.index() as u64,
|
||||||
|
http_address: peer.authority(),
|
||||||
|
})
|
||||||
|
.collect::<Vec<_>>();
|
||||||
|
|
||||||
|
Ok(KvNodeConfig {
|
||||||
|
node_id: node.index() as u64,
|
||||||
|
http_port: node.network_port(),
|
||||||
|
peers,
|
||||||
|
sync_interval_ms: 500,
|
||||||
|
})
|
||||||
|
}
|
||||||
|
|
||||||
|
fn serialize_cluster_node_config(
|
||||||
|
config: &Self::NodeConfig,
|
||||||
|
) -> Result<String, Self::ConfigError> {
|
||||||
|
serialize_cluster_yaml_config(config).map_err(Error::other)
|
||||||
|
}
|
||||||
|
}
|
||||||
15
examples/kvstore/testing/integration/src/compose_env.rs
Normal file
15
examples/kvstore/testing/integration/src/compose_env.rs
Normal file
@ -0,0 +1,15 @@
|
|||||||
|
use testing_framework_runner_compose::{BinaryConfigNodeSpec, ComposeBinaryApp};
|
||||||
|
|
||||||
|
use crate::KvEnv;
|
||||||
|
|
||||||
|
/// Config file path handed to the compose node spec.
const NODE_CONFIG_PATH: &str = "/etc/kvstore/config.yaml";
|
||||||
|
|
||||||
|
impl ComposeBinaryApp for KvEnv {
|
||||||
|
fn compose_node_spec() -> BinaryConfigNodeSpec {
|
||||||
|
BinaryConfigNodeSpec::conventional(
|
||||||
|
"/usr/local/bin/kvstore-node",
|
||||||
|
NODE_CONFIG_PATH,
|
||||||
|
vec![8080, 8081],
|
||||||
|
)
|
||||||
|
}
|
||||||
|
}
|
||||||
21
examples/kvstore/testing/integration/src/k8s_env.rs
Normal file
21
examples/kvstore/testing/integration/src/k8s_env.rs
Normal file
@ -0,0 +1,21 @@
|
|||||||
|
use testing_framework_runner_k8s::{BinaryConfigK8sSpec, K8sBinaryApp};
|
||||||
|
|
||||||
|
use crate::KvEnv;
|
||||||
|
|
||||||
|
/// Config file path passed to the Kubernetes node spec.
const CONTAINER_CONFIG_PATH: &str = "/etc/kvstore/config.yaml";

/// HTTP port passed to the Kubernetes node spec.
const CONTAINER_HTTP_PORT: u16 = 8080;

/// Second port passed to the Kubernetes node spec.
// NOTE(review): presumably the framework's testing/service port — confirm
// against `BinaryConfigK8sSpec::conventional`.
const SERVICE_TESTING_PORT: u16 = 8081;

/// Name prefix passed to the Kubernetes node spec.
const NODE_NAME_PREFIX: &str = "kvstore-node";
|
||||||
|
|
||||||
|
impl K8sBinaryApp for KvEnv {
|
||||||
|
fn k8s_binary_spec() -> BinaryConfigK8sSpec {
|
||||||
|
BinaryConfigK8sSpec::conventional(
|
||||||
|
"kvstore",
|
||||||
|
NODE_NAME_PREFIX,
|
||||||
|
"/usr/local/bin/kvstore-node",
|
||||||
|
CONTAINER_CONFIG_PATH,
|
||||||
|
CONTAINER_HTTP_PORT,
|
||||||
|
SERVICE_TESTING_PORT,
|
||||||
|
)
|
||||||
|
}
|
||||||
|
}
|
||||||
12
examples/kvstore/testing/integration/src/lib.rs
Normal file
12
examples/kvstore/testing/integration/src/lib.rs
Normal file
@ -0,0 +1,12 @@
|
|||||||
|
mod app;
|
||||||
|
mod compose_env;
|
||||||
|
mod k8s_env;
|
||||||
|
mod local_env;
|
||||||
|
pub mod scenario;
|
||||||
|
|
||||||
|
pub use app::*;
|
||||||
|
pub use scenario::{KvBuilderExt, KvScenarioBuilder};
|
||||||
|
|
||||||
|
pub type KvLocalDeployer = testing_framework_runner_local::ProcessDeployer<KvEnv>;
|
||||||
|
pub type KvComposeDeployer = testing_framework_runner_compose::ComposeDeployer<KvEnv>;
|
||||||
|
pub type KvK8sDeployer = testing_framework_runner_k8s::K8sDeployer<KvEnv>;
|
||||||
41
examples/kvstore/testing/integration/src/local_env.rs
Normal file
41
examples/kvstore/testing/integration/src/local_env.rs
Normal file
@ -0,0 +1,41 @@
|
|||||||
|
use std::collections::HashMap;
|
||||||
|
|
||||||
|
use testing_framework_core::scenario::{DynError, StartNodeOptions};
|
||||||
|
use testing_framework_runner_local::{
|
||||||
|
LocalBinaryApp, LocalNodePorts, LocalPeerNode, LocalProcessSpec,
|
||||||
|
build_local_cluster_node_config, yaml_node_config,
|
||||||
|
};
|
||||||
|
|
||||||
|
use crate::{KvEnv, KvNodeConfig};
|
||||||
|
|
||||||
|
impl LocalBinaryApp for KvEnv {
|
||||||
|
fn initial_node_name_prefix() -> &'static str {
|
||||||
|
"kv-node"
|
||||||
|
}
|
||||||
|
|
||||||
|
fn build_local_node_config_with_peers(
|
||||||
|
_topology: &Self::Deployment,
|
||||||
|
index: usize,
|
||||||
|
ports: &LocalNodePorts,
|
||||||
|
peers: &[LocalPeerNode],
|
||||||
|
_peer_ports_by_name: &HashMap<String, u16>,
|
||||||
|
_options: &StartNodeOptions<Self>,
|
||||||
|
_template_config: Option<
|
||||||
|
&<Self as testing_framework_core::scenario::Application>::NodeConfig,
|
||||||
|
>,
|
||||||
|
) -> Result<<Self as testing_framework_core::scenario::Application>::NodeConfig, DynError> {
|
||||||
|
build_local_cluster_node_config::<Self>(index, ports, peers)
|
||||||
|
}
|
||||||
|
|
||||||
|
fn local_process_spec() -> LocalProcessSpec {
|
||||||
|
LocalProcessSpec::new("KVSTORE_NODE_BIN", "kvstore-node").with_rust_log("kvstore_node=info")
|
||||||
|
}
|
||||||
|
|
||||||
|
fn render_local_config(config: &KvNodeConfig) -> Result<Vec<u8>, DynError> {
|
||||||
|
yaml_node_config(config)
|
||||||
|
}
|
||||||
|
|
||||||
|
fn http_api_port(config: &KvNodeConfig) -> u16 {
|
||||||
|
config.http_port
|
||||||
|
}
|
||||||
|
}
|
||||||
15
examples/kvstore/testing/integration/src/scenario.rs
Normal file
15
examples/kvstore/testing/integration/src/scenario.rs
Normal file
@ -0,0 +1,15 @@
|
|||||||
|
use testing_framework_core::scenario::ScenarioBuilder;
|
||||||
|
|
||||||
|
use crate::{KvEnv, KvTopology};
|
||||||
|
|
||||||
|
/// Scenario builder specialised for the kvstore demo environment.
pub type KvScenarioBuilder = ScenarioBuilder<KvEnv>;
|
||||||
|
|
||||||
|
pub trait KvBuilderExt: Sized {
|
||||||
|
fn deployment_with(f: impl FnOnce(KvTopology) -> KvTopology) -> Self;
|
||||||
|
}
|
||||||
|
|
||||||
|
impl KvBuilderExt for KvScenarioBuilder {
|
||||||
|
fn deployment_with(f: impl FnOnce(KvTopology) -> KvTopology) -> Self {
|
||||||
|
KvScenarioBuilder::with_deployment(f(KvTopology::new(3)))
|
||||||
|
}
|
||||||
|
}
|
||||||
15
examples/kvstore/testing/workloads/Cargo.toml
Normal file
15
examples/kvstore/testing/workloads/Cargo.toml
Normal file
@ -0,0 +1,15 @@
|
|||||||
|
[package]
|
||||||
|
edition.workspace = true
|
||||||
|
license.workspace = true
|
||||||
|
name = "kvstore-runtime-workloads"
|
||||||
|
version.workspace = true
|
||||||
|
|
||||||
|
[dependencies]
|
||||||
|
kvstore-node = { path = "../../kvstore-node" }
|
||||||
|
kvstore-runtime-ext = { path = "../integration" }
|
||||||
|
testing-framework-core = { workspace = true }
|
||||||
|
|
||||||
|
async-trait = { workspace = true }
|
||||||
|
serde = { workspace = true }
|
||||||
|
tokio = { workspace = true, features = ["full"] }
|
||||||
|
tracing = { workspace = true }
|
||||||
100
examples/kvstore/testing/workloads/src/expectations.rs
Normal file
100
examples/kvstore/testing/workloads/src/expectations.rs
Normal file
@ -0,0 +1,100 @@
|
|||||||
|
use std::time::Duration;
|
||||||
|
|
||||||
|
use async_trait::async_trait;
|
||||||
|
use kvstore_runtime_ext::KvEnv;
|
||||||
|
use serde::Deserialize;
|
||||||
|
use testing_framework_core::scenario::{DynError, Expectation, RunContext};
|
||||||
|
use tracing::info;
|
||||||
|
|
||||||
|
/// Expectation that every node eventually returns the same record for each
/// key `<key_prefix>-0 .. <key_prefix>-(key_count - 1)`.
#[derive(Clone)]
pub struct KvConverges {
    /// Prefix of the keys to compare.
    key_prefix: String,
    /// Number of keys to compare.
    key_count: usize,
    /// How long to keep polling before giving up.
    timeout: Duration,
    /// Pause between two convergence checks.
    poll_interval: Duration,
}
|
||||||
|
|
||||||
|
#[derive(Deserialize, Clone, Debug, Eq, PartialEq)]
|
||||||
|
struct ValueRecord {
|
||||||
|
value: String,
|
||||||
|
version: u64,
|
||||||
|
origin: u64,
|
||||||
|
}
|
||||||
|
|
||||||
|
#[derive(Deserialize)]
|
||||||
|
struct GetResponse {
|
||||||
|
record: Option<ValueRecord>,
|
||||||
|
}
|
||||||
|
|
||||||
|
impl KvConverges {
|
||||||
|
#[must_use]
|
||||||
|
pub fn new(key_prefix: impl Into<String>, key_count: usize) -> Self {
|
||||||
|
Self {
|
||||||
|
key_prefix: key_prefix.into(),
|
||||||
|
key_count,
|
||||||
|
timeout: Duration::from_secs(20),
|
||||||
|
poll_interval: Duration::from_millis(500),
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
#[must_use]
|
||||||
|
pub const fn timeout(mut self, timeout: Duration) -> Self {
|
||||||
|
self.timeout = timeout;
|
||||||
|
self
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
#[async_trait]
|
||||||
|
impl Expectation<KvEnv> for KvConverges {
|
||||||
|
fn name(&self) -> &str {
|
||||||
|
"kv_converges"
|
||||||
|
}
|
||||||
|
|
||||||
|
async fn evaluate(&mut self, ctx: &RunContext<KvEnv>) -> Result<(), DynError> {
|
||||||
|
let clients = ctx.node_clients().snapshot();
|
||||||
|
if clients.is_empty() {
|
||||||
|
return Err("no kv node clients available".into());
|
||||||
|
}
|
||||||
|
|
||||||
|
let deadline = tokio::time::Instant::now() + self.timeout;
|
||||||
|
while tokio::time::Instant::now() < deadline {
|
||||||
|
if self.is_converged(&clients).await? {
|
||||||
|
info!(key_count = self.key_count, "kv convergence reached");
|
||||||
|
return Ok(());
|
||||||
|
}
|
||||||
|
tokio::time::sleep(self.poll_interval).await;
|
||||||
|
}
|
||||||
|
|
||||||
|
Err(format!(
|
||||||
|
"kv convergence not reached within {:?} for {} keys",
|
||||||
|
self.timeout, self.key_count
|
||||||
|
)
|
||||||
|
.into())
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
impl KvConverges {
|
||||||
|
async fn is_converged(&self, clients: &[kvstore_node::KvHttpClient]) -> Result<bool, DynError> {
|
||||||
|
for key_idx in 0..self.key_count {
|
||||||
|
let key = format!("{}-{key_idx}", self.key_prefix);
|
||||||
|
let first = read_key(clients, &key, 0).await?;
|
||||||
|
for node_idx in 1..clients.len() {
|
||||||
|
let current = read_key(clients, &key, node_idx).await?;
|
||||||
|
if current != first {
|
||||||
|
return Ok(false);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
Ok(true)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
async fn read_key(
|
||||||
|
clients: &[kvstore_node::KvHttpClient],
|
||||||
|
key: &str,
|
||||||
|
index: usize,
|
||||||
|
) -> Result<Option<ValueRecord>, DynError> {
|
||||||
|
let response: GetResponse = clients[index].get(&format!("/kv/{key}")).await?;
|
||||||
|
Ok(response.record)
|
||||||
|
}
|
||||||
6
examples/kvstore/testing/workloads/src/lib.rs
Normal file
6
examples/kvstore/testing/workloads/src/lib.rs
Normal file
@ -0,0 +1,6 @@
|
|||||||
|
mod expectations;
|
||||||
|
mod write;
|
||||||
|
|
||||||
|
pub use expectations::KvConverges;
|
||||||
|
pub use kvstore_runtime_ext::{KvBuilderExt, KvEnv, KvScenarioBuilder, KvTopology};
|
||||||
|
pub use write::KvWriteWorkload;
|
||||||
135
examples/kvstore/testing/workloads/src/write.rs
Normal file
135
examples/kvstore/testing/workloads/src/write.rs
Normal file
@ -0,0 +1,135 @@
|
|||||||
|
use std::time::Duration;
|
||||||
|
|
||||||
|
use async_trait::async_trait;
|
||||||
|
use kvstore_runtime_ext::KvEnv;
|
||||||
|
use serde::{Deserialize, Serialize};
|
||||||
|
use testing_framework_core::scenario::{DynError, RunContext, Workload};
|
||||||
|
use tracing::info;
|
||||||
|
|
||||||
|
/// Workload that issues a sequence of writes against the first node.
#[derive(Clone)]
pub struct KvWriteWorkload {
    /// Total number of writes to issue.
    operations: usize,
    /// Number of distinct keys the writes cycle through.
    key_count: usize,
    /// Target writes per second; `None` or `Some(0)` disables throttling.
    rate_per_sec: Option<usize>,
    /// Prefix of the generated keys (`<prefix>-<index>`).
    key_prefix: String,
}
|
||||||
|
|
||||||
|
#[derive(Serialize)]
|
||||||
|
struct PutRequest {
|
||||||
|
value: String,
|
||||||
|
expected_version: Option<u64>,
|
||||||
|
}
|
||||||
|
|
||||||
|
#[derive(Deserialize)]
|
||||||
|
struct PutResponse {
|
||||||
|
applied: bool,
|
||||||
|
version: u64,
|
||||||
|
}
|
||||||
|
|
||||||
|
impl KvWriteWorkload {
|
||||||
|
#[must_use]
|
||||||
|
pub fn new() -> Self {
|
||||||
|
Self {
|
||||||
|
operations: 200,
|
||||||
|
key_count: 20,
|
||||||
|
rate_per_sec: Some(25),
|
||||||
|
key_prefix: "kv-demo".to_owned(),
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
#[must_use]
|
||||||
|
pub const fn operations(mut self, value: usize) -> Self {
|
||||||
|
self.operations = value;
|
||||||
|
self
|
||||||
|
}
|
||||||
|
|
||||||
|
#[must_use]
|
||||||
|
pub const fn key_count(mut self, value: usize) -> Self {
|
||||||
|
self.key_count = value;
|
||||||
|
self
|
||||||
|
}
|
||||||
|
|
||||||
|
#[must_use]
|
||||||
|
pub const fn rate_per_sec(mut self, value: usize) -> Self {
|
||||||
|
self.rate_per_sec = Some(value);
|
||||||
|
self
|
||||||
|
}
|
||||||
|
|
||||||
|
#[must_use]
|
||||||
|
pub fn key_prefix(mut self, value: impl Into<String>) -> Self {
|
||||||
|
self.key_prefix = value.into();
|
||||||
|
self
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
impl Default for KvWriteWorkload {
|
||||||
|
fn default() -> Self {
|
||||||
|
Self::new()
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
#[async_trait]
|
||||||
|
impl Workload<KvEnv> for KvWriteWorkload {
|
||||||
|
fn name(&self) -> &str {
|
||||||
|
"kv_write_workload"
|
||||||
|
}
|
||||||
|
|
||||||
|
async fn start(&self, ctx: &RunContext<KvEnv>) -> Result<(), DynError> {
|
||||||
|
let clients = ctx.node_clients().snapshot();
|
||||||
|
let Some(leader) = clients.first() else {
|
||||||
|
return Err("no kv node clients available".into());
|
||||||
|
};
|
||||||
|
|
||||||
|
if self.key_count == 0 {
|
||||||
|
return Err("kv workload key_count must be > 0".into());
|
||||||
|
}
|
||||||
|
|
||||||
|
let interval = self.rate_per_sec.and_then(compute_interval);
|
||||||
|
info!(
|
||||||
|
operations = self.operations,
|
||||||
|
key_count = self.key_count,
|
||||||
|
rate_per_sec = ?self.rate_per_sec,
|
||||||
|
"starting kv write workload"
|
||||||
|
);
|
||||||
|
|
||||||
|
for idx in 0..self.operations {
|
||||||
|
let key = format!("{}-{}", self.key_prefix, idx % self.key_count);
|
||||||
|
let value = format!("value-{idx}");
|
||||||
|
let response: PutResponse = leader
|
||||||
|
.put(
|
||||||
|
&format!("/kv/{key}"),
|
||||||
|
&PutRequest {
|
||||||
|
value,
|
||||||
|
expected_version: None,
|
||||||
|
},
|
||||||
|
)
|
||||||
|
.await?;
|
||||||
|
|
||||||
|
if !response.applied {
|
||||||
|
return Err(format!("leader rejected write for key {key}").into());
|
||||||
|
}
|
||||||
|
|
||||||
|
if (idx + 1) % 25 == 0 {
|
||||||
|
info!(
|
||||||
|
completed = idx + 1,
|
||||||
|
version = response.version,
|
||||||
|
"kv write progress"
|
||||||
|
);
|
||||||
|
}
|
||||||
|
|
||||||
|
if let Some(delay) = interval {
|
||||||
|
tokio::time::sleep(delay).await;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
Ok(())
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Converts a target rate into the pause to insert between two operations.
///
/// Returns `None` for a rate of 0 (no throttling). Otherwise the pause is
/// `1s / rate_per_sec`, computed with microsecond resolution so rates that
/// do not divide 1000 evenly are not rounded up to a much faster rate, and
/// never shorter than one millisecond.
fn compute_interval(rate_per_sec: usize) -> Option<Duration> {
    const MICROS_PER_SEC: u64 = 1_000_000;
    const MIN_INTERVAL_MICROS: u64 = 1_000;

    if rate_per_sec == 0 {
        return None;
    }

    // A rate too large for u64 is clamped; it hits the minimum interval anyway.
    let rate = u64::try_from(rate_per_sec).unwrap_or(u64::MAX);
    let micros = (MICROS_PER_SEC / rate).max(MIN_INTERVAL_MICROS);
    Some(Duration::from_micros(micros))
}
|
||||||
@ -31,7 +31,7 @@ Each node exposes:
|
|||||||
|
|
||||||
```bash
|
```bash
|
||||||
LOGOS_BLOCKCHAIN_METRICS_QUERY_URL=http://127.0.0.1:19091 \
|
LOGOS_BLOCKCHAIN_METRICS_QUERY_URL=http://127.0.0.1:19091 \
|
||||||
cargo run -p metrics-counter-examples --bin compose_prometheus_expectation
|
cargo run -p metrics-counter-examples --bin metrics_counter_compose_prometheus_expectation
|
||||||
```
|
```
|
||||||
|
|
||||||
## Run with Kubernetes
|
## Run with Kubernetes
|
||||||
@ -39,7 +39,7 @@ cargo run -p metrics-counter-examples --bin compose_prometheus_expectation
|
|||||||
```bash
|
```bash
|
||||||
docker build -t metrics-counter-node:local -f examples/metrics_counter/Dockerfile .
|
docker build -t metrics-counter-node:local -f examples/metrics_counter/Dockerfile .
|
||||||
LOGOS_BLOCKCHAIN_METRICS_QUERY_URL=http://127.0.0.1:30991 \
|
LOGOS_BLOCKCHAIN_METRICS_QUERY_URL=http://127.0.0.1:30991 \
|
||||||
cargo run -p metrics-counter-examples --bin k8s_prometheus_expectation
|
cargo run -p metrics-counter-examples --bin metrics_counter_k8s_prometheus_expectation
|
||||||
```
|
```
|
||||||
|
|
||||||
Overrides:
|
Overrides:
|
||||||
@ -51,5 +51,5 @@ Overrides:
|
|||||||
```bash
|
```bash
|
||||||
docker build -t metrics-counter-node:local -f examples/metrics_counter/Dockerfile .
|
docker build -t metrics-counter-node:local -f examples/metrics_counter/Dockerfile .
|
||||||
LOGOS_BLOCKCHAIN_METRICS_QUERY_URL=http://127.0.0.1:30991 \
|
LOGOS_BLOCKCHAIN_METRICS_QUERY_URL=http://127.0.0.1:30991 \
|
||||||
cargo run -p metrics-counter-examples --bin k8s_manual_prometheus
|
cargo run -p metrics-counter-examples --bin metrics_counter_k8s_manual_prometheus
|
||||||
```
|
```
|
||||||
|
|||||||
@ -4,6 +4,18 @@ license.workspace = true
|
|||||||
name = "metrics-counter-examples"
|
name = "metrics-counter-examples"
|
||||||
version.workspace = true
|
version.workspace = true
|
||||||
|
|
||||||
|
[[bin]]
|
||||||
|
name = "metrics_counter_compose_prometheus_expectation"
|
||||||
|
path = "src/bin/compose_prometheus_expectation.rs"
|
||||||
|
|
||||||
|
[[bin]]
|
||||||
|
name = "metrics_counter_k8s_prometheus_expectation"
|
||||||
|
path = "src/bin/k8s_prometheus_expectation.rs"
|
||||||
|
|
||||||
|
[[bin]]
|
||||||
|
name = "metrics_counter_k8s_manual_prometheus"
|
||||||
|
path = "src/bin/k8s_manual_prometheus.rs"
|
||||||
|
|
||||||
[dependencies]
|
[dependencies]
|
||||||
anyhow = "1.0"
|
anyhow = "1.0"
|
||||||
metrics-counter-node = { path = "../metrics-counter-node" }
|
metrics-counter-node = { path = "../metrics-counter-node" }
|
||||||
|
|||||||
@ -23,23 +23,23 @@ Each example follows the same pattern:
|
|||||||
## Run locally
|
## Run locally
|
||||||
|
|
||||||
```bash
|
```bash
|
||||||
cargo run -p nats-examples --bin basic_roundtrip
|
cargo run -p nats-examples --bin nats_basic_roundtrip
|
||||||
```
|
```
|
||||||
|
|
||||||
If `nats-server` is not on `PATH`:
|
If `nats-server` is not on `PATH`:
|
||||||
|
|
||||||
```bash
|
```bash
|
||||||
NATS_SERVER_BIN=/path/to/nats-server cargo run -p nats-examples --bin basic_roundtrip
|
NATS_SERVER_BIN=/path/to/nats-server cargo run -p nats-examples --bin nats_basic_roundtrip
|
||||||
```
|
```
|
||||||
|
|
||||||
## Run with Docker Compose
|
## Run with Docker Compose
|
||||||
|
|
||||||
```bash
|
```bash
|
||||||
cargo run -p nats-examples --bin compose_roundtrip
|
cargo run -p nats-examples --bin nats_compose_roundtrip
|
||||||
```
|
```
|
||||||
|
|
||||||
## Run the parity check
|
## Run the parity check
|
||||||
|
|
||||||
```bash
|
```bash
|
||||||
cargo run -p nats-examples --bin parity_check
|
cargo run -p nats-examples --bin nats_parity_check
|
||||||
```
|
```
|
||||||
|
|||||||
@ -4,6 +4,18 @@ license.workspace = true
|
|||||||
name = "nats-examples"
|
name = "nats-examples"
|
||||||
version.workspace = true
|
version.workspace = true
|
||||||
|
|
||||||
|
[[bin]]
|
||||||
|
name = "nats_basic_roundtrip"
|
||||||
|
path = "src/bin/basic_roundtrip.rs"
|
||||||
|
|
||||||
|
[[bin]]
|
||||||
|
name = "nats_compose_roundtrip"
|
||||||
|
path = "src/bin/compose_roundtrip.rs"
|
||||||
|
|
||||||
|
[[bin]]
|
||||||
|
name = "nats_parity_check"
|
||||||
|
path = "src/bin/parity_check.rs"
|
||||||
|
|
||||||
[dependencies]
|
[dependencies]
|
||||||
anyhow = "1.0"
|
anyhow = "1.0"
|
||||||
nats-runtime-ext = { path = "../testing/integration" }
|
nats-runtime-ext = { path = "../testing/integration" }
|
||||||
|
|||||||
25
examples/openraft_kv/Dockerfile
Normal file
25
examples/openraft_kv/Dockerfile
Normal file
@ -0,0 +1,25 @@
|
|||||||
|
# Build stage
|
||||||
|
FROM rustlang/rust:nightly-bookworm AS builder
|
||||||
|
|
||||||
|
WORKDIR /build
|
||||||
|
|
||||||
|
COPY Cargo.toml Cargo.lock ./
|
||||||
|
COPY cfgsync/ ./cfgsync/
|
||||||
|
COPY examples/ ./examples/
|
||||||
|
COPY testing-framework/ ./testing-framework/
|
||||||
|
|
||||||
|
RUN cargo build --release -p openraft-kv-node
|
||||||
|
|
||||||
|
FROM debian:bookworm-slim
|
||||||
|
|
||||||
|
RUN apt-get update && \
|
||||||
|
apt-get install -y ca-certificates && \
|
||||||
|
rm -rf /var/lib/apt/lists/*
|
||||||
|
|
||||||
|
COPY --from=builder /build/target/release/openraft-kv-node /usr/local/bin/openraft-kv-node
|
||||||
|
|
||||||
|
RUN mkdir -p /etc/openraft-kv
|
||||||
|
WORKDIR /app
|
||||||
|
|
||||||
|
ENTRYPOINT ["/usr/local/bin/openraft-kv-node"]
|
||||||
|
CMD ["--config", "/etc/openraft-kv/config.yaml"]
|
||||||
87
examples/openraft_kv/README.md
Normal file
87
examples/openraft_kv/README.md
Normal file
@ -0,0 +1,87 @@
|
|||||||
|
# OpenRaft KV Example
|
||||||
|
|
||||||
|
This example runs a small key-value service built on top of `OpenRaft`.
|
||||||
|
|
||||||
|
The main scenario does four things:
|
||||||
|
|
||||||
|
- bootstraps node 0 as a one-node cluster
|
||||||
|
- adds nodes 1 and 2 as learners and promotes them to voters
|
||||||
|
- writes one batch of keys through the current leader
|
||||||
|
- restarts that leader, waits for a new leader, writes again, and then checks
|
||||||
|
that all three nodes expose the same replicated state
|
||||||
|
|
||||||
|
## How TF runs this
|
||||||
|
|
||||||
|
- TF starts three OpenRaft nodes
|
||||||
|
- the workload bootstraps the cluster through the admin API
|
||||||
|
- the workload writes a first batch, restarts the current leader, waits for failover, and writes again
|
||||||
|
- the expectation checks that all three nodes converge on the same key/value state and membership
|
||||||
|
|
||||||
|
## Scenarios
|
||||||
|
|
||||||
|
- `openraft_kv_basic_failover` runs the leader-restart flow locally
|
||||||
|
- `openraft_kv_compose_failover` runs the same flow in Docker Compose
|
||||||
|
- `openraft_kv_k8s_failover` runs the same flow against a manual Kubernetes cluster deployment
|
||||||
|
|
||||||
|
## API
|
||||||
|
|
||||||
|
Each node exposes:
|
||||||
|
|
||||||
|
- `GET /healthz` for readiness
|
||||||
|
- `GET /state` for current Raft role, leader, membership, log progress, and replicated key/value data
|
||||||
|
- `POST /kv/write` to submit a write through the local Raft node
|
||||||
|
- `POST /kv/read` to read a key from the local state machine
|
||||||
|
- `POST /admin/init` to initialize a single-node cluster
|
||||||
|
- `POST /admin/add-learner` to add a new Raft learner
|
||||||
|
- `POST /admin/change-membership` to promote learners into the voting set
|
||||||
|
|
||||||
|
The node also exposes internal Raft RPC endpoints used only for replication:
|
||||||
|
|
||||||
|
- `POST /raft/vote`
|
||||||
|
- `POST /raft/append`
|
||||||
|
- `POST /raft/snapshot`
|
||||||
|
|
||||||
|
## Run locally
|
||||||
|
|
||||||
|
```bash
|
||||||
|
OPENRAFT_KV_NODE_BIN="$(pwd)/target/debug/openraft-kv-node" \
|
||||||
|
cargo run -p openraft-kv-examples --bin openraft_kv_basic_failover
|
||||||
|
```
|
||||||
|
|
||||||
|
Build the node first if you have not done that yet:
|
||||||
|
|
||||||
|
```bash
|
||||||
|
cargo build -p openraft-kv-node
|
||||||
|
```
|
||||||
|
|
||||||
|
## Run with Docker Compose
|
||||||
|
|
||||||
|
Build the image first:
|
||||||
|
|
||||||
|
```bash
|
||||||
|
docker build -t openraft-kv-node:local -f examples/openraft_kv/Dockerfile .
|
||||||
|
```
|
||||||
|
|
||||||
|
Then run:
|
||||||
|
|
||||||
|
```bash
|
||||||
|
cargo run -p openraft-kv-examples --bin openraft_kv_compose_failover
|
||||||
|
```
|
||||||
|
|
||||||
|
Set `OPENRAFT_KV_IMAGE` to override the default compose image tag.
|
||||||
|
|
||||||
|
## Run on Kubernetes
|
||||||
|
|
||||||
|
Build the same image first:
|
||||||
|
|
||||||
|
```bash
|
||||||
|
docker build -t openraft-kv-node:local -f examples/openraft_kv/Dockerfile .
|
||||||
|
```
|
||||||
|
|
||||||
|
Then run:
|
||||||
|
|
||||||
|
```bash
|
||||||
|
cargo run -p openraft-kv-examples --bin openraft_kv_k8s_failover
|
||||||
|
```
|
||||||
|
|
||||||
|
If no cluster is available, the example exits early and prints a skip message.
|
||||||
28
examples/openraft_kv/examples/Cargo.toml
Normal file
28
examples/openraft_kv/examples/Cargo.toml
Normal file
@ -0,0 +1,28 @@
|
|||||||
|
[package]
|
||||||
|
edition.workspace = true
|
||||||
|
license.workspace = true
|
||||||
|
name = "openraft-kv-examples"
|
||||||
|
version.workspace = true
|
||||||
|
|
||||||
|
[[bin]]
|
||||||
|
name = "openraft_kv_basic_failover"
|
||||||
|
path = "src/bin/basic_failover.rs"
|
||||||
|
|
||||||
|
[[bin]]
|
||||||
|
name = "openraft_kv_compose_failover"
|
||||||
|
path = "src/bin/compose_failover.rs"
|
||||||
|
|
||||||
|
[[bin]]
|
||||||
|
name = "openraft_kv_k8s_failover"
|
||||||
|
path = "src/bin/k8s_failover.rs"
|
||||||
|
|
||||||
|
[dependencies]
|
||||||
|
anyhow = "1.0"
|
||||||
|
openraft-kv-node = { path = "../openraft-kv-node" }
|
||||||
|
openraft-kv-runtime-ext = { path = "../testing/integration" }
|
||||||
|
openraft-kv-runtime-workloads = { path = "../testing/workloads" }
|
||||||
|
testing-framework-core = { workspace = true }
|
||||||
|
testing-framework-runner-k8s = { workspace = true }
|
||||||
|
tokio = { workspace = true, features = ["full"] }
|
||||||
|
tracing = { workspace = true }
|
||||||
|
tracing-subscriber = { version = "0.3", features = ["env-filter"] }
|
||||||
20
examples/openraft_kv/examples/src/bin/basic_failover.rs
Normal file
20
examples/openraft_kv/examples/src/bin/basic_failover.rs
Normal file
@ -0,0 +1,20 @@
|
|||||||
|
use std::time::Duration;
|
||||||
|
|
||||||
|
use openraft_kv_examples::build_failover_scenario;
|
||||||
|
use openraft_kv_runtime_ext::OpenRaftKvLocalDeployer;
|
||||||
|
use testing_framework_core::scenario::Deployer;
|
||||||
|
|
||||||
|
#[tokio::main]
|
||||||
|
async fn main() -> anyhow::Result<()> {
|
||||||
|
tracing_subscriber::fmt()
|
||||||
|
.with_env_filter(tracing_subscriber::EnvFilter::from_default_env())
|
||||||
|
.init();
|
||||||
|
|
||||||
|
let mut scenario = build_failover_scenario(Duration::from_secs(45), Duration::from_secs(30))?;
|
||||||
|
|
||||||
|
let deployer = OpenRaftKvLocalDeployer::default();
|
||||||
|
let runner = deployer.deploy(&scenario).await?;
|
||||||
|
runner.run(&mut scenario).await?;
|
||||||
|
|
||||||
|
Ok(())
|
||||||
|
}
|
||||||
20
examples/openraft_kv/examples/src/bin/compose_failover.rs
Normal file
20
examples/openraft_kv/examples/src/bin/compose_failover.rs
Normal file
@ -0,0 +1,20 @@
|
|||||||
|
use std::time::Duration;
|
||||||
|
|
||||||
|
use openraft_kv_examples::build_failover_scenario;
|
||||||
|
use openraft_kv_runtime_ext::OpenRaftKvComposeDeployer;
|
||||||
|
use testing_framework_core::scenario::Deployer;
|
||||||
|
|
||||||
|
#[tokio::main]
|
||||||
|
async fn main() -> anyhow::Result<()> {
|
||||||
|
tracing_subscriber::fmt()
|
||||||
|
.with_env_filter(tracing_subscriber::EnvFilter::from_default_env())
|
||||||
|
.init();
|
||||||
|
|
||||||
|
let mut scenario = build_failover_scenario(Duration::from_secs(60), Duration::from_secs(40))?;
|
||||||
|
|
||||||
|
let deployer = OpenRaftKvComposeDeployer::new();
|
||||||
|
let runner = deployer.deploy(&scenario).await?;
|
||||||
|
runner.run(&mut scenario).await?;
|
||||||
|
|
||||||
|
Ok(())
|
||||||
|
}
|
||||||
195
examples/openraft_kv/examples/src/bin/k8s_failover.rs
Normal file
195
examples/openraft_kv/examples/src/bin/k8s_failover.rs
Normal file
@ -0,0 +1,195 @@
|
|||||||
|
use std::{sync::Arc, time::Duration};
|
||||||
|
|
||||||
|
use anyhow::{Context as _, Result, anyhow};
|
||||||
|
use openraft_kv_examples::{
|
||||||
|
INITIAL_WRITE_BATCH, RAFT_KEY_PREFIX, SECOND_WRITE_BATCH, TOTAL_WRITES,
|
||||||
|
};
|
||||||
|
use openraft_kv_node::OpenRaftKvClient;
|
||||||
|
use openraft_kv_runtime_ext::{
|
||||||
|
OpenRaftClusterObserver, OpenRaftKvEnv, OpenRaftKvK8sDeployer, OpenRaftKvTopology,
|
||||||
|
OpenRaftManualClusterSourceProvider,
|
||||||
|
};
|
||||||
|
use openraft_kv_runtime_workloads::{
|
||||||
|
OpenRaftMembership, expected_kv, wait_for_observed_leader, wait_for_observed_membership,
|
||||||
|
wait_for_observed_replication, write_batch,
|
||||||
|
};
|
||||||
|
use testing_framework_core::observation::{ObservationHandle, ObservationRuntime};
|
||||||
|
use testing_framework_runner_k8s::{ManualCluster, ManualClusterError};
|
||||||
|
use tracing::{info, warn};
|
||||||
|
|
||||||
|
#[tokio::main]
|
||||||
|
async fn main() -> Result<()> {
|
||||||
|
tracing_subscriber::fmt()
|
||||||
|
.with_env_filter(tracing_subscriber::EnvFilter::from_default_env())
|
||||||
|
.init();
|
||||||
|
|
||||||
|
let deployer = OpenRaftKvK8sDeployer::new();
|
||||||
|
let cluster = match deployer
|
||||||
|
.manual_cluster_from_descriptors(OpenRaftKvTopology::new(3))
|
||||||
|
.await
|
||||||
|
{
|
||||||
|
Ok(cluster) => cluster,
|
||||||
|
Err(ManualClusterError::ClientInit { source }) => {
|
||||||
|
warn!("k8s unavailable ({source}); skipping openraft k8s run");
|
||||||
|
|
||||||
|
return Ok(());
|
||||||
|
}
|
||||||
|
Err(ManualClusterError::InstallStack { source })
|
||||||
|
if k8s_cluster_unavailable(&source.to_string()) =>
|
||||||
|
{
|
||||||
|
warn!("k8s unavailable ({source}); skipping openraft k8s run");
|
||||||
|
|
||||||
|
return Ok(());
|
||||||
|
}
|
||||||
|
Err(error) => {
|
||||||
|
return Err(anyhow::Error::new(error)).context("creating openraft k8s cluster");
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
run_failover(Arc::new(cluster), Duration::from_secs(40)).await
|
||||||
|
}
|
||||||
|
|
||||||
|
async fn run_failover(cluster: Arc<ManualCluster<OpenRaftKvEnv>>, timeout: Duration) -> Result<()> {
|
||||||
|
start_cluster(cluster.as_ref()).await?;
|
||||||
|
|
||||||
|
let observation_runtime = start_observer(Arc::clone(&cluster)).await?;
|
||||||
|
let observer = observation_runtime.handle();
|
||||||
|
|
||||||
|
client_for_node(cluster.as_ref(), 0)?.init_self().await?;
|
||||||
|
|
||||||
|
let initial_leader = wait_for_observed_leader(&observer, timeout, None).await?;
|
||||||
|
let membership = current_membership(&observer)?;
|
||||||
|
|
||||||
|
add_learners_and_promote(
|
||||||
|
cluster.as_ref(),
|
||||||
|
&observer,
|
||||||
|
initial_leader,
|
||||||
|
&membership,
|
||||||
|
timeout,
|
||||||
|
)
|
||||||
|
.await?;
|
||||||
|
write_initial_batch(cluster.as_ref(), initial_leader).await?;
|
||||||
|
|
||||||
|
restart_leader(cluster.as_ref(), initial_leader).await?;
|
||||||
|
|
||||||
|
let new_leader = wait_for_observed_leader(&observer, timeout, Some(initial_leader)).await?;
|
||||||
|
write_second_batch(cluster.as_ref(), new_leader).await?;
|
||||||
|
|
||||||
|
let expected = expected_kv(RAFT_KEY_PREFIX, TOTAL_WRITES);
|
||||||
|
wait_for_observed_replication(&observer, &expected, timeout).await?;
|
||||||
|
|
||||||
|
cluster.stop_all();
|
||||||
|
|
||||||
|
Ok(())
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Starts `node-0`, `node-1` and `node-2` in order, then waits for the
/// cluster network to report ready.
async fn start_cluster(cluster: &ManualCluster<OpenRaftKvEnv>) -> Result<()> {
    for node_name in ["node-0", "node-1", "node-2"] {
        cluster.start_node(node_name).await?;
    }

    cluster.wait_network_ready().await?;

    Ok(())
}
|
||||||
|
|
||||||
|
async fn start_observer(
|
||||||
|
cluster: Arc<ManualCluster<OpenRaftKvEnv>>,
|
||||||
|
) -> Result<ObservationRuntime<OpenRaftClusterObserver>> {
|
||||||
|
let provider = OpenRaftManualClusterSourceProvider::new(cluster, 3);
|
||||||
|
|
||||||
|
ObservationRuntime::start(
|
||||||
|
provider,
|
||||||
|
OpenRaftClusterObserver,
|
||||||
|
OpenRaftClusterObserver::config(),
|
||||||
|
)
|
||||||
|
.await
|
||||||
|
.map_err(anyhow::Error::new)
|
||||||
|
.context("starting openraft k8s observer")
|
||||||
|
}
|
||||||
|
|
||||||
|
async fn add_learners_and_promote(
|
||||||
|
cluster: &ManualCluster<OpenRaftKvEnv>,
|
||||||
|
observer: &ObservationHandle<OpenRaftClusterObserver>,
|
||||||
|
leader_id: u64,
|
||||||
|
membership: &OpenRaftMembership,
|
||||||
|
timeout: Duration,
|
||||||
|
) -> Result<()> {
|
||||||
|
let leader = client_for_node(cluster, leader_id)?;
|
||||||
|
|
||||||
|
for learner in membership.learner_targets(leader_id) {
|
||||||
|
info!(
|
||||||
|
target = learner.node_id,
|
||||||
|
addr = %learner.public_addr,
|
||||||
|
"adding learner"
|
||||||
|
);
|
||||||
|
|
||||||
|
leader
|
||||||
|
.add_learner(learner.node_id, &learner.public_addr)
|
||||||
|
.await?;
|
||||||
|
}
|
||||||
|
|
||||||
|
let voter_ids = membership.voter_ids();
|
||||||
|
leader.change_membership(voter_ids.iter().copied()).await?;
|
||||||
|
|
||||||
|
wait_for_observed_membership(observer, &voter_ids, timeout).await?;
|
||||||
|
|
||||||
|
Ok(())
|
||||||
|
}
|
||||||
|
|
||||||
|
async fn write_initial_batch(cluster: &ManualCluster<OpenRaftKvEnv>, leader_id: u64) -> Result<()> {
|
||||||
|
let leader = client_for_node(cluster, leader_id)?;
|
||||||
|
|
||||||
|
write_batch(&leader, RAFT_KEY_PREFIX, 0, INITIAL_WRITE_BATCH).await?;
|
||||||
|
|
||||||
|
Ok(())
|
||||||
|
}
|
||||||
|
|
||||||
|
async fn write_second_batch(cluster: &ManualCluster<OpenRaftKvEnv>, leader_id: u64) -> Result<()> {
|
||||||
|
let leader = client_for_node(cluster, leader_id)?;
|
||||||
|
|
||||||
|
write_batch(
|
||||||
|
&leader,
|
||||||
|
RAFT_KEY_PREFIX,
|
||||||
|
INITIAL_WRITE_BATCH,
|
||||||
|
SECOND_WRITE_BATCH,
|
||||||
|
)
|
||||||
|
.await?;
|
||||||
|
|
||||||
|
Ok(())
|
||||||
|
}
|
||||||
|
|
||||||
|
async fn restart_leader(cluster: &ManualCluster<OpenRaftKvEnv>, leader_id: u64) -> Result<()> {
|
||||||
|
let leader_name = format!("node-{leader_id}");
|
||||||
|
info!(%leader_name, "restarting current leader");
|
||||||
|
|
||||||
|
cluster.restart_node(&leader_name).await?;
|
||||||
|
cluster.wait_network_ready().await?;
|
||||||
|
|
||||||
|
Ok(())
|
||||||
|
}
|
||||||
|
|
||||||
|
fn current_membership(
|
||||||
|
observer: &ObservationHandle<OpenRaftClusterObserver>,
|
||||||
|
) -> Result<OpenRaftMembership> {
|
||||||
|
let snapshot = observer
|
||||||
|
.latest_snapshot()
|
||||||
|
.ok_or_else(|| anyhow!("openraft observer has not produced a snapshot yet"))?;
|
||||||
|
|
||||||
|
Ok(OpenRaftMembership::from_states(snapshot.value.states()))
|
||||||
|
}
|
||||||
|
|
||||||
|
fn client_for_node(
|
||||||
|
cluster: &ManualCluster<OpenRaftKvEnv>,
|
||||||
|
node_id: u64,
|
||||||
|
) -> Result<OpenRaftKvClient> {
|
||||||
|
cluster
|
||||||
|
.node_client(&format!("node-{node_id}"))
|
||||||
|
.ok_or_else(|| anyhow!("node-{node_id} client missing"))
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Reports whether an error message indicates that the Kubernetes cluster is
/// unreachable.
///
/// The check is a case-sensitive substring match against a fixed list of
/// known connection-failure fragments.
fn k8s_cluster_unavailable(message: &str) -> bool {
    const UNAVAILABLE_MARKERS: [&str; 3] = [
        "Unable to connect to the server",
        "TLS handshake timeout",
        "connection refused",
    ];

    UNAVAILABLE_MARKERS
        .iter()
        .any(|marker| message.contains(*marker))
}
|
||||||
41
examples/openraft_kv/examples/src/lib.rs
Normal file
41
examples/openraft_kv/examples/src/lib.rs
Normal file
@ -0,0 +1,41 @@
|
|||||||
|
use std::time::Duration;
|
||||||
|
|
||||||
|
use openraft_kv_runtime_ext::{OpenRaftKvBuilderExt, OpenRaftKvEnv, OpenRaftKvScenarioBuilder};
|
||||||
|
use openraft_kv_runtime_workloads::{OpenRaftKvConverges, OpenRaftKvFailoverWorkload};
|
||||||
|
use testing_framework_core::scenario::{NodeControlCapability, Scenario};
|
||||||
|
|
||||||
|
/// Number of writes issued before the leader restart.
|
||||||
|
pub const INITIAL_WRITE_BATCH: usize = 8;
|
||||||
|
/// Number of writes issued after the leader restart.
|
||||||
|
pub const SECOND_WRITE_BATCH: usize = 8;
|
||||||
|
/// Total write count expected after the scenario completes.
|
||||||
|
pub const TOTAL_WRITES: usize = INITIAL_WRITE_BATCH + SECOND_WRITE_BATCH;
|
||||||
|
/// Key prefix shared by the failover workload and convergence expectation.
|
||||||
|
pub const RAFT_KEY_PREFIX: &str = "raft-key";
|
||||||
|
|
||||||
|
/// Builds the standard failover scenario used by the local and compose
|
||||||
|
/// binaries.
|
||||||
|
pub fn build_failover_scenario(
|
||||||
|
run_duration: Duration,
|
||||||
|
workload_timeout: Duration,
|
||||||
|
) -> anyhow::Result<Scenario<OpenRaftKvEnv, NodeControlCapability>> {
|
||||||
|
Ok(
|
||||||
|
OpenRaftKvScenarioBuilder::deployment_with(|deployment| deployment)
|
||||||
|
.with_cluster_observer()
|
||||||
|
.enable_node_control()
|
||||||
|
.with_run_duration(run_duration)
|
||||||
|
.with_workload(
|
||||||
|
OpenRaftKvFailoverWorkload::new()
|
||||||
|
.first_batch(INITIAL_WRITE_BATCH)
|
||||||
|
.second_batch(SECOND_WRITE_BATCH)
|
||||||
|
.timeout(workload_timeout)
|
||||||
|
.key_prefix(RAFT_KEY_PREFIX),
|
||||||
|
)
|
||||||
|
.with_expectation(
|
||||||
|
OpenRaftKvConverges::new(TOTAL_WRITES)
|
||||||
|
.timeout(run_duration)
|
||||||
|
.key_prefix(RAFT_KEY_PREFIX),
|
||||||
|
)
|
||||||
|
.build()?,
|
||||||
|
)
|
||||||
|
}
|
||||||
23
examples/openraft_kv/openraft-kv-node/Cargo.toml
Normal file
23
examples/openraft_kv/openraft-kv-node/Cargo.toml
Normal file
@ -0,0 +1,23 @@
|
|||||||
|
[package]
|
||||||
|
edition.workspace = true
|
||||||
|
license.workspace = true
|
||||||
|
name = "openraft-kv-node"
|
||||||
|
version.workspace = true
|
||||||
|
|
||||||
|
[[bin]]
|
||||||
|
name = "openraft-kv-node"
|
||||||
|
path = "src/main.rs"
|
||||||
|
|
||||||
|
[dependencies]
|
||||||
|
anyhow = "1.0"
|
||||||
|
axum = "0.7"
|
||||||
|
clap = { version = "4.0", features = ["derive"] }
|
||||||
|
openraft = { workspace = true }
|
||||||
|
openraft-memstore = { workspace = true }
|
||||||
|
reqwest = { workspace = true, features = ["json"] }
|
||||||
|
serde = { workspace = true }
|
||||||
|
serde_yaml = { workspace = true }
|
||||||
|
tokio = { workspace = true, features = ["full"] }
|
||||||
|
tower-http = { version = "0.6", features = ["trace"] }
|
||||||
|
tracing = { workspace = true }
|
||||||
|
tracing-subscriber = { version = "0.3", features = ["env-filter"] }
|
||||||
136
examples/openraft_kv/openraft-kv-node/src/client.rs
Normal file
136
examples/openraft_kv/openraft-kv-node/src/client.rs
Normal file
@ -0,0 +1,136 @@
|
|||||||
|
use std::{collections::BTreeSet, time::Duration};
|
||||||
|
|
||||||
|
use reqwest::Url;
|
||||||
|
use serde::{Serialize, de::DeserializeOwned};
|
||||||
|
|
||||||
|
use crate::types::{
|
||||||
|
AddLearnerRequest, AddLearnerResult, ChangeMembershipRequest, ChangeMembershipResult,
|
||||||
|
InitResult, OpenRaftKvReadRequest, OpenRaftKvReadResponse, OpenRaftKvState,
|
||||||
|
OpenRaftKvWriteRequest, OpenRaftKvWriteResponse,
|
||||||
|
};
|
||||||
|
|
||||||
|
/// Small HTTP client for the OpenRaft example node and its admin endpoints.
|
||||||
|
#[derive(Clone)]
|
||||||
|
pub struct OpenRaftKvClient {
|
||||||
|
base_url: Url,
|
||||||
|
client: reqwest::Client,
|
||||||
|
}
|
||||||
|
|
||||||
|
impl OpenRaftKvClient {
|
||||||
|
/// Builds a client for one node base URL.
|
||||||
|
#[must_use]
|
||||||
|
pub fn new(base_url: Url) -> Self {
|
||||||
|
Self {
|
||||||
|
base_url,
|
||||||
|
client: reqwest::Client::builder()
|
||||||
|
.timeout(Duration::from_secs(2))
|
||||||
|
.connect_timeout(Duration::from_secs(2))
|
||||||
|
.build()
|
||||||
|
.expect("openraft kv client timeout configuration is valid"),
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Fetches the node's current Raft and application state.
|
||||||
|
pub async fn state(&self) -> anyhow::Result<OpenRaftKvState> {
|
||||||
|
self.get("state").await
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Replicates one key/value write through the current leader.
|
||||||
|
pub async fn write(
|
||||||
|
&self,
|
||||||
|
key: &str,
|
||||||
|
value: &str,
|
||||||
|
serial: u64,
|
||||||
|
) -> anyhow::Result<OpenRaftKvWriteResponse> {
|
||||||
|
self.post_result(
|
||||||
|
"kv/write",
|
||||||
|
&OpenRaftKvWriteRequest {
|
||||||
|
key: key.to_owned(),
|
||||||
|
value: value.to_owned(),
|
||||||
|
serial,
|
||||||
|
},
|
||||||
|
)
|
||||||
|
.await
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Reads one key from the replicated state machine.
|
||||||
|
pub async fn read(&self, key: &str) -> anyhow::Result<Option<String>> {
|
||||||
|
let response: OpenRaftKvReadResponse = self
|
||||||
|
.post_result(
|
||||||
|
"kv/read",
|
||||||
|
&OpenRaftKvReadRequest {
|
||||||
|
key: key.to_owned(),
|
||||||
|
},
|
||||||
|
)
|
||||||
|
.await?;
|
||||||
|
Ok(response.value)
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Bootstraps a one-node cluster on this node.
|
||||||
|
pub async fn init_self(&self) -> anyhow::Result<()> {
|
||||||
|
let _: InitResult = self.post("admin/init", &()).await?;
|
||||||
|
Ok(())
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Registers another node as a learner with the current leader.
|
||||||
|
pub async fn add_learner(&self, node_id: u64, addr: &str) -> anyhow::Result<()> {
|
||||||
|
let _: AddLearnerResult = self
|
||||||
|
.post(
|
||||||
|
"admin/add-learner",
|
||||||
|
&AddLearnerRequest {
|
||||||
|
node_id,
|
||||||
|
addr: addr.to_owned(),
|
||||||
|
},
|
||||||
|
)
|
||||||
|
.await?;
|
||||||
|
Ok(())
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Promotes the cluster to the provided voter set.
|
||||||
|
pub async fn change_membership(
|
||||||
|
&self,
|
||||||
|
voters: impl IntoIterator<Item = u64>,
|
||||||
|
) -> anyhow::Result<()> {
|
||||||
|
let voters = normalize_voters(voters);
|
||||||
|
let request = ChangeMembershipRequest { voters };
|
||||||
|
|
||||||
|
let _: ChangeMembershipResult = self.post("admin/change-membership", &request).await?;
|
||||||
|
Ok(())
|
||||||
|
}
|
||||||
|
|
||||||
|
async fn get<T: DeserializeOwned>(&self, path: &str) -> anyhow::Result<T> {
|
||||||
|
let url = self.base_url.join(path)?;
|
||||||
|
let response = self.client.get(url).send().await?;
|
||||||
|
let response = response.error_for_status()?;
|
||||||
|
|
||||||
|
Ok(response.json().await?)
|
||||||
|
}
|
||||||
|
|
||||||
|
async fn post<B: Serialize, T: DeserializeOwned>(
|
||||||
|
&self,
|
||||||
|
path: &str,
|
||||||
|
body: &B,
|
||||||
|
) -> anyhow::Result<T> {
|
||||||
|
let url = self.base_url.join(path)?;
|
||||||
|
|
||||||
|
let response = self.client.post(url).json(body).send().await?;
|
||||||
|
|
||||||
|
let response = response.error_for_status()?;
|
||||||
|
|
||||||
|
Ok(response.json().await?)
|
||||||
|
}
|
||||||
|
|
||||||
|
async fn post_result<B: Serialize, T: DeserializeOwned>(
|
||||||
|
&self,
|
||||||
|
path: &str,
|
||||||
|
body: &B,
|
||||||
|
) -> anyhow::Result<T> {
|
||||||
|
let result: Result<T, String> = self.post(path, body).await?;
|
||||||
|
result.map_err(anyhow::Error::msg)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Returns the voter ids with duplicates removed, in ascending order.
fn normalize_voters(voters: impl IntoIterator<Item = u64>) -> Vec<u64> {
    let mut unique = BTreeSet::new();
    for voter in voters {
        unique.insert(voter);
    }

    // A BTreeSet iterates in ascending key order.
    unique.into_iter().collect()
}
|
||||||
46
examples/openraft_kv/openraft-kv-node/src/config.rs
Normal file
46
examples/openraft_kv/openraft-kv-node/src/config.rs
Normal file
@ -0,0 +1,46 @@
|
|||||||
|
use std::{collections::BTreeMap, fs, path::Path};
|
||||||
|
|
||||||
|
use serde::{Deserialize, Serialize};
|
||||||
|
|
||||||
|
/// Static node config written by TF for one OpenRaft node process.
|
||||||
|
#[derive(Clone, Debug, Serialize, Deserialize)]
|
||||||
|
pub struct OpenRaftKvNodeConfig {
|
||||||
|
/// Stable OpenRaft node identifier.
|
||||||
|
pub node_id: u64,
|
||||||
|
/// HTTP port bound by the node process.
|
||||||
|
pub http_port: u16,
|
||||||
|
/// Advertised Raft address for this node.
|
||||||
|
pub public_addr: String,
|
||||||
|
/// Advertised Raft addresses for the other known nodes.
|
||||||
|
#[serde(default)]
|
||||||
|
pub peer_addrs: BTreeMap<u64, String>,
|
||||||
|
/// Heartbeat interval passed to the OpenRaft config.
|
||||||
|
#[serde(default = "default_heartbeat_interval_ms")]
|
||||||
|
pub heartbeat_interval_ms: u64,
|
||||||
|
/// Lower election timeout bound passed to OpenRaft.
|
||||||
|
#[serde(default = "default_election_timeout_min_ms")]
|
||||||
|
pub election_timeout_min_ms: u64,
|
||||||
|
/// Upper election timeout bound passed to OpenRaft.
|
||||||
|
#[serde(default = "default_election_timeout_max_ms")]
|
||||||
|
pub election_timeout_max_ms: u64,
|
||||||
|
}
|
||||||
|
|
||||||
|
impl OpenRaftKvNodeConfig {
|
||||||
|
/// Loads one node config from YAML on disk.
|
||||||
|
pub fn load(path: &Path) -> anyhow::Result<Self> {
|
||||||
|
let raw = fs::read_to_string(path)?;
|
||||||
|
Ok(serde_yaml::from_str(&raw)?)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Serde default for `heartbeat_interval_ms`: 500 milliseconds.
const fn default_heartbeat_interval_ms() -> u64 {
    const DEFAULT_MS: u64 = 500;
    DEFAULT_MS
}
|
||||||
|
|
||||||
|
/// Serde default for `election_timeout_min_ms`: 1500 milliseconds.
const fn default_election_timeout_min_ms() -> u64 {
    const DEFAULT_MS: u64 = 1_500;
    DEFAULT_MS
}
|
||||||
|
|
||||||
|
/// Serde default for `election_timeout_max_ms`: 3000 milliseconds.
const fn default_election_timeout_max_ms() -> u64 {
    const DEFAULT_MS: u64 = 3_000;
    DEFAULT_MS
}
|
||||||
25
examples/openraft_kv/openraft-kv-node/src/lib.rs
Normal file
25
examples/openraft_kv/openraft-kv-node/src/lib.rs
Normal file
@ -0,0 +1,25 @@
|
|||||||
|
//! OpenRaft-backed key-value node used by the `examples-simple-clusters`
|
||||||
|
//! branch.
|
||||||
|
|
||||||
|
/// HTTP client for interacting with one OpenRaft node.
|
||||||
|
pub mod client;
|
||||||
|
/// YAML node configuration used by TF and the node binary.
|
||||||
|
pub mod config;
|
||||||
|
mod network;
|
||||||
|
/// Axum server bootstrap and request handlers for one node process.
|
||||||
|
pub mod server;
|
||||||
|
/// Shared request, response, and state payload types.
|
||||||
|
pub mod types;
|
||||||
|
|
||||||
|
/// Re-export of the node HTTP client.
|
||||||
|
pub use client::OpenRaftKvClient;
|
||||||
|
/// Re-export of the node YAML config type.
|
||||||
|
pub use config::OpenRaftKvNodeConfig;
|
||||||
|
/// Re-export of the public request and state payloads.
|
||||||
|
pub use types::{
|
||||||
|
AddLearnerRequest, ChangeMembershipRequest, OpenRaftKvReadRequest, OpenRaftKvReadResponse,
|
||||||
|
OpenRaftKvState, OpenRaftKvWriteRequest, OpenRaftKvWriteResponse,
|
||||||
|
};
|
||||||
|
|
||||||
|
/// OpenRaft type configuration shared by the in-memory log and state machine.
|
||||||
|
pub type TypeConfig = openraft_memstore::TypeConfig;
|
||||||
24
examples/openraft_kv/openraft-kv-node/src/main.rs
Normal file
24
examples/openraft_kv/openraft-kv-node/src/main.rs
Normal file
@ -0,0 +1,24 @@
|
|||||||
|
use std::path::PathBuf;
|
||||||
|
|
||||||
|
use clap::Parser;
|
||||||
|
use openraft_kv_node::{config::OpenRaftKvNodeConfig, server::run_server};
|
||||||
|
use tracing_subscriber::EnvFilter;
|
||||||
|
|
||||||
|
#[derive(Parser, Clone, Debug)]
|
||||||
|
#[command(author, version, about)]
|
||||||
|
struct Opt {
|
||||||
|
#[arg(long)]
|
||||||
|
config: PathBuf,
|
||||||
|
}
|
||||||
|
|
||||||
|
#[tokio::main]
|
||||||
|
async fn main() -> anyhow::Result<()> {
|
||||||
|
tracing_subscriber::fmt()
|
||||||
|
.with_env_filter(EnvFilter::from_default_env())
|
||||||
|
.with_ansi(false)
|
||||||
|
.init();
|
||||||
|
|
||||||
|
let options = Opt::parse();
|
||||||
|
let config = OpenRaftKvNodeConfig::load(&options.config)?;
|
||||||
|
run_server(config).await
|
||||||
|
}
|
||||||
158
examples/openraft_kv/openraft-kv-node/src/network.rs
Normal file
158
examples/openraft_kv/openraft-kv-node/src/network.rs
Normal file
@ -0,0 +1,158 @@
|
|||||||
|
//! HTTP transport used by OpenRaft to replicate between example nodes.
|
||||||
|
|
||||||
|
use std::{collections::BTreeMap, sync::Arc};
|
||||||
|
|
||||||
|
use openraft::{
|
||||||
|
RaftNetworkFactory, RaftNetworkV2,
|
||||||
|
alias::{SnapshotOf, VoteOf},
|
||||||
|
errors::{RPCError, StreamingError, Unreachable},
|
||||||
|
network::RPCOption,
|
||||||
|
};
|
||||||
|
use reqwest::Url;
|
||||||
|
use tokio::sync::RwLock;
|
||||||
|
|
||||||
|
use crate::{
|
||||||
|
TypeConfig,
|
||||||
|
types::{InstallFullSnapshotBody, SnapshotRpcResult},
|
||||||
|
};
|
||||||
|
|
||||||
|
/// Shared node-address book used by Raft RPC clients.
|
||||||
|
#[derive(Clone, Default)]
|
||||||
|
pub struct HttpNetworkFactory {
|
||||||
|
client: reqwest::Client,
|
||||||
|
known_nodes: Arc<RwLock<BTreeMap<u64, String>>>,
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Per-target HTTP client used for Raft replication traffic.
|
||||||
|
pub struct HttpNetworkClient {
|
||||||
|
client: reqwest::Client,
|
||||||
|
target: u64,
|
||||||
|
target_addr: Option<String>,
|
||||||
|
}
|
||||||
|
|
||||||
|
impl HttpNetworkFactory {
|
||||||
|
/// Creates a network factory backed by one shared node-address map.
|
||||||
|
#[must_use]
|
||||||
|
pub fn new(known_nodes: Arc<RwLock<BTreeMap<u64, String>>>) -> Self {
|
||||||
|
Self {
|
||||||
|
client: reqwest::Client::new(),
|
||||||
|
known_nodes,
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
impl RaftNetworkFactory<TypeConfig> for HttpNetworkFactory {
|
||||||
|
type Network = HttpNetworkClient;
|
||||||
|
|
||||||
|
async fn new_client(&mut self, target: u64, _node: &()) -> Self::Network {
|
||||||
|
let target_addr = self.known_nodes.read().await.get(&target).cloned();
|
||||||
|
|
||||||
|
HttpNetworkClient {
|
||||||
|
client: self.client.clone(),
|
||||||
|
target,
|
||||||
|
target_addr,
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
impl RaftNetworkV2<TypeConfig> for HttpNetworkClient {
|
||||||
|
async fn append_entries(
|
||||||
|
&mut self,
|
||||||
|
rpc: openraft::raft::AppendEntriesRequest<TypeConfig>,
|
||||||
|
_option: RPCOption,
|
||||||
|
) -> Result<openraft::raft::AppendEntriesResponse<TypeConfig>, RPCError<TypeConfig>> {
|
||||||
|
self.post_rpc("raft/append", &rpc).await
|
||||||
|
}
|
||||||
|
|
||||||
|
async fn vote(
|
||||||
|
&mut self,
|
||||||
|
rpc: openraft::raft::VoteRequest<TypeConfig>,
|
||||||
|
_option: RPCOption,
|
||||||
|
) -> Result<openraft::raft::VoteResponse<TypeConfig>, RPCError<TypeConfig>> {
|
||||||
|
self.post_rpc("raft/vote", &rpc).await
|
||||||
|
}
|
||||||
|
|
||||||
|
async fn full_snapshot(
|
||||||
|
&mut self,
|
||||||
|
vote: VoteOf<TypeConfig>,
|
||||||
|
snapshot: SnapshotOf<TypeConfig>,
|
||||||
|
_cancel: impl std::future::Future<Output = openraft::errors::ReplicationClosed>
|
||||||
|
+ openraft::OptionalSend
|
||||||
|
+ 'static,
|
||||||
|
_option: RPCOption,
|
||||||
|
) -> Result<openraft::raft::SnapshotResponse<TypeConfig>, StreamingError<TypeConfig>> {
|
||||||
|
let body = InstallFullSnapshotBody {
|
||||||
|
vote,
|
||||||
|
meta: snapshot.meta,
|
||||||
|
data: snapshot.snapshot.into_inner(),
|
||||||
|
};
|
||||||
|
|
||||||
|
self.post_snapshot("raft/snapshot", &body).await
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
impl HttpNetworkClient {
|
||||||
|
async fn post_rpc<B, T>(&self, path: &str, body: &B) -> Result<T, RPCError<TypeConfig>>
|
||||||
|
where
|
||||||
|
B: serde::Serialize,
|
||||||
|
T: serde::de::DeserializeOwned,
|
||||||
|
{
|
||||||
|
let url = self.endpoint_url(path)?;
|
||||||
|
let response = self
|
||||||
|
.client
|
||||||
|
.post(url)
|
||||||
|
.json(body)
|
||||||
|
.send()
|
||||||
|
.await
|
||||||
|
.map_err(|err| RPCError::Unreachable(Unreachable::new(&err)))?
|
||||||
|
.error_for_status()
|
||||||
|
.map_err(|err| RPCError::Unreachable(Unreachable::new(&err)))?;
|
||||||
|
|
||||||
|
let result: Result<T, String> = response
|
||||||
|
.json()
|
||||||
|
.await
|
||||||
|
.map_err(|err| RPCError::Unreachable(Unreachable::new(&err)))?;
|
||||||
|
|
||||||
|
result.map_err(|err| RPCError::Unreachable(Unreachable::from_string(err)))
|
||||||
|
}
|
||||||
|
|
||||||
|
async fn post_snapshot(
|
||||||
|
&self,
|
||||||
|
path: &str,
|
||||||
|
body: &InstallFullSnapshotBody,
|
||||||
|
) -> Result<openraft::raft::SnapshotResponse<TypeConfig>, StreamingError<TypeConfig>> {
|
||||||
|
let url = self
|
||||||
|
.endpoint_url(path)
|
||||||
|
.map_err(|err| StreamingError::Unreachable(Unreachable::new(&err)))?;
|
||||||
|
let response = self
|
||||||
|
.client
|
||||||
|
.post(url)
|
||||||
|
.json(body)
|
||||||
|
.send()
|
||||||
|
.await
|
||||||
|
.map_err(|err| StreamingError::Unreachable(Unreachable::new(&err)))?
|
||||||
|
.error_for_status()
|
||||||
|
.map_err(|err| StreamingError::Unreachable(Unreachable::new(&err)))?;
|
||||||
|
|
||||||
|
let result: SnapshotRpcResult = response
|
||||||
|
.json()
|
||||||
|
.await
|
||||||
|
.map_err(|err| StreamingError::Unreachable(Unreachable::new(&err)))?;
|
||||||
|
|
||||||
|
result.map_err(|err| StreamingError::Unreachable(Unreachable::from_string(err)))
|
||||||
|
}
|
||||||
|
|
||||||
|
fn endpoint_url(&self, path: &str) -> Result<Url, Unreachable<TypeConfig>> {
|
||||||
|
let Some(addr) = &self.target_addr else {
|
||||||
|
return Err(Unreachable::from_string(format!(
|
||||||
|
"target {} has no known address",
|
||||||
|
self.target
|
||||||
|
)));
|
||||||
|
};
|
||||||
|
|
||||||
|
let mut url =
|
||||||
|
Url::parse(&format!("http://{addr}/")).map_err(|err| Unreachable::new(&err))?;
|
||||||
|
url.set_path(path);
|
||||||
|
Ok(url)
|
||||||
|
}
|
||||||
|
}
|
||||||
276
examples/openraft_kv/openraft-kv-node/src/server.rs
Normal file
276
examples/openraft_kv/openraft-kv-node/src/server.rs
Normal file
@ -0,0 +1,276 @@
|
|||||||
|
//! Axum server that exposes the OpenRaft example node and its admin endpoints.
|
||||||
|
|
||||||
|
use std::{
|
||||||
|
collections::{BTreeMap, BTreeSet},
|
||||||
|
sync::Arc,
|
||||||
|
};
|
||||||
|
|
||||||
|
use axum::{
|
||||||
|
Json, Router,
|
||||||
|
extract::State,
|
||||||
|
http::StatusCode,
|
||||||
|
routing::{get, post},
|
||||||
|
};
|
||||||
|
use openraft::{Config, Raft, SnapshotPolicy, type_config::async_runtime::WatchReceiver};
|
||||||
|
use openraft_memstore::{ClientRequest, MemLogStore, MemStateMachine, new_mem_store};
|
||||||
|
use tokio::sync::RwLock;
|
||||||
|
use tower_http::trace::TraceLayer;
|
||||||
|
use tracing::info;
|
||||||
|
|
||||||
|
use crate::{
|
||||||
|
TypeConfig,
|
||||||
|
config::OpenRaftKvNodeConfig,
|
||||||
|
network::HttpNetworkFactory,
|
||||||
|
types::{
|
||||||
|
AddLearnerRequest, AppendRpcResult, ChangeMembershipRequest, InitResult,
|
||||||
|
InstallSnapshotBody, MetricsResult, OpenRaftKvReadRequest, OpenRaftKvReadResponse,
|
||||||
|
OpenRaftKvState, OpenRaftKvWriteRequest, OpenRaftKvWriteResponse, SnapshotRpcResult,
|
||||||
|
VoteRpcResult,
|
||||||
|
},
|
||||||
|
};
|
||||||
|
|
||||||
|
/// Shared, lock-protected map from node id to node address.
type KnownNodes = Arc<RwLock<BTreeMap<u64, String>>>;
|
||||||
|
|
||||||
|
/// Shared state used by the HTTP handlers exposed by one node.
|
||||||
|
#[derive(Clone)]
|
||||||
|
pub struct AppState {
|
||||||
|
config: OpenRaftKvNodeConfig,
|
||||||
|
raft: Raft<TypeConfig, Arc<MemStateMachine>>,
|
||||||
|
state_machine: Arc<MemStateMachine>,
|
||||||
|
known_nodes: KnownNodes,
|
||||||
|
}
|
||||||
|
|
||||||
|
impl AppState {
|
||||||
|
/// Builds the application state for one node process.
|
||||||
|
pub fn new(
|
||||||
|
config: OpenRaftKvNodeConfig,
|
||||||
|
raft: Raft<TypeConfig, Arc<MemStateMachine>>,
|
||||||
|
state_machine: Arc<MemStateMachine>,
|
||||||
|
known_nodes: KnownNodes,
|
||||||
|
) -> Self {
|
||||||
|
Self {
|
||||||
|
config,
|
||||||
|
raft,
|
||||||
|
state_machine,
|
||||||
|
known_nodes,
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Starts one OpenRaft-backed HTTP node.
|
||||||
|
pub async fn run_server(config: OpenRaftKvNodeConfig) -> anyhow::Result<()> {
|
||||||
|
let raft_config = Arc::new(
|
||||||
|
Config {
|
||||||
|
cluster_name: "openraft-kv".to_owned(),
|
||||||
|
heartbeat_interval: config.heartbeat_interval_ms,
|
||||||
|
election_timeout_min: config.election_timeout_min_ms,
|
||||||
|
election_timeout_max: config.election_timeout_max_ms,
|
||||||
|
snapshot_policy: SnapshotPolicy::Never,
|
||||||
|
..Default::default()
|
||||||
|
}
|
||||||
|
.validate()?,
|
||||||
|
);
|
||||||
|
|
||||||
|
let known_nodes = Arc::new(RwLock::new(known_nodes(&config)));
|
||||||
|
|
||||||
|
let (log_store, state_machine): (Arc<MemLogStore>, Arc<MemStateMachine>) = new_mem_store();
|
||||||
|
let network = HttpNetworkFactory::new(known_nodes.clone());
|
||||||
|
|
||||||
|
let raft = Raft::new(
|
||||||
|
config.node_id,
|
||||||
|
raft_config,
|
||||||
|
network,
|
||||||
|
log_store,
|
||||||
|
state_machine.clone(),
|
||||||
|
)
|
||||||
|
.await?;
|
||||||
|
|
||||||
|
let app_state = AppState::new(config.clone(), raft, state_machine, known_nodes);
|
||||||
|
let app = router(app_state);
|
||||||
|
let address = std::net::SocketAddr::from(([0, 0, 0, 0], config.http_port));
|
||||||
|
|
||||||
|
info!(
|
||||||
|
node_id = config.node_id,
|
||||||
|
public_addr = %config.public_addr,
|
||||||
|
peers = ?config.peer_addrs,
|
||||||
|
%address,
|
||||||
|
"starting openraft kv node"
|
||||||
|
);
|
||||||
|
|
||||||
|
let listener = tokio::net::TcpListener::bind(address).await?;
|
||||||
|
axum::serve(listener, app).await?;
|
||||||
|
Ok(())
|
||||||
|
}
|
||||||
|
|
||||||
|
fn router(app_state: AppState) -> Router {
|
||||||
|
let app_routes = Router::new()
|
||||||
|
.route("/healthz", get(healthz))
|
||||||
|
.route("/state", get(cluster_state))
|
||||||
|
.route("/kv/write", post(write))
|
||||||
|
.route("/kv/read", post(read));
|
||||||
|
|
||||||
|
let admin_routes = Router::new()
|
||||||
|
.route("/admin/init", post(init))
|
||||||
|
.route("/admin/add-learner", post(add_learner))
|
||||||
|
.route("/admin/change-membership", post(change_membership))
|
||||||
|
.route("/admin/metrics", get(metrics));
|
||||||
|
|
||||||
|
let raft_routes = Router::new()
|
||||||
|
.route("/raft/vote", post(vote))
|
||||||
|
.route("/raft/append", post(append))
|
||||||
|
.route("/raft/snapshot", post(snapshot));
|
||||||
|
|
||||||
|
app_routes
|
||||||
|
.merge(admin_routes)
|
||||||
|
.merge(raft_routes)
|
||||||
|
.layer(TraceLayer::new_for_http())
|
||||||
|
.with_state(app_state)
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Readiness probe: always answers with the literal body `ok`.
async fn healthz() -> &'static str {
    const HEALTHY: &str = "ok";
    HEALTHY
}
|
||||||
|
|
||||||
|
async fn cluster_state(State(app): State<AppState>) -> Result<Json<OpenRaftKvState>, StatusCode> {
|
||||||
|
let metrics = app.raft.metrics().borrow_watched().clone();
|
||||||
|
|
||||||
|
let sm = app.state_machine.get_state_machine().await;
|
||||||
|
|
||||||
|
let voters = metrics
|
||||||
|
.membership_config
|
||||||
|
.membership()
|
||||||
|
.voter_ids()
|
||||||
|
.collect::<Vec<_>>();
|
||||||
|
|
||||||
|
let kv = sm.client_status.into_iter().collect::<BTreeMap<_, _>>();
|
||||||
|
|
||||||
|
Ok(Json(OpenRaftKvState {
|
||||||
|
node_id: app.config.node_id,
|
||||||
|
public_addr: app.config.public_addr.clone(),
|
||||||
|
role: format!("{:?}", metrics.state),
|
||||||
|
current_leader: metrics.current_leader,
|
||||||
|
current_term: metrics.current_term,
|
||||||
|
last_log_index: metrics.last_log_index,
|
||||||
|
last_applied_index: metrics.last_applied.as_ref().map(|log_id| log_id.index()),
|
||||||
|
voters,
|
||||||
|
kv,
|
||||||
|
}))
|
||||||
|
}
|
||||||
|
|
||||||
|
async fn metrics(State(app): State<AppState>) -> Json<MetricsResult> {
|
||||||
|
Json(Ok(app.raft.metrics().borrow_watched().clone()))
|
||||||
|
}
|
||||||
|
|
||||||
|
async fn init(State(app): State<AppState>) -> Json<InitResult> {
|
||||||
|
let members = BTreeSet::from([app.config.node_id]);
|
||||||
|
|
||||||
|
Json(
|
||||||
|
app.raft
|
||||||
|
.initialize(members)
|
||||||
|
.await
|
||||||
|
.map_err(|err| err.to_string()),
|
||||||
|
)
|
||||||
|
}
|
||||||
|
|
||||||
|
async fn add_learner(
|
||||||
|
State(app): State<AppState>,
|
||||||
|
Json(request): Json<AddLearnerRequest>,
|
||||||
|
) -> Json<InitResult> {
|
||||||
|
let mut known_nodes = app.known_nodes.write().await;
|
||||||
|
known_nodes.insert(request.node_id, request.addr.clone());
|
||||||
|
drop(known_nodes);
|
||||||
|
|
||||||
|
Json(
|
||||||
|
app.raft
|
||||||
|
.add_learner(request.node_id, (), true)
|
||||||
|
.await
|
||||||
|
.map(|_| ())
|
||||||
|
.map_err(|err| err.to_string()),
|
||||||
|
)
|
||||||
|
}
|
||||||
|
|
||||||
|
async fn change_membership(
|
||||||
|
State(app): State<AppState>,
|
||||||
|
Json(request): Json<ChangeMembershipRequest>,
|
||||||
|
) -> Json<InitResult> {
|
||||||
|
Json(
|
||||||
|
app.raft
|
||||||
|
.change_membership(request.voters.into_iter().collect::<BTreeSet<_>>(), false)
|
||||||
|
.await
|
||||||
|
.map(|_| ())
|
||||||
|
.map_err(|err| err.to_string()),
|
||||||
|
)
|
||||||
|
}
|
||||||
|
|
||||||
|
async fn write(
|
||||||
|
State(app): State<AppState>,
|
||||||
|
Json(request): Json<OpenRaftKvWriteRequest>,
|
||||||
|
) -> Json<Result<OpenRaftKvWriteResponse, String>> {
|
||||||
|
let result = app
|
||||||
|
.raft
|
||||||
|
.client_write(ClientRequest {
|
||||||
|
client: request.key,
|
||||||
|
serial: request.serial,
|
||||||
|
status: request.value,
|
||||||
|
})
|
||||||
|
.await
|
||||||
|
.map(|response| OpenRaftKvWriteResponse {
|
||||||
|
previous: response.response().0.clone(),
|
||||||
|
})
|
||||||
|
.map_err(|err| err.to_string());
|
||||||
|
|
||||||
|
Json(result)
|
||||||
|
}
|
||||||
|
|
||||||
|
async fn read(
|
||||||
|
State(app): State<AppState>,
|
||||||
|
Json(request): Json<OpenRaftKvReadRequest>,
|
||||||
|
) -> Json<Result<OpenRaftKvReadResponse, String>> {
|
||||||
|
let sm = app.state_machine.get_state_machine().await;
|
||||||
|
|
||||||
|
Json(Ok(OpenRaftKvReadResponse {
|
||||||
|
value: sm.client_status.get(&request.key).cloned(),
|
||||||
|
}))
|
||||||
|
}
|
||||||
|
|
||||||
|
async fn vote(
|
||||||
|
State(app): State<AppState>,
|
||||||
|
Json(request): Json<openraft::raft::VoteRequest<TypeConfig>>,
|
||||||
|
) -> Json<VoteRpcResult> {
|
||||||
|
Json(app.raft.vote(request).await.map_err(|err| err.to_string()))
|
||||||
|
}
|
||||||
|
|
||||||
|
async fn append(
|
||||||
|
State(app): State<AppState>,
|
||||||
|
Json(request): Json<openraft::raft::AppendEntriesRequest<TypeConfig>>,
|
||||||
|
) -> Json<AppendRpcResult> {
|
||||||
|
Json(
|
||||||
|
app.raft
|
||||||
|
.append_entries(request)
|
||||||
|
.await
|
||||||
|
.map_err(|err| err.to_string()),
|
||||||
|
)
|
||||||
|
}
|
||||||
|
|
||||||
|
async fn snapshot(
|
||||||
|
State(app): State<AppState>,
|
||||||
|
Json(request): Json<InstallSnapshotBody>,
|
||||||
|
) -> Json<SnapshotRpcResult> {
|
||||||
|
let snapshot = openraft::alias::SnapshotOf::<TypeConfig> {
|
||||||
|
meta: request.meta,
|
||||||
|
snapshot: std::io::Cursor::new(request.data),
|
||||||
|
};
|
||||||
|
|
||||||
|
Json(
|
||||||
|
app.raft
|
||||||
|
.install_full_snapshot(request.vote, snapshot)
|
||||||
|
.await
|
||||||
|
.map_err(|err| err.to_string()),
|
||||||
|
)
|
||||||
|
}
|
||||||
|
|
||||||
|
fn known_nodes(config: &OpenRaftKvNodeConfig) -> BTreeMap<u64, String> {
|
||||||
|
let mut known_nodes = config.peer_addrs.clone();
|
||||||
|
known_nodes.insert(config.node_id, config.public_addr.clone());
|
||||||
|
known_nodes
|
||||||
|
}
|
||||||
112
examples/openraft_kv/openraft-kv-node/src/types.rs
Normal file
112
examples/openraft_kv/openraft-kv-node/src/types.rs
Normal file
@ -0,0 +1,112 @@
|
|||||||
|
use std::collections::BTreeMap;
|
||||||
|
|
||||||
|
use openraft::{
|
||||||
|
RaftMetrics,
|
||||||
|
alias::{SnapshotMetaOf, VoteOf},
|
||||||
|
raft::InstallSnapshotRequest,
|
||||||
|
};
|
||||||
|
use serde::{Deserialize, Serialize};
|
||||||
|
|
||||||
|
use crate::TypeConfig;
|
||||||
|
|
||||||
|
/// Generic result shape of this example's endpoints: the payload on success,
/// a rendered error message on failure.
pub type OpenRaftResult<T> = Result<T, String>;
|
||||||
|
|
||||||
|
/// Request body for a replicated write submitted through the leader.
|
||||||
|
#[derive(Clone, Debug, Serialize, Deserialize)]
|
||||||
|
pub struct OpenRaftKvWriteRequest {
|
||||||
|
/// Application key to write.
|
||||||
|
pub key: String,
|
||||||
|
/// Value stored for the key.
|
||||||
|
pub value: String,
|
||||||
|
/// Client-side serial used by OpenRaft's example state machine.
|
||||||
|
pub serial: u64,
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Response body returned after a replicated write is committed.
|
||||||
|
#[derive(Clone, Debug, Serialize, Deserialize)]
|
||||||
|
pub struct OpenRaftKvWriteResponse {
|
||||||
|
/// Previous value stored under the key, if any.
|
||||||
|
pub previous: Option<String>,
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Request body for a key lookup.
|
||||||
|
#[derive(Clone, Debug, Serialize, Deserialize)]
|
||||||
|
pub struct OpenRaftKvReadRequest {
|
||||||
|
/// Application key to look up.
|
||||||
|
pub key: String,
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Response body returned by a key lookup.
|
||||||
|
#[derive(Clone, Debug, Serialize, Deserialize)]
|
||||||
|
pub struct OpenRaftKvReadResponse {
|
||||||
|
/// Current value stored under the key, if any.
|
||||||
|
pub value: Option<String>,
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Admin request used to register a learner in the current cluster.
|
||||||
|
#[derive(Clone, Debug, Serialize, Deserialize)]
|
||||||
|
pub struct AddLearnerRequest {
|
||||||
|
/// OpenRaft node identifier for the learner.
|
||||||
|
pub node_id: u64,
|
||||||
|
/// Advertised Raft address for the learner.
|
||||||
|
pub addr: String,
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Admin request used to promote the cluster to a concrete voter set.
|
||||||
|
#[derive(Clone, Debug, Serialize, Deserialize)]
|
||||||
|
pub struct ChangeMembershipRequest {
|
||||||
|
/// Full voter set that should own the cluster after the change.
|
||||||
|
pub voters: Vec<u64>,
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Snapshot of one node's externally visible Raft and application state.
|
||||||
|
#[derive(Clone, Debug, Serialize, Deserialize)]
|
||||||
|
pub struct OpenRaftKvState {
|
||||||
|
/// Stable OpenRaft node identifier.
|
||||||
|
pub node_id: u64,
|
||||||
|
/// Advertised Raft address for this node.
|
||||||
|
pub public_addr: String,
|
||||||
|
/// Current OpenRaft role rendered as text.
|
||||||
|
pub role: String,
|
||||||
|
/// Leader known by this node, if any.
|
||||||
|
pub current_leader: Option<u64>,
|
||||||
|
/// Current term reported by this node.
|
||||||
|
pub current_term: u64,
|
||||||
|
/// Highest log index stored locally.
|
||||||
|
pub last_log_index: Option<u64>,
|
||||||
|
/// Highest log index applied to the state machine.
|
||||||
|
pub last_applied_index: Option<u64>,
|
||||||
|
/// Current voter set reported by this node.
|
||||||
|
pub voters: Vec<u64>,
|
||||||
|
/// Application state machine contents.
|
||||||
|
pub kv: BTreeMap<String, String>,
|
||||||
|
}
|
||||||
|
|
||||||
|
/// JSON representation used for full-snapshot replication over HTTP.
|
||||||
|
#[derive(Clone, Debug, Serialize, Deserialize)]
|
||||||
|
pub struct InstallFullSnapshotBody {
|
||||||
|
/// Vote bundled with the snapshot transfer.
|
||||||
|
pub vote: VoteOf<TypeConfig>,
|
||||||
|
/// Snapshot metadata describing the transferred state.
|
||||||
|
pub meta: SnapshotMetaOf<TypeConfig>,
|
||||||
|
/// Serialized state machine bytes.
|
||||||
|
pub data: Vec<u8>,
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Serialized result of a vote RPC.
|
||||||
|
pub type VoteRpcResult = Result<openraft::raft::VoteResponse<TypeConfig>, String>;
|
||||||
|
/// Serialized result of an append-entries RPC.
|
||||||
|
pub type AppendRpcResult = Result<openraft::raft::AppendEntriesResponse<TypeConfig>, String>;
|
||||||
|
/// Serialized result of a full-snapshot RPC.
|
||||||
|
pub type SnapshotRpcResult = Result<openraft::raft::SnapshotResponse<TypeConfig>, String>;
|
||||||
|
/// JSON payload returned by the metrics endpoint.
|
||||||
|
pub type MetricsResult = Result<RaftMetrics<TypeConfig>, String>;
|
||||||
|
/// JSON payload returned by `/admin/init`.
|
||||||
|
pub type InitResult = Result<(), String>;
|
||||||
|
/// JSON payload returned by `/admin/add-learner`.
|
||||||
|
pub type AddLearnerResult = Result<(), String>;
|
||||||
|
/// JSON payload returned by `/admin/change-membership`.
|
||||||
|
pub type ChangeMembershipResult = Result<(), String>;
|
||||||
|
/// Request type accepted by the snapshot endpoint.
|
||||||
|
pub type InstallSnapshotBody = InstallSnapshotRequest<TypeConfig>;
|
||||||
14
examples/openraft_kv/testing/integration/Cargo.toml
Normal file
14
examples/openraft_kv/testing/integration/Cargo.toml
Normal file
@ -0,0 +1,14 @@
|
|||||||
|
[package]
|
||||||
|
edition.workspace = true
|
||||||
|
license.workspace = true
|
||||||
|
name = "openraft-kv-runtime-ext"
|
||||||
|
version.workspace = true
|
||||||
|
|
||||||
|
[dependencies]
|
||||||
|
async-trait = { workspace = true }
|
||||||
|
openraft-kv-node = { path = "../../openraft-kv-node" }
|
||||||
|
reqwest = { workspace = true }
|
||||||
|
testing-framework-core = { workspace = true }
|
||||||
|
testing-framework-runner-compose = { workspace = true }
|
||||||
|
testing-framework-runner-k8s = { workspace = true }
|
||||||
|
testing-framework-runner-local = { workspace = true }
|
||||||
59
examples/openraft_kv/testing/integration/src/app.rs
Normal file
59
examples/openraft_kv/testing/integration/src/app.rs
Normal file
@ -0,0 +1,59 @@
|
|||||||
|
use std::io::Error;
|
||||||
|
|
||||||
|
use openraft_kv_node::{OpenRaftKvClient, OpenRaftKvNodeConfig};
|
||||||
|
use testing_framework_core::scenario::{
|
||||||
|
Application, ClusterNodeConfigApplication, ClusterNodeView, ClusterPeerView, DynError,
|
||||||
|
NodeAccess, serialize_cluster_yaml_config,
|
||||||
|
};
|
||||||
|
|
||||||
|
/// Three-node topology used by the OpenRaft example scenarios.
|
||||||
|
pub type OpenRaftKvTopology = testing_framework_core::topology::ClusterTopology;
|
||||||
|
|
||||||
|
/// Marker type that carries the testing-framework trait implementations for
/// the OpenRaft-backed key-value example.
pub struct OpenRaftKvEnv;
|
||||||
|
|
||||||
|
impl Application for OpenRaftKvEnv {
|
||||||
|
type Deployment = OpenRaftKvTopology;
|
||||||
|
type NodeClient = OpenRaftKvClient;
|
||||||
|
type NodeConfig = OpenRaftKvNodeConfig;
|
||||||
|
|
||||||
|
fn build_node_client(access: &NodeAccess) -> Result<Self::NodeClient, DynError> {
|
||||||
|
Ok(OpenRaftKvClient::new(access.api_base_url()?))
|
||||||
|
}
|
||||||
|
|
||||||
|
fn node_readiness_path() -> &'static str {
|
||||||
|
"/healthz"
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
impl ClusterNodeConfigApplication for OpenRaftKvEnv {
|
||||||
|
type ConfigError = Error;
|
||||||
|
|
||||||
|
fn static_network_port() -> u16 {
|
||||||
|
8080
|
||||||
|
}
|
||||||
|
|
||||||
|
fn build_cluster_node_config(
|
||||||
|
node: &ClusterNodeView,
|
||||||
|
peers: &[ClusterPeerView],
|
||||||
|
) -> Result<Self::NodeConfig, Self::ConfigError> {
|
||||||
|
Ok(OpenRaftKvNodeConfig {
|
||||||
|
node_id: node.index() as u64,
|
||||||
|
http_port: node.network_port(),
|
||||||
|
public_addr: node.authority(),
|
||||||
|
peer_addrs: peers
|
||||||
|
.iter()
|
||||||
|
.map(|peer| (peer.index() as u64, peer.authority()))
|
||||||
|
.collect(),
|
||||||
|
heartbeat_interval_ms: 500,
|
||||||
|
election_timeout_min_ms: 1_500,
|
||||||
|
election_timeout_max_ms: 3_000,
|
||||||
|
})
|
||||||
|
}
|
||||||
|
|
||||||
|
fn serialize_cluster_node_config(
|
||||||
|
config: &Self::NodeConfig,
|
||||||
|
) -> Result<String, Self::ConfigError> {
|
||||||
|
serialize_cluster_yaml_config(config).map_err(Error::other)
|
||||||
|
}
|
||||||
|
}
|
||||||
112
examples/openraft_kv/testing/integration/src/compose_env.rs
Normal file
112
examples/openraft_kv/testing/integration/src/compose_env.rs
Normal file
@ -0,0 +1,112 @@
|
|||||||
|
use std::{fs, path::Path};
|
||||||
|
|
||||||
|
use testing_framework_core::{
|
||||||
|
cfgsync::StaticNodeConfigProvider,
|
||||||
|
scenario::{Application, DynError},
|
||||||
|
topology::DeploymentDescriptor,
|
||||||
|
};
|
||||||
|
use testing_framework_runner_compose::{
|
||||||
|
BinaryConfigNodeSpec, ComposeDeployEnv, ComposeDescriptor, NodeDescriptor,
|
||||||
|
binary_config_node_runtime_spec, node_identifier,
|
||||||
|
};
|
||||||
|
|
||||||
|
use crate::OpenRaftKvEnv;
|
||||||
|
|
||||||
|
/// Config file path passed to the compose node spec.
const NODE_CONFIG_PATH: &str = "/etc/openraft-kv/config.yaml";

/// First host port used for published node ports; node `index` is published
/// on this value plus `index`.
const COMPOSE_HTTP_PORT_BASE: u16 = 47_080;
|
||||||
|
|
||||||
|
fn compose_node_spec() -> BinaryConfigNodeSpec {
|
||||||
|
BinaryConfigNodeSpec::conventional(
|
||||||
|
"/usr/local/bin/openraft-kv-node",
|
||||||
|
NODE_CONFIG_PATH,
|
||||||
|
vec![8080],
|
||||||
|
)
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Formats a port binding of the form `127.0.0.1:<host_port>:<container_port>`.
fn fixed_loopback_port_binding(host_port: u16, container_port: u16) -> String {
    let parts = [
        "127.0.0.1".to_string(),
        host_port.to_string(),
        container_port.to_string(),
    ];
    parts.join(":")
}
|
||||||
|
|
||||||
|
impl ComposeDeployEnv for OpenRaftKvEnv {
|
||||||
|
fn prepare_compose_configs(
|
||||||
|
path: &Path,
|
||||||
|
topology: &<Self as Application>::Deployment,
|
||||||
|
_cfgsync_port: u16,
|
||||||
|
_metrics_otlp_ingest_url: Option<&reqwest::Url>,
|
||||||
|
) -> Result<(), DynError> {
|
||||||
|
let hostnames = Self::cfgsync_hostnames(topology);
|
||||||
|
let stack_dir = path
|
||||||
|
.parent()
|
||||||
|
.ok_or_else(|| std::io::Error::other("compose config path has no parent"))?;
|
||||||
|
let configs_dir = stack_dir.join("configs");
|
||||||
|
fs::create_dir_all(&configs_dir)?;
|
||||||
|
|
||||||
|
for index in 0..topology.node_count() {
|
||||||
|
let mut config = Self::build_node_config(topology, index)?;
|
||||||
|
Self::rewrite_for_hostnames(topology, index, &hostnames, &mut config)?;
|
||||||
|
let rendered = Self::serialize_node_config(&config)?;
|
||||||
|
fs::write(
|
||||||
|
configs_dir.join(Self::static_node_config_file_name(index)),
|
||||||
|
rendered,
|
||||||
|
)?;
|
||||||
|
}
|
||||||
|
|
||||||
|
Ok(())
|
||||||
|
}
|
||||||
|
|
||||||
|
fn static_node_config_file_name(index: usize) -> String {
|
||||||
|
format!("node-{index}.yaml")
|
||||||
|
}
|
||||||
|
|
||||||
|
fn binary_config_node_spec(
|
||||||
|
_topology: &<Self as Application>::Deployment,
|
||||||
|
_index: usize,
|
||||||
|
) -> Result<Option<BinaryConfigNodeSpec>, DynError> {
|
||||||
|
Ok(Some(compose_node_spec()))
|
||||||
|
}
|
||||||
|
|
||||||
|
fn compose_descriptor(
|
||||||
|
topology: &<Self as Application>::Deployment,
|
||||||
|
_cfgsync_port: u16,
|
||||||
|
) -> Result<ComposeDescriptor, DynError> {
|
||||||
|
let spec = compose_node_spec();
|
||||||
|
|
||||||
|
let nodes = (0..topology.node_count())
|
||||||
|
.map(|index| {
|
||||||
|
let runtime = binary_config_node_runtime_spec(index, &spec);
|
||||||
|
let file_name = Self::static_node_config_file_name(index);
|
||||||
|
|
||||||
|
let host_port = COMPOSE_HTTP_PORT_BASE + index as u16;
|
||||||
|
let ports = compose_node_ports(host_port, &runtime.container_ports);
|
||||||
|
|
||||||
|
NodeDescriptor::new(
|
||||||
|
node_identifier(index),
|
||||||
|
runtime.image,
|
||||||
|
runtime.entrypoint,
|
||||||
|
vec![format!(
|
||||||
|
"./stack/configs/{file_name}:{}:ro",
|
||||||
|
spec.config_container_path
|
||||||
|
)],
|
||||||
|
runtime.extra_hosts,
|
||||||
|
ports,
|
||||||
|
runtime.container_ports,
|
||||||
|
runtime.environment,
|
||||||
|
runtime.platform,
|
||||||
|
)
|
||||||
|
})
|
||||||
|
.collect();
|
||||||
|
|
||||||
|
Ok(ComposeDescriptor::new(nodes))
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
fn compose_node_ports(host_port: u16, container_ports: &[u16]) -> Vec<String> {
|
||||||
|
container_ports
|
||||||
|
.iter()
|
||||||
|
.map(|port| {
|
||||||
|
// OpenRaft failover restarts the leader. Fixed host ports keep TF
|
||||||
|
// clients stable across `docker compose restart`.
|
||||||
|
fixed_loopback_port_binding(host_port, *port)
|
||||||
|
})
|
||||||
|
.collect()
|
||||||
|
}
|
||||||
21
examples/openraft_kv/testing/integration/src/k8s_env.rs
Normal file
21
examples/openraft_kv/testing/integration/src/k8s_env.rs
Normal file
@ -0,0 +1,21 @@
|
|||||||
|
use testing_framework_runner_k8s::{BinaryConfigK8sSpec, K8sBinaryApp};
|
||||||
|
|
||||||
|
use crate::OpenRaftKvEnv;
|
||||||
|
|
||||||
|
/// Config file path passed to the Kubernetes binary spec.
const CONTAINER_CONFIG_PATH: &str = "/etc/openraft-kv/config.yaml";

/// HTTP port passed to the Kubernetes binary spec as the container port.
const CONTAINER_HTTP_PORT: u16 = 8080;

/// Port passed to the Kubernetes binary spec as the service testing port.
const SERVICE_TESTING_PORT: u16 = 8081;

/// Name prefix passed to the Kubernetes binary spec for node resources.
const NODE_NAME_PREFIX: &str = "openraft-kv-node";
|
||||||
|
|
||||||
|
impl K8sBinaryApp for OpenRaftKvEnv {
|
||||||
|
fn k8s_binary_spec() -> BinaryConfigK8sSpec {
|
||||||
|
BinaryConfigK8sSpec::conventional(
|
||||||
|
"openraft-kv",
|
||||||
|
NODE_NAME_PREFIX,
|
||||||
|
"/usr/local/bin/openraft-kv-node",
|
||||||
|
CONTAINER_CONFIG_PATH,
|
||||||
|
CONTAINER_HTTP_PORT,
|
||||||
|
SERVICE_TESTING_PORT,
|
||||||
|
)
|
||||||
|
}
|
||||||
|
}
|
||||||
18
examples/openraft_kv/testing/integration/src/lib.rs
Normal file
18
examples/openraft_kv/testing/integration/src/lib.rs
Normal file
@ -0,0 +1,18 @@
|
|||||||
|
mod app;
|
||||||
|
mod compose_env;
|
||||||
|
mod k8s_env;
|
||||||
|
mod local_env;
|
||||||
|
mod observation;
|
||||||
|
pub mod scenario;
|
||||||
|
|
||||||
|
pub use app::*;
|
||||||
|
pub use observation::*;
|
||||||
|
pub use scenario::{OpenRaftKvBuilderExt, OpenRaftKvScenarioBuilder};
|
||||||
|
|
||||||
|
/// Local process deployer for the OpenRaft example app.
|
||||||
|
pub type OpenRaftKvLocalDeployer = testing_framework_runner_local::ProcessDeployer<OpenRaftKvEnv>;
|
||||||
|
/// Docker Compose deployer for the OpenRaft example app.
|
||||||
|
pub type OpenRaftKvComposeDeployer =
|
||||||
|
testing_framework_runner_compose::ComposeDeployer<OpenRaftKvEnv>;
|
||||||
|
/// Kubernetes deployer for the OpenRaft example app.
|
||||||
|
pub type OpenRaftKvK8sDeployer = testing_framework_runner_k8s::K8sDeployer<OpenRaftKvEnv>;
|
||||||
125
examples/openraft_kv/testing/integration/src/local_env.rs
Normal file
125
examples/openraft_kv/testing/integration/src/local_env.rs
Normal file
@ -0,0 +1,125 @@
|
|||||||
|
use std::collections::{BTreeMap, HashMap};
|
||||||
|
|
||||||
|
use openraft_kv_node::OpenRaftKvNodeConfig;
|
||||||
|
use testing_framework_core::{
|
||||||
|
scenario::{DynError, StartNodeOptions},
|
||||||
|
topology::DeploymentDescriptor,
|
||||||
|
};
|
||||||
|
use testing_framework_runner_local::{
|
||||||
|
BuiltNodeConfig, LocalDeployerEnv, LocalNodePorts, LocalProcessSpec, NodeConfigEntry,
|
||||||
|
reserve_local_node_ports, yaml_node_config,
|
||||||
|
};
|
||||||
|
|
||||||
|
use crate::OpenRaftKvEnv;
|
||||||
|
|
||||||
|
impl LocalDeployerEnv for OpenRaftKvEnv {
|
||||||
|
fn build_node_config_from_template(
|
||||||
|
_topology: &Self::Deployment,
|
||||||
|
index: usize,
|
||||||
|
_peer_ports_by_name: &HashMap<String, u16>,
|
||||||
|
_options: &StartNodeOptions<Self>,
|
||||||
|
peer_ports: &[u16],
|
||||||
|
template_config: Option<&OpenRaftKvNodeConfig>,
|
||||||
|
) -> Result<BuiltNodeConfig<OpenRaftKvNodeConfig>, DynError> {
|
||||||
|
let mut reserved = reserve_local_node_ports(1, &[], "node")
|
||||||
|
.map_err(|source| -> DynError { source.into() })?;
|
||||||
|
|
||||||
|
let ports = reserved
|
||||||
|
.pop()
|
||||||
|
.ok_or_else(|| std::io::Error::other("failed to reserve local node ports"))?;
|
||||||
|
|
||||||
|
let mut config = template_config
|
||||||
|
.cloned()
|
||||||
|
.unwrap_or_else(|| local_node_config(index, ports.network_port(), BTreeMap::new()));
|
||||||
|
|
||||||
|
// OpenRaft peer config is index-sensitive, so local restarts must rebuild
|
||||||
|
// the full peer map from the current reserved port set.
|
||||||
|
let network_port = ports.network_port();
|
||||||
|
config.node_id = index as u64;
|
||||||
|
config.http_port = network_port;
|
||||||
|
config.public_addr = local_addr(network_port);
|
||||||
|
config.peer_addrs = peer_addrs_from_ports(peer_ports, index);
|
||||||
|
|
||||||
|
Ok(BuiltNodeConfig {
|
||||||
|
config,
|
||||||
|
network_port,
|
||||||
|
})
|
||||||
|
}
|
||||||
|
|
||||||
|
fn build_initial_node_configs(
|
||||||
|
topology: &Self::Deployment,
|
||||||
|
) -> Result<
|
||||||
|
Vec<NodeConfigEntry<OpenRaftKvNodeConfig>>,
|
||||||
|
testing_framework_runner_local::process::ProcessSpawnError,
|
||||||
|
> {
|
||||||
|
let reserved_ports = reserve_local_node_ports(topology.node_count(), &[], "node")?;
|
||||||
|
|
||||||
|
let peer_ports = reserved_ports
|
||||||
|
.iter()
|
||||||
|
.map(LocalNodePorts::network_port)
|
||||||
|
.collect::<Vec<_>>();
|
||||||
|
|
||||||
|
// Build every node from the same reserved port view so the initial
|
||||||
|
// cluster starts with a consistent peer list on all nodes.
|
||||||
|
Ok(reserved_ports
|
||||||
|
.iter()
|
||||||
|
.enumerate()
|
||||||
|
.map(|(index, ports)| NodeConfigEntry {
|
||||||
|
name: format!("node-{index}"),
|
||||||
|
config: local_node_config(
|
||||||
|
index,
|
||||||
|
ports.network_port(),
|
||||||
|
peer_addrs_from_ports(&peer_ports, index),
|
||||||
|
),
|
||||||
|
})
|
||||||
|
.collect())
|
||||||
|
}
|
||||||
|
|
||||||
|
fn initial_node_name_prefix() -> &'static str {
|
||||||
|
"node"
|
||||||
|
}
|
||||||
|
|
||||||
|
fn local_process_spec() -> Option<LocalProcessSpec> {
|
||||||
|
Some(
|
||||||
|
LocalProcessSpec::new("OPENRAFT_KV_NODE_BIN", "openraft-kv-node").with_rust_log("info"),
|
||||||
|
)
|
||||||
|
}
|
||||||
|
|
||||||
|
fn render_local_config(config: &OpenRaftKvNodeConfig) -> Result<Vec<u8>, DynError> {
|
||||||
|
yaml_node_config(config)
|
||||||
|
}
|
||||||
|
|
||||||
|
fn http_api_port(config: &OpenRaftKvNodeConfig) -> Option<u16> {
|
||||||
|
Some(config.http_port)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
fn local_node_config(
|
||||||
|
index: usize,
|
||||||
|
network_port: u16,
|
||||||
|
peer_addrs: BTreeMap<u64, String>,
|
||||||
|
) -> OpenRaftKvNodeConfig {
|
||||||
|
OpenRaftKvNodeConfig {
|
||||||
|
node_id: index as u64,
|
||||||
|
http_port: network_port,
|
||||||
|
public_addr: local_addr(network_port),
|
||||||
|
peer_addrs,
|
||||||
|
|
||||||
|
heartbeat_interval_ms: 500,
|
||||||
|
election_timeout_min_ms: 1_500,
|
||||||
|
election_timeout_max_ms: 3_000,
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
fn peer_addrs_from_ports(peer_ports: &[u16], local_index: usize) -> BTreeMap<u64, String> {
|
||||||
|
peer_ports
|
||||||
|
.iter()
|
||||||
|
.enumerate()
|
||||||
|
.filter(|(peer_index, _)| *peer_index != local_index)
|
||||||
|
.map(|(peer_index, peer_port)| (peer_index as u64, local_addr(*peer_port)))
|
||||||
|
.collect()
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Formats `port` as a `host:port` address on the IPv4 loopback interface.
fn local_addr(port: u16) -> String {
    let host = "127.0.0.1";

    format!("{host}:{port}")
}
|
||||||
262
examples/openraft_kv/testing/integration/src/observation.rs
Normal file
262
examples/openraft_kv/testing/integration/src/observation.rs
Normal file
@ -0,0 +1,262 @@
|
|||||||
|
use std::{
|
||||||
|
collections::{BTreeMap, BTreeSet},
|
||||||
|
sync::Arc,
|
||||||
|
time::Duration,
|
||||||
|
};
|
||||||
|
|
||||||
|
use async_trait::async_trait;
|
||||||
|
use openraft_kv_node::{OpenRaftKvClient, OpenRaftKvState};
|
||||||
|
use testing_framework_core::{
|
||||||
|
observation::{
|
||||||
|
BoxedSourceProvider, ObservationConfig, ObservedSource, Observer, StaticSourceProvider,
|
||||||
|
},
|
||||||
|
scenario::{Application, DynError, NodeClients},
|
||||||
|
};
|
||||||
|
use testing_framework_runner_k8s::ManualCluster;
|
||||||
|
|
||||||
|
use crate::OpenRaftKvEnv;
|
||||||
|
|
||||||
|
const OBSERVATION_INTERVAL: Duration = Duration::from_millis(250);
|
||||||
|
const OBSERVATION_HISTORY_LIMIT: usize = 16;
|
||||||
|
|
||||||
|
/// Materialized OpenRaft cluster state built from the latest node polls.
|
||||||
|
#[derive(Clone, Debug, Default)]
|
||||||
|
pub struct OpenRaftClusterSnapshot {
|
||||||
|
states: Vec<OpenRaftKvState>,
|
||||||
|
failures: Vec<OpenRaftSourceFailure>,
|
||||||
|
}
|
||||||
|
|
||||||
|
impl OpenRaftClusterSnapshot {
|
||||||
|
/// Returns the successfully observed node states sorted by node id.
|
||||||
|
#[must_use]
|
||||||
|
pub fn states(&self) -> &[OpenRaftKvState] {
|
||||||
|
&self.states
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Returns `true` when the snapshot contains no successful node states.
|
||||||
|
#[must_use]
|
||||||
|
pub fn is_empty(&self) -> bool {
|
||||||
|
self.states.is_empty()
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Returns the unique observed leader when all responding nodes agree.
|
||||||
|
#[must_use]
|
||||||
|
pub fn agreed_leader(&self, different_from: Option<u64>) -> Option<u64> {
|
||||||
|
let observed = self
|
||||||
|
.states
|
||||||
|
.iter()
|
||||||
|
.filter_map(|state| state.current_leader)
|
||||||
|
.collect::<BTreeSet<_>>();
|
||||||
|
|
||||||
|
let leader = observed.iter().next().copied()?;
|
||||||
|
|
||||||
|
(observed.len() == 1 && different_from != Some(leader)).then_some(leader)
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Returns `true` when every observed node reports the expected voter set.
|
||||||
|
#[must_use]
|
||||||
|
pub fn all_voters_match(&self, expected_voters: &BTreeSet<u64>) -> bool {
|
||||||
|
!self.states.is_empty()
|
||||||
|
&& self.failures.is_empty()
|
||||||
|
&& self.states.iter().all(|state| {
|
||||||
|
state.voters.iter().copied().collect::<BTreeSet<_>>() == *expected_voters
|
||||||
|
})
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Returns `true` when every observed node exposes the expected replicated
|
||||||
|
/// key/value data.
|
||||||
|
#[must_use]
|
||||||
|
pub fn all_kv_match(
|
||||||
|
&self,
|
||||||
|
expected: &BTreeMap<String, String>,
|
||||||
|
full_voter_set: &[u64],
|
||||||
|
) -> bool {
|
||||||
|
!self.states.is_empty()
|
||||||
|
&& self.failures.is_empty()
|
||||||
|
&& self.states.iter().all(|state| {
|
||||||
|
state.current_leader.is_some()
|
||||||
|
&& state.voters == full_voter_set
|
||||||
|
&& expected
|
||||||
|
.iter()
|
||||||
|
.all(|(key, value)| state.kv.get(key) == Some(value))
|
||||||
|
})
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Returns a concise summary for timeout and validation errors.
|
||||||
|
#[must_use]
|
||||||
|
pub fn summary(&self) -> String {
|
||||||
|
let mut lines = self
|
||||||
|
.states
|
||||||
|
.iter()
|
||||||
|
.map(|state| {
|
||||||
|
format!(
|
||||||
|
"node={} leader={:?} voters={:?} keys={}",
|
||||||
|
state.node_id,
|
||||||
|
state.current_leader,
|
||||||
|
state.voters,
|
||||||
|
state.kv.len()
|
||||||
|
)
|
||||||
|
})
|
||||||
|
.collect::<Vec<_>>();
|
||||||
|
|
||||||
|
lines.extend(self.failures.iter().map(OpenRaftSourceFailure::summary));
|
||||||
|
|
||||||
|
if lines.is_empty() {
|
||||||
|
return "no state observed yet".to_owned();
|
||||||
|
}
|
||||||
|
|
||||||
|
lines.join("; ")
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/// A source read that failed during one observation cycle.
#[derive(Clone, Debug, Eq, PartialEq)]
pub struct OpenRaftSourceFailure {
    /// Name of the source that could not be read.
    source_name: String,
    /// Rendered error returned by the failed read.
    message: String,
}

impl OpenRaftSourceFailure {
    /// Records a failure of `source_name` with the given error text.
    fn new(source_name: &str, message: &str) -> Self {
        let source_name = String::from(source_name);
        let message = String::from(message);

        Self {
            source_name,
            message,
        }
    }

    /// Renders the failure as a single `source=… error=…` fragment.
    fn summary(&self) -> String {
        let Self {
            source_name,
            message,
        } = self;

        format!("source={} error={}", source_name, message)
    }
}
|
||||||
|
|
||||||
|
/// Observer that keeps the latest per-node OpenRaft state.
|
||||||
|
#[derive(Clone, Debug, Default)]
|
||||||
|
pub struct OpenRaftClusterObserver;
|
||||||
|
|
||||||
|
impl OpenRaftClusterObserver {
|
||||||
|
/// Default runtime configuration for the OpenRaft example observer.
|
||||||
|
#[must_use]
|
||||||
|
pub fn config() -> ObservationConfig {
|
||||||
|
ObservationConfig {
|
||||||
|
interval: OBSERVATION_INTERVAL,
|
||||||
|
history_limit: OBSERVATION_HISTORY_LIMIT,
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Captures one best-effort OpenRaft cluster snapshot from the provided node
|
||||||
|
/// clients.
|
||||||
|
pub async fn capture_openraft_cluster_snapshot(
|
||||||
|
clients: &[OpenRaftKvClient],
|
||||||
|
) -> OpenRaftClusterSnapshot {
|
||||||
|
capture_cluster_snapshot(&named_sources(clients.to_vec())).await
|
||||||
|
}
|
||||||
|
|
||||||
|
#[async_trait]
|
||||||
|
impl Observer for OpenRaftClusterObserver {
|
||||||
|
type Source = OpenRaftKvClient;
|
||||||
|
type State = OpenRaftClusterSnapshot;
|
||||||
|
type Snapshot = OpenRaftClusterSnapshot;
|
||||||
|
type Event = ();
|
||||||
|
|
||||||
|
async fn init(
|
||||||
|
&self,
|
||||||
|
sources: &[ObservedSource<Self::Source>],
|
||||||
|
) -> Result<Self::State, DynError> {
|
||||||
|
Ok(capture_cluster_snapshot(sources).await)
|
||||||
|
}
|
||||||
|
|
||||||
|
async fn poll(
|
||||||
|
&self,
|
||||||
|
sources: &[ObservedSource<Self::Source>],
|
||||||
|
state: &mut Self::State,
|
||||||
|
) -> Result<Vec<Self::Event>, DynError> {
|
||||||
|
*state = capture_cluster_snapshot(sources).await;
|
||||||
|
|
||||||
|
Ok(Vec::new())
|
||||||
|
}
|
||||||
|
|
||||||
|
fn snapshot(&self, state: &Self::State) -> Self::Snapshot {
|
||||||
|
state.clone()
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Builds the fixed source provider used by the scenario-based OpenRaft
|
||||||
|
/// examples.
|
||||||
|
pub fn openraft_cluster_source_provider(
|
||||||
|
_deployment: &<OpenRaftKvEnv as Application>::Deployment,
|
||||||
|
node_clients: NodeClients<OpenRaftKvEnv>,
|
||||||
|
) -> Result<BoxedSourceProvider<OpenRaftKvClient>, DynError> {
|
||||||
|
Ok(Box::new(StaticSourceProvider::new(named_sources(
|
||||||
|
node_clients.snapshot(),
|
||||||
|
))))
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Dynamic source provider backed by a manual cluster.
|
||||||
|
///
|
||||||
|
/// This keeps observation aligned with the latest client handles after manual
|
||||||
|
/// node restarts.
|
||||||
|
#[derive(Clone)]
|
||||||
|
pub struct OpenRaftManualClusterSourceProvider {
|
||||||
|
cluster: Arc<ManualCluster<OpenRaftKvEnv>>,
|
||||||
|
node_names: Vec<String>,
|
||||||
|
}
|
||||||
|
|
||||||
|
impl OpenRaftManualClusterSourceProvider {
|
||||||
|
/// Builds a provider for the fixed node names used by the OpenRaft
|
||||||
|
/// examples.
|
||||||
|
#[must_use]
|
||||||
|
pub fn new(cluster: Arc<ManualCluster<OpenRaftKvEnv>>, node_count: usize) -> Self {
|
||||||
|
Self {
|
||||||
|
cluster,
|
||||||
|
node_names: (0..node_count)
|
||||||
|
.map(|index| format!("node-{index}"))
|
||||||
|
.collect(),
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
#[async_trait]
|
||||||
|
impl testing_framework_core::observation::SourceProvider<OpenRaftKvClient>
|
||||||
|
for OpenRaftManualClusterSourceProvider
|
||||||
|
{
|
||||||
|
async fn sources(&self) -> Result<Vec<ObservedSource<OpenRaftKvClient>>, DynError> {
|
||||||
|
Ok(self
|
||||||
|
.node_names
|
||||||
|
.iter()
|
||||||
|
.filter_map(|name| {
|
||||||
|
self.cluster
|
||||||
|
.node_client(name)
|
||||||
|
.map(|client| ObservedSource::new(name, client))
|
||||||
|
})
|
||||||
|
.collect())
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
fn named_sources(clients: Vec<OpenRaftKvClient>) -> Vec<ObservedSource<OpenRaftKvClient>> {
|
||||||
|
clients
|
||||||
|
.into_iter()
|
||||||
|
.enumerate()
|
||||||
|
.map(|(index, client)| ObservedSource::new(&format!("node-{index}"), client))
|
||||||
|
.collect()
|
||||||
|
}
|
||||||
|
|
||||||
|
async fn capture_cluster_snapshot(
|
||||||
|
sources: &[ObservedSource<OpenRaftKvClient>],
|
||||||
|
) -> OpenRaftClusterSnapshot {
|
||||||
|
let mut states = Vec::with_capacity(sources.len());
|
||||||
|
let mut failures = Vec::new();
|
||||||
|
|
||||||
|
for source in sources {
|
||||||
|
match source.source.state().await {
|
||||||
|
Ok(state) => states.push(state),
|
||||||
|
Err(error) => {
|
||||||
|
failures.push(OpenRaftSourceFailure::new(&source.name, &error.to_string()))
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
states.sort_by_key(|state| state.node_id);
|
||||||
|
|
||||||
|
OpenRaftClusterSnapshot { states, failures }
|
||||||
|
}
|
||||||
32
examples/openraft_kv/testing/integration/src/scenario.rs
Normal file
32
examples/openraft_kv/testing/integration/src/scenario.rs
Normal file
@ -0,0 +1,32 @@
|
|||||||
|
use testing_framework_core::scenario::{CoreBuilderExt, ScenarioBuilder};
|
||||||
|
|
||||||
|
use crate::{
|
||||||
|
OpenRaftClusterObserver, OpenRaftKvEnv, OpenRaftKvTopology, openraft_cluster_source_provider,
|
||||||
|
};
|
||||||
|
|
||||||
|
/// Scenario builder alias used by the OpenRaft example binaries.
|
||||||
|
pub type OpenRaftKvScenarioBuilder = ScenarioBuilder<OpenRaftKvEnv>;
|
||||||
|
|
||||||
|
/// Convenience helpers for constructing the fixed three-node OpenRaft topology.
|
||||||
|
pub trait OpenRaftKvBuilderExt: Sized {
|
||||||
|
/// Starts from the default three-node deployment and lets callers adjust
|
||||||
|
/// it.
|
||||||
|
fn deployment_with(f: impl FnOnce(OpenRaftKvTopology) -> OpenRaftKvTopology) -> Self;
|
||||||
|
|
||||||
|
/// Attaches the default OpenRaft cluster observer to the scenario.
|
||||||
|
fn with_cluster_observer(self) -> Self;
|
||||||
|
}
|
||||||
|
|
||||||
|
impl OpenRaftKvBuilderExt for OpenRaftKvScenarioBuilder {
|
||||||
|
fn deployment_with(f: impl FnOnce(OpenRaftKvTopology) -> OpenRaftKvTopology) -> Self {
|
||||||
|
OpenRaftKvScenarioBuilder::with_deployment(f(OpenRaftKvTopology::new(3)))
|
||||||
|
}
|
||||||
|
|
||||||
|
fn with_cluster_observer(self) -> Self {
|
||||||
|
self.with_observer(
|
||||||
|
OpenRaftClusterObserver,
|
||||||
|
openraft_cluster_source_provider,
|
||||||
|
OpenRaftClusterObserver::config(),
|
||||||
|
)
|
||||||
|
}
|
||||||
|
}
|
||||||
15
examples/openraft_kv/testing/workloads/Cargo.toml
Normal file
15
examples/openraft_kv/testing/workloads/Cargo.toml
Normal file
@ -0,0 +1,15 @@
|
|||||||
|
[package]
|
||||||
|
edition.workspace = true
|
||||||
|
license.workspace = true
|
||||||
|
name = "openraft-kv-runtime-workloads"
|
||||||
|
version.workspace = true
|
||||||
|
|
||||||
|
[dependencies]
|
||||||
|
anyhow = "1.0"
|
||||||
|
async-trait = { workspace = true }
|
||||||
|
openraft-kv-node = { path = "../../openraft-kv-node" }
|
||||||
|
openraft-kv-runtime-ext = { path = "../integration" }
|
||||||
|
testing-framework-core = { workspace = true }
|
||||||
|
thiserror = "2.0"
|
||||||
|
tokio = { workspace = true, features = ["full"] }
|
||||||
|
tracing = { workspace = true }
|
||||||
61
examples/openraft_kv/testing/workloads/src/convergence.rs
Normal file
61
examples/openraft_kv/testing/workloads/src/convergence.rs
Normal file
@ -0,0 +1,61 @@
|
|||||||
|
use std::time::Duration;
|
||||||
|
|
||||||
|
use async_trait::async_trait;
|
||||||
|
use openraft_kv_runtime_ext::{OpenRaftClusterObserver, OpenRaftKvEnv};
|
||||||
|
use testing_framework_core::{
|
||||||
|
observation::ObservationHandle,
|
||||||
|
scenario::{DynError, Expectation, RunContext},
|
||||||
|
};
|
||||||
|
|
||||||
|
use crate::support::{expected_kv, wait_for_observed_replication};
|
||||||
|
|
||||||
|
/// Expectation that waits for the full voter set and the writes from this run
/// to converge on every node.
#[derive(Clone)]
pub struct OpenRaftKvConverges {
    /// Number of writes the run is expected to have replicated.
    total_writes: usize,
    /// Maximum time to wait for convergence.
    timeout: Duration,
    /// Prefix from which the expected keys are derived.
    key_prefix: String,
}

impl OpenRaftKvConverges {
    /// Creates a convergence check for the given number of replicated writes,
    /// with a 30 second timeout and the `raft-key` prefix.
    #[must_use]
    pub fn new(total_writes: usize) -> Self {
        let timeout = Duration::from_secs(30);
        let key_prefix = String::from("raft-key");

        Self {
            total_writes,
            timeout,
            key_prefix,
        }
    }

    /// Overrides the key prefix used to derive expected writes.
    #[must_use]
    pub fn key_prefix(self, value: &str) -> Self {
        Self {
            key_prefix: value.to_owned(),
            ..self
        }
    }

    /// Overrides the convergence timeout.
    #[must_use]
    pub const fn timeout(mut self, value: Duration) -> Self {
        self.timeout = value;
        self
    }
}
|
||||||
|
|
||||||
|
#[async_trait]
|
||||||
|
impl Expectation<OpenRaftKvEnv> for OpenRaftKvConverges {
|
||||||
|
fn name(&self) -> &str {
|
||||||
|
"openraft_kv_converges"
|
||||||
|
}
|
||||||
|
|
||||||
|
async fn evaluate(&mut self, ctx: &RunContext<OpenRaftKvEnv>) -> Result<(), DynError> {
|
||||||
|
let expected = expected_kv(&self.key_prefix, self.total_writes);
|
||||||
|
let observer = ctx.require_extension::<ObservationHandle<OpenRaftClusterObserver>>()?;
|
||||||
|
|
||||||
|
wait_for_observed_replication(&observer, &expected, self.timeout).await?;
|
||||||
|
|
||||||
|
Ok(())
|
||||||
|
}
|
||||||
|
}
|
||||||
207
examples/openraft_kv/testing/workloads/src/failover.rs
Normal file
207
examples/openraft_kv/testing/workloads/src/failover.rs
Normal file
@ -0,0 +1,207 @@
|
|||||||
|
use std::time::Duration;
|
||||||
|
|
||||||
|
use async_trait::async_trait;
|
||||||
|
use openraft_kv_node::OpenRaftKvClient;
|
||||||
|
use openraft_kv_runtime_ext::{OpenRaftClusterObserver, OpenRaftKvEnv};
|
||||||
|
use testing_framework_core::{
|
||||||
|
observation::ObservationHandle,
|
||||||
|
scenario::{DynError, RunContext, Workload},
|
||||||
|
};
|
||||||
|
use tracing::info;
|
||||||
|
|
||||||
|
use crate::support::{
|
||||||
|
OpenRaftMembership, ensure_cluster_size, resolve_client_for_node, wait_for_observed_leader,
|
||||||
|
wait_for_observed_membership, write_batch,
|
||||||
|
};
|
||||||
|
|
||||||
|
/// Workload that bootstraps the cluster, expands it to three voters, writes one
/// batch, restarts the leader, then writes a second batch through the new
/// leader.
#[derive(Clone)]
pub struct OpenRaftKvFailoverWorkload {
    /// Number of writes issued before the leader restart.
    first_batch: usize,
    /// Number of writes issued after the leader restart.
    second_batch: usize,
    /// Timeout applied to leader and membership transitions.
    timeout: Duration,
    /// Prefix of the generated keys.
    key_prefix: String,
}

impl OpenRaftKvFailoverWorkload {
    /// Creates the default failover workload configuration: two batches of
    /// eight writes, a 30 second timeout and the `raft-key` prefix.
    #[must_use]
    pub fn new() -> Self {
        let timeout = Duration::from_secs(30);
        let key_prefix = String::from("raft-key");

        Self {
            first_batch: 8,
            second_batch: 8,
            timeout,
            key_prefix,
        }
    }

    /// Sets the number of writes issued before the leader restart.
    #[must_use]
    pub const fn first_batch(mut self, value: usize) -> Self {
        self.first_batch = value;
        self
    }

    /// Sets the number of writes issued after the leader restart.
    #[must_use]
    pub const fn second_batch(mut self, value: usize) -> Self {
        self.second_batch = value;
        self
    }

    /// Overrides the key prefix used for generated writes.
    #[must_use]
    pub fn key_prefix(self, value: &str) -> Self {
        Self {
            key_prefix: value.to_owned(),
            ..self
        }
    }

    /// Overrides the timeout used for leader and membership transitions.
    #[must_use]
    pub const fn timeout(mut self, value: Duration) -> Self {
        self.timeout = value;
        self
    }
}

impl Default for OpenRaftKvFailoverWorkload {
    /// Same configuration as [`OpenRaftKvFailoverWorkload::new`].
    fn default() -> Self {
        Self::new()
    }
}
|
||||||
|
|
||||||
|
#[async_trait]
|
||||||
|
impl Workload<OpenRaftKvEnv> for OpenRaftKvFailoverWorkload {
|
||||||
|
fn name(&self) -> &str {
|
||||||
|
"openraft_kv_failover_workload"
|
||||||
|
}
|
||||||
|
|
||||||
|
async fn start(&self, ctx: &RunContext<OpenRaftKvEnv>) -> Result<(), DynError> {
|
||||||
|
let clients = ctx.node_clients().snapshot();
|
||||||
|
let observer = ctx.require_extension::<ObservationHandle<OpenRaftClusterObserver>>()?;
|
||||||
|
|
||||||
|
ensure_cluster_size(&clients, 3)?;
|
||||||
|
|
||||||
|
self.bootstrap_cluster(&clients).await?;
|
||||||
|
|
||||||
|
let initial_leader = wait_for_observed_leader(&observer, self.timeout, None).await?;
|
||||||
|
let membership = OpenRaftMembership::discover(&clients).await?;
|
||||||
|
|
||||||
|
self.promote_cluster(&observer, &clients, initial_leader, &membership)
|
||||||
|
.await?;
|
||||||
|
self.write_initial_batch(&clients, initial_leader).await?;
|
||||||
|
|
||||||
|
let new_leader = self
|
||||||
|
.restart_leader_and_wait_for_failover(ctx, &observer, initial_leader)
|
||||||
|
.await?;
|
||||||
|
self.write_second_batch(&clients, new_leader).await?;
|
||||||
|
|
||||||
|
Ok(())
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
impl OpenRaftKvFailoverWorkload {
|
||||||
|
async fn bootstrap_cluster(&self, clients: &[OpenRaftKvClient]) -> Result<(), DynError> {
|
||||||
|
info!("initializing openraft cluster");
|
||||||
|
|
||||||
|
clients[0].init_self().await?;
|
||||||
|
|
||||||
|
Ok(())
|
||||||
|
}
|
||||||
|
|
||||||
|
async fn promote_cluster(
|
||||||
|
&self,
|
||||||
|
observer: &ObservationHandle<OpenRaftClusterObserver>,
|
||||||
|
clients: &[OpenRaftKvClient],
|
||||||
|
leader_id: u64,
|
||||||
|
membership: &OpenRaftMembership,
|
||||||
|
) -> Result<(), DynError> {
|
||||||
|
let leader = resolve_client_for_node(clients, leader_id, self.timeout).await?;
|
||||||
|
|
||||||
|
for learner in membership.learner_targets(leader_id) {
|
||||||
|
info!(
|
||||||
|
target = learner.node_id,
|
||||||
|
addr = %learner.public_addr,
|
||||||
|
"adding learner"
|
||||||
|
);
|
||||||
|
|
||||||
|
leader
|
||||||
|
.add_learner(learner.node_id, &learner.public_addr)
|
||||||
|
.await?;
|
||||||
|
}
|
||||||
|
|
||||||
|
let voter_ids = membership.voter_ids();
|
||||||
|
leader.change_membership(voter_ids.iter().copied()).await?;
|
||||||
|
|
||||||
|
wait_for_observed_membership(observer, &voter_ids, self.timeout).await?;
|
||||||
|
|
||||||
|
Ok(())
|
||||||
|
}
|
||||||
|
|
||||||
|
async fn write_initial_batch(
|
||||||
|
&self,
|
||||||
|
clients: &[OpenRaftKvClient],
|
||||||
|
leader_id: u64,
|
||||||
|
) -> Result<(), DynError> {
|
||||||
|
info!(
|
||||||
|
leader = leader_id,
|
||||||
|
writes = self.first_batch,
|
||||||
|
"writing initial batch"
|
||||||
|
);
|
||||||
|
|
||||||
|
let leader = resolve_client_for_node(clients, leader_id, self.timeout).await?;
|
||||||
|
write_batch(&leader, &self.key_prefix, 0, self.first_batch).await?;
|
||||||
|
|
||||||
|
Ok(())
|
||||||
|
}
|
||||||
|
|
||||||
|
async fn restart_leader_and_wait_for_failover(
|
||||||
|
&self,
|
||||||
|
ctx: &RunContext<OpenRaftKvEnv>,
|
||||||
|
observer: &ObservationHandle<OpenRaftClusterObserver>,
|
||||||
|
leader_id: u64,
|
||||||
|
) -> Result<u64, DynError> {
|
||||||
|
let Some(control) = ctx.node_control() else {
|
||||||
|
return Err("openraft failover workload requires node control".into());
|
||||||
|
};
|
||||||
|
|
||||||
|
let leader_name = format!("node-{leader_id}");
|
||||||
|
info!(%leader_name, "restarting current leader");
|
||||||
|
|
||||||
|
control.restart_node(&leader_name).await?;
|
||||||
|
|
||||||
|
let new_leader = wait_for_observed_leader(observer, self.timeout, Some(leader_id)).await?;
|
||||||
|
|
||||||
|
info!(
|
||||||
|
old_leader = leader_id,
|
||||||
|
new_leader, "leader changed after restart"
|
||||||
|
);
|
||||||
|
|
||||||
|
Ok(new_leader)
|
||||||
|
}
|
||||||
|
|
||||||
|
async fn write_second_batch(
|
||||||
|
&self,
|
||||||
|
clients: &[OpenRaftKvClient],
|
||||||
|
leader_id: u64,
|
||||||
|
) -> Result<(), DynError> {
|
||||||
|
info!(
|
||||||
|
leader = leader_id,
|
||||||
|
writes = self.second_batch,
|
||||||
|
"writing second batch"
|
||||||
|
);
|
||||||
|
|
||||||
|
let leader = resolve_client_for_node(clients, leader_id, self.timeout).await?;
|
||||||
|
write_batch(
|
||||||
|
&leader,
|
||||||
|
&self.key_prefix,
|
||||||
|
self.first_batch,
|
||||||
|
self.second_batch,
|
||||||
|
)
|
||||||
|
.await?;
|
||||||
|
|
||||||
|
Ok(())
|
||||||
|
}
|
||||||
|
}
|
||||||
14
examples/openraft_kv/testing/workloads/src/lib.rs
Normal file
14
examples/openraft_kv/testing/workloads/src/lib.rs
Normal file
@ -0,0 +1,14 @@
|
|||||||
|
mod convergence;
|
||||||
|
mod failover;
|
||||||
|
mod support;
|
||||||
|
|
||||||
|
/// Replication expectation used by the OpenRaft example binaries.
|
||||||
|
pub use convergence::OpenRaftKvConverges;
|
||||||
|
/// Failover workload used by the OpenRaft example binaries.
|
||||||
|
pub use failover::OpenRaftKvFailoverWorkload;
|
||||||
|
/// Shared cluster helpers used by the OpenRaft workload and manual k8s example.
|
||||||
|
pub use support::{
|
||||||
|
FULL_VOTER_SET, OpenRaftClusterError, OpenRaftMembership, ensure_cluster_size, expected_kv,
|
||||||
|
resolve_client_for_node, wait_for_leader, wait_for_membership, wait_for_observed_leader,
|
||||||
|
wait_for_observed_membership, wait_for_observed_replication, wait_for_replication, write_batch,
|
||||||
|
};
|
||||||
328
examples/openraft_kv/testing/workloads/src/support.rs
Normal file
328
examples/openraft_kv/testing/workloads/src/support.rs
Normal file
@ -0,0 +1,328 @@
|
|||||||
|
use std::{
|
||||||
|
collections::{BTreeMap, BTreeSet},
|
||||||
|
time::Duration,
|
||||||
|
};
|
||||||
|
|
||||||
|
use openraft_kv_node::{OpenRaftKvClient, OpenRaftKvState};
|
||||||
|
use openraft_kv_runtime_ext::{
|
||||||
|
OpenRaftClusterObserver, OpenRaftClusterSnapshot, capture_openraft_cluster_snapshot,
|
||||||
|
};
|
||||||
|
use testing_framework_core::observation::{ObservationHandle, ObservationSnapshot};
|
||||||
|
use thiserror::Error;
|
||||||
|
use tokio::time::{Instant, sleep};
|
||||||
|
|
||||||
|
const POLL_INTERVAL: Duration = Duration::from_millis(250);
|
||||||
|
const CLIENT_RESOLUTION_INTERVAL: Duration = Duration::from_millis(200);
|
||||||
|
|
||||||
|
/// Fixed voter set used by the example cluster.
|
||||||
|
pub const FULL_VOTER_SET: [u64; 3] = [0, 1, 2];
|
||||||
|
|
||||||
|
/// A node, taken from the discovered cluster state, that can be added to the
/// cluster as a learner.
#[derive(Clone, Debug, Eq, PartialEq)]
pub struct LearnerTarget {
    /// Identifier of the node in the OpenRaft membership.
    pub node_id: u64,
    /// Address the node advertises for Raft traffic.
    pub public_addr: String,
}
|
||||||
|
|
||||||
|
/// Membership view captured from the current node states.
|
||||||
|
#[derive(Clone, Debug)]
|
||||||
|
pub struct OpenRaftMembership {
|
||||||
|
states: Vec<OpenRaftKvState>,
|
||||||
|
}
|
||||||
|
|
||||||
|
impl OpenRaftMembership {
|
||||||
|
/// Builds a membership view from already observed node states.
|
||||||
|
#[must_use]
|
||||||
|
pub fn from_states(states: &[OpenRaftKvState]) -> Self {
|
||||||
|
let mut states = states.to_vec();
|
||||||
|
states.sort_by_key(|state| state.node_id);
|
||||||
|
|
||||||
|
Self { states }
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Reads and sorts the current node states by id.
|
||||||
|
pub async fn discover(clients: &[OpenRaftKvClient]) -> Result<Self, OpenRaftClusterError> {
|
||||||
|
let mut states = Vec::with_capacity(clients.len());
|
||||||
|
|
||||||
|
for client in clients {
|
||||||
|
states.push(client.state().await.map_err(OpenRaftClusterError::Client)?);
|
||||||
|
}
|
||||||
|
|
||||||
|
Ok(Self::from_states(&states))
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Returns the full voter set implied by the discovered nodes.
|
||||||
|
#[must_use]
|
||||||
|
pub fn voter_ids(&self) -> BTreeSet<u64> {
|
||||||
|
self.states.iter().map(|state| state.node_id).collect()
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Returns every non-leader node as a learner target.
|
||||||
|
#[must_use]
|
||||||
|
pub fn learner_targets(&self, leader_id: u64) -> Vec<LearnerTarget> {
|
||||||
|
self.states
|
||||||
|
.iter()
|
||||||
|
.filter(|state| state.node_id != leader_id)
|
||||||
|
.map(|state| LearnerTarget {
|
||||||
|
node_id: state.node_id,
|
||||||
|
public_addr: state.public_addr.clone(),
|
||||||
|
})
|
||||||
|
.collect()
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Errors raised by the OpenRaft example cluster helpers.
|
||||||
|
#[derive(Debug, Error)]
|
||||||
|
pub enum OpenRaftClusterError {
|
||||||
|
#[error("openraft example requires at least {expected} node clients, got {actual}")]
|
||||||
|
InsufficientClients { expected: usize, actual: usize },
|
||||||
|
#[error("failed to query openraft node state: {0}")]
|
||||||
|
Client(#[source] anyhow::Error),
|
||||||
|
#[error("openraft cluster observation is not available yet")]
|
||||||
|
MissingObservation,
|
||||||
|
#[error(
|
||||||
|
"timed out waiting for {action} after {timeout:?}; last observation: {last_observation}"
|
||||||
|
)]
|
||||||
|
Timeout {
|
||||||
|
action: &'static str,
|
||||||
|
timeout: Duration,
|
||||||
|
last_observation: String,
|
||||||
|
},
|
||||||
|
#[error("timed out resolving node client for {node_id} after {timeout:?}")]
|
||||||
|
ClientResolution { node_id: u64, timeout: Duration },
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Ensures the example cluster has the expected number of node clients.
|
||||||
|
pub fn ensure_cluster_size(
|
||||||
|
clients: &[OpenRaftKvClient],
|
||||||
|
expected: usize,
|
||||||
|
) -> Result<(), OpenRaftClusterError> {
|
||||||
|
if clients.len() < expected {
|
||||||
|
return Err(OpenRaftClusterError::InsufficientClients {
|
||||||
|
expected,
|
||||||
|
actual: clients.len(),
|
||||||
|
});
|
||||||
|
}
|
||||||
|
|
||||||
|
Ok(())
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Waits until the cluster converges on one leader.
|
||||||
|
pub async fn wait_for_leader(
|
||||||
|
clients: &[OpenRaftKvClient],
|
||||||
|
timeout: Duration,
|
||||||
|
different_from: Option<u64>,
|
||||||
|
) -> Result<u64, OpenRaftClusterError> {
|
||||||
|
let deadline = Instant::now() + timeout;
|
||||||
|
|
||||||
|
loop {
|
||||||
|
let last_observation = capture_openraft_cluster_snapshot(clients).await;
|
||||||
|
|
||||||
|
if let Some(leader) = last_observation.agreed_leader(different_from) {
|
||||||
|
return Ok(leader);
|
||||||
|
}
|
||||||
|
|
||||||
|
if Instant::now() >= deadline {
|
||||||
|
return Err(OpenRaftClusterError::Timeout {
|
||||||
|
action: "leader agreement",
|
||||||
|
timeout,
|
||||||
|
last_observation: last_observation.summary(),
|
||||||
|
});
|
||||||
|
}
|
||||||
|
|
||||||
|
sleep(POLL_INTERVAL).await;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Waits until every node reports the expected voter set.
|
||||||
|
pub async fn wait_for_membership(
|
||||||
|
clients: &[OpenRaftKvClient],
|
||||||
|
expected_voters: &BTreeSet<u64>,
|
||||||
|
timeout: Duration,
|
||||||
|
) -> Result<(), OpenRaftClusterError> {
|
||||||
|
let deadline = Instant::now() + timeout;
|
||||||
|
|
||||||
|
loop {
|
||||||
|
let last_observation = capture_openraft_cluster_snapshot(clients).await;
|
||||||
|
|
||||||
|
if last_observation.all_voters_match(expected_voters) {
|
||||||
|
return Ok(());
|
||||||
|
}
|
||||||
|
|
||||||
|
if Instant::now() >= deadline {
|
||||||
|
return Err(OpenRaftClusterError::Timeout {
|
||||||
|
action: "membership convergence",
|
||||||
|
timeout,
|
||||||
|
last_observation: last_observation.summary(),
|
||||||
|
});
|
||||||
|
}
|
||||||
|
|
||||||
|
sleep(POLL_INTERVAL).await;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Waits until every node reports the full replicated key set.
|
||||||
|
pub async fn wait_for_replication(
|
||||||
|
clients: &[OpenRaftKvClient],
|
||||||
|
expected: &BTreeMap<String, String>,
|
||||||
|
timeout: Duration,
|
||||||
|
) -> Result<(), OpenRaftClusterError> {
|
||||||
|
let deadline = Instant::now() + timeout;
|
||||||
|
|
||||||
|
loop {
|
||||||
|
let last_observation = capture_openraft_cluster_snapshot(clients).await;
|
||||||
|
|
||||||
|
if last_observation.all_kv_match(expected, &FULL_VOTER_SET) {
|
||||||
|
return Ok(());
|
||||||
|
}
|
||||||
|
|
||||||
|
if Instant::now() >= deadline {
|
||||||
|
return Err(OpenRaftClusterError::Timeout {
|
||||||
|
action: "replicated state convergence",
|
||||||
|
timeout,
|
||||||
|
last_observation: last_observation.summary(),
|
||||||
|
});
|
||||||
|
}
|
||||||
|
|
||||||
|
sleep(POLL_INTERVAL).await;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Waits until the observer reports one agreed leader.
|
||||||
|
pub async fn wait_for_observed_leader(
|
||||||
|
handle: &ObservationHandle<OpenRaftClusterObserver>,
|
||||||
|
timeout: Duration,
|
||||||
|
different_from: Option<u64>,
|
||||||
|
) -> Result<u64, OpenRaftClusterError> {
|
||||||
|
let snapshot =
|
||||||
|
wait_for_observed_snapshot(handle, timeout, "observed leader agreement", |snapshot| {
|
||||||
|
snapshot.agreed_leader(different_from).is_some()
|
||||||
|
})
|
||||||
|
.await?;
|
||||||
|
|
||||||
|
snapshot
|
||||||
|
.value
|
||||||
|
.agreed_leader(different_from)
|
||||||
|
.ok_or(OpenRaftClusterError::MissingObservation)
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Waits until the observer reports the expected voter set on every node.
|
||||||
|
pub async fn wait_for_observed_membership(
|
||||||
|
handle: &ObservationHandle<OpenRaftClusterObserver>,
|
||||||
|
expected_voters: &BTreeSet<u64>,
|
||||||
|
timeout: Duration,
|
||||||
|
) -> Result<(), OpenRaftClusterError> {
|
||||||
|
wait_for_observed_snapshot(
|
||||||
|
handle,
|
||||||
|
timeout,
|
||||||
|
"observed membership convergence",
|
||||||
|
|snapshot| snapshot.all_voters_match(expected_voters),
|
||||||
|
)
|
||||||
|
.await?;
|
||||||
|
|
||||||
|
Ok(())
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Waits until the observer reports the full replicated key set.
|
||||||
|
pub async fn wait_for_observed_replication(
|
||||||
|
handle: &ObservationHandle<OpenRaftClusterObserver>,
|
||||||
|
expected: &BTreeMap<String, String>,
|
||||||
|
timeout: Duration,
|
||||||
|
) -> Result<(), OpenRaftClusterError> {
|
||||||
|
wait_for_observed_snapshot(
|
||||||
|
handle,
|
||||||
|
timeout,
|
||||||
|
"observed replicated state convergence",
|
||||||
|
|snapshot| snapshot.all_kv_match(expected, &FULL_VOTER_SET),
|
||||||
|
)
|
||||||
|
.await?;
|
||||||
|
|
||||||
|
Ok(())
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Resolves the client handle that currently identifies as `node_id`.
|
||||||
|
pub async fn resolve_client_for_node(
|
||||||
|
clients: &[OpenRaftKvClient],
|
||||||
|
node_id: u64,
|
||||||
|
timeout: Duration,
|
||||||
|
) -> Result<OpenRaftKvClient, OpenRaftClusterError> {
|
||||||
|
let deadline = Instant::now() + timeout;
|
||||||
|
|
||||||
|
loop {
|
||||||
|
for client in clients {
|
||||||
|
let Ok(state) = client.state().await else {
|
||||||
|
continue;
|
||||||
|
};
|
||||||
|
|
||||||
|
if state.node_id == node_id {
|
||||||
|
return Ok(client.clone());
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if Instant::now() >= deadline {
|
||||||
|
return Err(OpenRaftClusterError::ClientResolution { node_id, timeout });
|
||||||
|
}
|
||||||
|
|
||||||
|
sleep(CLIENT_RESOLUTION_INTERVAL).await;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Issues a contiguous batch of writes through the current leader.
|
||||||
|
pub async fn write_batch(
|
||||||
|
leader: &OpenRaftKvClient,
|
||||||
|
prefix: &str,
|
||||||
|
start: usize,
|
||||||
|
count: usize,
|
||||||
|
) -> Result<(), OpenRaftClusterError> {
|
||||||
|
for index in start..(start + count) {
|
||||||
|
let key = format!("{prefix}-{index}");
|
||||||
|
let value = format!("value-{index}");
|
||||||
|
|
||||||
|
leader
|
||||||
|
.write(&key, &value, index as u64 + 1)
|
||||||
|
.await
|
||||||
|
.map_err(OpenRaftClusterError::Client)?;
|
||||||
|
}
|
||||||
|
|
||||||
|
Ok(())
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Builds the key/value map that should be replicated once the workload has finished.
///
/// For every `index` in `0..total_writes` the map holds the entry
/// `"{prefix}-{index}"` -> `"value-{index}"`.
#[must_use]
pub fn expected_kv(prefix: &str, total_writes: usize) -> BTreeMap<String, String> {
    let mut expected = BTreeMap::new();

    for index in 0..total_writes {
        expected.insert(format!("{prefix}-{index}"), format!("value-{index}"));
    }

    expected
}
|
||||||
|
|
||||||
|
async fn wait_for_observed_snapshot(
|
||||||
|
handle: &ObservationHandle<OpenRaftClusterObserver>,
|
||||||
|
timeout: Duration,
|
||||||
|
action: &'static str,
|
||||||
|
matches: impl Fn(&OpenRaftClusterSnapshot) -> bool,
|
||||||
|
) -> Result<ObservationSnapshot<OpenRaftClusterSnapshot>, OpenRaftClusterError> {
|
||||||
|
let deadline = Instant::now() + timeout;
|
||||||
|
let mut last_summary = "no state observed yet".to_owned();
|
||||||
|
|
||||||
|
loop {
|
||||||
|
if let Some(snapshot) = handle.latest_snapshot() {
|
||||||
|
last_summary = snapshot.value.summary();
|
||||||
|
|
||||||
|
if matches(&snapshot.value) {
|
||||||
|
return Ok(snapshot);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if Instant::now() >= deadline {
|
||||||
|
return Err(OpenRaftClusterError::Timeout {
|
||||||
|
action,
|
||||||
|
timeout,
|
||||||
|
last_observation: last_summary,
|
||||||
|
});
|
||||||
|
}
|
||||||
|
|
||||||
|
sleep(POLL_INTERVAL).await;
|
||||||
|
}
|
||||||
|
}
|
||||||
@ -26,15 +26,15 @@ Each example follows the same pattern:
|
|||||||
## Run locally
|
## Run locally
|
||||||
|
|
||||||
```bash
|
```bash
|
||||||
cargo run -p pubsub-examples --bin basic_ws_roundtrip
|
cargo run -p pubsub-examples --bin pubsub_basic_ws_roundtrip
|
||||||
cargo run -p pubsub-examples --bin basic_ws_reconnect
|
cargo run -p pubsub-examples --bin pubsub_basic_ws_reconnect
|
||||||
```
|
```
|
||||||
|
|
||||||
## Run with Docker Compose
|
## Run with Docker Compose
|
||||||
|
|
||||||
```bash
|
```bash
|
||||||
cargo run -p pubsub-examples --bin compose_ws_roundtrip
|
cargo run -p pubsub-examples --bin pubsub_compose_ws_roundtrip
|
||||||
cargo run -p pubsub-examples --bin compose_ws_reconnect
|
cargo run -p pubsub-examples --bin pubsub_compose_ws_reconnect
|
||||||
```
|
```
|
||||||
|
|
||||||
Set `PUBSUB_IMAGE` to override the default compose image tag.
|
Set `PUBSUB_IMAGE` to override the default compose image tag.
|
||||||
@ -43,7 +43,7 @@ Set `PUBSUB_IMAGE` to override the default compose image tag.
|
|||||||
|
|
||||||
```bash
|
```bash
|
||||||
docker build -t pubsub-node:local -f examples/pubsub/Dockerfile .
|
docker build -t pubsub-node:local -f examples/pubsub/Dockerfile .
|
||||||
cargo run -p pubsub-examples --bin k8s_ws_roundtrip
|
cargo run -p pubsub-examples --bin pubsub_k8s_ws_roundtrip
|
||||||
```
|
```
|
||||||
|
|
||||||
Prerequisites:
|
Prerequisites:
|
||||||
@ -57,5 +57,5 @@ Optional image override:
|
|||||||
|
|
||||||
```bash
|
```bash
|
||||||
docker build -t pubsub-node:local -f examples/pubsub/Dockerfile .
|
docker build -t pubsub-node:local -f examples/pubsub/Dockerfile .
|
||||||
cargo run -p pubsub-examples --bin k8s_manual_ws_roundtrip
|
cargo run -p pubsub-examples --bin pubsub_k8s_manual_ws_roundtrip
|
||||||
```
|
```
|
||||||
|
|||||||
@ -4,6 +4,30 @@ license.workspace = true
|
|||||||
name = "pubsub-examples"
|
name = "pubsub-examples"
|
||||||
version.workspace = true
|
version.workspace = true
|
||||||
|
|
||||||
|
[[bin]]
|
||||||
|
name = "pubsub_basic_ws_roundtrip"
|
||||||
|
path = "src/bin/basic_ws_roundtrip.rs"
|
||||||
|
|
||||||
|
[[bin]]
|
||||||
|
name = "pubsub_basic_ws_reconnect"
|
||||||
|
path = "src/bin/basic_ws_reconnect.rs"
|
||||||
|
|
||||||
|
[[bin]]
|
||||||
|
name = "pubsub_compose_ws_roundtrip"
|
||||||
|
path = "src/bin/compose_ws_roundtrip.rs"
|
||||||
|
|
||||||
|
[[bin]]
|
||||||
|
name = "pubsub_compose_ws_reconnect"
|
||||||
|
path = "src/bin/compose_ws_reconnect.rs"
|
||||||
|
|
||||||
|
[[bin]]
|
||||||
|
name = "pubsub_k8s_ws_roundtrip"
|
||||||
|
path = "src/bin/k8s_ws_roundtrip.rs"
|
||||||
|
|
||||||
|
[[bin]]
|
||||||
|
name = "pubsub_k8s_manual_ws_roundtrip"
|
||||||
|
path = "src/bin/k8s_manual_ws_roundtrip.rs"
|
||||||
|
|
||||||
[dependencies]
|
[dependencies]
|
||||||
anyhow = "1.0"
|
anyhow = "1.0"
|
||||||
pubsub-node = { path = "../pubsub-node" }
|
pubsub-node = { path = "../pubsub-node" }
|
||||||
|
|||||||
24
examples/queue/Dockerfile
Normal file
24
examples/queue/Dockerfile
Normal file
@ -0,0 +1,24 @@
|
|||||||
|
# Build stage: compile the queue-node binary from the workspace sources.
FROM rustlang/rust:nightly-bookworm AS builder

WORKDIR /build

COPY Cargo.toml Cargo.lock ./
COPY cfgsync/ ./cfgsync/
COPY examples/ ./examples/
COPY testing-framework/ ./testing-framework/

RUN cargo build --release -p queue-node

# Runtime stage: slim Debian image holding only the binary and CA certificates.
FROM debian:bookworm-slim

# --no-install-recommends keeps optional packages out of the runtime image.
RUN apt-get update && \
    apt-get install -y --no-install-recommends ca-certificates && \
    rm -rf /var/lib/apt/lists/*

COPY --from=builder /build/target/release/queue-node /usr/local/bin/queue-node

# The default CMD reads the node config from /etc/queue/config.yaml.
RUN mkdir -p /etc/queue
WORKDIR /app

ENTRYPOINT ["/usr/local/bin/queue-node"]
CMD ["--config", "/etc/queue/config.yaml"]
|
||||||
47
examples/queue/README.md
Normal file
47
examples/queue/README.md
Normal file
@ -0,0 +1,47 @@
|
|||||||
|
# Queue Example
|
||||||
|
|
||||||
|
This example runs a small replicated FIFO queue.
|
||||||
|
|
||||||
|
The scenarios enqueue messages, dequeue them again, and check that queue state
|
||||||
|
either converges or drains as expected.
|
||||||
|
|
||||||
|
## How TF runs this
|
||||||
|
|
||||||
|
Each example follows the same pattern:
|
||||||
|
|
||||||
|
- TF starts a small deployment of queue nodes
|
||||||
|
- a workload produces messages, or produces and consumes them
|
||||||
|
- an expectation checks either that queue state converges or that the queue drains
|
||||||
|
|
||||||
|
## Scenarios
|
||||||
|
|
||||||
|
- `basic_convergence` produces messages and checks that queue state converges locally
|
||||||
|
- `basic_roundtrip` produces and consumes messages locally until the queue drains
|
||||||
|
- `basic_restart_chaos` injects random local node restarts during the run
|
||||||
|
- `compose_convergence` and `compose_roundtrip` run the same checks in Docker Compose
|
||||||
|
|
||||||
|
## API
|
||||||
|
|
||||||
|
Each node exposes:
|
||||||
|
|
||||||
|
- `POST /queue/enqueue` to add a message
|
||||||
|
- `POST /queue/dequeue` to remove a message
|
||||||
|
- `GET /queue/state` to inspect the current queue state
|
||||||
|
- `GET /internal/snapshot` to read the local replicated state
|
||||||
|
|
||||||
|
## Run locally
|
||||||
|
|
||||||
|
```bash
|
||||||
|
cargo run -p queue-examples --bin queue_basic_convergence
|
||||||
|
cargo run -p queue-examples --bin queue_basic_roundtrip
|
||||||
|
cargo run -p queue-examples --bin queue_basic_restart_chaos
|
||||||
|
```
|
||||||
|
|
||||||
|
## Run with Docker Compose
|
||||||
|
|
||||||
|
```bash
|
||||||
|
cargo run -p queue-examples --bin queue_compose_convergence
|
||||||
|
cargo run -p queue-examples --bin queue_compose_roundtrip
|
||||||
|
```
|
||||||
|
|
||||||
|
Set `QUEUE_IMAGE` to override the default compose image tag.
|
||||||
36
examples/queue/examples/Cargo.toml
Normal file
36
examples/queue/examples/Cargo.toml
Normal file
@ -0,0 +1,36 @@
|
|||||||
|
[package]
|
||||||
|
edition.workspace = true
|
||||||
|
license.workspace = true
|
||||||
|
name = "queue-examples"
|
||||||
|
version.workspace = true
|
||||||
|
|
||||||
|
[[bin]]
|
||||||
|
name = "queue_basic_convergence"
|
||||||
|
path = "src/bin/basic_convergence.rs"
|
||||||
|
|
||||||
|
[[bin]]
|
||||||
|
name = "queue_basic_restart_chaos"
|
||||||
|
path = "src/bin/basic_restart_chaos.rs"
|
||||||
|
|
||||||
|
[[bin]]
|
||||||
|
name = "queue_basic_roundtrip"
|
||||||
|
path = "src/bin/basic_roundtrip.rs"
|
||||||
|
|
||||||
|
[[bin]]
|
||||||
|
name = "queue_compose_convergence"
|
||||||
|
path = "src/bin/compose_convergence.rs"
|
||||||
|
|
||||||
|
[[bin]]
|
||||||
|
name = "queue_compose_roundtrip"
|
||||||
|
path = "src/bin/compose_roundtrip.rs"
|
||||||
|
|
||||||
|
[dependencies]
|
||||||
|
anyhow = "1.0"
|
||||||
|
async-trait = { workspace = true }
|
||||||
|
queue-runtime-ext = { path = "../testing/integration" }
|
||||||
|
queue-runtime-workloads = { path = "../testing/workloads" }
|
||||||
|
testing-framework-core = { workspace = true }
|
||||||
|
testing-framework-runner-compose = { workspace = true }
|
||||||
|
tokio = { workspace = true, features = ["full"] }
|
||||||
|
tracing = { workspace = true }
|
||||||
|
tracing-subscriber = { version = "0.3", features = ["env-filter"] }
|
||||||
32
examples/queue/examples/src/bin/basic_convergence.rs
Normal file
32
examples/queue/examples/src/bin/basic_convergence.rs
Normal file
@ -0,0 +1,32 @@
|
|||||||
|
use std::time::Duration;
|
||||||
|
|
||||||
|
use queue_runtime_ext::QueueLocalDeployer;
|
||||||
|
use queue_runtime_workloads::{
|
||||||
|
QueueBuilderExt, QueueConverges, QueueProduceWorkload, QueueScenarioBuilder, QueueTopology,
|
||||||
|
};
|
||||||
|
use testing_framework_core::scenario::Deployer;
|
||||||
|
|
||||||
|
#[tokio::main]
|
||||||
|
async fn main() -> anyhow::Result<()> {
|
||||||
|
tracing_subscriber::fmt()
|
||||||
|
.with_env_filter(tracing_subscriber::EnvFilter::from_default_env())
|
||||||
|
.init();
|
||||||
|
|
||||||
|
let operations = 300;
|
||||||
|
|
||||||
|
let mut scenario = QueueScenarioBuilder::deployment_with(|_| QueueTopology::new(3))
|
||||||
|
.with_run_duration(Duration::from_secs(30))
|
||||||
|
.with_workload(
|
||||||
|
QueueProduceWorkload::new()
|
||||||
|
.operations(operations)
|
||||||
|
.rate_per_sec(30)
|
||||||
|
.payload_prefix("demo"),
|
||||||
|
)
|
||||||
|
.with_expectation(QueueConverges::new(operations).timeout(Duration::from_secs(25)))
|
||||||
|
.build()?;
|
||||||
|
|
||||||
|
let deployer = QueueLocalDeployer::default();
|
||||||
|
let runner = deployer.deploy(&scenario).await?;
|
||||||
|
runner.run(&mut scenario).await?;
|
||||||
|
Ok(())
|
||||||
|
}
|
||||||
84
examples/queue/examples/src/bin/basic_restart_chaos.rs
Normal file
84
examples/queue/examples/src/bin/basic_restart_chaos.rs
Normal file
@ -0,0 +1,84 @@
|
|||||||
|
use std::time::Duration;
|
||||||
|
|
||||||
|
use async_trait::async_trait;
|
||||||
|
use queue_runtime_ext::QueueLocalDeployer;
|
||||||
|
use queue_runtime_workloads::{
|
||||||
|
QueueBuilderExt, QueueConverges, QueueProduceWorkload, QueueScenarioBuilder, QueueTopology,
|
||||||
|
};
|
||||||
|
use testing_framework_core::{
|
||||||
|
scenario::{Deployer, DynError, RunContext, Workload},
|
||||||
|
topology::DeploymentDescriptor,
|
||||||
|
};
|
||||||
|
use tracing::info;
|
||||||
|
|
||||||
|
/// Settings for a workload that triggers a fixed number of node restarts.
#[derive(Clone)]
struct FixedRestartChaosWorkload {
    /// How many restarts to trigger in total.
    restarts: usize,
    /// Pause taken before each restart.
    delay: Duration,
}

impl FixedRestartChaosWorkload {
    /// Creates a workload that performs `restarts` restarts, waiting `delay` before each one.
    const fn new(restarts: usize, delay: Duration) -> Self {
        Self { delay, restarts }
    }
}
|
||||||
|
|
||||||
|
#[async_trait]
|
||||||
|
impl Workload<queue_runtime_workloads::QueueEnv> for FixedRestartChaosWorkload {
|
||||||
|
fn name(&self) -> &str {
|
||||||
|
"fixed_restart_chaos"
|
||||||
|
}
|
||||||
|
|
||||||
|
async fn start(
|
||||||
|
&self,
|
||||||
|
ctx: &RunContext<queue_runtime_workloads::QueueEnv>,
|
||||||
|
) -> Result<(), DynError> {
|
||||||
|
let Some(control) = ctx.node_control() else {
|
||||||
|
return Err("fixed restart chaos requires node control".into());
|
||||||
|
};
|
||||||
|
|
||||||
|
let node_count = ctx.descriptors().node_count();
|
||||||
|
if node_count == 0 {
|
||||||
|
return Err("fixed restart chaos requires at least one node".into());
|
||||||
|
}
|
||||||
|
|
||||||
|
for step in 0..self.restarts {
|
||||||
|
tokio::time::sleep(self.delay).await;
|
||||||
|
let target_index = if node_count > 1 {
|
||||||
|
(step % (node_count - 1)) + 1
|
||||||
|
} else {
|
||||||
|
0
|
||||||
|
};
|
||||||
|
let target = format!("node-{target_index}");
|
||||||
|
info!(step, %target, "triggering controlled chaos restart");
|
||||||
|
control.restart_node(&target).await?;
|
||||||
|
}
|
||||||
|
|
||||||
|
Ok(())
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
#[tokio::main]
|
||||||
|
async fn main() -> anyhow::Result<()> {
|
||||||
|
tracing_subscriber::fmt()
|
||||||
|
.with_env_filter(tracing_subscriber::EnvFilter::from_default_env())
|
||||||
|
.init();
|
||||||
|
|
||||||
|
let mut scenario = QueueScenarioBuilder::deployment_with(|_| QueueTopology::new(3))
|
||||||
|
.enable_node_control()
|
||||||
|
.with_workload(FixedRestartChaosWorkload::new(3, Duration::from_secs(8)))
|
||||||
|
.with_run_duration(Duration::from_secs(30))
|
||||||
|
.with_workload(
|
||||||
|
QueueProduceWorkload::new()
|
||||||
|
.operations(400)
|
||||||
|
.rate_per_sec(40)
|
||||||
|
.payload_prefix("queue-chaos"),
|
||||||
|
)
|
||||||
|
.with_expectation(QueueConverges::new(200).timeout(Duration::from_secs(30)))
|
||||||
|
.build()?;
|
||||||
|
|
||||||
|
let deployer = QueueLocalDeployer::default();
|
||||||
|
let runner = deployer.deploy(&scenario).await?;
|
||||||
|
runner.run(&mut scenario).await?;
|
||||||
|
Ok(())
|
||||||
|
}
|
||||||
31
examples/queue/examples/src/bin/basic_roundtrip.rs
Normal file
31
examples/queue/examples/src/bin/basic_roundtrip.rs
Normal file
@ -0,0 +1,31 @@
|
|||||||
|
use std::time::Duration;
|
||||||
|
|
||||||
|
use queue_runtime_ext::QueueLocalDeployer;
|
||||||
|
use queue_runtime_workloads::{
|
||||||
|
QueueBuilderExt, QueueDrained, QueueRoundTripWorkload, QueueScenarioBuilder, QueueTopology,
|
||||||
|
};
|
||||||
|
use testing_framework_core::scenario::Deployer;
|
||||||
|
|
||||||
|
#[tokio::main]
|
||||||
|
async fn main() -> anyhow::Result<()> {
|
||||||
|
tracing_subscriber::fmt()
|
||||||
|
.with_env_filter(tracing_subscriber::EnvFilter::from_default_env())
|
||||||
|
.init();
|
||||||
|
|
||||||
|
let operations = 200;
|
||||||
|
|
||||||
|
let mut scenario = QueueScenarioBuilder::deployment_with(|_| QueueTopology::new(3))
|
||||||
|
.with_run_duration(Duration::from_secs(30))
|
||||||
|
.with_workload(
|
||||||
|
QueueRoundTripWorkload::new()
|
||||||
|
.operations(operations)
|
||||||
|
.rate_per_sec(25),
|
||||||
|
)
|
||||||
|
.with_expectation(QueueDrained::new().timeout(Duration::from_secs(25)))
|
||||||
|
.build()?;
|
||||||
|
|
||||||
|
let deployer = QueueLocalDeployer::default();
|
||||||
|
let runner = deployer.deploy(&scenario).await?;
|
||||||
|
runner.run(&mut scenario).await?;
|
||||||
|
Ok(())
|
||||||
|
}
|
||||||
47
examples/queue/examples/src/bin/compose_convergence.rs
Normal file
47
examples/queue/examples/src/bin/compose_convergence.rs
Normal file
@ -0,0 +1,47 @@
|
|||||||
|
use std::time::Duration;
|
||||||
|
|
||||||
|
use anyhow::{Context as _, Result};
|
||||||
|
use queue_runtime_workloads::{
|
||||||
|
QueueBuilderExt, QueueConverges, QueueProduceWorkload, QueueScenarioBuilder, QueueTopology,
|
||||||
|
};
|
||||||
|
use testing_framework_core::scenario::Deployer;
|
||||||
|
use testing_framework_runner_compose::ComposeRunnerError;
|
||||||
|
use tracing::{info, warn};
|
||||||
|
|
||||||
|
#[tokio::main]
|
||||||
|
async fn main() -> Result<()> {
|
||||||
|
tracing_subscriber::fmt()
|
||||||
|
.with_env_filter(tracing_subscriber::EnvFilter::from_default_env())
|
||||||
|
.init();
|
||||||
|
|
||||||
|
let operations = 200;
|
||||||
|
|
||||||
|
let mut scenario = QueueScenarioBuilder::deployment_with(|_| QueueTopology::new(3))
|
||||||
|
.with_run_duration(Duration::from_secs(30))
|
||||||
|
.with_workload(
|
||||||
|
QueueProduceWorkload::new()
|
||||||
|
.operations(operations)
|
||||||
|
.rate_per_sec(20),
|
||||||
|
)
|
||||||
|
.with_expectation(QueueConverges::new(operations).timeout(Duration::from_secs(25)))
|
||||||
|
.build()?;
|
||||||
|
|
||||||
|
let deployer = queue_runtime_ext::QueueComposeDeployer::new();
|
||||||
|
let runner = match deployer.deploy(&scenario).await {
|
||||||
|
Ok(runner) => runner,
|
||||||
|
Err(ComposeRunnerError::DockerUnavailable) => {
|
||||||
|
warn!("docker unavailable; skipping compose queue run");
|
||||||
|
return Ok(());
|
||||||
|
}
|
||||||
|
Err(error) => {
|
||||||
|
return Err(anyhow::Error::new(error)).context("deploying queue compose stack");
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
info!("running queue compose convergence scenario");
|
||||||
|
runner
|
||||||
|
.run(&mut scenario)
|
||||||
|
.await
|
||||||
|
.context("running queue compose scenario")?;
|
||||||
|
Ok(())
|
||||||
|
}
|
||||||
48
examples/queue/examples/src/bin/compose_roundtrip.rs
Normal file
48
examples/queue/examples/src/bin/compose_roundtrip.rs
Normal file
@ -0,0 +1,48 @@
|
|||||||
|
use std::time::Duration;
|
||||||
|
|
||||||
|
use anyhow::{Context as _, Result};
|
||||||
|
use queue_runtime_workloads::{
|
||||||
|
QueueBuilderExt, QueueDrained, QueueRoundTripWorkload, QueueScenarioBuilder, QueueTopology,
|
||||||
|
};
|
||||||
|
use testing_framework_core::scenario::Deployer;
|
||||||
|
use testing_framework_runner_compose::ComposeRunnerError;
|
||||||
|
use tracing::{info, warn};
|
||||||
|
|
||||||
|
#[tokio::main]
|
||||||
|
async fn main() -> Result<()> {
|
||||||
|
tracing_subscriber::fmt()
|
||||||
|
.with_env_filter(tracing_subscriber::EnvFilter::from_default_env())
|
||||||
|
.init();
|
||||||
|
|
||||||
|
let operations = 200;
|
||||||
|
|
||||||
|
let mut scenario = QueueScenarioBuilder::deployment_with(|_| QueueTopology::new(3))
|
||||||
|
.with_run_duration(Duration::from_secs(30))
|
||||||
|
.with_workload(
|
||||||
|
QueueRoundTripWorkload::new()
|
||||||
|
.operations(operations)
|
||||||
|
.rate_per_sec(20),
|
||||||
|
)
|
||||||
|
.with_expectation(QueueDrained::new().timeout(Duration::from_secs(25)))
|
||||||
|
.build()?;
|
||||||
|
|
||||||
|
let deployer = queue_runtime_ext::QueueComposeDeployer::new();
|
||||||
|
let runner = match deployer.deploy(&scenario).await {
|
||||||
|
Ok(runner) => runner,
|
||||||
|
Err(ComposeRunnerError::DockerUnavailable) => {
|
||||||
|
warn!("docker unavailable; skipping compose queue roundtrip run");
|
||||||
|
return Ok(());
|
||||||
|
}
|
||||||
|
Err(error) => {
|
||||||
|
return Err(anyhow::Error::new(error))
|
||||||
|
.context("deploying queue compose roundtrip stack");
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
info!("running queue compose roundtrip scenario");
|
||||||
|
runner
|
||||||
|
.run(&mut scenario)
|
||||||
|
.await
|
||||||
|
.context("running queue compose roundtrip scenario")?;
|
||||||
|
Ok(())
|
||||||
|
}
|
||||||
21
examples/queue/queue-node/Cargo.toml
Normal file
21
examples/queue/queue-node/Cargo.toml
Normal file
@ -0,0 +1,21 @@
|
|||||||
|
[package]
|
||||||
|
edition.workspace = true
|
||||||
|
license.workspace = true
|
||||||
|
name = "queue-node"
|
||||||
|
version.workspace = true
|
||||||
|
|
||||||
|
[[bin]]
|
||||||
|
name = "queue-node"
|
||||||
|
path = "src/main.rs"
|
||||||
|
|
||||||
|
[dependencies]
|
||||||
|
anyhow = "1.0"
|
||||||
|
axum = "0.7"
|
||||||
|
clap = { version = "4.0", features = ["derive"] }
|
||||||
|
reqwest = { workspace = true, features = ["json"] }
|
||||||
|
serde = { workspace = true }
|
||||||
|
serde_yaml = { workspace = true }
|
||||||
|
tokio = { workspace = true, features = ["full"] }
|
||||||
|
tower-http = { version = "0.6", features = ["trace"] }
|
||||||
|
tracing = { workspace = true }
|
||||||
|
tracing-subscriber = { version = "0.3", features = ["env-filter"] }
|
||||||
40
examples/queue/queue-node/src/client.rs
Normal file
40
examples/queue/queue-node/src/client.rs
Normal file
@ -0,0 +1,40 @@
|
|||||||
|
use reqwest::Url;
|
||||||
|
use serde::Serialize;
|
||||||
|
|
||||||
|
#[derive(Clone)]
|
||||||
|
pub struct QueueHttpClient {
|
||||||
|
base_url: Url,
|
||||||
|
client: reqwest::Client,
|
||||||
|
}
|
||||||
|
|
||||||
|
impl QueueHttpClient {
|
||||||
|
#[must_use]
|
||||||
|
pub fn new(base_url: Url) -> Self {
|
||||||
|
Self {
|
||||||
|
base_url,
|
||||||
|
client: reqwest::Client::new(),
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
pub async fn get<T: serde::de::DeserializeOwned>(&self, path: &str) -> anyhow::Result<T> {
|
||||||
|
let url = self.base_url.join(path)?;
|
||||||
|
let response = self.client.get(url).send().await?.error_for_status()?;
|
||||||
|
Ok(response.json().await?)
|
||||||
|
}
|
||||||
|
|
||||||
|
pub async fn post<B: Serialize, T: serde::de::DeserializeOwned>(
|
||||||
|
&self,
|
||||||
|
path: &str,
|
||||||
|
body: &B,
|
||||||
|
) -> anyhow::Result<T> {
|
||||||
|
let url = self.base_url.join(path)?;
|
||||||
|
let response = self
|
||||||
|
.client
|
||||||
|
.post(url)
|
||||||
|
.json(body)
|
||||||
|
.send()
|
||||||
|
.await?
|
||||||
|
.error_for_status()?;
|
||||||
|
Ok(response.json().await?)
|
||||||
|
}
|
||||||
|
}
|
||||||
29
examples/queue/queue-node/src/config.rs
Normal file
29
examples/queue/queue-node/src/config.rs
Normal file
@ -0,0 +1,29 @@
|
|||||||
|
use std::{fs, path::Path};
|
||||||
|
|
||||||
|
use serde::{Deserialize, Serialize};
|
||||||
|
|
||||||
|
#[derive(Clone, Debug, Serialize, Deserialize)]
|
||||||
|
pub struct PeerInfo {
|
||||||
|
pub node_id: u64,
|
||||||
|
pub http_address: String,
|
||||||
|
}
|
||||||
|
|
||||||
|
#[derive(Clone, Debug, Serialize, Deserialize)]
|
||||||
|
pub struct QueueConfig {
|
||||||
|
pub node_id: u64,
|
||||||
|
pub http_port: u16,
|
||||||
|
pub peers: Vec<PeerInfo>,
|
||||||
|
#[serde(default = "default_sync_interval_ms")]
|
||||||
|
pub sync_interval_ms: u64,
|
||||||
|
}
|
||||||
|
|
||||||
|
impl QueueConfig {
|
||||||
|
pub fn load(path: &Path) -> anyhow::Result<Self> {
|
||||||
|
let raw = fs::read_to_string(path)?;
|
||||||
|
Ok(serde_yaml::from_str(&raw)?)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Fallback for `QueueConfig::sync_interval_ms` when the key is missing from the config.
const fn default_sync_interval_ms() -> u64 {
    1_000
}
|
||||||
3
examples/queue/queue-node/src/lib.rs
Normal file
3
examples/queue/queue-node/src/lib.rs
Normal file
@ -0,0 +1,3 @@
|
|||||||
|
pub mod client;
|
||||||
|
|
||||||
|
pub use client::QueueHttpClient;
|
||||||
36
examples/queue/queue-node/src/main.rs
Normal file
36
examples/queue/queue-node/src/main.rs
Normal file
@ -0,0 +1,36 @@
|
|||||||
|
mod config;
|
||||||
|
mod server;
|
||||||
|
mod state;
|
||||||
|
mod sync;
|
||||||
|
|
||||||
|
use std::path::PathBuf;
|
||||||
|
|
||||||
|
use clap::Parser;
|
||||||
|
use tracing_subscriber::{layer::SubscriberExt, util::SubscriberInitExt};
|
||||||
|
|
||||||
|
use crate::{config::QueueConfig, state::QueueState, sync::SyncService};
|
||||||
|
|
||||||
|
#[derive(Parser, Debug)]
|
||||||
|
#[command(name = "queue-node")]
|
||||||
|
struct Args {
|
||||||
|
#[arg(short, long)]
|
||||||
|
config: PathBuf,
|
||||||
|
}
|
||||||
|
|
||||||
|
#[tokio::main]
|
||||||
|
async fn main() -> anyhow::Result<()> {
|
||||||
|
tracing_subscriber::registry()
|
||||||
|
.with(
|
||||||
|
tracing_subscriber::EnvFilter::try_from_default_env()
|
||||||
|
.unwrap_or_else(|_| "queue_node=info,tower_http=debug".into()),
|
||||||
|
)
|
||||||
|
.with(tracing_subscriber::fmt::layer())
|
||||||
|
.init();
|
||||||
|
|
||||||
|
let args = Args::parse();
|
||||||
|
let config = QueueConfig::load(&args.config)?;
|
||||||
|
|
||||||
|
let state = QueueState::new(config.node_id);
|
||||||
|
SyncService::new(config.clone(), state.clone()).start();
|
||||||
|
server::start_server(config, state).await
|
||||||
|
}
|
||||||
115
examples/queue/queue-node/src/server.rs
Normal file
115
examples/queue/queue-node/src/server.rs
Normal file
@ -0,0 +1,115 @@
|
|||||||
|
use std::net::SocketAddr;
|
||||||
|
|
||||||
|
use axum::{
|
||||||
|
Router,
|
||||||
|
extract::State,
|
||||||
|
http::StatusCode,
|
||||||
|
response::Json,
|
||||||
|
routing::{get, post},
|
||||||
|
};
|
||||||
|
use serde::{Deserialize, Serialize};
|
||||||
|
use tower_http::trace::TraceLayer;
|
||||||
|
|
||||||
|
use crate::{
|
||||||
|
config::QueueConfig,
|
||||||
|
state::{QueueMessage, QueueRevision, QueueState, QueueStateView, Snapshot},
|
||||||
|
};
|
||||||
|
|
||||||
|
#[derive(Serialize)]
|
||||||
|
struct HealthResponse {
|
||||||
|
status: &'static str,
|
||||||
|
}
|
||||||
|
|
||||||
|
#[derive(Deserialize)]
|
||||||
|
struct EnqueueRequest {
|
||||||
|
payload: String,
|
||||||
|
}
|
||||||
|
|
||||||
|
#[derive(Serialize)]
|
||||||
|
struct EnqueueResponse {
|
||||||
|
accepted: bool,
|
||||||
|
id: u64,
|
||||||
|
queue_len: usize,
|
||||||
|
revision: QueueRevision,
|
||||||
|
}
|
||||||
|
|
||||||
|
#[derive(Serialize)]
|
||||||
|
struct DequeueResponse {
|
||||||
|
message: Option<QueueMessage>,
|
||||||
|
queue_len: usize,
|
||||||
|
revision: QueueRevision,
|
||||||
|
}
|
||||||
|
|
||||||
|
pub async fn start_server(config: QueueConfig, state: QueueState) -> anyhow::Result<()> {
|
||||||
|
let app = Router::new()
|
||||||
|
.route("/health/live", get(health_live))
|
||||||
|
.route("/health/ready", get(health_ready))
|
||||||
|
.route("/queue/enqueue", post(enqueue))
|
||||||
|
.route("/queue/dequeue", post(dequeue))
|
||||||
|
.route("/queue/state", get(queue_state))
|
||||||
|
.route("/internal/snapshot", get(get_snapshot))
|
||||||
|
.layer(TraceLayer::new_for_http())
|
||||||
|
.with_state(state.clone());
|
||||||
|
|
||||||
|
let addr = SocketAddr::from(([0, 0, 0, 0], config.http_port));
|
||||||
|
let listener = tokio::net::TcpListener::bind(addr).await?;
|
||||||
|
|
||||||
|
state.set_ready(true).await;
|
||||||
|
tracing::info!(node_id = state.node_id(), %addr, "queue node ready");
|
||||||
|
|
||||||
|
axum::serve(listener, app).await?;
|
||||||
|
Ok(())
|
||||||
|
}
|
||||||
|
|
||||||
|
async fn health_live() -> (StatusCode, Json<HealthResponse>) {
|
||||||
|
(StatusCode::OK, Json(HealthResponse { status: "alive" }))
|
||||||
|
}
|
||||||
|
|
||||||
|
async fn health_ready(State(state): State<QueueState>) -> (StatusCode, Json<HealthResponse>) {
|
||||||
|
if state.is_ready().await {
|
||||||
|
(StatusCode::OK, Json(HealthResponse { status: "ready" }))
|
||||||
|
} else {
|
||||||
|
(
|
||||||
|
StatusCode::SERVICE_UNAVAILABLE,
|
||||||
|
Json(HealthResponse {
|
||||||
|
status: "not-ready",
|
||||||
|
}),
|
||||||
|
)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
async fn enqueue(
|
||||||
|
State(state): State<QueueState>,
|
||||||
|
Json(request): Json<EnqueueRequest>,
|
||||||
|
) -> (StatusCode, Json<EnqueueResponse>) {
|
||||||
|
let outcome = state.enqueue_local(request.payload).await;
|
||||||
|
(
|
||||||
|
StatusCode::OK,
|
||||||
|
Json(EnqueueResponse {
|
||||||
|
accepted: outcome.accepted,
|
||||||
|
id: outcome.id,
|
||||||
|
queue_len: outcome.queue_len,
|
||||||
|
revision: outcome.revision,
|
||||||
|
}),
|
||||||
|
)
|
||||||
|
}
|
||||||
|
|
||||||
|
async fn dequeue(State(state): State<QueueState>) -> (StatusCode, Json<DequeueResponse>) {
|
||||||
|
let outcome = state.dequeue_local().await;
|
||||||
|
(
|
||||||
|
StatusCode::OK,
|
||||||
|
Json(DequeueResponse {
|
||||||
|
message: outcome.message,
|
||||||
|
queue_len: outcome.queue_len,
|
||||||
|
revision: outcome.revision,
|
||||||
|
}),
|
||||||
|
)
|
||||||
|
}
|
||||||
|
|
||||||
|
async fn queue_state(State(state): State<QueueState>) -> Json<QueueStateView> {
|
||||||
|
Json(state.queue_state().await)
|
||||||
|
}
|
||||||
|
|
||||||
|
async fn get_snapshot(State(state): State<QueueState>) -> Json<Snapshot> {
|
||||||
|
Json(state.snapshot().await)
|
||||||
|
}
|
||||||
151
examples/queue/queue-node/src/state.rs
Normal file
151
examples/queue/queue-node/src/state.rs
Normal file
@ -0,0 +1,151 @@
|
|||||||
|
use std::{collections::VecDeque, sync::Arc};
|
||||||
|
|
||||||
|
use serde::{Deserialize, Serialize};
|
||||||
|
use tokio::sync::RwLock;
|
||||||
|
|
||||||
|
#[derive(Clone, Copy, Debug, Default, Eq, PartialEq, Serialize, Deserialize)]
|
||||||
|
pub struct QueueRevision {
|
||||||
|
pub version: u64,
|
||||||
|
pub origin: u64,
|
||||||
|
}
|
||||||
|
|
||||||
|
#[derive(Clone, Debug, Eq, PartialEq, Serialize, Deserialize)]
|
||||||
|
pub struct QueueMessage {
|
||||||
|
pub id: u64,
|
||||||
|
pub payload: String,
|
||||||
|
}
|
||||||
|
|
||||||
|
#[derive(Clone, Debug, Serialize, Deserialize)]
|
||||||
|
pub struct Snapshot {
|
||||||
|
pub node_id: u64,
|
||||||
|
pub revision: QueueRevision,
|
||||||
|
pub messages: Vec<QueueMessage>,
|
||||||
|
}
|
||||||
|
|
||||||
|
#[derive(Clone, Debug, Eq, PartialEq, Serialize)]
|
||||||
|
pub struct QueueStateView {
|
||||||
|
pub revision: QueueRevision,
|
||||||
|
pub queue_len: usize,
|
||||||
|
pub head_id: Option<u64>,
|
||||||
|
pub tail_id: Option<u64>,
|
||||||
|
}
|
||||||
|
|
||||||
|
#[derive(Clone, Debug)]
|
||||||
|
pub struct EnqueueOutcome {
|
||||||
|
pub accepted: bool,
|
||||||
|
pub id: u64,
|
||||||
|
pub queue_len: usize,
|
||||||
|
pub revision: QueueRevision,
|
||||||
|
}
|
||||||
|
|
||||||
|
#[derive(Clone, Debug)]
|
||||||
|
pub struct DequeueOutcome {
|
||||||
|
pub message: Option<QueueMessage>,
|
||||||
|
pub queue_len: usize,
|
||||||
|
pub revision: QueueRevision,
|
||||||
|
}
|
||||||
|
|
||||||
|
#[derive(Debug, Default)]
|
||||||
|
struct QueueData {
|
||||||
|
revision: QueueRevision,
|
||||||
|
messages: VecDeque<QueueMessage>,
|
||||||
|
}
|
||||||
|
|
||||||
|
#[derive(Clone)]
|
||||||
|
pub struct QueueState {
|
||||||
|
node_id: u64,
|
||||||
|
ready: Arc<RwLock<bool>>,
|
||||||
|
data: Arc<RwLock<QueueData>>,
|
||||||
|
}
|
||||||
|
|
||||||
|
impl QueueState {
|
||||||
|
pub fn new(node_id: u64) -> Self {
|
||||||
|
Self {
|
||||||
|
node_id,
|
||||||
|
ready: Arc::new(RwLock::new(false)),
|
||||||
|
data: Arc::new(RwLock::new(QueueData::default())),
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
pub const fn node_id(&self) -> u64 {
|
||||||
|
self.node_id
|
||||||
|
}
|
||||||
|
|
||||||
|
pub async fn set_ready(&self, value: bool) {
|
||||||
|
*self.ready.write().await = value;
|
||||||
|
}
|
||||||
|
|
||||||
|
pub async fn is_ready(&self) -> bool {
|
||||||
|
*self.ready.read().await
|
||||||
|
}
|
||||||
|
|
||||||
|
pub async fn enqueue_local(&self, payload: String) -> EnqueueOutcome {
|
||||||
|
let mut data = self.data.write().await;
|
||||||
|
let id = next_message_id(&data.messages);
|
||||||
|
data.messages.push_back(QueueMessage { id, payload });
|
||||||
|
bump_revision(&mut data.revision, self.node_id);
|
||||||
|
|
||||||
|
EnqueueOutcome {
|
||||||
|
accepted: true,
|
||||||
|
id,
|
||||||
|
queue_len: data.messages.len(),
|
||||||
|
revision: data.revision,
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
pub async fn dequeue_local(&self) -> DequeueOutcome {
|
||||||
|
let mut data = self.data.write().await;
|
||||||
|
let message = data.messages.pop_front();
|
||||||
|
if message.is_some() {
|
||||||
|
bump_revision(&mut data.revision, self.node_id);
|
||||||
|
}
|
||||||
|
|
||||||
|
DequeueOutcome {
|
||||||
|
message,
|
||||||
|
queue_len: data.messages.len(),
|
||||||
|
revision: data.revision,
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
pub async fn queue_state(&self) -> QueueStateView {
|
||||||
|
let data = self.data.read().await;
|
||||||
|
QueueStateView {
|
||||||
|
revision: data.revision,
|
||||||
|
queue_len: data.messages.len(),
|
||||||
|
head_id: data.messages.front().map(|message| message.id),
|
||||||
|
tail_id: data.messages.back().map(|message| message.id),
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
pub async fn merge_snapshot(&self, snapshot: Snapshot) {
|
||||||
|
let mut data = self.data.write().await;
|
||||||
|
if is_newer_revision(snapshot.revision, data.revision) {
|
||||||
|
data.revision = snapshot.revision;
|
||||||
|
data.messages = snapshot.messages.into();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
pub async fn snapshot(&self) -> Snapshot {
|
||||||
|
let data = self.data.read().await;
|
||||||
|
Snapshot {
|
||||||
|
node_id: self.node_id,
|
||||||
|
revision: data.revision,
|
||||||
|
messages: data.messages.iter().cloned().collect(),
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
fn next_message_id(messages: &VecDeque<QueueMessage>) -> u64 {
|
||||||
|
messages
|
||||||
|
.back()
|
||||||
|
.map_or(1, |message| message.id.saturating_add(1))
|
||||||
|
}
|
||||||
|
|
||||||
|
fn bump_revision(revision: &mut QueueRevision, node_id: u64) {
|
||||||
|
revision.version = revision.version.saturating_add(1);
|
||||||
|
revision.origin = node_id;
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Reports whether `candidate` should replace `existing`.
///
/// Revisions are ordered by `version` first; equal versions are ordered by
/// `origin`. Identical revisions are not considered newer.
fn is_newer_revision(candidate: QueueRevision, existing: QueueRevision) -> bool {
    if candidate.version != existing.version {
        return candidate.version > existing.version;
    }

    candidate.origin > existing.origin
}
|
||||||
103
examples/queue/queue-node/src/sync.rs
Normal file
103
examples/queue/queue-node/src/sync.rs
Normal file
@ -0,0 +1,103 @@
|
|||||||
|
use std::{collections::HashMap, sync::Arc, time::Duration};
|
||||||
|
|
||||||
|
use reqwest::Client;
|
||||||
|
use tokio::sync::Mutex;
|
||||||
|
use tracing::{debug, warn};
|
||||||
|
|
||||||
|
use crate::{
|
||||||
|
config::QueueConfig,
|
||||||
|
state::{QueueState, Snapshot},
|
||||||
|
};
|
||||||
|
|
||||||
|
/// Consecutive-failure count at which per-peer sync errors start being logged
/// at `warn` level instead of `debug`.
const WARN_AFTER_CONSECUTIVE_FAILURES: u32 = 5;
|
||||||
|
|
||||||
|
#[derive(Clone)]
|
||||||
|
pub struct SyncService {
|
||||||
|
config: Arc<QueueConfig>,
|
||||||
|
state: QueueState,
|
||||||
|
client: Client,
|
||||||
|
failures_by_peer: Arc<Mutex<HashMap<String, u32>>>,
|
||||||
|
}
|
||||||
|
|
||||||
|
impl SyncService {
|
||||||
|
pub fn new(config: QueueConfig, state: QueueState) -> Self {
|
||||||
|
Self {
|
||||||
|
config: Arc::new(config),
|
||||||
|
state,
|
||||||
|
client: Client::new(),
|
||||||
|
failures_by_peer: Arc::new(Mutex::new(HashMap::new())),
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
pub fn start(&self) {
|
||||||
|
let service = self.clone();
|
||||||
|
tokio::spawn(async move {
|
||||||
|
service.run().await;
|
||||||
|
});
|
||||||
|
}
|
||||||
|
|
||||||
|
async fn run(self) {
|
||||||
|
let interval = Duration::from_millis(self.config.sync_interval_ms.max(100));
|
||||||
|
loop {
|
||||||
|
self.sync_once().await;
|
||||||
|
tokio::time::sleep(interval).await;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
async fn sync_once(&self) {
|
||||||
|
for peer in &self.config.peers {
|
||||||
|
match self.fetch_snapshot(&peer.http_address).await {
|
||||||
|
Ok(snapshot) => {
|
||||||
|
self.state.merge_snapshot(snapshot).await;
|
||||||
|
self.clear_failure_counter(&peer.http_address).await;
|
||||||
|
}
|
||||||
|
Err(error) => {
|
||||||
|
self.record_sync_failure(&peer.http_address, &error).await;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
async fn fetch_snapshot(&self, peer_address: &str) -> anyhow::Result<Snapshot> {
|
||||||
|
let url = format!("http://{peer_address}/internal/snapshot");
|
||||||
|
let snapshot = self
|
||||||
|
.client
|
||||||
|
.get(url)
|
||||||
|
.send()
|
||||||
|
.await?
|
||||||
|
.error_for_status()?
|
||||||
|
.json()
|
||||||
|
.await?;
|
||||||
|
Ok(snapshot)
|
||||||
|
}
|
||||||
|
|
||||||
|
async fn clear_failure_counter(&self, peer_address: &str) {
|
||||||
|
let mut failures = self.failures_by_peer.lock().await;
|
||||||
|
failures.remove(peer_address);
|
||||||
|
}
|
||||||
|
|
||||||
|
async fn record_sync_failure(&self, peer_address: &str, error: &anyhow::Error) {
|
||||||
|
let consecutive_failures = {
|
||||||
|
let mut failures = self.failures_by_peer.lock().await;
|
||||||
|
let entry = failures.entry(peer_address.to_owned()).or_insert(0);
|
||||||
|
*entry += 1;
|
||||||
|
*entry
|
||||||
|
};
|
||||||
|
|
||||||
|
if consecutive_failures >= WARN_AFTER_CONSECUTIVE_FAILURES {
|
||||||
|
warn!(
|
||||||
|
peer = %peer_address,
|
||||||
|
%error,
|
||||||
|
consecutive_failures,
|
||||||
|
"queue sync repeatedly failing"
|
||||||
|
);
|
||||||
|
} else {
|
||||||
|
debug!(
|
||||||
|
peer = %peer_address,
|
||||||
|
%error,
|
||||||
|
consecutive_failures,
|
||||||
|
"queue sync failed"
|
||||||
|
);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
13
examples/queue/testing/integration/Cargo.toml
Normal file
13
examples/queue/testing/integration/Cargo.toml
Normal file
@ -0,0 +1,13 @@
|
|||||||
|
[package]
|
||||||
|
edition.workspace = true
|
||||||
|
license.workspace = true
|
||||||
|
name = "queue-runtime-ext"
|
||||||
|
version.workspace = true
|
||||||
|
|
||||||
|
[dependencies]
|
||||||
|
async-trait = { workspace = true }
|
||||||
|
queue-node = { path = "../../queue-node" }
|
||||||
|
serde = { workspace = true }
|
||||||
|
testing-framework-core = { workspace = true }
|
||||||
|
testing-framework-runner-compose = { workspace = true }
|
||||||
|
testing-framework-runner-local = { workspace = true }
|
||||||
75
examples/queue/testing/integration/src/app.rs
Normal file
75
examples/queue/testing/integration/src/app.rs
Normal file
@ -0,0 +1,75 @@
|
|||||||
|
use std::io::Error;
|
||||||
|
|
||||||
|
use async_trait::async_trait;
|
||||||
|
use queue_node::QueueHttpClient;
|
||||||
|
use serde::{Deserialize, Serialize};
|
||||||
|
use testing_framework_core::scenario::{
|
||||||
|
Application, ClusterNodeConfigApplication, ClusterNodeView, ClusterPeerView, DynError,
|
||||||
|
NodeAccess, serialize_cluster_yaml_config,
|
||||||
|
};
|
||||||
|
|
||||||
|
/// Deployment topology of the queue example; an alias for the framework's `ClusterTopology`.
pub type QueueTopology = testing_framework_core::topology::ClusterTopology;
|
||||||
|
|
||||||
|
#[derive(Clone, Debug, Serialize, Deserialize)]
|
||||||
|
pub struct QueuePeerInfo {
|
||||||
|
pub node_id: u64,
|
||||||
|
pub http_address: String,
|
||||||
|
}
|
||||||
|
|
||||||
|
#[derive(Clone, Debug, Serialize, Deserialize)]
|
||||||
|
pub struct QueueNodeConfig {
|
||||||
|
pub node_id: u64,
|
||||||
|
pub http_port: u16,
|
||||||
|
pub peers: Vec<QueuePeerInfo>,
|
||||||
|
pub sync_interval_ms: u64,
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Marker type on which the queue example implements the testing-framework traits.
pub struct QueueEnv;
|
||||||
|
|
||||||
|
#[async_trait]
|
||||||
|
impl Application for QueueEnv {
|
||||||
|
type Deployment = QueueTopology;
|
||||||
|
type NodeClient = QueueHttpClient;
|
||||||
|
type NodeConfig = QueueNodeConfig;
|
||||||
|
fn build_node_client(access: &NodeAccess) -> Result<Self::NodeClient, DynError> {
|
||||||
|
Ok(QueueHttpClient::new(access.api_base_url()?))
|
||||||
|
}
|
||||||
|
|
||||||
|
fn node_readiness_path() -> &'static str {
|
||||||
|
"/health/ready"
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
impl ClusterNodeConfigApplication for QueueEnv {
|
||||||
|
type ConfigError = Error;
|
||||||
|
|
||||||
|
fn static_network_port() -> u16 {
|
||||||
|
8080
|
||||||
|
}
|
||||||
|
|
||||||
|
fn build_cluster_node_config(
|
||||||
|
node: &ClusterNodeView,
|
||||||
|
peers: &[ClusterPeerView],
|
||||||
|
) -> Result<Self::NodeConfig, Self::ConfigError> {
|
||||||
|
let peers = peers
|
||||||
|
.iter()
|
||||||
|
.map(|peer| QueuePeerInfo {
|
||||||
|
node_id: peer.index() as u64,
|
||||||
|
http_address: peer.authority(),
|
||||||
|
})
|
||||||
|
.collect::<Vec<_>>();
|
||||||
|
|
||||||
|
Ok(QueueNodeConfig {
|
||||||
|
node_id: node.index() as u64,
|
||||||
|
http_port: node.network_port(),
|
||||||
|
peers,
|
||||||
|
sync_interval_ms: 500,
|
||||||
|
})
|
||||||
|
}
|
||||||
|
|
||||||
|
fn serialize_cluster_node_config(
|
||||||
|
config: &Self::NodeConfig,
|
||||||
|
) -> Result<String, Self::ConfigError> {
|
||||||
|
serialize_cluster_yaml_config(config).map_err(Error::other)
|
||||||
|
}
|
||||||
|
}
|
||||||
15
examples/queue/testing/integration/src/compose_env.rs
Normal file
15
examples/queue/testing/integration/src/compose_env.rs
Normal file
@ -0,0 +1,15 @@
|
|||||||
|
use testing_framework_runner_compose::{BinaryConfigNodeSpec, ComposeBinaryApp};
|
||||||
|
|
||||||
|
use crate::QueueEnv;
|
||||||
|
|
||||||
|
/// Config file path handed to the compose node spec.
const NODE_CONFIG_PATH: &str = "/etc/queue/config.yaml";
|
||||||
|
|
||||||
|
impl ComposeBinaryApp for QueueEnv {
|
||||||
|
fn compose_node_spec() -> BinaryConfigNodeSpec {
|
||||||
|
BinaryConfigNodeSpec::conventional(
|
||||||
|
"/usr/local/bin/queue-node",
|
||||||
|
NODE_CONFIG_PATH,
|
||||||
|
vec![8080, 8081],
|
||||||
|
)
|
||||||
|
}
|
||||||
|
}
|
||||||
10
examples/queue/testing/integration/src/lib.rs
Normal file
10
examples/queue/testing/integration/src/lib.rs
Normal file
@ -0,0 +1,10 @@
|
|||||||
|
mod app;
|
||||||
|
mod compose_env;
|
||||||
|
mod local_env;
|
||||||
|
pub mod scenario;
|
||||||
|
|
||||||
|
pub use app::*;
|
||||||
|
pub use scenario::{QueueBuilderExt, QueueScenarioBuilder};
|
||||||
|
|
||||||
|
/// Local-process deployer specialized for the queue example.
pub type QueueLocalDeployer = testing_framework_runner_local::ProcessDeployer<QueueEnv>;
|
||||||
|
/// Docker Compose deployer specialized for the queue example.
pub type QueueComposeDeployer = testing_framework_runner_compose::ComposeDeployer<QueueEnv>;
|
||||||
41
examples/queue/testing/integration/src/local_env.rs
Normal file
41
examples/queue/testing/integration/src/local_env.rs
Normal file
@ -0,0 +1,41 @@
|
|||||||
|
use std::collections::HashMap;
|
||||||
|
|
||||||
|
use testing_framework_core::scenario::{DynError, StartNodeOptions};
|
||||||
|
use testing_framework_runner_local::{
|
||||||
|
LocalBinaryApp, LocalNodePorts, LocalPeerNode, LocalProcessSpec,
|
||||||
|
build_local_cluster_node_config, yaml_node_config,
|
||||||
|
};
|
||||||
|
|
||||||
|
use crate::{QueueEnv, QueueNodeConfig};
|
||||||
|
|
||||||
|
impl LocalBinaryApp for QueueEnv {
|
||||||
|
fn initial_node_name_prefix() -> &'static str {
|
||||||
|
"queue-node"
|
||||||
|
}
|
||||||
|
|
||||||
|
fn build_local_node_config_with_peers(
|
||||||
|
_topology: &Self::Deployment,
|
||||||
|
index: usize,
|
||||||
|
ports: &LocalNodePorts,
|
||||||
|
peers: &[LocalPeerNode],
|
||||||
|
_peer_ports_by_name: &HashMap<String, u16>,
|
||||||
|
_options: &StartNodeOptions<Self>,
|
||||||
|
_template_config: Option<
|
||||||
|
&<Self as testing_framework_core::scenario::Application>::NodeConfig,
|
||||||
|
>,
|
||||||
|
) -> Result<<Self as testing_framework_core::scenario::Application>::NodeConfig, DynError> {
|
||||||
|
build_local_cluster_node_config::<Self>(index, ports, peers)
|
||||||
|
}
|
||||||
|
|
||||||
|
fn local_process_spec() -> LocalProcessSpec {
|
||||||
|
LocalProcessSpec::new("QUEUE_NODE_BIN", "queue-node").with_rust_log("queue_node=info")
|
||||||
|
}
|
||||||
|
|
||||||
|
fn render_local_config(config: &QueueNodeConfig) -> Result<Vec<u8>, DynError> {
|
||||||
|
yaml_node_config(config)
|
||||||
|
}
|
||||||
|
|
||||||
|
fn http_api_port(config: &QueueNodeConfig) -> u16 {
|
||||||
|
config.http_port
|
||||||
|
}
|
||||||
|
}
|
||||||
15
examples/queue/testing/integration/src/scenario.rs
Normal file
15
examples/queue/testing/integration/src/scenario.rs
Normal file
@ -0,0 +1,15 @@
|
|||||||
|
use testing_framework_core::scenario::ScenarioBuilder;
|
||||||
|
|
||||||
|
use crate::{QueueEnv, QueueTopology};
|
||||||
|
|
||||||
|
/// Scenario builder specialized for the queue example.
pub type QueueScenarioBuilder = ScenarioBuilder<QueueEnv>;
|
||||||
|
|
||||||
|
pub trait QueueBuilderExt: Sized {
|
||||||
|
fn deployment_with(f: impl FnOnce(QueueTopology) -> QueueTopology) -> Self;
|
||||||
|
}
|
||||||
|
|
||||||
|
impl QueueBuilderExt for QueueScenarioBuilder {
|
||||||
|
fn deployment_with(f: impl FnOnce(QueueTopology) -> QueueTopology) -> Self {
|
||||||
|
QueueScenarioBuilder::with_deployment(f(QueueTopology::new(3)))
|
||||||
|
}
|
||||||
|
}
|
||||||
14
examples/queue/testing/workloads/Cargo.toml
Normal file
14
examples/queue/testing/workloads/Cargo.toml
Normal file
@ -0,0 +1,14 @@
|
|||||||
|
[package]
|
||||||
|
edition.workspace = true
|
||||||
|
license.workspace = true
|
||||||
|
name = "queue-runtime-workloads"
|
||||||
|
version.workspace = true
|
||||||
|
|
||||||
|
[dependencies]
|
||||||
|
async-trait = { workspace = true }
|
||||||
|
queue-node = { path = "../../queue-node" }
|
||||||
|
queue-runtime-ext = { path = "../integration" }
|
||||||
|
serde = { workspace = true }
|
||||||
|
testing-framework-core = { workspace = true }
|
||||||
|
tokio = { workspace = true, features = ["full"] }
|
||||||
|
tracing = { workspace = true }
|
||||||
104
examples/queue/testing/workloads/src/drained.rs
Normal file
104
examples/queue/testing/workloads/src/drained.rs
Normal file
@ -0,0 +1,104 @@
|
|||||||
|
use std::time::Duration;
|
||||||
|
|
||||||
|
use async_trait::async_trait;
|
||||||
|
use queue_runtime_ext::QueueEnv;
|
||||||
|
use serde::Deserialize;
|
||||||
|
use testing_framework_core::scenario::{DynError, Expectation, RunContext};
|
||||||
|
use tracing::info;
|
||||||
|
|
||||||
|
/// Expectation that every node ends up with the same, empty queue.
#[derive(Clone)]
pub struct QueueDrained {
    /// Maximum time to wait for the condition.
    timeout: Duration,
    /// Pause between checks.
    poll_interval: Duration,
}
|
||||||
|
|
||||||
|
#[derive(Clone, Debug, Deserialize, Eq, PartialEq)]
|
||||||
|
struct QueueRevision {
|
||||||
|
version: u64,
|
||||||
|
origin: u64,
|
||||||
|
}
|
||||||
|
|
||||||
|
#[derive(Clone, Debug, Deserialize, Eq, PartialEq)]
|
||||||
|
struct QueueStateResponse {
|
||||||
|
revision: QueueRevision,
|
||||||
|
queue_len: usize,
|
||||||
|
head_id: Option<u64>,
|
||||||
|
tail_id: Option<u64>,
|
||||||
|
}
|
||||||
|
|
||||||
|
impl QueueDrained {
|
||||||
|
#[must_use]
|
||||||
|
pub fn new() -> Self {
|
||||||
|
Self {
|
||||||
|
timeout: Duration::from_secs(20),
|
||||||
|
poll_interval: Duration::from_millis(500),
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
#[must_use]
|
||||||
|
pub const fn timeout(mut self, timeout: Duration) -> Self {
|
||||||
|
self.timeout = timeout;
|
||||||
|
self
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
impl Default for QueueDrained {
|
||||||
|
fn default() -> Self {
|
||||||
|
Self::new()
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
#[async_trait]
|
||||||
|
impl Expectation<QueueEnv> for QueueDrained {
|
||||||
|
fn name(&self) -> &str {
|
||||||
|
"queue_drained"
|
||||||
|
}
|
||||||
|
|
||||||
|
async fn evaluate(&mut self, ctx: &RunContext<QueueEnv>) -> Result<(), DynError> {
|
||||||
|
let clients = ctx.node_clients().snapshot();
|
||||||
|
if clients.is_empty() {
|
||||||
|
return Err("no queue node clients available".into());
|
||||||
|
}
|
||||||
|
|
||||||
|
let deadline = tokio::time::Instant::now() + self.timeout;
|
||||||
|
while tokio::time::Instant::now() < deadline {
|
||||||
|
if is_drained_and_converged(&clients).await? {
|
||||||
|
info!("queue drained and converged");
|
||||||
|
return Ok(());
|
||||||
|
}
|
||||||
|
tokio::time::sleep(self.poll_interval).await;
|
||||||
|
}
|
||||||
|
|
||||||
|
Err(format!("queue not drained within {:?}", self.timeout).into())
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
async fn is_drained_and_converged(
|
||||||
|
clients: &[queue_node::QueueHttpClient],
|
||||||
|
) -> Result<bool, DynError> {
|
||||||
|
let Some((first, rest)) = clients.split_first() else {
|
||||||
|
return Ok(false);
|
||||||
|
};
|
||||||
|
|
||||||
|
let baseline = read_state(first).await?;
|
||||||
|
if !is_drained(&baseline) {
|
||||||
|
return Ok(false);
|
||||||
|
}
|
||||||
|
|
||||||
|
for client in rest {
|
||||||
|
let current = read_state(client).await?;
|
||||||
|
if current != baseline {
|
||||||
|
return Ok(false);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
Ok(true)
|
||||||
|
}
|
||||||
|
|
||||||
|
fn is_drained(state: &QueueStateResponse) -> bool {
|
||||||
|
state.queue_len == 0 && state.head_id.is_none() && state.tail_id.is_none()
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Fetches and decodes `/queue/state` from one node.
async fn read_state(client: &queue_node::QueueHttpClient) -> Result<QueueStateResponse, DynError> {
    let state: QueueStateResponse = client.get("/queue/state").await?;
    Ok(state)
}
|
||||||
106
examples/queue/testing/workloads/src/expectations.rs
Normal file
106
examples/queue/testing/workloads/src/expectations.rs
Normal file
@ -0,0 +1,106 @@
|
|||||||
|
use std::time::Duration;
|
||||||
|
|
||||||
|
use async_trait::async_trait;
|
||||||
|
use queue_runtime_ext::QueueEnv;
|
||||||
|
use serde::Deserialize;
|
||||||
|
use testing_framework_core::scenario::{DynError, Expectation, RunContext};
|
||||||
|
use tracing::info;
|
||||||
|
|
||||||
|
/// Expectation that all nodes report the same queue state with at least a
/// minimum number of messages.
#[derive(Clone)]
pub struct QueueConverges {
    /// Minimum queue length the first node must report.
    min_queue_len: usize,
    /// Maximum time to wait for convergence.
    timeout: Duration,
    /// Pause between checks.
    poll_interval: Duration,
}
|
||||||
|
|
||||||
|
#[derive(Clone, Debug, Deserialize, Eq, PartialEq)]
|
||||||
|
struct QueueRevision {
|
||||||
|
version: u64,
|
||||||
|
origin: u64,
|
||||||
|
}
|
||||||
|
|
||||||
|
#[derive(Clone, Debug, Deserialize, Eq, PartialEq)]
|
||||||
|
struct QueueStateResponse {
|
||||||
|
revision: QueueRevision,
|
||||||
|
queue_len: usize,
|
||||||
|
head_id: Option<u64>,
|
||||||
|
tail_id: Option<u64>,
|
||||||
|
}
|
||||||
|
|
||||||
|
impl QueueConverges {
|
||||||
|
#[must_use]
|
||||||
|
pub fn new(min_queue_len: usize) -> Self {
|
||||||
|
Self {
|
||||||
|
min_queue_len,
|
||||||
|
timeout: Duration::from_secs(20),
|
||||||
|
poll_interval: Duration::from_millis(500),
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
#[must_use]
|
||||||
|
pub const fn timeout(mut self, timeout: Duration) -> Self {
|
||||||
|
self.timeout = timeout;
|
||||||
|
self
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
#[async_trait]
|
||||||
|
impl Expectation<QueueEnv> for QueueConverges {
|
||||||
|
fn name(&self) -> &str {
|
||||||
|
"queue_converges"
|
||||||
|
}
|
||||||
|
|
||||||
|
async fn evaluate(&mut self, ctx: &RunContext<QueueEnv>) -> Result<(), DynError> {
|
||||||
|
let clients = ctx.node_clients().snapshot();
|
||||||
|
if clients.is_empty() {
|
||||||
|
return Err("no queue node clients available".into());
|
||||||
|
}
|
||||||
|
|
||||||
|
let deadline = tokio::time::Instant::now() + self.timeout;
|
||||||
|
while tokio::time::Instant::now() < deadline {
|
||||||
|
if self.is_converged(&clients).await? {
|
||||||
|
info!(
|
||||||
|
min_queue_len = self.min_queue_len,
|
||||||
|
"queue convergence reached"
|
||||||
|
);
|
||||||
|
return Ok(());
|
||||||
|
}
|
||||||
|
tokio::time::sleep(self.poll_interval).await;
|
||||||
|
}
|
||||||
|
|
||||||
|
Err(format!(
|
||||||
|
"queue convergence not reached within {:?} (min_queue_len={})",
|
||||||
|
self.timeout, self.min_queue_len
|
||||||
|
)
|
||||||
|
.into())
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
impl QueueConverges {
|
||||||
|
async fn is_converged(
|
||||||
|
&self,
|
||||||
|
clients: &[queue_node::QueueHttpClient],
|
||||||
|
) -> Result<bool, DynError> {
|
||||||
|
let Some((first, rest)) = clients.split_first() else {
|
||||||
|
return Ok(false);
|
||||||
|
};
|
||||||
|
|
||||||
|
let baseline = read_state(first).await?;
|
||||||
|
if baseline.queue_len < self.min_queue_len {
|
||||||
|
return Ok(false);
|
||||||
|
}
|
||||||
|
|
||||||
|
for client in rest {
|
||||||
|
let current = read_state(client).await?;
|
||||||
|
if current != baseline {
|
||||||
|
return Ok(false);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
Ok(true)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Fetches and decodes `/queue/state` from one node.
async fn read_state(client: &queue_node::QueueHttpClient) -> Result<QueueStateResponse, DynError> {
    let state: QueueStateResponse = client.get("/queue/state").await?;
    Ok(state)
}
|
||||||
10
examples/queue/testing/workloads/src/lib.rs
Normal file
10
examples/queue/testing/workloads/src/lib.rs
Normal file
@ -0,0 +1,10 @@
|
|||||||
|
mod drained;
|
||||||
|
mod expectations;
|
||||||
|
mod produce;
|
||||||
|
mod roundtrip;
|
||||||
|
|
||||||
|
pub use drained::QueueDrained;
|
||||||
|
pub use expectations::QueueConverges;
|
||||||
|
pub use produce::QueueProduceWorkload;
|
||||||
|
pub use queue_runtime_ext::{QueueBuilderExt, QueueEnv, QueueScenarioBuilder, QueueTopology};
|
||||||
|
pub use roundtrip::QueueRoundTripWorkload;
|
||||||
116
examples/queue/testing/workloads/src/produce.rs
Normal file
116
examples/queue/testing/workloads/src/produce.rs
Normal file
@ -0,0 +1,116 @@
|
|||||||
|
use std::time::Duration;
|
||||||
|
|
||||||
|
use async_trait::async_trait;
|
||||||
|
use queue_runtime_ext::QueueEnv;
|
||||||
|
use serde::{Deserialize, Serialize};
|
||||||
|
use testing_framework_core::scenario::{DynError, RunContext, Workload};
|
||||||
|
use tracing::info;
|
||||||
|
|
||||||
|
/// Workload that enqueues a fixed number of messages on the first node.
#[derive(Clone)]
pub struct QueueProduceWorkload {
    /// Number of messages to enqueue.
    operations: usize,
    /// Target enqueue rate; `None` means no pause between requests.
    rate_per_sec: Option<usize>,
    /// Prefix of every generated payload (`<prefix>-<index>`).
    payload_prefix: String,
}
|
||||||
|
|
||||||
|
#[derive(Serialize)]
|
||||||
|
struct EnqueueRequest {
|
||||||
|
payload: String,
|
||||||
|
}
|
||||||
|
|
||||||
|
#[derive(Deserialize)]
|
||||||
|
struct EnqueueResponse {
|
||||||
|
accepted: bool,
|
||||||
|
id: u64,
|
||||||
|
queue_len: usize,
|
||||||
|
}
|
||||||
|
|
||||||
|
impl QueueProduceWorkload {
|
||||||
|
#[must_use]
|
||||||
|
pub fn new() -> Self {
|
||||||
|
Self {
|
||||||
|
operations: 200,
|
||||||
|
rate_per_sec: Some(25),
|
||||||
|
payload_prefix: "queue-demo".to_owned(),
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
#[must_use]
|
||||||
|
pub const fn operations(mut self, value: usize) -> Self {
|
||||||
|
self.operations = value;
|
||||||
|
self
|
||||||
|
}
|
||||||
|
|
||||||
|
#[must_use]
|
||||||
|
pub const fn rate_per_sec(mut self, value: usize) -> Self {
|
||||||
|
self.rate_per_sec = Some(value);
|
||||||
|
self
|
||||||
|
}
|
||||||
|
|
||||||
|
#[must_use]
|
||||||
|
pub fn payload_prefix(mut self, value: impl Into<String>) -> Self {
|
||||||
|
self.payload_prefix = value.into();
|
||||||
|
self
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
impl Default for QueueProduceWorkload {
|
||||||
|
fn default() -> Self {
|
||||||
|
Self::new()
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
#[async_trait]
|
||||||
|
impl Workload<QueueEnv> for QueueProduceWorkload {
|
||||||
|
fn name(&self) -> &str {
|
||||||
|
"queue_produce_workload"
|
||||||
|
}
|
||||||
|
|
||||||
|
async fn start(&self, ctx: &RunContext<QueueEnv>) -> Result<(), DynError> {
|
||||||
|
let clients = ctx.node_clients().snapshot();
|
||||||
|
let Some(producer) = clients.first() else {
|
||||||
|
return Err("no queue node clients available".into());
|
||||||
|
};
|
||||||
|
|
||||||
|
let interval = self.rate_per_sec.and_then(compute_interval);
|
||||||
|
info!(
|
||||||
|
operations = self.operations,
|
||||||
|
rate_per_sec = ?self.rate_per_sec,
|
||||||
|
"starting queue produce workload"
|
||||||
|
);
|
||||||
|
|
||||||
|
for idx in 0..self.operations {
|
||||||
|
let payload = format!("{}-{idx}", self.payload_prefix);
|
||||||
|
let response: EnqueueResponse = producer
|
||||||
|
.post("/queue/enqueue", &EnqueueRequest { payload })
|
||||||
|
.await?;
|
||||||
|
|
||||||
|
if !response.accepted {
|
||||||
|
return Err(format!("node rejected enqueue at operation {idx}").into());
|
||||||
|
}
|
||||||
|
|
||||||
|
if (idx + 1) % 25 == 0 {
|
||||||
|
info!(
|
||||||
|
completed = idx + 1,
|
||||||
|
last_id = response.id,
|
||||||
|
queue_len = response.queue_len,
|
||||||
|
"queue produce progress"
|
||||||
|
);
|
||||||
|
}
|
||||||
|
|
||||||
|
if let Some(delay) = interval {
|
||||||
|
tokio::time::sleep(delay).await;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
Ok(())
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Converts a per-second rate into the pause between two operations.
///
/// Returns `None` for a rate of zero. Otherwise the pause is `1000 / rate`
/// milliseconds using integer division, but never less than 1 ms.
fn compute_interval(rate_per_sec: usize) -> Option<Duration> {
    match rate_per_sec {
        0 => None,
        rate => {
            let millis = 1000 / rate as u64;
            Some(Duration::from_millis(millis.max(1)))
        }
    }
}
|
||||||
179
examples/queue/testing/workloads/src/roundtrip.rs
Normal file
179
examples/queue/testing/workloads/src/roundtrip.rs
Normal file
@ -0,0 +1,179 @@
|
|||||||
|
use std::{collections::HashSet, time::Duration};
|
||||||
|
|
||||||
|
use async_trait::async_trait;
|
||||||
|
use queue_runtime_ext::QueueEnv;
|
||||||
|
use serde::{Deserialize, Serialize};
|
||||||
|
use testing_framework_core::scenario::{DynError, RunContext, Workload};
|
||||||
|
use tokio::time::{Instant, sleep};
|
||||||
|
use tracing::info;
|
||||||
|
|
||||||
|
/// Workload that enqueues a batch of messages on the first node and then
/// dequeues them again, checking ids and payloads.
#[derive(Clone)]
pub struct QueueRoundTripWorkload {
    /// Number of messages to produce and then consume.
    operations: usize,
    /// Target enqueue rate; `None` means no pause between requests.
    rate_per_sec: Option<usize>,
    /// Prefix of every generated payload (`<prefix>-<index>`).
    payload_prefix: String,
    /// Maximum time allowed for the consume phase.
    drain_timeout: Duration,
    /// Pause after a dequeue that returned no message.
    empty_poll_interval: Duration,
}
|
||||||
|
|
||||||
|
#[derive(Serialize)]
|
||||||
|
struct EnqueueRequest {
|
||||||
|
payload: String,
|
||||||
|
}
|
||||||
|
|
||||||
|
#[derive(Deserialize)]
|
||||||
|
struct EnqueueResponse {
|
||||||
|
accepted: bool,
|
||||||
|
id: u64,
|
||||||
|
}
|
||||||
|
|
||||||
|
#[derive(Serialize)]
|
||||||
|
struct DequeueRequest {}
|
||||||
|
|
||||||
|
#[derive(Deserialize)]
|
||||||
|
struct QueueMessage {
|
||||||
|
id: u64,
|
||||||
|
payload: String,
|
||||||
|
}
|
||||||
|
|
||||||
|
#[derive(Deserialize)]
|
||||||
|
struct DequeueResponse {
|
||||||
|
message: Option<QueueMessage>,
|
||||||
|
}
|
||||||
|
|
||||||
|
impl QueueRoundTripWorkload {
|
||||||
|
#[must_use]
|
||||||
|
pub fn new() -> Self {
|
||||||
|
Self {
|
||||||
|
operations: 200,
|
||||||
|
rate_per_sec: Some(25),
|
||||||
|
payload_prefix: "queue-roundtrip".to_owned(),
|
||||||
|
drain_timeout: Duration::from_secs(20),
|
||||||
|
empty_poll_interval: Duration::from_millis(100),
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
#[must_use]
|
||||||
|
pub const fn operations(mut self, value: usize) -> Self {
|
||||||
|
self.operations = value;
|
||||||
|
self
|
||||||
|
}
|
||||||
|
|
||||||
|
#[must_use]
|
||||||
|
pub const fn rate_per_sec(mut self, value: usize) -> Self {
|
||||||
|
self.rate_per_sec = Some(value);
|
||||||
|
self
|
||||||
|
}
|
||||||
|
|
||||||
|
#[must_use]
|
||||||
|
pub fn payload_prefix(mut self, value: impl Into<String>) -> Self {
|
||||||
|
self.payload_prefix = value.into();
|
||||||
|
self
|
||||||
|
}
|
||||||
|
|
||||||
|
#[must_use]
|
||||||
|
pub const fn drain_timeout(mut self, value: Duration) -> Self {
|
||||||
|
self.drain_timeout = value;
|
||||||
|
self
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
impl Default for QueueRoundTripWorkload {
|
||||||
|
fn default() -> Self {
|
||||||
|
Self::new()
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
#[async_trait]
|
||||||
|
impl Workload<QueueEnv> for QueueRoundTripWorkload {
|
||||||
|
fn name(&self) -> &str {
|
||||||
|
"queue_roundtrip_workload"
|
||||||
|
}
|
||||||
|
|
||||||
|
async fn start(&self, ctx: &RunContext<QueueEnv>) -> Result<(), DynError> {
|
||||||
|
let clients = ctx.node_clients().snapshot();
|
||||||
|
let Some(driver) = clients.first() else {
|
||||||
|
return Err("no queue node clients available".into());
|
||||||
|
};
|
||||||
|
|
||||||
|
let interval = self.rate_per_sec.and_then(compute_interval);
|
||||||
|
let mut produced_ids = HashSet::with_capacity(self.operations);
|
||||||
|
|
||||||
|
info!(
|
||||||
|
operations = self.operations,
|
||||||
|
"queue roundtrip: produce phase"
|
||||||
|
);
|
||||||
|
for idx in 0..self.operations {
|
||||||
|
let payload = format!("{}-{idx}", self.payload_prefix);
|
||||||
|
let response: EnqueueResponse = driver
|
||||||
|
.post("/queue/enqueue", &EnqueueRequest { payload })
|
||||||
|
.await?;
|
||||||
|
|
||||||
|
if !response.accepted {
|
||||||
|
return Err(format!("enqueue rejected at operation {idx}").into());
|
||||||
|
}
|
||||||
|
|
||||||
|
if !produced_ids.insert(response.id) {
|
||||||
|
return Err(format!("duplicate enqueue id observed: {}", response.id).into());
|
||||||
|
}
|
||||||
|
|
||||||
|
if let Some(delay) = interval {
|
||||||
|
sleep(delay).await;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
info!(
|
||||||
|
operations = self.operations,
|
||||||
|
"queue roundtrip: consume phase"
|
||||||
|
);
|
||||||
|
let mut consumed = 0usize;
|
||||||
|
let deadline = Instant::now() + self.drain_timeout;
|
||||||
|
|
||||||
|
while consumed < self.operations && Instant::now() < deadline {
|
||||||
|
let response: DequeueResponse =
|
||||||
|
driver.post("/queue/dequeue", &DequeueRequest {}).await?;
|
||||||
|
|
||||||
|
match response.message {
|
||||||
|
Some(message) => {
|
||||||
|
if !message.payload.starts_with(&self.payload_prefix) {
|
||||||
|
return Err(format!("unexpected payload: {}", message.payload).into());
|
||||||
|
}
|
||||||
|
if !produced_ids.remove(&message.id) {
|
||||||
|
return Err(
|
||||||
|
format!("unknown or duplicate dequeue id: {}", message.id).into()
|
||||||
|
);
|
||||||
|
}
|
||||||
|
consumed += 1;
|
||||||
|
}
|
||||||
|
None => sleep(self.empty_poll_interval).await,
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if consumed != self.operations {
|
||||||
|
return Err(format!(
|
||||||
|
"queue roundtrip timed out: consumed {consumed}/{} messages",
|
||||||
|
self.operations
|
||||||
|
)
|
||||||
|
.into());
|
||||||
|
}
|
||||||
|
|
||||||
|
if !produced_ids.is_empty() {
|
||||||
|
return Err(format!(
|
||||||
|
"queue roundtrip ended with {} undrained produced ids",
|
||||||
|
produced_ids.len()
|
||||||
|
)
|
||||||
|
.into());
|
||||||
|
}
|
||||||
|
|
||||||
|
info!(operations = self.operations, "queue roundtrip finished");
|
||||||
|
Ok(())
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Converts a target rate (operations per second) into the pause to insert
/// between two consecutive operations.
///
/// Returns `None` when `rate_per_sec` is zero, otherwise `1s / rate_per_sec`
/// computed at nanosecond resolution and never shorter than one nanosecond.
///
/// Working in nanoseconds avoids the distortion of whole-millisecond
/// division, where e.g. 600 ops/s rounded down to a 1 ms pause (1000 ops/s)
/// and every rate above 1000 ops/s collapsed to the same 1 ms pause.
fn compute_interval(rate_per_sec: usize) -> Option<Duration> {
    const NANOS_PER_SEC: u64 = 1_000_000_000;

    if rate_per_sec == 0 {
        return None;
    }

    // `usize` fits in `u64` on supported targets; saturate instead of
    // truncating should that ever stop being true.
    let rate = u64::try_from(rate_per_sec).unwrap_or(u64::MAX);

    Some(Duration::from_nanos((NANOS_PER_SEC / rate).max(1)))
}
|
||||||
@ -25,11 +25,11 @@ Each example follows the same pattern:
|
|||||||
## Run with Docker Compose
|
## Run with Docker Compose
|
||||||
|
|
||||||
```bash
|
```bash
|
||||||
cargo run -p redis-streams-examples --bin compose_roundtrip
|
cargo run -p redis-streams-examples --bin redis_streams_compose_roundtrip
|
||||||
```
|
```
|
||||||
|
|
||||||
## Run the reclaim scenario
|
## Run the reclaim scenario
|
||||||
|
|
||||||
```bash
|
```bash
|
||||||
cargo run -p redis-streams-examples --bin compose_failover
|
cargo run -p redis-streams-examples --bin redis_streams_compose_failover
|
||||||
```
|
```
|
||||||
|
|||||||
@ -4,6 +4,14 @@ license.workspace = true
|
|||||||
name = "redis-streams-examples"
|
name = "redis-streams-examples"
|
||||||
version.workspace = true
|
version.workspace = true
|
||||||
|
|
||||||
|
[[bin]]
|
||||||
|
name = "redis_streams_compose_roundtrip"
|
||||||
|
path = "src/bin/compose_roundtrip.rs"
|
||||||
|
|
||||||
|
[[bin]]
|
||||||
|
name = "redis_streams_compose_failover"
|
||||||
|
path = "src/bin/compose_failover.rs"
|
||||||
|
|
||||||
[dependencies]
|
[dependencies]
|
||||||
anyhow = "1.0"
|
anyhow = "1.0"
|
||||||
redis-streams-runtime-ext = { path = "../testing/integration" }
|
redis-streams-runtime-ext = { path = "../testing/integration" }
|
||||||
|
|||||||
@ -29,5 +29,5 @@ reqwest = { features = ["json"], workspace = true }
|
|||||||
serde = { workspace = true }
|
serde = { workspace = true }
|
||||||
serde_yaml = { workspace = true }
|
serde_yaml = { workspace = true }
|
||||||
thiserror = { workspace = true }
|
thiserror = { workspace = true }
|
||||||
tokio = { features = ["macros", "process", "rt-multi-thread", "time"], workspace = true }
|
tokio = { features = ["macros", "process", "rt-multi-thread", "sync", "time"], workspace = true }
|
||||||
tracing = { workspace = true }
|
tracing = { workspace = true }
|
||||||
|
|||||||
@ -1,5 +1,6 @@
|
|||||||
pub mod cfgsync;
|
pub mod cfgsync;
|
||||||
pub mod env;
|
pub mod env;
|
||||||
|
pub mod observation;
|
||||||
pub mod runtime;
|
pub mod runtime;
|
||||||
pub mod scenario;
|
pub mod scenario;
|
||||||
pub mod topology;
|
pub mod topology;
|
||||||
|
|||||||
161
testing-framework/core/src/observation/factory.rs
Normal file
161
testing-framework/core/src/observation/factory.rs
Normal file
@ -0,0 +1,161 @@
|
|||||||
|
use std::{marker::PhantomData, sync::Arc};
|
||||||
|
|
||||||
|
use async_trait::async_trait;
|
||||||
|
|
||||||
|
use super::{
|
||||||
|
ObservationConfig, ObservationHandle, ObservationRuntime, ObservedSource, Observer,
|
||||||
|
SourceProvider,
|
||||||
|
};
|
||||||
|
use crate::scenario::{
|
||||||
|
Application, DynError, NodeClients, PreparedRuntimeExtension, RuntimeExtensionFactory,
|
||||||
|
};
|
||||||
|
|
||||||
|
/// Type-erased, heap-allocated [`SourceProvider`] for sources of type `S`.
///
/// This is the return type of
/// [`SourceProviderFactory::build_source_provider`].
pub type BoxedSourceProvider<S> = Box<dyn SourceProvider<S>>;
|
||||||
|
|
||||||
|
/// Builds an observation source provider once node clients are available.
///
/// Any `Fn(&E::Deployment, NodeClients<E>) -> Result<BoxedSourceProvider<S>, DynError>`
/// that is `Send + Sync + 'static` implements this trait through a blanket impl.
pub trait SourceProviderFactory<E: Application, S>: Send + Sync + 'static {
    /// Builds the source provider for one scenario run.
    ///
    /// # Errors
    ///
    /// Returns the factory's own error when a provider cannot be built for
    /// the given deployment and clients.
    fn build_source_provider(
        &self,
        deployment: &E::Deployment,
        node_clients: NodeClients<E>,
    ) -> Result<BoxedSourceProvider<S>, DynError>;
}
|
||||||
|
|
||||||
|
impl<E, S, F> SourceProviderFactory<E, S> for F
|
||||||
|
where
|
||||||
|
E: Application,
|
||||||
|
S: Clone + Send + Sync + 'static,
|
||||||
|
F: Fn(&E::Deployment, NodeClients<E>) -> Result<BoxedSourceProvider<S>, DynError>
|
||||||
|
+ Send
|
||||||
|
+ Sync
|
||||||
|
+ 'static,
|
||||||
|
{
|
||||||
|
fn build_source_provider(
|
||||||
|
&self,
|
||||||
|
deployment: &E::Deployment,
|
||||||
|
node_clients: NodeClients<E>,
|
||||||
|
) -> Result<BoxedSourceProvider<S>, DynError> {
|
||||||
|
self(deployment, node_clients)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Fixed source provider for scenario runs with a stable source set.
|
||||||
|
#[derive(Clone, Debug)]
|
||||||
|
pub struct StaticSourceProvider<S> {
|
||||||
|
sources: Vec<ObservedSource<S>>,
|
||||||
|
}
|
||||||
|
|
||||||
|
impl<S> StaticSourceProvider<S> {
|
||||||
|
/// Builds a provider from a fixed source list.
|
||||||
|
#[must_use]
|
||||||
|
pub fn new(sources: Vec<ObservedSource<S>>) -> Self {
|
||||||
|
Self { sources }
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
#[async_trait]
|
||||||
|
impl<S> SourceProvider<S> for StaticSourceProvider<S>
|
||||||
|
where
|
||||||
|
S: Clone + Send + Sync + 'static,
|
||||||
|
{
|
||||||
|
async fn sources(&self) -> Result<Vec<ObservedSource<S>>, DynError> {
|
||||||
|
Ok(self.sources.clone())
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Runtime extension factory that starts one observer and stores its handle in
|
||||||
|
/// `RunContext`.
|
||||||
|
pub struct ObservationExtensionFactory<E: Application, O: Observer> {
|
||||||
|
observer_builder: Arc<dyn Fn() -> O + Send + Sync>,
|
||||||
|
source_provider_factory: Arc<dyn SourceProviderFactory<E, O::Source>>,
|
||||||
|
config: ObservationConfig,
|
||||||
|
env_marker: PhantomData<E>,
|
||||||
|
}
|
||||||
|
|
||||||
|
impl<E: Application, O: Observer> ObservationExtensionFactory<E, O> {
|
||||||
|
/// Builds an observation extension factory from builders.
|
||||||
|
#[must_use]
|
||||||
|
pub fn from_parts(
|
||||||
|
observer_builder: impl Fn() -> O + Send + Sync + 'static,
|
||||||
|
source_provider_factory: impl SourceProviderFactory<E, O::Source>,
|
||||||
|
config: ObservationConfig,
|
||||||
|
) -> Self {
|
||||||
|
Self {
|
||||||
|
observer_builder: Arc::new(observer_builder),
|
||||||
|
source_provider_factory: Arc::new(source_provider_factory),
|
||||||
|
config,
|
||||||
|
env_marker: PhantomData,
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
impl<E, O> ObservationExtensionFactory<E, O>
|
||||||
|
where
|
||||||
|
E: Application,
|
||||||
|
O: Observer + Clone,
|
||||||
|
{
|
||||||
|
/// Builds an observation extension factory from one clonable observer.
|
||||||
|
#[must_use]
|
||||||
|
pub fn new(
|
||||||
|
observer: O,
|
||||||
|
source_provider_factory: impl SourceProviderFactory<E, O::Source>,
|
||||||
|
config: ObservationConfig,
|
||||||
|
) -> Self {
|
||||||
|
Self::from_parts(move || observer.clone(), source_provider_factory, config)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
#[async_trait]
|
||||||
|
impl<E, O> RuntimeExtensionFactory<E> for ObservationExtensionFactory<E, O>
|
||||||
|
where
|
||||||
|
E: Application,
|
||||||
|
O: Observer,
|
||||||
|
{
|
||||||
|
async fn prepare(
|
||||||
|
&self,
|
||||||
|
deployment: &E::Deployment,
|
||||||
|
node_clients: NodeClients<E>,
|
||||||
|
) -> Result<PreparedRuntimeExtension, DynError> {
|
||||||
|
let source_provider = self
|
||||||
|
.source_provider_factory
|
||||||
|
.build_source_provider(deployment, node_clients)?;
|
||||||
|
|
||||||
|
let observer = (self.observer_builder)();
|
||||||
|
let runtime =
|
||||||
|
ObservationRuntime::start(source_provider, observer, self.config.clone()).await?;
|
||||||
|
|
||||||
|
let (handle, task) = runtime.into_parts();
|
||||||
|
|
||||||
|
Ok(PreparedRuntimeExtension::from_task(handle, task))
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
#[async_trait]
|
||||||
|
impl<S, P> SourceProvider<S> for Box<P>
|
||||||
|
where
|
||||||
|
S: Clone + Send + Sync + 'static,
|
||||||
|
P: SourceProvider<S> + ?Sized,
|
||||||
|
{
|
||||||
|
async fn sources(&self) -> Result<Vec<ObservedSource<S>>, DynError> {
|
||||||
|
(**self).sources().await
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
#[async_trait]
|
||||||
|
impl<S, P> SourceProvider<S> for Arc<P>
|
||||||
|
where
|
||||||
|
S: Clone + Send + Sync + 'static,
|
||||||
|
P: SourceProvider<S> + ?Sized,
|
||||||
|
{
|
||||||
|
async fn sources(&self) -> Result<Vec<ObservedSource<S>>, DynError> {
|
||||||
|
(**self).sources().await
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Wraps an observation handle as a prepared runtime extension.
impl<O: Observer> From<ObservationHandle<O>> for PreparedRuntimeExtension {
    fn from(handle: ObservationHandle<O>) -> Self {
        Self::new(handle)
    }
}
|
||||||
503
testing-framework/core/src/observation/mod.rs
Normal file
503
testing-framework/core/src/observation/mod.rs
Normal file
@ -0,0 +1,503 @@
|
|||||||
|
//! Generic continuous observation runtime.
|
||||||
|
//!
|
||||||
|
//! This module provides the reusable runtime needed by both TF scenarios and
|
||||||
|
//! manual-cluster consumers such as Cucumber worlds. It does not know any app
|
||||||
|
//! semantics. Apps provide source types, observation logic, materialized state,
|
||||||
|
//! snapshots, and delta events.
|
||||||
|
|
||||||
|
mod factory;
|
||||||
|
|
||||||
|
use std::{
|
||||||
|
any::type_name,
|
||||||
|
collections::VecDeque,
|
||||||
|
sync::Arc,
|
||||||
|
time::{Duration, SystemTime},
|
||||||
|
};
|
||||||
|
|
||||||
|
use async_trait::async_trait;
|
||||||
|
pub use factory::{
|
||||||
|
BoxedSourceProvider, ObservationExtensionFactory, SourceProviderFactory, StaticSourceProvider,
|
||||||
|
};
|
||||||
|
use parking_lot::Mutex;
|
||||||
|
use tokio::{
|
||||||
|
sync::broadcast,
|
||||||
|
task::JoinHandle,
|
||||||
|
time::{MissedTickBehavior, interval},
|
||||||
|
};
|
||||||
|
use tracing::{debug, info, warn};
|
||||||
|
|
||||||
|
use crate::scenario::DynError;
|
||||||
|
|
||||||
|
/// Tuning knobs for a background observation runtime.
#[derive(Clone, Debug)]
pub struct ObservationConfig {
    /// Time between observation cycles; must be non-zero for the runtime to
    /// start.
    pub interval: Duration,
    /// Maximum number of non-empty event batches retained in memory.
    /// Zero disables history retention.
    pub history_limit: usize,
}

impl Default for ObservationConfig {
    /// One observation cycle per second, keeping the last 64 batches.
    fn default() -> Self {
        let interval = Duration::from_secs(1);
        let history_limit = 64;

        Self {
            interval,
            history_limit,
        }
    }
}
|
||||||
|
|
||||||
|
/// A named handle to one thing being observed.
#[derive(Clone, Debug)]
pub struct ObservedSource<S> {
    /// Human-readable source name used in logs and app-level reporting.
    pub name: String,
    /// App-owned source handle.
    pub source: S,
}

impl<S> ObservedSource<S> {
    /// Pairs `source` with an owned copy of `name`.
    #[must_use]
    pub fn new(name: &str, source: S) -> Self {
        let name = String::from(name);
        Self { name, source }
    }
}
|
||||||
|
|
||||||
|
/// Supplies the current observation source set.
///
/// The runtime calls [`SourceProvider::sources`] once at startup and again at
/// the beginning of every observation cycle, so implementations may return a
/// different set each time.
#[async_trait]
pub trait SourceProvider<S>: Send + Sync + 'static {
    /// Returns the current source set for the next observation cycle.
    ///
    /// # Errors
    ///
    /// An error makes the runtime fail startup, or record the cycle as a
    /// source-refresh failure when it happens inside the loop.
    async fn sources(&self) -> Result<Vec<ObservedSource<S>>, DynError>;
}
|
||||||
|
|
||||||
|
/// App-owned observation logic.
///
/// The runtime calls `init` once at startup, then `poll` once per cycle, and
/// `snapshot` after `init` and after every successful `poll`.
#[async_trait]
pub trait Observer: Send + Sync + 'static {
    /// App-owned source type.
    type Source: Clone + Send + Sync + 'static;
    /// App-owned retained materialized state.
    type State: Send + Sync + 'static;
    /// App-owned current snapshot view.
    type Snapshot: Clone + Send + Sync + 'static;
    /// App-owned delta event type emitted per cycle.
    type Event: Clone + Send + Sync + 'static;

    /// Builds the initial retained state from the current source set.
    ///
    /// # Errors
    ///
    /// An error aborts runtime startup.
    async fn init(&self, sources: &[ObservedSource<Self::Source>])
    -> Result<Self::State, DynError>;

    /// Advances retained state by one cycle and returns any new delta events.
    ///
    /// Returning an empty vector is valid: the snapshot is still refreshed,
    /// but no batch is recorded or broadcast for that cycle.
    ///
    /// # Errors
    ///
    /// An error is recorded as a poll failure for the cycle; the loop keeps
    /// running.
    async fn poll(
        &self,
        sources: &[ObservedSource<Self::Source>],
        state: &mut Self::State,
    ) -> Result<Vec<Self::Event>, DynError>;

    /// Builds the current snapshot view from retained state.
    fn snapshot(&self, state: &Self::State) -> Self::Snapshot;
}
|
||||||
|
|
||||||
|
/// One materialized snapshot emitted by the runtime.
#[derive(Clone, Debug)]
pub struct ObservationSnapshot<S> {
    /// Monotonic cycle number: `0` for the snapshot built at startup, then
    /// the number of the loop cycle that produced it.
    pub cycle: u64,
    /// Capture timestamp (wall clock at the time the snapshot was built).
    pub observed_at: SystemTime,
    /// Number of sources used for this snapshot.
    pub source_count: usize,
    /// App-owned snapshot payload.
    pub value: S,
}
|
||||||
|
|
||||||
|
/// One delta batch emitted by a successful observation cycle.
///
/// Batches are only built for cycles that produced at least one event.
#[derive(Clone, Debug)]
pub struct ObservationBatch<E> {
    /// Monotonic cycle number.
    pub cycle: u64,
    /// Capture timestamp (wall clock at the time the batch was built).
    pub observed_at: SystemTime,
    /// Number of sources used for this batch.
    pub source_count: usize,
    /// App-owned delta events discovered in this cycle; never empty.
    pub events: Vec<E>,
}
|
||||||
|
|
||||||
|
/// Stage of an observation cycle at which it failed.
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
pub enum ObservationFailureStage {
    /// Source refresh failed before a poll could run.
    SourceRefresh,
    /// Observer poll failed after sources were refreshed.
    Poll,
}
|
||||||
|
|
||||||
|
/// Description of a failed observation cycle.
#[derive(Clone, Debug)]
pub struct ObservationFailure {
    /// Monotonic cycle number.
    pub cycle: u64,
    /// Failure timestamp.
    pub observed_at: SystemTime,
    /// Number of sources involved in the failed cycle; `0` when the source
    /// refresh itself failed, because no source set was obtained.
    pub source_count: usize,
    /// Runtime stage that failed.
    pub stage: ObservationFailureStage,
    /// Human-readable failure message (the underlying error rendered as text).
    pub message: String,
}
|
||||||
|
|
||||||
|
/// Errors returned while starting an observation runtime.
///
/// Failures that happen after startup are not reported through this type;
/// they are recorded per cycle as [`ObservationFailure`] values.
#[derive(Debug, thiserror::Error)]
pub enum ObservationRuntimeError {
    /// The configured interval is invalid (zero).
    #[error("observation interval must be greater than zero")]
    InvalidInterval,
    /// Source discovery failed during runtime startup.
    #[error("failed to refresh observation sources during startup: {source}")]
    SourceRefresh {
        /// Error returned by the source provider.
        #[source]
        source: DynError,
    },
    /// Observer state initialization failed during runtime startup.
    #[error("failed to initialize observation state: {source}")]
    ObserverInit {
        /// Error returned by the observer's `init`.
        #[source]
        source: DynError,
    },
}
|
||||||
|
|
||||||
|
/// Read-side handle for one running observer.
|
||||||
|
pub struct ObservationHandle<O: Observer> {
|
||||||
|
shared: Arc<Mutex<SharedObservationState<O>>>,
|
||||||
|
batches: broadcast::Sender<Arc<ObservationBatch<O::Event>>>,
|
||||||
|
}
|
||||||
|
|
||||||
|
impl<O: Observer> Clone for ObservationHandle<O> {
|
||||||
|
fn clone(&self) -> Self {
|
||||||
|
Self {
|
||||||
|
shared: Arc::clone(&self.shared),
|
||||||
|
batches: self.batches.clone(),
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
impl<O: Observer> ObservationHandle<O> {
|
||||||
|
/// Returns the latest successful snapshot, if one has been produced.
|
||||||
|
#[must_use]
|
||||||
|
pub fn latest_snapshot(&self) -> Option<ObservationSnapshot<O::Snapshot>> {
|
||||||
|
self.shared.lock().latest_snapshot.clone()
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Returns retained non-empty event batches.
|
||||||
|
#[must_use]
|
||||||
|
pub fn history(&self) -> Vec<Arc<ObservationBatch<O::Event>>> {
|
||||||
|
self.shared.lock().history.iter().cloned().collect()
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Returns the most recent cycle failure, if any.
|
||||||
|
#[must_use]
|
||||||
|
pub fn last_error(&self) -> Option<ObservationFailure> {
|
||||||
|
self.shared.lock().last_error.clone()
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Subscribes to future non-empty event batches.
|
||||||
|
#[must_use]
|
||||||
|
pub fn subscribe(&self) -> broadcast::Receiver<Arc<ObservationBatch<O::Event>>> {
|
||||||
|
self.batches.subscribe()
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Lifecycle owner for one background observation runtime.
|
||||||
|
pub struct ObservationRuntime<O: Observer> {
|
||||||
|
handle: ObservationHandle<O>,
|
||||||
|
task: Option<JoinHandle<()>>,
|
||||||
|
}
|
||||||
|
|
||||||
|
impl<O: Observer> ObservationRuntime<O> {
|
||||||
|
/// Starts one background observation runtime.
|
||||||
|
pub async fn start<P>(
|
||||||
|
provider: P,
|
||||||
|
observer: O,
|
||||||
|
config: ObservationConfig,
|
||||||
|
) -> Result<Self, ObservationRuntimeError>
|
||||||
|
where
|
||||||
|
P: SourceProvider<O::Source>,
|
||||||
|
{
|
||||||
|
ensure_positive_interval(config.interval)?;
|
||||||
|
|
||||||
|
let sources = provider
|
||||||
|
.sources()
|
||||||
|
.await
|
||||||
|
.map_err(|source| ObservationRuntimeError::SourceRefresh { source })?;
|
||||||
|
|
||||||
|
let source_count = sources.len();
|
||||||
|
let state = observer
|
||||||
|
.init(&sources)
|
||||||
|
.await
|
||||||
|
.map_err(|source| ObservationRuntimeError::ObserverInit { source })?;
|
||||||
|
|
||||||
|
let snapshot = build_snapshot(0, source_count, &observer, &state);
|
||||||
|
let batches = broadcast::channel(config.history_limit.max(1)).0;
|
||||||
|
let shared = Arc::new(Mutex::new(SharedObservationState::new(snapshot)));
|
||||||
|
let handle = ObservationHandle {
|
||||||
|
shared: Arc::clone(&shared),
|
||||||
|
batches,
|
||||||
|
};
|
||||||
|
|
||||||
|
info!(
|
||||||
|
observer = type_name::<O>(),
|
||||||
|
interval_ms = config.interval.as_millis(),
|
||||||
|
history_limit = config.history_limit,
|
||||||
|
source_count,
|
||||||
|
"starting observation runtime"
|
||||||
|
);
|
||||||
|
|
||||||
|
let runtime_handle = handle.clone();
|
||||||
|
let task = tokio::spawn(run_observation_loop(
|
||||||
|
provider,
|
||||||
|
observer,
|
||||||
|
config,
|
||||||
|
shared,
|
||||||
|
runtime_handle.batches.clone(),
|
||||||
|
state,
|
||||||
|
));
|
||||||
|
|
||||||
|
Ok(Self {
|
||||||
|
handle: runtime_handle,
|
||||||
|
task: Some(task),
|
||||||
|
})
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Returns a read-side handle for the running observer.
|
||||||
|
#[must_use]
|
||||||
|
pub fn handle(&self) -> ObservationHandle<O> {
|
||||||
|
self.handle.clone()
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Splits the runtime into its handle and background task.
|
||||||
|
#[must_use]
|
||||||
|
pub fn into_parts(mut self) -> (ObservationHandle<O>, JoinHandle<()>) {
|
||||||
|
let task = self
|
||||||
|
.task
|
||||||
|
.take()
|
||||||
|
.expect("observation runtime task is always present before into_parts");
|
||||||
|
|
||||||
|
(self.handle.clone(), task)
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Aborts the background task.
|
||||||
|
pub fn abort(&mut self) {
|
||||||
|
if let Some(task) = self.task.take() {
|
||||||
|
task.abort();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
impl<O: Observer> Drop for ObservationRuntime<O> {
|
||||||
|
fn drop(&mut self) {
|
||||||
|
self.abort();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
struct SharedObservationState<O: Observer> {
|
||||||
|
latest_snapshot: Option<ObservationSnapshot<O::Snapshot>>,
|
||||||
|
history: VecDeque<Arc<ObservationBatch<O::Event>>>,
|
||||||
|
last_error: Option<ObservationFailure>,
|
||||||
|
}
|
||||||
|
|
||||||
|
impl<O: Observer> SharedObservationState<O> {
|
||||||
|
fn new(snapshot: ObservationSnapshot<O::Snapshot>) -> Self {
|
||||||
|
Self {
|
||||||
|
latest_snapshot: Some(snapshot),
|
||||||
|
history: VecDeque::new(),
|
||||||
|
last_error: None,
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
async fn run_observation_loop<O, P>(
|
||||||
|
provider: P,
|
||||||
|
observer: O,
|
||||||
|
config: ObservationConfig,
|
||||||
|
shared: Arc<Mutex<SharedObservationState<O>>>,
|
||||||
|
batches: broadcast::Sender<Arc<ObservationBatch<O::Event>>>,
|
||||||
|
mut state: O::State,
|
||||||
|
) where
|
||||||
|
O: Observer,
|
||||||
|
P: SourceProvider<O::Source>,
|
||||||
|
{
|
||||||
|
let mut ticker = build_interval(config.interval);
|
||||||
|
let mut cycle = 1u64;
|
||||||
|
|
||||||
|
ticker.tick().await;
|
||||||
|
|
||||||
|
loop {
|
||||||
|
ticker.tick().await;
|
||||||
|
|
||||||
|
let cycle_outcome = observe_cycle(&provider, &observer, cycle, &mut state).await;
|
||||||
|
|
||||||
|
match cycle_outcome {
|
||||||
|
Ok(success) => record_cycle_success(&shared, &batches, &config, success),
|
||||||
|
Err(failure) => record_cycle_failure(&shared, failure),
|
||||||
|
}
|
||||||
|
|
||||||
|
cycle += 1;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Result of one successful observation cycle.
struct CycleSuccess<O: Observer> {
    /// Snapshot built from the observer state after the poll.
    snapshot: ObservationSnapshot<O::Snapshot>,
    /// Events discovered in this cycle; `None` when the poll returned none.
    batch: Option<Arc<ObservationBatch<O::Event>>>,
}
|
||||||
|
|
||||||
|
async fn observe_cycle<O, P>(
|
||||||
|
provider: &P,
|
||||||
|
observer: &O,
|
||||||
|
cycle: u64,
|
||||||
|
state: &mut O::State,
|
||||||
|
) -> Result<CycleSuccess<O>, ObservationFailure>
|
||||||
|
where
|
||||||
|
O: Observer,
|
||||||
|
P: SourceProvider<O::Source>,
|
||||||
|
{
|
||||||
|
let sources = provider.sources().await.map_err(|source| {
|
||||||
|
build_failure(cycle, 0, ObservationFailureStage::SourceRefresh, source)
|
||||||
|
})?;
|
||||||
|
|
||||||
|
let source_count = sources.len();
|
||||||
|
let events = observer.poll(&sources, state).await.map_err(|source| {
|
||||||
|
build_failure(cycle, source_count, ObservationFailureStage::Poll, source)
|
||||||
|
})?;
|
||||||
|
|
||||||
|
let snapshot = build_snapshot(cycle, source_count, observer, state);
|
||||||
|
let batch = build_batch(cycle, source_count, events);
|
||||||
|
|
||||||
|
Ok(CycleSuccess { snapshot, batch })
|
||||||
|
}
|
||||||
|
|
||||||
|
fn record_cycle_success<O: Observer>(
|
||||||
|
shared: &Arc<Mutex<SharedObservationState<O>>>,
|
||||||
|
batches: &broadcast::Sender<Arc<ObservationBatch<O::Event>>>,
|
||||||
|
config: &ObservationConfig,
|
||||||
|
success: CycleSuccess<O>,
|
||||||
|
) {
|
||||||
|
debug!(
|
||||||
|
observer = type_name::<O>(),
|
||||||
|
cycle = success.snapshot.cycle,
|
||||||
|
source_count = success.snapshot.source_count,
|
||||||
|
event_count = success.batch.as_ref().map_or(0, |batch| batch.events.len()),
|
||||||
|
"observation cycle completed"
|
||||||
|
);
|
||||||
|
|
||||||
|
let mut state = shared.lock();
|
||||||
|
state.latest_snapshot = Some(success.snapshot);
|
||||||
|
state.last_error = None;
|
||||||
|
|
||||||
|
let Some(batch) = success.batch else {
|
||||||
|
return;
|
||||||
|
};
|
||||||
|
|
||||||
|
push_history(&mut state.history, Arc::clone(&batch), config.history_limit);
|
||||||
|
drop(state);
|
||||||
|
|
||||||
|
let _ = batches.send(batch);
|
||||||
|
}
|
||||||
|
|
||||||
|
fn record_cycle_failure<O: Observer>(
|
||||||
|
shared: &Arc<Mutex<SharedObservationState<O>>>,
|
||||||
|
failure: ObservationFailure,
|
||||||
|
) {
|
||||||
|
warn!(
|
||||||
|
observer = type_name::<O>(),
|
||||||
|
cycle = failure.cycle,
|
||||||
|
source_count = failure.source_count,
|
||||||
|
stage = ?failure.stage,
|
||||||
|
message = %failure.message,
|
||||||
|
"observation cycle failed"
|
||||||
|
);
|
||||||
|
|
||||||
|
shared.lock().last_error = Some(failure);
|
||||||
|
}
|
||||||
|
|
||||||
|
fn ensure_positive_interval(interval: Duration) -> Result<(), ObservationRuntimeError> {
|
||||||
|
if interval.is_zero() {
|
||||||
|
return Err(ObservationRuntimeError::InvalidInterval);
|
||||||
|
}
|
||||||
|
|
||||||
|
Ok(())
|
||||||
|
}
|
||||||
|
|
||||||
|
fn build_interval(period: Duration) -> tokio::time::Interval {
|
||||||
|
let mut ticker = interval(period);
|
||||||
|
ticker.set_missed_tick_behavior(MissedTickBehavior::Delay);
|
||||||
|
ticker
|
||||||
|
}
|
||||||
|
|
||||||
|
fn build_snapshot<O: Observer>(
|
||||||
|
cycle: u64,
|
||||||
|
source_count: usize,
|
||||||
|
observer: &O,
|
||||||
|
state: &O::State,
|
||||||
|
) -> ObservationSnapshot<O::Snapshot> {
|
||||||
|
ObservationSnapshot {
|
||||||
|
cycle,
|
||||||
|
observed_at: SystemTime::now(),
|
||||||
|
source_count,
|
||||||
|
value: observer.snapshot(state),
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
fn build_batch<E>(
|
||||||
|
cycle: u64,
|
||||||
|
source_count: usize,
|
||||||
|
events: Vec<E>,
|
||||||
|
) -> Option<Arc<ObservationBatch<E>>> {
|
||||||
|
if events.is_empty() {
|
||||||
|
return None;
|
||||||
|
}
|
||||||
|
|
||||||
|
Some(Arc::new(ObservationBatch {
|
||||||
|
cycle,
|
||||||
|
observed_at: SystemTime::now(),
|
||||||
|
source_count,
|
||||||
|
events,
|
||||||
|
}))
|
||||||
|
}
|
||||||
|
|
||||||
|
fn build_failure(
|
||||||
|
cycle: u64,
|
||||||
|
source_count: usize,
|
||||||
|
stage: ObservationFailureStage,
|
||||||
|
source: DynError,
|
||||||
|
) -> ObservationFailure {
|
||||||
|
ObservationFailure {
|
||||||
|
cycle,
|
||||||
|
observed_at: SystemTime::now(),
|
||||||
|
source_count,
|
||||||
|
stage,
|
||||||
|
message: source.to_string(),
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
fn push_history<E>(
|
||||||
|
history: &mut VecDeque<Arc<ObservationBatch<E>>>,
|
||||||
|
batch: Arc<ObservationBatch<E>>,
|
||||||
|
history_limit: usize,
|
||||||
|
) {
|
||||||
|
if history_limit == 0 {
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
history.push_back(batch);
|
||||||
|
|
||||||
|
while history.len() > history_limit {
|
||||||
|
history.pop_front();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
#[cfg(test)]
|
||||||
|
mod tests;
|
||||||
250
testing-framework/core/src/observation/tests.rs
Normal file
250
testing-framework/core/src/observation/tests.rs
Normal file
@ -0,0 +1,250 @@
|
|||||||
|
use std::{
|
||||||
|
sync::{
|
||||||
|
Arc,
|
||||||
|
atomic::{AtomicUsize, Ordering},
|
||||||
|
},
|
||||||
|
time::Duration,
|
||||||
|
};
|
||||||
|
|
||||||
|
use async_trait::async_trait;
|
||||||
|
use parking_lot::Mutex;
|
||||||
|
use tokio::time::{Instant, sleep};
|
||||||
|
|
||||||
|
use super::{
|
||||||
|
ObservationConfig, ObservationFailureStage, ObservationRuntime, ObservedSource, Observer,
|
||||||
|
SourceProvider,
|
||||||
|
};
|
||||||
|
use crate::scenario::DynError;
|
||||||
|
|
||||||
|
#[derive(Clone)]
|
||||||
|
struct TestSourceProvider {
|
||||||
|
sources: Arc<Mutex<Vec<ObservedSource<u64>>>>,
|
||||||
|
fail_refreshes: Arc<AtomicUsize>,
|
||||||
|
}
|
||||||
|
|
||||||
|
impl TestSourceProvider {
|
||||||
|
fn new(sources: Vec<ObservedSource<u64>>) -> Self {
|
||||||
|
Self {
|
||||||
|
sources: Arc::new(Mutex::new(sources)),
|
||||||
|
fail_refreshes: Arc::new(AtomicUsize::new(0)),
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
fn replace_sources(&self, sources: Vec<ObservedSource<u64>>) {
|
||||||
|
*self.sources.lock() = sources;
|
||||||
|
}
|
||||||
|
|
||||||
|
fn fail_next_refresh(&self) {
|
||||||
|
self.fail_refreshes.store(1, Ordering::SeqCst);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
#[async_trait]
|
||||||
|
impl SourceProvider<u64> for TestSourceProvider {
|
||||||
|
async fn sources(&self) -> Result<Vec<ObservedSource<u64>>, DynError> {
|
||||||
|
if self.fail_refreshes.swap(0, Ordering::SeqCst) == 1 {
|
||||||
|
return Err("refresh failed".into());
|
||||||
|
}
|
||||||
|
|
||||||
|
Ok(self.sources.lock().clone())
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Snapshot view exposed by `CountingObserver`: a copy of its state.
#[derive(Clone, Debug, Eq, PartialEq)]
struct TestSnapshot {
    // Running sum of source values over init and all polls so far.
    total_sources_seen: u64,
    // Number of sources passed to the most recent init/poll.
    last_source_count: usize,
}

/// Event emitted by every `CountingObserver` poll.
#[derive(Clone, Debug, Eq, PartialEq)]
struct TestEvent {
    // Running sum at the time the event was emitted.
    total_sources_seen: u64,
}

/// Retained state of `CountingObserver`.
#[derive(Default)]
struct TestState {
    // Running sum of source values over init and all polls so far.
    total_sources_seen: u64,
    // Number of sources passed to the most recent init/poll.
    last_source_count: usize,
}
|
||||||
|
|
||||||
|
struct CountingObserver;
|
||||||
|
|
||||||
|
#[async_trait]
|
||||||
|
impl Observer for CountingObserver {
|
||||||
|
type Source = u64;
|
||||||
|
type State = TestState;
|
||||||
|
type Snapshot = TestSnapshot;
|
||||||
|
type Event = TestEvent;
|
||||||
|
|
||||||
|
async fn init(
|
||||||
|
&self,
|
||||||
|
sources: &[ObservedSource<Self::Source>],
|
||||||
|
) -> Result<Self::State, DynError> {
|
||||||
|
Ok(TestState {
|
||||||
|
total_sources_seen: sources.iter().map(|source| source.source).sum(),
|
||||||
|
last_source_count: sources.len(),
|
||||||
|
})
|
||||||
|
}
|
||||||
|
|
||||||
|
async fn poll(
|
||||||
|
&self,
|
||||||
|
sources: &[ObservedSource<Self::Source>],
|
||||||
|
state: &mut Self::State,
|
||||||
|
) -> Result<Vec<Self::Event>, DynError> {
|
||||||
|
state.total_sources_seen += sources.iter().map(|source| source.source).sum::<u64>();
|
||||||
|
state.last_source_count = sources.len();
|
||||||
|
|
||||||
|
Ok(vec![TestEvent {
|
||||||
|
total_sources_seen: state.total_sources_seen,
|
||||||
|
}])
|
||||||
|
}
|
||||||
|
|
||||||
|
fn snapshot(&self, state: &Self::State) -> Self::Snapshot {
|
||||||
|
TestSnapshot {
|
||||||
|
total_sources_seen: state.total_sources_seen,
|
||||||
|
last_source_count: state.last_source_count,
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/// After at least two cycles the snapshot reflects the accumulated state and
/// the history is capped at `history_limit`.
#[tokio::test]
async fn runtime_updates_snapshot_and_history() {
    let provider = TestSourceProvider::new(vec![ObservedSource::new("node-0", 2)]);
    let runtime = ObservationRuntime::start(
        provider,
        CountingObserver,
        ObservationConfig {
            interval: Duration::from_millis(25),
            history_limit: 2,
        },
    )
    .await
    .expect("runtime should start");

    // `runtime` must stay alive: dropping it aborts the background loop.
    let handle = runtime.handle();
    wait_for_cycle(&handle, 2).await;

    let snapshot = handle.latest_snapshot().expect("snapshot should exist");
    assert!(snapshot.cycle >= 2);
    assert_eq!(snapshot.source_count, 1);
    assert_eq!(snapshot.value.last_source_count, 1);
    // init contributes 2 and every poll adds another 2, so two cycles give
    // a total of at least 6.
    assert!(snapshot.value.total_sources_seen >= 6);

    // Every poll emits one event, so after two or more cycles exactly
    // `history_limit` (2) batches are retained.
    let history = handle.history();
    assert_eq!(history.len(), 2);
    assert!(history.iter().all(|batch| !batch.events.is_empty()));
}
|
||||||
|
|
||||||
|
#[tokio::test]
|
||||||
|
async fn runtime_refreshes_sources_each_cycle() {
|
||||||
|
let provider = TestSourceProvider::new(vec![ObservedSource::new("node-0", 1)]);
|
||||||
|
let runtime = ObservationRuntime::start(
|
||||||
|
provider.clone(),
|
||||||
|
CountingObserver,
|
||||||
|
ObservationConfig {
|
||||||
|
interval: Duration::from_millis(25),
|
||||||
|
history_limit: 4,
|
||||||
|
},
|
||||||
|
)
|
||||||
|
.await
|
||||||
|
.expect("runtime should start");
|
||||||
|
|
||||||
|
let handle = runtime.handle();
|
||||||
|
wait_for_cycle(&handle, 1).await;
|
||||||
|
|
||||||
|
provider.replace_sources(vec![
|
||||||
|
ObservedSource::new("node-0", 1),
|
||||||
|
ObservedSource::new("node-1", 3),
|
||||||
|
]);
|
||||||
|
|
||||||
|
wait_for_snapshot_source_count(&handle, 2).await;
|
||||||
|
|
||||||
|
let snapshot = handle.latest_snapshot().expect("snapshot should exist");
|
||||||
|
assert_eq!(snapshot.source_count, 2);
|
||||||
|
assert_eq!(snapshot.value.last_source_count, 2);
|
||||||
|
}
|
||||||
|
|
||||||
|
#[tokio::test]
|
||||||
|
async fn runtime_records_cycle_failures() {
|
||||||
|
let provider = TestSourceProvider::new(vec![ObservedSource::new("node-0", 1)]);
|
||||||
|
let runtime = ObservationRuntime::start(
|
||||||
|
provider.clone(),
|
||||||
|
CountingObserver,
|
||||||
|
ObservationConfig {
|
||||||
|
interval: Duration::from_millis(25),
|
||||||
|
history_limit: 2,
|
||||||
|
},
|
||||||
|
)
|
||||||
|
.await
|
||||||
|
.expect("runtime should start");
|
||||||
|
|
||||||
|
let handle = runtime.handle();
|
||||||
|
provider.fail_next_refresh();
|
||||||
|
|
||||||
|
wait_for_failure(&handle).await;
|
||||||
|
|
||||||
|
let failure = handle.last_error().expect("failure should exist");
|
||||||
|
assert_eq!(failure.stage, ObservationFailureStage::SourceRefresh);
|
||||||
|
assert_eq!(failure.message, "refresh failed");
|
||||||
|
}
|
||||||
|
|
||||||
|
async fn wait_for_cycle(handle: &super::ObservationHandle<CountingObserver>, cycle: u64) {
|
||||||
|
let deadline = Instant::now() + Duration::from_secs(2);
|
||||||
|
|
||||||
|
loop {
|
||||||
|
let Some(snapshot) = handle.latest_snapshot() else {
|
||||||
|
sleep(Duration::from_millis(10)).await;
|
||||||
|
continue;
|
||||||
|
};
|
||||||
|
|
||||||
|
if snapshot.cycle >= cycle {
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
assert!(
|
||||||
|
Instant::now() < deadline,
|
||||||
|
"timed out waiting for cycle {cycle}"
|
||||||
|
);
|
||||||
|
|
||||||
|
sleep(Duration::from_millis(10)).await;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
async fn wait_for_snapshot_source_count(
|
||||||
|
handle: &super::ObservationHandle<CountingObserver>,
|
||||||
|
source_count: usize,
|
||||||
|
) {
|
||||||
|
let deadline = Instant::now() + Duration::from_secs(2);
|
||||||
|
|
||||||
|
loop {
|
||||||
|
let Some(snapshot) = handle.latest_snapshot() else {
|
||||||
|
sleep(Duration::from_millis(10)).await;
|
||||||
|
continue;
|
||||||
|
};
|
||||||
|
|
||||||
|
if snapshot.source_count == source_count {
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
assert!(
|
||||||
|
Instant::now() < deadline,
|
||||||
|
"timed out waiting for source_count {source_count}"
|
||||||
|
);
|
||||||
|
|
||||||
|
sleep(Duration::from_millis(10)).await;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
async fn wait_for_failure(handle: &super::ObservationHandle<CountingObserver>) {
|
||||||
|
let deadline = Instant::now() + Duration::from_secs(2);
|
||||||
|
|
||||||
|
loop {
|
||||||
|
if handle.last_error().is_some() {
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
assert!(Instant::now() < deadline, "timed out waiting for failure");
|
||||||
|
|
||||||
|
sleep(Duration::from_millis(10)).await;
|
||||||
|
}
|
||||||
|
}
|
||||||
@ -4,7 +4,12 @@ use super::{
|
|||||||
Application, CleanupPolicy, DeploymentPolicy, Expectation, HttpReadinessRequirement,
|
Application, CleanupPolicy, DeploymentPolicy, Expectation, HttpReadinessRequirement,
|
||||||
RetryPolicy, RuntimeExtensionFactory, Workload, internal::CoreBuilderAccess,
|
RetryPolicy, RuntimeExtensionFactory, Workload, internal::CoreBuilderAccess,
|
||||||
};
|
};
|
||||||
use crate::topology::{DeploymentProvider, DeploymentSeed};
|
use crate::{
|
||||||
|
observation::{
|
||||||
|
ObservationConfig, ObservationExtensionFactory, Observer, SourceProviderFactory,
|
||||||
|
},
|
||||||
|
topology::{DeploymentProvider, DeploymentSeed},
|
||||||
|
};
|
||||||
|
|
||||||
type DeploymentProviderHandle<E> = Box<dyn DeploymentProvider<<E as Application>::Deployment>>;
|
type DeploymentProviderHandle<E> = Box<dyn DeploymentProvider<<E as Application>::Deployment>>;
|
||||||
|
|
||||||
@ -60,6 +65,48 @@ pub trait CoreBuilderExt: CoreBuilderAccess + Sized {
|
|||||||
self.map_core_builder(|builder| builder.with_runtime_extension_factory(extension))
|
self.map_core_builder(|builder| builder.with_runtime_extension_factory(extension))
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Registers one clonable observer as a runtime extension.
|
||||||
|
#[must_use]
|
||||||
|
fn with_observer<O>(
|
||||||
|
self,
|
||||||
|
observer: O,
|
||||||
|
source_provider_factory: impl SourceProviderFactory<Self::Env, O::Source>,
|
||||||
|
config: ObservationConfig,
|
||||||
|
) -> Self
|
||||||
|
where
|
||||||
|
O: Observer + Clone,
|
||||||
|
Self::Env: Application,
|
||||||
|
{
|
||||||
|
let extension = ObservationExtensionFactory::<Self::Env, O>::new(
|
||||||
|
observer,
|
||||||
|
source_provider_factory,
|
||||||
|
config,
|
||||||
|
);
|
||||||
|
|
||||||
|
self.with_runtime_extension_factory(Box::new(extension))
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Registers one observer built lazily per run as a runtime extension.
|
||||||
|
#[must_use]
|
||||||
|
fn with_observer_factory<O>(
|
||||||
|
self,
|
||||||
|
observer_builder: impl Fn() -> O + Send + Sync + 'static,
|
||||||
|
source_provider_factory: impl SourceProviderFactory<Self::Env, O::Source>,
|
||||||
|
config: ObservationConfig,
|
||||||
|
) -> Self
|
||||||
|
where
|
||||||
|
O: Observer,
|
||||||
|
Self::Env: Application,
|
||||||
|
{
|
||||||
|
let extension = ObservationExtensionFactory::<Self::Env, O>::from_parts(
|
||||||
|
observer_builder,
|
||||||
|
source_provider_factory,
|
||||||
|
config,
|
||||||
|
);
|
||||||
|
|
||||||
|
self.with_runtime_extension_factory(Box::new(extension))
|
||||||
|
}
|
||||||
|
|
||||||
#[must_use]
|
#[must_use]
|
||||||
fn with_run_duration(self, duration: Duration) -> Self {
|
fn with_run_duration(self, duration: Duration) -> Self {
|
||||||
self.map_core_builder(|builder| builder.with_run_duration(duration))
|
self.map_core_builder(|builder| builder.with_run_duration(duration))
|
||||||
|
|||||||
Some files were not shown because too many files have changed in this diff Show More
Loading…
x
Reference in New Issue
Block a user