Initial import of Nomos testing framework

andrussal 2025-11-25 09:13:17 +01:00
commit 7f13a12a3d
115 changed files with 22194 additions and 0 deletions

50
.cargo-deny.toml Normal file

@ -0,0 +1,50 @@
# Config file reference can be found at https://embarkstudios.github.io/cargo-deny/checks/cfg.html.
[graph]
all-features = true
exclude-dev = true
no-default-features = true
[advisories]
ignore = [
# Keep local ignores in sync with nomos-node if needed. Unused entries removed.
]
yanked = "deny"
[bans]
allow-wildcard-paths = false
multiple-versions = "allow"
[licenses]
allow = [
"Apache-2.0 WITH LLVM-exception",
"Apache-2.0",
"BSD-2-Clause",
"BSD-3-Clause",
"BSL-1.0",
"BlueOak-1.0.0",
"CC0-1.0",
"CDDL-1.0",
"CDLA-Permissive-2.0",
"ISC",
"MIT",
"MPL-2.0",
"NCSA",
"Unicode-3.0",
"Zlib",
]
private = { ignore = false }
unused-allowed-license = "deny"
[[licenses.clarify]]
expression = "MIT AND ISC"
license-files = [{ hash = 0xbd0eed23, path = "LICENSE" }]
name = "ring"
[sources]
allow-git = ["https://github.com/EspressoSystems/jellyfish.git"]
unknown-git = "deny"
unknown-registry = "deny"
[sources.allow-org]
github = ["logos-co"]

4
.cargo/config.toml Normal file

@ -0,0 +1,4 @@
[target.'cfg(target_os = "macos")']
# On macOS we need to link against some Go libraries; the build does not work without these extra link flags
# from: https://github.com/golang/go/issues/42459
rustflags = ["-C", "link-args=-framework CoreFoundation -framework Security -framework CoreServices -lresolv"]

10
.gitignore vendored Normal file

@ -0,0 +1,10 @@
/target
**/target
.tmp/
# IDE / OS cruft
.idea/
.DS_Store
# Local test artifacts (kept when NOMOS_TESTS_KEEP_LOGS=1)
tests/workflows/.tmp*
tests/workflows/.tmp*/

40
.pre-commit-config.yaml Normal file

@ -0,0 +1,40 @@
repos:
- repo: https://github.com/doublify/pre-commit-rust
rev: v1.0
hooks:
- id: fmt
# We're running `fmt` with `--all` and `pass_filenames: false` to format the entire workspace at once.
# Otherwise, `pre-commit` passes staged files one by one, which can lead to inconsistent results
# due to, presumably, the lack of full workspace context.
entry: cargo +nightly-2025-09-14 fmt --all
pass_filenames: false
- id: clippy
args: ["--all", "--all-targets", "--all-features", "--", "-D", "warnings"]
- repo: https://github.com/EmbarkStudios/cargo-deny
rev: 0.18.2
hooks:
- id: cargo-deny
args:
- check
- --hide-inclusion-graph
- -c
- .cargo-deny.toml
- --show-stats
- -D
- warnings
- repo: https://github.com/ComPWA/taplo-pre-commit
rev: v0.9.3
hooks:
- id: taplo-format
- id: taplo-lint
- repo: https://github.com/bnjbvr/cargo-machete
rev: ba1bcd4 # No tag yet with .pre-commit-hooks.yaml
hooks:
- id: cargo-machete
- repo: local
hooks:
- id: cargo-hack-check
language: script
name: cargo hack check
entry: ./hooks/cargo-hack.sh
stages: [manual]

10
.taplo.toml Normal file

@ -0,0 +1,10 @@
exclude = ["target/**"]
[formatting]
align_entries = true
allowed_blank_lines = 1
column_width = 120
keys = ["build-dependencies", "dependencies", "dev-dependencies"]
reorder_arrays = true
reorder_inline_tables = true
reorder_keys = true

8924
Cargo.lock generated Normal file

File diff suppressed because it is too large.

98
Cargo.toml Normal file

@ -0,0 +1,98 @@
[workspace]
members = [
"testing-framework/configs",
"testing-framework/core",
"testing-framework/runners/compose",
"testing-framework/runners/k8s",
"testing-framework/runners/local",
"testing-framework/workflows",
"tests/workflows",
]
resolver = "2"
[workspace.package]
categories = []
description = "Nomos testing framework workspace (split out from nomos-node)"
edition = "2024"
keywords = ["framework", "nomos", "testing"]
license = "MIT OR Apache-2.0"
readme = "README.md"
repository = "https://example.invalid/nomos-testing-local"
version = "0.1.0"
[workspace.lints.rust]
unsafe_code = "allow"
[workspace.lints.clippy]
all = "allow"
[workspace.dependencies]
# Local testing framework crates
integration-configs = { default-features = false, path = "testing-framework/configs" }
testing-framework-core = { default-features = false, path = "testing-framework/core" }
testing-framework-runner-compose = { default-features = false, path = "testing-framework/runners/compose" }
testing-framework-runner-k8s = { default-features = false, path = "testing-framework/runners/k8s" }
testing-framework-runner-local = { default-features = false, path = "testing-framework/runners/local" }
testing-framework-workflows = { default-features = false, path = "testing-framework/workflows" }
# Nomos git dependencies (tracking master)
broadcast-service = { default-features = false, git = "https://github.com/logos-co/nomos-node.git", branch = "master" }
cfgsync = { default-features = false, git = "https://github.com/logos-co/nomos-node.git", branch = "master" }
chain-leader = { default-features = false, git = "https://github.com/logos-co/nomos-node.git", branch = "master", features = [
"pol-dev-mode",
] }
chain-network = { default-features = false, git = "https://github.com/logos-co/nomos-node.git", branch = "master" }
chain-service = { default-features = false, git = "https://github.com/logos-co/nomos-node.git", branch = "master" }
common-http-client = { default-features = false, git = "https://github.com/logos-co/nomos-node.git", branch = "master" }
cryptarchia-engine = { default-features = false, git = "https://github.com/logos-co/nomos-node.git", branch = "master" }
cryptarchia-sync = { default-features = false, git = "https://github.com/logos-co/nomos-node.git", branch = "master" }
executor-http-client = { default-features = false, git = "https://github.com/logos-co/nomos-node.git", branch = "master" }
groth16 = { default-features = false, git = "https://github.com/logos-co/nomos-node.git", branch = "master" }
key-management-system = { default-features = false, git = "https://github.com/logos-co/nomos-node.git", branch = "master" }
kzgrs = { default-features = false, git = "https://github.com/logos-co/nomos-node.git", branch = "master" }
kzgrs-backend = { default-features = false, git = "https://github.com/logos-co/nomos-node.git", branch = "master" }
nomos-api = { default-features = false, git = "https://github.com/logos-co/nomos-node.git", branch = "master" }
nomos-blend-message = { default-features = false, git = "https://github.com/logos-co/nomos-node.git", branch = "master" }
nomos-blend-service = { default-features = false, git = "https://github.com/logos-co/nomos-node.git", branch = "master" }
nomos-cli = { default-features = false, git = "https://github.com/logos-co/nomos-node.git", branch = "master" }
nomos-core = { default-features = false, git = "https://github.com/logos-co/nomos-node.git", branch = "master" }
nomos-da-dispersal = { default-features = false, git = "https://github.com/logos-co/nomos-node.git", branch = "master" }
nomos-da-network-core = { default-features = false, git = "https://github.com/logos-co/nomos-node.git", branch = "master" }
nomos-da-network-service = { default-features = false, git = "https://github.com/logos-co/nomos-node.git", branch = "master" }
nomos-da-sampling = { default-features = false, git = "https://github.com/logos-co/nomos-node.git", branch = "master" }
nomos-da-verifier = { default-features = false, git = "https://github.com/logos-co/nomos-node.git", branch = "master" }
nomos-executor = { default-features = false, git = "https://github.com/logos-co/nomos-node.git", branch = "master" }
nomos-http-api-common = { default-features = false, git = "https://github.com/logos-co/nomos-node.git", branch = "master" }
nomos-ledger = { default-features = false, git = "https://github.com/logos-co/nomos-node.git", branch = "master" }
nomos-libp2p = { default-features = false, git = "https://github.com/logos-co/nomos-node.git", branch = "master" }
nomos-network = { default-features = false, git = "https://github.com/logos-co/nomos-node.git", branch = "master" }
nomos-node = { default-features = false, git = "https://github.com/logos-co/nomos-node.git", branch = "master" }
nomos-sdp = { default-features = false, git = "https://github.com/logos-co/nomos-node.git", branch = "master" }
nomos-time = { default-features = false, git = "https://github.com/logos-co/nomos-node.git", branch = "master" }
nomos-tracing = { default-features = false, git = "https://github.com/logos-co/nomos-node.git", branch = "master" }
nomos-tracing-service = { default-features = false, git = "https://github.com/logos-co/nomos-node.git", branch = "master" }
nomos-utils = { default-features = false, git = "https://github.com/logos-co/nomos-node.git", branch = "master" }
nomos-wallet = { default-features = false, git = "https://github.com/logos-co/nomos-node.git", branch = "master" }
poc = { default-features = false, git = "https://github.com/logos-co/nomos-node.git", branch = "master" }
pol = { default-features = false, git = "https://github.com/logos-co/nomos-node.git", branch = "master" }
subnetworks-assignations = { default-features = false, git = "https://github.com/logos-co/nomos-node.git", branch = "master" }
tests = { default-features = false, git = "https://github.com/logos-co/nomos-node.git", branch = "master" }
tx-service = { default-features = false, git = "https://github.com/logos-co/nomos-node.git", branch = "master" }
wallet = { default-features = false, git = "https://github.com/logos-co/nomos-node.git", branch = "master" }
zksign = { default-features = false, git = "https://github.com/logos-co/nomos-node.git", branch = "master" }
# External crates
async-trait = { default-features = false, version = "0.1" }
bytes = { default-features = false, version = "1.3" }
hex = { default-features = false, version = "0.4.3" }
libp2p = { default-features = false, version = "0.55" }
rand = { default-features = false, version = "0.8" }
reqwest = { default-features = false, version = "0.12" }
serde = { default-features = true, version = "1.0", features = ["derive"] }
serde_json = { default-features = false, version = "1.0" }
serde_with = { default-features = false, version = "3.14.0" }
serde_yaml = { default-features = false, version = "0.9.33" }
tempfile = { default-features = false, version = "3" }
thiserror = { default-features = false, version = "2.0" }
tokio = { default-features = false, version = "1" }
tracing = { default-features = false, version = "0.1" }

39
README.md Normal file

@ -0,0 +1,39 @@
# Nomos Testing (split workspace)
This workspace contains only the testing framework crates pulled from the `nomos-node` repo:
- `testing-framework/configs`
- `testing-framework/core`
- `testing-framework/workflows`
- `testing-framework/runners` (compose, k8s, local)
- `tests/workflows` (demo/integration tests)
- helper scripts (`scripts/setup-nomos-circuits.sh`, `scripts/build-rapidsnark.sh`)
## Layout
The workspace expects a sibling checkout of `nomos-node`:
```
IdeaProjects/
├─ nomos-node/ # existing monorepo with all node crates
└─ nomos-testing/ # this workspace (you are here)
```
Despite the sibling layout, the Nomos crates are consumed in `Cargo.toml` as git dependencies on `logos-co/nomos-node` (branch `master`).
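For example, the workspace `Cargo.toml` in this commit declares:
```toml
nomos-core = { default-features = false, git = "https://github.com/logos-co/nomos-node.git", branch = "master" }
```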
## Usage
```bash
cd nomos-testing
cargo test -p tests-workflows -- --ignored # or any crate you need
```
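The tests also honor a few environment variables defined elsewhere in this commit: `NOMOS_TESTS_KEEP_LOGS` (see `.gitignore`), plus `NOMOS_TESTS_TRACING` and `SLOW_TEST_ENV` (see the `testing-framework` sources). A sketch of a run that keeps log artifacts, enables debug tracing, and doubles timeouts:
```bash
# Keep tests/workflows/.tmp* artifacts, enable framework tracing, double timeouts.
NOMOS_TESTS_KEEP_LOGS=1 NOMOS_TESTS_TRACING=true SLOW_TEST_ENV=true \
  cargo test -p tests-workflows -- --ignored
```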
If you need circuits/prover assets, run the usual helpers from this workspace:
```bash
scripts/setup-nomos-circuits.sh
scripts/build-rapidsnark.sh
```
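`setup-nomos-circuits.sh` takes an optional version (default `v0.3.1`) and install directory (default `$HOME/.nomos-circuits`); on aarch64, or when `NOMOS_CIRCUITS_REBUILD_RAPIDSNARK=1` is set, it also rebuilds the rapidsnark prover via `build-rapidsnark.sh`. A sketch of installing a pinned release to a custom location:
```bash
# Install to a non-default directory and point the framework at it.
NOMOS_CIRCUITS_REBUILD_RAPIDSNARK=1 scripts/setup-nomos-circuits.sh v0.3.1 /opt/circuits
export NOMOS_CIRCUITS=/opt/circuits
```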
All code is sourced from the local branches:
`feat/testing-framework-move`, `feat/testing-framework`, `feat/testing-framework-runners`, `feat/testing-framework-k8s-runner`.

3
hooks/cargo-hack.sh Executable file

@ -0,0 +1,3 @@
#!/bin/bash
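# Check every feature combination without dev-dependencies, treating warnings as errors.
# Invoked by the manual `cargo-hack-check` hook in .pre-commit-config.yaml.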
RUSTFLAGS="-D warnings" cargo hack --feature-powerset --no-dev-deps check

12
rust-toolchain.toml Normal file

@ -0,0 +1,12 @@
[toolchain]
# Keep this version in sync in the following places as well:
# * Dockerfile
# * flake.nix
# * testnet/Dockerfile
# Also update the nightly toolchain to the latest nightly of the new version in the following places:
# * .github/workflows/code-check.yml (fmt job)
# * .pre-commit-config.yaml (fmt hook)
# Then, if any new allow-by-default rustc lint has been introduced or stabilized, add it to the respective entry in our `config.toml`.
channel = "nightly-2025-09-14"
# Although clippy should be included in the default profile, in some cases it is not installed, so we force it with an explicit declaration.
components = ["clippy", "rustfmt"]

5
rustfmt.toml Normal file

@ -0,0 +1,5 @@
group_imports = "StdExternalCrate"
imports_granularity = "Crate"
reorder_imports = true
reorder_modules = true
wrap_comments = true

52
scripts/build-rapidsnark.sh Executable file

@ -0,0 +1,52 @@
#!/bin/bash
#
# Rebuild the rapidsnark prover for the current architecture.
#
# Usage: ./scripts/build-rapidsnark.sh <circuits_dir>
set -euo pipefail
if [ $# -lt 1 ]; then
echo "usage: $0 <circuits_dir>" >&2
exit 1
fi
TARGET_ARCH="$(uname -m)"
CIRCUITS_DIR="$1"
RAPIDSNARK_REPO="${RAPIDSNARK_REPO:-https://github.com/iden3/rapidsnark.git}"
RAPIDSNARK_REF="${RAPIDSNARK_REF:-main}"
if [ ! -d "$CIRCUITS_DIR" ]; then
echo "circuits directory '$CIRCUITS_DIR' does not exist" >&2
exit 1
fi
case "$TARGET_ARCH" in
arm64 | aarch64)
;;
*)
echo "rapidsnark rebuild skipped for architecture '$TARGET_ARCH'" >&2
exit 0
;;
esac
workdir="$(mktemp -d)"
trap 'rm -rf "$workdir"' EXIT
echo "Building rapidsnark ($RAPIDSNARK_REF) for $TARGET_ARCH..." >&2
git clone --depth 1 --branch "$RAPIDSNARK_REF" "$RAPIDSNARK_REPO" "$workdir/rapidsnark" >&2
cd "$workdir/rapidsnark"
git submodule update --init --recursive >&2
if [ "${RAPIDSNARK_BUILD_GMP:-1}" = "1" ]; then
GMP_TARGET="${RAPIDSNARK_GMP_TARGET:-aarch64}"
./build_gmp.sh "$GMP_TARGET" >&2
fi
MAKE_TARGET="${RAPIDSNARK_MAKE_TARGET:-host_arm64}"
PACKAGE_DIR="${RAPIDSNARK_PACKAGE_DIR:-package_arm64}"
make "$MAKE_TARGET" -j"$(nproc)" >&2
install -m 0755 "${PACKAGE_DIR}/bin/prover" "$CIRCUITS_DIR/prover"
echo "rapidsnark prover installed to $CIRCUITS_DIR/prover" >&2

216
scripts/setup-nomos-circuits.sh Executable file

@ -0,0 +1,216 @@
#!/bin/bash
#
# Setup script for nomos-circuits
#
# Usage: ./setup-nomos-circuits.sh [VERSION] [INSTALL_DIR]
#
# Arguments:
# VERSION - Optional. Version to install (default: v0.3.1)
# INSTALL_DIR - Optional. Installation directory (default: $HOME/.nomos-circuits)
#
# Examples:
# ./setup-nomos-circuits.sh # Install default version to default location
# ./setup-nomos-circuits.sh v0.2.0 # Install specific version to default location
# ./setup-nomos-circuits.sh v0.2.0 /opt/circuits # Install to custom location
set -e
# Default values
VERSION="${1:-v0.3.1}"
DEFAULT_INSTALL_DIR="$HOME/.nomos-circuits"
INSTALL_DIR="${2:-$DEFAULT_INSTALL_DIR}"
REPO="logos-co/nomos-circuits"
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
# Colors for output
RED='\033[0;31m'
GREEN='\033[0;32m'
YELLOW='\033[1;33m'
BLUE='\033[0;34m'
NC='\033[0m' # No Color
print_info() {
echo -e "${BLUE}ℹ${NC} $1"
}
print_success() {
echo -e "${GREEN}✓${NC} $1"
}
print_warning() {
echo -e "${YELLOW}⚠${NC} $1"
}
print_error() {
echo -e "${RED}✗${NC} $1"
}
# Detect OS and architecture
detect_platform() {
local os=""
local arch=""
# Detect OS
case "$(uname -s)" in
Linux*) os="linux";;
Darwin*) os="macos";;
MINGW*|MSYS*|CYGWIN*) os="windows";;
*) print_error "Unsupported operating system: $(uname -s)"; exit 1;;
esac
# Detect architecture
case "$(uname -m)" in
x86_64) arch="x86_64";;
aarch64) arch="aarch64";;
arm64) arch="aarch64";;
*) print_error "Unsupported architecture: $(uname -m)"; exit 1;;
esac
echo "${os}-${arch}"
}
# Check if installation directory exists and get confirmation
check_existing_installation() {
if [ -d "$INSTALL_DIR" ]; then
print_warning "Installation directory already exists: $INSTALL_DIR"
# Check if it has a VERSION file
if [ -f "$INSTALL_DIR/VERSION" ]; then
local current_version=$(cat "$INSTALL_DIR/VERSION")
print_info "Currently installed version: $current_version"
fi
# In non-interactive environments (CI), automatically overwrite
if [ ! -t 0 ]; then
print_info "Non-interactive environment detected, automatically overwriting..."
else
# Interactive environment - ask for confirmation
echo
read -p "Do you want to overwrite it? (y/N): " -n 1 -r
echo
if [[ ! $REPLY =~ ^[Yy]$ ]]; then
print_info "Installation cancelled."
exit 0
fi
fi
print_info "Removing existing installation..."
rm -rf "$INSTALL_DIR"
fi
}
# Download and extract the release
download_release() {
local platform="$1"
local artifact="nomos-circuits-${VERSION}-${platform}.tar.gz"
local url="https://github.com/${REPO}/releases/download/${VERSION}/${artifact}"
local temp_dir=$(mktemp -d)
print_info "Downloading nomos-circuits ${VERSION} for ${platform}..."
print_info "URL: $url"
# Build curl command with optional authentication
local curl_cmd="curl -L"
if [ -n "$GITHUB_TOKEN" ]; then
curl_cmd="$curl_cmd --header 'authorization: Bearer ${GITHUB_TOKEN}'"
fi
curl_cmd="$curl_cmd -o ${temp_dir}/${artifact} $url"
if ! eval "$curl_cmd"; then
print_error "Failed to download release artifact"
print_error "Please check that version ${VERSION} exists for platform ${platform}"
print_error "Available releases: https://github.com/${REPO}/releases"
rm -rf "$temp_dir"
exit 1
fi
print_success "Download complete"
print_info "Extracting to ${INSTALL_DIR}..."
mkdir -p "$INSTALL_DIR"
if ! tar -xzf "${temp_dir}/${artifact}" -C "$INSTALL_DIR" --strip-components=1; then
print_error "Failed to extract archive"
rm -rf "$temp_dir"
exit 1
fi
rm -rf "$temp_dir"
print_success "Extraction complete"
}
# Handle macOS code signing/quarantine issues
handle_macos_quarantine() {
print_info "macOS detected: Removing quarantine attributes from executables..."
# Remove quarantine attribute from all executable files
if find "$INSTALL_DIR" -type f -perm +111 -exec xattr -d com.apple.quarantine {} \; 2>/dev/null; then
print_success "Quarantine attributes removed"
else
print_warning "Could not remove quarantine attributes (they may not exist)"
fi
}
# Main installation process
main() {
print_info "Setting up nomos-circuits ${VERSION}"
print_info "Installation directory: $INSTALL_DIR"
echo
# Detect platform (allow override via NOMOS_CIRCUITS_PLATFORM)
local platform_override="${NOMOS_CIRCUITS_PLATFORM:-}"
local platform
if [ -n "$platform_override" ]; then
platform="$platform_override"
print_info "Using overridden platform: $platform"
else
platform=$(detect_platform)
print_info "Detected platform: $platform"
fi
# Check existing installation
check_existing_installation
# Download and extract
download_release "$platform"
# Handle macOS quarantine if needed
if [[ "$platform" == macos-* ]]; then
echo
handle_macos_quarantine
fi
if [[ "${NOMOS_CIRCUITS_REBUILD_RAPIDSNARK:-0}" == "1" || "$platform" == *"aarch64" ]]; then
echo
print_info "Rebuilding rapidsnark prover for ${platform}..."
"${SCRIPT_DIR}/build-rapidsnark.sh" "$INSTALL_DIR"
fi
echo
print_success "Installation complete!"
echo
print_info "nomos-circuits ${VERSION} is now installed at: $INSTALL_DIR"
print_info "The following circuits are available:"
# Discover circuits by finding directories that contain a witness_generator
for dir in "$INSTALL_DIR"/*/; do
if [ -d "$dir" ]; then
local circuit_name
circuit_name=$(basename "$dir")
if [ -f "$dir/witness_generator" ]; then
echo "$circuit_name"
fi
fi
done
# Only show export instructions if not using the default location
if [ "$INSTALL_DIR" != "$DEFAULT_INSTALL_DIR" ]; then
echo
print_info "Since you're using a custom installation directory, set the environment variable:"
print_info " export NOMOS_CIRCUITS=$INSTALL_DIR"
echo
fi
}
# Run main
main

View File

@ -0,0 +1,50 @@
[package]
categories.workspace = true
description.workspace = true
edition.workspace = true
keywords.workspace = true
license.workspace = true
name = "integration-configs"
readme.workspace = true
repository.workspace = true
version = "0.1.0"
[dependencies]
blst = "0.3.11"
chain-leader = { workspace = true }
chain-network = { workspace = true }
chain-service = { workspace = true }
cryptarchia-engine = { workspace = true, features = ["serde"] }
cryptarchia-sync = { workspace = true }
ed25519-dalek = { version = "2.2.0", features = ["rand_core", "serde"] }
groth16 = { workspace = true }
hex = { version = "0.4.3", default-features = false }
key-management-system = { workspace = true }
nomos-api = { workspace = true }
nomos-blend-message = { workspace = true }
nomos-blend-service = { workspace = true, features = ["libp2p"] }
nomos-core = { workspace = true }
nomos-da-dispersal = { workspace = true }
nomos-da-network-core = { workspace = true }
nomos-da-network-service = { workspace = true }
nomos-da-sampling = { workspace = true }
nomos-da-verifier = { workspace = true }
nomos-executor = { workspace = true, default-features = false, features = ["testing", "tracing"] }
nomos-ledger = { workspace = true, features = ["serde"] }
nomos-libp2p = { workspace = true }
nomos-node = { workspace = true, default-features = false, features = ["testing"] }
nomos-sdp = { workspace = true }
nomos-time = { workspace = true }
nomos-tracing = { workspace = true }
nomos-tracing-service = { workspace = true }
nomos-utils = { workspace = true }
nomos-wallet = { workspace = true }
num-bigint = { version = "0.4", default-features = false }
rand = { workspace = true }
subnetworks-assignations = { workspace = true }
time = { version = "0.3", default-features = true }
tracing = { workspace = true }
zksign = { workspace = true }
[lints]
workspace = true

View File

@ -0,0 +1,14 @@
use groth16::fr_to_bytes;
use key_management_system::{
backend::preload::KeyId,
keys::{Key, secured_key::SecuredKey as _},
};
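/// Derives the preload-backend `KeyId` for a key: the hex-encoded bytes of its
/// public key (the Ed25519 public key bytes, or the ZK public key's field element).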
#[must_use]
pub fn key_id_for_preload_backend(key: &Key) -> KeyId {
let key_id_bytes = match key {
Key::Ed25519(ed25519_secret_key) => ed25519_secret_key.as_public_key().to_bytes(),
Key::Zk(zk_secret_key) => fr_to_bytes(zk_secret_key.as_public_key().as_fr()),
};
hex::encode(key_id_bytes)
}

View File

@ -0,0 +1 @@
pub mod kms;

View File

@ -0,0 +1,45 @@
use std::{env, net::Ipv4Addr, ops::Mul as _, sync::LazyLock, time::Duration};
use nomos_core::sdp::ProviderId;
use nomos_libp2p::{Multiaddr, PeerId, multiaddr};
pub mod common;
pub mod nodes;
pub mod topology;
static IS_SLOW_TEST_ENV: LazyLock<bool> =
LazyLock::new(|| env::var("SLOW_TEST_ENV").is_ok_and(|s| s == "true"));
pub static IS_DEBUG_TRACING: LazyLock<bool> = LazyLock::new(|| {
env::var("NOMOS_TESTS_TRACING").is_ok_and(|val| val.eq_ignore_ascii_case("true"))
});
/// In slow test environments, such as coverage runs on Codecov, use a 2x timeout.
#[must_use]
pub fn adjust_timeout(d: Duration) -> Duration {
if *IS_SLOW_TEST_ENV { d.mul(2) } else { d }
}
#[must_use]
pub fn node_address_from_port(port: u16) -> Multiaddr {
multiaddr(Ipv4Addr::LOCALHOST, port)
}
#[must_use]
pub fn secret_key_to_peer_id(node_key: nomos_libp2p::ed25519::SecretKey) -> PeerId {
PeerId::from_public_key(
&nomos_libp2p::ed25519::Keypair::from(node_key)
.public()
.into(),
)
}
#[must_use]
pub fn secret_key_to_provider_id(node_key: nomos_libp2p::ed25519::SecretKey) -> ProviderId {
ProviderId::try_from(
nomos_libp2p::ed25519::Keypair::from(node_key)
.public()
.to_bytes(),
)
.unwrap()
}

View File

@ -0,0 +1,328 @@
use std::{
collections::HashSet,
num::{NonZeroU64, NonZeroUsize},
path::PathBuf,
time::Duration,
};
use chain_leader::LeaderSettings;
use chain_network::{ChainNetworkSettings, OrphanConfig, SyncConfig};
use chain_service::{CryptarchiaSettings, StartingState};
use cryptarchia_engine::time::SlotConfig;
use key_management_system::keys::{Key, ZkKey};
use nomos_blend_service::{
core::settings::{CoverTrafficSettings, MessageDelayerSettings, SchedulerSettings, ZkSettings},
settings::TimingSettings,
};
use nomos_da_dispersal::{
DispersalServiceSettings,
backend::kzgrs::{DispersalKZGRSBackendSettings, EncoderSettings},
};
use nomos_da_network_core::protocols::sampling::SubnetsConfig;
use nomos_da_network_service::{
NetworkConfig as DaNetworkConfig,
api::http::ApiAdapterSettings,
backends::libp2p::{
common::DaNetworkBackendSettings, executor::DaNetworkExecutorBackendSettings,
},
};
use nomos_da_sampling::{
DaSamplingServiceSettings, backend::kzgrs::KzgrsSamplingBackendSettings,
verifier::kzgrs::KzgrsDaVerifierSettings as SamplingVerifierSettings,
};
use nomos_da_verifier::{
DaVerifierServiceSettings,
backend::{kzgrs::KzgrsDaVerifierSettings, trigger::MempoolPublishTriggerConfig},
storage::adapters::rocksdb::RocksAdapterSettings as VerifierStorageAdapterSettings,
};
use nomos_executor::config::Config as ExecutorConfig;
use nomos_node::{
RocksBackendSettings,
api::backend::AxumBackendSettings as NodeAxumBackendSettings,
config::{
blend::{
deployment::{self as blend_deployment},
serde as blend_serde,
},
deployment::{CustomDeployment, Settings as NodeDeploymentSettings},
mempool::MempoolConfig,
network::deployment::Settings as NetworkDeploymentSettings,
},
};
use nomos_sdp::SdpSettings;
use nomos_time::{
TimeServiceSettings,
backends::{NtpTimeBackendSettings, ntp::async_client::NTPClientSettings},
};
use nomos_utils::math::NonNegativeF64;
use nomos_wallet::WalletServiceSettings;
use crate::{
adjust_timeout,
common::kms::key_id_for_preload_backend,
topology::configs::{
GeneralConfig, blend::GeneralBlendConfig as TopologyBlendConfig, wallet::WalletAccount,
},
};
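/// Builds a complete executor node configuration from the general topology
/// config used by the integration tests.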
#[must_use]
#[expect(clippy::too_many_lines, reason = "TODO: Address this at some point.")]
pub fn create_executor_config(config: GeneralConfig) -> ExecutorConfig {
let (blend_user_config, deployment_settings) = build_blend_service_config(&config.blend_config);
ExecutorConfig {
network: config.network_config,
blend: blend_user_config,
deployment: deployment_settings,
cryptarchia: CryptarchiaSettings {
config: config.consensus_config.ledger_config.clone(),
starting_state: StartingState::Genesis {
genesis_tx: config.consensus_config.genesis_tx,
},
recovery_file: PathBuf::from("./recovery/cryptarchia.json"),
bootstrap: chain_service::BootstrapConfig {
prolonged_bootstrap_period: Duration::from_secs(3),
force_bootstrap: false,
offline_grace_period: chain_service::OfflineGracePeriodConfig {
grace_period: Duration::from_secs(20 * 60),
state_recording_interval: Duration::from_secs(60),
},
},
},
chain_network: ChainNetworkSettings {
config: config.consensus_config.ledger_config.clone(),
network_adapter_settings:
chain_network::network::adapters::libp2p::LibP2pAdapterSettings {
topic: String::from(nomos_node::CONSENSUS_TOPIC),
},
bootstrap: chain_network::BootstrapConfig {
ibd: chain_network::IbdConfig {
peers: HashSet::new(),
delay_before_new_download: Duration::from_secs(10),
},
},
sync: SyncConfig {
orphan: OrphanConfig {
max_orphan_cache_size: NonZeroUsize::new(5)
.expect("Max orphan cache size must be non-zero"),
},
},
},
cryptarchia_leader: LeaderSettings {
transaction_selector_settings: (),
config: config.consensus_config.ledger_config.clone(),
leader_config: config.consensus_config.leader_config.clone(),
blend_broadcast_settings:
nomos_blend_service::core::network::libp2p::Libp2pBroadcastSettings {
topic: String::from(nomos_node::CONSENSUS_TOPIC),
},
},
da_network: DaNetworkConfig {
backend: DaNetworkExecutorBackendSettings {
validator_settings: DaNetworkBackendSettings {
node_key: config.da_config.node_key,
listening_address: config.da_config.listening_address,
policy_settings: config.da_config.policy_settings,
monitor_settings: config.da_config.monitor_settings,
balancer_interval: config.da_config.balancer_interval,
redial_cooldown: config.da_config.redial_cooldown,
replication_settings: config.da_config.replication_settings,
subnets_settings: SubnetsConfig {
num_of_subnets: config.da_config.num_samples as usize,
shares_retry_limit: config.da_config.retry_shares_limit,
commitments_retry_limit: config.da_config.retry_commitments_limit,
},
},
num_subnets: config.da_config.num_subnets,
},
membership: config.da_config.membership.clone(),
api_adapter_settings: ApiAdapterSettings {
api_port: config.api_config.address.port(),
is_secure: false,
},
subnet_refresh_interval: config.da_config.subnets_refresh_interval,
subnet_threshold: config.da_config.num_samples as usize,
min_session_members: config.da_config.num_samples as usize,
},
da_verifier: DaVerifierServiceSettings {
share_verifier_settings: KzgrsDaVerifierSettings {
global_params_path: config.da_config.global_params_path.clone(),
domain_size: config.da_config.num_subnets as usize,
},
tx_verifier_settings: (),
network_adapter_settings: (),
storage_adapter_settings: VerifierStorageAdapterSettings {
blob_storage_directory: "./".into(),
},
mempool_trigger_settings: MempoolPublishTriggerConfig {
publish_threshold: NonNegativeF64::try_from(0.8).unwrap(),
share_duration: Duration::from_secs(5),
prune_duration: Duration::from_secs(30),
prune_interval: Duration::from_secs(5),
},
},
tracing: config.tracing_config.tracing_settings,
http: nomos_api::ApiServiceSettings {
backend_settings: NodeAxumBackendSettings {
address: config.api_config.address,
rate_limit_per_second: 10000,
rate_limit_burst: 10000,
max_concurrent_requests: 1000,
..Default::default()
},
},
da_sampling: DaSamplingServiceSettings {
sampling_settings: KzgrsSamplingBackendSettings {
num_samples: config.da_config.num_samples,
num_subnets: config.da_config.num_subnets,
old_blobs_check_interval: config.da_config.old_blobs_check_interval,
blobs_validity_duration: config.da_config.blobs_validity_duration,
},
share_verifier_settings: SamplingVerifierSettings {
global_params_path: config.da_config.global_params_path.clone(),
domain_size: config.da_config.num_subnets as usize,
},
commitments_wait_duration: Duration::from_secs(1),
sdp_blob_trigger_sampling_delay: adjust_timeout(Duration::from_secs(5)),
},
storage: RocksBackendSettings {
db_path: "./db".into(),
read_only: false,
column_family: Some("blocks".into()),
},
da_dispersal: DispersalServiceSettings {
backend: DispersalKZGRSBackendSettings {
encoder_settings: EncoderSettings {
num_columns: config.da_config.num_subnets as usize,
with_cache: false,
global_params_path: config.da_config.global_params_path,
},
dispersal_timeout: Duration::from_secs(20),
retry_cooldown: Duration::from_secs(3),
retry_limit: 2,
},
},
time: TimeServiceSettings {
backend_settings: NtpTimeBackendSettings {
ntp_server: config.time_config.ntp_server,
ntp_client_settings: NTPClientSettings {
timeout: config.time_config.timeout,
listening_interface: config.time_config.interface,
},
update_interval: config.time_config.update_interval,
slot_config: SlotConfig {
slot_duration: config.time_config.slot_duration,
chain_start_time: config.time_config.chain_start_time,
},
epoch_config: config.consensus_config.ledger_config.epoch_config,
base_period_length: config.consensus_config.ledger_config.base_period_length(),
},
},
mempool: MempoolConfig {
pool_recovery_path: "./recovery/mempool.json".into(),
},
sdp: SdpSettings { declaration: None },
wallet: WalletServiceSettings {
known_keys: {
let mut keys = HashSet::from_iter([config.consensus_config.leader_config.pk]);
keys.extend(
config
.consensus_config
.wallet_accounts
.iter()
.map(WalletAccount::public_key),
);
keys
},
},
key_management: config.kms_config,
testing_http: nomos_api::ApiServiceSettings {
backend_settings: NodeAxumBackendSettings {
address: config.api_config.testing_http_address,
rate_limit_per_second: 10000,
rate_limit_burst: 10000,
max_concurrent_requests: 1000,
..Default::default()
},
},
}
}
fn build_blend_service_config(
config: &TopologyBlendConfig,
) -> (blend_serde::Config, NodeDeploymentSettings) {
let zk_key_id =
key_id_for_preload_backend(&Key::from(ZkKey::new(config.secret_zk_key.clone())));
let backend_core = &config.backend_core;
let backend_edge = &config.backend_edge;
let user = blend_serde::Config {
common: blend_serde::common::Config {
non_ephemeral_signing_key: config.private_key.clone(),
recovery_path_prefix: PathBuf::from("./recovery/blend"),
},
core: blend_serde::core::Config {
backend: blend_serde::core::BackendConfig {
listening_address: backend_core.listening_address.clone(),
core_peering_degree: backend_core.core_peering_degree.clone(),
edge_node_connection_timeout: backend_core.edge_node_connection_timeout,
max_edge_node_incoming_connections: backend_core.max_edge_node_incoming_connections,
max_dial_attempts_per_peer: backend_core.max_dial_attempts_per_peer,
},
zk: ZkSettings {
secret_key_kms_id: zk_key_id,
},
},
edge: blend_serde::edge::Config {
backend: blend_serde::edge::BackendConfig {
max_dial_attempts_per_peer_per_message: backend_edge
.max_dial_attempts_per_peer_per_message,
replication_factor: backend_edge.replication_factor,
},
},
};
let deployment_settings = blend_deployment::Settings {
common: blend_deployment::CommonSettings {
num_blend_layers: NonZeroU64::try_from(1).unwrap(),
minimum_network_size: NonZeroU64::try_from(1).unwrap(),
timing: TimingSettings {
round_duration: Duration::from_secs(1),
rounds_per_interval: NonZeroU64::try_from(30u64).unwrap(),
rounds_per_session: NonZeroU64::try_from(648_000u64).unwrap(),
rounds_per_observation_window: NonZeroU64::try_from(30u64).unwrap(),
rounds_per_session_transition_period: NonZeroU64::try_from(30u64).unwrap(),
epoch_transition_period_in_slots: NonZeroU64::try_from(2_600).unwrap(),
},
protocol_name: backend_core.protocol_name.clone(),
},
core: blend_deployment::CoreSettings {
scheduler: SchedulerSettings {
cover: CoverTrafficSettings {
intervals_for_safety_buffer: 100,
message_frequency_per_round: NonNegativeF64::try_from(1f64).unwrap(),
},
delayer: MessageDelayerSettings {
maximum_release_delay_in_rounds: NonZeroU64::try_from(3u64).unwrap(),
},
},
minimum_messages_coefficient: backend_core.minimum_messages_coefficient,
normalization_constant: backend_core.normalization_constant,
},
};
let deployment = NodeDeploymentSettings::Custom(CustomDeployment {
blend: deployment_settings,
network: NetworkDeploymentSettings {
identify_protocol_name: nomos_libp2p::protocol_name::StreamProtocol::new(
"/integration/nomos/identify/1.0.0",
),
kademlia_protocol_name: nomos_libp2p::protocol_name::StreamProtocol::new(
"/integration/nomos/kad/1.0.0",
),
},
});
(user, deployment)
}

View File

@ -0,0 +1,2 @@
pub mod executor;
pub mod validator;

View File

@ -0,0 +1,317 @@
use std::{
collections::HashSet,
num::{NonZeroU64, NonZeroUsize},
path::PathBuf,
time::Duration,
};
use chain_leader::LeaderSettings;
use chain_network::{ChainNetworkSettings, OrphanConfig, SyncConfig};
use chain_service::{CryptarchiaSettings, StartingState};
use cryptarchia_engine::time::SlotConfig;
use key_management_system::keys::{Key, ZkKey};
use nomos_blend_service::{
core::settings::{CoverTrafficSettings, MessageDelayerSettings, SchedulerSettings, ZkSettings},
settings::TimingSettings,
};
use nomos_da_network_core::{
protocols::sampling::SubnetsConfig, swarm::DAConnectionPolicySettings,
};
use nomos_da_network_service::{
NetworkConfig as DaNetworkConfig, api::http::ApiAdapterSettings,
backends::libp2p::common::DaNetworkBackendSettings,
};
use nomos_da_sampling::{
DaSamplingServiceSettings, backend::kzgrs::KzgrsSamplingBackendSettings,
verifier::kzgrs::KzgrsDaVerifierSettings as SamplingVerifierSettings,
};
use nomos_da_verifier::{
DaVerifierServiceSettings,
backend::{kzgrs::KzgrsDaVerifierSettings, trigger::MempoolPublishTriggerConfig},
storage::adapters::rocksdb::RocksAdapterSettings as VerifierStorageAdapterSettings,
};
use nomos_node::{
Config as ValidatorConfig, RocksBackendSettings,
api::backend::AxumBackendSettings as NodeAxumBackendSettings,
config::{
blend::{
deployment::{self as blend_deployment},
serde as blend_serde,
},
deployment::{CustomDeployment, Settings as NodeDeploymentSettings},
mempool::MempoolConfig,
network::deployment::Settings as NetworkDeploymentSettings,
},
};
use nomos_sdp::SdpSettings;
use nomos_time::{
TimeServiceSettings,
backends::{NtpTimeBackendSettings, ntp::async_client::NTPClientSettings},
};
use nomos_utils::math::NonNegativeF64;
use nomos_wallet::WalletServiceSettings;
use crate::{
adjust_timeout,
common::kms::key_id_for_preload_backend,
topology::configs::{
GeneralConfig, blend::GeneralBlendConfig as TopologyBlendConfig, wallet::WalletAccount,
},
};
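/// Builds a complete validator node configuration from the general topology
/// config used by the integration tests.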
#[must_use]
#[expect(
clippy::too_many_lines,
reason = "Validator config wiring aggregates many service settings"
)]
pub fn create_validator_config(config: GeneralConfig) -> ValidatorConfig {
let da_policy_settings = config.da_config.policy_settings;
let (blend_user_config, deployment_settings) = build_blend_service_config(&config.blend_config);
ValidatorConfig {
network: config.network_config,
blend: blend_user_config,
deployment: deployment_settings,
cryptarchia: CryptarchiaSettings {
config: config.consensus_config.ledger_config.clone(),
starting_state: StartingState::Genesis {
genesis_tx: config.consensus_config.genesis_tx,
},
recovery_file: PathBuf::from("./recovery/cryptarchia.json"),
bootstrap: chain_service::BootstrapConfig {
prolonged_bootstrap_period: config.bootstrapping_config.prolonged_bootstrap_period,
force_bootstrap: false,
offline_grace_period: chain_service::OfflineGracePeriodConfig {
grace_period: Duration::from_secs(20 * 60),
state_recording_interval: Duration::from_secs(60),
},
},
},
chain_network: ChainNetworkSettings {
config: config.consensus_config.ledger_config.clone(),
network_adapter_settings:
chain_network::network::adapters::libp2p::LibP2pAdapterSettings {
topic: String::from(nomos_node::CONSENSUS_TOPIC),
},
bootstrap: chain_network::BootstrapConfig {
ibd: chain_network::IbdConfig {
peers: HashSet::new(),
delay_before_new_download: Duration::from_secs(10),
},
},
sync: SyncConfig {
orphan: OrphanConfig {
max_orphan_cache_size: NonZeroUsize::new(5)
.expect("Max orphan cache size must be non-zero"),
},
},
},
cryptarchia_leader: LeaderSettings {
transaction_selector_settings: (),
config: config.consensus_config.ledger_config.clone(),
leader_config: config.consensus_config.leader_config.clone(),
blend_broadcast_settings:
nomos_blend_service::core::network::libp2p::Libp2pBroadcastSettings {
topic: String::from(nomos_node::CONSENSUS_TOPIC),
},
},
da_network: DaNetworkConfig {
backend: DaNetworkBackendSettings {
node_key: config.da_config.node_key,
listening_address: config.da_config.listening_address,
policy_settings: DAConnectionPolicySettings {
min_dispersal_peers: 0,
min_replication_peers: da_policy_settings.min_replication_peers,
max_dispersal_failures: da_policy_settings.max_dispersal_failures,
max_sampling_failures: da_policy_settings.max_sampling_failures,
max_replication_failures: da_policy_settings.max_replication_failures,
malicious_threshold: da_policy_settings.malicious_threshold,
},
monitor_settings: config.da_config.monitor_settings,
balancer_interval: config.da_config.balancer_interval,
redial_cooldown: config.da_config.redial_cooldown,
replication_settings: config.da_config.replication_settings,
subnets_settings: SubnetsConfig {
num_of_subnets: config.da_config.num_samples as usize,
shares_retry_limit: config.da_config.retry_shares_limit,
commitments_retry_limit: config.da_config.retry_commitments_limit,
},
},
membership: config.da_config.membership.clone(),
api_adapter_settings: ApiAdapterSettings {
api_port: config.api_config.address.port(),
is_secure: false,
},
subnet_refresh_interval: config.da_config.subnets_refresh_interval,
subnet_threshold: config.da_config.num_samples as usize,
min_session_members: config.da_config.num_samples as usize,
},
da_verifier: DaVerifierServiceSettings {
share_verifier_settings: KzgrsDaVerifierSettings {
global_params_path: config.da_config.global_params_path.clone(),
domain_size: config.da_config.num_subnets as usize,
},
tx_verifier_settings: (),
network_adapter_settings: (),
storage_adapter_settings: VerifierStorageAdapterSettings {
blob_storage_directory: "./".into(),
},
mempool_trigger_settings: MempoolPublishTriggerConfig {
publish_threshold: NonNegativeF64::try_from(0.8).unwrap(),
share_duration: Duration::from_secs(5),
prune_duration: Duration::from_secs(30),
prune_interval: Duration::from_secs(5),
},
},
tracing: config.tracing_config.tracing_settings,
http: nomos_api::ApiServiceSettings {
backend_settings: NodeAxumBackendSettings {
address: config.api_config.address,
rate_limit_per_second: 10000,
rate_limit_burst: 10000,
max_concurrent_requests: 1000,
..Default::default()
},
},
da_sampling: DaSamplingServiceSettings {
sampling_settings: KzgrsSamplingBackendSettings {
num_samples: config.da_config.num_samples,
num_subnets: config.da_config.num_subnets,
old_blobs_check_interval: config.da_config.old_blobs_check_interval,
blobs_validity_duration: config.da_config.blobs_validity_duration,
},
share_verifier_settings: SamplingVerifierSettings {
global_params_path: config.da_config.global_params_path,
domain_size: config.da_config.num_subnets as usize,
},
commitments_wait_duration: Duration::from_secs(1),
sdp_blob_trigger_sampling_delay: adjust_timeout(Duration::from_secs(5)),
},
storage: RocksBackendSettings {
db_path: "./db".into(),
read_only: false,
column_family: Some("blocks".into()),
},
time: TimeServiceSettings {
backend_settings: NtpTimeBackendSettings {
ntp_server: config.time_config.ntp_server,
ntp_client_settings: NTPClientSettings {
timeout: config.time_config.timeout,
listening_interface: config.time_config.interface,
},
update_interval: config.time_config.update_interval,
slot_config: SlotConfig {
slot_duration: config.time_config.slot_duration,
chain_start_time: config.time_config.chain_start_time,
},
epoch_config: config.consensus_config.ledger_config.epoch_config,
base_period_length: config.consensus_config.ledger_config.base_period_length(),
},
},
mempool: MempoolConfig {
pool_recovery_path: "./recovery/mempool.json".into(),
},
sdp: SdpSettings { declaration: None },
wallet: WalletServiceSettings {
known_keys: {
let mut keys = HashSet::from_iter([config.consensus_config.leader_config.pk]);
keys.extend(
config
.consensus_config
.wallet_accounts
.iter()
.map(WalletAccount::public_key),
);
keys
},
},
key_management: config.kms_config,
testing_http: nomos_api::ApiServiceSettings {
backend_settings: NodeAxumBackendSettings {
address: config.api_config.testing_http_address,
rate_limit_per_second: 10000,
rate_limit_burst: 10000,
max_concurrent_requests: 1000,
..Default::default()
},
},
}
}
fn build_blend_service_config(
config: &TopologyBlendConfig,
) -> (blend_serde::Config, NodeDeploymentSettings) {
let zk_key_id =
key_id_for_preload_backend(&Key::from(ZkKey::new(config.secret_zk_key.clone())));
let backend_core = &config.backend_core;
let backend_edge = &config.backend_edge;
let user = blend_serde::Config {
common: blend_serde::common::Config {
non_ephemeral_signing_key: config.private_key.clone(),
recovery_path_prefix: PathBuf::from("./recovery/blend"),
},
core: blend_serde::core::Config {
backend: blend_serde::core::BackendConfig {
listening_address: backend_core.listening_address.clone(),
core_peering_degree: backend_core.core_peering_degree.clone(),
edge_node_connection_timeout: backend_core.edge_node_connection_timeout,
max_edge_node_incoming_connections: backend_core.max_edge_node_incoming_connections,
max_dial_attempts_per_peer: backend_core.max_dial_attempts_per_peer,
},
zk: ZkSettings {
secret_key_kms_id: zk_key_id,
},
},
edge: blend_serde::edge::Config {
backend: blend_serde::edge::BackendConfig {
max_dial_attempts_per_peer_per_message: backend_edge
.max_dial_attempts_per_peer_per_message,
replication_factor: backend_edge.replication_factor,
},
},
};
let deployment_settings = blend_deployment::Settings {
common: blend_deployment::CommonSettings {
num_blend_layers: NonZeroU64::try_from(1).unwrap(),
minimum_network_size: NonZeroU64::try_from(1).unwrap(),
timing: TimingSettings {
round_duration: Duration::from_secs(1),
rounds_per_interval: NonZeroU64::try_from(30u64).unwrap(),
rounds_per_session: NonZeroU64::try_from(648_000u64).unwrap(),
rounds_per_observation_window: NonZeroU64::try_from(30u64).unwrap(),
rounds_per_session_transition_period: NonZeroU64::try_from(30u64).unwrap(),
epoch_transition_period_in_slots: NonZeroU64::try_from(2_600).unwrap(),
},
protocol_name: backend_core.protocol_name.clone(),
},
core: blend_deployment::CoreSettings {
scheduler: SchedulerSettings {
cover: CoverTrafficSettings {
intervals_for_safety_buffer: 100,
message_frequency_per_round: NonNegativeF64::try_from(1f64).unwrap(),
},
delayer: MessageDelayerSettings {
maximum_release_delay_in_rounds: NonZeroU64::try_from(3u64).unwrap(),
},
},
minimum_messages_coefficient: backend_core.minimum_messages_coefficient,
normalization_constant: backend_core.normalization_constant,
},
};
let deployment = NodeDeploymentSettings::Custom(CustomDeployment {
blend: deployment_settings,
network: NetworkDeploymentSettings {
identify_protocol_name: nomos_libp2p::protocol_name::StreamProtocol::new(
"/integration/nomos/identify/1.0.0",
),
kademlia_protocol_name: nomos_libp2p::protocol_name::StreamProtocol::new(
"/integration/nomos/kad/1.0.0",
),
},
});
(user, deployment)
}

View File

@ -0,0 +1,23 @@
use std::net::SocketAddr;
use nomos_utils::net::get_available_tcp_port;
#[derive(Clone)]
pub struct GeneralApiConfig {
pub address: SocketAddr,
pub testing_http_address: SocketAddr,
}
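/// Allocates an API address and a testing HTTP address on free local TCP ports
/// for each node id.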
#[must_use]
pub fn create_api_configs(ids: &[[u8; 32]]) -> Vec<GeneralApiConfig> {
ids.iter()
.map(|_| GeneralApiConfig {
address: format!("127.0.0.1:{}", get_available_tcp_port().unwrap())
.parse()
.unwrap(),
testing_http_address: format!("127.0.0.1:{}", get_available_tcp_port().unwrap())
.parse()
.unwrap(),
})
.collect()
}

View File

@ -0,0 +1,72 @@
use core::time::Duration;
use std::{num::NonZeroU64, str::FromStr as _};
use ed25519_dalek::SigningKey;
use nomos_blend_message::crypto::keys::Ed25519PrivateKey;
use nomos_blend_service::{
core::backends::libp2p::Libp2pBlendBackendSettings as Libp2pCoreBlendBackendSettings,
edge::backends::libp2p::Libp2pBlendBackendSettings as Libp2pEdgeBlendBackendSettings,
};
use nomos_libp2p::{Multiaddr, protocol_name::StreamProtocol};
use num_bigint::BigUint;
use zksign::SecretKey;
#[derive(Clone)]
pub struct GeneralBlendConfig {
pub backend_core: Libp2pCoreBlendBackendSettings,
pub backend_edge: Libp2pEdgeBlendBackendSettings,
pub private_key: Ed25519PrivateKey,
pub secret_zk_key: SecretKey,
pub signer: SigningKey,
}
/// Builds blend configs for each node.
///
/// # Panics
///
/// Panics if the provided port strings cannot be parsed into valid `Multiaddr`s
/// or if any of the numeric blend parameters are zero, which would make the
/// libp2p configuration invalid.
#[must_use]
pub fn create_blend_configs(ids: &[[u8; 32]], ports: &[u16]) -> Vec<GeneralBlendConfig> {
ids.iter()
.zip(ports)
.map(|(id, port)| {
let signer = SigningKey::from_bytes(id);
let private_key = Ed25519PrivateKey::from(*id);
// We need unique ZK secret keys, so we just derive them deterministically from
// the generated Ed25519 public keys, which are guaranteed to be unique because
// they are in turn derived from the node ID.
let secret_zk_key =
SecretKey::from(BigUint::from_bytes_le(private_key.public_key().as_bytes()));
GeneralBlendConfig {
backend_core: Libp2pCoreBlendBackendSettings {
listening_address: Multiaddr::from_str(&format!(
"/ip4/127.0.0.1/udp/{port}/quic-v1",
))
.unwrap(),
core_peering_degree: 1..=3,
minimum_messages_coefficient: NonZeroU64::try_from(1)
.expect("Minimum messages coefficient cannot be zero."),
normalization_constant: 1.03f64
.try_into()
.expect("Normalization constant cannot be negative."),
edge_node_connection_timeout: Duration::from_secs(1),
max_edge_node_incoming_connections: 300,
max_dial_attempts_per_peer: NonZeroU64::try_from(3)
.expect("Max dial attempts per peer cannot be zero."),
protocol_name: StreamProtocol::new("/blend/integration-tests"),
},
backend_edge: Libp2pEdgeBlendBackendSettings {
max_dial_attempts_per_peer_per_message: 1.try_into().unwrap(),
protocol_name: StreamProtocol::new("/blend/integration-tests"),
replication_factor: 1.try_into().unwrap(),
},
private_key,
secret_zk_key,
signer,
}
})
.collect()
}

View File

@ -0,0 +1,20 @@
use std::time::Duration;
#[derive(Clone)]
pub struct GeneralBootstrapConfig {
pub prolonged_bootstrap_period: Duration,
}
pub const SHORT_PROLONGED_BOOTSTRAP_PERIOD: Duration = Duration::from_secs(1);
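/// Creates one bootstrap config per node id, all sharing the given prolonged
/// bootstrap period.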
#[must_use]
pub fn create_bootstrap_configs(
ids: &[[u8; 32]],
prolonged_bootstrap_period: Duration,
) -> Vec<GeneralBootstrapConfig> {
ids.iter()
.map(|_| GeneralBootstrapConfig {
prolonged_bootstrap_period,
})
.collect()
}

View File

@ -0,0 +1,343 @@
use std::{num::NonZero, sync::Arc};
use chain_leader::LeaderConfig;
use cryptarchia_engine::EpochConfig;
use ed25519_dalek::ed25519::signature::SignerMut as _;
use groth16::CompressedGroth16Proof;
use nomos_core::{
mantle::{
MantleTx, Note, OpProof, Utxo,
genesis_tx::GenesisTx,
ledger::Tx as LedgerTx,
ops::{
Op,
channel::{ChannelId, Ed25519PublicKey, MsgId, inscribe::InscriptionOp},
},
},
sdp::{DeclarationMessage, Locator, ProviderId, ServiceParameters, ServiceType},
};
use nomos_node::{SignedMantleTx, Transaction as _};
use num_bigint::BigUint;
use zksign::{PublicKey, SecretKey};
use super::wallet::{WalletAccount, WalletConfig};
#[derive(Clone)]
pub struct ConsensusParams {
pub n_participants: usize,
pub security_param: NonZero<u32>,
pub active_slot_coeff: f64,
}
impl ConsensusParams {
#[must_use]
pub const fn default_for_participants(n_participants: usize) -> Self {
Self {
n_participants,
// By setting the slot coefficient close to 1, we increase the probability of
// multiple blocks (forks) being produced in the same slot. Setting the security
// parameter to some value > 1 ensures nodes have some time to sync before
// deciding on the longest chain.
security_param: NonZero::new(10).unwrap(),
// a block should be produced (on average) almost every slot
active_slot_coeff: 0.9,
}
}
}
#[derive(Clone)]
pub struct ProviderInfo {
pub service_type: ServiceType,
pub provider_sk: ed25519_dalek::SigningKey,
pub zk_sk: SecretKey,
pub locator: Locator,
pub note: ServiceNote,
}
impl ProviderInfo {
#[must_use]
pub fn provider_id(&self) -> ProviderId {
ProviderId(self.provider_sk.verifying_key())
}
#[must_use]
pub fn zk_id(&self) -> PublicKey {
self.zk_sk.to_public_key()
}
}
/// General consensus configuration for a chosen participant, which can later be
/// converted into specific service configurations.
#[derive(Clone)]
pub struct GeneralConsensusConfig {
pub leader_config: LeaderConfig,
pub ledger_config: nomos_ledger::Config,
pub genesis_tx: GenesisTx,
pub utxos: Vec<Utxo>,
pub blend_notes: Vec<ServiceNote>,
pub da_notes: Vec<ServiceNote>,
pub wallet_accounts: Vec<WalletAccount>,
}
#[derive(Clone)]
pub struct ServiceNote {
pub pk: PublicKey,
pub sk: SecretKey,
pub note: Note,
pub output_index: usize,
}
fn create_genesis_tx(utxos: &[Utxo]) -> GenesisTx {
// Create a genesis inscription op (similar to config.yaml)
let inscription = InscriptionOp {
channel_id: ChannelId::from([0; 32]),
inscription: vec![103, 101, 110, 101, 115, 105, 115], // "genesis" in bytes
parent: MsgId::root(),
signer: Ed25519PublicKey::from_bytes(&[0; 32]).unwrap(),
};
// Create ledger transaction with the utxos as outputs
let outputs: Vec<Note> = utxos.iter().map(|u| u.note).collect();
let ledger_tx = LedgerTx::new(vec![], outputs);
// Create the mantle transaction
let mantle_tx = MantleTx {
ops: vec![Op::ChannelInscribe(inscription)],
ledger_tx,
execution_gas_price: 0,
storage_gas_price: 0,
};
let signed_mantle_tx = SignedMantleTx {
mantle_tx,
ops_proofs: vec![OpProof::NoProof],
ledger_tx_proof: zksign::Signature::new(CompressedGroth16Proof::from_bytes(&[0u8; 128])),
};
// Wrap in GenesisTx
GenesisTx::from_tx(signed_mantle_tx).expect("Invalid genesis transaction")
}
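/// Creates one consensus config per node id; all configs share the same genesis
/// transaction, ledger config and wallet accounts, while each gets its own leader keys.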
#[must_use]
pub fn create_consensus_configs(
ids: &[[u8; 32]],
consensus_params: &ConsensusParams,
wallet: &WalletConfig,
) -> Vec<GeneralConsensusConfig> {
let mut leader_keys = Vec::new();
let mut blend_notes = Vec::new();
let mut da_notes = Vec::new();
let utxos = create_utxos_for_leader_and_services(
ids,
&mut leader_keys,
&mut blend_notes,
&mut da_notes,
);
let utxos = append_wallet_utxos(utxos, wallet);
let genesis_tx = create_genesis_tx(&utxos);
let ledger_config = nomos_ledger::Config {
epoch_config: EpochConfig {
epoch_stake_distribution_stabilization: NonZero::new(3).unwrap(),
epoch_period_nonce_buffer: NonZero::new(3).unwrap(),
epoch_period_nonce_stabilization: NonZero::new(4).unwrap(),
},
consensus_config: cryptarchia_engine::Config {
security_param: consensus_params.security_param,
active_slot_coeff: consensus_params.active_slot_coeff,
},
sdp_config: nomos_ledger::mantle::sdp::Config {
service_params: Arc::new(
[
(
ServiceType::BlendNetwork,
ServiceParameters {
lock_period: 10,
inactivity_period: 20,
retention_period: 100,
timestamp: 0,
session_duration: 1000,
},
),
(
ServiceType::DataAvailability,
ServiceParameters {
lock_period: 10,
inactivity_period: 20,
retention_period: 100,
timestamp: 0,
session_duration: 1000,
},
),
]
.into(),
),
min_stake: nomos_core::sdp::MinStake {
threshold: 1,
timestamp: 0,
},
},
};
leader_keys
.into_iter()
.map(|(pk, sk)| GeneralConsensusConfig {
leader_config: LeaderConfig { pk, sk },
ledger_config: ledger_config.clone(),
genesis_tx: genesis_tx.clone(),
utxos: utxos.clone(),
da_notes: da_notes.clone(),
blend_notes: blend_notes.clone(),
wallet_accounts: wallet.accounts.clone(),
})
.collect()
}
fn create_utxos_for_leader_and_services(
ids: &[[u8; 32]],
leader_keys: &mut Vec<(PublicKey, SecretKey)>,
blend_notes: &mut Vec<ServiceNote>,
da_notes: &mut Vec<ServiceNote>,
) -> Vec<Utxo> {
let derive_key_material = |prefix: &[u8], id_bytes: &[u8]| -> [u8; 16] {
let mut sk_data = [0; 16];
let prefix_len = prefix.len();
sk_data[..prefix_len].copy_from_slice(prefix);
let remaining_len = 16 - prefix_len;
sk_data[prefix_len..].copy_from_slice(&id_bytes[..remaining_len]);
sk_data
};
let mut utxos = Vec::new();
// Track the output index that will be assigned by the ledger tx.
let mut output_index = 0;
// Create notes for leader, Blend and DA declarations.
for &id in ids {
let sk_leader_data = derive_key_material(b"ld", &id);
let sk_leader = SecretKey::from(BigUint::from_bytes_le(&sk_leader_data));
let pk_leader = sk_leader.to_public_key();
leader_keys.push((pk_leader, sk_leader));
utxos.push(Utxo {
note: Note::new(1_000, pk_leader),
tx_hash: BigUint::from(0u8).into(),
output_index: 0,
});
output_index += 1;
let sk_da_data = derive_key_material(b"da", &id);
let sk_da = SecretKey::from(BigUint::from_bytes_le(&sk_da_data));
let pk_da = sk_da.to_public_key();
let note_da = Note::new(1, pk_da);
da_notes.push(ServiceNote {
pk: pk_da,
sk: sk_da,
note: note_da,
output_index,
});
utxos.push(Utxo {
note: note_da,
tx_hash: BigUint::from(0u8).into(),
output_index: 0,
});
output_index += 1;
let sk_blend_data = derive_key_material(b"bn", &id);
let sk_blend = SecretKey::from(BigUint::from_bytes_le(&sk_blend_data));
let pk_blend = sk_blend.to_public_key();
let note_blend = Note::new(1, pk_blend);
blend_notes.push(ServiceNote {
pk: pk_blend,
sk: sk_blend,
note: note_blend,
output_index,
});
utxos.push(Utxo {
note: note_blend,
tx_hash: BigUint::from(0u8).into(),
output_index: 0,
});
output_index += 1;
}
utxos
}
fn append_wallet_utxos(mut utxos: Vec<Utxo>, wallet: &WalletConfig) -> Vec<Utxo> {
for account in &wallet.accounts {
utxos.push(Utxo {
note: Note::new(account.value, account.public_key()),
tx_hash: BigUint::from(0u8).into(),
output_index: 0,
});
}
utxos
}
#[must_use]
pub fn create_genesis_tx_with_declarations(
ledger_tx: LedgerTx,
providers: Vec<ProviderInfo>,
) -> GenesisTx {
let inscription = InscriptionOp {
channel_id: ChannelId::from([0; 32]),
inscription: vec![103, 101, 110, 101, 115, 105, 115], // "genesis" in bytes
parent: MsgId::root(),
signer: Ed25519PublicKey::from_bytes(&[0; 32]).unwrap(),
};
let ledger_tx_hash = ledger_tx.hash();
let mut ops = vec![Op::ChannelInscribe(inscription)];
for provider in &providers {
let utxo = Utxo {
tx_hash: ledger_tx_hash,
output_index: provider.note.output_index,
note: provider.note.note,
};
let declaration = DeclarationMessage {
service_type: provider.service_type,
locators: vec![provider.locator.clone()],
provider_id: provider.provider_id(),
zk_id: provider.zk_id(),
locked_note_id: utxo.id(),
};
ops.push(Op::SDPDeclare(declaration));
}
let mantle_tx = MantleTx {
ops,
ledger_tx,
execution_gas_price: 0,
storage_gas_price: 0,
};
let mantle_tx_hash = mantle_tx.hash();
let mut ops_proofs = vec![OpProof::NoProof];
for mut provider in providers {
let zk_sig =
SecretKey::multi_sign(&[provider.note.sk, provider.zk_sk], mantle_tx_hash.as_ref())
.unwrap();
let ed25519_sig = provider
.provider_sk
.sign(mantle_tx_hash.as_signing_bytes().as_ref());
ops_proofs.push(OpProof::ZkAndEd25519Sigs {
zk_sig,
ed25519_sig,
});
}
let signed_mantle_tx = SignedMantleTx {
mantle_tx,
ops_proofs,
ledger_tx_proof: zksign::Signature::new(CompressedGroth16Proof::from_bytes(&[0u8; 128])),
};
GenesisTx::from_tx(signed_mantle_tx).expect("Invalid genesis transaction")
}

View File

@ -0,0 +1,212 @@
use std::{
collections::{HashMap, HashSet},
path::PathBuf,
str::FromStr as _,
sync::LazyLock,
time::Duration,
};
use ed25519_dalek::SigningKey;
use nomos_core::sdp::SessionNumber;
use nomos_da_network_core::swarm::{
DAConnectionMonitorSettings, DAConnectionPolicySettings, ReplicationConfig,
};
use nomos_libp2p::{Multiaddr, PeerId, ed25519};
use nomos_node::NomosDaMembership;
use num_bigint::BigUint;
use subnetworks_assignations::{MembershipCreator as _, MembershipHandler as _};
use zksign::SecretKey;
use crate::secret_key_to_peer_id;
pub static GLOBAL_PARAMS_PATH: LazyLock<String> = LazyLock::new(|| {
let manifest_dir = PathBuf::from(env!("CARGO_MANIFEST_DIR"));
let relative_path = PathBuf::from("../../tests/kzgrs/kzgrs_test_params");
manifest_dir
.join(relative_path)
.canonicalize()
.expect("Failed to resolve absolute path")
.to_string_lossy()
.to_string()
});
#[derive(Clone)]
pub struct DaParams {
pub subnetwork_size: usize,
pub dispersal_factor: usize,
pub num_samples: u16,
pub num_subnets: u16,
pub old_blobs_check_interval: Duration,
pub blobs_validity_duration: Duration,
pub global_params_path: String,
pub policy_settings: DAConnectionPolicySettings,
pub monitor_settings: DAConnectionMonitorSettings,
pub balancer_interval: Duration,
pub redial_cooldown: Duration,
pub replication_settings: ReplicationConfig,
pub subnets_refresh_interval: Duration,
pub retry_shares_limit: usize,
pub retry_commitments_limit: usize,
}
impl Default for DaParams {
fn default() -> Self {
Self {
subnetwork_size: 2,
dispersal_factor: 1,
num_samples: 1,
num_subnets: 2,
old_blobs_check_interval: Duration::from_secs(5),
blobs_validity_duration: Duration::from_secs(60),
global_params_path: GLOBAL_PARAMS_PATH.to_string(),
policy_settings: DAConnectionPolicySettings {
min_dispersal_peers: 1,
min_replication_peers: 1,
max_dispersal_failures: 0,
max_sampling_failures: 0,
max_replication_failures: 0,
malicious_threshold: 0,
},
monitor_settings: DAConnectionMonitorSettings {
failure_time_window: Duration::from_secs(5),
..Default::default()
},
balancer_interval: Duration::from_secs(1),
redial_cooldown: Duration::ZERO,
replication_settings: ReplicationConfig {
seen_message_cache_size: 1000,
seen_message_ttl: Duration::from_secs(3600),
},
subnets_refresh_interval: Duration::from_secs(30),
retry_shares_limit: 1,
retry_commitments_limit: 1,
}
}
}
#[derive(Debug, Clone)]
pub struct GeneralDaConfig {
pub node_key: ed25519::SecretKey,
pub signer: SigningKey,
pub peer_id: PeerId,
pub membership: NomosDaMembership,
pub listening_address: Multiaddr,
pub blob_storage_directory: PathBuf,
pub global_params_path: String,
pub verifier_sk: String,
pub verifier_index: HashSet<u16>,
pub num_samples: u16,
pub num_subnets: u16,
pub old_blobs_check_interval: Duration,
pub blobs_validity_duration: Duration,
pub policy_settings: DAConnectionPolicySettings,
pub monitor_settings: DAConnectionMonitorSettings,
pub balancer_interval: Duration,
pub redial_cooldown: Duration,
pub replication_settings: ReplicationConfig,
pub subnets_refresh_interval: Duration,
pub retry_shares_limit: usize,
pub retry_commitments_limit: usize,
pub secret_zk_key: SecretKey,
}
#[must_use]
pub fn create_da_configs(
ids: &[[u8; 32]],
da_params: &DaParams,
ports: &[u16],
) -> Vec<GeneralDaConfig> {
let mut node_keys = vec![];
let mut peer_ids = vec![];
let mut listening_addresses = vec![];
for (i, id) in ids.iter().enumerate() {
let mut node_key_bytes = *id;
let node_key = ed25519::SecretKey::try_from_bytes(&mut node_key_bytes)
.expect("Failed to generate secret key from bytes");
node_keys.push(node_key.clone());
let peer_id = secret_key_to_peer_id(node_key);
peer_ids.push(peer_id);
let listening_address =
Multiaddr::from_str(&format!("/ip4/127.0.0.1/udp/{}/quic-v1", ports[i]))
.expect("Failed to create multiaddr");
listening_addresses.push(listening_address);
}
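// Build the DA membership deterministically: peers are sorted and assigned to
// subnetworks round-robin, `dispersal_factor` members per subnetwork (empty
// assignments are used when no peers are provided).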
let membership = {
let template = NomosDaMembership::new(
SessionNumber::default(),
da_params.subnetwork_size,
da_params.dispersal_factor,
);
let mut assignations: HashMap<u16, HashSet<PeerId>> = HashMap::new();
if peer_ids.is_empty() {
for id in 0..da_params.subnetwork_size {
assignations.insert(u16::try_from(id).unwrap_or_default(), HashSet::new());
}
} else {
let mut sorted_peers = peer_ids.clone();
sorted_peers.sort_unstable();
let dispersal = da_params.dispersal_factor.max(1);
let mut peer_cycle = sorted_peers.iter().cycle();
for id in 0..da_params.subnetwork_size {
let mut members = HashSet::new();
for _ in 0..dispersal {
// cycle() only yields None when the iterator is empty, which we guard against.
if let Some(peer) = peer_cycle.next() {
members.insert(*peer);
}
}
assignations.insert(u16::try_from(id).unwrap_or_default(), members);
}
}
template.init(SessionNumber::default(), assignations)
};
ids.iter()
.zip(node_keys)
.enumerate()
.map(|(i, (id, node_key))| {
let blob_storage_directory = PathBuf::from(format!("/tmp/blob_storage_{i}"));
let verifier_sk = blst::min_sig::SecretKey::key_gen(id, &[]).unwrap();
let verifier_sk_bytes = verifier_sk.to_bytes();
let peer_id = peer_ids[i];
let signer = SigningKey::from_bytes(id);
let subnetwork_ids = membership.membership(&peer_id);
// We need unique ZK secret keys, so we just derive them deterministically from
// the generated Ed25519 public keys, which are guaranteed to be unique because
// they are in turn derived from the node IDs.
let secret_zk_key =
SecretKey::from(BigUint::from_bytes_le(signer.verifying_key().as_bytes()));
GeneralDaConfig {
node_key,
signer,
peer_id,
secret_zk_key,
membership: membership.clone(),
listening_address: listening_addresses[i].clone(),
blob_storage_directory,
global_params_path: da_params.global_params_path.clone(),
verifier_sk: hex::encode(verifier_sk_bytes),
verifier_index: subnetwork_ids,
num_samples: da_params.num_samples,
num_subnets: da_params.num_subnets,
old_blobs_check_interval: da_params.old_blobs_check_interval,
blobs_validity_duration: da_params.blobs_validity_duration,
policy_settings: da_params.policy_settings.clone(),
monitor_settings: da_params.monitor_settings.clone(),
balancer_interval: da_params.balancer_interval,
redial_cooldown: da_params.redial_cooldown,
replication_settings: da_params.replication_settings,
subnets_refresh_interval: da_params.subnets_refresh_interval,
retry_shares_limit: da_params.retry_shares_limit,
retry_commitments_limit: da_params.retry_commitments_limit,
}
})
.collect()
}

View File

@ -0,0 +1,67 @@
use core::{num::NonZeroU64, time::Duration};
use nomos_blend_service::{
core::settings::{CoverTrafficSettings, MessageDelayerSettings, SchedulerSettings},
settings::TimingSettings,
};
use nomos_libp2p::protocol_name::StreamProtocol;
use nomos_node::config::{
blend::deployment::{
CommonSettings as BlendCommonSettings, CoreSettings as BlendCoreSettings,
Settings as BlendDeploymentSettings,
},
deployment::{CustomDeployment, Settings as DeploymentSettings},
network::deployment::Settings as NetworkDeploymentSettings,
};
use nomos_utils::math::NonNegativeF64;
#[must_use]
pub fn default_e2e_deployment_settings() -> DeploymentSettings {
DeploymentSettings::Custom(CustomDeployment {
blend: BlendDeploymentSettings {
common: BlendCommonSettings {
minimum_network_size: NonZeroU64::try_from(30u64)
.expect("Minimum network size cannot be zero."),
num_blend_layers: NonZeroU64::try_from(3)
.expect("Number of blend layers cannot be zero."),
timing: TimingSettings {
round_duration: Duration::from_secs(1),
rounds_per_interval: NonZeroU64::try_from(30u64)
.expect("Rounds per interval cannot be zero."),
// (21,600 blocks * 30s per block) / 1s per round = 648,000 rounds
rounds_per_session: NonZeroU64::try_from(648_000u64)
.expect("Rounds per session cannot be zero."),
rounds_per_observation_window: NonZeroU64::try_from(30u64)
.expect("Rounds per observation window cannot be zero."),
rounds_per_session_transition_period: NonZeroU64::try_from(30u64)
.expect("Rounds per session transition period cannot be zero."),
epoch_transition_period_in_slots: NonZeroU64::try_from(2_600)
.expect("Epoch transition period in slots cannot be zero."),
},
protocol_name: StreamProtocol::new("/blend/integration-tests"),
},
core: BlendCoreSettings {
minimum_messages_coefficient: NonZeroU64::try_from(1)
.expect("Minimum messages coefficient cannot be zero."),
normalization_constant: 1.03f64
.try_into()
.expect("Normalization constant cannot be negative."),
scheduler: SchedulerSettings {
cover: CoverTrafficSettings {
intervals_for_safety_buffer: 100,
message_frequency_per_round: NonNegativeF64::try_from(1f64)
.expect("Message frequency per round cannot be negative."),
},
delayer: MessageDelayerSettings {
maximum_release_delay_in_rounds: NonZeroU64::try_from(3u64)
.expect("Maximum release delay between rounds cannot be zero."),
},
},
},
},
network: NetworkDeploymentSettings {
identify_protocol_name: StreamProtocol::new("/integration/nomos/identify/1.0.0"),
kademlia_protocol_name: StreamProtocol::new("/integration/nomos/kad/1.0.0"),
},
})
}

View File

@ -0,0 +1,164 @@
pub mod api;
pub mod blend;
pub mod bootstrap;
pub mod consensus;
pub mod da;
pub mod network;
pub mod time;
pub mod tracing;
pub mod wallet;
use blend::GeneralBlendConfig;
use consensus::{GeneralConsensusConfig, ProviderInfo, create_genesis_tx_with_declarations};
use da::GeneralDaConfig;
use key_management_system::{
backend::preload::PreloadKMSBackendSettings,
keys::{Ed25519Key, Key, ZkKey},
};
use network::GeneralNetworkConfig;
use nomos_core::{
mantle::GenesisTx as _,
sdp::{Locator, ServiceType},
};
use nomos_utils::net::get_available_udp_port;
use rand::{Rng as _, thread_rng};
use tracing::GeneralTracingConfig;
use wallet::WalletConfig;
use crate::{
common::kms::key_id_for_preload_backend,
topology::configs::{
api::GeneralApiConfig,
bootstrap::{GeneralBootstrapConfig, SHORT_PROLONGED_BOOTSTRAP_PERIOD},
consensus::ConsensusParams,
da::DaParams,
network::NetworkParams,
time::GeneralTimeConfig,
},
};
#[derive(Clone)]
pub struct GeneralConfig {
pub api_config: GeneralApiConfig,
pub consensus_config: GeneralConsensusConfig,
pub bootstrapping_config: GeneralBootstrapConfig,
pub da_config: GeneralDaConfig,
pub network_config: GeneralNetworkConfig,
pub blend_config: GeneralBlendConfig,
pub tracing_config: GeneralTracingConfig,
pub time_config: GeneralTimeConfig,
pub kms_config: PreloadKMSBackendSettings,
}
#[must_use]
pub fn create_general_configs(n_nodes: usize) -> Vec<GeneralConfig> {
create_general_configs_with_network(n_nodes, &NetworkParams::default())
}
#[must_use]
pub fn create_general_configs_with_network(
n_nodes: usize,
network_params: &NetworkParams,
) -> Vec<GeneralConfig> {
create_general_configs_with_blend_core_subset(n_nodes, n_nodes, network_params)
}
#[must_use]
pub fn create_general_configs_with_blend_core_subset(
n_nodes: usize,
// TODO: Instead of this, define a config struct for each node.
// That would be also useful for non-even token distributions: https://github.com/logos-co/nomos/issues/1888
n_blend_core_nodes: usize,
network_params: &NetworkParams,
) -> Vec<GeneralConfig> {
assert!(
n_blend_core_nodes <= n_nodes,
"n_blend_core_nodes({n_blend_core_nodes}) must be less than or equal to n_nodes({n_nodes})",
);
// Blend relies on each node declaring a different ZK public key, so we need
// different IDs to generate different keys.
let mut ids: Vec<_> = (0..n_nodes).map(|i| [i as u8; 32]).collect();
let mut da_ports = vec![];
let mut blend_ports = vec![];
for id in &mut ids {
thread_rng().fill(id);
da_ports.push(get_available_udp_port().unwrap());
blend_ports.push(get_available_udp_port().unwrap());
}
let consensus_params = ConsensusParams::default_for_participants(n_nodes);
let mut consensus_configs =
consensus::create_consensus_configs(&ids, &consensus_params, &WalletConfig::default());
let bootstrap_config =
bootstrap::create_bootstrap_configs(&ids, SHORT_PROLONGED_BOOTSTRAP_PERIOD);
let network_configs = network::create_network_configs(&ids, network_params);
let da_configs = da::create_da_configs(&ids, &DaParams::default(), &da_ports);
let api_configs = api::create_api_configs(&ids);
let blend_configs = blend::create_blend_configs(&ids, &blend_ports);
let tracing_configs = tracing::create_tracing_configs(&ids);
let time_config = time::default_time_config();
let providers: Vec<_> = blend_configs
.iter()
.enumerate()
.take(n_blend_core_nodes)
.map(|(i, blend_conf)| ProviderInfo {
service_type: ServiceType::BlendNetwork,
provider_sk: blend_conf.signer.clone(),
zk_sk: blend_conf.secret_zk_key.clone(),
locator: Locator(blend_conf.backend_core.listening_address.clone()),
note: consensus_configs[0].blend_notes[i].clone(),
})
.collect();
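// All nodes must share an identical genesis: take node 0's ledger tx, append the
// SDP declarations for the Blend providers, and copy the resulting genesis tx into
// every consensus config.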
let ledger_tx = consensus_configs[0]
.genesis_tx
.mantle_tx()
.ledger_tx
.clone();
let genesis_tx = create_genesis_tx_with_declarations(ledger_tx, providers);
for c in &mut consensus_configs {
c.genesis_tx = genesis_tx.clone();
}
// Set Blend and DA keys in KMS of each node config.
let kms_configs: Vec<_> = blend_configs
.iter()
.map(|blend_conf| {
let ed_key = Ed25519Key::new(blend_conf.signer.clone());
let zk_key = ZkKey::new(blend_conf.secret_zk_key.clone());
PreloadKMSBackendSettings {
keys: [
(
key_id_for_preload_backend(&Key::from(ed_key.clone())),
Key::from(ed_key),
),
(
key_id_for_preload_backend(&Key::from(zk_key.clone())),
Key::from(zk_key),
),
]
.into(),
}
})
.collect();
let mut general_configs = vec![];
for i in 0..n_nodes {
general_configs.push(GeneralConfig {
api_config: api_configs[i].clone(),
consensus_config: consensus_configs[i].clone(),
bootstrapping_config: bootstrap_config[i].clone(),
da_config: da_configs[i].clone(),
network_config: network_configs[i].clone(),
blend_config: blend_configs[i].clone(),
tracing_config: tracing_configs[i].clone(),
time_config: time_config.clone(),
kms_config: kms_configs[i].clone(),
});
}
general_configs
}

View File

@ -0,0 +1,116 @@
use std::time::Duration;
use nomos_libp2p::{
IdentifySettings, KademliaSettings, Multiaddr, NatSettings, ed25519, gossipsub,
};
use nomos_node::config::network::serde::{BackendSettings, Config, SwarmConfig};
use nomos_utils::net::get_available_udp_port;
use crate::node_address_from_port;
#[derive(Default, Clone)]
pub enum Libp2pNetworkLayout {
#[default]
Star,
Chain,
Full,
}
#[derive(Default, Clone)]
pub struct NetworkParams {
pub libp2p_network_layout: Libp2pNetworkLayout,
}
pub type GeneralNetworkConfig = Config;
fn default_swarm_config() -> SwarmConfig {
SwarmConfig {
host: std::net::Ipv4Addr::UNSPECIFIED,
port: 60000,
node_key: ed25519::SecretKey::generate(),
gossipsub_config: gossipsub::Config::default(),
kademlia_config: KademliaSettings::default(),
identify_config: IdentifySettings::default(),
chain_sync_config: cryptarchia_sync::Config::default(),
nat_config: NatSettings::default(),
}
}
#[must_use]
pub fn create_network_configs(
ids: &[[u8; 32]],
network_params: &NetworkParams,
) -> Vec<GeneralNetworkConfig> {
let swarm_configs: Vec<SwarmConfig> = ids
.iter()
.map(|id| {
let mut node_key_bytes = *id;
let node_key = ed25519::SecretKey::try_from_bytes(&mut node_key_bytes)
.expect("Failed to generate secret key from bytes");
SwarmConfig {
node_key,
port: get_available_udp_port().unwrap(),
chain_sync_config: cryptarchia_sync::Config {
peer_response_timeout: Duration::from_secs(60),
},
..default_swarm_config()
}
})
.collect();
let all_initial_peers = initial_peers_by_network_layout(&swarm_configs, network_params);
swarm_configs
.iter()
.zip(all_initial_peers)
.map(|(swarm_config, initial_peers)| GeneralNetworkConfig {
backend: BackendSettings {
initial_peers,
inner: swarm_config.to_owned(),
},
})
.collect()
}
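// Layout semantics, illustrated for nodes [0, 1, 2, 3]:
// - Star: 0 is the hub with no initial peers; 1, 2 and 3 each dial 0.
// - Chain: 0 has no initial peers; 1 dials 0, 2 dials 1, 3 dials 2.
// - Full: each node dials every lower-indexed node, so each connection is declared
//   once per pair (unidirectional in the config).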
fn initial_peers_by_network_layout(
swarm_configs: &[SwarmConfig],
network_params: &NetworkParams,
) -> Vec<Vec<Multiaddr>> {
let mut all_initial_peers = vec![];
match network_params.libp2p_network_layout {
Libp2pNetworkLayout::Star => {
// First node is the hub - has no initial peers
all_initial_peers.push(vec![]);
let first_addr = node_address_from_port(swarm_configs[0].port);
// All other nodes connect to the first node
for _ in 1..swarm_configs.len() {
all_initial_peers.push(vec![first_addr.clone()]);
}
}
Libp2pNetworkLayout::Chain => {
// First node has no initial peers
all_initial_peers.push(vec![]);
// Each subsequent node connects to the previous one
for i in 1..swarm_configs.len() {
let prev_addr = node_address_from_port(swarm_configs[i - 1].port);
all_initial_peers.push(vec![prev_addr]);
}
}
Libp2pNetworkLayout::Full => {
// Each node connects to all previous nodes, unidirectional connections
for i in 0..swarm_configs.len() {
let mut peers = vec![];
for swarm_config in swarm_configs.iter().take(i) {
peers.push(node_address_from_port(swarm_config.port));
}
all_initial_peers.push(peers);
}
}
}
all_initial_peers
}

View File

@ -0,0 +1,35 @@
use std::{
net::{IpAddr, Ipv4Addr},
str::FromStr as _,
time::Duration,
};
use time::OffsetDateTime;
const DEFAULT_SLOT_TIME: u64 = 2;
const CONSENSUS_SLOT_TIME_VAR: &str = "CONSENSUS_SLOT_TIME";
#[derive(Clone, Debug)]
pub struct GeneralTimeConfig {
pub slot_duration: Duration,
pub chain_start_time: OffsetDateTime,
pub ntp_server: String,
pub timeout: Duration,
pub interface: IpAddr,
pub update_interval: Duration,
}
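// The slot duration can be overridden via the CONSENSUS_SLOT_TIME environment
// variable (in seconds); for example, CONSENSUS_SLOT_TIME=5 yields 5-second slots,
// while an unset variable falls back to DEFAULT_SLOT_TIME (2 seconds).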
#[must_use]
pub fn default_time_config() -> GeneralTimeConfig {
let slot_duration = std::env::var(CONSENSUS_SLOT_TIME_VAR)
.map(|s| <u64>::from_str(&s).unwrap())
.unwrap_or(DEFAULT_SLOT_TIME);
GeneralTimeConfig {
slot_duration: Duration::from_secs(slot_duration),
chain_start_time: OffsetDateTime::now_utc(),
ntp_server: String::from("pool.ntp.org"),
timeout: Duration::from_secs(5),
interface: IpAddr::V4(Ipv4Addr::UNSPECIFIED),
update_interval: Duration::from_secs(16),
}
}

View File

@ -0,0 +1,71 @@
use nomos_tracing::{
logging::loki::LokiConfig, metrics::otlp::OtlpMetricsConfig, tracing::otlp::OtlpTracingConfig,
};
use nomos_tracing_service::{
ConsoleLayer, FilterLayer, LoggerLayer, MetricsLayer, TracingLayer, TracingSettings,
};
use tracing::Level;
use crate::IS_DEBUG_TRACING;
#[derive(Clone, Default)]
pub struct GeneralTracingConfig {
pub tracing_settings: TracingSettings,
}
impl GeneralTracingConfig {
fn local_debug_tracing(id: usize) -> Self {
let host_identifier = format!("node-{id}");
Self {
tracing_settings: TracingSettings {
logger: LoggerLayer::Loki(LokiConfig {
endpoint: "http://localhost:3100".try_into().unwrap(),
host_identifier: host_identifier.clone(),
}),
tracing: TracingLayer::Otlp(OtlpTracingConfig {
endpoint: "http://localhost:4317".try_into().unwrap(),
sample_ratio: 0.5,
service_name: host_identifier.clone(),
}),
filter: FilterLayer::EnvFilter(nomos_tracing::filter::envfilter::EnvFilterConfig {
// Allow events only from modules that match the regex; when a module matches, use
// the provided tracing level. Libp2p-related crates are very log-intensive in
// debug mode.
filters: std::iter::once(&("nomos", "debug"))
.map(|(k, v)| ((*k).to_owned(), (*v).to_owned()))
.collect(),
}),
metrics: MetricsLayer::Otlp(OtlpMetricsConfig {
endpoint: "http://127.0.0.1:9090/api/v1/otlp/v1/metrics"
.try_into()
.unwrap(),
host_identifier,
}),
console: ConsoleLayer::None,
level: Level::DEBUG,
},
}
}
}
#[must_use]
pub fn create_tracing_configs(ids: &[[u8; 32]]) -> Vec<GeneralTracingConfig> {
if *IS_DEBUG_TRACING {
create_debug_configs(ids)
} else {
create_default_configs(ids)
}
}
fn create_debug_configs(ids: &[[u8; 32]]) -> Vec<GeneralTracingConfig> {
ids.iter()
.enumerate()
.map(|(i, _)| GeneralTracingConfig::local_debug_tracing(i))
.collect()
}
fn create_default_configs(ids: &[[u8; 32]]) -> Vec<GeneralTracingConfig> {
ids.iter()
.map(|_| GeneralTracingConfig::default())
.collect()
}

View File

@ -0,0 +1,79 @@
use std::num::NonZeroUsize;
use num_bigint::BigUint;
use zksign::{PublicKey, SecretKey};
/// Collection of wallet accounts that should be funded at genesis.
#[derive(Clone, Default, Debug)]
pub struct WalletConfig {
pub accounts: Vec<WalletAccount>,
}
impl WalletConfig {
#[must_use]
pub const fn new(accounts: Vec<WalletAccount>) -> Self {
Self { accounts }
}
#[must_use]
pub fn uniform(total_funds: u64, users: NonZeroUsize) -> Self {
let user_count = users.get() as u64;
assert!(user_count > 0, "wallet user count must be non-zero");
assert!(
total_funds >= user_count,
"wallet funds must allocate at least 1 token per user"
);
let base_allocation = total_funds / user_count;
let mut remainder = total_funds % user_count;
let accounts = (0..users.get())
.map(|idx| {
let mut amount = base_allocation;
if remainder > 0 {
amount += 1;
remainder -= 1;
}
WalletAccount::deterministic(idx as u64, amount)
})
.collect();
Self { accounts }
}
}
/// Wallet account that holds funds in the genesis state.
#[derive(Clone, Debug)]
pub struct WalletAccount {
pub label: String,
pub secret_key: SecretKey,
pub value: u64,
}
impl WalletAccount {
#[must_use]
pub fn new(label: impl Into<String>, secret_key: SecretKey, value: u64) -> Self {
assert!(value > 0, "wallet account value must be positive");
Self {
label: label.into(),
secret_key,
value,
}
}
#[must_use]
pub fn deterministic(index: u64, value: u64) -> Self {
let mut seed = [0u8; 32];
seed[..2].copy_from_slice(b"wl");
seed[2..10].copy_from_slice(&index.to_le_bytes());
let secret_key = SecretKey::from(BigUint::from_bytes_le(&seed));
Self::new(format!("wallet-user-{index}"), secret_key, value)
}
#[must_use]
pub fn public_key(&self) -> PublicKey {
self.secret_key.to_public_key()
}
}
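// Illustrative sketch added for clarity: `uniform` hands out the division remainder
// one token at a time to the leading accounts, so the per-account values always sum
// to the requested total.
#[cfg(test)]
mod tests {
use std::num::NonZeroUsize;
use super::WalletConfig;
#[test]
fn uniform_distributes_remainder_to_leading_accounts() {
let config = WalletConfig::uniform(100, NonZeroUsize::new(3).unwrap());
let values: Vec<u64> = config.accounts.iter().map(|account| account.value).collect();
assert_eq!(values, vec![34, 33, 33]);
assert_eq!(values.iter().sum::<u64>(), 100);
}
}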

View File

@ -0,0 +1 @@
pub mod configs;

View File

@ -0,0 +1,52 @@
[package]
categories.workspace = true
description.workspace = true
edition.workspace = true
keywords.workspace = true
license.workspace = true
name = "testing-framework-core"
readme.workspace = true
repository.workspace = true
version = "0.1.0"
[lints]
workspace = true
[features]
default = []
[dependencies]
anyhow = "1"
async-trait = "0.1"
broadcast-service = { workspace = true }
chain-service = { workspace = true }
common-http-client = { workspace = true }
futures = { default-features = false, version = "0.3" }
groth16 = { workspace = true }
hex = { version = "0.4.3", default-features = false }
integration-configs = { workspace = true }
key-management-system = { workspace = true }
kzgrs-backend = { workspace = true }
nomos-core = { workspace = true }
nomos-da-network-core = { workspace = true }
nomos-da-network-service = { workspace = true }
nomos-executor = { workspace = true, default-features = false, features = ["testing", "tracing"] }
nomos-http-api-common = { workspace = true }
nomos-libp2p = { workspace = true }
nomos-network = { workspace = true, features = ["libp2p"] }
nomos-node = { workspace = true, default-features = false, features = ["testing"] }
nomos-tracing = { workspace = true }
nomos-tracing-service = { workspace = true }
nomos-utils = { workspace = true }
prometheus-http-query = "0.8"
rand = { workspace = true }
reqwest = { workspace = true, features = ["json"] }
serde = { workspace = true }
serde_json = { workspace = true }
serde_with = { workspace = true }
serde_yaml = { workspace = true }
tempfile = { workspace = true }
thiserror = { workspace = true }
tokio = { workspace = true, features = ["macros", "process", "rt-multi-thread", "time"] }
tracing = { workspace = true }
tx-service = { workspace = true, features = ["libp2p", "mock"] }

View File

@ -0,0 +1,19 @@
pub mod nodes;
pub mod scenario;
pub mod topology;
use std::{env, ops::Mul as _, sync::LazyLock, time::Duration};
pub use integration_configs::{
IS_DEBUG_TRACING, node_address_from_port, secret_key_to_peer_id, secret_key_to_provider_id,
topology::configs::da::GLOBAL_PARAMS_PATH,
};
static IS_SLOW_TEST_ENV: LazyLock<bool> =
LazyLock::new(|| env::var("SLOW_TEST_ENV").is_ok_and(|s| s == "true"));
/// In slow test environments like Codecov, use 2x timeout.
#[must_use]
pub fn adjust_timeout(d: Duration) -> Duration {
if *IS_SLOW_TEST_ENV { d.mul(2) } else { d }
}
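// For example, `adjust_timeout(Duration::from_secs(10))` returns 20 seconds when
// SLOW_TEST_ENV=true is set and 10 seconds otherwise.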

View File

@ -0,0 +1,252 @@
use std::net::SocketAddr;
use chain_service::CryptarchiaInfo;
use common_http_client::CommonHttpClient;
use nomos_core::{block::Block, da::BlobId, mantle::SignedMantleTx, sdp::SessionNumber};
use nomos_da_network_core::swarm::{BalancerStats, MonitorStats};
use nomos_da_network_service::MembershipResponse;
use nomos_http_api_common::paths::{
CRYPTARCHIA_INFO, DA_BALANCER_STATS, DA_BLACKLISTED_PEERS, DA_BLOCK_PEER, DA_GET_MEMBERSHIP,
DA_HISTORIC_SAMPLING, DA_MONITOR_STATS, DA_UNBLOCK_PEER, MEMPOOL_ADD_TX, NETWORK_INFO,
STORAGE_BLOCK,
};
use nomos_network::backends::libp2p::Libp2pInfo;
use nomos_node::{HeaderId, api::testing::handlers::HistoricSamplingRequest};
use reqwest::{Client, RequestBuilder, Response, Url};
use serde::{Serialize, de::DeserializeOwned};
use serde_json::Value;
pub const DA_GET_TESTING_ENDPOINT_ERROR: &str = "Failed to connect to testing endpoint. The binary was likely built without the 'testing' \
feature. Try: cargo build --workspace --all-features";
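// The client wraps two base URLs: the node's public HTTP API and, when available,
// the testing API served by binaries built with the `testing` feature. The
// `*_testing_*` helpers panic with the message above when the latter is missing.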
#[derive(Clone)]
pub struct ApiClient {
pub(crate) base_url: Url,
pub(crate) testing_url: Option<Url>,
client: Client,
pub(crate) http_client: CommonHttpClient,
}
impl ApiClient {
#[must_use]
pub fn new(base_addr: SocketAddr, testing_addr: Option<SocketAddr>) -> Self {
let base_url =
Url::parse(&format!("http://{base_addr}")).expect("Valid base address for node");
let testing_url = testing_addr
.map(|addr| Url::parse(&format!("http://{addr}")).expect("Valid testing address"));
Self::from_urls(base_url, testing_url)
}
#[must_use]
pub fn from_urls(base_url: Url, testing_url: Option<Url>) -> Self {
let client = Client::new();
Self {
base_url,
testing_url,
http_client: CommonHttpClient::new_with_client(client.clone(), None),
client,
}
}
#[must_use]
pub fn testing_url(&self) -> Option<Url> {
self.testing_url.clone()
}
pub fn get_builder(&self, path: &str) -> RequestBuilder {
self.client.get(self.join_base(path))
}
pub async fn get_response(&self, path: &str) -> reqwest::Result<Response> {
self.client.get(self.join_base(path)).send().await
}
pub async fn get_json<T>(&self, path: &str) -> reqwest::Result<T>
where
T: DeserializeOwned,
{
self.get_response(path)
.await?
.error_for_status()?
.json()
.await
}
pub async fn post_json_decode<T, R>(&self, path: &str, body: &T) -> reqwest::Result<R>
where
T: Serialize + Sync + ?Sized,
R: DeserializeOwned,
{
self.post_json_response(path, body)
.await?
.error_for_status()?
.json()
.await
}
pub async fn post_json_response<T>(&self, path: &str, body: &T) -> reqwest::Result<Response>
where
T: Serialize + Sync + ?Sized,
{
self.client
.post(self.join_base(path))
.json(body)
.send()
.await
}
pub async fn post_json_unit<T>(&self, path: &str, body: &T) -> reqwest::Result<()>
where
T: Serialize + Sync + ?Sized,
{
self.post_json_response(path, body)
.await?
.error_for_status()?;
Ok(())
}
pub async fn get_testing_json<T>(&self, path: &str) -> reqwest::Result<T>
where
T: DeserializeOwned,
{
self.get_testing_response(path)
.await?
.error_for_status()?
.json()
.await
}
pub async fn post_testing_json_decode<T, R>(&self, path: &str, body: &T) -> reqwest::Result<R>
where
T: Serialize + Sync + ?Sized,
R: DeserializeOwned,
{
self.post_testing_json_response(path, body)
.await?
.error_for_status()?
.json()
.await
}
pub async fn post_testing_json_unit<T>(&self, path: &str, body: &T) -> reqwest::Result<()>
where
T: Serialize + Sync + ?Sized,
{
self.post_testing_json_response(path, body)
.await?
.error_for_status()?;
Ok(())
}
pub async fn post_testing_json_response<T>(
&self,
path: &str,
body: &T,
) -> reqwest::Result<Response>
where
T: Serialize + Sync + ?Sized,
{
let testing_url = self
.testing_url
.as_ref()
.expect(DA_GET_TESTING_ENDPOINT_ERROR);
self.client
.post(Self::join_url(testing_url, path))
.json(body)
.send()
.await
}
pub async fn get_testing_response(&self, path: &str) -> reqwest::Result<Response> {
let testing_url = self
.testing_url
.as_ref()
.expect(DA_GET_TESTING_ENDPOINT_ERROR);
self.client
.get(Self::join_url(testing_url, path))
.send()
.await
}
pub async fn block_peer(&self, peer_id: &str) -> reqwest::Result<bool> {
self.post_json_decode(DA_BLOCK_PEER, &peer_id).await
}
pub async fn unblock_peer(&self, peer_id: &str) -> reqwest::Result<bool> {
self.post_json_decode(DA_UNBLOCK_PEER, &peer_id).await
}
pub async fn blacklisted_peers(&self) -> reqwest::Result<Vec<String>> {
self.get_json(DA_BLACKLISTED_PEERS).await
}
pub async fn balancer_stats(&self) -> reqwest::Result<BalancerStats> {
self.get_json(DA_BALANCER_STATS).await
}
pub async fn monitor_stats(&self) -> reqwest::Result<MonitorStats> {
self.get_json(DA_MONITOR_STATS).await
}
pub async fn consensus_info(&self) -> reqwest::Result<CryptarchiaInfo> {
self.get_json(CRYPTARCHIA_INFO).await
}
pub async fn network_info(&self) -> reqwest::Result<Libp2pInfo> {
self.get_json(NETWORK_INFO).await
}
pub async fn storage_block(
&self,
id: &HeaderId,
) -> reqwest::Result<Option<Block<SignedMantleTx>>> {
self.post_json_decode(STORAGE_BLOCK, id).await
}
pub async fn da_get_membership(
&self,
session_id: &SessionNumber,
) -> reqwest::Result<MembershipResponse> {
self.post_testing_json_decode(DA_GET_MEMBERSHIP, session_id)
.await
}
pub async fn da_historic_sampling(
&self,
request: &HistoricSamplingRequest<BlobId>,
) -> reqwest::Result<bool> {
self.post_testing_json_decode(DA_HISTORIC_SAMPLING, request)
.await
}
pub async fn submit_transaction(&self, tx: &SignedMantleTx) -> reqwest::Result<()> {
self.post_json_unit(MEMPOOL_ADD_TX, tx).await
}
pub async fn get_headers_raw(&self, builder: RequestBuilder) -> reqwest::Result<Response> {
builder.send().await
}
pub async fn mempool_metrics(&self, pool: &str) -> reqwest::Result<Value> {
self.get_json(&format!("/{pool}/metrics")).await
}
#[must_use]
pub const fn base_url(&self) -> &Url {
&self.base_url
}
#[must_use]
pub const fn http_client(&self) -> &CommonHttpClient {
&self.http_client
}
fn join_base(&self, path: &str) -> Url {
Self::join_url(&self.base_url, path)
}
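// The leading slash is stripped so `Url::join` resolves the path relative to the
// base URL instead of replacing any path segment the base already carries.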
fn join_url(base: &Url, path: &str) -> Url {
let trimmed = path.trim_start_matches('/');
base.join(trimmed).expect("valid relative path")
}
}

View File

@ -0,0 +1,282 @@
use std::{
collections::HashSet,
path::PathBuf,
process::{Child, Command, Stdio},
time::Duration,
};
use broadcast_service::BlockInfo;
use chain_service::CryptarchiaInfo;
use futures::Stream;
pub use integration_configs::nodes::executor::create_executor_config;
use kzgrs_backend::common::share::{DaLightShare, DaShare, DaSharesCommitments};
use nomos_core::{
block::Block, da::BlobId, header::HeaderId, mantle::SignedMantleTx, sdp::SessionNumber,
};
use nomos_da_network_core::swarm::{BalancerStats, MonitorStats};
use nomos_da_network_service::MembershipResponse;
use nomos_executor::config::Config;
use nomos_http_api_common::paths::{DA_GET_SHARES_COMMITMENTS, MANTLE_METRICS, MEMPOOL_ADD_TX};
use nomos_network::backends::libp2p::Libp2pInfo;
use nomos_node::api::testing::handlers::HistoricSamplingRequest;
use nomos_tracing::logging::local::FileConfig;
use nomos_tracing_service::LoggerLayer;
use reqwest::Url;
use serde_yaml::{Mapping, Number as YamlNumber, Value};
use super::{ApiClient, create_tempdir, persist_tempdir, should_persist_tempdir};
use crate::{IS_DEBUG_TRACING, adjust_timeout, nodes::LOGS_PREFIX};
const BIN_PATH: &str = "target/debug/nomos-executor";
fn binary_path() -> PathBuf {
PathBuf::from(env!("CARGO_MANIFEST_DIR"))
.join("../../")
.join(BIN_PATH)
}
pub struct Executor {
tempdir: tempfile::TempDir,
child: Child,
config: Config,
api: ApiClient,
}
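// Backfill the `network_adapter_settings`, `sync`, and `bootstrap.ibd` sections of
// the serialized cryptarchia config when they are missing; `spawn` runs this on the
// YAML value before writing the config file for the child process.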
fn inject_ibd_into_cryptarchia(yaml_value: &mut Value) {
let Some(root) = yaml_value.as_mapping_mut() else {
return;
};
let Some(cryptarchia) = root
.get_mut(&Value::String("cryptarchia".into()))
.and_then(Value::as_mapping_mut)
else {
return;
};
if !cryptarchia.contains_key(&Value::String("network_adapter_settings".into())) {
let mut network = Mapping::new();
network.insert(
Value::String("topic".into()),
Value::String(nomos_node::CONSENSUS_TOPIC.into()),
);
cryptarchia.insert(
Value::String("network_adapter_settings".into()),
Value::Mapping(network),
);
}
if !cryptarchia.contains_key(&Value::String("sync".into())) {
let mut orphan = Mapping::new();
orphan.insert(
Value::String("max_orphan_cache_size".into()),
Value::Number(YamlNumber::from(5)),
);
let mut sync = Mapping::new();
sync.insert(Value::String("orphan".into()), Value::Mapping(orphan));
cryptarchia.insert(Value::String("sync".into()), Value::Mapping(sync));
}
let Some(bootstrap) = cryptarchia
.get_mut(&Value::String("bootstrap".into()))
.and_then(Value::as_mapping_mut)
else {
return;
};
let ibd_key = Value::String("ibd".into());
if bootstrap.contains_key(&ibd_key) {
return;
}
let mut ibd = Mapping::new();
ibd.insert(Value::String("peers".into()), Value::Sequence(vec![]));
bootstrap.insert(ibd_key, Value::Mapping(ibd));
}
impl Drop for Executor {
fn drop(&mut self) {
if should_persist_tempdir()
&& let Err(e) = persist_tempdir(&mut self.tempdir, "nomos-executor")
{
println!("failed to persist tempdir: {e}");
}
if let Err(e) = self.child.kill() {
println!("failed to kill the child process: {e}");
}
}
}
impl Executor {
pub async fn spawn(mut config: Config) -> Self {
let dir = create_tempdir().unwrap();
let config_path = dir.path().join("executor.yaml");
let file = std::fs::File::create(&config_path).unwrap();
if !*IS_DEBUG_TRACING {
// set up logging so that we can intercept it later in testing
config.tracing.logger = LoggerLayer::File(FileConfig {
directory: dir.path().to_owned(),
prefix: Some(LOGS_PREFIX.into()),
});
}
config.storage.db_path = dir.path().join("db");
dir.path().clone_into(
&mut config
.da_verifier
.storage_adapter_settings
.blob_storage_directory,
);
let addr = config.http.backend_settings.address;
let testing_addr = config.testing_http.backend_settings.address;
let mut yaml_value = serde_yaml::to_value(&config).unwrap();
inject_ibd_into_cryptarchia(&mut yaml_value);
serde_yaml::to_writer(file, &yaml_value).unwrap();
let child = Command::new(binary_path())
.arg(&config_path)
.current_dir(dir.path())
.stdout(Stdio::inherit())
.spawn()
.unwrap();
let node = Self {
child,
tempdir: dir,
config,
api: ApiClient::new(addr, Some(testing_addr)),
};
tokio::time::timeout(adjust_timeout(Duration::from_secs(10)), async {
node.wait_online().await;
})
.await
.unwrap();
node
}
pub async fn block_peer(&self, peer_id: String) -> bool {
self.api.block_peer(&peer_id).await.unwrap()
}
pub async fn unblock_peer(&self, peer_id: String) -> bool {
self.api.unblock_peer(&peer_id).await.unwrap()
}
pub async fn blacklisted_peers(&self) -> Vec<String> {
self.api.blacklisted_peers().await.unwrap()
}
async fn wait_online(&self) {
loop {
let res = self.api.get_response(MANTLE_METRICS).await;
if res.is_ok() && res.unwrap().status().is_success() {
break;
}
tokio::time::sleep(Duration::from_millis(100)).await;
}
}
#[must_use]
pub const fn config(&self) -> &Config {
&self.config
}
#[must_use]
pub fn url(&self) -> Url {
self.api.base_url().clone()
}
#[must_use]
pub fn testing_url(&self) -> Option<Url> {
self.api.testing_url()
}
pub async fn balancer_stats(&self) -> BalancerStats {
self.api.balancer_stats().await.unwrap()
}
pub async fn monitor_stats(&self) -> MonitorStats {
self.api.monitor_stats().await.unwrap()
}
pub async fn network_info(&self) -> Libp2pInfo {
self.api.network_info().await.unwrap()
}
pub async fn consensus_info(&self) -> CryptarchiaInfo {
self.api.consensus_info().await.unwrap()
}
pub async fn get_block(&self, id: HeaderId) -> Option<Block<SignedMantleTx>> {
self.api.storage_block(&id).await.unwrap()
}
pub async fn get_shares(
&self,
blob_id: BlobId,
requested_shares: HashSet<[u8; 2]>,
filter_shares: HashSet<[u8; 2]>,
return_available: bool,
) -> Result<impl Stream<Item = DaLightShare>, common_http_client::Error> {
self.api
.http_client()
.get_shares::<DaShare>(
self.api.base_url().clone(),
blob_id,
requested_shares,
filter_shares,
return_available,
)
.await
}
pub async fn get_commitments(&self, blob_id: BlobId) -> Option<DaSharesCommitments> {
self.api
.post_json_decode(DA_GET_SHARES_COMMITMENTS, &blob_id)
.await
.unwrap()
}
pub async fn get_storage_commitments(
&self,
blob_id: BlobId,
) -> Result<Option<DaSharesCommitments>, common_http_client::Error> {
self.api
.http_client()
.get_storage_commitments::<DaShare>(self.api.base_url().clone(), blob_id)
.await
}
pub async fn da_get_membership(
&self,
session_id: SessionNumber,
) -> Result<MembershipResponse, reqwest::Error> {
self.api.da_get_membership(&session_id).await
}
pub async fn da_historic_sampling<I>(
&self,
block_id: HeaderId,
blob_ids: I,
) -> Result<bool, reqwest::Error>
where
I: IntoIterator<Item = (BlobId, SessionNumber)>,
{
let request = HistoricSamplingRequest {
block_id,
blob_ids: blob_ids.into_iter().collect(),
};
self.api.da_historic_sampling(&request).await
}
pub async fn get_lib_stream(
&self,
) -> Result<impl Stream<Item = BlockInfo>, common_http_client::Error> {
self.api
.http_client()
.get_lib_stream(self.api.base_url().clone())
.await
}
pub async fn add_tx(&self, tx: SignedMantleTx) -> Result<(), reqwest::Error> {
self.api.post_json_unit(MEMPOOL_ADD_TX, &tx).await
}
}

View File

@ -0,0 +1,35 @@
mod api_client;
pub mod executor;
pub mod validator;
use std::sync::LazyLock;
pub use api_client::ApiClient;
use tempfile::TempDir;
pub(crate) const LOGS_PREFIX: &str = "__logs";
static KEEP_NODE_TEMPDIRS: LazyLock<bool> =
LazyLock::new(|| std::env::var("NOMOS_TESTS_KEEP_LOGS").is_ok());
fn create_tempdir() -> std::io::Result<TempDir> {
// It's easier to use the current directory instead of the OS-default tempfile
// location because GitHub Actions can easily access files in the current
// directory using a wildcard to upload them as artifacts.
TempDir::new_in(std::env::current_dir()?)
}
fn persist_tempdir(tempdir: &mut TempDir, label: &str) -> std::io::Result<()> {
println!(
"{}: persisting directory at {}",
label,
tempdir.path().display()
);
// we need ownership of the dir to persist it
let dir = std::mem::replace(tempdir, tempfile::tempdir()?);
let _ = dir.keep();
Ok(())
}
pub(crate) fn should_persist_tempdir() -> bool {
std::thread::panicking() || *KEEP_NODE_TEMPDIRS
}

View File

@ -0,0 +1,344 @@
use std::{
collections::HashSet,
path::PathBuf,
process::{Child, Command, Stdio},
time::Duration,
};
use broadcast_service::BlockInfo;
use chain_service::CryptarchiaInfo;
use futures::Stream;
pub use integration_configs::nodes::validator::create_validator_config;
use kzgrs_backend::common::share::{DaLightShare, DaShare, DaSharesCommitments};
use nomos_core::{block::Block, da::BlobId, mantle::SignedMantleTx, sdp::SessionNumber};
use nomos_da_network_core::swarm::{BalancerStats, MonitorStats};
use nomos_da_network_service::MembershipResponse;
use nomos_http_api_common::paths::{CRYPTARCHIA_HEADERS, DA_GET_SHARES_COMMITMENTS};
use nomos_network::backends::libp2p::Libp2pInfo;
use nomos_node::{Config, HeaderId, api::testing::handlers::HistoricSamplingRequest};
use nomos_tracing::logging::local::FileConfig;
use nomos_tracing_service::LoggerLayer;
use reqwest::Url;
use serde_yaml::{Mapping, Number as YamlNumber, Value};
use tokio::time::error::Elapsed;
use tx_service::MempoolMetrics;
use super::{ApiClient, create_tempdir, persist_tempdir, should_persist_tempdir};
use crate::{IS_DEBUG_TRACING, adjust_timeout, nodes::LOGS_PREFIX};
const BIN_PATH: &str = "target/debug/nomos-node";
fn binary_path() -> PathBuf {
PathBuf::from(env!("CARGO_MANIFEST_DIR"))
.join("../../")
.join(BIN_PATH)
}
pub enum Pool {
Da,
Mantle,
}
pub struct Validator {
tempdir: tempfile::TempDir,
child: Child,
config: Config,
api: ApiClient,
}
fn inject_ibd_into_cryptarchia(yaml_value: &mut Value) {
let Some(root) = yaml_value.as_mapping_mut() else {
return;
};
let Some(cryptarchia) = root
.get_mut(&Value::String("cryptarchia".into()))
.and_then(Value::as_mapping_mut)
else {
return;
};
if !cryptarchia.contains_key(&Value::String("network_adapter_settings".into())) {
let mut network = Mapping::new();
network.insert(
Value::String("topic".into()),
Value::String(nomos_node::CONSENSUS_TOPIC.into()),
);
cryptarchia.insert(
Value::String("network_adapter_settings".into()),
Value::Mapping(network),
);
}
if !cryptarchia.contains_key(&Value::String("sync".into())) {
let mut orphan = Mapping::new();
orphan.insert(
Value::String("max_orphan_cache_size".into()),
Value::Number(YamlNumber::from(5)),
);
let mut sync = Mapping::new();
sync.insert(Value::String("orphan".into()), Value::Mapping(orphan));
cryptarchia.insert(Value::String("sync".into()), Value::Mapping(sync));
}
let Some(bootstrap) = cryptarchia
.get_mut(&Value::String("bootstrap".into()))
.and_then(Value::as_mapping_mut)
else {
return;
};
let ibd_key = Value::String("ibd".into());
if bootstrap.contains_key(&ibd_key) {
return;
}
let mut ibd = Mapping::new();
ibd.insert(Value::String("peers".into()), Value::Sequence(vec![]));
bootstrap.insert(ibd_key, Value::Mapping(ibd));
}
impl Drop for Validator {
fn drop(&mut self) {
if should_persist_tempdir()
&& let Err(e) = persist_tempdir(&mut self.tempdir, "nomos-node")
{
println!("failed to persist tempdir: {e}");
}
if let Err(e) = self.child.kill() {
println!("failed to kill the child process: {e}");
}
}
}
impl Validator {
/// Check if the validator process is still running
pub fn is_running(&mut self) -> bool {
match self.child.try_wait() {
Ok(None) => true,
Ok(Some(_)) | Err(_) => false,
}
}
/// Wait for the validator process to exit, with a timeout.
///
/// Returns `true` if the process exited within the timeout, `false` otherwise.
pub async fn wait_for_exit(&mut self, timeout: Duration) -> bool {
tokio::time::timeout(timeout, async {
loop {
if !self.is_running() {
return;
}
tokio::time::sleep(Duration::from_millis(100)).await;
}
})
.await
.is_ok()
}
pub async fn spawn(mut config: Config) -> Result<Self, Elapsed> {
let dir = create_tempdir().unwrap();
let config_path = dir.path().join("validator.yaml");
let file = std::fs::File::create(&config_path).unwrap();
if !*IS_DEBUG_TRACING {
// set up logging so that we can intercept it later in testing
config.tracing.logger = LoggerLayer::File(FileConfig {
directory: dir.path().to_owned(),
prefix: Some(LOGS_PREFIX.into()),
});
}
config.storage.db_path = dir.path().join("db");
dir.path().clone_into(
&mut config
.da_verifier
.storage_adapter_settings
.blob_storage_directory,
);
let addr = config.http.backend_settings.address;
let testing_addr = config.testing_http.backend_settings.address;
let mut yaml_value = serde_yaml::to_value(&config).unwrap();
inject_ibd_into_cryptarchia(&mut yaml_value);
serde_yaml::to_writer(file, &yaml_value).unwrap();
let child = Command::new(binary_path())
.arg(&config_path)
.current_dir(dir.path())
.stdout(Stdio::inherit())
.stderr(Stdio::inherit())
.spawn()
.unwrap();
let node = Self {
child,
tempdir: dir,
config,
api: ApiClient::new(addr, Some(testing_addr)),
};
tokio::time::timeout(adjust_timeout(Duration::from_secs(10)), async {
node.wait_online().await;
})
.await?;
Ok(node)
}
#[must_use]
pub fn url(&self) -> Url {
self.api.base_url().clone()
}
#[must_use]
pub fn testing_url(&self) -> Option<Url> {
self.api.testing_url()
}
async fn wait_online(&self) {
loop {
if self.api.consensus_info().await.is_ok() {
break;
}
tokio::time::sleep(Duration::from_millis(100)).await;
}
}
pub async fn get_block(&self, id: HeaderId) -> Option<Block<SignedMantleTx>> {
self.api.storage_block(&id).await.unwrap()
}
pub async fn get_commitments(&self, blob_id: BlobId) -> Option<DaSharesCommitments> {
self.api
.post_json_decode(DA_GET_SHARES_COMMITMENTS, &blob_id)
.await
.unwrap()
}
pub async fn get_mempoool_metrics(&self, pool: Pool) -> MempoolMetrics {
let discr = match pool {
Pool::Mantle => "mantle",
Pool::Da => "da",
};
let res = self.api.mempool_metrics(discr).await.unwrap();
MempoolMetrics {
pending_items: res["pending_items"].as_u64().unwrap() as usize,
last_item_timestamp: res["last_item_timestamp"].as_u64().unwrap(),
}
}
pub async fn da_historic_sampling<I>(
&self,
block_id: HeaderId,
blob_ids: I,
) -> Result<bool, reqwest::Error>
where
I: IntoIterator<Item = (BlobId, SessionNumber)>,
{
let request = HistoricSamplingRequest {
block_id,
blob_ids: blob_ids.into_iter().collect(),
};
self.api.da_historic_sampling(&request).await
}
// not async so that we can use this in `Drop`
#[must_use]
pub fn get_logs_from_file(&self) -> String {
println!(
"fetching logs from dir {}...",
self.tempdir.path().display()
);
std::fs::read_dir(self.tempdir.path())
.unwrap()
.filter_map(|entry| {
let entry = entry.unwrap();
let path = entry.path();
(path.is_file() && path.to_str().unwrap().contains(LOGS_PREFIX)).then_some(path)
})
.map(|f| std::fs::read_to_string(f).unwrap())
.collect::<String>()
}
#[must_use]
pub const fn config(&self) -> &Config {
&self.config
}
pub async fn get_headers(&self, from: Option<HeaderId>, to: Option<HeaderId>) -> Vec<HeaderId> {
let mut req = self.api.get_builder(CRYPTARCHIA_HEADERS);
if let Some(from) = from {
req = req.query(&[("from", from)]);
}
if let Some(to) = to {
req = req.query(&[("to", to)]);
}
let res = self.api.get_headers_raw(req).await;
println!("res: {res:?}");
res.unwrap().json::<Vec<HeaderId>>().await.unwrap()
}
pub async fn consensus_info(&self) -> CryptarchiaInfo {
let info = self.api.consensus_info().await.unwrap();
println!("{info:?}");
info
}
pub async fn balancer_stats(&self) -> BalancerStats {
self.api.balancer_stats().await.unwrap()
}
pub async fn monitor_stats(&self) -> MonitorStats {
self.api.monitor_stats().await.unwrap()
}
pub async fn da_get_membership(
&self,
session_id: SessionNumber,
) -> Result<MembershipResponse, reqwest::Error> {
self.api.da_get_membership(&session_id).await
}
pub async fn network_info(&self) -> Libp2pInfo {
self.api.network_info().await.unwrap()
}
pub async fn get_shares(
&self,
blob_id: BlobId,
requested_shares: HashSet<[u8; 2]>,
filter_shares: HashSet<[u8; 2]>,
return_available: bool,
) -> Result<impl Stream<Item = DaLightShare>, common_http_client::Error> {
self.api
.http_client()
.get_shares::<DaShare>(
self.api.base_url().clone(),
blob_id,
requested_shares,
filter_shares,
return_available,
)
.await
}
pub async fn get_storage_commitments(
&self,
blob_id: BlobId,
) -> Result<Option<DaSharesCommitments>, common_http_client::Error> {
self.api
.http_client()
.get_storage_commitments::<DaShare>(self.api.base_url().clone(), blob_id)
.await
}
pub async fn get_lib_stream(
&self,
) -> Result<impl Stream<Item = BlockInfo>, common_http_client::Error> {
self.api
.http_client()
.get_lib_stream(self.api.base_url().clone())
.await
}
}

View File

@ -0,0 +1,28 @@
use async_trait::async_trait;
use super::DynError;
/// Marker type used by scenario builders to request node control support.
#[derive(Clone, Copy, Debug, Default)]
pub struct NodeControlCapability;
/// Trait implemented by scenario capability markers to signal whether node
/// control is required.
pub trait RequiresNodeControl {
const REQUIRED: bool;
}
impl RequiresNodeControl for () {
const REQUIRED: bool = false;
}
impl RequiresNodeControl for NodeControlCapability {
const REQUIRED: bool = true;
}
/// Interface exposed by runners that can restart nodes at runtime.
#[async_trait]
pub trait NodeControlHandle: Send + Sync {
async fn restart_validator(&self, index: usize) -> Result<(), DynError>;
async fn restart_executor(&self, index: usize) -> Result<(), DynError>;
}

View File

@ -0,0 +1,153 @@
use std::{fs::File, num::NonZero, path::Path, time::Duration};
use anyhow::{Context as _, Result};
use nomos_da_network_core::swarm::ReplicationConfig;
use nomos_tracing::metrics::otlp::OtlpMetricsConfig;
use nomos_tracing_service::{MetricsLayer, TracingSettings};
use nomos_utils::bounded_duration::{MinimalBoundedDuration, SECOND};
use reqwest::Url;
use serde::{Deserialize, Serialize};
use serde_with::serde_as;
use crate::topology::GeneratedTopology;
#[serde_as]
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct CfgSyncConfig {
pub port: u16,
pub n_hosts: usize,
pub timeout: u64,
pub security_param: NonZero<u32>,
pub active_slot_coeff: f64,
pub subnetwork_size: usize,
pub dispersal_factor: usize,
pub num_samples: u16,
pub num_subnets: u16,
#[serde_as(as = "MinimalBoundedDuration<0, SECOND>")]
pub old_blobs_check_interval: Duration,
#[serde_as(as = "MinimalBoundedDuration<0, SECOND>")]
pub blobs_validity_duration: Duration,
pub global_params_path: String,
pub min_dispersal_peers: usize,
pub min_replication_peers: usize,
#[serde_as(as = "MinimalBoundedDuration<0, SECOND>")]
pub monitor_failure_time_window: Duration,
#[serde_as(as = "MinimalBoundedDuration<0, SECOND>")]
pub balancer_interval: Duration,
pub replication_settings: ReplicationConfig,
pub retry_shares_limit: usize,
pub retry_commitments_limit: usize,
pub tracing_settings: TracingSettings,
}
pub fn load_cfgsync_template(path: &Path) -> Result<CfgSyncConfig> {
let file = File::open(path)
.with_context(|| format!("opening cfgsync template at {}", path.display()))?;
serde_yaml::from_reader(file).context("parsing cfgsync template")
}
pub fn write_cfgsync_template(path: &Path, cfg: &CfgSyncConfig) -> Result<()> {
let file = File::create(path)
.with_context(|| format!("writing cfgsync template to {}", path.display()))?;
let serializable = SerializableCfgSyncConfig::from(cfg);
serde_yaml::to_writer(file, &serializable).context("serializing cfgsync template")
}
pub fn render_cfgsync_yaml(cfg: &CfgSyncConfig) -> Result<String> {
let serializable = SerializableCfgSyncConfig::from(cfg);
serde_yaml::to_string(&serializable).context("rendering cfgsync yaml")
}
pub fn apply_topology_overrides(
cfg: &mut CfgSyncConfig,
topology: &GeneratedTopology,
use_kzg_mount: bool,
) {
let hosts = topology.validators().len() + topology.executors().len();
cfg.n_hosts = hosts;
let consensus = &topology.config().consensus_params;
cfg.security_param = consensus.security_param;
cfg.active_slot_coeff = consensus.active_slot_coeff;
let da = &topology.config().da_params;
cfg.subnetwork_size = da.subnetwork_size;
cfg.dispersal_factor = da.dispersal_factor;
cfg.num_samples = da.num_samples;
cfg.num_subnets = da.num_subnets;
cfg.old_blobs_check_interval = da.old_blobs_check_interval;
cfg.blobs_validity_duration = da.blobs_validity_duration;
cfg.global_params_path = if use_kzg_mount {
"/kzgrs_test_params".into()
} else {
da.global_params_path.clone()
};
cfg.min_dispersal_peers = da.policy_settings.min_dispersal_peers;
cfg.min_replication_peers = da.policy_settings.min_replication_peers;
cfg.monitor_failure_time_window = da.monitor_settings.failure_time_window;
cfg.balancer_interval = da.balancer_interval;
cfg.replication_settings = da.replication_settings;
cfg.retry_shares_limit = da.retry_shares_limit;
cfg.retry_commitments_limit = da.retry_commitments_limit;
cfg.tracing_settings.metrics = MetricsLayer::Otlp(OtlpMetricsConfig {
endpoint: Url::parse("http://prometheus:9090/api/v1/otlp/v1/metrics")
.expect("valid prometheus otlp endpoint"),
host_identifier: String::new(),
});
}
#[serde_as]
#[derive(Serialize)]
struct SerializableCfgSyncConfig {
port: u16,
n_hosts: usize,
timeout: u64,
security_param: NonZero<u32>,
active_slot_coeff: f64,
subnetwork_size: usize,
dispersal_factor: usize,
num_samples: u16,
num_subnets: u16,
#[serde_as(as = "MinimalBoundedDuration<0, SECOND>")]
old_blobs_check_interval: Duration,
#[serde_as(as = "MinimalBoundedDuration<0, SECOND>")]
blobs_validity_duration: Duration,
global_params_path: String,
min_dispersal_peers: usize,
min_replication_peers: usize,
#[serde_as(as = "MinimalBoundedDuration<0, SECOND>")]
monitor_failure_time_window: Duration,
#[serde_as(as = "MinimalBoundedDuration<0, SECOND>")]
balancer_interval: Duration,
replication_settings: ReplicationConfig,
retry_shares_limit: usize,
retry_commitments_limit: usize,
tracing_settings: TracingSettings,
}
impl From<&CfgSyncConfig> for SerializableCfgSyncConfig {
fn from(cfg: &CfgSyncConfig) -> Self {
Self {
port: cfg.port,
n_hosts: cfg.n_hosts,
timeout: cfg.timeout,
security_param: cfg.security_param,
active_slot_coeff: cfg.active_slot_coeff,
subnetwork_size: cfg.subnetwork_size,
dispersal_factor: cfg.dispersal_factor,
num_samples: cfg.num_samples,
num_subnets: cfg.num_subnets,
old_blobs_check_interval: cfg.old_blobs_check_interval,
blobs_validity_duration: cfg.blobs_validity_duration,
global_params_path: cfg.global_params_path.clone(),
min_dispersal_peers: cfg.min_dispersal_peers,
min_replication_peers: cfg.min_replication_peers,
monitor_failure_time_window: cfg.monitor_failure_time_window,
balancer_interval: cfg.balancer_interval,
replication_settings: cfg.replication_settings,
retry_shares_limit: cfg.retry_shares_limit,
retry_commitments_limit: cfg.retry_commitments_limit,
tracing_settings: cfg.tracing_settings.clone(),
}
}
}

View File

@ -0,0 +1,255 @@
use std::{num::NonZeroUsize, sync::Arc, time::Duration};
use super::{
NodeControlCapability, expectation::Expectation, runtime::context::RunMetrics,
workload::Workload,
};
use crate::topology::{
GeneratedTopology, TopologyBuilder, TopologyConfig, configs::wallet::WalletConfig,
};
const DEFAULT_FUNDS_PER_WALLET: u64 = 100;
/// Immutable scenario definition shared between the runner, workloads, and
/// expectations.
pub struct Scenario<Caps = ()> {
topology: GeneratedTopology,
workloads: Vec<Arc<dyn Workload>>,
expectations: Vec<Box<dyn Expectation>>,
duration: Duration,
capabilities: Caps,
}
impl<Caps> Scenario<Caps> {
fn new(
topology: GeneratedTopology,
workloads: Vec<Arc<dyn Workload>>,
expectations: Vec<Box<dyn Expectation>>,
duration: Duration,
capabilities: Caps,
) -> Self {
Self {
topology,
workloads,
expectations,
duration,
capabilities,
}
}
#[must_use]
pub const fn topology(&self) -> &GeneratedTopology {
&self.topology
}
#[must_use]
pub fn workloads(&self) -> &[Arc<dyn Workload>] {
&self.workloads
}
#[must_use]
pub fn expectations(&self) -> &[Box<dyn Expectation>] {
&self.expectations
}
#[must_use]
pub fn expectations_mut(&mut self) -> &mut [Box<dyn Expectation>] {
&mut self.expectations
}
#[must_use]
pub const fn duration(&self) -> Duration {
self.duration
}
#[must_use]
pub const fn capabilities(&self) -> &Caps {
&self.capabilities
}
}
/// Builder used by callers to describe the desired scenario.
pub struct Builder<Caps = ()> {
topology: TopologyBuilder,
workloads: Vec<Arc<dyn Workload>>,
expectations: Vec<Box<dyn Expectation>>,
duration: Duration,
capabilities: Caps,
}
pub type ScenarioBuilder = Builder<()>;
impl<Caps: Default> Builder<Caps> {
#[must_use]
pub fn new(topology: TopologyBuilder) -> Self {
Self {
topology,
workloads: Vec::new(),
expectations: Vec::new(),
duration: Duration::ZERO,
capabilities: Caps::default(),
}
}
#[must_use]
pub fn with_node_counts(validators: usize, executors: usize) -> Self {
Self::new(TopologyBuilder::new(TopologyConfig::with_node_numbers(
validators, executors,
)))
}
}
impl<Caps> Builder<Caps> {
#[must_use]
pub fn with_capabilities<NewCaps>(self, capabilities: NewCaps) -> Builder<NewCaps> {
let Self {
topology,
workloads,
expectations,
duration,
..
} = self;
Builder {
topology,
workloads,
expectations,
duration,
capabilities,
}
}
#[must_use]
pub const fn capabilities(&self) -> &Caps {
&self.capabilities
}
#[must_use]
pub const fn capabilities_mut(&mut self) -> &mut Caps {
&mut self.capabilities
}
#[must_use]
pub fn with_workload<W>(mut self, workload: W) -> Self
where
W: Workload + 'static,
{
self.expectations.extend(workload.expectations());
self.workloads.push(Arc::new(workload));
self
}
#[must_use]
pub fn with_expectation<E>(mut self, expectation: E) -> Self
where
E: Expectation + 'static,
{
self.expectations.push(Box::new(expectation));
self
}
#[must_use]
pub const fn with_run_duration(mut self, duration: Duration) -> Self {
self.duration = duration;
self
}
#[must_use]
pub fn map_topology(mut self, f: impl FnOnce(TopologyBuilder) -> TopologyBuilder) -> Self {
self.topology = f(self.topology);
self
}
#[must_use]
pub fn with_wallet_config(mut self, wallet: WalletConfig) -> Self {
self.topology = self.topology.with_wallet_config(wallet);
self
}
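// Convenience wrapper around `with_wallet_config`: `wallets(n)` funds `n` accounts
// with `DEFAULT_FUNDS_PER_WALLET` (100) tokens each, distributed uniformly.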
#[must_use]
pub fn wallets(self, users: usize) -> Self {
let user_count = NonZeroUsize::new(users).expect("wallet user count must be non-zero");
let total_funds = DEFAULT_FUNDS_PER_WALLET
.checked_mul(users as u64)
.expect("wallet count exceeds capacity");
let wallet = WalletConfig::uniform(total_funds, user_count);
self.with_wallet_config(wallet)
}
#[must_use]
pub fn build(self) -> Scenario<Caps> {
let Self {
topology,
mut workloads,
mut expectations,
duration,
capabilities,
..
} = self;
let generated = topology.build();
let duration = enforce_min_duration(&generated, duration);
let run_metrics = RunMetrics::from_topology(&generated, duration);
initialize_components(&generated, &run_metrics, &mut workloads, &mut expectations);
Scenario::new(generated, workloads, expectations, duration, capabilities)
}
}
impl Builder<()> {
#[must_use]
pub fn enable_node_control(self) -> Builder<NodeControlCapability> {
self.with_capabilities(NodeControlCapability)
}
}
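// Illustrative usage sketch (hypothetical values): a runner that can restart nodes
// would consume a `Scenario<NodeControlCapability>` built roughly like this:
//
// let scenario = ScenarioBuilder::with_node_counts(3, 1)
//     .enable_node_control()
//     .with_run_duration(Duration::from_secs(60))
//     .build();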
fn initialize_components(
descriptors: &GeneratedTopology,
run_metrics: &RunMetrics,
workloads: &mut [Arc<dyn Workload>],
expectations: &mut [Box<dyn Expectation>],
) {
initialize_workloads(descriptors, run_metrics, workloads);
initialize_expectations(descriptors, run_metrics, expectations);
}
fn initialize_workloads(
descriptors: &GeneratedTopology,
run_metrics: &RunMetrics,
workloads: &mut [Arc<dyn Workload>],
) {
for workload in workloads {
let inner =
Arc::get_mut(workload).expect("workload unexpectedly cloned before initialization");
if let Err(err) = inner.init(descriptors, run_metrics) {
panic!("workload '{}' failed to initialize: {err}", inner.name());
}
}
}
fn initialize_expectations(
descriptors: &GeneratedTopology,
run_metrics: &RunMetrics,
expectations: &mut [Box<dyn Expectation>],
) {
for expectation in expectations {
if let Err(err) = expectation.init(descriptors, run_metrics) {
panic!(
"expectation '{}' failed to initialize: {err}",
expectation.name()
);
}
}
}
fn enforce_min_duration(descriptors: &GeneratedTopology, requested: Duration) -> Duration {
const MIN_BLOCKS: u32 = 2;
const FALLBACK_SECS: u64 = 10;
let min_duration = descriptors.slot_duration().map_or_else(
|| Duration::from_secs(FALLBACK_SECS),
|slot| slot * MIN_BLOCKS,
);
requested.max(min_duration)
}
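// Usage sketch (illustrative, not prescribed by this commit): a minimal
// builder chain driving the API above from a consumer crate. Node counts,
// wallet count, and duration are arbitrary; `build()` still clamps the
// duration via `enforce_min_duration`.
use std::time::Duration;

use testing_framework_core::scenario::{Scenario, ScenarioBuilder};

fn example_scenario() -> Scenario<()> {
    ScenarioBuilder::with_node_counts(2, 1)
        .wallets(10)
        .with_run_duration(Duration::from_secs(60))
        .build()
}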

View File

@ -0,0 +1,23 @@
use async_trait::async_trait;
use super::{DynError, RunContext, runtime::context::RunMetrics};
use crate::topology::GeneratedTopology;
#[async_trait]
pub trait Expectation: Send + Sync {
fn name(&self) -> &str;
fn init(
&mut self,
_descriptors: &GeneratedTopology,
_run_metrics: &RunMetrics,
) -> Result<(), DynError> {
Ok(())
}
async fn start_capture(&mut self, _ctx: &RunContext) -> Result<(), DynError> {
Ok(())
}
async fn evaluate(&mut self, ctx: &RunContext) -> Result<(), DynError>;
}
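// Illustrative sketch: the smallest implementation of the trait above.
// `AlwaysPasses` is a hypothetical type used only for demonstration; only
// `name` and `evaluate` are required, the other methods keep their defaults.
use async_trait::async_trait;

use testing_framework_core::scenario::{DynError, Expectation, RunContext};

struct AlwaysPasses;

#[async_trait]
impl Expectation for AlwaysPasses {
    fn name(&self) -> &str {
        "always_passes"
    }

    async fn evaluate(&mut self, _ctx: &RunContext) -> Result<(), DynError> {
        Ok(())
    }
}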

View File

@ -0,0 +1,204 @@
use std::{fmt::Write as _, time::Duration};
use futures::FutureExt as _;
use super::{
BoxFuture, CONSENSUS_PROCESSED_BLOCKS, Expectation, ExpectationError, MetricsError, RunContext,
};
/// Enforces that every validator advances to the minimum block height implied
/// by the scenario duration, slot timing, and active slot coefficient (or a
/// caller-provided override).
///
/// Polls each validator's HTTP consensus info to catch stalls even when
/// Prometheus is unavailable.
#[derive(Clone, Copy, Debug)]
pub struct ConsensusLiveness {
minimum_override: Option<u64>,
tolerance: f64,
}
pub struct PrometheusBlockProduction {
minimum: u64,
}
impl PrometheusBlockProduction {
#[must_use]
pub const fn new(minimum: u64) -> Self {
Self { minimum }
}
#[must_use]
pub const fn minimum(&self) -> u64 {
self.minimum
}
}
impl Expectation for PrometheusBlockProduction {
fn name(&self) -> &'static str {
"prometheus_block_production"
}
fn evaluate<'a>(&'a self, ctx: &'a RunContext) -> BoxFuture<'a, Result<(), ExpectationError>> {
async move {
let total = ctx
.metrics()
.consensus_processed_blocks()
.map_err(|err| into_expectation_error(&err))?;
if total >= self.minimum() as f64 {
tracing::info!(
query = CONSENSUS_PROCESSED_BLOCKS,
observed_total = total,
minimum = self.minimum(),
"block production expectation satisfied via prometheus"
);
Ok(())
} else {
Err(ExpectationError::new(format!(
"prometheus query `{}` sum {total} below block target {}",
CONSENSUS_PROCESSED_BLOCKS,
self.minimum()
)))
}
}
.boxed()
}
}
fn into_expectation_error(err: &MetricsError) -> ExpectationError {
ExpectationError::new(err.to_string())
}
impl ConsensusLiveness {
#[must_use]
pub const fn with_minimum(minimum_blocks: u64) -> Self {
Self {
minimum_override: Some(minimum_blocks),
tolerance: 1.0,
}
}
#[must_use]
pub const fn with_tolerance(tolerance: f64) -> Self {
Self {
minimum_override: None,
tolerance,
}
}
}
impl Default for ConsensusLiveness {
fn default() -> Self {
Self::with_tolerance(0.8)
}
}
impl Expectation for ConsensusLiveness {
fn name(&self) -> &'static str {
"consensus_liveness"
}
fn evaluate<'a>(&'a self, ctx: &'a RunContext) -> BoxFuture<'a, Result<(), ExpectationError>> {
async move {
if ctx.validators().is_empty() {
return Err(ExpectationError::new(
"consensus liveness requires at least one validator",
));
}
let target = consensus_target_blocks(ctx, self.minimum_override, self.tolerance);
let mut issues = Vec::new();
let mut heights = Vec::with_capacity(ctx.validators().len());
for handle in ctx.validators() {
let index = handle.descriptor().index;
match handle.client().consensus_info().await {
Ok(info) => {
heights.push(info.height);
if info.height < target {
issues.push(format!(
"validator-{index} height {} below target {}",
info.height, target
));
}
}
Err(err) => {
issues.push(format!("validator-{index} consensus_info failed: {err}"));
}
}
}
if issues.is_empty() {
tracing::info!(
target,
heights = ?heights,
"consensus liveness expectation satisfied"
);
Ok(())
} else {
let mut message = String::new();
let _ = writeln!(
&mut message,
"consensus liveness violated (target={target}):"
);
for issue in issues {
let _ = writeln!(&mut message, "- {issue}");
}
Err(ExpectationError::new(message.trim_end()))
}
}
.boxed()
}
}
fn consensus_target_blocks(ctx: &RunContext, override_minimum: Option<u64>, tolerance: f64) -> u64 {
if let Some(minimum) = override_minimum {
return minimum;
}
if tolerance <= 0.0 {
return 0;
}
let slot_duration = ctx
.descriptors()
.validators()
.first()
.map_or(Duration::from_secs(2), |node| {
node.general.time_config.slot_duration
});
if slot_duration.is_zero() {
return 0;
}
let active_slot_coeff = ctx
.descriptors()
.config()
.consensus_params
.active_slot_coeff;
if active_slot_coeff <= 0.0 {
return 0;
}
let run_duration = ctx.run_duration();
if run_duration.is_zero() {
return 0;
}
let slot_duration_secs = slot_duration.as_secs_f64();
if slot_duration_secs == 0.0 {
return 0;
}
let slot_count = run_duration.as_secs_f64() / slot_duration_secs;
if slot_count < 1.0 {
return 0;
}
let expected_blocks = slot_count * active_slot_coeff;
let adjusted = (expected_blocks * tolerance).floor();
adjusted.max(1.0) as u64
}
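// Worked example (assumed parameters, not taken from any real config): how
// `consensus_target_blocks` derives the per-validator target with the default
// tolerance of 0.8, a 60 s run, 2 s slots, and an active slot coefficient of 0.9.
//
//   slot_count      = 60.0 / 2.0        = 30.0
//   expected_blocks = 30.0 * 0.9        = 27.0
//   adjusted        = floor(27.0 * 0.8) = 21.0
//   target          = max(21.0, 1.0)    = 21 blocks per validator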

View File

@ -0,0 +1,129 @@
use std::{fmt, time::Duration};
use futures::future::try_join_all;
use nomos_http_api_common::paths;
use reqwest::Client as ReqwestClient;
use thiserror::Error;
use tokio::time::{sleep, timeout};
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
pub enum NodeRole {
Validator,
Executor,
}
impl NodeRole {
#[must_use]
pub const fn label(self) -> &'static str {
match self {
Self::Validator => "validator",
Self::Executor => "executor",
}
}
}
impl fmt::Display for NodeRole {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
f.write_str(self.label())
}
}
#[derive(Clone, Copy, Debug, Error)]
#[error("timeout waiting for {role} HTTP endpoint on port {port} after {timeout:?}")]
pub struct HttpReadinessError {
role: NodeRole,
port: u16,
timeout: Duration,
}
impl HttpReadinessError {
#[must_use]
pub const fn new(role: NodeRole, port: u16, timeout: Duration) -> Self {
Self {
role,
port,
timeout,
}
}
#[must_use]
pub const fn role(&self) -> NodeRole {
self.role
}
#[must_use]
pub const fn port(&self) -> u16 {
self.port
}
#[must_use]
pub const fn timeout(&self) -> Duration {
self.timeout
}
}
pub async fn wait_for_http_ports(
ports: &[u16],
role: NodeRole,
timeout_duration: Duration,
poll_interval: Duration,
) -> Result<(), HttpReadinessError> {
wait_for_http_ports_with_host(ports, role, "127.0.0.1", timeout_duration, poll_interval).await
}
pub async fn wait_for_http_ports_with_host(
ports: &[u16],
role: NodeRole,
host: &str,
timeout_duration: Duration,
poll_interval: Duration,
) -> Result<(), HttpReadinessError> {
if ports.is_empty() {
return Ok(());
}
let client = ReqwestClient::new();
let probes = ports.iter().copied().map(|port| {
wait_for_single_port(
client.clone(),
port,
role,
host,
timeout_duration,
poll_interval,
)
});
try_join_all(probes).await.map(|_| ())
}
async fn wait_for_single_port(
client: ReqwestClient,
port: u16,
role: NodeRole,
host: &str,
timeout_duration: Duration,
poll_interval: Duration,
) -> Result<(), HttpReadinessError> {
let url = format!("http://{host}:{port}{}", paths::CRYPTARCHIA_INFO);
let probe = async {
loop {
let is_ready = client
.get(&url)
.send()
.await
.map(|response| response.status().is_success())
.unwrap_or(false);
if is_ready {
return;
}
sleep(poll_interval).await;
}
};
timeout(timeout_duration, probe)
.await
.map_err(|_| HttpReadinessError::new(role, port, timeout_duration))
}
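// Usage sketch (illustrative ports and timings): wait for two validator API
// endpoints on localhost before starting a workload.
use std::time::Duration;

use testing_framework_core::scenario::http_probe::{
    HttpReadinessError, NodeRole, wait_for_http_ports,
};

async fn example_wait() -> Result<(), HttpReadinessError> {
    wait_for_http_ports(
        &[18080, 18081],
        NodeRole::Validator,
        Duration::from_secs(60),
        Duration::from_millis(250),
    )
    .await
}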

View File

@ -0,0 +1,25 @@
//! Scenario orchestration primitives shared by integration tests and runners.
mod capabilities;
pub mod cfgsync;
mod definition;
mod expectation;
pub mod http_probe;
mod runtime;
mod workload;
pub type DynError = Box<dyn std::error::Error + Send + Sync + 'static>;
pub use capabilities::{NodeControlCapability, NodeControlHandle, RequiresNodeControl};
pub use definition::{Builder, Scenario, ScenarioBuilder};
pub use expectation::Expectation;
pub use runtime::{
BlockFeed, BlockFeedTask, BlockRecord, BlockStats, CleanupGuard, Deployer, NodeClients,
RunContext, RunHandle, RunMetrics, Runner, ScenarioError,
metrics::{
CONSENSUS_PROCESSED_BLOCKS, CONSENSUS_TRANSACTIONS_TOTAL, Metrics, MetricsError,
PrometheusEndpoint, PrometheusInstantSample,
},
spawn_block_feed,
};
pub use workload::Workload;

View File

@ -0,0 +1,178 @@
use std::{
collections::HashSet,
sync::{
Arc,
atomic::{AtomicU64, Ordering},
},
time::Duration,
};
use anyhow::{Context as _, Result};
use nomos_core::{block::Block, mantle::SignedMantleTx};
use nomos_node::HeaderId;
use tokio::{sync::broadcast, task::JoinHandle, time::sleep};
use tracing::{debug, error};
use super::context::CleanupGuard;
use crate::nodes::ApiClient;
const POLL_INTERVAL: Duration = Duration::from_secs(1);
#[derive(Clone)]
pub struct BlockFeed {
inner: Arc<BlockFeedInner>,
}
struct BlockFeedInner {
sender: broadcast::Sender<Arc<BlockRecord>>,
stats: Arc<BlockStats>,
}
#[derive(Clone)]
pub struct BlockRecord {
pub header: HeaderId,
pub block: Arc<Block<SignedMantleTx>>,
}
pub struct BlockFeedTask {
handle: JoinHandle<()>,
}
impl BlockFeed {
#[must_use]
pub fn subscribe(&self) -> broadcast::Receiver<Arc<BlockRecord>> {
self.inner.sender.subscribe()
}
#[must_use]
pub fn stats(&self) -> Arc<BlockStats> {
Arc::clone(&self.inner.stats)
}
fn ingest(&self, header: HeaderId, block: Block<SignedMantleTx>) {
self.inner.stats.record_block(&block);
let record = Arc::new(BlockRecord {
header,
block: Arc::new(block),
});
let _ = self.inner.sender.send(record);
}
}
impl BlockFeedTask {
#[must_use]
pub const fn new(handle: JoinHandle<()>) -> Self {
Self { handle }
}
}
pub async fn spawn_block_feed(client: ApiClient) -> Result<(BlockFeed, BlockFeedTask)> {
let (sender, _) = broadcast::channel(1024);
let feed = BlockFeed {
inner: Arc::new(BlockFeedInner {
sender,
stats: Arc::new(BlockStats::default()),
}),
};
let mut scanner = BlockScanner::new(client, feed.clone());
scanner.catch_up().await?;
let handle = tokio::spawn(async move { scanner.run().await });
Ok((feed, BlockFeedTask::new(handle)))
}
struct BlockScanner {
client: ApiClient,
feed: BlockFeed,
seen: HashSet<HeaderId>,
}
impl BlockScanner {
fn new(client: ApiClient, feed: BlockFeed) -> Self {
Self {
client,
feed,
seen: HashSet::new(),
}
}
async fn run(&mut self) {
loop {
if let Err(err) = self.catch_up().await {
error!(%err, "block feed catch up failed");
}
sleep(POLL_INTERVAL).await;
}
}
async fn catch_up(&mut self) -> Result<()> {
let info = self.client.consensus_info().await?;
let tip = info.tip;
let mut remaining_height = info.height;
let mut stack = Vec::new();
let mut cursor = tip;
loop {
if self.seen.contains(&cursor) {
break;
}
if remaining_height == 0 {
self.seen.insert(cursor);
break;
}
let block = self
.client
.storage_block(&cursor)
.await?
.context("missing block while catching up")?;
let parent = block.header().parent();
stack.push((cursor, block));
if self.seen.contains(&parent) || parent == cursor {
break;
}
cursor = parent;
remaining_height = remaining_height.saturating_sub(1);
}
let mut processed = 0usize;
while let Some((header, block)) = stack.pop() {
self.feed.ingest(header, block);
self.seen.insert(header);
processed += 1;
}
debug!(processed, "block feed processed catch up batch");
Ok(())
}
}
impl CleanupGuard for BlockFeedTask {
fn cleanup(self: Box<Self>) {
self.handle.abort();
}
}
#[derive(Default)]
pub struct BlockStats {
total_transactions: AtomicU64,
}
impl BlockStats {
fn record_block(&self, block: &Block<SignedMantleTx>) {
self.total_transactions
.fetch_add(block.transactions().len() as u64, Ordering::Relaxed);
}
#[must_use]
pub fn total_transactions(&self) -> u64 {
self.total_transactions.load(Ordering::Relaxed)
}
}
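// Usage sketch: subscribe to the feed spawned above and log the transaction
// count of every observed block. The `nodes::ApiClient` path mirrors the
// crate-internal one and is assumed to be public; error handling is
// simplified and the loop ends when the feed lags or closes.
use testing_framework_core::{nodes::ApiClient, scenario::spawn_block_feed};

async fn example_feed(client: ApiClient) -> anyhow::Result<()> {
    let (feed, _task) = spawn_block_feed(client).await?;
    let mut rx = feed.subscribe();
    while let Ok(record) = rx.recv().await {
        tracing::info!(
            transactions = record.block.transactions().len(),
            "observed block"
        );
    }
    Ok(())
}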

View File

@ -0,0 +1,215 @@
use std::{sync::Arc, time::Duration};
use super::{block_feed::BlockFeed, metrics::Metrics, node_clients::ClusterClient};
use crate::{
nodes::ApiClient,
scenario::{NodeClients, NodeControlHandle},
topology::{GeneratedTopology, Topology, configs::wallet::WalletAccount},
};
pub struct RunContext {
descriptors: GeneratedTopology,
cluster: Option<Topology>,
node_clients: NodeClients,
metrics: RunMetrics,
telemetry: Metrics,
block_feed: BlockFeed,
node_control: Option<Arc<dyn NodeControlHandle>>,
}
impl RunContext {
    /// Builds a run context from the generated topology, node clients,
    /// telemetry, and block feed. The run duration is expected to have been
    /// clamped already by the scenario builder; here it only seeds the run
    /// metrics.
#[must_use]
pub fn new(
descriptors: GeneratedTopology,
cluster: Option<Topology>,
node_clients: NodeClients,
run_duration: Duration,
telemetry: Metrics,
block_feed: BlockFeed,
node_control: Option<Arc<dyn NodeControlHandle>>,
) -> Self {
let metrics = RunMetrics::new(&descriptors, run_duration);
Self {
descriptors,
cluster,
node_clients,
metrics,
telemetry,
block_feed,
node_control,
}
}
#[must_use]
pub const fn descriptors(&self) -> &GeneratedTopology {
&self.descriptors
}
#[must_use]
pub const fn topology(&self) -> Option<&Topology> {
self.cluster.as_ref()
}
#[must_use]
pub const fn node_clients(&self) -> &NodeClients {
&self.node_clients
}
#[must_use]
pub fn random_node_client(&self) -> Option<&ApiClient> {
self.node_clients.any_client()
}
#[must_use]
pub fn block_feed(&self) -> BlockFeed {
self.block_feed.clone()
}
#[must_use]
pub fn wallet_accounts(&self) -> &[WalletAccount] {
self.descriptors.wallet_accounts()
}
#[must_use]
pub const fn telemetry(&self) -> &Metrics {
&self.telemetry
}
#[must_use]
pub const fn run_duration(&self) -> Duration {
self.metrics.run_duration()
}
#[must_use]
pub const fn expected_blocks(&self) -> u64 {
self.metrics.expected_consensus_blocks()
}
#[must_use]
pub const fn run_metrics(&self) -> RunMetrics {
self.metrics
}
#[must_use]
pub fn node_control(&self) -> Option<Arc<dyn NodeControlHandle>> {
self.node_control.clone()
}
#[must_use]
pub const fn cluster_client(&self) -> ClusterClient<'_> {
self.node_clients.cluster_client()
}
}
/// Handle returned by the runner to control the lifecycle of the run.
pub struct RunHandle {
run_context: Arc<RunContext>,
cleanup_guard: Option<Box<dyn CleanupGuard>>,
}
impl Drop for RunHandle {
fn drop(&mut self) {
if let Some(guard) = self.cleanup_guard.take() {
guard.cleanup();
}
}
}
impl RunHandle {
#[must_use]
pub fn new(context: RunContext, cleanup_guard: Option<Box<dyn CleanupGuard>>) -> Self {
Self {
run_context: Arc::new(context),
cleanup_guard,
}
}
#[must_use]
pub(crate) fn from_shared(
context: Arc<RunContext>,
cleanup_guard: Option<Box<dyn CleanupGuard>>,
) -> Self {
Self {
run_context: context,
cleanup_guard,
}
}
#[must_use]
pub fn context(&self) -> &RunContext {
&self.run_context
}
}
#[derive(Clone, Copy)]
pub struct RunMetrics {
run_duration: Duration,
expected_blocks: u64,
block_interval_hint: Option<Duration>,
}
impl RunMetrics {
#[must_use]
pub fn new(descriptors: &GeneratedTopology, run_duration: Duration) -> Self {
Self::from_topology(descriptors, run_duration)
}
#[must_use]
pub fn from_topology(descriptors: &GeneratedTopology, run_duration: Duration) -> Self {
let slot_duration = descriptors.slot_duration();
let active_slot_coeff = descriptors.config().consensus_params.active_slot_coeff;
let expected_blocks =
calculate_expected_blocks(run_duration, slot_duration, active_slot_coeff);
let block_interval_hint =
slot_duration.map(|duration| duration.mul_f64(active_slot_coeff.clamp(0.0, 1.0)));
Self {
run_duration,
expected_blocks,
block_interval_hint,
}
}
#[must_use]
pub const fn run_duration(&self) -> Duration {
self.run_duration
}
#[must_use]
pub const fn expected_consensus_blocks(&self) -> u64 {
self.expected_blocks
}
#[must_use]
pub const fn block_interval_hint(&self) -> Option<Duration> {
self.block_interval_hint
}
}
pub trait CleanupGuard: Send {
fn cleanup(self: Box<Self>);
}
/// Estimates how many consensus blocks a run of `run_duration` should
/// produce, given the slot duration and the active slot coefficient; returns
/// zero when the slot duration is unknown.
fn calculate_expected_blocks(
run_duration: Duration,
slot_duration: Option<Duration>,
active_slot_coeff: f64,
) -> u64 {
let Some(slot_duration) = slot_duration else {
return 0;
};
let slot_secs = slot_duration.as_secs_f64();
let run_secs = run_duration.as_secs_f64();
let expected = run_secs / slot_secs * active_slot_coeff;
expected.ceil().clamp(0.0, u64::MAX as f64) as u64
}
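// Worked example (assumed values): what `RunMetrics::from_topology` yields for
// a 30 s run with 2 s slots and an active slot coefficient of 0.5.
//
//   expected_blocks     = ceil(30.0 / 2.0 * 0.5) = ceil(7.5) = 8
//   block_interval_hint = 2 s * 0.5              = 1 s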

View File

@ -0,0 +1,22 @@
use async_trait::async_trait;
use super::runner::Runner;
use crate::scenario::{DynError, Scenario};
/// Error returned when executing workloads or expectations.
#[derive(Debug, thiserror::Error)]
pub enum ScenarioError {
#[error("workload failure: {0}")]
Workload(#[source] DynError),
#[error("expectation capture failed: {0}")]
ExpectationCapture(#[source] DynError),
#[error("expectations failed:\n{0}")]
Expectations(#[source] DynError),
}
#[async_trait]
pub trait Deployer<Caps = ()>: Send + Sync {
type Error;
async fn deploy(&self, scenario: &Scenario<Caps>) -> Result<Runner, Self::Error>;
}

View File

@ -0,0 +1,201 @@
use std::{collections::HashMap, sync::Arc};
use prometheus_http_query::{Client as PrometheusClient, response::Data as PrometheusData};
use reqwest::Url;
use tracing::warn;
pub const CONSENSUS_PROCESSED_BLOCKS: &str = "consensus_processed_blocks";
pub const CONSENSUS_TRANSACTIONS_TOTAL: &str = "consensus_transactions_total";
const CONSENSUS_TRANSACTIONS_VALIDATOR_QUERY: &str =
r#"sum(consensus_transactions_total{job=~"validator-.*"})"#;
#[derive(Clone, Default)]
pub struct Metrics {
prometheus: Option<Arc<PrometheusEndpoint>>,
}
impl Metrics {
#[must_use]
pub const fn empty() -> Self {
Self { prometheus: None }
}
pub fn from_prometheus(url: Url) -> Result<Self, MetricsError> {
let handle = Arc::new(PrometheusEndpoint::new(url)?);
Ok(Self::empty().with_prometheus_endpoint(handle))
}
pub fn from_prometheus_str(raw_url: &str) -> Result<Self, MetricsError> {
Url::parse(raw_url)
.map_err(|err| MetricsError::new(format!("invalid prometheus url: {err}")))
.and_then(Self::from_prometheus)
}
#[must_use]
pub fn with_prometheus_endpoint(mut self, handle: Arc<PrometheusEndpoint>) -> Self {
self.prometheus = Some(handle);
self
}
#[must_use]
pub fn prometheus(&self) -> Option<Arc<PrometheusEndpoint>> {
self.prometheus.as_ref().map(Arc::clone)
}
#[must_use]
pub const fn is_configured(&self) -> bool {
self.prometheus.is_some()
}
pub fn instant_values(&self, query: &str) -> Result<Vec<f64>, MetricsError> {
let handle = self
.prometheus()
.ok_or_else(|| MetricsError::new("prometheus endpoint unavailable"))?;
handle.instant_values(query)
}
pub fn counter_value(&self, query: &str) -> Result<f64, MetricsError> {
let handle = self
.prometheus()
.ok_or_else(|| MetricsError::new("prometheus endpoint unavailable"))?;
handle.counter_value(query)
}
pub fn consensus_processed_blocks(&self) -> Result<f64, MetricsError> {
self.counter_value(CONSENSUS_PROCESSED_BLOCKS)
}
pub fn consensus_transactions_total(&self) -> Result<f64, MetricsError> {
let handle = self
.prometheus()
.ok_or_else(|| MetricsError::new("prometheus endpoint unavailable"))?;
match handle.instant_samples(CONSENSUS_TRANSACTIONS_VALIDATOR_QUERY) {
Ok(samples) if !samples.is_empty() => {
return Ok(samples.into_iter().map(|sample| sample.value).sum());
}
Ok(_) => {
warn!(
query = CONSENSUS_TRANSACTIONS_VALIDATOR_QUERY,
"validator-specific consensus transaction metric returned no samples; falling back to aggregate counter"
);
}
Err(err) => {
warn!(
query = CONSENSUS_TRANSACTIONS_VALIDATOR_QUERY,
error = %err,
"failed to query validator-specific consensus transaction metric; falling back to aggregate counter"
);
}
}
handle.counter_value(CONSENSUS_TRANSACTIONS_TOTAL)
}
}
#[derive(Debug, thiserror::Error)]
pub enum MetricsError {
#[error("{0}")]
Store(String),
}
impl MetricsError {
#[must_use]
pub fn new(message: impl Into<String>) -> Self {
Self::Store(message.into())
}
}
pub struct PrometheusEndpoint {
base_url: Url,
client: PrometheusClient,
}
#[derive(Clone, Debug)]
pub struct PrometheusInstantSample {
pub labels: HashMap<String, String>,
pub timestamp: f64,
pub value: f64,
}
impl PrometheusEndpoint {
pub fn new(base_url: Url) -> Result<Self, MetricsError> {
let client = PrometheusClient::try_from(base_url.as_str().to_owned()).map_err(|err| {
MetricsError::new(format!("failed to create prometheus client: {err}"))
})?;
Ok(Self { base_url, client })
}
#[must_use]
pub const fn base_url(&self) -> &Url {
&self.base_url
}
#[must_use]
pub fn port(&self) -> Option<u16> {
self.base_url.port_or_known_default()
}
pub fn instant_samples(
&self,
query: &str,
) -> Result<Vec<PrometheusInstantSample>, MetricsError> {
let query = query.to_owned();
let client = self.client.clone();
let response = std::thread::spawn(move || -> Result<_, MetricsError> {
let runtime = tokio::runtime::Runtime::new()
.map_err(|err| MetricsError::new(format!("failed to create runtime: {err}")))?;
runtime
.block_on(async { client.query(&query).get().await })
.map_err(|err| MetricsError::new(format!("prometheus query failed: {err}")))
})
.join()
.map_err(|_| MetricsError::new("prometheus query thread panicked"))??;
let mut samples = Vec::new();
match response.data() {
PrometheusData::Vector(vectors) => {
for vector in vectors {
samples.push(PrometheusInstantSample {
labels: vector.metric().clone(),
timestamp: vector.sample().timestamp(),
value: vector.sample().value(),
});
}
}
PrometheusData::Matrix(ranges) => {
for range in ranges {
let labels = range.metric().clone();
for sample in range.samples() {
samples.push(PrometheusInstantSample {
labels: labels.clone(),
timestamp: sample.timestamp(),
value: sample.value(),
});
}
}
}
PrometheusData::Scalar(sample) => {
samples.push(PrometheusInstantSample {
labels: HashMap::new(),
timestamp: sample.timestamp(),
value: sample.value(),
});
}
}
Ok(samples)
}
pub fn instant_values(&self, query: &str) -> Result<Vec<f64>, MetricsError> {
self.instant_samples(query)
.map(|samples| samples.into_iter().map(|sample| sample.value).collect())
}
pub fn counter_value(&self, query: &str) -> Result<f64, MetricsError> {
self.instant_values(query)
.map(|values| values.into_iter().sum())
}
}
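// Usage sketch (illustrative URL): point `Metrics` at a local Prometheus and
// read the aggregated consensus block counter exposed above.
use testing_framework_core::scenario::{Metrics, MetricsError};

fn example_metrics() -> Result<(), MetricsError> {
    let metrics = Metrics::from_prometheus_str("http://127.0.0.1:9090")?;
    if metrics.is_configured() {
        let blocks = metrics.consensus_processed_blocks()?;
        tracing::info!(blocks, "consensus blocks reported by prometheus");
    }
    Ok(())
}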

View File

@ -0,0 +1,12 @@
mod block_feed;
pub mod context;
mod deployer;
pub mod metrics;
mod node_clients;
mod runner;
pub use block_feed::{BlockFeed, BlockFeedTask, BlockRecord, BlockStats, spawn_block_feed};
pub use context::{CleanupGuard, RunContext, RunHandle, RunMetrics};
pub use deployer::{Deployer, ScenarioError};
pub use node_clients::NodeClients;
pub use runner::Runner;

View File

@ -0,0 +1,135 @@
use std::pin::Pin;
use rand::{Rng as _, seq::SliceRandom as _, thread_rng};
use crate::{
nodes::ApiClient,
scenario::DynError,
topology::{GeneratedTopology, Topology},
};
#[derive(Clone, Default)]
pub struct NodeClients {
validators: Vec<ApiClient>,
executors: Vec<ApiClient>,
}
impl NodeClients {
#[must_use]
pub const fn new(validators: Vec<ApiClient>, executors: Vec<ApiClient>) -> Self {
Self {
validators,
executors,
}
}
#[must_use]
pub fn from_topology(_descriptors: &GeneratedTopology, topology: &Topology) -> Self {
let validator_clients = topology.validators().iter().map(|node| {
let testing = node.testing_url();
ApiClient::from_urls(node.url(), testing)
});
let executor_clients = topology.executors().iter().map(|node| {
let testing = node.testing_url();
ApiClient::from_urls(node.url(), testing)
});
Self::new(validator_clients.collect(), executor_clients.collect())
}
#[must_use]
pub fn validator_clients(&self) -> &[ApiClient] {
&self.validators
}
#[must_use]
pub fn executor_clients(&self) -> &[ApiClient] {
&self.executors
}
#[must_use]
pub fn random_validator(&self) -> Option<&ApiClient> {
if self.validators.is_empty() {
return None;
}
let mut rng = thread_rng();
let idx = rng.gen_range(0..self.validators.len());
self.validators.get(idx)
}
#[must_use]
pub fn random_executor(&self) -> Option<&ApiClient> {
if self.executors.is_empty() {
return None;
}
let mut rng = thread_rng();
let idx = rng.gen_range(0..self.executors.len());
self.executors.get(idx)
}
pub fn all_clients(&self) -> impl Iterator<Item = &ApiClient> {
self.validators.iter().chain(self.executors.iter())
}
#[must_use]
pub fn any_client(&self) -> Option<&ApiClient> {
let validator_count = self.validators.len();
let executor_count = self.executors.len();
let total = validator_count + executor_count;
if total == 0 {
return None;
}
let mut rng = thread_rng();
let choice = rng.gen_range(0..total);
if choice < validator_count {
self.validators.get(choice)
} else {
self.executors.get(choice - validator_count)
}
}
#[must_use]
pub const fn cluster_client(&self) -> ClusterClient<'_> {
ClusterClient::new(self)
}
}
pub struct ClusterClient<'a> {
node_clients: &'a NodeClients,
}
impl<'a> ClusterClient<'a> {
#[must_use]
pub const fn new(node_clients: &'a NodeClients) -> Self {
Self { node_clients }
}
pub async fn try_all_clients<T, E>(
&self,
mut f: impl for<'b> FnMut(
&'b ApiClient,
) -> Pin<Box<dyn Future<Output = Result<T, E>> + Send + 'b>>
+ Send,
) -> Result<T, DynError>
where
E: Into<DynError>,
{
let mut clients: Vec<&ApiClient> = self.node_clients.all_clients().collect();
if clients.is_empty() {
return Err("cluster client has no api clients".into());
}
clients.shuffle(&mut thread_rng());
let mut last_err = None;
for client in clients {
match f(client).await {
Ok(value) => return Ok(value),
Err(err) => last_err = Some(err.into()),
}
}
Err(last_err.unwrap_or_else(|| "cluster client exhausted all nodes".into()))
}
}
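// Usage sketch: pick a random node from the populated client set and query its
// consensus info, much as the expectations above do. The client error type is
// assumed to convert into `DynError` as it does elsewhere in this crate.
use testing_framework_core::scenario::{DynError, NodeClients};

async fn example_query(node_clients: &NodeClients) -> Result<(), DynError> {
    if let Some(client) = node_clients.any_client() {
        let info = client.consensus_info().await?;
        tracing::info!(height = info.height, "tip height from a random node");
    }
    Ok(())
}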

View File

@ -0,0 +1,251 @@
use std::{any::Any, panic::AssertUnwindSafe, sync::Arc, time::Duration};
use futures::FutureExt as _;
use tokio::{
task::JoinSet,
time::{sleep, timeout},
};
use super::deployer::ScenarioError;
use crate::scenario::{
DynError, Expectation, Scenario,
runtime::context::{CleanupGuard, RunContext, RunHandle},
};
type WorkloadOutcome = Result<(), DynError>;
/// Represents a fully prepared environment capable of executing a scenario.
pub struct Runner {
context: Arc<RunContext>,
cleanup_guard: Option<Box<dyn CleanupGuard>>,
}
impl Runner {
#[must_use]
pub fn new(context: RunContext, cleanup_guard: Option<Box<dyn CleanupGuard>>) -> Self {
Self {
context: Arc::new(context),
cleanup_guard,
}
}
#[must_use]
pub fn context(&self) -> Arc<RunContext> {
Arc::clone(&self.context)
}
pub(crate) fn cleanup(&mut self) {
if let Some(guard) = self.cleanup_guard.take() {
guard.cleanup();
}
}
pub(crate) fn into_run_handle(mut self) -> RunHandle {
RunHandle::from_shared(Arc::clone(&self.context), self.cleanup_guard.take())
}
/// Executes the scenario by driving workloads first and then evaluating all
/// expectations. On any failure it cleans up resources and propagates the
/// error to the caller.
pub async fn run<Caps>(
mut self,
scenario: &mut Scenario<Caps>,
) -> Result<RunHandle, ScenarioError>
where
Caps: Send + Sync,
{
let context = self.context();
if let Err(error) =
Self::prepare_expectations(scenario.expectations_mut(), context.as_ref()).await
{
self.cleanup();
return Err(error);
}
if let Err(error) = Self::run_workloads(&context, scenario).await {
self.cleanup();
return Err(error);
}
Self::cooldown(&context).await;
if let Err(error) =
Self::run_expectations(scenario.expectations_mut(), context.as_ref()).await
{
self.cleanup();
return Err(error);
}
Ok(self.into_run_handle())
}
async fn prepare_expectations(
expectations: &mut [Box<dyn Expectation>],
context: &RunContext,
) -> Result<(), ScenarioError> {
for expectation in expectations {
if let Err(source) = expectation.start_capture(context).await {
return Err(ScenarioError::ExpectationCapture(source));
}
}
Ok(())
}
/// Spawns every workload, waits until the configured duration elapses (or a
/// workload fails), and then aborts the remaining tasks.
async fn run_workloads<Caps>(
context: &Arc<RunContext>,
scenario: &Scenario<Caps>,
) -> Result<(), ScenarioError>
where
Caps: Send + Sync,
{
let mut workloads = Self::spawn_workloads(scenario, context);
let _ = Self::drive_until_timer(&mut workloads, scenario.duration()).await?;
Self::drain_workloads(&mut workloads).await
}
/// Evaluates every registered expectation, aggregating failures so callers
/// can see all missing conditions in a single report.
async fn run_expectations(
expectations: &mut [Box<dyn Expectation>],
context: &RunContext,
) -> Result<(), ScenarioError> {
let mut failures: Vec<(String, DynError)> = Vec::new();
for expectation in expectations {
if let Err(source) = expectation.evaluate(context).await {
failures.push((expectation.name().to_owned(), source));
}
}
if failures.is_empty() {
return Ok(());
}
let summary = failures
.into_iter()
.map(|(name, source)| format!("{name}: {source}"))
.collect::<Vec<_>>()
.join("\n");
Err(ScenarioError::Expectations(summary.into()))
}
async fn cooldown(context: &Arc<RunContext>) {
let metrics = context.run_metrics();
let needs_stabilization = context.node_control().is_some();
if let Some(interval) = metrics.block_interval_hint() {
if interval.is_zero() {
return;
}
let mut wait = interval.mul_f64(5.0);
if needs_stabilization {
let minimum = Duration::from_secs(30);
if wait < minimum {
wait = minimum;
}
}
if !wait.is_zero() {
sleep(wait).await;
}
} else if needs_stabilization {
sleep(Duration::from_secs(30)).await;
}
}
/// Spawns each workload inside its own task and returns the join set for
/// cooperative management.
fn spawn_workloads<Caps>(
scenario: &Scenario<Caps>,
context: &Arc<RunContext>,
) -> JoinSet<WorkloadOutcome>
where
Caps: Send + Sync,
{
let mut workloads = JoinSet::new();
for workload in scenario.workloads() {
let workload = Arc::clone(workload);
let ctx = Arc::clone(context);
workloads.spawn(async move {
let outcome = AssertUnwindSafe(async { workload.start(ctx.as_ref()).await })
.catch_unwind()
.await;
outcome.unwrap_or_else(|panic| {
Err(format!("workload panicked: {}", panic_message(panic)).into())
})
});
}
workloads
}
/// Polls workload tasks until the timeout fires or one reports an error.
async fn drive_until_timer(
workloads: &mut JoinSet<WorkloadOutcome>,
duration: Duration,
) -> Result<bool, ScenarioError> {
let run_future = async {
while let Some(result) = workloads.join_next().await {
Self::map_join_result(result)?;
}
Ok(())
};
timeout(duration, run_future)
.await
.map_or(Ok(true), |result| {
result?;
Ok(false)
})
}
/// Aborts and drains any remaining workload tasks so we do not leak work
/// across scenario runs.
async fn drain_workloads(
workloads: &mut JoinSet<WorkloadOutcome>,
) -> Result<(), ScenarioError> {
workloads.abort_all();
while let Some(result) = workloads.join_next().await {
Self::map_join_result(result)?;
}
Ok(())
}
/// Converts the outcome of a workload task into the canonical scenario
/// error, tolerating cancellation when the runner aborts unfinished tasks.
fn map_join_result(
result: Result<WorkloadOutcome, tokio::task::JoinError>,
) -> Result<(), ScenarioError> {
match result {
Ok(outcome) => outcome.map_err(ScenarioError::Workload),
Err(join_err) if join_err.is_cancelled() => Ok(()),
Err(join_err) => Err(ScenarioError::Workload(
format!("workload task failed: {join_err}").into(),
)),
}
}
}
/// Attempts to turn a panic payload into a readable string for diagnostics.
fn panic_message(panic: Box<dyn Any + Send>) -> String {
panic.downcast::<String>().map_or_else(
|panic| {
panic.downcast::<&'static str>().map_or_else(
|_| "unknown panic".to_owned(),
|message| (*message).to_owned(),
)
},
|message| *message,
)
}
impl Drop for Runner {
fn drop(&mut self) {
self.cleanup();
}
}
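// End-to-end sketch: deploy a scenario with some `Deployer` implementation and
// drive it with the runner above. The deployer and its error type are
// assumptions for illustration; the call order mirrors how the runners in this
// commit use the API.
use std::time::Duration;

use testing_framework_core::scenario::{Deployer, RunHandle, ScenarioBuilder, ScenarioError};

async fn example_run<D>(deployer: &D) -> Result<RunHandle, ScenarioError>
where
    D: Deployer<(), Error = ScenarioError>,
{
    let mut scenario = ScenarioBuilder::with_node_counts(1, 0)
        .with_run_duration(Duration::from_secs(30))
        .build();
    let runner = deployer.deploy(&scenario).await?;
    runner.run(&mut scenario).await
}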

View File

@ -0,0 +1,23 @@
use async_trait::async_trait;
use super::{DynError, Expectation, RunContext, runtime::context::RunMetrics};
use crate::topology::GeneratedTopology;
#[async_trait]
pub trait Workload: Send + Sync {
fn name(&self) -> &str;
fn expectations(&self) -> Vec<Box<dyn Expectation>> {
Vec::new()
}
fn init(
&mut self,
_descriptors: &GeneratedTopology,
_run_metrics: &RunMetrics,
) -> Result<(), DynError> {
Ok(())
}
async fn start(&self, ctx: &RunContext) -> Result<(), DynError>;
}
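// Illustrative sketch: the smallest workload satisfying the trait above.
// `Idle` is a hypothetical type; it does nothing and relies on the default
// `expectations` and `init` methods.
use async_trait::async_trait;

use testing_framework_core::scenario::{DynError, RunContext, Workload};

struct Idle;

#[async_trait]
impl Workload for Idle {
    fn name(&self) -> &str {
        "idle"
    }

    async fn start(&self, _ctx: &RunContext) -> Result<(), DynError> {
        Ok(())
    }
}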

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,37 @@
[package]
categories.workspace = true
description.workspace = true
edition.workspace = true
keywords.workspace = true
license.workspace = true
name = "testing-framework-runner-compose"
readme.workspace = true
repository.workspace = true
version = "0.1.0"
[lints]
workspace = true
[dependencies]
anyhow = "1"
async-trait = { workspace = true }
axum = { version = "0.7", default-features = false, features = ["http1", "json", "tokio"] }
cfgsync = { workspace = true }
reqwest = { workspace = true, features = ["json"] }
serde = { workspace = true, features = ["derive"] }
tempfile = { workspace = true }
tera = "1.19"
testing-framework-core = { path = "../../core" }
thiserror = { workspace = true }
tokio = { workspace = true, features = ["macros", "net", "process", "rt-multi-thread", "sync", "time"] }
tracing = { workspace = true }
url = { version = "2" }
uuid = { version = "1", features = ["v4"] }
[dev-dependencies]
groth16 = { workspace = true }
nomos-core = { workspace = true }
nomos-ledger = { workspace = true }
nomos-tracing-service = { workspace = true }
tests = { workspace = true }
zksign = { workspace = true }

View File

@ -0,0 +1,65 @@
services:
prometheus:
image: prom/prometheus:v3.0.1
command:
- --config.file=/etc/prometheus/prometheus.yml
- --storage.tsdb.retention.time=7d
- --web.enable-otlp-receiver
- --enable-feature=otlp-write-receiver
volumes:
- ./testnet/monitoring/prometheus.yml:/etc/prometheus/prometheus.yml:z
ports:
- {{ prometheus.host_port }}
restart: on-failure
{% for node in validators %}
{{ node.name }}:
image: {{ node.image }}
{% if node.platform %} platform: {{ node.platform }}
{% endif %} entrypoint: {{ node.entrypoint }}
volumes:
{% for volume in node.volumes %}
- {{ volume }}
{% endfor %}
{% if node.extra_hosts | length > 0 %}
extra_hosts:
{% for host in node.extra_hosts %}
- {{ host }}
{% endfor %}
{% endif %}
ports:
{% for port in node.ports %}
- {{ port }}
{% endfor %}
environment:
{% for env in node.environment %}
{{ env.key }}: "{{ env.value }}"
{% endfor %}
restart: on-failure
{% endfor %}{% for node in executors %}
{{ node.name }}:
image: {{ node.image }}
{% if node.platform %} platform: {{ node.platform }}
{% endif %} entrypoint: {{ node.entrypoint }}
volumes:
{% for volume in node.volumes %}
- {{ volume }}
{% endfor %}
{% if node.extra_hosts | length > 0 %}
extra_hosts:
{% for host in node.extra_hosts %}
- {{ host }}
{% endfor %}
{% endif %}
ports:
{% for port in node.ports %}
- {{ port }}
{% endfor %}
environment:
{% for env in node.environment %}
{{ env.key }}: "{{ env.value }}"
{% endfor %}
restart: on-failure
{% endfor %}

View File

@ -0,0 +1,77 @@
use std::{net::Ipv4Addr, path::Path, sync::Arc};
use anyhow::Context as _;
use axum::serve;
use cfgsync::{
repo::ConfigRepo,
server::{CfgSyncConfig as ServerCfgSyncConfig, cfgsync_app},
};
use testing_framework_core::{
scenario::cfgsync::{apply_topology_overrides, load_cfgsync_template, write_cfgsync_template},
topology::GeneratedTopology,
};
use tokio::{net::TcpListener, sync::oneshot, task::JoinHandle};
#[derive(Debug)]
pub struct CfgsyncServerHandle {
shutdown: Option<oneshot::Sender<()>>,
pub join: JoinHandle<()>,
}
impl CfgsyncServerHandle {
pub fn shutdown(&mut self) {
if let Some(tx) = self.shutdown.take() {
let _ = tx.send(());
}
self.join.abort();
}
}
pub fn update_cfgsync_config(
path: &Path,
topology: &GeneratedTopology,
use_kzg_mount: bool,
) -> anyhow::Result<()> {
let mut cfg = load_cfgsync_template(path)?;
apply_topology_overrides(&mut cfg, topology, use_kzg_mount);
write_cfgsync_template(path, &cfg)?;
Ok(())
}
pub async fn start_cfgsync_server(
cfgsync_path: &Path,
port: u16,
) -> anyhow::Result<CfgsyncServerHandle> {
let cfg_path = cfgsync_path.to_path_buf();
let config = ServerCfgSyncConfig::load_from_file(&cfg_path)
.map_err(|err| anyhow::anyhow!("loading cfgsync config: {err}"))?;
let repo: Arc<ConfigRepo> = config.into();
let listener = TcpListener::bind((Ipv4Addr::UNSPECIFIED, port))
.await
.context("binding cfgsync listener")?;
let cfgsync_router = cfgsync_app(repo);
let (shutdown_tx, shutdown_rx) = oneshot::channel();
let (ready_tx, ready_rx) = oneshot::channel();
let join = tokio::spawn(async move {
let server =
serve(listener, cfgsync_router.into_make_service()).with_graceful_shutdown(async {
let _ = shutdown_rx.await;
});
let _ = ready_tx.send(());
if let Err(err) = server.await {
eprintln!("[compose-runner] cfgsync server error: {err}");
}
});
ready_rx
.await
.context("waiting for cfgsync server to become ready")?;
Ok(CfgsyncServerHandle {
shutdown: Some(shutdown_tx),
join,
})
}
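// Usage sketch (illustrative path and port): patch the cfgsync template for a
// generated topology and serve it until teardown. The yaml path is a
// placeholder for wherever the compose workspace copied the template.
use std::path::Path;

use testing_framework_core::topology::GeneratedTopology;

async fn example_cfgsync(topology: &GeneratedTopology) -> anyhow::Result<()> {
    let template = Path::new("testnet/cfgsync.yaml");
    update_cfgsync_config(template, topology, false)?;
    let mut handle = start_cfgsync_server(template, 4400).await?;
    // ... bring up containers that pull their configs from the server ...
    handle.shutdown();
    Ok(())
}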

View File

@ -0,0 +1,70 @@
use std::{env, path::PathBuf};
use testing_framework_core::scenario::CleanupGuard;
use crate::{cfgsync::CfgsyncServerHandle, compose::compose_down, workspace::ComposeWorkspace};
pub struct RunnerCleanup {
pub compose_file: PathBuf,
pub project_name: String,
pub root: PathBuf,
workspace: Option<ComposeWorkspace>,
cfgsync: Option<CfgsyncServerHandle>,
}
impl RunnerCleanup {
pub fn new(
compose_file: PathBuf,
project_name: String,
root: PathBuf,
workspace: ComposeWorkspace,
cfgsync: Option<CfgsyncServerHandle>,
) -> Self {
debug_assert!(
!compose_file.as_os_str().is_empty() && !project_name.is_empty(),
"compose cleanup should receive valid identifiers"
);
Self {
compose_file,
project_name,
root,
workspace: Some(workspace),
cfgsync,
}
}
fn teardown_compose(&self) {
if let Err(err) = compose_down(&self.compose_file, &self.project_name, &self.root) {
eprintln!("[compose-runner] docker compose down failed: {err}");
}
}
}
impl CleanupGuard for RunnerCleanup {
fn cleanup(mut self: Box<Self>) {
let preserve = env::var("COMPOSE_RUNNER_PRESERVE").is_ok()
|| env::var("TESTNET_RUNNER_PRESERVE").is_ok();
if preserve {
if let Some(mut handle) = self.cfgsync.take() {
handle.shutdown();
}
if let Some(workspace) = self.workspace.take() {
let keep = workspace.into_inner().keep();
eprintln!(
"[compose-runner] preserving docker state at {}",
keep.display()
);
}
eprintln!("[compose-runner] compose preserve flag set; skipping docker compose down");
return;
}
self.teardown_compose();
if let Some(mut handle) = self.cfgsync.take() {
handle.shutdown();
}
}
}

View File

@ -0,0 +1,612 @@
use std::{
env, fs, io,
path::{Path, PathBuf},
process,
time::Duration,
};
use anyhow::Context as _;
use serde::Serialize;
use tera::Context as TeraContext;
use testing_framework_core::{
adjust_timeout,
topology::{GeneratedNodeConfig, GeneratedTopology},
};
use tokio::{process::Command, time::timeout};
const COMPOSE_UP_TIMEOUT: Duration = Duration::from_secs(120);
const TEMPLATE_RELATIVE_PATH: &str =
"testing-framework/runners/compose/assets/docker-compose.yml.tera";
#[derive(Debug, thiserror::Error)]
pub enum ComposeCommandError {
#[error("{command} exited with status {status}")]
Failed {
command: String,
status: process::ExitStatus,
},
#[error("failed to spawn {command}: {source}")]
Spawn {
command: String,
#[source]
source: io::Error,
},
#[error("{command} timed out after {timeout:?}")]
Timeout { command: String, timeout: Duration },
}
pub async fn compose_up(
compose_path: &Path,
project_name: &str,
root: &Path,
) -> Result<(), ComposeCommandError> {
let mut cmd = Command::new("docker");
cmd.arg("compose")
.arg("-f")
.arg(compose_path)
.arg("-p")
.arg(project_name)
.arg("up")
.arg("-d")
.current_dir(root);
run_compose_command(cmd, adjust_timeout(COMPOSE_UP_TIMEOUT), "docker compose up").await
}
pub fn compose_down(
compose_path: &Path,
project_name: &str,
root: &Path,
) -> Result<(), ComposeCommandError> {
let description = "docker compose down".to_owned();
let status = process::Command::new("docker")
.arg("compose")
.arg("-f")
.arg(compose_path)
.arg("-p")
.arg(project_name)
.arg("down")
.arg("--volumes")
.current_dir(root)
.status()
.map_err(|source| ComposeCommandError::Spawn {
command: description.clone(),
source,
})?;
if status.success() {
Ok(())
} else {
Err(ComposeCommandError::Failed {
command: description,
status,
})
}
}
#[derive(Debug, thiserror::Error)]
pub enum TemplateError {
#[error("failed to resolve repository root for compose template: {source}")]
RepositoryRoot {
#[source]
source: anyhow::Error,
},
#[error("failed to read compose template at {path}: {source}")]
Read {
path: PathBuf,
#[source]
source: io::Error,
},
#[error("failed to serialise compose descriptor for templating: {source}")]
Serialize {
#[source]
source: tera::Error,
},
#[error("failed to render compose template at {path}: {source}")]
Render {
path: PathBuf,
#[source]
source: tera::Error,
},
#[error("failed to write compose file at {path}: {source}")]
Write {
path: PathBuf,
#[source]
source: io::Error,
},
}
#[derive(Debug, thiserror::Error)]
pub enum DescriptorBuildError {
#[error("cfgsync port is not configured for compose descriptor")]
MissingCfgsyncPort,
#[error("prometheus port is not configured for compose descriptor")]
MissingPrometheusPort,
}
#[derive(Clone, Debug, Serialize)]
pub struct ComposeDescriptor {
prometheus: PrometheusTemplate,
validators: Vec<NodeDescriptor>,
executors: Vec<NodeDescriptor>,
}
impl ComposeDescriptor {
#[must_use]
pub const fn builder(topology: &GeneratedTopology) -> ComposeDescriptorBuilder<'_> {
ComposeDescriptorBuilder::new(topology)
}
#[cfg(test)]
fn validators(&self) -> &[NodeDescriptor] {
&self.validators
}
#[cfg(test)]
fn executors(&self) -> &[NodeDescriptor] {
&self.executors
}
}
pub struct ComposeDescriptorBuilder<'a> {
topology: &'a GeneratedTopology,
use_kzg_mount: bool,
cfgsync_port: Option<u16>,
prometheus_port: Option<u16>,
}
impl<'a> ComposeDescriptorBuilder<'a> {
const fn new(topology: &'a GeneratedTopology) -> Self {
Self {
topology,
use_kzg_mount: false,
cfgsync_port: None,
prometheus_port: None,
}
}
#[must_use]
pub const fn with_kzg_mount(mut self, enabled: bool) -> Self {
self.use_kzg_mount = enabled;
self
}
#[must_use]
pub const fn with_cfgsync_port(mut self, port: u16) -> Self {
self.cfgsync_port = Some(port);
self
}
#[must_use]
pub const fn with_prometheus_port(mut self, port: u16) -> Self {
self.prometheus_port = Some(port);
self
}
pub fn build(self) -> Result<ComposeDescriptor, DescriptorBuildError> {
let cfgsync_port = self
.cfgsync_port
.ok_or(DescriptorBuildError::MissingCfgsyncPort)?;
let prometheus_host_port = self
.prometheus_port
.ok_or(DescriptorBuildError::MissingPrometheusPort)?;
let (default_image, default_platform) = resolve_image();
let image = default_image;
let platform = default_platform;
let validators = build_nodes(
self.topology.validators(),
ComposeNodeKind::Validator,
&image,
platform.as_deref(),
self.use_kzg_mount,
cfgsync_port,
);
let executors = build_nodes(
self.topology.executors(),
ComposeNodeKind::Executor,
&image,
platform.as_deref(),
self.use_kzg_mount,
cfgsync_port,
);
Ok(ComposeDescriptor {
prometheus: PrometheusTemplate::new(prometheus_host_port),
validators,
executors,
})
}
}
#[derive(Clone, Debug, Serialize)]
pub struct PrometheusTemplate {
host_port: String,
}
impl PrometheusTemplate {
fn new(port: u16) -> Self {
Self {
host_port: format!("127.0.0.1:{port}:9090"),
}
}
}
#[derive(Clone, Debug, Serialize, PartialEq, Eq)]
pub struct EnvEntry {
key: String,
value: String,
}
impl EnvEntry {
fn new(key: impl Into<String>, value: impl Into<String>) -> Self {
Self {
key: key.into(),
value: value.into(),
}
}
#[cfg(test)]
fn key(&self) -> &str {
&self.key
}
#[cfg(test)]
fn value(&self) -> &str {
&self.value
}
}
#[derive(Clone, Debug, Serialize)]
pub struct NodeDescriptor {
name: String,
image: String,
entrypoint: String,
volumes: Vec<String>,
extra_hosts: Vec<String>,
ports: Vec<String>,
environment: Vec<EnvEntry>,
#[serde(skip_serializing_if = "Option::is_none")]
platform: Option<String>,
}
#[derive(Clone, Debug)]
pub struct NodeHostPorts {
pub api: u16,
pub testing: u16,
}
#[derive(Clone, Debug)]
pub struct HostPortMapping {
pub validators: Vec<NodeHostPorts>,
pub executors: Vec<NodeHostPorts>,
}
impl HostPortMapping {
pub fn validator_api_ports(&self) -> Vec<u16> {
self.validators.iter().map(|ports| ports.api).collect()
}
pub fn executor_api_ports(&self) -> Vec<u16> {
self.executors.iter().map(|ports| ports.api).collect()
}
}
impl NodeDescriptor {
fn from_node(
kind: ComposeNodeKind,
index: usize,
node: &GeneratedNodeConfig,
image: &str,
platform: Option<&str>,
use_kzg_mount: bool,
cfgsync_port: u16,
) -> Self {
let mut environment = base_environment(cfgsync_port);
let identifier = kind.instance_name(index);
environment.extend([
EnvEntry::new(
"CFG_NETWORK_PORT",
node.general.network_config.backend.inner.port.to_string(),
),
EnvEntry::new("CFG_DA_PORT", node.da_port.to_string()),
EnvEntry::new("CFG_BLEND_PORT", node.blend_port.to_string()),
EnvEntry::new(
"CFG_API_PORT",
node.general.api_config.address.port().to_string(),
),
EnvEntry::new(
"CFG_TESTING_HTTP_PORT",
node.general
.api_config
.testing_http_address
.port()
.to_string(),
),
EnvEntry::new("CFG_HOST_IDENTIFIER", identifier),
]);
let ports = vec![
node.general.api_config.address.port().to_string(),
node.general
.api_config
.testing_http_address
.port()
.to_string(),
];
Self {
name: kind.instance_name(index),
image: image.to_owned(),
entrypoint: kind.entrypoint().to_owned(),
volumes: base_volumes(use_kzg_mount),
extra_hosts: default_extra_hosts(),
ports,
environment,
platform: platform.map(ToOwned::to_owned),
}
}
#[cfg(test)]
fn ports(&self) -> &[String] {
&self.ports
}
#[cfg(test)]
fn environment(&self) -> &[EnvEntry] {
&self.environment
}
}
pub fn write_compose_file(
descriptor: &ComposeDescriptor,
compose_path: &Path,
) -> Result<(), TemplateError> {
TemplateSource::load()?.write(descriptor, compose_path)
}
pub async fn dump_compose_logs(compose_file: &Path, project: &str, root: &Path) {
let mut cmd = Command::new("docker");
cmd.arg("compose")
.arg("-f")
.arg(compose_file)
.arg("-p")
.arg(project)
.arg("logs")
.arg("--no-color")
.current_dir(root);
match cmd.output().await {
Ok(output) => {
if !output.stdout.is_empty() {
eprintln!(
"[compose-runner] docker compose logs:\n{}",
String::from_utf8_lossy(&output.stdout)
);
}
if !output.stderr.is_empty() {
eprintln!(
"[compose-runner] docker compose errors:\n{}",
String::from_utf8_lossy(&output.stderr)
);
}
}
Err(err) => {
eprintln!("[compose-runner] failed to collect docker compose logs: {err}");
}
}
}
struct TemplateSource {
path: PathBuf,
contents: String,
}
impl TemplateSource {
fn load() -> Result<Self, TemplateError> {
let repo_root =
repository_root().map_err(|source| TemplateError::RepositoryRoot { source })?;
let path = repo_root.join(TEMPLATE_RELATIVE_PATH);
let contents = fs::read_to_string(&path).map_err(|source| TemplateError::Read {
path: path.clone(),
source,
})?;
Ok(Self { path, contents })
}
fn render(&self, descriptor: &ComposeDescriptor) -> Result<String, TemplateError> {
let context = TeraContext::from_serialize(descriptor)
.map_err(|source| TemplateError::Serialize { source })?;
tera::Tera::one_off(&self.contents, &context, false).map_err(|source| {
TemplateError::Render {
path: self.path.clone(),
source,
}
})
}
fn write(&self, descriptor: &ComposeDescriptor, output: &Path) -> Result<(), TemplateError> {
let rendered = self.render(descriptor)?;
fs::write(output, rendered).map_err(|source| TemplateError::Write {
path: output.to_path_buf(),
source,
})
}
}
pub fn repository_root() -> anyhow::Result<PathBuf> {
env::var("CARGO_WORKSPACE_DIR")
.map(PathBuf::from)
.or_else(|_| {
Path::new(env!("CARGO_MANIFEST_DIR"))
.parent()
.and_then(Path::parent)
.and_then(Path::parent)
.map(PathBuf::from)
.context("resolving repository root from manifest dir")
})
}
#[derive(Clone, Copy)]
enum ComposeNodeKind {
Validator,
Executor,
}
impl ComposeNodeKind {
fn instance_name(self, index: usize) -> String {
match self {
Self::Validator => format!("validator-{index}"),
Self::Executor => format!("executor-{index}"),
}
}
const fn entrypoint(self) -> &'static str {
match self {
Self::Validator => "/etc/nomos/scripts/run_nomos_node.sh",
Self::Executor => "/etc/nomos/scripts/run_nomos_executor.sh",
}
}
}
fn build_nodes(
nodes: &[GeneratedNodeConfig],
kind: ComposeNodeKind,
image: &str,
platform: Option<&str>,
use_kzg_mount: bool,
cfgsync_port: u16,
) -> Vec<NodeDescriptor> {
nodes
.iter()
.enumerate()
.map(|(index, node)| {
NodeDescriptor::from_node(
kind,
index,
node,
image,
platform,
use_kzg_mount,
cfgsync_port,
)
})
.collect()
}
fn base_environment(cfgsync_port: u16) -> Vec<EnvEntry> {
vec![
EnvEntry::new("POL_PROOF_DEV_MODE", "true"),
EnvEntry::new(
"CFG_SERVER_ADDR",
format!("http://host.docker.internal:{cfgsync_port}"),
),
EnvEntry::new("OTEL_METRIC_EXPORT_INTERVAL", "5000"),
]
}
fn base_volumes(use_kzg_mount: bool) -> Vec<String> {
let mut volumes = vec!["./testnet:/etc/nomos".into()];
if use_kzg_mount {
volumes.push("./kzgrs_test_params:/kzgrs_test_params:z".into());
}
volumes
}
fn default_extra_hosts() -> Vec<String> {
host_gateway_entry().into_iter().collect()
}
pub fn resolve_image() -> (String, Option<String>) {
let image =
env::var("NOMOS_TESTNET_IMAGE").unwrap_or_else(|_| String::from("nomos-testnet:local"));
let platform = (image == "ghcr.io/logos-co/nomos:testnet").then(|| "linux/amd64".to_owned());
(image, platform)
}
fn host_gateway_entry() -> Option<String> {
if let Ok(value) = env::var("COMPOSE_RUNNER_HOST_GATEWAY") {
if value.eq_ignore_ascii_case("disable") || value.is_empty() {
return None;
}
return Some(value);
}
if cfg!(any(target_os = "macos", target_os = "windows")) {
return Some("host.docker.internal:host-gateway".into());
}
env::var("DOCKER_HOST_GATEWAY")
.ok()
.filter(|value| !value.is_empty())
.map(|gateway| format!("host.docker.internal:{gateway}"))
}
async fn run_compose_command(
mut command: Command,
timeout_duration: Duration,
description: &str,
) -> Result<(), ComposeCommandError> {
match timeout(timeout_duration, command.status()).await {
Ok(Ok(status)) if status.success() => Ok(()),
Ok(Ok(status)) => Err(ComposeCommandError::Failed {
command: description.to_owned(),
status,
}),
Ok(Err(err)) => Err(ComposeCommandError::Spawn {
command: description.to_owned(),
source: err,
}),
Err(_) => Err(ComposeCommandError::Timeout {
command: description.to_owned(),
timeout: timeout_duration,
}),
}
}
#[cfg(test)]
mod tests {
use testing_framework_core::topology::{TopologyBuilder, TopologyConfig};
use super::*;
#[test]
fn descriptor_matches_topology_counts() {
let topology = TopologyBuilder::new(TopologyConfig::with_node_numbers(2, 1)).build();
let descriptor = ComposeDescriptor::builder(&topology)
.with_cfgsync_port(4400)
.with_prometheus_port(9090)
.build()
.expect("descriptor");
assert_eq!(descriptor.validators().len(), topology.validators().len());
assert_eq!(descriptor.executors().len(), topology.executors().len());
}
#[test]
fn descriptor_includes_expected_env_and_ports() {
let topology = TopologyBuilder::new(TopologyConfig::with_node_numbers(1, 1)).build();
let cfgsync_port = 4555;
let descriptor = ComposeDescriptor::builder(&topology)
.with_cfgsync_port(cfgsync_port)
.with_prometheus_port(9090)
.build()
.expect("descriptor");
let validator = &descriptor.validators()[0];
assert!(
validator
.environment()
.iter()
.any(|entry| entry.key() == "CFG_SERVER_ADDR"
&& entry.value() == format!("http://host.docker.internal:{cfgsync_port}"))
);
let api_container = topology.validators()[0].general.api_config.address.port();
assert!(validator.ports().contains(&api_container.to_string()));
}
}

View File

@ -0,0 +1,9 @@
mod cfgsync;
mod cleanup;
mod compose;
mod runner;
mod wait;
mod workspace;
pub use runner::{ComposeRunner, ComposeRunnerError};
pub use workspace::ComposeWorkspace;

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,21 @@
use std::time::Duration;
use testing_framework_core::{
adjust_timeout,
scenario::http_probe::{self, HttpReadinessError, NodeRole},
};
const DEFAULT_WAIT: Duration = Duration::from_secs(90);
const POLL_INTERVAL: Duration = Duration::from_millis(250);
pub async fn wait_for_validators(ports: &[u16]) -> Result<(), HttpReadinessError> {
wait_for_ports(ports, NodeRole::Validator).await
}
pub async fn wait_for_executors(ports: &[u16]) -> Result<(), HttpReadinessError> {
wait_for_ports(ports, NodeRole::Executor).await
}
async fn wait_for_ports(ports: &[u16], role: NodeRole) -> Result<(), HttpReadinessError> {
http_probe::wait_for_http_ports(ports, role, adjust_timeout(DEFAULT_WAIT), POLL_INTERVAL).await
}

View File

@ -0,0 +1,89 @@
use std::{
env, fs,
path::{Path, PathBuf},
};
use anyhow::{Context as _, Result};
use tempfile::TempDir;
/// Scenario-specific temporary workspace seeded with a copy of the repository
/// `testnet/` directory.
#[derive(Debug)]
pub struct ComposeWorkspace {
root: TempDir,
}
impl ComposeWorkspace {
    /// Clones the testnet assets into a temporary directory.
pub fn create() -> Result<Self> {
let repo_root = env::var("CARGO_WORKSPACE_DIR")
.map(PathBuf::from)
.or_else(|_| {
Path::new(env!("CARGO_MANIFEST_DIR"))
.parent()
.and_then(Path::parent)
.and_then(Path::parent)
.map(Path::to_path_buf)
.context("resolving workspace root from manifest dir")
})
.context("locating repository root")?;
let temp = tempfile::Builder::new()
.prefix("nomos-testnet-")
.tempdir()
.context("creating testnet temp dir")?;
let testnet_source = repo_root.join("testnet");
if !testnet_source.exists() {
anyhow::bail!(
"testnet directory not found at {}",
testnet_source.display()
);
}
copy_dir_recursive(&testnet_source, &temp.path().join("testnet"))?;
let kzg_source = repo_root.join("tests/kzgrs/kzgrs_test_params");
if kzg_source.exists() {
let target = temp.path().join("kzgrs_test_params");
if kzg_source.is_dir() {
copy_dir_recursive(&kzg_source, &target)?;
} else {
fs::copy(&kzg_source, &target).with_context(|| {
format!("copying {} -> {}", kzg_source.display(), target.display())
})?;
}
}
Ok(Self { root: temp })
}
#[must_use]
pub fn root_path(&self) -> &Path {
self.root.path()
}
#[must_use]
pub fn testnet_dir(&self) -> PathBuf {
self.root.path().join("testnet")
}
#[must_use]
pub fn into_inner(self) -> TempDir {
self.root
}
}
fn copy_dir_recursive(source: &Path, target: &Path) -> Result<()> {
fs::create_dir_all(target)
.with_context(|| format!("creating target dir {}", target.display()))?;
for entry in fs::read_dir(source).with_context(|| format!("reading {}", source.display()))? {
let entry = entry?;
let file_type = entry.file_type()?;
let dest = target.join(entry.file_name());
if file_type.is_dir() {
copy_dir_recursive(&entry.path(), &dest)?;
        } else {
fs::copy(entry.path(), &dest).with_context(|| {
format!("copying {} -> {}", entry.path().display(), dest.display())
})?;
}
}
Ok(())
}
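// Usage sketch: materialise the workspace copy and point later steps at the
// rendered compose file location (the filename is illustrative).
fn example_workspace() -> anyhow::Result<()> {
    let workspace = ComposeWorkspace::create()?;
    let compose_path = workspace.root_path().join("docker-compose.yml");
    tracing::info!(
        testnet_dir = %workspace.testnet_dir().display(),
        compose_path = %compose_path.display(),
        "compose workspace ready"
    );
    Ok(())
}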

View File

@ -0,0 +1,74 @@
# syntax=docker/dockerfile:1
ARG VERSION=v0.3.1
ARG NOMOS_CIRCUITS_PLATFORM=linux-x86_64
# ===========================
# BUILD IMAGE
# ===========================
FROM rust:1.91.0-slim-bookworm AS builder
ARG VERSION
ARG NOMOS_CIRCUITS_PLATFORM
ARG TARGETARCH
LABEL maintainer="logos devs" \
source="https://github.com/logos-co/nomos-node" \
description="Nomos testing framework build image"
WORKDIR /nomos
COPY . .
RUN apt-get update && apt-get install -yq \
git gcc g++ clang libssl-dev pkg-config ca-certificates curl wget \
build-essential cmake libgmp-dev libsodium-dev nasm m4 && \
rm -rf /var/lib/apt/lists/*
ENV NOMOS_CIRCUITS_PLATFORM=${NOMOS_CIRCUITS_PLATFORM}
RUN chmod +x scripts/setup-nomos-circuits.sh && \
scripts/setup-nomos-circuits.sh "$VERSION" "/opt/circuits"
RUN if [ "${TARGETARCH:-amd64}" = "arm64" ]; then \
chmod +x scripts/build-rapidsnark.sh && \
scripts/build-rapidsnark.sh "/opt/circuits"; \
fi
ENV NOMOS_CIRCUITS=/opt/circuits
# Use debug builds to keep the linker memory footprint low; we only need
# binaries for integration testing, not optimized releases.
RUN cargo build --all-features
# ===========================
# NODE IMAGE
# ===========================
FROM debian:bookworm-slim
ARG VERSION
LABEL maintainer="logos devs" \
source="https://github.com/logos-co/nomos-node" \
description="Nomos testing framework runtime image"
RUN apt-get update && apt-get install -yq \
libstdc++6 \
libssl3 \
ca-certificates \
&& rm -rf /var/lib/apt/lists/*
COPY --from=builder /opt/circuits /opt/circuits
COPY --from=builder /nomos/target/debug/nomos-node /usr/bin/nomos-node
COPY --from=builder /nomos/target/debug/nomos-executor /usr/bin/nomos-executor
COPY --from=builder /nomos/target/debug/nomos-cli /usr/bin/nomos-cli
COPY --from=builder /nomos/target/debug/cfgsync-server /usr/bin/cfgsync-server
COPY --from=builder /nomos/target/debug/cfgsync-client /usr/bin/cfgsync-client
ENV NOMOS_CIRCUITS=/opt/circuits
EXPOSE 3000 8080 9000 60000
ENTRYPOINT ["/usr/bin/nomos-node"]

View File

@ -0,0 +1,29 @@
[package]
categories.workspace = true
description.workspace = true
edition.workspace = true
keywords.workspace = true
license.workspace = true
name = "testing-framework-runner-k8s"
readme.workspace = true
repository.workspace = true
version = "0.1.0"
[lints]
workspace = true
[dependencies]
anyhow = "1"
async-trait = { workspace = true }
k8s-openapi = { version = "0.20", features = ["latest"] }
kube = { version = "0.87", default-features = false, features = ["client", "runtime", "rustls-tls"] }
reqwest = { workspace = true, features = ["json"] }
serde = { version = "1", features = ["derive"] }
serde_yaml = { workspace = true }
tempfile = { workspace = true }
testing-framework-core = { path = "../../core" }
thiserror = { workspace = true }
tokio = { workspace = true, features = ["macros", "net", "process", "rt-multi-thread", "sync", "time"] }
tracing = { workspace = true }
url = { version = "2" }
uuid = { version = "1", features = ["v4"] }

View File

@ -0,0 +1,6 @@
apiVersion: v2
name: nomos-runner
description: Helm chart for Nomos integration test runner assets
type: application
version: 0.1.0
appVersion: "0.1.0"

View File

@ -0,0 +1,41 @@
{{- define "nomos-runner.chart" -}}
{{- .Chart.Name -}}
{{- end -}}
{{- define "nomos-runner.fullname" -}}
{{- printf "%s" .Release.Name | trunc 63 | trimSuffix "-" -}}
{{- end -}}
{{- define "nomos-runner.labels" -}}
app.kubernetes.io/name: {{ include "nomos-runner.chart" . }}
app.kubernetes.io/instance: {{ .Release.Name }}
{{- end -}}
{{- define "nomos-runner.selectorLabels" -}}
app.kubernetes.io/name: {{ include "nomos-runner.chart" . }}
app.kubernetes.io/instance: {{ .Release.Name }}
{{- end -}}
{{- define "nomos-runner.validatorLabels" -}}
{{- $root := index . "root" -}}
{{- $index := index . "index" -}}
app.kubernetes.io/name: {{ include "nomos-runner.chart" $root }}
app.kubernetes.io/instance: {{ $root.Release.Name }}
nomos/logical-role: validator
nomos/validator-index: "{{ $index }}"
{{- end -}}
{{- define "nomos-runner.executorLabels" -}}
{{- $root := index . "root" -}}
{{- $index := index . "index" -}}
app.kubernetes.io/name: {{ include "nomos-runner.chart" $root }}
app.kubernetes.io/instance: {{ $root.Release.Name }}
nomos/logical-role: executor
nomos/executor-index: "{{ $index }}"
{{- end -}}
{{- define "nomos-runner.prometheusLabels" -}}
app.kubernetes.io/name: {{ include "nomos-runner.chart" . }}
app.kubernetes.io/instance: {{ .Release.Name }}
nomos/logical-role: prometheus
{{- end -}}

View File

@ -0,0 +1,43 @@
apiVersion: apps/v1
kind: Deployment
metadata:
name: {{ include "nomos-runner.fullname" . }}-cfgsync
labels:
{{- include "nomos-runner.labels" . | nindent 4 }}
spec:
replicas: 1
selector:
matchLabels:
{{- include "nomos-runner.selectorLabels" . | nindent 6 }}
nomos/component: cfgsync
template:
metadata:
labels:
{{- include "nomos-runner.selectorLabels" . | nindent 8 }}
nomos/component: cfgsync
spec:
containers:
- name: cfgsync
image: {{ .Values.image }}
imagePullPolicy: {{ .Values.imagePullPolicy }}
command: ["/etc/nomos/scripts/run_cfgsync.sh"]
ports:
- name: http
containerPort: {{ .Values.cfgsync.port }}
env:
- name: RUST_LOG
value: debug
volumeMounts:
- name: assets
mountPath: /etc/nomos
readOnly: true
volumes:
- name: assets
configMap:
name: {{ include "nomos-runner.fullname" . }}-assets
defaultMode: 0755
items:
- key: cfgsync.yaml
path: cfgsync.yaml
- key: run_cfgsync.sh
path: scripts/run_cfgsync.sh

View File

@ -0,0 +1,15 @@
apiVersion: v1
kind: Service
metadata:
name: {{ include "nomos-runner.fullname" . }}-cfgsync
labels:
{{- include "nomos-runner.labels" . | nindent 4 }}
spec:
type: ClusterIP
selector:
{{- include "nomos-runner.selectorLabels" . | nindent 4 }}
nomos/component: cfgsync
ports:
- name: http
port: {{ .Values.cfgsync.port }}
targetPort: http

View File

@ -0,0 +1,31 @@
apiVersion: v1
kind: ConfigMap
metadata:
name: {{ include "nomos-runner.fullname" . }}-assets
labels:
{{- include "nomos-runner.labels" . | nindent 4 }}
data:
cfgsync.yaml: |
{{- if .Values.cfgsync.config }}
{{ .Values.cfgsync.config | indent 4 }}
{{- else }}
{{ "" | indent 4 }}
{{- end }}
run_cfgsync.sh: |
{{- if .Values.scripts.runCfgsyncSh }}
{{ .Values.scripts.runCfgsyncSh | indent 4 }}
{{- else }}
{{ "" | indent 4 }}
{{- end }}
run_nomos_node.sh: |
{{- if .Values.scripts.runNomosNodeSh }}
{{ .Values.scripts.runNomosNodeSh | indent 4 }}
{{- else }}
{{ "" | indent 4 }}
{{- end }}
run_nomos_executor.sh: |
{{- if .Values.scripts.runNomosExecutorSh }}
{{ .Values.scripts.runNomosExecutorSh | indent 4 }}
{{- else }}
{{ "" | indent 4 }}
{{- end }}

View File

@ -0,0 +1,63 @@
{{- $root := . -}}
{{- $nodes := default (list) .Values.executors.nodes }}
{{- range $i, $node := $nodes }}
---
apiVersion: apps/v1
kind: Deployment
metadata:
name: {{ include "nomos-runner.fullname" $root }}-executor-{{ $i }}
labels:
{{- include "nomos-runner.executorLabels" (dict "root" $root "index" $i) | nindent 4 }}
spec:
replicas: 1
selector:
matchLabels:
{{- include "nomos-runner.executorLabels" (dict "root" $root "index" $i) | nindent 6 }}
template:
metadata:
labels:
{{- include "nomos-runner.executorLabels" (dict "root" $root "index" $i) | nindent 8 }}
spec:
containers:
- name: executor
image: {{ $root.Values.image }}
imagePullPolicy: {{ $root.Values.imagePullPolicy }}
command: ["/etc/nomos/scripts/run_nomos_executor.sh"]
ports:
- name: http
containerPort: {{ default 18080 $node.apiPort }}
- name: testing-http
containerPort: {{ default 18081 $node.testingHttpPort }}
env:
- name: CFG_SERVER_ADDR
value: http://{{ include "nomos-runner.fullname" $root }}-cfgsync:{{ $root.Values.cfgsync.port }}
{{- range $key, $value := $node.env }}
- name: {{ $key }}
value: "{{ $value }}"
{{- end }}
volumeMounts:
- name: assets
mountPath: /etc/nomos
readOnly: true
- name: kzg-params
mountPath: /kzgrs_test_params
readOnly: true
volumes:
- name: assets
configMap:
name: {{ include "nomos-runner.fullname" $root }}-assets
defaultMode: 0755
items:
- key: cfgsync.yaml
path: cfgsync.yaml
- key: run_cfgsync.sh
path: scripts/run_cfgsync.sh
- key: run_nomos_executor.sh
path: scripts/run_nomos_executor.sh
- key: run_nomos_node.sh
path: scripts/run_nomos_node.sh
- name: kzg-params
persistentVolumeClaim:
claimName: {{ include "nomos-runner.fullname" $root }}-kzg
readOnly: true
{{- end }}

View File

@ -0,0 +1,22 @@
{{- $root := . -}}
{{- $nodes := default (list) .Values.executors.nodes }}
{{- range $i, $node := $nodes }}
---
apiVersion: v1
kind: Service
metadata:
name: {{ include "nomos-runner.fullname" $root }}-executor-{{ $i }}
labels:
{{- include "nomos-runner.executorLabels" (dict "root" $root "index" $i) | nindent 4 }}
spec:
type: NodePort
selector:
{{- include "nomos-runner.executorLabels" (dict "root" $root "index" $i) | nindent 4 }}
ports:
- name: http
port: {{ default 18080 $node.apiPort }}
targetPort: http
- name: testing-http
port: {{ default 18081 $node.testingHttpPort }}
targetPort: testing-http
{{- end }}

View File

@ -0,0 +1,16 @@
{{- if .Values.prometheus.enabled }}
---
apiVersion: v1
kind: ConfigMap
metadata:
name: prometheus
labels:
{{- include "nomos-runner.prometheusLabels" . | nindent 4 }}
data:
prometheus.yml: |
{{- if .Values.prometheus.config }}
{{ .Values.prometheus.config | indent 4 }}
{{- else }}
{{ "" | indent 4 }}
{{- end }}
{{- end }}

View File

@ -0,0 +1,38 @@
{{- if .Values.prometheus.enabled }}
---
apiVersion: apps/v1
kind: Deployment
metadata:
name: prometheus
labels:
{{- include "nomos-runner.prometheusLabels" . | nindent 4 }}
spec:
replicas: 1
selector:
matchLabels:
{{- include "nomos-runner.prometheusLabels" . | nindent 6 }}
template:
metadata:
labels:
{{- include "nomos-runner.prometheusLabels" . | nindent 8 }}
spec:
containers:
- name: prometheus
image: {{ .Values.prometheus.image }}
imagePullPolicy: {{ .Values.prometheus.imagePullPolicy | default "IfNotPresent" }}
args:
- --config.file=/etc/prometheus/prometheus.yml
- --storage.tsdb.retention.time={{ .Values.prometheus.retention }}
- --web.enable-otlp-receiver
- --enable-feature=otlp-write-receiver
ports:
- containerPort: 9090
name: http
volumeMounts:
- name: prometheus-config
mountPath: /etc/prometheus
volumes:
- name: prometheus-config
configMap:
name: prometheus
{{- end }}

View File

@ -0,0 +1,20 @@
{{- if .Values.prometheus.enabled }}
---
apiVersion: v1
kind: Service
metadata:
name: prometheus
labels:
{{- include "nomos-runner.prometheusLabels" . | nindent 4 }}
spec:
type: {{ .Values.prometheus.service.type | default "NodePort" }}
selector:
{{- include "nomos-runner.prometheusLabels" . | nindent 4 }}
ports:
- name: http
port: 9090
targetPort: http
{{- if and (eq (default "NodePort" .Values.prometheus.service.type) "NodePort") .Values.prometheus.service.nodePort }}
nodePort: {{ .Values.prometheus.service.nodePort }}
{{- end }}
{{- end }}

View File

@ -0,0 +1,16 @@
apiVersion: v1
kind: PersistentVolume
metadata:
name: {{ include "nomos-runner.fullname" . }}-kzg
labels:
{{- include "nomos-runner.labels" . | nindent 4 }}
spec:
capacity:
storage: {{ .Values.kzg.storageSize }}
accessModes:
- ReadOnlyMany
persistentVolumeReclaimPolicy: Delete
storageClassName: manual
hostPath:
path: {{ .Values.kzg.hostPath }}
type: {{ .Values.kzg.hostPathType }}

View File

@ -0,0 +1,14 @@
apiVersion: v1
kind: PersistentVolumeClaim
metadata:
name: {{ include "nomos-runner.fullname" . }}-kzg
labels:
{{- include "nomos-runner.labels" . | nindent 4 }}
spec:
accessModes:
- ReadOnlyMany
storageClassName: manual
volumeName: {{ include "nomos-runner.fullname" . }}-kzg
resources:
requests:
storage: {{ .Values.kzg.storageSize }}

View File

@ -0,0 +1,61 @@
{{- $root := . -}}
{{- $nodes := default (list) .Values.validators.nodes }}
{{- range $i, $node := $nodes }}
---
apiVersion: apps/v1
kind: Deployment
metadata:
name: {{ include "nomos-runner.fullname" $root }}-validator-{{ $i }}
labels:
{{- include "nomos-runner.validatorLabels" (dict "root" $root "index" $i) | nindent 4 }}
spec:
replicas: 1
selector:
matchLabels:
{{- include "nomos-runner.validatorLabels" (dict "root" $root "index" $i) | nindent 6 }}
template:
metadata:
labels:
{{- include "nomos-runner.validatorLabels" (dict "root" $root "index" $i) | nindent 8 }}
spec:
containers:
- name: validator
image: {{ $root.Values.image }}
imagePullPolicy: {{ $root.Values.imagePullPolicy }}
command: ["/etc/nomos/scripts/run_nomos_node.sh"]
ports:
- name: http
containerPort: {{ default 18080 $node.apiPort }}
- name: testing-http
containerPort: {{ default 18081 $node.testingHttpPort }}
env:
- name: CFG_SERVER_ADDR
value: http://{{ include "nomos-runner.fullname" $root }}-cfgsync:{{ $root.Values.cfgsync.port }}
{{- range $key, $value := $node.env }}
- name: {{ $key }}
value: "{{ $value }}"
{{- end }}
volumeMounts:
- name: assets
mountPath: /etc/nomos
readOnly: true
- name: kzg-params
mountPath: /kzgrs_test_params
readOnly: true
volumes:
- name: assets
configMap:
name: {{ include "nomos-runner.fullname" $root }}-assets
defaultMode: 0755
items:
- key: cfgsync.yaml
path: cfgsync.yaml
- key: run_cfgsync.sh
path: scripts/run_cfgsync.sh
- key: run_nomos_node.sh
path: scripts/run_nomos_node.sh
- name: kzg-params
persistentVolumeClaim:
claimName: {{ include "nomos-runner.fullname" $root }}-kzg
readOnly: true
{{- end }}

View File

@ -0,0 +1,22 @@
{{- $root := . -}}
{{- $nodes := default (list) .Values.validators.nodes }}
{{- range $i, $node := $nodes }}
---
apiVersion: v1
kind: Service
metadata:
name: {{ include "nomos-runner.fullname" $root }}-validator-{{ $i }}
labels:
{{- include "nomos-runner.validatorLabels" (dict "root" $root "index" $i) | nindent 4 }}
spec:
type: NodePort
selector:
{{- include "nomos-runner.validatorLabels" (dict "root" $root "index" $i) | nindent 4 }}
ports:
- name: http
port: {{ default 18080 $node.apiPort }}
targetPort: http
- name: testing-http
port: {{ default 18081 $node.testingHttpPort }}
targetPort: testing-http
{{- end }}

View File

@ -0,0 +1,38 @@
image: "nomos-testnet:local"
imagePullPolicy: IfNotPresent
cfgsync:
port: 4400
config: ""
scripts:
runCfgsyncSh: ""
runNomosNodeSh: ""
runNomosExecutorSh: ""
validators:
count: 1
nodes: []
executors:
count: 1
nodes: []
kzg:
hostPath: "/var/lib/nomos/kzgrs_test_params"
hostPathType: "Directory"
storageSize: "1Gi"
prometheus:
enabled: true
image: "prom/prometheus:v3.0.1"
imagePullPolicy: IfNotPresent
retention: "7d"
service:
type: NodePort
nodePort: null
config: |
global:
evaluation_interval: 15s
external_labels:
monitor: "NomosRunner"

View File

@ -0,0 +1,280 @@
use std::{
collections::BTreeMap,
env, fs, io,
path::{Path, PathBuf},
};
use anyhow::{Context as _, Result as AnyResult};
use serde::Serialize;
use tempfile::TempDir;
use testing_framework_core::{
scenario::cfgsync::{apply_topology_overrides, load_cfgsync_template, render_cfgsync_yaml},
topology::GeneratedTopology,
};
use thiserror::Error;
pub struct RunnerAssets {
pub image: String,
pub kzg_path: PathBuf,
pub chart_path: PathBuf,
pub cfgsync_file: PathBuf,
pub run_cfgsync_script: PathBuf,
pub run_nomos_node_script: PathBuf,
pub run_nomos_executor_script: PathBuf,
pub values_file: PathBuf,
_tempdir: TempDir,
}
pub const CFGSYNC_PORT: u16 = 4400;
#[derive(Debug, Error)]
pub enum AssetsError {
#[error("failed to locate workspace root: {source}")]
WorkspaceRoot {
#[source]
source: anyhow::Error,
},
#[error("failed to render cfgsync configuration: {source}")]
Cfgsync {
#[source]
source: anyhow::Error,
},
#[error("missing required script at {path}")]
MissingScript { path: PathBuf },
#[error("missing KZG parameters at {path}; build them with `make kzgrs_test_params`")]
MissingKzg { path: PathBuf },
#[error("missing Helm chart at {path}; ensure the repository is up-to-date")]
MissingChart { path: PathBuf },
#[error("failed to create temporary directory for rendered assets: {source}")]
TempDir {
#[source]
source: io::Error,
},
#[error("failed to write asset at {path}: {source}")]
Io {
path: PathBuf,
#[source]
source: io::Error,
},
#[error("failed to render Helm values: {source}")]
Values {
#[source]
source: serde_yaml::Error,
},
}
pub fn prepare_assets(topology: &GeneratedTopology) -> Result<RunnerAssets, AssetsError> {
let root = workspace_root().map_err(|source| AssetsError::WorkspaceRoot { source })?;
let cfgsync_yaml = render_cfgsync_config(&root, topology)?;
let tempdir = tempfile::Builder::new()
.prefix("nomos-helm-")
.tempdir()
.map_err(|source| AssetsError::TempDir { source })?;
let cfgsync_file = write_temp_file(tempdir.path(), "cfgsync.yaml", cfgsync_yaml)?;
let scripts = validate_scripts(&root)?;
let kzg_path = validate_kzg_params(&root)?;
let chart_path = helm_chart_path()?;
let values_yaml = render_values_yaml(topology)?;
let values_file = write_temp_file(tempdir.path(), "values.yaml", values_yaml)?;
let image =
env::var("NOMOS_TESTNET_IMAGE").unwrap_or_else(|_| String::from("nomos-testnet:local"));
Ok(RunnerAssets {
image,
kzg_path,
chart_path,
cfgsync_file,
run_cfgsync_script: scripts.run_cfgsync,
run_nomos_node_script: scripts.run_node,
run_nomos_executor_script: scripts.run_executor,
values_file,
_tempdir: tempdir,
})
}
const CFGSYNC_K8S_TIMEOUT_SECS: u64 = 300;
fn render_cfgsync_config(root: &Path, topology: &GeneratedTopology) -> Result<String, AssetsError> {
let cfgsync_template_path = root.join("testnet/cfgsync.yaml");
let mut cfg = load_cfgsync_template(&cfgsync_template_path)
.map_err(|source| AssetsError::Cfgsync { source })?;
apply_topology_overrides(&mut cfg, topology, true);
cfg.timeout = cfg.timeout.max(CFGSYNC_K8S_TIMEOUT_SECS);
render_cfgsync_yaml(&cfg).map_err(|source| AssetsError::Cfgsync { source })
}
struct ScriptPaths {
run_cfgsync: PathBuf,
run_node: PathBuf,
run_executor: PathBuf,
}
fn validate_scripts(root: &Path) -> Result<ScriptPaths, AssetsError> {
let scripts_dir = root.join("testnet/scripts");
let run_cfgsync = scripts_dir.join("run_cfgsync.sh");
let run_node = scripts_dir.join("run_nomos_node.sh");
let run_executor = scripts_dir.join("run_nomos_executor.sh");
for path in [&run_cfgsync, &run_node, &run_executor] {
if !path.exists() {
return Err(AssetsError::MissingScript { path: path.clone() });
}
}
Ok(ScriptPaths {
run_cfgsync,
run_node,
run_executor,
})
}
fn validate_kzg_params(root: &Path) -> Result<PathBuf, AssetsError> {
let path = root.join("tests/kzgrs/kzgrs_test_params");
if path.exists() {
Ok(path)
} else {
Err(AssetsError::MissingKzg { path })
}
}
fn helm_chart_path() -> Result<PathBuf, AssetsError> {
let path = Path::new(env!("CARGO_MANIFEST_DIR")).join("helm/nomos-runner");
if path.exists() {
Ok(path)
} else {
Err(AssetsError::MissingChart { path })
}
}
fn render_values_yaml(topology: &GeneratedTopology) -> Result<String, AssetsError> {
let values = build_values(topology);
serde_yaml::to_string(&values).map_err(|source| AssetsError::Values { source })
}
fn write_temp_file(
dir: &Path,
name: &str,
contents: impl AsRef<[u8]>,
) -> Result<PathBuf, AssetsError> {
let path = dir.join(name);
fs::write(&path, contents).map_err(|source| AssetsError::Io {
path: path.clone(),
source,
})?;
Ok(path)
}
pub fn workspace_root() -> AnyResult<PathBuf> {
if let Ok(var) = env::var("CARGO_WORKSPACE_DIR") {
return Ok(PathBuf::from(var));
}
let manifest_dir = Path::new(env!("CARGO_MANIFEST_DIR"));
manifest_dir
.parent()
.and_then(Path::parent)
.and_then(Path::parent)
.map(Path::to_path_buf)
.context("resolving workspace root from manifest dir")
}
#[derive(Serialize)]
struct HelmValues {
validators: NodeGroup,
executors: NodeGroup,
}
#[derive(Serialize)]
struct NodeGroup {
count: usize,
nodes: Vec<NodeValues>,
}
#[derive(Serialize)]
struct NodeValues {
#[serde(rename = "apiPort")]
api_port: u16,
#[serde(rename = "testingHttpPort")]
testing_http_port: u16,
env: BTreeMap<String, String>,
}
fn build_values(topology: &GeneratedTopology) -> HelmValues {
let validators = topology
.validators()
.iter()
.map(|validator| {
let mut env = BTreeMap::new();
env.insert(
"CFG_NETWORK_PORT".into(),
validator.network_port().to_string(),
);
env.insert("CFG_DA_PORT".into(), validator.da_port.to_string());
env.insert("CFG_BLEND_PORT".into(), validator.blend_port.to_string());
env.insert(
"CFG_API_PORT".into(),
validator.general.api_config.address.port().to_string(),
);
env.insert(
"CFG_TESTING_HTTP_PORT".into(),
validator
.general
.api_config
.testing_http_address
.port()
.to_string(),
);
NodeValues {
api_port: validator.general.api_config.address.port(),
testing_http_port: validator.general.api_config.testing_http_address.port(),
env,
}
})
.collect();
let executors = topology
.executors()
.iter()
.map(|executor| {
let mut env = BTreeMap::new();
env.insert(
"CFG_NETWORK_PORT".into(),
executor.network_port().to_string(),
);
env.insert("CFG_DA_PORT".into(), executor.da_port.to_string());
env.insert("CFG_BLEND_PORT".into(), executor.blend_port.to_string());
env.insert(
"CFG_API_PORT".into(),
executor.general.api_config.address.port().to_string(),
);
env.insert(
"CFG_TESTING_HTTP_PORT".into(),
executor
.general
.api_config
.testing_http_address
.port()
.to_string(),
);
NodeValues {
api_port: executor.general.api_config.address.port(),
testing_http_port: executor.general.api_config.testing_http_address.port(),
env,
}
})
.collect();
HelmValues {
validators: NodeGroup {
count: topology.validators().len(),
nodes: validators,
},
executors: NodeGroup {
count: topology.executors().len(),
nodes: executors,
},
}
}

View File

@ -0,0 +1,219 @@
use std::thread;
use k8s_openapi::api::core::v1::Namespace;
use kube::{Api, Client, api::DeleteParams};
use testing_framework_core::scenario::CleanupGuard;
use tokio::{
process::Command,
time::{Duration, sleep},
};
use tracing::warn;
use crate::helm::uninstall_release;
pub struct RunnerCleanup {
client: Client,
namespace: String,
release: String,
preserve: bool,
}
impl RunnerCleanup {
pub fn new(client: Client, namespace: String, release: String, preserve: bool) -> Self {
debug_assert!(
!namespace.is_empty() && !release.is_empty(),
"k8s cleanup requires namespace and release"
);
Self {
client,
namespace,
release,
preserve,
}
}
async fn cleanup_async(&self) {
if self.preserve {
println!(
"[k8s-runner] preserving Helm release `{}` in namespace `{}`",
self.release, self.namespace
);
return;
}
if let Err(err) = uninstall_release(&self.release, &self.namespace).await {
println!("[k8s-runner] helm uninstall {} failed: {err}", self.release);
}
println!(
"[k8s-runner] deleting namespace `{}` via k8s API",
self.namespace
);
delete_namespace(&self.client, &self.namespace).await;
println!(
"[k8s-runner] delete request for namespace `{}` finished",
self.namespace
);
}
fn blocking_cleanup_success(&self) -> bool {
match tokio::runtime::Runtime::new() {
Ok(rt) => match rt.block_on(async {
tokio::time::timeout(Duration::from_secs(120), self.cleanup_async()).await
}) {
Ok(()) => true,
Err(err) => {
warn!(
"[k8s-runner] cleanup timed out after 120s: {err}; falling back to background thread"
);
false
}
},
Err(err) => {
warn!(
"[k8s-runner] unable to create cleanup runtime: {err}; falling back to background thread"
);
false
}
}
}
fn spawn_cleanup_thread(self: Box<Self>) {
match thread::Builder::new()
.name("k8s-runner-cleanup".into())
.spawn(move || match tokio::runtime::Runtime::new() {
Ok(rt) => {
if let Err(err) = rt.block_on(async {
tokio::time::timeout(Duration::from_secs(120), self.cleanup_async()).await
}) {
warn!("[k8s-runner] background cleanup timed out: {err}");
}
}
Err(err) => warn!("[k8s-runner] unable to create cleanup runtime: {err}"),
}) {
Ok(handle) => {
if let Err(err) = handle.join() {
warn!("[k8s-runner] cleanup thread panicked: {err:?}");
}
}
Err(err) => warn!("[k8s-runner] failed to spawn cleanup thread: {err}"),
}
}
}
async fn delete_namespace(client: &Client, namespace: &str) {
let namespaces: Api<Namespace> = Api::all(client.clone());
if delete_namespace_via_api(&namespaces, namespace).await {
wait_for_namespace_termination(&namespaces, namespace).await;
return;
}
if delete_namespace_via_cli(namespace).await {
wait_for_namespace_termination(&namespaces, namespace).await;
} else {
warn!("[k8s-runner] unable to delete namespace `{namespace}` using kubectl fallback");
}
}
async fn delete_namespace_via_api(namespaces: &Api<Namespace>, namespace: &str) -> bool {
println!("[k8s-runner] invoking kubernetes API to delete namespace `{namespace}`");
match tokio::time::timeout(
Duration::from_secs(10),
namespaces.delete(namespace, &DeleteParams::default()),
)
.await
{
Ok(Ok(_)) => {
println!(
"[k8s-runner] delete request accepted for namespace `{namespace}`; waiting for termination"
);
true
}
Ok(Err(err)) => {
println!("[k8s-runner] failed to delete namespace `{namespace}` via API: {err}");
warn!("[k8s-runner] api delete failed for namespace {namespace}: {err}");
false
}
Err(_) => {
println!(
"[k8s-runner] kubernetes API timed out deleting namespace `{namespace}`; falling back to kubectl"
);
false
}
}
}
async fn delete_namespace_via_cli(namespace: &str) -> bool {
println!("[k8s-runner] invoking `kubectl delete namespace {namespace}` fallback");
let output = Command::new("kubectl")
.arg("delete")
.arg("namespace")
.arg(namespace)
.arg("--wait=true")
.output()
.await;
match output {
Ok(result) if result.status.success() => {
println!("[k8s-runner] `kubectl delete namespace {namespace}` completed successfully");
true
}
Ok(result) => {
println!(
"[k8s-runner] `kubectl delete namespace {namespace}` failed: {}\n{}",
String::from_utf8_lossy(&result.stderr),
String::from_utf8_lossy(&result.stdout)
);
false
}
Err(err) => {
println!("[k8s-runner] failed to spawn kubectl for namespace `{namespace}`: {err}");
false
}
}
}
async fn wait_for_namespace_termination(namespaces: &Api<Namespace>, namespace: &str) {
for attempt in 0..60 {
match namespaces.get_opt(namespace).await {
Ok(Some(ns)) => {
if attempt == 0 {
println!(
"[k8s-runner] waiting for namespace `{}` to terminate (phase={:?})",
namespace,
ns.status
.as_ref()
.and_then(|status| status.phase.clone())
.unwrap_or_else(|| "Unknown".into())
);
}
}
Ok(None) => {
println!("[k8s-runner] namespace `{namespace}` deleted");
return;
}
Err(err) => {
warn!("[k8s-runner] namespace `{namespace}` poll failed: {err}");
break;
}
}
sleep(Duration::from_secs(1)).await;
}
warn!(
"[k8s-runner] namespace `{}` still present after waiting for deletion",
namespace
);
}
impl CleanupGuard for RunnerCleanup {
fn cleanup(self: Box<Self>) {
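        // Outside a Tokio runtime we can build a throwaway runtime and block on
        // cleanup directly; inside one, `Runtime::new` + `block_on` would panic,
        // so fall back to a dedicated thread that owns its own runtime.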
if tokio::runtime::Handle::try_current().is_err() && self.blocking_cleanup_success() {
return;
}
self.spawn_cleanup_thread();
}
}

View File

@ -0,0 +1,141 @@
use std::{io, process::Stdio};
use thiserror::Error;
use tokio::process::Command;
use crate::assets::{CFGSYNC_PORT, RunnerAssets, workspace_root};
#[derive(Debug, Error)]
pub enum HelmError {
#[error("failed to spawn {command}: {source}")]
Spawn {
command: String,
#[source]
source: io::Error,
},
#[error("{command} exited with status {status:?}\nstderr:\n{stderr}\nstdout:\n{stdout}")]
Failed {
command: String,
status: Option<i32>,
stdout: String,
stderr: String,
},
}
pub async fn install_release(
assets: &RunnerAssets,
release: &str,
namespace: &str,
validators: usize,
executors: usize,
) -> Result<(), HelmError> {
let host_path_type = if assets.kzg_path.is_dir() {
"Directory"
} else {
"File"
};
let mut cmd = Command::new("helm");
cmd.arg("install")
.arg(release)
.arg(&assets.chart_path)
.arg("--namespace")
.arg(namespace)
.arg("--create-namespace")
.arg("--wait")
.arg("--timeout")
.arg("5m")
.arg("--set")
.arg(format!("image={}", assets.image))
.arg("--set")
.arg(format!("validators.count={validators}"))
.arg("--set")
.arg(format!("executors.count={executors}"))
.arg("--set")
.arg(format!("cfgsync.port={CFGSYNC_PORT}"))
.arg("--set")
.arg(format!("kzg.hostPath={}", assets.kzg_path.display()))
.arg("--set")
.arg(format!("kzg.hostPathType={host_path_type}"))
.arg("-f")
.arg(&assets.values_file)
.arg("--set-file")
.arg(format!("cfgsync.config={}", assets.cfgsync_file.display()))
.arg("--set-file")
.arg(format!(
"scripts.runCfgsyncSh={}",
assets.run_cfgsync_script.display()
))
.arg("--set-file")
.arg(format!(
"scripts.runNomosNodeSh={}",
assets.run_nomos_node_script.display()
))
.arg("--set-file")
.arg(format!(
"scripts.runNomosExecutorSh={}",
assets.run_nomos_executor_script.display()
))
.stdout(Stdio::piped())
.stderr(Stdio::piped());
if let Ok(root) = workspace_root() {
cmd.current_dir(root);
}
let command = format!("helm install {release}");
let output = run_helm_command(cmd, &command).await?;
if std::env::var("K8S_RUNNER_DEBUG").is_ok() {
println!(
"[k8s-runner] {command} stdout:\n{}",
String::from_utf8_lossy(&output.stdout)
);
println!(
"[k8s-runner] {command} stderr:\n{}",
String::from_utf8_lossy(&output.stderr)
);
}
Ok(())
}
pub async fn uninstall_release(release: &str, namespace: &str) -> Result<(), HelmError> {
let mut cmd = Command::new("helm");
cmd.arg("uninstall")
.arg(release)
.arg("--namespace")
.arg(namespace)
.stdout(Stdio::piped())
.stderr(Stdio::piped());
println!("[k8s-runner] issuing `helm uninstall {release}` in namespace `{namespace}`");
run_helm_command(cmd, &format!("helm uninstall {release}")).await?;
println!(
"[k8s-runner] helm uninstall {release} completed successfully (namespace `{namespace}`)"
);
Ok(())
}
async fn run_helm_command(
mut cmd: Command,
command: &str,
) -> Result<std::process::Output, HelmError> {
cmd.stdout(Stdio::piped()).stderr(Stdio::piped());
let output = cmd.output().await.map_err(|source| HelmError::Spawn {
command: command.to_owned(),
source,
})?;
if output.status.success() {
Ok(output)
} else {
Err(HelmError::Failed {
command: command.to_owned(),
status: output.status.code(),
stdout: String::from_utf8_lossy(&output.stdout).into_owned(),
stderr: String::from_utf8_lossy(&output.stderr).into_owned(),
})
}
}

View File

@ -0,0 +1,20 @@
use std::env;
const NODE_HOST_ENV: &str = "K8S_RUNNER_NODE_HOST";
const KUBE_SERVICE_HOST_ENV: &str = "KUBERNETES_SERVICE_HOST";
/// Returns the hostname or IP used to reach `NodePorts` exposed by the cluster.
/// Prefers `K8S_RUNNER_NODE_HOST`, then the standard `KUBERNETES_SERVICE_HOST`
/// (e.g. `kubernetes.docker.internal` on Docker Desktop), and finally falls
/// back to `127.0.0.1`.
pub fn node_host() -> String {
if let Ok(host) = env::var(NODE_HOST_ENV) {
return host;
}
if let Ok(host) = env::var(KUBE_SERVICE_HOST_ENV)
&& !host.is_empty()
{
return host;
}
"127.0.0.1".to_owned()
}

View File

@ -0,0 +1,9 @@
mod assets;
mod cleanup;
mod helm;
mod host;
mod logs;
mod runner;
mod wait;
pub use runner::{K8sRunner, K8sRunnerError};

View File

@ -0,0 +1,44 @@
use k8s_openapi::api::core::v1::Pod;
use kube::{
Api, Client,
api::{ListParams, LogParams},
};
use tracing::{info, warn};
pub async fn dump_namespace_logs(client: &Client, namespace: &str) {
let pod_names = match list_pod_names(client, namespace).await {
Ok(names) => names,
Err(err) => {
warn!("[k8s-runner] failed to list pods in namespace {namespace}: {err}");
return;
}
};
for pod_name in pod_names {
stream_pod_logs(client, namespace, &pod_name).await;
}
}
async fn list_pod_names(client: &Client, namespace: &str) -> Result<Vec<String>, kube::Error> {
let list = Api::<Pod>::namespaced(client.clone(), namespace)
.list(&ListParams::default())
.await?;
Ok(list
.into_iter()
.filter_map(|pod| pod.metadata.name)
.collect())
}
async fn stream_pod_logs(client: &Client, namespace: &str, pod_name: &str) {
let pods: Api<Pod> = Api::namespaced(client.clone(), namespace);
let params = LogParams {
follow: false,
tail_lines: Some(500),
..Default::default()
};
match pods.logs(pod_name, &params).await {
Ok(log) => info!("[k8s-runner] pod {pod_name} logs:\n{log}"),
Err(err) => warn!("[k8s-runner] failed to fetch logs for pod {pod_name}: {err}"),
}
}

View File

@ -0,0 +1,519 @@
use std::env;
use anyhow::Error;
use async_trait::async_trait;
use kube::Client;
use reqwest::Url;
use testing_framework_core::{
nodes::ApiClient,
scenario::{
BlockFeed, BlockFeedTask, CleanupGuard, Deployer, Metrics, MetricsError, NodeClients,
RunContext, Runner, Scenario, http_probe::NodeRole, spawn_block_feed,
},
topology::{GeneratedTopology, ReadinessError},
};
use tracing::{error, info};
use url::ParseError;
use uuid::Uuid;
use crate::{
assets::{AssetsError, RunnerAssets, prepare_assets},
cleanup::RunnerCleanup,
helm::{HelmError, install_release},
host::node_host,
logs::dump_namespace_logs,
wait::{ClusterPorts, ClusterWaitError, NodeConfigPorts, wait_for_cluster_ready},
};
pub struct K8sRunner {
readiness_checks: bool,
}
impl Default for K8sRunner {
fn default() -> Self {
Self::new()
}
}
impl K8sRunner {
#[must_use]
pub const fn new() -> Self {
Self {
readiness_checks: true,
}
}
#[must_use]
pub const fn with_readiness(mut self, enabled: bool) -> Self {
self.readiness_checks = enabled;
self
}
}
#[derive(Default)]
struct PortSpecs {
validators: Vec<NodeConfigPorts>,
executors: Vec<NodeConfigPorts>,
}
struct ClusterEnvironment {
client: Client,
namespace: String,
release: String,
cleanup: Option<RunnerCleanup>,
validator_api_ports: Vec<u16>,
validator_testing_ports: Vec<u16>,
executor_api_ports: Vec<u16>,
executor_testing_ports: Vec<u16>,
prometheus_port: u16,
}
impl ClusterEnvironment {
fn new(
client: Client,
namespace: String,
release: String,
cleanup: RunnerCleanup,
ports: &ClusterPorts,
) -> Self {
Self {
client,
namespace,
release,
cleanup: Some(cleanup),
validator_api_ports: ports.validators.iter().map(|ports| ports.api).collect(),
validator_testing_ports: ports.validators.iter().map(|ports| ports.testing).collect(),
executor_api_ports: ports.executors.iter().map(|ports| ports.api).collect(),
executor_testing_ports: ports.executors.iter().map(|ports| ports.testing).collect(),
prometheus_port: ports.prometheus,
}
}
async fn fail(&mut self, reason: &str) {
error!(
reason = reason,
namespace = %self.namespace,
release = %self.release,
"k8s stack failure; collecting diagnostics"
);
dump_namespace_logs(&self.client, &self.namespace).await;
if let Some(guard) = self.cleanup.take() {
Box::new(guard).cleanup();
}
}
fn into_cleanup(mut self) -> RunnerCleanup {
self.cleanup
.take()
.expect("cleanup guard should be available")
}
}
#[derive(Debug, thiserror::Error)]
pub enum NodeClientError {
#[error("failed to build {endpoint} client URL for {role} port {port}: {source}")]
Endpoint {
role: NodeRole,
endpoint: &'static str,
port: u16,
#[source]
source: ParseError,
},
}
#[derive(Debug, thiserror::Error)]
pub enum RemoteReadinessError {
#[error("failed to build readiness URL for {role} port {port}: {source}")]
Endpoint {
role: NodeRole,
port: u16,
#[source]
source: ParseError,
},
#[error("remote readiness probe failed: {source}")]
Remote {
#[source]
source: ReadinessError,
},
}
fn readiness_urls(ports: &[u16], role: NodeRole) -> Result<Vec<Url>, RemoteReadinessError> {
ports
.iter()
.copied()
.map(|port| readiness_url(role, port))
.collect()
}
fn readiness_url(role: NodeRole, port: u16) -> Result<Url, RemoteReadinessError> {
cluster_host_url(port).map_err(|source| RemoteReadinessError::Endpoint { role, port, source })
}
fn cluster_host_url(port: u16) -> Result<Url, ParseError> {
Url::parse(&format!("http://{}:{port}/", node_host()))
}
fn metrics_handle_from_port(port: u16) -> Result<Metrics, MetricsError> {
let url = cluster_host_url(port)
.map_err(|err| MetricsError::new(format!("invalid prometheus url: {err}")))?;
Metrics::from_prometheus(url)
}
async fn spawn_block_feed_with(
node_clients: &NodeClients,
) -> Result<(BlockFeed, BlockFeedTask), K8sRunnerError> {
let block_source_client = node_clients
.any_client()
.cloned()
.ok_or(K8sRunnerError::BlockFeedMissing)?;
spawn_block_feed(block_source_client)
.await
.map_err(|source| K8sRunnerError::BlockFeed { source })
}
#[derive(Debug, thiserror::Error)]
pub enum K8sRunnerError {
#[error(
"kubernetes runner requires at least one validator and one executor (validators={validators}, executors={executors})"
)]
UnsupportedTopology { validators: usize, executors: usize },
#[error("failed to initialise kubernetes client: {source}")]
ClientInit {
#[source]
source: kube::Error,
},
#[error(transparent)]
Assets(#[from] AssetsError),
#[error(transparent)]
Helm(#[from] HelmError),
#[error(transparent)]
Cluster(#[from] Box<ClusterWaitError>),
#[error(transparent)]
Readiness(#[from] RemoteReadinessError),
#[error(transparent)]
NodeClients(#[from] NodeClientError),
#[error(transparent)]
Telemetry(#[from] MetricsError),
#[error("k8s runner requires at least one node client to follow blocks")]
BlockFeedMissing,
#[error("failed to initialize block feed: {source}")]
BlockFeed {
#[source]
source: Error,
},
}
#[async_trait]
impl Deployer for K8sRunner {
type Error = K8sRunnerError;
async fn deploy(&self, scenario: &Scenario) -> Result<Runner, Self::Error> {
let descriptors = scenario.topology().clone();
ensure_supported_topology(&descriptors)?;
let client = Client::try_default()
.await
.map_err(|source| K8sRunnerError::ClientInit { source })?;
info!(
validators = descriptors.validators().len(),
executors = descriptors.executors().len(),
"starting k8s deployment"
);
let port_specs = collect_port_specs(&descriptors);
let mut cluster =
Some(setup_cluster(&client, &port_specs, &descriptors, self.readiness_checks).await?);
info!("building node clients");
let node_clients = match build_node_clients(
cluster
.as_ref()
.expect("cluster must be available while building clients"),
) {
Ok(clients) => clients,
Err(err) => {
if let Some(env) = cluster.as_mut() {
env.fail("failed to construct node api clients").await;
}
return Err(err.into());
}
};
let telemetry = match metrics_handle_from_port(
cluster
.as_ref()
.expect("cluster must be available for telemetry")
.prometheus_port,
) {
Ok(handle) => handle,
Err(err) => {
if let Some(env) = cluster.as_mut() {
env.fail("failed to configure prometheus metrics handle")
.await;
}
return Err(err.into());
}
};
let (block_feed, block_feed_guard) = match spawn_block_feed_with(&node_clients).await {
Ok(pair) => pair,
Err(err) => {
if let Some(env) = cluster.as_mut() {
env.fail("failed to initialize block feed").await;
}
return Err(err);
}
};
let cleanup = cluster
.take()
.expect("cluster should still be available")
.into_cleanup();
let cleanup_guard: Box<dyn CleanupGuard> =
Box::new(K8sCleanupGuard::new(cleanup, block_feed_guard));
let context = RunContext::new(
descriptors,
None,
node_clients,
scenario.duration(),
telemetry,
block_feed,
None,
);
Ok(Runner::new(context, Some(cleanup_guard)))
}
}
impl From<ClusterWaitError> for K8sRunnerError {
fn from(value: ClusterWaitError) -> Self {
Self::Cluster(Box::new(value))
}
}
fn ensure_supported_topology(descriptors: &GeneratedTopology) -> Result<(), K8sRunnerError> {
let validators = descriptors.validators().len();
let executors = descriptors.executors().len();
if validators == 0 || executors == 0 {
return Err(K8sRunnerError::UnsupportedTopology {
validators,
executors,
});
}
Ok(())
}
fn collect_port_specs(descriptors: &GeneratedTopology) -> PortSpecs {
let validators = descriptors
.validators()
.iter()
.map(|node| NodeConfigPorts {
api: node.general.api_config.address.port(),
testing: node.general.api_config.testing_http_address.port(),
})
.collect();
let executors = descriptors
.executors()
.iter()
.map(|node| NodeConfigPorts {
api: node.general.api_config.address.port(),
testing: node.general.api_config.testing_http_address.port(),
})
.collect();
PortSpecs {
validators,
executors,
}
}
fn build_node_clients(cluster: &ClusterEnvironment) -> Result<NodeClients, NodeClientError> {
let validators = cluster
.validator_api_ports
.iter()
.copied()
.zip(cluster.validator_testing_ports.iter().copied())
.map(|(api_port, testing_port)| {
api_client_from_ports(NodeRole::Validator, api_port, testing_port)
})
.collect::<Result<Vec<_>, _>>()?;
let executors = cluster
.executor_api_ports
.iter()
.copied()
.zip(cluster.executor_testing_ports.iter().copied())
.map(|(api_port, testing_port)| {
api_client_from_ports(NodeRole::Executor, api_port, testing_port)
})
.collect::<Result<Vec<_>, _>>()?;
Ok(NodeClients::new(validators, executors))
}
fn api_client_from_ports(
role: NodeRole,
api_port: u16,
testing_port: u16,
) -> Result<ApiClient, NodeClientError> {
let base_endpoint = cluster_host_url(api_port).map_err(|source| NodeClientError::Endpoint {
role,
endpoint: "api",
port: api_port,
source,
})?;
let testing_endpoint =
Some(
cluster_host_url(testing_port).map_err(|source| NodeClientError::Endpoint {
role,
endpoint: "testing",
port: testing_port,
source,
})?,
);
Ok(ApiClient::from_urls(base_endpoint, testing_endpoint))
}
async fn setup_cluster(
client: &Client,
specs: &PortSpecs,
descriptors: &GeneratedTopology,
readiness_checks: bool,
) -> Result<ClusterEnvironment, K8sRunnerError> {
let assets = prepare_assets(descriptors)?;
let validators = descriptors.validators().len();
let executors = descriptors.executors().len();
let (namespace, release) = cluster_identifiers();
let mut cleanup_guard =
Some(install_stack(client, &assets, &namespace, &release, validators, executors).await?);
let cluster_ports =
wait_for_ports_or_cleanup(client, &namespace, &release, specs, &mut cleanup_guard).await?;
info!(
prometheus_port = cluster_ports.prometheus,
"discovered prometheus endpoint"
);
let environment = ClusterEnvironment::new(
client.clone(),
namespace,
release,
cleanup_guard
.take()
.expect("cleanup guard must exist after successful cluster startup"),
&cluster_ports,
);
if readiness_checks {
ensure_cluster_readiness(descriptors, &environment).await?;
}
Ok(environment)
}
fn cluster_identifiers() -> (String, String) {
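    // One generated identifier doubles as both the namespace and the Helm
    // release name, so cleanup only has to track a single value.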
let run_id = Uuid::new_v4().simple().to_string();
let namespace = format!("nomos-k8s-{run_id}");
(namespace.clone(), namespace)
}
async fn install_stack(
client: &Client,
assets: &RunnerAssets,
namespace: &str,
release: &str,
validators: usize,
executors: usize,
) -> Result<RunnerCleanup, K8sRunnerError> {
info!(
release = %release,
namespace = %namespace,
"installing helm release"
);
install_release(assets, release, namespace, validators, executors).await?;
info!(release = %release, "helm install succeeded");
let preserve = env::var("K8S_RUNNER_PRESERVE").is_ok();
Ok(RunnerCleanup::new(
client.clone(),
namespace.to_owned(),
release.to_owned(),
preserve,
))
}
async fn wait_for_ports_or_cleanup(
client: &Client,
namespace: &str,
release: &str,
specs: &PortSpecs,
cleanup_guard: &mut Option<RunnerCleanup>,
) -> Result<ClusterPorts, K8sRunnerError> {
match wait_for_cluster_ready(
client,
namespace,
release,
&specs.validators,
&specs.executors,
)
.await
{
Ok(ports) => Ok(ports),
Err(err) => {
cleanup_pending(client, namespace, cleanup_guard).await;
Err(err.into())
}
}
}
async fn cleanup_pending(client: &Client, namespace: &str, guard: &mut Option<RunnerCleanup>) {
dump_namespace_logs(client, namespace).await;
if let Some(guard) = guard.take() {
Box::new(guard).cleanup();
}
}
async fn ensure_cluster_readiness(
descriptors: &GeneratedTopology,
cluster: &ClusterEnvironment,
) -> Result<(), RemoteReadinessError> {
let validator_urls = readiness_urls(&cluster.validator_api_ports, NodeRole::Validator)?;
let executor_urls = readiness_urls(&cluster.executor_api_ports, NodeRole::Executor)?;
let validator_membership_urls =
readiness_urls(&cluster.validator_testing_ports, NodeRole::Validator)?;
let executor_membership_urls =
readiness_urls(&cluster.executor_testing_ports, NodeRole::Executor)?;
descriptors
.wait_remote_readiness(
&validator_urls,
&executor_urls,
Some(&validator_membership_urls),
Some(&executor_membership_urls),
)
.await
.map_err(|source| RemoteReadinessError::Remote { source })
}
struct K8sCleanupGuard {
cleanup: RunnerCleanup,
block_feed: Option<BlockFeedTask>,
}
impl K8sCleanupGuard {
const fn new(cleanup: RunnerCleanup, block_feed: BlockFeedTask) -> Self {
Self {
cleanup,
block_feed: Some(block_feed),
}
}
}
impl CleanupGuard for K8sCleanupGuard {
fn cleanup(mut self: Box<Self>) {
if let Some(block_feed) = self.block_feed.take() {
CleanupGuard::cleanup(Box::new(block_feed));
}
CleanupGuard::cleanup(Box::new(self.cleanup));
}
}
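
For orientation, a minimal sketch (not part of this commit) of driving the runner end to end. It only uses items shown above (`K8sRunner`, `with_readiness`, the `Deployer` trait) and assumes a `Scenario` has already been built elsewhere; the container image defaults to `nomos-testnet:local` unless `NOMOS_TESTNET_IMAGE` is set, and `K8S_RUNNER_PRESERVE` keeps the Helm release around after the run.

use testing_framework_core::scenario::{Deployer as _, Runner, Scenario};
use testing_framework_runner_k8s::{K8sRunner, K8sRunnerError};

// Hypothetical helper: deploy an already-built scenario onto the current
// kubeconfig context and hand back the Runner for workloads and expectations.
async fn deploy_on_cluster(scenario: &Scenario) -> Result<Runner, K8sRunnerError> {
    K8sRunner::new()
        // Remote readiness probes are on by default; disable them when
        // iterating against a cluster that is known to be healthy.
        .with_readiness(false)
        .deploy(scenario)
        .await
}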

View File

@ -0,0 +1,255 @@
use std::time::Duration;
use k8s_openapi::api::{apps::v1::Deployment, core::v1::Service};
use kube::{Api, Client, Error as KubeError};
use testing_framework_core::scenario::http_probe::{self, HttpReadinessError, NodeRole};
use thiserror::Error;
use tokio::time::sleep;
use crate::host::node_host;
const DEPLOYMENT_TIMEOUT: Duration = Duration::from_secs(180);
const PROMETHEUS_HTTP_PORT: u16 = 9090;
const PROMETHEUS_SERVICE_NAME: &str = "prometheus";
#[derive(Clone, Copy)]
pub struct NodeConfigPorts {
pub api: u16,
pub testing: u16,
}
#[derive(Clone, Copy)]
pub struct NodePortAllocation {
pub api: u16,
pub testing: u16,
}
pub struct ClusterPorts {
pub validators: Vec<NodePortAllocation>,
pub executors: Vec<NodePortAllocation>,
pub prometheus: u16,
}
#[derive(Debug, Error)]
pub enum ClusterWaitError {
#[error("deployment {name} in namespace {namespace} did not become ready within {timeout:?}")]
DeploymentTimeout {
name: String,
namespace: String,
timeout: Duration,
},
#[error("failed to fetch deployment {name}: {source}")]
DeploymentFetch {
name: String,
#[source]
source: KubeError,
},
#[error("failed to fetch service {service}: {source}")]
ServiceFetch {
service: String,
#[source]
source: KubeError,
},
#[error("service {service} did not allocate a node port for {port}")]
NodePortUnavailable { service: String, port: u16 },
#[error("cluster must have at least one validator")]
MissingValidator,
#[error("timeout waiting for {role} HTTP endpoint on port {port} after {timeout:?}")]
NodeHttpTimeout {
role: NodeRole,
port: u16,
timeout: Duration,
},
#[error("timeout waiting for prometheus readiness on NodePort {port}")]
PrometheusTimeout { port: u16 },
}
pub async fn wait_for_deployment_ready(
client: &Client,
namespace: &str,
name: &str,
timeout: Duration,
) -> Result<(), ClusterWaitError> {
let mut elapsed = Duration::ZERO;
let interval = Duration::from_secs(2);
while elapsed <= timeout {
match Api::<Deployment>::namespaced(client.clone(), namespace)
.get(name)
.await
{
Ok(deployment) => {
let desired = deployment
.spec
.as_ref()
.and_then(|spec| spec.replicas)
.unwrap_or(1);
let ready = deployment
.status
.as_ref()
.and_then(|status| status.ready_replicas)
.unwrap_or(0);
if ready >= desired {
return Ok(());
}
}
Err(err) => {
return Err(ClusterWaitError::DeploymentFetch {
name: name.to_owned(),
source: err,
});
}
}
sleep(interval).await;
elapsed += interval;
}
Err(ClusterWaitError::DeploymentTimeout {
name: name.to_owned(),
namespace: namespace.to_owned(),
timeout,
})
}
pub async fn find_node_port(
client: &Client,
namespace: &str,
service_name: &str,
service_port: u16,
) -> Result<u16, ClusterWaitError> {
let interval = Duration::from_secs(1);
for _ in 0..120 {
match Api::<Service>::namespaced(client.clone(), namespace)
.get(service_name)
.await
{
Ok(service) => {
if let Some(spec) = service.spec.clone()
&& let Some(ports) = spec.ports
{
for port in ports {
if port.port == i32::from(service_port)
&& let Some(node_port) = port.node_port
{
return Ok(node_port as u16);
}
}
}
}
Err(err) => {
return Err(ClusterWaitError::ServiceFetch {
service: service_name.to_owned(),
source: err,
});
}
}
sleep(interval).await;
}
Err(ClusterWaitError::NodePortUnavailable {
service: service_name.to_owned(),
port: service_port,
})
}
pub async fn wait_for_cluster_ready(
client: &Client,
namespace: &str,
release: &str,
validator_ports: &[NodeConfigPorts],
executor_ports: &[NodeConfigPorts],
) -> Result<ClusterPorts, ClusterWaitError> {
if validator_ports.is_empty() {
return Err(ClusterWaitError::MissingValidator);
}
let mut validator_allocations = Vec::with_capacity(validator_ports.len());
for (index, ports) in validator_ports.iter().enumerate() {
let name = format!("{release}-validator-{index}");
wait_for_deployment_ready(client, namespace, &name, DEPLOYMENT_TIMEOUT).await?;
let api_port = find_node_port(client, namespace, &name, ports.api).await?;
let testing_port = find_node_port(client, namespace, &name, ports.testing).await?;
validator_allocations.push(NodePortAllocation {
api: api_port,
testing: testing_port,
});
}
let validator_api_ports: Vec<u16> = validator_allocations
.iter()
.map(|ports| ports.api)
.collect();
wait_for_node_http(&validator_api_ports, NodeRole::Validator).await?;
let mut executor_allocations = Vec::with_capacity(executor_ports.len());
for (index, ports) in executor_ports.iter().enumerate() {
let name = format!("{release}-executor-{index}");
wait_for_deployment_ready(client, namespace, &name, DEPLOYMENT_TIMEOUT).await?;
let api_port = find_node_port(client, namespace, &name, ports.api).await?;
let testing_port = find_node_port(client, namespace, &name, ports.testing).await?;
executor_allocations.push(NodePortAllocation {
api: api_port,
testing: testing_port,
});
}
if !executor_allocations.is_empty() {
let executor_api_ports: Vec<u16> =
executor_allocations.iter().map(|ports| ports.api).collect();
wait_for_node_http(&executor_api_ports, NodeRole::Executor).await?;
}
let prometheus_port = find_node_port(
client,
namespace,
PROMETHEUS_SERVICE_NAME,
PROMETHEUS_HTTP_PORT,
)
.await?;
wait_for_prometheus_http(prometheus_port).await?;
Ok(ClusterPorts {
validators: validator_allocations,
executors: executor_allocations,
prometheus: prometheus_port,
})
}
async fn wait_for_node_http(ports: &[u16], role: NodeRole) -> Result<(), ClusterWaitError> {
let host = node_host();
http_probe::wait_for_http_ports_with_host(
ports,
role,
&host,
Duration::from_secs(240),
Duration::from_secs(1),
)
.await
.map_err(map_http_error)
}
const fn map_http_error(error: HttpReadinessError) -> ClusterWaitError {
ClusterWaitError::NodeHttpTimeout {
role: error.role(),
port: error.port(),
timeout: error.timeout(),
}
}
pub async fn wait_for_prometheus_http(port: u16) -> Result<(), ClusterWaitError> {
let client = reqwest::Client::new();
let url = format!("http://{}:{port}/-/ready", node_host());
for _ in 0..240 {
if let Ok(resp) = client.get(&url).send().await
&& resp.status().is_success()
{
return Ok(());
}
sleep(Duration::from_secs(1)).await;
}
Err(ClusterWaitError::PrometheusTimeout { port })
}

View File

@ -0,0 +1,18 @@
[package]
categories.workspace = true
description.workspace = true
edition.workspace = true
keywords.workspace = true
license.workspace = true
name = "testing-framework-runner-local"
readme.workspace = true
repository.workspace = true
version = "0.1.0"
[lints]
workspace = true
[dependencies]
async-trait = "0.1"
testing-framework-core = { path = "../../core" }
thiserror = { workspace = true }

View File

@ -0,0 +1,3 @@
mod runner;
pub use runner::{LocalDeployer, LocalDeployerError};

View File

@ -0,0 +1,133 @@
use async_trait::async_trait;
use testing_framework_core::{
scenario::{
BlockFeed, BlockFeedTask, Deployer, DynError, Metrics, NodeClients, RunContext, Runner,
Scenario, ScenarioError, spawn_block_feed,
},
topology::{ReadinessError, Topology},
};
use thiserror::Error;
/// Spawns validators and executors as local processes, reusing the existing
/// integration harness.
#[derive(Clone)]
pub struct LocalDeployer {
membership_check: bool,
}
#[derive(Debug, Error)]
pub enum LocalDeployerError {
#[error("readiness probe failed: {source}")]
ReadinessFailed {
#[source]
source: ReadinessError,
},
#[error("workload failed: {source}")]
WorkloadFailed {
#[source]
source: DynError,
},
#[error("expectations failed: {source}")]
ExpectationsFailed {
#[source]
source: DynError,
},
}
impl From<ScenarioError> for LocalDeployerError {
fn from(value: ScenarioError) -> Self {
match value {
ScenarioError::Workload(source) => Self::WorkloadFailed { source },
ScenarioError::ExpectationCapture(source) | ScenarioError::Expectations(source) => {
Self::ExpectationsFailed { source }
}
}
}
}
#[async_trait]
impl Deployer<()> for LocalDeployer {
type Error = LocalDeployerError;
async fn deploy(&self, scenario: &Scenario<()>) -> Result<Runner, Self::Error> {
let topology = Self::prepare_topology(scenario, self.membership_check).await?;
let node_clients = NodeClients::from_topology(scenario.topology(), &topology);
let (block_feed, block_feed_guard) = spawn_block_feed_with(&node_clients).await?;
let context = RunContext::new(
scenario.topology().clone(),
Some(topology),
node_clients,
scenario.duration(),
Metrics::empty(),
block_feed,
None,
);
Ok(Runner::new(context, Some(Box::new(block_feed_guard))))
}
}
impl LocalDeployer {
#[must_use]
pub fn new() -> Self {
Self::default()
}
#[must_use]
pub const fn with_membership_check(mut self, enabled: bool) -> Self {
self.membership_check = enabled;
self
}
async fn prepare_topology(
scenario: &Scenario<()>,
membership_check: bool,
) -> Result<Topology, LocalDeployerError> {
let descriptors = scenario.topology();
let topology = descriptors.clone().spawn_local().await;
let skip_membership = !membership_check;
if let Err(source) = wait_for_readiness(&topology, skip_membership).await {
return Err(LocalDeployerError::ReadinessFailed { source });
}
Ok(topology)
}
}
impl Default for LocalDeployer {
fn default() -> Self {
Self {
membership_check: true,
}
}
}
async fn wait_for_readiness(
topology: &Topology,
skip_membership: bool,
) -> Result<(), ReadinessError> {
topology.wait_network_ready().await?;
if !skip_membership {
topology.wait_membership_ready().await?;
}
topology.wait_da_balancer_ready().await
}
async fn spawn_block_feed_with(
node_clients: &NodeClients,
) -> Result<(BlockFeed, BlockFeedTask), LocalDeployerError> {
let block_source_client = node_clients.random_validator().cloned().ok_or_else(|| {
LocalDeployerError::WorkloadFailed {
source: "block feed requires at least one validator".into(),
}
})?;
spawn_block_feed(block_source_client)
.await
.map_err(|source| LocalDeployerError::WorkloadFailed {
source: source.into(),
})
}
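
As with the k8s runner, a small sketch (not in this commit) of how `LocalDeployer` is expected to be driven; it relies only on the types above and assumes the caller already holds a `Scenario<()>`.

use testing_framework_core::scenario::{Deployer as _, Runner, Scenario};
use testing_framework_runner_local::{LocalDeployer, LocalDeployerError};

// Hypothetical helper: spawn the topology as local processes and return the
// Runner once network, (optionally) membership, and DA balancer readiness pass.
async fn deploy_locally(scenario: &Scenario<()>) -> Result<Runner, LocalDeployerError> {
    LocalDeployer::new()
        // Membership readiness is checked by default; disable it to shorten
        // startup for scenarios that do not depend on it.
        .with_membership_check(false)
        .deploy(scenario)
        .await
}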

View File

@ -0,0 +1,26 @@
[package]
categories.workspace = true
description.workspace = true
edition.workspace = true
keywords.workspace = true
license.workspace = true
name = "testing-framework-workflows"
readme.workspace = true
repository.workspace = true
version = "0.1.0"
[lints]
workspace = true
[dependencies]
async-trait = "0.1"
ed25519-dalek = { version = "2.2.0", features = ["rand_core", "serde"] }
executor-http-client = { workspace = true }
integration-configs = { workspace = true }
nomos-core = { workspace = true }
rand = { workspace = true }
testing-framework-core = { workspace = true }
thiserror = { workspace = true }
tokio = { workspace = true, features = ["macros", "net", "rt-multi-thread", "time"] }
tracing = { workspace = true }
zksign = { workspace = true }

View File

@ -0,0 +1,298 @@
use std::{
num::{NonZeroU64, NonZeroUsize},
time::Duration,
};
use integration_configs::topology::configs::network::Libp2pNetworkLayout;
use testing_framework_core::{
scenario::{Builder as CoreScenarioBuilder, NodeControlCapability},
topology::configs::wallet::WalletConfig,
};
use crate::{
expectations::ConsensusLiveness,
workloads::{chaos::RandomRestartWorkload, da, transaction},
};
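// Const helpers below reject zero rates at construction time; the message is
// captured as a `literal` because `panic!` inside a `const fn` only accepts a
// plain string literal.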
macro_rules! non_zero_rate_fn {
($name:ident, $message:literal) => {
const fn $name(rate: u64) -> NonZeroU64 {
match NonZeroU64::new(rate) {
Some(value) => value,
None => panic!($message),
}
}
};
}
non_zero_rate_fn!(
transaction_rate_checked,
"transaction rate must be non-zero"
);
non_zero_rate_fn!(channel_rate_checked, "channel rate must be non-zero");
non_zero_rate_fn!(blob_rate_checked, "blob rate must be non-zero");
pub trait ScenarioBuilderExt<Caps>: Sized {
fn topology(self) -> TopologyConfigurator<Caps>;
fn transactions(self) -> TransactionFlowBuilder<Caps>;
fn da(self) -> DataAvailabilityFlowBuilder<Caps>;
#[must_use]
fn expect_consensus_liveness(self) -> Self;
#[must_use]
fn initialize_wallet(self, total_funds: u64, users: usize) -> Self;
}
impl<Caps> ScenarioBuilderExt<Caps> for CoreScenarioBuilder<Caps> {
fn topology(self) -> TopologyConfigurator<Caps> {
TopologyConfigurator { builder: self }
}
fn transactions(self) -> TransactionFlowBuilder<Caps> {
TransactionFlowBuilder::new(self)
}
fn da(self) -> DataAvailabilityFlowBuilder<Caps> {
DataAvailabilityFlowBuilder::new(self)
}
fn expect_consensus_liveness(self) -> Self {
self.with_expectation(ConsensusLiveness::default())
}
fn initialize_wallet(self, total_funds: u64, users: usize) -> Self {
let user_count = NonZeroUsize::new(users).expect("wallet user count must be non-zero");
let wallet = WalletConfig::uniform(total_funds, user_count);
self.with_wallet_config(wallet)
}
}
pub struct TopologyConfigurator<Caps> {
builder: CoreScenarioBuilder<Caps>,
}
impl<Caps> TopologyConfigurator<Caps> {
#[must_use]
pub fn validators(mut self, count: usize) -> Self {
self.builder = self
.builder
.map_topology(|topology| topology.with_validator_count(count));
self
}
#[must_use]
pub fn executors(mut self, count: usize) -> Self {
self.builder = self
.builder
.map_topology(|topology| topology.with_executor_count(count));
self
}
#[must_use]
pub fn network_star(mut self) -> Self {
self.builder = self
.builder
.map_topology(|topology| topology.with_network_layout(Libp2pNetworkLayout::Star));
self
}
#[must_use]
pub fn apply(self) -> CoreScenarioBuilder<Caps> {
self.builder
}
}
pub struct TransactionFlowBuilder<Caps> {
builder: CoreScenarioBuilder<Caps>,
rate: NonZeroU64,
users: Option<NonZeroUsize>,
}
impl<Caps> TransactionFlowBuilder<Caps> {
const fn default_rate() -> NonZeroU64 {
transaction_rate_checked(1)
}
const fn new(builder: CoreScenarioBuilder<Caps>) -> Self {
Self {
builder,
rate: Self::default_rate(),
users: None,
}
}
#[must_use]
pub const fn rate(mut self, rate: u64) -> Self {
self.rate = transaction_rate_checked(rate);
self
}
#[must_use]
pub const fn rate_per_block(mut self, rate: NonZeroU64) -> Self {
self.rate = rate;
self
}
#[must_use]
pub const fn users(mut self, users: usize) -> Self {
match NonZeroUsize::new(users) {
Some(value) => self.users = Some(value),
None => panic!("transaction user count must be non-zero"),
}
self
}
#[must_use]
pub fn apply(mut self) -> CoreScenarioBuilder<Caps> {
let workload = transaction::Workload::with_rate(self.rate.get())
.expect("transaction rate must be non-zero")
.with_user_limit(self.users);
self.builder = self.builder.with_workload(workload);
self.builder
}
}
pub struct DataAvailabilityFlowBuilder<Caps> {
builder: CoreScenarioBuilder<Caps>,
channel_rate: NonZeroU64,
blob_rate: NonZeroU64,
}
impl<Caps> DataAvailabilityFlowBuilder<Caps> {
const fn default_channel_rate() -> NonZeroU64 {
channel_rate_checked(1)
}
const fn default_blob_rate() -> NonZeroU64 {
blob_rate_checked(1)
}
const fn new(builder: CoreScenarioBuilder<Caps>) -> Self {
Self {
builder,
channel_rate: Self::default_channel_rate(),
blob_rate: Self::default_blob_rate(),
}
}
#[must_use]
pub const fn channel_rate(mut self, rate: u64) -> Self {
self.channel_rate = channel_rate_checked(rate);
self
}
#[must_use]
pub const fn channel_rate_per_block(mut self, rate: NonZeroU64) -> Self {
self.channel_rate = rate;
self
}
#[must_use]
pub const fn blob_rate(mut self, rate: u64) -> Self {
self.blob_rate = blob_rate_checked(rate);
self
}
#[must_use]
pub const fn blob_rate_per_block(mut self, rate: NonZeroU64) -> Self {
self.blob_rate = rate;
self
}
#[must_use]
pub fn apply(mut self) -> CoreScenarioBuilder<Caps> {
        // The DA workload sizes its channel set from the product of the configured rates;
        // both rates are non-zero, so the result is always at least one channel.
        let count = (self.channel_rate.get() * self.blob_rate.get()) as usize;
let workload = da::Workload::with_channel_count(count.max(1));
self.builder = self.builder.with_workload(workload);
self.builder
}
}
pub trait ChaosBuilderExt: Sized {
fn chaos_random_restart(self) -> ChaosRestartBuilder;
}
impl ChaosBuilderExt for CoreScenarioBuilder<NodeControlCapability> {
fn chaos_random_restart(self) -> ChaosRestartBuilder {
ChaosRestartBuilder {
builder: self,
min_delay: Duration::from_secs(10),
max_delay: Duration::from_secs(30),
target_cooldown: Duration::from_secs(60),
include_validators: true,
include_executors: true,
}
}
}
pub struct ChaosRestartBuilder {
builder: CoreScenarioBuilder<NodeControlCapability>,
min_delay: Duration,
max_delay: Duration,
target_cooldown: Duration,
include_validators: bool,
include_executors: bool,
}
impl ChaosRestartBuilder {
#[must_use]
pub fn min_delay(mut self, delay: Duration) -> Self {
assert!(!delay.is_zero(), "chaos restart min delay must be non-zero");
self.min_delay = delay;
self
}
#[must_use]
pub fn max_delay(mut self, delay: Duration) -> Self {
assert!(!delay.is_zero(), "chaos restart max delay must be non-zero");
self.max_delay = delay;
self
}
#[must_use]
pub fn target_cooldown(mut self, cooldown: Duration) -> Self {
assert!(
!cooldown.is_zero(),
"chaos restart target cooldown must be non-zero"
);
self.target_cooldown = cooldown;
self
}
#[must_use]
pub const fn include_validators(mut self, enabled: bool) -> Self {
self.include_validators = enabled;
self
}
#[must_use]
pub const fn include_executors(mut self, enabled: bool) -> Self {
self.include_executors = enabled;
self
}
#[must_use]
pub fn apply(mut self) -> CoreScenarioBuilder<NodeControlCapability> {
assert!(
self.min_delay <= self.max_delay,
"chaos restart min delay must not exceed max delay"
);
assert!(
self.target_cooldown >= self.min_delay,
"chaos restart target cooldown must be >= min delay"
);
assert!(
self.include_validators || self.include_executors,
"chaos restart requires at least one node group"
);
let workload = RandomRestartWorkload::new(
self.min_delay,
self.max_delay,
self.target_cooldown,
self.include_validators,
self.include_executors,
);
self.builder = self.builder.with_workload(workload);
self.builder
}
}
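
Taken together, the extension traits above form a fluent surface over `CoreScenarioBuilder`. The sketch below shows how the pieces might chain; it is illustrative only, assuming a builder value obtained from the core crate (its constructor is not part of this file), the crate name `testing_framework_workflows` implied by the workspace layout, and a hypothetical `obtain_core_scenario_builder` helper.

use testing_framework_workflows::ScenarioBuilderExt as _;

// Hypothetical: how the builder is constructed is outside this file.
let builder = obtain_core_scenario_builder();

let scenario = builder
    .topology()
    .validators(3)
    .executors(1)
    .network_star()
    .apply()
    .transactions()
    .rate(2)
    .users(4)
    .apply()
    .da()
    .channel_rate(1)
    .blob_rate(2)
    .apply()
    .initialize_wallet(1_000_000, 4)
    .expect_consensus_liveness();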

View File

@ -0,0 +1,220 @@
use std::time::Duration;
use async_trait::async_trait;
use testing_framework_core::scenario::{DynError, Expectation, RunContext};
use thiserror::Error;
use tokio::time::sleep;
#[derive(Clone, Copy, Debug)]
pub struct ConsensusLiveness {
lag_allowance: u64,
}
impl Default for ConsensusLiveness {
fn default() -> Self {
Self {
lag_allowance: LAG_ALLOWANCE,
}
}
}
const LAG_ALLOWANCE: u64 = 2;
const MIN_PROGRESS_BLOCKS: u64 = 5;
const REQUEST_RETRIES: usize = 5;
const REQUEST_RETRY_DELAY: Duration = Duration::from_secs(2);
#[async_trait]
impl Expectation for ConsensusLiveness {
fn name(&self) -> &'static str {
"consensus_liveness"
}
async fn evaluate(&mut self, ctx: &RunContext) -> Result<(), DynError> {
Self::ensure_participants(ctx)?;
let target_hint = Self::target_blocks(ctx);
let check = Self::collect_results(ctx).await;
(*self).report(target_hint, check)
}
}
const fn consensus_target_blocks(ctx: &RunContext) -> u64 {
ctx.expected_blocks()
}
#[derive(Debug, Error)]
enum ConsensusLivenessIssue {
#[error("{node} height {height} below target {target}")]
HeightBelowTarget {
node: String,
height: u64,
target: u64,
},
#[error("{node} consensus_info failed: {source}")]
RequestFailed {
node: String,
#[source]
source: DynError,
},
}
#[derive(Debug, Error)]
enum ConsensusLivenessError {
#[error("consensus liveness requires at least one validator or executor")]
MissingParticipants,
#[error("consensus liveness violated (target={target}):\n{details}")]
Violations {
target: u64,
#[source]
details: ViolationIssues,
},
}
#[derive(Debug, Error)]
#[error("{message}")]
struct ViolationIssues {
issues: Vec<ConsensusLivenessIssue>,
message: String,
}
impl ConsensusLiveness {
const fn target_blocks(ctx: &RunContext) -> u64 {
consensus_target_blocks(ctx)
}
fn ensure_participants(ctx: &RunContext) -> Result<(), DynError> {
if ctx.node_clients().all_clients().count() == 0 {
Err(Box::new(ConsensusLivenessError::MissingParticipants))
} else {
Ok(())
}
}
async fn collect_results(ctx: &RunContext) -> LivenessCheck {
let participant_count = ctx.node_clients().all_clients().count().max(1);
let max_attempts = participant_count * REQUEST_RETRIES;
let mut samples = Vec::with_capacity(participant_count);
let mut issues = Vec::new();
for attempt in 0..max_attempts {
match Self::fetch_cluster_height(ctx).await {
Ok(height) => {
samples.push(NodeSample {
label: format!("sample-{attempt}"),
height,
});
if samples.len() >= participant_count {
break;
}
}
Err(err) => issues.push(ConsensusLivenessIssue::RequestFailed {
node: format!("sample-{attempt}"),
source: err,
}),
}
if samples.len() < participant_count {
sleep(REQUEST_RETRY_DELAY).await;
}
}
LivenessCheck { samples, issues }
}
async fn fetch_cluster_height(ctx: &RunContext) -> Result<u64, DynError> {
ctx.cluster_client()
.try_all_clients(|client| {
Box::pin(async move {
client
.consensus_info()
.await
.map(|info| info.height)
.map_err(|err| -> DynError { err.into() })
})
})
.await
}
#[must_use]
pub const fn with_lag_allowance(mut self, lag_allowance: u64) -> Self {
self.lag_allowance = lag_allowance;
self
}
fn report(self, target_hint: u64, mut check: LivenessCheck) -> Result<(), DynError> {
if check.samples.is_empty() {
return Err(Box::new(ConsensusLivenessError::MissingParticipants));
}
let max_height = check
.samples
.iter()
.map(|sample| sample.height)
.max()
.unwrap_or(0);
let mut target = target_hint;
if target == 0 || target > max_height {
target = max_height;
}
if max_height < MIN_PROGRESS_BLOCKS {
check
.issues
.push(ConsensusLivenessIssue::HeightBelowTarget {
node: "network".to_owned(),
height: max_height,
target: MIN_PROGRESS_BLOCKS,
});
}
for sample in &check.samples {
if sample.height + self.lag_allowance < target {
check
.issues
.push(ConsensusLivenessIssue::HeightBelowTarget {
node: sample.label.clone(),
height: sample.height,
target,
});
}
}
if check.issues.is_empty() {
tracing::info!(
target,
heights = ?check.samples.iter().map(|s| s.height).collect::<Vec<_>>(),
"consensus liveness expectation satisfied"
);
Ok(())
} else {
Err(Box::new(ConsensusLivenessError::Violations {
target,
details: check.issues.into(),
}))
}
}
}
struct NodeSample {
label: String,
height: u64,
}
struct LivenessCheck {
samples: Vec<NodeSample>,
issues: Vec<ConsensusLivenessIssue>,
}
impl From<Vec<ConsensusLivenessIssue>> for ViolationIssues {
fn from(issues: Vec<ConsensusLivenessIssue>) -> Self {
let mut message = String::new();
for issue in &issues {
if !message.is_empty() {
message.push('\n');
}
message.push_str("- ");
message.push_str(&issue.to_string());
}
Self { issues, message }
}
}
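
`ConsensusLiveness` compares every sampled height against the run's block target, tolerating a small lag. A minimal sketch of attaching it with a wider allowance, assuming the same hypothetical `builder` value used in the earlier example:

// Sketch: allow nodes to trail the target by up to four blocks before flagging a violation.
let scenario = builder.with_expectation(ConsensusLiveness::default().with_lag_allowance(4));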

View File

@ -0,0 +1,3 @@
mod consensus_liveness;
pub use consensus_liveness::ConsensusLiveness;

View File

@ -0,0 +1,8 @@
pub mod builder;
pub mod expectations;
pub mod util;
pub mod workloads;
pub use builder::{ChaosBuilderExt, ScenarioBuilderExt};
pub use expectations::ConsensusLiveness;
pub use workloads::transaction::TxInclusionExpectation;

View File

@ -0,0 +1 @@
pub mod tx;

View File

@ -0,0 +1,37 @@
use ed25519_dalek::{Signer as _, SigningKey};
use nomos_core::mantle::{
MantleTx, Op, OpProof, SignedMantleTx, Transaction as _,
ledger::Tx as LedgerTx,
ops::channel::{ChannelId, MsgId, inscribe::InscriptionOp},
};
use zksign::SecretKey;
#[must_use]
pub fn create_inscription_transaction_with_id(id: ChannelId) -> SignedMantleTx {
let signing_key = SigningKey::from_bytes(&[0u8; 32]);
let signer = signing_key.verifying_key();
let inscription_op = InscriptionOp {
channel_id: id,
inscription: format!("Test channel inscription {id:?}").into_bytes(),
parent: MsgId::root(),
signer,
};
let mantle_tx = MantleTx {
ops: vec![Op::ChannelInscribe(inscription_op)],
ledger_tx: LedgerTx::new(vec![], vec![]),
storage_gas_price: 0,
execution_gas_price: 0,
};
let tx_hash = mantle_tx.hash();
let signature = signing_key.sign(&tx_hash.as_signing_bytes());
SignedMantleTx::new(
mantle_tx,
vec![OpProof::Ed25519Sig(signature)],
SecretKey::multi_sign(&[], tx_hash.as_ref()).expect("zk signature generation"),
)
.expect("valid transaction")
}
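
The helper above signs a deterministic inscription for a given channel, so tests can produce valid `SignedMantleTx` values without managing keys. A brief usage sketch, assuming `channel` is a `ChannelId` supplied by the caller and that submission goes through a node client whose API is not shown in this diff:

// Sketch: build an inscription for a planned channel and hand it to a node client.
let tx = create_inscription_transaction_with_id(channel);
// client.submit_tx(tx).await?;  // hypothetical submission call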

View File

@ -0,0 +1,160 @@
use std::{collections::HashMap, time::Duration};
use async_trait::async_trait;
use rand::{Rng as _, seq::SliceRandom as _, thread_rng};
use testing_framework_core::scenario::{DynError, RunContext, Workload};
use tokio::time::{Instant, sleep};
use tracing::info;
pub struct RandomRestartWorkload {
min_delay: Duration,
max_delay: Duration,
target_cooldown: Duration,
include_validators: bool,
include_executors: bool,
}
impl RandomRestartWorkload {
#[must_use]
pub const fn new(
min_delay: Duration,
max_delay: Duration,
target_cooldown: Duration,
include_validators: bool,
include_executors: bool,
) -> Self {
Self {
min_delay,
max_delay,
target_cooldown,
include_validators,
include_executors,
}
}
fn targets(&self, ctx: &RunContext) -> Vec<Target> {
let mut targets = Vec::new();
let validator_count = ctx.descriptors().validators().len();
if self.include_validators {
if validator_count > 1 {
for index in 0..validator_count {
targets.push(Target::Validator(index));
}
} else if validator_count == 1 {
info!("chaos restart skipping validators: only one validator configured");
}
}
if self.include_executors {
for index in 0..ctx.descriptors().executors().len() {
targets.push(Target::Executor(index));
}
}
targets
}
fn random_delay(&self) -> Duration {
if self.max_delay <= self.min_delay {
return self.min_delay;
}
let spread = self
.max_delay
.checked_sub(self.min_delay)
.unwrap_or_else(|| Duration::from_millis(1))
.as_secs_f64();
let offset = thread_rng().gen_range(0.0..=spread);
self.min_delay
.checked_add(Duration::from_secs_f64(offset))
.unwrap_or(self.max_delay)
}
fn initialize_cooldowns(&self, targets: &[Target]) -> HashMap<Target, Instant> {
let now = Instant::now();
let ready = now.checked_sub(self.target_cooldown).unwrap_or(now);
targets
.iter()
.copied()
.map(|target| (target, ready))
.collect()
}
async fn pick_target(
&self,
targets: &[Target],
cooldowns: &HashMap<Target, Instant>,
) -> Target {
        // Wait until at least one target's cooldown has expired, then pick uniformly at
        // random among the available targets; fall back to any target if none qualifies.
        loop {
let now = Instant::now();
if let Some(next_ready) = cooldowns
.values()
.copied()
.filter(|ready| *ready > now)
.min()
{
let wait = next_ready.saturating_duration_since(now);
if !wait.is_zero() {
sleep(wait).await;
continue;
}
}
let available: Vec<Target> = targets
.iter()
.copied()
.filter(|target| cooldowns.get(target).is_none_or(|ready| *ready <= now))
.collect();
if let Some(choice) = available.choose(&mut thread_rng()).copied() {
return choice;
}
return targets
.choose(&mut thread_rng())
.copied()
.expect("chaos restart workload has targets");
}
}
}
#[async_trait]
impl Workload for RandomRestartWorkload {
fn name(&self) -> &'static str {
"chaos_random_restart"
}
async fn start(&self, ctx: &RunContext) -> Result<(), DynError> {
let handle = ctx
.node_control()
.ok_or_else(|| "chaos restart workload requires node control".to_owned())?;
let targets = self.targets(ctx);
if targets.is_empty() {
return Err("chaos restart workload has no eligible targets".into());
}
let mut cooldowns = self.initialize_cooldowns(&targets);
loop {
sleep(self.random_delay()).await;
let target = self.pick_target(&targets, &cooldowns).await;
match target {
Target::Validator(index) => handle
.restart_validator(index)
.await
.map_err(|err| format!("validator restart failed: {err}"))?,
Target::Executor(index) => handle
.restart_executor(index)
.await
.map_err(|err| format!("executor restart failed: {err}"))?,
}
cooldowns.insert(target, Instant::now() + self.target_cooldown);
}
}
}
#[derive(Clone, Copy, PartialEq, Eq, Hash)]
enum Target {
Validator(usize),
Executor(usize),
}
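
`RandomRestartWorkload` keeps restarting random targets for as long as the run lasts, so its delays and cooldown should be chosen with the scenario's expected duration in mind. A sketch of configuring it through `ChaosBuilderExt`, assuming a hypothetical `builder` that already carries `NodeControlCapability`:

use std::time::Duration;
use testing_framework_workflows::ChaosBuilderExt as _;

// Sketch: restart only executors, with a gentler cadence than the defaults.
let scenario = builder
    .chaos_random_restart()
    .min_delay(Duration::from_secs(15))
    .max_delay(Duration::from_secs(45))
    .target_cooldown(Duration::from_secs(90))
    .include_validators(false)
    .apply();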

View File

@ -0,0 +1,177 @@
use std::{
collections::HashSet,
sync::{Arc, Mutex},
};
use async_trait::async_trait;
use nomos_core::mantle::{
AuthenticatedMantleTx as _,
ops::{Op, channel::ChannelId},
};
use testing_framework_core::scenario::{BlockRecord, DynError, Expectation, RunContext};
use thiserror::Error;
use tokio::sync::broadcast;
#[derive(Debug)]
pub struct DaWorkloadExpectation {
planned_channels: Vec<ChannelId>,
capture_state: Option<CaptureState>,
}
#[derive(Debug)]
struct CaptureState {
planned: Arc<HashSet<ChannelId>>,
inscriptions: Arc<Mutex<HashSet<ChannelId>>>,
blobs: Arc<Mutex<HashSet<ChannelId>>>,
}
const MIN_INCLUSION_RATIO: f64 = 0.8;
#[derive(Debug, Error)]
enum DaExpectationError {
#[error("da workload expectation not started")]
NotCaptured,
#[error("missing inscriptions for {missing:?}")]
MissingInscriptions { missing: Vec<ChannelId> },
#[error("missing blobs for {missing:?}")]
MissingBlobs { missing: Vec<ChannelId> },
}
impl DaWorkloadExpectation {
pub const fn new(planned_channels: Vec<ChannelId>) -> Self {
Self {
planned_channels,
capture_state: None,
}
}
}
#[async_trait]
impl Expectation for DaWorkloadExpectation {
fn name(&self) -> &'static str {
"da_workload_inclusions"
}
async fn start_capture(&mut self, ctx: &RunContext) -> Result<(), DynError> {
if self.capture_state.is_some() {
return Ok(());
}
let planned = Arc::new(
self.planned_channels
.iter()
.copied()
.collect::<HashSet<_>>(),
);
let inscriptions = Arc::new(Mutex::new(HashSet::new()));
let blobs = Arc::new(Mutex::new(HashSet::new()));
let mut receiver = ctx.block_feed().subscribe();
let planned_for_task = Arc::clone(&planned);
let inscriptions_for_task = Arc::clone(&inscriptions);
let blobs_for_task = Arc::clone(&blobs);
tokio::spawn(async move {
loop {
match receiver.recv().await {
Ok(record) => capture_block(
record.as_ref(),
&planned_for_task,
&inscriptions_for_task,
&blobs_for_task,
),
Err(broadcast::error::RecvError::Lagged(_)) => {}
Err(broadcast::error::RecvError::Closed) => break,
}
}
});
self.capture_state = Some(CaptureState {
planned,
inscriptions,
blobs,
});
Ok(())
}
async fn evaluate(&mut self, _ctx: &RunContext) -> Result<(), DynError> {
let state = self
.capture_state
.as_ref()
.ok_or(DaExpectationError::NotCaptured)
.map_err(DynError::from)?;
let planned_total = state.planned.len();
let missing_inscriptions = {
let inscriptions = state
.inscriptions
.lock()
.expect("inscription lock poisoned");
missing_channels(&state.planned, &inscriptions)
};
let required_inscriptions = minimum_required(planned_total, MIN_INCLUSION_RATIO);
if planned_total.saturating_sub(missing_inscriptions.len()) < required_inscriptions {
return Err(DaExpectationError::MissingInscriptions {
missing: missing_inscriptions,
}
.into());
}
let missing_blobs = {
let blobs = state.blobs.lock().expect("blob lock poisoned");
missing_channels(&state.planned, &blobs)
};
let required_blobs = minimum_required(planned_total, MIN_INCLUSION_RATIO);
if planned_total.saturating_sub(missing_blobs.len()) < required_blobs {
return Err(DaExpectationError::MissingBlobs {
missing: missing_blobs,
}
.into());
}
Ok(())
}
}
fn capture_block(
block: &BlockRecord,
planned: &HashSet<ChannelId>,
inscriptions: &Arc<Mutex<HashSet<ChannelId>>>,
blobs: &Arc<Mutex<HashSet<ChannelId>>>,
) {
let mut new_inscriptions = Vec::new();
let mut new_blobs = Vec::new();
for tx in block.block.transactions() {
for op in &tx.mantle_tx().ops {
match op {
Op::ChannelInscribe(inscribe) if planned.contains(&inscribe.channel_id) => {
new_inscriptions.push(inscribe.channel_id);
}
Op::ChannelBlob(blob) if planned.contains(&blob.channel) => {
new_blobs.push(blob.channel);
}
_ => {}
}
}
}
if !new_inscriptions.is_empty() {
let mut guard = inscriptions.lock().expect("inscription lock poisoned");
guard.extend(new_inscriptions);
}
if !new_blobs.is_empty() {
let mut guard = blobs.lock().expect("blob lock poisoned");
guard.extend(new_blobs);
}
}
fn missing_channels(planned: &HashSet<ChannelId>, observed: &HashSet<ChannelId>) -> Vec<ChannelId> {
planned.difference(observed).copied().collect()
}
fn minimum_required(total: usize, ratio: f64) -> usize {
((total as f64) * ratio).ceil() as usize
}
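
`DaWorkloadExpectation` subscribes to the block feed and passes only if at least `MIN_INCLUSION_RATIO` (80%) of the planned channels show both an inscription and a blob. A sketch of wiring it up, assuming `planned` is the `Vec<ChannelId>` the DA workload was built from and `builder` is the same hypothetical scenario builder as above:

// Sketch: require inclusion evidence for the channels the DA workload planned to use.
let expectation = DaWorkloadExpectation::new(planned.clone());
let scenario = builder.with_expectation(expectation);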

Some files were not shown because too many files have changed in this diff.