From a1712199090f81a5956e02ecfe6c17f6b78642b1 Mon Sep 17 00:00:00 2001 From: andrussal Date: Tue, 9 Dec 2025 17:45:10 +0100 Subject: [PATCH] Centralize timeouts and cfgsync defaults --- book/src/operations.md | 8 +-- book/src/quickstart.md | 3 ++ book/src/troubleshooting.md | 13 +++++ testing-framework/configs/src/lib.rs | 1 + testing-framework/configs/src/nodes/common.rs | 14 +++--- .../configs/src/nodes/executor.rs | 7 ++- testing-framework/configs/src/timeouts.rs | 50 +++++++++++++++++++ testing-framework/core/src/constants.rs | 24 +++++++++ 8 files changed, 106 insertions(+), 14 deletions(-) create mode 100644 testing-framework/configs/src/timeouts.rs create mode 100644 testing-framework/core/src/constants.rs diff --git a/book/src/operations.md b/book/src/operations.md index 582bb90..585a81d 100644 --- a/book/src/operations.md +++ b/book/src/operations.md @@ -3,9 +3,11 @@ Operational readiness focuses on prerequisites, environment fit, and clear signals: -- **Prerequisites**: keep a sibling `nomos-node` checkout available; ensure the - chosen runner’s platform needs are met (local binaries for host runs, Docker - for compose, cluster access for k8s). +- **Prerequisites**: + - **`versions.env` file** at repository root (required by helper scripts; defines VERSION, NOMOS_NODE_REV, NOMOS_BUNDLE_VERSION) + - Keep a sibling `nomos-node` checkout available, or use `scripts/run-examples.sh` which clones/builds on demand + - Ensure the chosen runner's platform needs are met (Docker for compose, cluster access for k8s) + - CI uses prebuilt binary artifacts from the `build-binaries` workflow - **Artifacts**: DA scenarios require KZG parameters (circuit assets) located at `testing-framework/assets/stack/kzgrs_test_params`. Fetch them via `scripts/setup-nomos-circuits.sh` or override the path with `NOMOS_KZGRS_PARAMS_PATH`. 
diff --git a/book/src/quickstart.md b/book/src/quickstart.md index 8b7293d..919ce0b 100644 --- a/book/src/quickstart.md +++ b/book/src/quickstart.md @@ -8,9 +8,12 @@ Get a working example running quickly. - This repository cloned - Unix-like system (tested on Linux and macOS) - For Docker Compose examples: Docker daemon running +- **`versions.env` file** at repository root (defines VERSION, NOMOS_NODE_REV, NOMOS_BUNDLE_VERSION) **Note:** `nomos-node` binaries are built automatically on demand or can be provided via prebuilt bundles. +**Important:** The `versions.env` file is required by helper scripts. If missing, the scripts will fail with an error. The file should already exist in the repository root. + ## Your First Test The framework ships with runnable example binaries in `examples/src/bin/`. diff --git a/book/src/troubleshooting.md b/book/src/troubleshooting.md index ca843cd..2a700c3 100644 --- a/book/src/troubleshooting.md +++ b/book/src/troubleshooting.md @@ -1,6 +1,7 @@ # Troubleshooting Scenarios **Prerequisites for All Runners:** +- **`versions.env` file** at repository root (required by helper scripts) - **`POL_PROOF_DEV_MODE=true`** MUST be set for all runners (host, compose, k8s) to avoid expensive Groth16 proof generation that causes timeouts - **KZG circuit assets** must be present at `testing-framework/assets/stack/kzgrs_test_params/kzgrs_test_params` (note the repeated filename) for DA workloads @@ -183,6 +184,7 @@ Focus on the first node that exhibited problems or the node with the highest ind **Common error patterns:** +- "ERROR: versions.env missing" → missing required `versions.env` file at repository root - "Failed to bind address" → port conflict - "Connection refused" → peer not ready or network issue - "Proof verification failed" or "Proof generation timeout" → missing `POL_PROOF_DEV_MODE=true` (REQUIRED for all runners) @@ -258,6 +260,17 @@ Run a minimal baseline test (e.g., 2 validators, consensus liveness only). If it 3. 
Verify network connectivity between nodes. 4. For DA workloads, ensure KZG circuit assets are present. +### "ERROR: versions.env missing" + +- **Cause**: Helper scripts (`run-examples.sh`, `build-bundle.sh`, `setup-circuits-stack.sh`) require `versions.env` file at repository root. +- **Fix**: Ensure you're running from the repository root directory. The `versions.env` file should already exist and contains: + ``` + VERSION=v0.3.1 + NOMOS_NODE_REV=d2dd5a5084e1daef4032562c77d41de5e4d495f8 + NOMOS_BUNDLE_VERSION=v4 + ``` + If the file is missing, restore it from version control or create it with the above content. + ### "Port already in use" - **Cause**: Previous test didn't clean up, or another process holds the port. diff --git a/testing-framework/configs/src/lib.rs b/testing-framework/configs/src/lib.rs index a108bbb..fe87c39 100644 --- a/testing-framework/configs/src/lib.rs +++ b/testing-framework/configs/src/lib.rs @@ -4,6 +4,7 @@ use nomos_core::sdp::ProviderId; use nomos_libp2p::{Multiaddr, PeerId, multiaddr}; pub mod nodes; +pub mod timeouts; pub mod topology; static IS_SLOW_TEST_ENV: LazyLock<bool> = diff --git a/testing-framework/configs/src/nodes/common.rs b/testing-framework/configs/src/nodes/common.rs index 224969b..4fea692 100644 --- a/testing-framework/configs/src/nodes/common.rs +++ b/testing-framework/configs/src/nodes/common.rs @@ -33,7 +33,7 @@ use nomos_node::{ use nomos_utils::math::NonNegativeF64; use nomos_wallet::WalletServiceSettings; -use crate::topology::configs::GeneralConfig; +use crate::{timeouts, topology::configs::GeneralConfig}; pub(crate) fn cryptarchia_deployment(config: &GeneralConfig) -> CryptarchiaDeploymentSettings { CryptarchiaDeploymentSettings { @@ -77,7 +77,7 @@ pub(crate) fn cryptarchia_config(config: &GeneralConfig) -> CryptarchiaConfig { prolonged_bootstrap_period: config.bootstrapping_config.prolonged_bootstrap_period, force_bootstrap: false, offline_grace_period: chain_service::OfflineGracePeriodConfig { - grace_period: 
Duration::from_secs(20 * 60), + grace_period: timeouts::grace_period(), state_recording_interval: Duration::from_secs(60), }, }, @@ -128,9 +128,9 @@ pub(crate) fn da_verifier_config( }, mempool_trigger_settings: MempoolPublishTriggerConfig { publish_threshold: NonNegativeF64::try_from(0.8).unwrap(), - share_duration: Duration::from_secs(5), - prune_duration: Duration::from_secs(30), - prune_interval: Duration::from_secs(5), + share_duration: timeouts::share_duration(), + prune_duration: timeouts::prune_duration(), + prune_interval: timeouts::prune_interval(), }, } } @@ -149,8 +149,8 @@ pub(crate) fn da_sampling_config( global_params_path: kzg_params_path(&config.da_config.global_params_path), domain_size: config.da_config.num_subnets as usize, }, - commitments_wait_duration: Duration::from_secs(1), - sdp_blob_trigger_sampling_delay: crate::adjust_timeout(Duration::from_secs(5)), + commitments_wait_duration: timeouts::commitments_wait(), + sdp_blob_trigger_sampling_delay: crate::adjust_timeout(timeouts::sdp_trigger_delay()), } } diff --git a/testing-framework/configs/src/nodes/executor.rs b/testing-framework/configs/src/nodes/executor.rs index 04669f7..c0c3ed8 100644 --- a/testing-framework/configs/src/nodes/executor.rs +++ b/testing-framework/configs/src/nodes/executor.rs @@ -1,5 +1,3 @@ -use std::time::Duration; - use nomos_da_dispersal::{ DispersalServiceSettings, backend::kzgrs::{DispersalKZGRSBackendSettings, EncoderSettings}, @@ -25,6 +23,7 @@ use crate::{ time_deployment, tracing_settings, wallet_settings, }, }, + timeouts, topology::configs::GeneralConfig, }; @@ -90,8 +89,8 @@ pub fn create_executor_config(config: GeneralConfig) -> ExecutorConfig { with_cache: false, global_params_path: config.da_config.global_params_path.clone(), }, - dispersal_timeout: Duration::from_secs(20), - retry_cooldown: Duration::from_secs(3), + dispersal_timeout: timeouts::dispersal_timeout(), + retry_cooldown: timeouts::retry_cooldown(), retry_limit: 2, }, }, diff --git 
a/testing-framework/configs/src/timeouts.rs b/testing-framework/configs/src/timeouts.rs new file mode 100644 index 0000000..8741e04 --- /dev/null +++ b/testing-framework/configs/src/timeouts.rs @@ -0,0 +1,50 @@ +use std::{env, time::Duration}; + +pub const DISPERSAL_TIMEOUT_SECS: u64 = 20; +pub const RETRY_COOLDOWN_SECS: u64 = 3; +pub const GRACE_PERIOD_SECS: u64 = 20 * 60; +pub const PRUNE_DURATION_SECS: u64 = 30; +pub const PRUNE_INTERVAL_SECS: u64 = 5; +pub const SHARE_DURATION_SECS: u64 = 5; +pub const COMMITMENTS_WAIT_SECS: u64 = 1; +pub const SDP_TRIGGER_DELAY_SECS: u64 = 5; + +fn env_duration(key: &str, default: u64) -> Duration { + env::var(key) + .ok() + .and_then(|v| v.parse::<u64>().ok()) + .map(Duration::from_secs) + .unwrap_or_else(|| Duration::from_secs(default)) +} + +pub fn dispersal_timeout() -> Duration { + env_duration("NOMOS_DISPERSAL_TIMEOUT_SECS", DISPERSAL_TIMEOUT_SECS) +} + +pub fn retry_cooldown() -> Duration { + env_duration("NOMOS_RETRY_COOLDOWN_SECS", RETRY_COOLDOWN_SECS) +} + +pub fn grace_period() -> Duration { + env_duration("NOMOS_GRACE_PERIOD_SECS", GRACE_PERIOD_SECS) +} + +pub fn prune_duration() -> Duration { + env_duration("NOMOS_PRUNE_DURATION_SECS", PRUNE_DURATION_SECS) +} + +pub fn prune_interval() -> Duration { + env_duration("NOMOS_PRUNE_INTERVAL_SECS", PRUNE_INTERVAL_SECS) +} + +pub fn share_duration() -> Duration { + env_duration("NOMOS_SHARE_DURATION_SECS", SHARE_DURATION_SECS) +} + +pub fn commitments_wait() -> Duration { + env_duration("NOMOS_COMMITMENTS_WAIT_SECS", COMMITMENTS_WAIT_SECS) +} + +pub fn sdp_trigger_delay() -> Duration { + env_duration("NOMOS_SDP_TRIGGER_DELAY_SECS", SDP_TRIGGER_DELAY_SECS) +} diff --git a/testing-framework/core/src/constants.rs b/testing-framework/core/src/constants.rs new file mode 100644 index 0000000..d57e4fd --- /dev/null +++ b/testing-framework/core/src/constants.rs @@ -0,0 +1,24 @@ +use std::env; + +/// Default cfgsync port used across runners. 
+pub const DEFAULT_CFGSYNC_PORT: u16 = 4400; + +/// Default container path for KZG parameters (compose/k8s mount point). +pub const DEFAULT_KZG_CONTAINER_PATH: &str = "/kzgrs_test_params/kzgrs_test_params"; + +/// Default host-relative directory for KZG assets. +pub const DEFAULT_KZG_HOST_DIR: &str = "testing-framework/assets/stack/kzgrs_test_params"; + +/// Resolve cfgsync port from `NOMOS_CFGSYNC_PORT`, falling back to the default. +pub fn cfgsync_port() -> u16 { + env::var("NOMOS_CFGSYNC_PORT") + .ok() + .and_then(|v| v.parse::<u16>().ok()) + .unwrap_or(DEFAULT_CFGSYNC_PORT) +} + +/// Resolve container KZG path from `NOMOS_KZG_CONTAINER_PATH`, falling back to +/// the default. +pub fn kzg_container_path() -> String { + env::var("NOMOS_KZG_CONTAINER_PATH").unwrap_or_else(|_| DEFAULT_KZG_CONTAINER_PATH.to_string()) +}