diff --git a/.github/workflows/lint.yml b/.github/workflows/lint.yml index 828921d..229f473 100644 --- a/.github/workflows/lint.yml +++ b/.github/workflows/lint.yml @@ -239,7 +239,6 @@ jobs: host_smoke: runs-on: ubuntu-latest env: - POL_PROOF_DEV_MODE: true LOCAL_DEMO_RUN_SECS: 120 LOCAL_DEMO_VALIDATORS: 1 LOGOS_BLOCKCHAIN_CIRCUITS: ${{ github.workspace }}/.tmp/logos-blockchain-circuits @@ -500,7 +499,6 @@ jobs: - name: Run compose mixed workload binary env: - POL_PROOF_DEV_MODE: "true" COMPOSE_NODE_PAIRS: "1x1" LOGOS_BLOCKCHAIN_TESTNET_IMAGE: ${{ env.LOGOS_BLOCKCHAIN_TESTNET_IMAGE }} COMPOSE_RUNNER_HOST: "127.0.0.1" diff --git a/README.md b/README.md index 48d5503..a3aea03 100644 --- a/README.md +++ b/README.md @@ -123,7 +123,6 @@ Key environment variables for customization: | Variable | Purpose | Default | |----------|---------|---------| -| `POL_PROOF_DEV_MODE=true` | **Required** — Disable expensive proof generation (set automatically by `scripts/run/run-examples.sh`) | (none) | | `LOGOS_BLOCKCHAIN_TESTNET_IMAGE` | Docker image tag for compose/k8s | `logos-blockchain-testing:local` | | `LOGOS_BLOCKCHAIN_DEMO_NODES` | Number of nodes | Varies by example | | `LOGOS_BLOCKCHAIN_LOG_DIR` | Directory for persistent log files | (temporary) | diff --git a/book/src/annotated-tree.md b/book/src/annotated-tree.md index 5835460..d4d05e9 100644 --- a/book/src/annotated-tree.md +++ b/book/src/annotated-tree.md @@ -65,9 +65,7 @@ Convenience utilities: - `compose_runner.rs` — Docker Compose (requires `LOGOS_BLOCKCHAIN_TESTNET_IMAGE` built) - `k8s_runner.rs` — Kubernetes (requires cluster + image) -**Run with:** `POL_PROOF_DEV_MODE=true cargo run -p runner-examples --bin ` - -**All runners require `POL_PROOF_DEV_MODE=true`** to avoid expensive proof generation. 
+**Run with:** `cargo run -p runner-examples --bin ` ### `scripts/` Helper utilities: diff --git a/book/src/architecture-overview.md b/book/src/architecture-overview.md index f55a159..73c8759 100644 --- a/book/src/architecture-overview.md +++ b/book/src/architecture-overview.md @@ -150,11 +150,9 @@ This handles circuit setup, binary building/bundling, image building, and execut **Alternative:** Direct cargo run (requires manual setup): ```bash -POL_PROOF_DEV_MODE=true cargo run -p runner-examples --bin +cargo run -p runner-examples --bin ``` -**Important:** All runners require `POL_PROOF_DEV_MODE=true` to avoid expensive Groth16 proof generation that causes timeouts. - These binaries use the framework API (`ScenarioBuilder`) to construct and execute scenarios. ## Builder API diff --git a/book/src/authoring-scenarios.md b/book/src/authoring-scenarios.md index c9519da..95e9ef9 100644 --- a/book/src/authoring-scenarios.md +++ b/book/src/authoring-scenarios.md @@ -153,7 +153,7 @@ async fn hello_consensus_liveness() -> Result<()> { **Run it:** ```bash -POL_PROOF_DEV_MODE=true cargo test hello_consensus_liveness +cargo test hello_consensus_liveness ``` --- diff --git a/book/src/best-practices.md b/book/src/best-practices.md index 3fdb756..6bc802a 100644 --- a/book/src/best-practices.md +++ b/book/src/best-practices.md @@ -128,13 +128,5 @@ strategy: ## Anti-Patterns to Avoid -**DON'T: Run without POL_PROOF_DEV_MODE** -```bash -# BAD: Will hang/timeout on proof generation -cargo run -p runner-examples --bin local_runner - -# GOOD: Fast mode for testing -POL_PROOF_DEV_MODE=true cargo run -p runner-examples --bin local_runner -``` **DON'T: Use tiny durations** diff --git a/book/src/ci-integration.md b/book/src/ci-integration.md index 13d5a2f..a258d2c 100644 --- a/book/src/ci-integration.md +++ b/book/src/ci-integration.md @@ -39,7 +39,6 @@ on: branches: [main] env: - POL_PROOF_DEV_MODE: true CARGO_TERM_COLOR: always RUST_BACKTRACE: 1 @@ -243,17 +242,6 @@ if: github.event_name == 
'push' && github.ref == 'refs/heads/main' ## Best Practices -### Required: Set POL_PROOF_DEV_MODE - -**Always set `POL_PROOF_DEV_MODE=true` globally** in your workflow env: - -```yaml -env: - POL_PROOF_DEV_MODE: true # REQUIRED! -``` - -Without this, tests will hang due to expensive proof generation. - ### Use Helper Scripts Prefer `scripts/run/run-examples.sh` which handles all setup automatically: diff --git a/book/src/environment-variables.md b/book/src/environment-variables.md index 239e38b..bcfd7de 100644 --- a/book/src/environment-variables.md +++ b/book/src/environment-variables.md @@ -2,27 +2,6 @@ Complete reference of environment variables used by the testing framework, organized by category. -## Critical Variables - -These MUST be set for successful test runs: - -| Variable | Required | Default | Effect | -|----------|----------|---------|--------| -| `POL_PROOF_DEV_MODE` | **YES** | — | **REQUIRED for all runners**. Set to `true` to use fast dev-mode proving instead of expensive Groth16. Without this, tests will hang/timeout. 
| - -**Example:** - -```bash -export POL_PROOF_DEV_MODE=true -``` - -Or add to your shell profile (`~/.bashrc`, `~/.zshrc`): - -```bash -# Required for nomos-testing framework -export POL_PROOF_DEV_MODE=true -``` - --- ## Runner Selection & Topology @@ -138,7 +117,6 @@ Control node log output (not framework runner logs): LOGOS_BLOCKCHAIN_LOG_DIR=/tmp/test-logs \ LOGOS_BLOCKCHAIN_LOG_LEVEL=debug \ LOGOS_BLOCKCHAIN_LOG_FILTER="cryptarchia=trace" \ -POL_PROOF_DEV_MODE=true \ cargo run -p runner-examples --bin local_runner # Inspect logs @@ -303,7 +281,6 @@ Node-level configuration passed through to logos-blockchain-node: # Faster block production CONSENSUS_SLOT_TIME=5 \ CONSENSUS_ACTIVE_SLOT_COEFF=0.9 \ -POL_PROOF_DEV_MODE=true \ cargo run -p runner-examples --bin local_runner ``` @@ -350,14 +327,12 @@ Variables used by helper scripts (`scripts/run/run-examples.sh`, etc.): ### Minimal Host Run ```bash -POL_PROOF_DEV_MODE=true \ scripts/run/run-examples.sh -t 60 -n 3 host ``` ### Debug Logging (Host) ```bash -POL_PROOF_DEV_MODE=true \ LOGOS_BLOCKCHAIN_LOG_DIR=/tmp/logs \ LOGOS_BLOCKCHAIN_LOG_LEVEL=debug \ LOGOS_BLOCKCHAIN_LOG_FILTER="cryptarchia=trace" \ @@ -367,7 +342,6 @@ scripts/run/run-examples.sh -t 60 -n 3 host ### Compose with Observability ```bash -POL_PROOF_DEV_MODE=true \ LOGOS_BLOCKCHAIN_METRICS_QUERY_URL=http://localhost:9090 \ LOGOS_BLOCKCHAIN_GRAFANA_URL=http://localhost:3000 \ scripts/run/run-examples.sh -t 60 -n 3 compose @@ -376,7 +350,6 @@ scripts/run/run-examples.sh -t 60 -n 3 compose ### K8s with Debug ```bash -POL_PROOF_DEV_MODE=true \ K8S_RUNNER_NAMESPACE=nomos-debug \ K8S_RUNNER_DEBUG=1 \ K8S_RUNNER_PRESERVE=1 \ @@ -387,7 +360,6 @@ scripts/run/run-examples.sh -t 60 -n 3 k8s ```yaml env: - POL_PROOF_DEV_MODE: true RUST_BACKTRACE: 1 LOGOS_BLOCKCHAIN_TESTS_KEEP_LOGS: 1 ``` diff --git a/book/src/examples.md b/book/src/examples.md index de8cd42..6f696a2 100644 --- a/book/src/examples.md +++ b/book/src/examples.md @@ -15,9 +15,7 @@ and expectations. 
**Recommended:** Use `scripts/run/run-examples.sh -t -n ` where mode is `host`, `compose`, or `k8s`. -**Alternative:** Direct cargo run: `POL_PROOF_DEV_MODE=true cargo run -p runner-examples --bin ` - -**All runners require `POL_PROOF_DEV_MODE=true`** to avoid expensive proof generation. +**Alternative:** Direct cargo run: `cargo run -p runner-examples --bin ` **Code patterns** below show how to build scenarios. Wrap these in `#[tokio::test]` functions for integration tests, or `#[tokio::main]` for binaries. diff --git a/book/src/glossary.md b/book/src/glossary.md index c684bff..4e5a55b 100644 --- a/book/src/glossary.md +++ b/book/src/glossary.md @@ -38,10 +38,6 @@ Also called "correctness expectations." - **Mantle transaction**: transaction type in Logos that can contain UTXO transfers (LedgerTx) and operations (Op). -- **POL_PROOF_DEV_MODE**: environment variable that disables expensive Groth16 zero-knowledge - proof generation for leader election. **Required for all runners** (local, compose, k8s) - for practical testing—without it, proof generation causes timeouts. Should never be - used in production environments. 
--- diff --git a/book/src/logging-observability.md b/book/src/logging-observability.md index 16b79fb..9243874 100644 --- a/book/src/logging-observability.md +++ b/book/src/logging-observability.md @@ -46,7 +46,6 @@ LOGOS_BLOCKCHAIN_TESTS_TRACING=true \ LOGOS_BLOCKCHAIN_LOG_DIR=/tmp/test-logs \ LOGOS_BLOCKCHAIN_LOG_LEVEL=debug \ LOGOS_BLOCKCHAIN_LOG_FILTER="cryptarchia=trace,chain_service=info,chain_network=info" \ -POL_PROOF_DEV_MODE=true \ cargo run -p runner-examples --bin local_runner ``` @@ -90,7 +89,7 @@ LOGOS_BLOCKCHAIN_LOG_FILTER="cryptarchia=trace,chain_service=info,chain_network= **Default (temporary directories, auto-cleanup):** ```bash -POL_PROOF_DEV_MODE=true cargo run -p runner-examples --bin local_runner +cargo run -p runner-examples --bin local_runner # Logs written to temporary directories in working directory # Automatically cleaned up after test completes ``` @@ -99,7 +98,6 @@ POL_PROOF_DEV_MODE=true cargo run -p runner-examples --bin local_runner ```bash LOGOS_BLOCKCHAIN_LOG_DIR=/tmp/local-logs \ -POL_PROOF_DEV_MODE=true \ cargo run -p runner-examples --bin local_runner # After test completes: @@ -137,7 +135,6 @@ To write per-node log files inside containers, set `tracing_settings.logger: !Fi ```bash # Ensure cfgsync.yaml is configured to log to /logs LOGOS_BLOCKCHAIN_TESTNET_IMAGE=logos-blockchain-testing:local \ -POL_PROOF_DEV_MODE=true \ cargo run -p runner-examples --bin compose_runner # After test, copy files from containers: @@ -257,7 +254,6 @@ scripts/setup/setup-observability.sh compose up eval $(scripts/setup/setup-observability.sh compose env) # Run scenario with metrics -POL_PROOF_DEV_MODE=true \ scripts/run/run-examples.sh -t 60 -n 3 compose ``` @@ -275,7 +271,7 @@ scripts/setup/setup-observability.sh compose up eval $(scripts/setup/setup-observability.sh compose env) export LOGOS_BLOCKCHAIN_GRAFANA_URL=http://localhost:3000 -POL_PROOF_DEV_MODE=true scripts/run/run-examples.sh -t 60 -n 3 compose +scripts/run/run-examples.sh -t 60 -n 3 
compose ``` **Default bundled Grafana login:** `admin` / `admin` (see `scripts/observability/compose/docker-compose.yml`). @@ -322,7 +318,6 @@ flowchart TD LOGOS_BLOCKCHAIN_LOG_DIR=/tmp/logs \ LOGOS_BLOCKCHAIN_LOG_LEVEL=debug \ LOGOS_BLOCKCHAIN_LOG_FILTER="cryptarchia=trace" \ -POL_PROOF_DEV_MODE=true \ scripts/run/run-examples.sh -t 60 -n 3 host ``` @@ -334,7 +329,6 @@ scripts/setup/setup-observability.sh compose up eval $(scripts/setup/setup-observability.sh compose env) # Run with metrics -POL_PROOF_DEV_MODE=true \ scripts/run/run-examples.sh -t 60 -n 3 compose # Access Grafana at http://localhost:3000 @@ -346,7 +340,6 @@ scripts/run/run-examples.sh -t 60 -n 3 compose K8S_RUNNER_NAMESPACE=nomos-debug \ K8S_RUNNER_DEBUG=1 \ K8S_RUNNER_PRESERVE=1 \ -POL_PROOF_DEV_MODE=true \ scripts/run/run-examples.sh -t 60 -n 3 k8s # Inspect logs diff --git a/book/src/manual-cluster.md b/book/src/manual-cluster.md index b9fce9a..d42b2da 100644 --- a/book/src/manual-cluster.md +++ b/book/src/manual-cluster.md @@ -375,7 +375,5 @@ async fn external_driver_example() -> Result<()> { ```bash -# Required: dev mode for fast proofs -POL_PROOF_DEV_MODE=true \ cargo test -p runner-examples -- --ignored external_driver_example ``` @@ -385,7 +383,6 @@ cargo test -p runner-examples -- --ignored external_driver_example # Preserve logs after test LOGOS_BLOCKCHAIN_TESTS_KEEP_LOGS=1 \ RUST_LOG=info \ -POL_PROOF_DEV_MODE=true \ cargo test -p runner-examples -- --ignored external_driver_example ``` diff --git a/book/src/operations-overview.md b/book/src/operations-overview.md index 230d9ef..5df7156 100644 --- a/book/src/operations-overview.md +++ b/book/src/operations-overview.md @@ -23,7 +23,6 @@ Operational readiness focuses on prerequisites, environment fit, and clear signa - Binary bundles for reproducible builds **Environment Configuration:** -- `POL_PROOF_DEV_MODE=true` is **REQUIRED for all runners** to avoid expensive proof generation - Logging configured via `LOGOS_BLOCKCHAIN_LOG_*` 
variables - Observability endpoints (Prometheus, Grafana) optional but useful diff --git a/book/src/prerequisites.md b/book/src/prerequisites.md index 7047e72..83c3415 100644 --- a/book/src/prerequisites.md +++ b/book/src/prerequisites.md @@ -194,30 +194,6 @@ minikube image load logos-blockchain-testing:local - Resource isolation - Large topologies -## Critical Environment Variable - -**`POL_PROOF_DEV_MODE=true` is REQUIRED for ALL runners!** - -Without this, proof generation uses expensive Groth16 proving, causing: -- Tests "hang" for minutes -- CPU spikes to 100% -- Timeouts and failures - -**Always set:** - -```bash -POL_PROOF_DEV_MODE=true cargo run -p runner-examples --bin local_runner -POL_PROOF_DEV_MODE=true scripts/run/run-examples.sh -t 60 -n 3 compose -# etc. -``` - -**Or add to your shell profile:** - -```bash -# ~/.bashrc or ~/.zshrc -export POL_PROOF_DEV_MODE=true -``` - ## Quick Setup Check Run this checklist before your first scenario: @@ -229,16 +205,13 @@ cat versions.env # 2. Check circuit assets ls -lh "${HOME}/.logos-blockchain-circuits" -# 3. Verify POL_PROOF_DEV_MODE is set -echo $POL_PROOF_DEV_MODE # Should print: true - -# 4. For compose/k8s: verify Docker is running +# 3. For compose/k8s: verify Docker is running docker ps -# 5. For compose/k8s: verify image exists +# 4. For compose/k8s: verify image exists docker images | grep logos-blockchain-testing -# 6. For host runner: verify node binaries (if not using scripts) +# 5. For host runner: verify node binaries (if not using scripts) $LOGOS_BLOCKCHAIN_NODE_BIN --version ``` diff --git a/book/src/quickstart.md b/book/src/quickstart.md index 450afec..991fd89 100644 --- a/book/src/quickstart.md +++ b/book/src/quickstart.md @@ -16,7 +16,7 @@ git clone https://github.com/logos-blockchain/logos-blockchain-testing.git cd logos-blockchain-testing # 3. 
Run your first scenario (downloads dependencies automatically) -POL_PROOF_DEV_MODE=true scripts/run/run-examples.sh -t 60 -n 1 host +scripts/run/run-examples.sh -t 60 -n 1 host ``` **First run takes 5-10 minutes** (downloads ~120MB circuit assets, builds binaries). @@ -56,7 +56,7 @@ This handles circuit setup, binary building, and runs a complete scenario: 1 nod ```bash # Requires circuits in place and LOGOS_BLOCKCHAIN_NODE_BIN set -POL_PROOF_DEV_MODE=true cargo run -p runner-examples --bin local_runner +cargo run -p runner-examples --bin local_runner ``` **Core API Pattern** (simplified example): @@ -92,8 +92,6 @@ pub async fn run_local_demo() -> Result<()> { **Note:** The examples are binaries with `#[tokio::main]`, not test functions. If you want to write integration tests, wrap this pattern in `#[tokio::test]` functions in your own test suite. -**Important:** `POL_PROOF_DEV_MODE=true` disables expensive Groth16 zero-knowledge proof generation for leader election. Without it, proof generation is CPU-intensive and tests will timeout. **This is required for all runners** (local, compose, k8s) for practical testing. Never use in production. - **What you should see:** - Nodes spawn as local processes - Consensus starts producing blocks @@ -213,7 +211,6 @@ scripts/run/run-examples.sh -t 120 -n 3 host # Uses LOGOS_BLOCKCHAIN_DEMO_* env vars (or legacy *_DEMO_* vars) LOGOS_BLOCKCHAIN_DEMO_NODES=3 \ LOGOS_BLOCKCHAIN_DEMO_RUN_SECS=120 \ -POL_PROOF_DEV_MODE=true \ cargo run -p runner-examples --bin local_runner ``` @@ -246,7 +243,6 @@ scripts/build/build_test_image.sh # Run with Compose LOGOS_BLOCKCHAIN_TESTNET_IMAGE=logos-blockchain-testing:local \ -POL_PROOF_DEV_MODE=true \ cargo run -p runner-examples --bin compose_runner ``` diff --git a/book/src/runners.md b/book/src/runners.md index 000dbd5..40f72b0 100644 --- a/book/src/runners.md +++ b/book/src/runners.md @@ -4,8 +4,6 @@ Runners turn a scenario plan into a live environment while keeping the plan unchanged. 
Choose based on feedback speed, reproducibility, and fidelity. For environment and operational considerations, see [Operations Overview](operations-overview.md). -**Important:** All runners require `POL_PROOF_DEV_MODE=true` to avoid expensive Groth16 proof generation that causes timeouts. - ## Host runner (local processes) - Launches node processes directly on the host (via `LocalDeployer`). - Binary: `local_runner.rs`, script mode: `host` diff --git a/book/src/running-examples.md b/book/src/running-examples.md index f05f863..5cab4eb 100644 --- a/book/src/running-examples.md +++ b/book/src/running-examples.md @@ -99,7 +99,6 @@ scripts/ops/clean.sh --docker For manual control, run the `local_runner` binary directly: ```bash -POL_PROOF_DEV_MODE=true \ LOGOS_BLOCKCHAIN_NODE_BIN=/path/to/logos-blockchain-node \ cargo run -p runner-examples --bin local_runner ``` @@ -116,7 +115,6 @@ cargo run -p runner-examples --bin local_runner | `LOGOS_BLOCKCHAIN_TESTS_TRACING` | false | Enable debug tracing preset | | `LOGOS_BLOCKCHAIN_LOG_LEVEL` | info | Global log level: error, warn, info, debug, trace | | `LOGOS_BLOCKCHAIN_LOG_FILTER` | None | Fine-grained module filtering (e.g., `cryptarchia=trace`) | -| `POL_PROOF_DEV_MODE` | — | **REQUIRED**: Set to `true` for all runners | **Note:** Requires circuit assets and host binaries. Use `scripts/run/run-examples.sh host` to handle setup automatically. @@ -139,7 +137,6 @@ scripts/build/build_test_image.sh # 3. 
Run LOGOS_BLOCKCHAIN_TESTNET_IMAGE=logos-blockchain-testing:local \ -POL_PROOF_DEV_MODE=true \ cargo run -p runner-examples --bin compose_runner ``` @@ -154,7 +151,6 @@ scripts/build/build_test_image.sh # Run LOGOS_BLOCKCHAIN_TESTNET_IMAGE=logos-blockchain-testing:local \ -POL_PROOF_DEV_MODE=true \ cargo run -p runner-examples --bin compose_runner ``` @@ -169,7 +165,6 @@ cargo run -p runner-examples --bin compose_runner | Variable | Default | Effect | |----------|---------|--------| | `LOGOS_BLOCKCHAIN_TESTNET_IMAGE` | — | Image tag (required, must match built image) | -| `POL_PROOF_DEV_MODE` | — | **REQUIRED**: Set to `true` for all runners | | `LOGOS_BLOCKCHAIN_DEMO_NODES` | 1 | Number of nodes | | `LOGOS_BLOCKCHAIN_DEMO_RUN_SECS` | 60 | Run duration in seconds | | `COMPOSE_NODE_PAIRS` | — | Alternative topology format: "nodes" (e.g., `3`) | @@ -232,7 +227,6 @@ export LOGOS_BLOCKCHAIN_TESTNET_IMAGE=your-registry/logos-blockchain-testing:lat ```bash export LOGOS_BLOCKCHAIN_TESTNET_IMAGE=logos-blockchain-testing:local -export POL_PROOF_DEV_MODE=true cargo run -p runner-examples --bin k8s_runner ``` @@ -241,7 +235,6 @@ cargo run -p runner-examples --bin k8s_runner | Variable | Default | Effect | |----------|---------|--------| | `LOGOS_BLOCKCHAIN_TESTNET_IMAGE` | — | Image tag (required) | -| `POL_PROOF_DEV_MODE` | — | **REQUIRED**: Set to `true` for all runners | | `LOGOS_BLOCKCHAIN_DEMO_NODES` | 1 | Number of nodes | | `LOGOS_BLOCKCHAIN_DEMO_RUN_SECS` | 60 | Run duration in seconds | | `LOGOS_BLOCKCHAIN_METRICS_QUERY_URL` | None | Prometheus-compatible base URL for runner to query (PromQL) | diff --git a/book/src/running-scenarios.md b/book/src/running-scenarios.md index 995e539..d648ade 100644 --- a/book/src/running-scenarios.md +++ b/book/src/running-scenarios.md @@ -69,7 +69,6 @@ Notes: Run the built-in local examples: ```bash -POL_PROOF_DEV_MODE=true \ scripts/run/run-examples.sh -t 60 -n 3 host ``` @@ -82,7 +81,6 @@ scripts/run/run-examples.sh -t 60 -n 3 host 
Run the built-in compose examples: ```bash -POL_PROOF_DEV_MODE=true \ scripts/run/run-examples.sh -t 60 -n 3 compose ``` @@ -95,7 +93,6 @@ scripts/run/run-examples.sh -t 60 -n 3 compose Run the built-in k8s examples: ```bash -POL_PROOF_DEV_MODE=true \ scripts/run/run-examples.sh -t 60 -n 3 k8s ``` diff --git a/book/src/troubleshooting.md b/book/src/troubleshooting.md index 50b5330..be18f5c 100644 --- a/book/src/troubleshooting.md +++ b/book/src/troubleshooting.md @@ -2,7 +2,6 @@ **Prerequisites for All Runners:** - **`versions.env` file** at repository root (required by helper scripts) -- **`POL_PROOF_DEV_MODE=true`** MUST be set for all runners (host, compose, k8s) to avoid expensive Groth16 proof generation that causes timeouts - **Circuit assets** must be present and `LOGOS_BLOCKCHAIN_CIRCUITS` must point to a directory that contains them **Platform/Environment Notes:** @@ -18,7 +17,6 @@ Common symptoms and likely causes: -- **No or slow block progression**: missing `POL_PROOF_DEV_MODE=true`, missing circuit assets, too-short run window, port conflicts, or resource exhaustion—set required env vars, verify assets exist, extend duration, check node logs for startup errors. - **Transactions not included**: unfunded or misconfigured wallets (check `.wallets(N)` vs `.users(M)`), transaction rate exceeding block capacity, or rates exceeding block production speed—reduce rate, increase wallet count, verify wallet setup in logs. - **Chaos stalls the run**: chaos (node control) only works with ComposeDeployer; host runner (LocalDeployer) and K8sDeployer don't support it (won't "stall", just can't execute chaos workloads). With compose, aggressive restart cadence can prevent consensus recovery—widen restart intervals. - **Observability gaps**: metrics or logs unreachable because ports clash or services are not exposed—adjust observability ports and confirm runner wiring. 
@@ -28,40 +26,7 @@ Common symptoms and likely causes: This section shows what you'll actually see when common issues occur. Each example includes realistic console output and the fix. -### 1. Missing `POL_PROOF_DEV_MODE=true` (Most Common!) - -**Symptoms:** -- Test "hangs" with no visible progress -- CPU usage spikes to 100% -- Eventually hits timeout after several minutes -- Nodes appear to start but blocks aren't produced - -**What you'll see:** - -```text -$ cargo run -p runner-examples --bin local_runner - Finished dev [unoptimized + debuginfo] target(s) in 0.48s - Running `target/debug/local_runner` -[INFO runner_examples::local_runner] Starting local runner scenario -[INFO testing_framework_runner_local] Launching 3 nodes -[INFO testing_framework_runner_local] Waiting for node readiness... -(hangs here for 5+ minutes, CPU at 100%) -thread 'main' panicked at 'readiness timeout expired' -``` - -**Root Cause:** Groth16 proof generation is extremely slow without dev mode. The system tries to compute real cryptographic proofs, which can take minutes per block. - -**Fix:** - -```bash -POL_PROOF_DEV_MODE=true cargo run -p runner-examples --bin local_runner -``` - -**Prevention:** Set this in your shell profile or `.env` file so you never forget it. - ---- - -### 2. Missing `versions.env` File +### 1. Missing `versions.env` File **Symptoms:** - Helper scripts fail immediately @@ -93,7 +58,7 @@ cat versions.env --- -### 3. Missing Circuit Assets +### 2. Missing Circuit Assets **Symptoms:** - Node startup fails early @@ -102,7 +67,7 @@ cat versions.env **What you'll see:** ```text -$ POL_PROOF_DEV_MODE=true cargo run -p runner-examples --bin local_runner +$ cargo run -p runner-examples --bin local_runner [INFO testing_framework_runner_local] Starting local runner scenario Error: circuit assets directory missing or invalid thread 'main' panicked at 'workload init failed' @@ -129,7 +94,7 @@ export LOGOS_BLOCKCHAIN_CIRCUITS=$HOME/.logos-blockchain-circuits --- -### 4. 
Node Binaries Not Found +### 3. Node Binaries Not Found **Symptoms:** - Error about missing `logos-blockchain-node` binary @@ -139,7 +104,7 @@ export LOGOS_BLOCKCHAIN_CIRCUITS=$HOME/.logos-blockchain-circuits **What you'll see:** ```text -$ POL_PROOF_DEV_MODE=true cargo run -p runner-examples --bin local_runner +$ cargo run -p runner-examples --bin local_runner [INFO testing_framework_runner_local] Spawning node 0 Error: Os { code: 2, kind: NotFound, message: "No such file or directory" } thread 'main' panicked at 'failed to spawn logos-blockchain-node process' @@ -166,12 +131,12 @@ export LOGOS_BLOCKCHAIN_NODE_BIN=$PWD/target/release/logos-blockchain-node # Return to testing framework cd ../nomos-testing -POL_PROOF_DEV_MODE=true cargo run -p runner-examples --bin local_runner +cargo run -p runner-examples --bin local_runner ``` --- -### 5. Docker Daemon Not Running (Compose) +### 4. Docker Daemon Not Running (Compose) **Symptoms:** - Compose tests fail immediately @@ -208,7 +173,7 @@ sudo usermod -aG docker $USER --- -### 6. Image Not Found (Compose/K8s) +### 5. Image Not Found (Compose/K8s) **Symptoms:** - Compose/K8s tests fail during deployment @@ -218,7 +183,7 @@ sudo usermod -aG docker $USER **What you'll see:** ```text -$ POL_PROOF_DEV_MODE=true cargo run -p runner-examples --bin compose_runner +$ cargo run -p runner-examples --bin compose_runner [INFO testing_framework_runner_compose] Starting compose deployment Error: Failed to pull image 'logos-blockchain-testing:local': No such image thread 'main' panicked at 'compose deployment failed' @@ -255,7 +220,7 @@ kind load docker-image logos-blockchain-testing:local --- -### 7. Port Conflicts +### 6. 
Port Conflicts **Symptoms:** - "Address already in use" errors @@ -265,7 +230,7 @@ kind load docker-image logos-blockchain-testing:local **What you'll see:** ```text -$ POL_PROOF_DEV_MODE=true cargo run -p runner-examples --bin local_runner +$ cargo run -p runner-examples --bin local_runner [INFO testing_framework_runner_local] Launching node 0 on port 18080 Error: Os { code: 48, kind: AddrInUse, message: "Address already in use" } thread 'main' panicked at 'failed to bind port 18080' @@ -305,7 +270,7 @@ vim scripts/observability/compose/docker-compose.yml --- -### 8. Wallet Seeding Failed (Insufficient Funds) +### 7. Wallet Seeding Failed (Insufficient Funds) **Symptoms:** - Transaction workload reports wallet issues @@ -315,7 +280,7 @@ vim scripts/observability/compose/docker-compose.yml **What you'll see:** ```text -$ POL_PROOF_DEV_MODE=true cargo run -p runner-examples --bin local_runner +$ cargo run -p runner-examples --bin local_runner [INFO testing_framework_workflows] Starting transaction workload with 10 users [ERROR testing_framework_workflows] Wallet seeding failed: requested 10 users but only 3 wallets available thread 'main' panicked at 'workload init failed: insufficient wallets' @@ -340,7 +305,7 @@ let scenario = ScenarioBuilder::topology_with(|t| t.network_star().nodes(3)) --- -### 9. Resource Exhaustion (OOM / CPU) +### 8. Resource Exhaustion (OOM / CPU) **Symptoms:** - Nodes crash randomly @@ -385,7 +350,7 @@ ulimit -n 4096 --- -### 10. Logs Disappear After Run +### 9. 
Logs Disappear After Run **Symptoms:** - Test completes but no logs on disk @@ -395,7 +360,7 @@ ulimit -n 4096 **What you'll see:** ```text -$ POL_PROOF_DEV_MODE=true cargo run -p runner-examples --bin local_runner +$ cargo run -p runner-examples --bin local_runner [INFO runner_examples] Test complete, cleaning up [INFO testing_framework_runner_local] Removing temporary directories $ ls .tmp/ @@ -410,7 +375,6 @@ $ ls .tmp/ # Persist logs to a specific directory LOGOS_BLOCKCHAIN_LOG_DIR=/tmp/test-logs \ LOGOS_BLOCKCHAIN_TESTS_KEEP_LOGS=1 \ -POL_PROOF_DEV_MODE=true \ cargo run -p runner-examples --bin local_runner # Logs persist after run @@ -422,7 +386,7 @@ ls /tmp/test-logs/ --- -### 11. Consensus Timing Too Tight / Run Duration Too Short +### 10. Consensus Timing Too Tight / Run Duration Too Short **Symptoms:** - "Consensus liveness expectation failed" @@ -432,7 +396,7 @@ ls /tmp/test-logs/ **What you'll see:** ```text -$ POL_PROOF_DEV_MODE=true cargo run -p runner-examples --bin local_runner +$ cargo run -p runner-examples --bin local_runner [INFO testing_framework_core] Starting workloads [INFO testing_framework_core] Run window: 10 seconds [INFO testing_framework_core] Evaluating expectations @@ -463,7 +427,6 @@ let scenario = ScenarioBuilder::topology_with(|t| t.network_star().nodes(3)) # Faster block production (shorter slot time) CONSENSUS_SLOT_TIME=5 \ CONSENSUS_ACTIVE_SLOT_COEFF=0.9 \ -POL_PROOF_DEV_MODE=true \ cargo run -p runner-examples --bin local_runner ``` @@ -473,17 +436,16 @@ cargo run -p runner-examples --bin local_runner When a test fails, check these in order: -1. **`POL_PROOF_DEV_MODE=true` is set** (REQUIRED for all runners) -2. **`versions.env` exists at repo root** -3. **Circuit assets present** (`LOGOS_BLOCKCHAIN_CIRCUITS` points to a valid directory) -4. **Node binaries available** (`LOGOS_BLOCKCHAIN_NODE_BIN` set, or using `run-examples.sh`) -5. **Docker daemon running** (for compose/k8s) -6. 
**Docker image built** (`logos-blockchain-testing:local` exists for compose/k8s) -7. **No port conflicts** (`lsof -i :18080`, kill orphaned processes) -8. **Sufficient wallets** (`.wallets(N)` ≥ `.users(M)`) -9. **Enough resources** (Docker memory 8GB+, ulimit -n 4096) -10. **Run duration appropriate** (long enough for consensus timing) -11. **Logs persisted** (`LOGOS_BLOCKCHAIN_LOG_DIR` + `LOGOS_BLOCKCHAIN_TESTS_KEEP_LOGS=1` if needed) +1. **`versions.env` exists at repo root** +2. **Circuit assets present** (`LOGOS_BLOCKCHAIN_CIRCUITS` points to a valid directory) +3. **Node binaries available** (`LOGOS_BLOCKCHAIN_NODE_BIN` set, or using `run-examples.sh`) +4. **Docker daemon running** (for compose/k8s) +5. **Docker image built** (`logos-blockchain-testing:local` exists for compose/k8s) +6. **No port conflicts** (`lsof -i :18080`, kill orphaned processes) +7. **Sufficient wallets** (`.wallets(N)` ≥ `.users(M)`) +8. **Enough resources** (Docker memory 8GB+, ulimit -n 4096) +9. **Run duration appropriate** (long enough for consensus timing) +10. **Logs persisted** (`LOGOS_BLOCKCHAIN_LOG_DIR` + `LOGOS_BLOCKCHAIN_TESTS_KEEP_LOGS=1` if needed) **Still stuck?** Check node logs (see [Where to Find Logs](#where-to-find-logs)) for the actual error. 
@@ -509,14 +471,13 @@ When a test fails, check these in order: **Console output (default):** ```bash -POL_PROOF_DEV_MODE=true cargo run -p runner-examples --bin local_runner 2>&1 | tee test.log +cargo run -p runner-examples --bin local_runner 2>&1 | tee test.log ``` **Persistent file output:** ```bash LOGOS_BLOCKCHAIN_LOG_DIR=/tmp/debug-logs \ LOGOS_BLOCKCHAIN_LOG_LEVEL=debug \ -POL_PROOF_DEV_MODE=true \ cargo run -p runner-examples --bin local_runner # Inspect logs (note: filenames include timestamps): @@ -546,7 +507,6 @@ docker logs --tail 100 ```bash COMPOSE_RUNNER_PRESERVE=1 \ LOGOS_BLOCKCHAIN_TESTNET_IMAGE=logos-blockchain-testing:local \ -POL_PROOF_DEV_MODE=true \ cargo run -p runner-examples --bin compose_runner # OR: Use run-examples.sh (handles setup automatically) @@ -651,7 +611,6 @@ Focus on the first node that exhibited problems or the node with the highest ind - "ERROR: versions.env missing" → missing required `versions.env` file at repository root - "Failed to bind address" → port conflict - "Connection refused" → peer not ready or network issue -- "Proof verification failed" or "Proof generation timeout" → missing `POL_PROOF_DEV_MODE=true` (REQUIRED for all runners) - "Circuit file not found" → missing circuit assets at the path in `LOGOS_BLOCKCHAIN_CIRCUITS` - "Insufficient funds" → wallet seeding issue (increase `.wallets(N)` or reduce `.users(M)`) @@ -689,16 +648,14 @@ Run a minimal baseline test (e.g., 2 nodes, consensus liveness only). If it pass ### "Consensus liveness expectation failed" -- **Cause**: Not enough blocks produced during the run window, missing - `POL_PROOF_DEV_MODE=true` (causes slow proof generation), or missing circuit +- **Cause**: Not enough blocks produced during the run window, or missing circuit assets. - **Fix**: - 1. Verify `POL_PROOF_DEV_MODE=true` is set (REQUIRED for all runners). - 2. Verify circuit assets exist at the path referenced by + 1. 
Verify circuit assets exist at the path referenced by `LOGOS_BLOCKCHAIN_CIRCUITS`. - 3. Extend `with_run_duration()` to allow more blocks. - 4. Check node logs for proof generation or circuit asset errors. - 5. Reduce transaction rate if nodes are overwhelmed. + 2. Extend `with_run_duration()` to allow more blocks. + 3. Check node logs for proof generation or circuit asset errors. + 4. Reduce transaction rate if nodes are overwhelmed. ### "Wallet seeding failed" @@ -720,11 +677,10 @@ Run a minimal baseline test (e.g., 2 nodes, consensus liveness only). If it pass - **Cause**: Nodes didn't become responsive within expected time (often due to missing prerequisites). - **Fix**: - 1. **Verify `POL_PROOF_DEV_MODE=true` is set** (REQUIRED for all runners—without it, proof generation is too slow). - 2. Check node logs for startup errors (port conflicts, missing assets). - 3. Verify network connectivity between nodes. - 4. Ensure circuit assets are present and `LOGOS_BLOCKCHAIN_CIRCUITS` points to them. + 1. Check node logs for startup errors (port conflicts, missing assets). + 2. Verify network connectivity between nodes. + 3. Ensure circuit assets are present and `LOGOS_BLOCKCHAIN_CIRCUITS` points to them. ### "ERROR: versions.env missing" diff --git a/book/src/workloads.md b/book/src/workloads.md index a412b59..5712ca3 100644 --- a/book/src/workloads.md +++ b/book/src/workloads.md @@ -78,13 +78,7 @@ ScenarioBuilder::topology_with(|t| t.network_star().nodes(3)) ``` The workload will fail during `init()` if no wallets are configured. -2. **Proof generation must be fast:** - ```bash - export POL_PROOF_DEV_MODE=true - ``` - Without this, proof generation takes ~30-60 seconds per transaction, causing timeouts. - -3. **Circuit artifacts must be available:** +2. 
**Circuit artifacts must be available:** - Automatically staged by `scripts/run/run-examples.sh` - Or manually via `scripts/setup/setup-logos-blockchain-circuits.sh` (recommended) / `scripts/setup/setup-logos-blockchain-circuits.sh` @@ -108,7 +102,6 @@ Error: Expectation failed: TxInclusionExpectation Observed: 127 transactions Possible causes: - - POL_PROOF_DEV_MODE not set (proof generation too slow) - Duration too short (nodes still syncing) - Node crashes (check logs for panics/OOM) - Wallet accounts not seeded (check topology config) @@ -119,9 +112,8 @@ Error: Expectation failed: TxInclusionExpectation ```bash grep "proof generation" $LOGOS_BLOCKCHAIN_LOG_DIR/*/*.log ``` -2. Verify `POL_PROOF_DEV_MODE=true` was set -3. Increase duration: `.with_run_duration(Duration::from_secs(120))` -4. Reduce rate: `.rate(5)` instead of `.rate(10)` +2. Increase duration: `.with_run_duration(Duration::from_secs(120))` +3. Reduce rate: `.rate(5)` instead of `.rate(10)` --- @@ -382,12 +374,11 @@ ScenarioBuilder::topology_with(|t| t.network_star().nodes(3)) When a workload or expectation fails: 1. Check logs: `$LOGOS_BLOCKCHAIN_LOG_DIR/*/` or `docker compose logs` or `kubectl logs` -2. Verify environment variables: `POL_PROOF_DEV_MODE`, `LOGOS_BLOCKCHAIN_NODE_BIN`, etc. -3. Check prerequisites: wallets, node control, circuits -4. Increase duration: Double the run duration and retry -5. Reduce rates: Half the traffic rates and retry -6. Check metrics: Prometheus queries for block height and tx count -7. Reproduce locally: Use local runner for faster iteration +2. Check prerequisites: wallets, node control, circuits +3. Increase duration: Double the run duration and retry +4. Reduce rates: Half the traffic rates and retry +5. Check metrics: Prometheus queries for block height and tx count +6. 
Reproduce locally: Use local runner for faster iteration --- diff --git a/examples/src/bin/local_runner.rs b/examples/src/bin/local_runner.rs index e21c6ff..72a929e 100644 --- a/examples/src/bin/local_runner.rs +++ b/examples/src/bin/local_runner.rs @@ -1,4 +1,4 @@ -use std::{env, process, time::Duration}; +use std::{process, time::Duration}; use anyhow::{Context as _, Result}; use runner_examples::{DeployerKind, ScenarioBuilderExt as _, demo, read_env_any}; @@ -17,11 +17,6 @@ async fn main() { tracing_subscriber::fmt::init(); - if env::var("POL_PROOF_DEV_MODE").is_err() { - warn!("POL_PROOF_DEV_MODE=true is required for the local runner demo"); - process::exit(1); - } - let nodes = read_env_any(&["LOGOS_BLOCKCHAIN_DEMO_NODES"], demo::DEFAULT_NODES); let run_secs = read_env_any(&["LOGOS_BLOCKCHAIN_DEMO_RUN_SECS"], demo::DEFAULT_RUN_SECS); diff --git a/examples/src/defaults.rs b/examples/src/defaults.rs index 0520321..b441a37 100644 --- a/examples/src/defaults.rs +++ b/examples/src/defaults.rs @@ -21,7 +21,6 @@ fn set_default_env(key: &str, value: &str) { } pub fn init_logging_defaults() { - set_default_env("POL_PROOF_DEV_MODE", "true"); set_default_env("LOGOS_BLOCKCHAIN_TESTS_KEEP_LOGS", "1"); set_default_env("LOGOS_BLOCKCHAIN_LOG_LEVEL", "info"); set_default_env("RUST_LOG", "info"); diff --git a/examples/tests/dynamic_join.rs b/examples/tests/dynamic_join.rs index 6c9f316..7ceea1f 100644 --- a/examples/tests/dynamic_join.rs +++ b/examples/tests/dynamic_join.rs @@ -86,6 +86,7 @@ impl Workload for JoinNodeWithPeersWorkload { let options = StartNodeOptions { peers: PeerSelection::Named(self.peers.clone()), config_patch: None, + persist_dir: None, }; let node = handle.start_node_with(&self.name, options).await?; let client = node.api; diff --git a/examples/tests/manual_cluster.rs b/examples/tests/manual_cluster.rs index 805238c..a15ba3c 100644 --- a/examples/tests/manual_cluster.rs +++ b/examples/tests/manual_cluster.rs @@ -18,7 +18,6 @@ const CONVERGENCE_POLL: 
Duration = Duration::from_secs(2); async fn manual_cluster_two_clusters_merge() -> Result<()> { let _ = try_init(); // Required env vars (set on the command line when running this test): - // - `POL_PROOF_DEV_MODE=true` // - `RUST_LOG=info` (optional) let config = TopologyConfig::with_node_numbers(2); let deployer = LocalDeployer::new(); @@ -33,6 +32,7 @@ async fn manual_cluster_two_clusters_merge() -> Result<()> { StartNodeOptions { peers: PeerSelection::None, config_patch: None, + persist_dir: None, }, ) .await? @@ -48,6 +48,7 @@ async fn manual_cluster_two_clusters_merge() -> Result<()> { StartNodeOptions { peers: PeerSelection::Named(vec!["node-a".to_owned()]), config_patch: None, + persist_dir: None, }, ) .await? @@ -82,3 +83,57 @@ async fn manual_cluster_two_clusters_merge() -> Result<()> { sleep(CONVERGENCE_POLL).await; } } + +#[tokio::test] +#[ignore = "run manually with `cargo test -p runner-examples -- --ignored manual_cluster_with_persist_dir`"] +async fn manual_cluster_with_persist_dir() -> Result<()> { + use std::path::PathBuf; + + let _ = try_init(); + // Required env vars (set on the command line when running this test): + // - `RUST_LOG=info` (optional) + let config = TopologyConfig::with_node_numbers(1); + let deployer = LocalDeployer::new(); + let cluster = deployer.manual_cluster(config)?; + + let persist_dir = PathBuf::from("/tmp/test-node-persist-dir"); + + println!("starting validator with persist_dir: {:?}", persist_dir); + + let _node = cluster + .start_node_with( + "test", + StartNodeOptions { + peers: PeerSelection::None, + config_patch: None, + persist_dir: Some(persist_dir.clone()), + }, + ) + .await? 
+ .api; + + println!("validator started, waiting briefly"); + sleep(Duration::from_secs(5)).await; + + // Drop the cluster to trigger the persist logic + drop(cluster); + + println!("cluster dropped, checking if persist_dir exists"); + + // Verify the persist_dir was created + if !persist_dir.exists() { + return Err(anyhow::anyhow!( + "persist_dir was not created: {:?}", + persist_dir + )); + } + + println!("persist_dir verified: {:?}", persist_dir); + + // Clean up + if persist_dir.exists() { + std::fs::remove_dir_all(&persist_dir)?; + } + + Ok(()) +} diff --git a/examples/tests/node_config_override.rs b/examples/tests/node_config_override.rs index 85f9db2..d16071c 100644 --- a/examples/tests/node_config_override.rs +++ b/examples/tests/node_config_override.rs @@ -17,7 +17,6 @@ use tracing_subscriber::fmt::try_init; async fn manual_cluster_api_port_override() -> Result<()> { let _ = try_init(); // Required env vars (set on the command line when running this test): - // - `POL_PROOF_DEV_MODE=true` // - `LOGOS_BLOCKCHAIN_NODE_BIN=...` // - `LOGOS_BLOCKCHAIN_CIRCUITS=...` // - `RUST_LOG=info` (optional) @@ -33,6 +32,7 @@ async fn manual_cluster_api_port_override() -> Result<()> { StartNodeOptions { peers: PeerSelection::None, config_patch: None, + persist_dir: None, } .create_patch(move |mut config| { println!("overriding API port to {api_port}"); @@ -62,7 +62,6 @@ async fn manual_cluster_api_port_override() -> Result<()> { async fn scenario_builder_api_port_override() -> Result<()> { let _ = try_init(); // Required env vars (set on the command line when running this test): - // - `POL_PROOF_DEV_MODE=true` // - `LOGOS_BLOCKCHAIN_NODE_BIN=...` // - `LOGOS_BLOCKCHAIN_CIRCUITS=...` // - `RUST_LOG=info` (optional) diff --git a/examples/tests/orphan_manual_cluster.rs b/examples/tests/orphan_manual_cluster.rs index f6243d5..7c2ea0c 100644 --- a/examples/tests/orphan_manual_cluster.rs +++ b/examples/tests/orphan_manual_cluster.rs @@ -25,7 +25,6 @@ const POLL_INTERVAL: 
Duration = Duration::from_secs(1); async fn orphan_manual_cluster() -> Result<()> { let _ = try_init(); // Required env vars (set on the command line when running this test): - // - `POL_PROOF_DEV_MODE=true` // - `LOGOS_BLOCKCHAIN_NODE_BIN=...` // - `NOMOS_KZGRS_PARAMS_PATH=...` (path to KZG params directory/file) // - `RUST_LOG=info` (optional; better visibility) diff --git a/scripts/run/run-examples.sh b/scripts/run/run-examples.sh index 8c7d2e4..777a6b4 100755 --- a/scripts/run/run-examples.sh +++ b/scripts/run/run-examples.sh @@ -515,7 +515,6 @@ run_examples::run() { echo "==> Running ${BIN} for ${RUN_SECS}s (mode=${MODE}, image=${IMAGE})" cd "${ROOT_DIR}" - POL_PROOF_DEV_MODE=true \ TESTNET_PRINT_ENDPOINTS=1 \ LOGOS_BLOCKCHAIN_TESTNET_IMAGE="${IMAGE}" \ LOGOS_BLOCKCHAIN_NODE_BIN="${LOGOS_BLOCKCHAIN_NODE_BIN:-}" \ diff --git a/testing-framework/assets/stack/scripts/run_nomos.sh b/testing-framework/assets/stack/scripts/run_nomos.sh index a45a202..d27213f 100755 --- a/testing-framework/assets/stack/scripts/run_nomos.sh +++ b/testing-framework/assets/stack/scripts/run_nomos.sh @@ -47,8 +47,7 @@ export CFG_FILE_PATH="/config.yaml" \ CFG_HOST_KIND="${CFG_HOST_KIND:-$role}" \ CFG_HOST_IDENTIFIER="${CFG_HOST_IDENTIFIER:-$host_identifier_default}" \ LOGOS_BLOCKCHAIN_TIME_BACKEND="${LOGOS_BLOCKCHAIN_TIME_BACKEND:-monotonic}" \ - LOG_LEVEL="${LOG_LEVEL:-INFO}" \ - POL_PROOF_DEV_MODE="${POL_PROOF_DEV_MODE:-true}" + LOG_LEVEL="${LOG_LEVEL:-INFO}" # Ensure recovery directory exists to avoid early crashes in services that # persist state. 
diff --git a/testing-framework/configs/src/topology/configs/mod.rs b/testing-framework/configs/src/topology/configs/mod.rs index 8f09ae7..259c0ef 100644 --- a/testing-framework/configs/src/topology/configs/mod.rs +++ b/testing-framework/configs/src/topology/configs/mod.rs @@ -13,7 +13,8 @@ use std::cmp; use blend::GeneralBlendConfig; use consensus::{ - ConsensusConfigError, GeneralConsensusConfig, ProviderInfo, create_genesis_tx_with_declarations, + ConsensusConfigError, GeneralConsensusConfig, ProviderInfo, + create_genesis_tx_with_declarations, sync_utxos_with_genesis, }; use key_management_system_service::{backend::preload::PreloadKMSBackendSettings, keys::Key}; use network::GeneralNetworkConfig; @@ -203,7 +204,7 @@ fn apply_consensus_genesis_tx( ) -> Result<(), ConsensusConfigError> { for c in consensus_configs { c.genesis_tx = genesis_tx.clone(); - consensus::sync_utxos_with_genesis(&mut c.utxos, genesis_tx)?; + sync_utxos_with_genesis(&mut c.utxos, genesis_tx)?; } Ok(()) diff --git a/testing-framework/core/src/nodes/common/node.rs b/testing-framework/core/src/nodes/common/node.rs index 79323ce..581d5f0 100644 --- a/testing-framework/core/src/nodes/common/node.rs +++ b/testing-framework/core/src/nodes/common/node.rs @@ -71,15 +71,23 @@ pub struct NodeHandle { pub(crate) tempdir: TempDir, pub(crate) config: T, pub(crate) api: ApiClient, + pub(crate) persist_dir: Option, } impl NodeHandle { - pub fn new(child: Child, tempdir: TempDir, config: T, api: ApiClient) -> Self { + pub fn new( + child: Child, + tempdir: TempDir, + config: T, + api: ApiClient, + persist_dir: Option, + ) -> Self { Self { child, tempdir, config, api, + persist_dir, } } @@ -154,6 +162,7 @@ pub async fn spawn_node( config_filename: &str, binary_path: PathBuf, enable_logging: bool, + persist_dir: Option, ) -> Result, SpawnNodeError> where C: NodeConfigCommon + Serialize, @@ -168,7 +177,13 @@ where let child = spawn_node_process(&binary_path, &config_path, dir.path())?; - let mut handle = 
NodeHandle::new(child, dir, config, ApiClient::new(addr, testing_addr)); + let mut handle = NodeHandle::new( + child, + dir, + config, + ApiClient::new(addr, testing_addr), + persist_dir, + ); // Wait for readiness via consensus_info let ready = wait_for_consensus_readiness(&handle.api).await; diff --git a/testing-framework/core/src/nodes/mod.rs b/testing-framework/core/src/nodes/mod.rs index d6abce5..26e70f9 100644 --- a/testing-framework/core/src/nodes/mod.rs +++ b/testing-framework/core/src/nodes/mod.rs @@ -7,6 +7,7 @@ use std::sync::LazyLock; pub use api_client::{ApiClient, ApiClientError}; use tempfile::TempDir; use testing_framework_env as tf_env; +use tracing::info; pub(crate) const LOGS_PREFIX: &str = "__logs"; static KEEP_NODE_TEMPDIRS: LazyLock = LazyLock::new(tf_env::nomos_tests_keep_logs); @@ -30,6 +31,55 @@ fn persist_tempdir(tempdir: &mut TempDir, label: &str) -> std::io::Result<()> { Ok(()) } +pub(crate) fn persist_tempdir_to( + tempdir: &mut TempDir, + target_dir: &std::path::Path, + label: &str, +) -> std::io::Result<()> { + use std::fs; + + info!( + label, + from = %tempdir.path().display(), + to = %target_dir.display(), + "persisting directory" + ); + + // Create parent directory if it doesn't exist + if let Some(parent) = target_dir.parent() { + fs::create_dir_all(parent)?; + } + + // Copy tempdir contents to target directory + if target_dir.exists() { + fs::remove_dir_all(target_dir)?; + } + + /// Recursively copies all contents from src directory to dst directory. + /// + /// # Arguments + /// * `src` - Source directory path to copy from + /// * `dst` - Destination directory path to copy to + fn copy_dir_all(src: &std::path::Path, dst: &std::path::Path) -> std::io::Result<()> { + use std::fs; + fs::create_dir_all(dst)?; + for entry in fs::read_dir(src)? 
{ + let entry = entry?; + let ty = entry.file_type()?; + if ty.is_dir() { + copy_dir_all(&entry.path(), &dst.join(entry.file_name()))?; + } else { + fs::copy(entry.path(), dst.join(entry.file_name()))?; + } + } + Ok(()) + } + + copy_dir_all(tempdir.path(), target_dir)?; + + Ok(()) +} + pub(crate) fn should_persist_tempdir() -> bool { std::thread::panicking() || *KEEP_NODE_TEMPDIRS } diff --git a/testing-framework/core/src/nodes/node.rs b/testing-framework/core/src/nodes/node.rs index 80035a3..6b7b8da 100644 --- a/testing-framework/core/src/nodes/node.rs +++ b/testing-framework/core/src/nodes/node.rs @@ -5,7 +5,7 @@ use nomos_tracing_service::LoggerLayer; pub use testing_framework_config::nodes::node::create_node_config; use tracing::{debug, info}; -use super::{persist_tempdir, should_persist_tempdir}; +use super::{persist_tempdir, persist_tempdir_to, should_persist_tempdir}; use crate::{ IS_DEBUG_TRACING, nodes::{ @@ -67,10 +67,20 @@ impl Deref for Node { impl Drop for Node { fn drop(&mut self) { - if should_persist_tempdir() - && let Err(e) = persist_tempdir(&mut self.handle.tempdir, "logos-blockchain-node") - { - debug!(error = ?e, "failed to persist node tempdir"); + if should_persist_tempdir() { + if let Some(ref persist_dir) = self.handle.persist_dir { + if let Err(e) = persist_tempdir_to( + &mut self.handle.tempdir, + persist_dir, + "logos-blockchain-node", + ) { + debug!(error = ?e, persist_dir = %persist_dir.display(), "failed to persist node tempdir to custom directory"); + } + } else { + if let Err(e) = persist_tempdir(&mut self.handle.tempdir, "logos-blockchain-node") { + debug!(error = ?e, "failed to persist node tempdir"); + } + } } debug!("stopping node process"); @@ -96,7 +106,11 @@ impl Node { self.handle.wait_for_exit(timeout).await } - pub async fn spawn(config: RunConfig, label: &str) -> Result { + pub async fn spawn( + config: RunConfig, + label: &str, + persist_dir: Option, + ) -> Result { let log_prefix = format!("{LOGS_PREFIX}-{label}"); let 
handle = spawn_node( config, @@ -104,6 +118,7 @@ impl Node { "node.yaml", binary_path(), !*IS_DEBUG_TRACING, + persist_dir, ) .await?; diff --git a/testing-framework/core/src/scenario/capabilities.rs b/testing-framework/core/src/scenario/capabilities.rs index c0e657f..712c966 100644 --- a/testing-framework/core/src/scenario/capabilities.rs +++ b/testing-framework/core/src/scenario/capabilities.rs @@ -1,4 +1,4 @@ -use std::sync::Arc; +use std::{path::PathBuf, sync::Arc}; use reqwest::Url; @@ -41,6 +41,8 @@ pub struct StartNodeOptions { pub peers: PeerSelection, /// Optional node config patch applied before spawn. pub config_patch: Option, + /// Optional directory to persist node's tempdir to on stop. + pub persist_dir: Option, } impl Default for StartNodeOptions { @@ -48,6 +50,7 @@ impl Default for StartNodeOptions { Self { peers: PeerSelection::DefaultLayout, config_patch: None, + persist_dir: None, } } } diff --git a/testing-framework/core/src/topology/config.rs b/testing-framework/core/src/topology/config.rs index da9b2ac..0360754 100644 --- a/testing-framework/core/src/topology/config.rs +++ b/testing-framework/core/src/topology/config.rs @@ -1,4 +1,4 @@ -use std::{collections::HashMap, sync::Arc}; +use std::{collections::HashMap, path::PathBuf, sync::Arc}; use nomos_core::{ mantle::GenesisTx as _, @@ -11,7 +11,7 @@ use testing_framework_config::topology::{ base::{BaseConfigError, BaseConfigs, build_base_configs}, consensus::{ ConsensusConfigError, ConsensusParams, ProviderInfo, - create_genesis_tx_with_declarations, + create_genesis_tx_with_declarations, sync_utxos_with_genesis, }, network::{Libp2pNetworkLayout, NetworkParams}, tracing::create_tracing_configs, @@ -65,6 +65,7 @@ pub struct TopologyConfig { pub network_params: NetworkParams, pub wallet_config: WalletConfig, pub node_config_patches: HashMap, + pub persist_dirs: HashMap, } impl TopologyConfig { @@ -77,6 +78,7 @@ impl TopologyConfig { network_params: NetworkParams::default(), wallet_config: 
WalletConfig::default(), node_config_patches: HashMap::new(), + persist_dirs: HashMap::new(), } } @@ -89,6 +91,7 @@ impl TopologyConfig { network_params: NetworkParams::default(), wallet_config: WalletConfig::default(), node_config_patches: HashMap::new(), + persist_dirs: HashMap::new(), } } @@ -103,6 +106,7 @@ impl TopologyConfig { network_params: NetworkParams::default(), wallet_config: WalletConfig::default(), node_config_patches: HashMap::new(), + persist_dirs: HashMap::new(), } } @@ -121,6 +125,17 @@ impl TopologyConfig { self.node_config_patches.insert(index, patch); self } + + #[must_use] + pub fn persist_dir(&self, index: usize) -> Option<&PathBuf> { + self.persist_dirs.get(&index) + } + + #[must_use] + pub fn with_persist_dir(mut self, index: usize, dir: PathBuf) -> Self { + self.persist_dirs.insert(index, dir); + self + } } /// Builder that produces `GeneratedTopology` instances from a `TopologyConfig`. @@ -163,6 +178,13 @@ impl TopologyBuilder { self } + #[must_use] + /// Apply a persist dir for a specific node index. + pub fn with_persist_dir(mut self, index: usize, dir: PathBuf) -> Self { + self.config.persist_dirs.insert(index, dir); + self + } + #[must_use] /// Set node counts. 
pub const fn with_node_count(mut self, nodes: usize) -> Self { @@ -240,6 +262,7 @@ impl TopologyBuilder { &kms_configs, &time_config, &config.node_config_patches, + &config.persist_dirs, )?; Ok(GeneratedTopology { config, nodes }) @@ -310,10 +333,7 @@ fn apply_consensus_genesis_tx( ) -> Result<(), TopologyBuildError> { for c in consensus_configs { c.genesis_tx = genesis_tx.clone(); - testing_framework_config::topology::configs::consensus::sync_utxos_with_genesis( - &mut c.utxos, - genesis_tx, - )?; + sync_utxos_with_genesis(&mut c.utxos, genesis_tx)?; } Ok(()) } @@ -333,6 +353,7 @@ fn build_node_descriptors( kms_configs: &[key_management_system_service::backend::preload::PreloadKMSBackendSettings], time_config: &testing_framework_config::topology::configs::time::GeneralTimeConfig, node_config_patches: &HashMap, + persist_dirs: &HashMap, ) -> Result, TopologyBuildError> { let mut nodes = Vec::with_capacity(config.n_nodes); @@ -367,6 +388,7 @@ fn build_node_descriptors( general, blend_port, config_patch: node_config_patches.get(&i).cloned(), + persist_dir: persist_dirs.get(&i).cloned(), }; nodes.push(descriptor); diff --git a/testing-framework/core/src/topology/generation.rs b/testing-framework/core/src/topology/generation.rs index 3ea2b2d..4aa779a 100644 --- a/testing-framework/core/src/topology/generation.rs +++ b/testing-framework/core/src/topology/generation.rs @@ -1,4 +1,4 @@ -use std::{collections::HashSet, time::Duration}; +use std::{collections::HashSet, path::PathBuf, time::Duration}; use reqwest::{Client, Url}; @@ -16,6 +16,7 @@ pub struct GeneratedNodeConfig { pub general: GeneralConfig, pub blend_port: u16, pub config_patch: Option, + pub persist_dir: Option, } impl GeneratedNodeConfig { diff --git a/testing-framework/deployers/compose/src/descriptor/mod.rs b/testing-framework/deployers/compose/src/descriptor/mod.rs index 305e5f7..47077dd 100644 --- a/testing-framework/deployers/compose/src/descriptor/mod.rs +++ 
b/testing-framework/deployers/compose/src/descriptor/mod.rs @@ -119,12 +119,10 @@ fn default_extra_hosts() -> Vec { } fn base_environment(cfgsync_port: u16) -> Vec { - let pol_mode = tf_env::pol_proof_dev_mode().unwrap_or_else(|| "true".to_string()); let rust_log = tf_env::rust_log().unwrap_or_else(|| "info".to_string()); let nomos_log_level = tf_env::nomos_log_level().unwrap_or_else(|| "info".to_string()); let time_backend = tf_env::nomos_time_backend().unwrap_or_else(|| "monotonic".into()); vec![ - EnvEntry::new("POL_PROOF_DEV_MODE", pol_mode), EnvEntry::new("RUST_LOG", rust_log), EnvEntry::new("LOGOS_BLOCKCHAIN_LOG_LEVEL", nomos_log_level), EnvEntry::new("LOGOS_BLOCKCHAIN_TIME_BACKEND", time_backend), diff --git a/testing-framework/deployers/k8s/src/infrastructure/assets.rs b/testing-framework/deployers/k8s/src/infrastructure/assets.rs index 729728e..d6980f2 100644 --- a/testing-framework/deployers/k8s/src/infrastructure/assets.rs +++ b/testing-framework/deployers/k8s/src/infrastructure/assets.rs @@ -291,11 +291,9 @@ fn build_values(topology: &GeneratedTopology) -> HelmValues { let cfgsync = CfgsyncValues { port: cfgsync_port(), }; - let pol_mode = pol_proof_mode(); let image_pull_policy = tf_env::nomos_testnet_image_pull_policy().unwrap_or_else(|| "IfNotPresent".into()); - debug!(pol_mode, "rendering Helm values for k8s stack"); - let nodes = build_node_group("node", topology.nodes(), &pol_mode); + let nodes = build_node_group("node", topology.nodes()); HelmValues { image_pull_policy, @@ -307,12 +305,11 @@ fn build_values(topology: &GeneratedTopology) -> HelmValues { fn build_node_group( kind: &'static str, nodes: &[testing_framework_core::topology::generation::GeneratedNodeConfig], - pol_mode: &str, ) -> NodeGroup { let node_values = nodes .iter() .enumerate() - .map(|(index, node)| build_node_values(kind, index, node, pol_mode)) + .map(|(index, node)| build_node_values(kind, index, node)) .collect(); NodeGroup { @@ -325,10 +322,8 @@ fn build_node_values( 
kind: &'static str, index: usize, node: &testing_framework_core::topology::generation::GeneratedNodeConfig, - pol_mode: &str, ) -> NodeValues { let mut env = BTreeMap::new(); - env.insert("POL_PROOF_DEV_MODE".into(), pol_mode.to_string()); env.insert("CFG_NETWORK_PORT".into(), node.network_port().to_string()); env.insert("CFG_BLEND_PORT".into(), node.blend_port.to_string()); env.insert( @@ -352,7 +347,3 @@ fn build_node_values( env, } } - -fn pol_proof_mode() -> String { - tf_env::pol_proof_dev_mode().unwrap_or_else(|| "true".to_string()) -} diff --git a/testing-framework/deployers/local/src/node_control/mod.rs b/testing-framework/deployers/local/src/node_control/mod.rs index b3b9846..da7ae79 100644 --- a/testing-framework/deployers/local/src/node_control/mod.rs +++ b/testing-framework/deployers/local/src/node_control/mod.rs @@ -110,7 +110,7 @@ impl LocalNodeManager { for node in descriptors.nodes() { let label = Self::default_label(node.index()); let config = create_node_config(node.general.clone()); - let spawned = Node::spawn(config, &label).await?; + let spawned = Node::spawn(config, &label, node.persist_dir.clone()).await?; nodes.push(spawned); } @@ -335,7 +335,7 @@ impl LocalNodeManager { )?; let api_client = self - .spawn_and_register_node(&node_name, network_port, config) + .spawn_and_register_node(&node_name, network_port, config, options.persist_dir) .await?; Ok(StartedNode { @@ -428,8 +428,9 @@ impl LocalNodeManager { node_name: &str, network_port: u16, config: RunConfig, + persist_dir: Option, ) -> Result { - let node = Node::spawn(config, node_name) + let node = Node::spawn(config, node_name, persist_dir) .await .map_err(|source| LocalNodeManagerError::Spawn { source })?; let client = node.api().clone(); diff --git a/testing-framework/env/src/lib.rs b/testing-framework/env/src/lib.rs index 671a392..b5a2437 100644 --- a/testing-framework/env/src/lib.rs +++ b/testing-framework/env/src/lib.rs @@ -52,11 +52,6 @@ pub fn nomos_testnet_image_pull_policy() -> 
Option { env::var("LOGOS_BLOCKCHAIN_TESTNET_IMAGE_PULL_POLICY").ok() } -#[must_use] -pub fn pol_proof_dev_mode() -> Option { - env::var("POL_PROOF_DEV_MODE").ok() -} - #[must_use] pub fn rust_log() -> Option { env::var("RUST_LOG").ok() diff --git a/testing-framework/workflows/src/expectations/consensus_liveness.rs b/testing-framework/workflows/src/expectations/consensus_liveness.rs index 085349f..434ef4f 100644 --- a/testing-framework/workflows/src/expectations/consensus_liveness.rs +++ b/testing-framework/workflows/src/expectations/consensus_liveness.rs @@ -25,7 +25,7 @@ impl Default for ConsensusLiveness { } const LAG_ALLOWANCE: u64 = 2; -const MIN_PROGRESS_BLOCKS: u64 = 5; +const MIN_PROGRESS_BLOCKS: u64 = 3; const REQUEST_RETRIES: usize = 15; const REQUEST_RETRY_DELAY: Duration = Duration::from_secs(2); const MAX_LAG_ALLOWANCE: u64 = 5; @@ -67,7 +67,7 @@ enum ConsensusLivenessIssue { #[derive(Debug, Error)] enum ConsensusLivenessError { - #[error("consensus liveness requires at least one node")] + #[error("consensus liveness requires at least one validator")] MissingParticipants, #[error("consensus liveness violated (target={target}):\n{details}")] Violations {