diff --git a/.cargo-deny.toml b/.cargo-deny.toml index ab01959..af01722 100644 --- a/.cargo-deny.toml +++ b/.cargo-deny.toml @@ -53,9 +53,10 @@ license-files = [{ hash = 0xcb90f5db, path = "LICENSE" }] name = "jsonpath-rust" [sources] -allow-git = ["https://github.com/EspressoSystems/jellyfish.git"] -unknown-git = "deny" +allow-git = [ + "https://github.com/EspressoSystems/jellyfish.git", + "https://github.com/logos-blockchain/logos-blockchain.git", + "https://github.com/logos-co/Overwatch", +] +unknown-git = "deny" unknown-registry = "deny" - -[sources.allow-org] -github = ["logos-co"] diff --git a/.github/workflows/lint.yml b/.github/workflows/lint.yml index 229f473..9960f28 100644 --- a/.github/workflows/lint.yml +++ b/.github/workflows/lint.yml @@ -98,41 +98,6 @@ jobs: restore-keys: ${{ runner.os }}-target-clippy- - run: cargo +nightly-2025-09-14 clippy --all --all-targets --all-features -- -D warnings - doc_snippets: - runs-on: ubuntu-latest - steps: - - uses: actions/checkout@v4 - - name: Load versions - run: | - set -euo pipefail - if [ ! -f versions.env ]; then - echo "versions.env missing; populate VERSION, LOGOS_BLOCKCHAIN_NODE_REV, LOGOS_BLOCKCHAIN_BUNDLE_VERSION" >&2 - exit 1 - fi - set -a - . versions.env - set +a - # $GITHUB_ENV does not accept comments/blank lines; keep only KEY=VALUE exports. - grep -E '^[A-Za-z_][A-Za-z0-9_]*=' versions.env >> "$GITHUB_ENV" - : "${VERSION:?Missing VERSION}" - : "${LOGOS_BLOCKCHAIN_NODE_REV:?Missing LOGOS_BLOCKCHAIN_NODE_REV}" - : "${LOGOS_BLOCKCHAIN_BUNDLE_VERSION:?Missing LOGOS_BLOCKCHAIN_BUNDLE_VERSION}" - - name: Install nomos circuits - run: | - ./scripts/setup/setup-logos-blockchain-circuits.sh "${VERSION}" "$HOME/.logos-blockchain-circuits" - echo "LOGOS_BLOCKCHAIN_CIRCUITS=$HOME/.logos-blockchain-circuits" >> "$GITHUB_ENV" - - uses: dtolnay/rust-toolchain@master - with: - toolchain: nightly-2025-09-14 - - uses: actions/cache@v4 - with: - path: | - ~/.cargo/registry - ~/.cargo/git - key: ${{ runner.os }}-cargo-${{ hashFiles('**/Cargo.lock') }} - restore-keys: ${{ runner.os }}-cargo- - - run: cargo +nightly-2025-09-14 check -p doc-snippets - deny: runs-on: ubuntu-latest steps: diff --git a/.gitignore b/.gitignore index 991ebde..3ce2f54 100644 --- a/.gitignore +++ b/.gitignore @@ -15,7 +15,7 @@ ci-artifacts/ tests/kzgrs/circuits_bundle/ NOMOS_RUST_SOURCES_ONLY.txt dump.zsh -testing-framework/assets/stack/bin/ +nomos/assets/stack/bin/ testing-framework/assets/stack/kzgrs_test_params/ null diff --git a/Cargo.lock b/Cargo.lock index 19a72d8..3bff296 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -54,6 +54,21 @@ dependencies = [ "libc", ] +[[package]] +name = "anstream" +version = "0.6.21" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "43d5b281e737544384e969a5ccad3f1cdd24b48086a0fc1b2a5262a26b8f4f4a" +dependencies = [ + "anstyle", + "anstyle-parse", + "anstyle-query", + "anstyle-wincon", + "colorchoice", + "is_terminal_polyfill", + "utf8parse", +] + [[package]] name = "anstyle" version = "1.0.13" @@ -61,10 +76,39 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "5192cca8006f1fd4f7237516f40fa183bb07f8fbdfedaa0036de5ea9b0b45e78" [[package]] -name = "anyhow" -version = "1.0.100" +name = "anstyle-parse" +version = "0.2.7" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a23eb6b1614318a8071c9b2521f36b424b2c83db5eb3a0fead4a6c0809af6e61" +checksum = "4e7644824f0aa2c7b9384579234ef10eb7efb6a0deb83f9630a49594dd9c15c2" +dependencies = [ + "utf8parse", +] + +[[package]] +name = "anstyle-query" +version = "1.1.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "40c48f72fd53cd289104fc64099abca73db4166ad86ea0b4341abe65af83dadc" +dependencies = [ + "windows-sys 0.61.2", +] + +[[package]] +name = "anstyle-wincon" +version = "3.0.11" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "291e6a250ff86cd4a820112fb8898808a366d8f9f58ce16d1f538353ad55747d" +dependencies = [ + "anstyle", + "once_cell_polyfill", + "windows-sys 0.61.2", +] + +[[package]] +name = "anyhow" +version = "1.0.101" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5f0e0fee31ef5ed1ba1316088939cea399010ed7731dba877ed44aeb407a75ea" [[package]] name = "arbitrary" @@ -818,9 +862,9 @@ dependencies = [ [[package]] name = "cc" -version = "1.2.54" +version = "1.2.55" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6354c81bbfd62d9cfa9cb3c773c2b7b2a3a482d569de977fd0e961f6e7c00583" +checksum = "47b26a0954ae34af09b50f0de26458fa95369a0d478d8236d3f93082b219bd29" dependencies = [ "find-msvc-tools", "jobserver", @@ -861,32 +905,28 @@ dependencies = [ ] [[package]] -name = "cfgsync_tf" +name = "cfgsync-core" version = "0.1.0" dependencies = [ - "anyhow", "axum", - "clap", - "hex", - "logos-blockchain-core", - "logos-blockchain-groth16", - "logos-blockchain-key-management-system-service", - "logos-blockchain-libp2p", - "logos-blockchain-node", - "logos-blockchain-tracing-service", - "logos-blockchain-utils", - "rand 0.8.5", "reqwest", "serde", "serde_json", - "serde_path_to_error", - "serde_with", - "serde_yaml", - "testing-framework-config", - "testing-framework-core", "thiserror 2.0.18", "tokio", - "tracing", +] + +[[package]] +name = "cfgsync-runtime" +version = "0.1.0" +dependencies = [ + "anyhow", + "cfgsync-core", + "clap", + "serde", + "serde_yaml", + "testing-framework-core", + "tokio", ] [[package]] @@ -946,9 +986,9 @@ dependencies = [ [[package]] name = "clap" -version = "4.5.54" +version = "4.5.57" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c6e6ff9dcd79cff5cd969a17a545d79e84ab086e444102a591e288a8aa3ce394" +checksum = "6899ea499e3fb9305a65d5ebf6e3d2248c5fab291f300ad0a704fbe142eae31a" dependencies = [ "clap_builder", "clap_derive", @@ -956,19 +996,21 @@ dependencies = [ [[package]] name = "clap_builder" -version = "4.5.54" +version = "4.5.57" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "fa42cf4d2b7a41bc8f663a7cab4031ebafa1bf3875705bfaf8466dc60ab52c00" +checksum = "7b12c8b680195a62a8364d16b8447b01b6c2c8f9aaf68bee653be34d4245e238" dependencies = [ + "anstream", "anstyle", "clap_lex", + "strsim", ] [[package]] name = "clap_derive" -version = "4.5.49" +version = "4.5.55" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2a0b5487afeab2deb2ff4e03a807ad1a03ac532ff5a2cee5d86884440c7f7671" +checksum = "a92793da1a46a5f2a02a6f4c46c6496b28c43638adea8306fcb0caa1634f24e5" dependencies = [ "heck", "proc-macro2", @@ -995,6 +1037,12 @@ dependencies = [ "owo-colors", ] +[[package]] +name = "colorchoice" +version = "1.0.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b05b61dc5112cbb17e4b6cd61790d9845d13888356391624cbe7e41efeac1e75" + [[package]] name = "concurrent-queue" version = "2.5.0" @@ -1239,7 +1287,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "7ab67060fc6b8ef687992d439ca0fa36e7ed17e9a0b16b25b601e8757df720de" dependencies = [ "data-encoding", - "syn 1.0.109", + "syn 2.0.114", ] [[package]] @@ -1411,19 +1459,6 @@ dependencies = [ "syn 1.0.109", ] -[[package]] -name = "doc-snippets" -version = "0.1.0" -dependencies = [ - "anyhow", - "async-trait", - "testing-framework-core", - "testing-framework-runner-compose", - "testing-framework-runner-k8s", - "testing-framework-runner-local", - "testing-framework-workflows", -] - [[package]] name = "dtoa" version = "1.0.11" @@ -1610,15 +1645,15 @@ checksum = "28dea519a9695b9977216879a3ebfddf92f1c08c05d984f8996aecd6ecdc811d" [[package]] name = "find-msvc-tools" -version = "0.1.8" +version = "0.1.9" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8591b0bcc8a98a64310a2fae1bb3e9b8564dd10e381e6e28010fde8e8e8568db" +checksum = "5baebc0774151f905a1a2cc41989300b1e6fbb29aff0ceffa1064fdd3088d582" [[package]] name = "flate2" -version = "1.1.8" +version = "1.1.9" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b375d6465b98090a5f25b1c7703f3859783755aa9a80433b36e0379a3ec2f369" +checksum = "843fba2746e448b37e26a819579957415c8cef339bf08564fe8b7ddbd959573c" dependencies = [ "crc32fast", "miniz_oxide", @@ -2294,14 +2329,13 @@ dependencies = [ [[package]] name = "hyper-util" -version = "0.1.19" +version = "0.1.20" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "727805d60e7938b76b826a6ef209eb70eaa1812794f9424d4a4e2d740662df5f" +checksum = "96547c2556ec9d12fb1578c4eaf448b04993e7fb79cbaad930a656880a6bdfa0" dependencies = [ "base64 0.22.1", "bytes", "futures-channel", - "futures-core", "futures-util", "http 1.4.0", "http-body 1.0.1", @@ -2318,9 +2352,9 @@ dependencies = [ [[package]] name = "iana-time-zone" -version = "0.1.64" +version = "0.1.65" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "33e57f83510bb73707521ebaffa789ec8caf86f9657cad665b092b581d40e9fb" +checksum = "e31bc9ad994ba00e440a8aa5c9ef0ec67d5cb5e5cb0cc7f8b744a35b389cc470" dependencies = [ "android_system_properties", "core-foundation-sys", @@ -2613,6 +2647,12 @@ dependencies = [ "serde", ] +[[package]] +name = "is_terminal_polyfill" +version = "1.70.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a6cb138bb79a146c1bd460005623e142ef0181e3d0219cb493e02f7d08a35695" + [[package]] name = "itertools" version = "0.10.5" @@ -2849,6 +2889,43 @@ version = "1.5.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "bbd2bcb4c963f2ddae06a2efc7e9f3591312473c50c6685e1f298068316e66fe" +[[package]] +name = "lb-ext" +version = "0.1.0" +dependencies = [ + "anyhow", + "async-trait", + "cfgsync-runtime", + "kube", + "logos-blockchain-http-api-common", + "logos-blockchain-tracing", + "logos-blockchain-tracing-service", + "reqwest", + "serde", + "serde_yaml", + "tempfile", + "testing-framework-core", + "testing-framework-env", + "testing-framework-runner-compose", + "testing-framework-runner-k8s", + "testing_framework", + "thiserror 2.0.18", + "tokio", + "tracing", + "uuid", +] + +[[package]] +name = "lb-workloads" +version = "0.1.0" +dependencies = [ + "lb-ext", + "testing-framework-core", + "testing_framework", + "thiserror 2.0.18", + "tokio", +] + [[package]] name = "libc" version = "0.2.180" @@ -3344,7 +3421,7 @@ checksum = "5e5032e24019045c762d3c0f28f5b6b8bbf38563a65908389bf7978758920897" [[package]] name = "logos-blockchain-api-service" version = "0.1.0" -source = "git+https://github.com/logos-co/nomos-node.git?rev=feac5ab97ef6dfcebcf6536363a5f330cb79b5e0#feac5ab97ef6dfcebcf6536363a5f330cb79b5e0" +source = "git+https://github.com/logos-blockchain/logos-blockchain.git?rev=a4275d00eb3041ed6bfb394e0913cd1ad172224c#a4275d00eb3041ed6bfb394e0913cd1ad172224c" dependencies = [ "async-trait", "bytes", @@ -3371,7 +3448,7 @@ dependencies = [ [[package]] name = "logos-blockchain-blend" version = "0.1.0" -source = "git+https://github.com/logos-co/nomos-node.git?rev=feac5ab97ef6dfcebcf6536363a5f330cb79b5e0#feac5ab97ef6dfcebcf6536363a5f330cb79b5e0" +source = "git+https://github.com/logos-blockchain/logos-blockchain.git?rev=a4275d00eb3041ed6bfb394e0913cd1ad172224c#a4275d00eb3041ed6bfb394e0913cd1ad172224c" dependencies = [ "logos-blockchain-blend-crypto", "logos-blockchain-blend-message", @@ -3383,7 +3460,7 @@ dependencies = [ [[package]] name = "logos-blockchain-blend-crypto" version = "0.1.0" -source = "git+https://github.com/logos-co/nomos-node.git?rev=feac5ab97ef6dfcebcf6536363a5f330cb79b5e0#feac5ab97ef6dfcebcf6536363a5f330cb79b5e0" +source = "git+https://github.com/logos-blockchain/logos-blockchain.git?rev=a4275d00eb3041ed6bfb394e0913cd1ad172224c#a4275d00eb3041ed6bfb394e0913cd1ad172224c" dependencies = [ "blake2", "logos-blockchain-groth16", @@ -3397,7 +3474,7 @@ dependencies = [ [[package]] name = "logos-blockchain-blend-message" version = "0.1.0" -source = "git+https://github.com/logos-co/nomos-node.git?rev=feac5ab97ef6dfcebcf6536363a5f330cb79b5e0#feac5ab97ef6dfcebcf6536363a5f330cb79b5e0" +source = "git+https://github.com/logos-blockchain/logos-blockchain.git?rev=a4275d00eb3041ed6bfb394e0913cd1ad172224c#a4275d00eb3041ed6bfb394e0913cd1ad172224c" dependencies = [ "blake2", "derivative", @@ -3419,7 +3496,7 @@ dependencies = [ [[package]] name = "logos-blockchain-blend-network" version = "0.1.0" -source = "git+https://github.com/logos-co/nomos-node.git?rev=feac5ab97ef6dfcebcf6536363a5f330cb79b5e0#feac5ab97ef6dfcebcf6536363a5f330cb79b5e0" +source = "git+https://github.com/logos-blockchain/logos-blockchain.git?rev=a4275d00eb3041ed6bfb394e0913cd1ad172224c#a4275d00eb3041ed6bfb394e0913cd1ad172224c" dependencies = [ "either", "futures", @@ -3437,7 +3514,7 @@ dependencies = [ [[package]] name = "logos-blockchain-blend-proofs" version = "0.1.0" -source = "git+https://github.com/logos-co/nomos-node.git?rev=feac5ab97ef6dfcebcf6536363a5f330cb79b5e0#feac5ab97ef6dfcebcf6536363a5f330cb79b5e0" +source = "git+https://github.com/logos-blockchain/logos-blockchain.git?rev=a4275d00eb3041ed6bfb394e0913cd1ad172224c#a4275d00eb3041ed6bfb394e0913cd1ad172224c" dependencies = [ "ed25519-dalek", "generic-array 1.3.5", @@ -3452,7 +3529,7 @@ dependencies = [ [[package]] name = "logos-blockchain-blend-scheduling" version = "0.1.0" -source = "git+https://github.com/logos-co/nomos-node.git?rev=feac5ab97ef6dfcebcf6536363a5f330cb79b5e0#feac5ab97ef6dfcebcf6536363a5f330cb79b5e0" +source = "git+https://github.com/logos-blockchain/logos-blockchain.git?rev=a4275d00eb3041ed6bfb394e0913cd1ad172224c#a4275d00eb3041ed6bfb394e0913cd1ad172224c" dependencies = [ "async-trait", "derivative", @@ -3476,7 +3553,7 @@ dependencies = [ [[package]] name = "logos-blockchain-blend-service" version = "0.1.0" -source = "git+https://github.com/logos-co/nomos-node.git?rev=feac5ab97ef6dfcebcf6536363a5f330cb79b5e0#feac5ab97ef6dfcebcf6536363a5f330cb79b5e0" +source = "git+https://github.com/logos-blockchain/logos-blockchain.git?rev=a4275d00eb3041ed6bfb394e0913cd1ad172224c#a4275d00eb3041ed6bfb394e0913cd1ad172224c" dependencies = [ "async-trait", "fork_stream", @@ -3511,7 +3588,7 @@ dependencies = [ [[package]] name = "logos-blockchain-chain-broadcast-service" version = "0.1.0" -source = "git+https://github.com/logos-co/nomos-node.git?rev=feac5ab97ef6dfcebcf6536363a5f330cb79b5e0#feac5ab97ef6dfcebcf6536363a5f330cb79b5e0" +source = "git+https://github.com/logos-blockchain/logos-blockchain.git?rev=a4275d00eb3041ed6bfb394e0913cd1ad172224c#a4275d00eb3041ed6bfb394e0913cd1ad172224c" dependencies = [ "async-trait", "derivative", @@ -3527,7 +3604,7 @@ dependencies = [ [[package]] name = "logos-blockchain-chain-leader-service" version = "0.1.0" -source = "git+https://github.com/logos-co/nomos-node.git?rev=feac5ab97ef6dfcebcf6536363a5f330cb79b5e0#feac5ab97ef6dfcebcf6536363a5f330cb79b5e0" +source = "git+https://github.com/logos-blockchain/logos-blockchain.git?rev=a4275d00eb3041ed6bfb394e0913cd1ad172224c#a4275d00eb3041ed6bfb394e0913cd1ad172224c" dependencies = [ "async-trait", "futures", @@ -3555,7 +3632,7 @@ dependencies = [ [[package]] name = "logos-blockchain-chain-network-service" version = "0.1.0" -source = "git+https://github.com/logos-co/nomos-node.git?rev=feac5ab97ef6dfcebcf6536363a5f330cb79b5e0#feac5ab97ef6dfcebcf6536363a5f330cb79b5e0" +source = "git+https://github.com/logos-blockchain/logos-blockchain.git?rev=a4275d00eb3041ed6bfb394e0913cd1ad172224c#a4275d00eb3041ed6bfb394e0913cd1ad172224c" dependencies = [ "async-trait", "futures", @@ -3583,7 +3660,7 @@ dependencies = [ [[package]] name = "logos-blockchain-chain-service" version = "0.1.0" -source = "git+https://github.com/logos-co/nomos-node.git?rev=feac5ab97ef6dfcebcf6536363a5f330cb79b5e0#feac5ab97ef6dfcebcf6536363a5f330cb79b5e0" +source = "git+https://github.com/logos-blockchain/logos-blockchain.git?rev=a4275d00eb3041ed6bfb394e0913cd1ad172224c#a4275d00eb3041ed6bfb394e0913cd1ad172224c" dependencies = [ "async-trait", "bytes", @@ -3613,7 +3690,7 @@ dependencies = [ [[package]] name = "logos-blockchain-chain-service-common" version = "0.1.0" -source = "git+https://github.com/logos-co/nomos-node.git?rev=feac5ab97ef6dfcebcf6536363a5f330cb79b5e0#feac5ab97ef6dfcebcf6536363a5f330cb79b5e0" +source = "git+https://github.com/logos-blockchain/logos-blockchain.git?rev=a4275d00eb3041ed6bfb394e0913cd1ad172224c#a4275d00eb3041ed6bfb394e0913cd1ad172224c" dependencies = [ "logos-blockchain-core", "serde", @@ -3622,7 +3699,7 @@ dependencies = [ [[package]] name = "logos-blockchain-circuits-prover" version = "0.1.0" -source = "git+https://github.com/logos-co/nomos-node.git?rev=feac5ab97ef6dfcebcf6536363a5f330cb79b5e0#feac5ab97ef6dfcebcf6536363a5f330cb79b5e0" +source = "git+https://github.com/logos-blockchain/logos-blockchain.git?rev=a4275d00eb3041ed6bfb394e0913cd1ad172224c#a4275d00eb3041ed6bfb394e0913cd1ad172224c" dependencies = [ "logos-blockchain-circuits-utils", "tempfile", @@ -3631,7 +3708,7 @@ dependencies = [ [[package]] name = "logos-blockchain-circuits-utils" version = "0.1.0" -source = "git+https://github.com/logos-co/nomos-node.git?rev=feac5ab97ef6dfcebcf6536363a5f330cb79b5e0#feac5ab97ef6dfcebcf6536363a5f330cb79b5e0" +source = "git+https://github.com/logos-blockchain/logos-blockchain.git?rev=a4275d00eb3041ed6bfb394e0913cd1ad172224c#a4275d00eb3041ed6bfb394e0913cd1ad172224c" dependencies = [ "dirs", ] @@ -3639,7 +3716,7 @@ dependencies = [ [[package]] name = "logos-blockchain-common-http-client" version = "0.1.0" -source = "git+https://github.com/logos-co/nomos-node.git?rev=feac5ab97ef6dfcebcf6536363a5f330cb79b5e0#feac5ab97ef6dfcebcf6536363a5f330cb79b5e0" +source = "git+https://github.com/logos-blockchain/logos-blockchain.git?rev=a4275d00eb3041ed6bfb394e0913cd1ad172224c#a4275d00eb3041ed6bfb394e0913cd1ad172224c" dependencies = [ "futures", "hex", @@ -3659,7 +3736,7 @@ dependencies = [ [[package]] name = "logos-blockchain-core" version = "0.1.0" -source = "git+https://github.com/logos-co/nomos-node.git?rev=feac5ab97ef6dfcebcf6536363a5f330cb79b5e0#feac5ab97ef6dfcebcf6536363a5f330cb79b5e0" +source = "git+https://github.com/logos-blockchain/logos-blockchain.git?rev=a4275d00eb3041ed6bfb394e0913cd1ad172224c#a4275d00eb3041ed6bfb394e0913cd1ad172224c" dependencies = [ "ark-ff 0.4.2", "bincode", @@ -3689,7 +3766,7 @@ dependencies = [ [[package]] name = "logos-blockchain-cryptarchia-engine" version = "0.1.0" -source = "git+https://github.com/logos-co/nomos-node.git?rev=feac5ab97ef6dfcebcf6536363a5f330cb79b5e0#feac5ab97ef6dfcebcf6536363a5f330cb79b5e0" +source = "git+https://github.com/logos-blockchain/logos-blockchain.git?rev=a4275d00eb3041ed6bfb394e0913cd1ad172224c#a4275d00eb3041ed6bfb394e0913cd1ad172224c" dependencies = [ "cfg_eval", "logos-blockchain-utils", @@ -3704,7 +3781,7 @@ dependencies = [ [[package]] name = "logos-blockchain-cryptarchia-sync" version = "0.1.0" -source = "git+https://github.com/logos-co/nomos-node.git?rev=feac5ab97ef6dfcebcf6536363a5f330cb79b5e0#feac5ab97ef6dfcebcf6536363a5f330cb79b5e0" +source = "git+https://github.com/logos-blockchain/logos-blockchain.git?rev=a4275d00eb3041ed6bfb394e0913cd1ad172224c#a4275d00eb3041ed6bfb394e0913cd1ad172224c" dependencies = [ "bytes", "futures", @@ -3723,7 +3800,7 @@ dependencies = [ [[package]] name = "logos-blockchain-groth16" version = "0.1.0" -source = "git+https://github.com/logos-co/nomos-node.git?rev=feac5ab97ef6dfcebcf6536363a5f330cb79b5e0#feac5ab97ef6dfcebcf6536363a5f330cb79b5e0" +source = "git+https://github.com/logos-blockchain/logos-blockchain.git?rev=a4275d00eb3041ed6bfb394e0913cd1ad172224c#a4275d00eb3041ed6bfb394e0913cd1ad172224c" dependencies = [ "ark-bn254 0.4.0", "ark-ec 0.4.2", @@ -3741,7 +3818,7 @@ dependencies = [ [[package]] name = "logos-blockchain-http-api-common" version = "0.1.0" -source = "git+https://github.com/logos-co/nomos-node.git?rev=feac5ab97ef6dfcebcf6536363a5f330cb79b5e0#feac5ab97ef6dfcebcf6536363a5f330cb79b5e0" +source = "git+https://github.com/logos-blockchain/logos-blockchain.git?rev=a4275d00eb3041ed6bfb394e0913cd1ad172224c#a4275d00eb3041ed6bfb394e0913cd1ad172224c" dependencies = [ "axum", "logos-blockchain-core", @@ -3749,12 +3826,13 @@ dependencies = [ "serde", "serde_json", "serde_with", + "tracing", ] [[package]] name = "logos-blockchain-key-management-system-keys" version = "0.1.0" -source = "git+https://github.com/logos-co/nomos-node.git?rev=feac5ab97ef6dfcebcf6536363a5f330cb79b5e0#feac5ab97ef6dfcebcf6536363a5f330cb79b5e0" +source = "git+https://github.com/logos-blockchain/logos-blockchain.git?rev=a4275d00eb3041ed6bfb394e0913cd1ad172224c#a4275d00eb3041ed6bfb394e0913cd1ad172224c" dependencies = [ "async-trait", "bytes", @@ -3779,7 +3857,7 @@ dependencies = [ [[package]] name = "logos-blockchain-key-management-system-macros" version = "0.1.0" -source = "git+https://github.com/logos-co/nomos-node.git?rev=feac5ab97ef6dfcebcf6536363a5f330cb79b5e0#feac5ab97ef6dfcebcf6536363a5f330cb79b5e0" +source = "git+https://github.com/logos-blockchain/logos-blockchain.git?rev=a4275d00eb3041ed6bfb394e0913cd1ad172224c#a4275d00eb3041ed6bfb394e0913cd1ad172224c" dependencies = [ "proc-macro2", "quote", @@ -3789,7 +3867,7 @@ dependencies = [ [[package]] name = "logos-blockchain-key-management-system-operators" version = "0.1.0" -source = "git+https://github.com/logos-co/nomos-node.git?rev=feac5ab97ef6dfcebcf6536363a5f330cb79b5e0#feac5ab97ef6dfcebcf6536363a5f330cb79b5e0" +source = "git+https://github.com/logos-blockchain/logos-blockchain.git?rev=a4275d00eb3041ed6bfb394e0913cd1ad172224c#a4275d00eb3041ed6bfb394e0913cd1ad172224c" dependencies = [ "async-trait", "logos-blockchain-blend-proofs", @@ -3805,7 +3883,7 @@ dependencies = [ [[package]] name = "logos-blockchain-key-management-system-service" version = "0.1.0" -source = "git+https://github.com/logos-co/nomos-node.git?rev=feac5ab97ef6dfcebcf6536363a5f330cb79b5e0#feac5ab97ef6dfcebcf6536363a5f330cb79b5e0" +source = "git+https://github.com/logos-blockchain/logos-blockchain.git?rev=a4275d00eb3041ed6bfb394e0913cd1ad172224c#a4275d00eb3041ed6bfb394e0913cd1ad172224c" dependencies = [ "async-trait", "log", @@ -3821,7 +3899,7 @@ dependencies = [ [[package]] name = "logos-blockchain-ledger" version = "0.1.0" -source = "git+https://github.com/logos-co/nomos-node.git?rev=feac5ab97ef6dfcebcf6536363a5f330cb79b5e0#feac5ab97ef6dfcebcf6536363a5f330cb79b5e0" +source = "git+https://github.com/logos-blockchain/logos-blockchain.git?rev=a4275d00eb3041ed6bfb394e0913cd1ad172224c#a4275d00eb3041ed6bfb394e0913cd1ad172224c" dependencies = [ "derivative", "logos-blockchain-blend-crypto", @@ -3845,7 +3923,7 @@ dependencies = [ [[package]] name = "logos-blockchain-libp2p" version = "0.1.0" -source = "git+https://github.com/logos-co/nomos-node.git?rev=feac5ab97ef6dfcebcf6536363a5f330cb79b5e0#feac5ab97ef6dfcebcf6536363a5f330cb79b5e0" +source = "git+https://github.com/logos-blockchain/logos-blockchain.git?rev=a4275d00eb3041ed6bfb394e0913cd1ad172224c#a4275d00eb3041ed6bfb394e0913cd1ad172224c" dependencies = [ "async-trait", "backon", @@ -3874,7 +3952,7 @@ dependencies = [ [[package]] name = "logos-blockchain-network-service" version = "0.1.0" -source = "git+https://github.com/logos-co/nomos-node.git?rev=feac5ab97ef6dfcebcf6536363a5f330cb79b5e0#feac5ab97ef6dfcebcf6536363a5f330cb79b5e0" +source = "git+https://github.com/logos-blockchain/logos-blockchain.git?rev=a4275d00eb3041ed6bfb394e0913cd1ad172224c#a4275d00eb3041ed6bfb394e0913cd1ad172224c" dependencies = [ "async-trait", "futures", @@ -3893,15 +3971,17 @@ dependencies = [ [[package]] name = "logos-blockchain-node" version = "0.1.0" -source = "git+https://github.com/logos-co/nomos-node.git?rev=feac5ab97ef6dfcebcf6536363a5f330cb79b5e0#feac5ab97ef6dfcebcf6536363a5f330cb79b5e0" +source = "git+https://github.com/logos-blockchain/logos-blockchain.git?rev=a4275d00eb3041ed6bfb394e0913cd1ad172224c#a4275d00eb3041ed6bfb394e0913cd1ad172224c" dependencies = [ "async-trait", "axum", + "cfg-if", "clap", "color-eyre", "futures", "hex", "http 1.4.0", + "libp2p", "logos-blockchain-api-service", "logos-blockchain-blend", "logos-blockchain-blend-service", @@ -3926,10 +4006,13 @@ dependencies = [ "logos-blockchain-time-service", "logos-blockchain-tracing", "logos-blockchain-tracing-service", + "logos-blockchain-tui-zone", "logos-blockchain-tx-service", "logos-blockchain-utils", "logos-blockchain-wallet-service", + "num-bigint", "overwatch", + "rand 0.8.5", "serde", "serde_ignored", "serde_json", @@ -3942,6 +4025,7 @@ dependencies = [ "tower 0.4.13", "tower-http 0.5.2", "tracing", + "url", "utoipa", "utoipa-swagger-ui", ] @@ -3949,7 +4033,7 @@ dependencies = [ [[package]] name = "logos-blockchain-poc" version = "0.1.0" -source = "git+https://github.com/logos-co/nomos-node.git?rev=feac5ab97ef6dfcebcf6536363a5f330cb79b5e0#feac5ab97ef6dfcebcf6536363a5f330cb79b5e0" +source = "git+https://github.com/logos-blockchain/logos-blockchain.git?rev=a4275d00eb3041ed6bfb394e0913cd1ad172224c#a4275d00eb3041ed6bfb394e0913cd1ad172224c" dependencies = [ "logos-blockchain-circuits-prover", "logos-blockchain-circuits-utils", @@ -3959,12 +4043,13 @@ dependencies = [ "serde", "serde_json", "thiserror 2.0.18", + "tracing", ] [[package]] name = "logos-blockchain-pol" version = "0.1.0" -source = "git+https://github.com/logos-co/nomos-node.git?rev=feac5ab97ef6dfcebcf6536363a5f330cb79b5e0#feac5ab97ef6dfcebcf6536363a5f330cb79b5e0" +source = "git+https://github.com/logos-blockchain/logos-blockchain.git?rev=a4275d00eb3041ed6bfb394e0913cd1ad172224c#a4275d00eb3041ed6bfb394e0913cd1ad172224c" dependencies = [ "logos-blockchain-circuits-prover", "logos-blockchain-circuits-utils", @@ -3975,12 +4060,13 @@ dependencies = [ "serde", "serde_json", "thiserror 2.0.18", + "tracing", ] [[package]] name = "logos-blockchain-poq" version = "0.1.0" -source = "git+https://github.com/logos-co/nomos-node.git?rev=feac5ab97ef6dfcebcf6536363a5f330cb79b5e0#feac5ab97ef6dfcebcf6536363a5f330cb79b5e0" +source = "git+https://github.com/logos-blockchain/logos-blockchain.git?rev=a4275d00eb3041ed6bfb394e0913cd1ad172224c#a4275d00eb3041ed6bfb394e0913cd1ad172224c" dependencies = [ "logos-blockchain-circuits-prover", "logos-blockchain-circuits-utils", @@ -3991,12 +4077,13 @@ dependencies = [ "serde", "serde_json", "thiserror 2.0.18", + "tracing", ] [[package]] name = "logos-blockchain-poseidon2" version = "0.1.0" -source = "git+https://github.com/logos-co/nomos-node.git?rev=feac5ab97ef6dfcebcf6536363a5f330cb79b5e0#feac5ab97ef6dfcebcf6536363a5f330cb79b5e0" +source = "git+https://github.com/logos-blockchain/logos-blockchain.git?rev=a4275d00eb3041ed6bfb394e0913cd1ad172224c#a4275d00eb3041ed6bfb394e0913cd1ad172224c" dependencies = [ "ark-bn254 0.4.0", "ark-ff 0.4.2", @@ -4007,7 +4094,7 @@ dependencies = [ [[package]] name = "logos-blockchain-sdp-service" version = "0.1.0" -source = "git+https://github.com/logos-co/nomos-node.git?rev=feac5ab97ef6dfcebcf6536363a5f330cb79b5e0#feac5ab97ef6dfcebcf6536363a5f330cb79b5e0" +source = "git+https://github.com/logos-blockchain/logos-blockchain.git?rev=a4275d00eb3041ed6bfb394e0913cd1ad172224c#a4275d00eb3041ed6bfb394e0913cd1ad172224c" dependencies = [ "async-trait", "futures", @@ -4023,7 +4110,7 @@ dependencies = [ [[package]] name = "logos-blockchain-services-utils" version = "0.1.0" -source = "git+https://github.com/logos-co/nomos-node.git?rev=feac5ab97ef6dfcebcf6536363a5f330cb79b5e0#feac5ab97ef6dfcebcf6536363a5f330cb79b5e0" +source = "git+https://github.com/logos-blockchain/logos-blockchain.git?rev=a4275d00eb3041ed6bfb394e0913cd1ad172224c#a4275d00eb3041ed6bfb394e0913cd1ad172224c" dependencies = [ "async-trait", "futures", @@ -4038,7 +4125,7 @@ dependencies = [ [[package]] name = "logos-blockchain-storage-service" version = "0.1.0" -source = "git+https://github.com/logos-co/nomos-node.git?rev=feac5ab97ef6dfcebcf6536363a5f330cb79b5e0#feac5ab97ef6dfcebcf6536363a5f330cb79b5e0" +source = "git+https://github.com/logos-blockchain/logos-blockchain.git?rev=a4275d00eb3041ed6bfb394e0913cd1ad172224c#a4275d00eb3041ed6bfb394e0913cd1ad172224c" dependencies = [ "async-trait", "bytes", @@ -4056,7 +4143,7 @@ dependencies = [ [[package]] name = "logos-blockchain-system-sig-service" version = "0.1.0" -source = "git+https://github.com/logos-co/nomos-node.git?rev=feac5ab97ef6dfcebcf6536363a5f330cb79b5e0#feac5ab97ef6dfcebcf6536363a5f330cb79b5e0" +source = "git+https://github.com/logos-blockchain/logos-blockchain.git?rev=a4275d00eb3041ed6bfb394e0913cd1ad172224c#a4275d00eb3041ed6bfb394e0913cd1ad172224c" dependencies = [ "async-ctrlc", "async-trait", @@ -4067,7 +4154,7 @@ dependencies = [ [[package]] name = "logos-blockchain-time-service" version = "0.1.0" -source = "git+https://github.com/logos-co/nomos-node.git?rev=feac5ab97ef6dfcebcf6536363a5f330cb79b5e0#feac5ab97ef6dfcebcf6536363a5f330cb79b5e0" +source = "git+https://github.com/logos-blockchain/logos-blockchain.git?rev=a4275d00eb3041ed6bfb394e0913cd1ad172224c#a4275d00eb3041ed6bfb394e0913cd1ad172224c" dependencies = [ "async-trait", "cfg_eval", @@ -4089,7 +4176,7 @@ dependencies = [ [[package]] name = "logos-blockchain-tracing" version = "0.1.0" -source = "git+https://github.com/logos-co/nomos-node.git?rev=feac5ab97ef6dfcebcf6536363a5f330cb79b5e0#feac5ab97ef6dfcebcf6536363a5f330cb79b5e0" +source = "git+https://github.com/logos-blockchain/logos-blockchain.git?rev=a4275d00eb3041ed6bfb394e0913cd1ad172224c#a4275d00eb3041ed6bfb394e0913cd1ad172224c" dependencies = [ "opentelemetry", "opentelemetry-http", @@ -4112,7 +4199,7 @@ dependencies = [ [[package]] name = "logos-blockchain-tracing-service" version = "0.1.0" -source = "git+https://github.com/logos-co/nomos-node.git?rev=feac5ab97ef6dfcebcf6536363a5f330cb79b5e0#feac5ab97ef6dfcebcf6536363a5f330cb79b5e0" +source = "git+https://github.com/logos-blockchain/logos-blockchain.git?rev=a4275d00eb3041ed6bfb394e0913cd1ad172224c#a4275d00eb3041ed6bfb394e0913cd1ad172224c" dependencies = [ "async-trait", "logos-blockchain-tracing", @@ -4123,10 +4210,27 @@ dependencies = [ "tracing-subscriber 0.3.22", ] +[[package]] +name = "logos-blockchain-tui-zone" +version = "0.1.0" +source = "git+https://github.com/logos-blockchain/logos-blockchain.git?rev=a4275d00eb3041ed6bfb394e0913cd1ad172224c#a4275d00eb3041ed6bfb394e0913cd1ad172224c" +dependencies = [ + "clap", + "hex", + "logos-blockchain-core", + "logos-blockchain-key-management-system-service", + "logos-blockchain-zone-sdk", + "rand 0.8.5", + "reqwest", + "serde_json", + "tokio", + "tracing-subscriber 0.3.22", +] + [[package]] name = "logos-blockchain-tx-service" version = "0.1.0" -source = "git+https://github.com/logos-co/nomos-node.git?rev=feac5ab97ef6dfcebcf6536363a5f330cb79b5e0#feac5ab97ef6dfcebcf6536363a5f330cb79b5e0" +source = "git+https://github.com/logos-blockchain/logos-blockchain.git?rev=a4275d00eb3041ed6bfb394e0913cd1ad172224c#a4275d00eb3041ed6bfb394e0913cd1ad172224c" dependencies = [ "async-trait", "futures", @@ -4148,7 +4252,7 @@ dependencies = [ [[package]] name = "logos-blockchain-utils" version = "0.1.0" -source = "git+https://github.com/logos-co/nomos-node.git?rev=feac5ab97ef6dfcebcf6536363a5f330cb79b5e0#feac5ab97ef6dfcebcf6536363a5f330cb79b5e0" +source = "git+https://github.com/logos-blockchain/logos-blockchain.git?rev=a4275d00eb3041ed6bfb394e0913cd1ad172224c#a4275d00eb3041ed6bfb394e0913cd1ad172224c" dependencies = [ "async-trait", "blake2", @@ -4165,7 +4269,7 @@ dependencies = [ [[package]] name = "logos-blockchain-utxotree" version = "0.1.0" -source = "git+https://github.com/logos-co/nomos-node.git?rev=feac5ab97ef6dfcebcf6536363a5f330cb79b5e0#feac5ab97ef6dfcebcf6536363a5f330cb79b5e0" +source = "git+https://github.com/logos-blockchain/logos-blockchain.git?rev=a4275d00eb3041ed6bfb394e0913cd1ad172224c#a4275d00eb3041ed6bfb394e0913cd1ad172224c" dependencies = [ "ark-ff 0.4.2", "logos-blockchain-groth16", @@ -4179,7 +4283,7 @@ dependencies = [ [[package]] name = "logos-blockchain-wallet" version = "0.1.0" -source = "git+https://github.com/logos-co/nomos-node.git?rev=feac5ab97ef6dfcebcf6536363a5f330cb79b5e0#feac5ab97ef6dfcebcf6536363a5f330cb79b5e0" +source = "git+https://github.com/logos-blockchain/logos-blockchain.git?rev=a4275d00eb3041ed6bfb394e0913cd1ad172224c#a4275d00eb3041ed6bfb394e0913cd1ad172224c" dependencies = [ "logos-blockchain-core", "logos-blockchain-key-management-system-keys", @@ -4194,7 +4298,7 @@ dependencies = [ [[package]] name = "logos-blockchain-wallet-service" version = "0.1.0" -source = "git+https://github.com/logos-co/nomos-node.git?rev=feac5ab97ef6dfcebcf6536363a5f330cb79b5e0#feac5ab97ef6dfcebcf6536363a5f330cb79b5e0" +source = "git+https://github.com/logos-blockchain/logos-blockchain.git?rev=a4275d00eb3041ed6bfb394e0913cd1ad172224c#a4275d00eb3041ed6bfb394e0913cd1ad172224c" dependencies = [ "async-trait", "bytes", @@ -4219,7 +4323,7 @@ dependencies = [ [[package]] name = "logos-blockchain-witness-generator" version = "0.1.0" -source = "git+https://github.com/logos-co/nomos-node.git?rev=feac5ab97ef6dfcebcf6536363a5f330cb79b5e0#feac5ab97ef6dfcebcf6536363a5f330cb79b5e0" +source = "git+https://github.com/logos-blockchain/logos-blockchain.git?rev=a4275d00eb3041ed6bfb394e0913cd1ad172224c#a4275d00eb3041ed6bfb394e0913cd1ad172224c" dependencies = [ "tempfile", ] @@ -4227,7 +4331,7 @@ dependencies = [ [[package]] name = "logos-blockchain-zksign" version = "0.1.0" -source = "git+https://github.com/logos-co/nomos-node.git?rev=feac5ab97ef6dfcebcf6536363a5f330cb79b5e0#feac5ab97ef6dfcebcf6536363a5f330cb79b5e0" +source = "git+https://github.com/logos-blockchain/logos-blockchain.git?rev=a4275d00eb3041ed6bfb394e0913cd1ad172224c#a4275d00eb3041ed6bfb394e0913cd1ad172224c" dependencies = [ "logos-blockchain-circuits-prover", "logos-blockchain-circuits-utils", @@ -4238,6 +4342,24 @@ dependencies = [ "serde", "serde_json", "thiserror 2.0.18", + "tracing", +] + +[[package]] +name = "logos-blockchain-zone-sdk" +version = "0.1.0" +source = "git+https://github.com/logos-blockchain/logos-blockchain.git?rev=a4275d00eb3041ed6bfb394e0913cd1ad172224c#a4275d00eb3041ed6bfb394e0913cd1ad172224c" +dependencies = [ + "futures", + "logos-blockchain-common-http-client", + "logos-blockchain-core", + "logos-blockchain-key-management-system-service", + "reqwest", + "rpds", + "serde", + "thiserror 2.0.18", + "tokio", + "tracing", ] [[package]] @@ -4735,6 +4857,12 @@ version = "1.21.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "42f5e15c9953c5e4ccceeb2e7382a716482c34515315f7b03532b8b4e8393d2d" +[[package]] +name = "once_cell_polyfill" +version = "1.70.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "384b8ab6d37215f3c5301a95a4accb5d64aa607f1fcb26a11b5303878451b4fe" + [[package]] name = "openssl" version = "0.10.75" @@ -4973,9 +5101,9 @@ checksum = "9b4f627cb1b25917193a259e49bdad08f671f8d9708acfd5fe0a8c1455d87220" [[package]] name = "pest" -version = "2.8.5" +version = "2.8.6" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2c9eb05c21a464ea704b53158d358a31e6425db2f63a1a7312268b05fe2b75f7" +checksum = "e0848c601009d37dfa3430c4666e147e49cdcf1b92ecd3e63657d8a5f19da662" dependencies = [ "memchr", "ucd-trie", @@ -4983,9 +5111,9 @@ dependencies = [ [[package]] name = "pest_derive" -version = "2.8.5" +version = "2.8.6" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "68f9dbced329c441fa79d80472764b1a2c7e57123553b8519b36663a2fb234ed" +checksum = "11f486f1ea21e6c10ed15d5a7c77165d0ee443402f0780849d1768e7d9d6fe77" dependencies = [ "pest", "pest_generator", @@ -4993,9 +5121,9 @@ dependencies = [ [[package]] name = "pest_generator" -version = "2.8.5" +version = "2.8.6" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3bb96d5051a78f44f43c8f712d8e810adb0ebf923fc9ed2655a7f66f63ba8ee5" +checksum = "8040c4647b13b210a963c1ed407c1ff4fdfa01c31d6d2a098218702e6664f94f" dependencies = [ "pest", "pest_meta", @@ -5006,9 +5134,9 @@ dependencies = [ [[package]] name = "pest_meta" -version = "2.8.5" +version = "2.8.6" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "602113b5b5e8621770cfd490cfd90b9f84ab29bd2b0e49ad83eb6d186cef2365" +checksum = "89815c69d36021a140146f26659a81d6c2afa33d216d736dd4be5381a7362220" dependencies = [ "pest", "sha2", @@ -5116,9 +5244,9 @@ dependencies = [ [[package]] name = "portable-atomic" -version = "1.13.0" +version = "1.13.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f89776e4d69bb58bc6993e99ffa1d11f228b839984854c7daeb5d37f87cbe950" +checksum = "c33a9471896f1c69cecef8d20cbe2f7accd12527ce60845ff44c153bb2a21b49" [[package]] name = "potential_utf" @@ -5247,9 +5375,9 @@ dependencies = [ [[package]] name = "proptest" -version = "1.9.0" +version = "1.10.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "bee689443a2bd0a16ab0348b52ee43e3b2d1b1f931c8aa5c9f8de4c86fbe8c40" +checksum = "37566cb3fdacef14c0737f9546df7cfeadbfbc9fef10991038bf5015d0c80532" dependencies = [ "bitflags 2.10.0", "num-traits", @@ -5487,9 +5615,9 @@ dependencies = [ [[package]] name = "regex" -version = "1.12.2" +version = "1.12.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "843bc0191f75f3e22651ae5f1e72939ab2f72a4bc30fa80a066bd66edefc24d4" +checksum = "e10754a14b9137dd7b1e3e5b0493cc9171fdd105e0ab477f51b72e7f3ac0e276" dependencies = [ "aho-corasick", "memchr", @@ -5499,9 +5627,9 @@ dependencies = [ [[package]] name = "regex-automata" -version = "0.4.13" +version = "0.4.14" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5276caf25ac86c8d810222b3dbb938e512c55c6831a10f3e6ed1c93b84041f1c" +checksum = "6e1dd4122fc1595e8162618945476892eefca7b88c52820e74af6262213cae8f" dependencies = [ "aho-corasick", "memchr", @@ -5510,9 +5638,9 @@ dependencies = [ [[package]] name = "regex-syntax" -version = "0.8.8" +version = "0.8.9" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7a2d987857b319362043e95f5353c0535c1f58eec5336fdfcf626430af7def58" +checksum = "a96887878f22d7bad8a3b6dc5b7440e0ada9a245242924394987b21cf2210a4c" [[package]] name = "reqwest" @@ -5652,11 +5780,12 @@ version = "0.1.0" dependencies = [ "anyhow", "async-trait", + "lb-ext", + "lb-workloads", "testing-framework-core", "testing-framework-runner-compose", "testing-framework-runner-k8s", - "testing-framework-runner-local", - "testing-framework-workflows", + "testing_framework", "tokio", "tracing", "tracing-subscriber 0.3.22", @@ -6116,15 +6245,15 @@ checksum = "e320a6c5ad31d271ad523dcf3ad13e2767ad8b1cb8f047f75a8aeaf8da139da2" [[package]] name = "siphasher" -version = "1.0.1" +version = "1.0.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "56199f7ddabf13fe5074ce809e7d3f42b42ae711800501b5b16ea82ad029c39d" +checksum = "b2aa850e253778c88a04c3d7323b043aeda9d3e30d5971937c1855769763678e" [[package]] name = "slab" -version = "0.4.11" +version = "0.4.12" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7a2ae44ef20feb57a68b23d846850f861394c2e02dc425a50098ae8c90267589" +checksum = "0c790de23124f9ab44544d7ac05d60440adc586479ce501c1d6d7da3cd8c9cf5" [[package]] name = "slug" @@ -6358,69 +6487,16 @@ dependencies = [ "unicode-segmentation", ] -[[package]] -name = "testing-framework-config" -version = "0.1.0" -dependencies = [ - "hex", - "logos-blockchain-api-service", - "logos-blockchain-blend-service", - "logos-blockchain-chain-leader-service", - "logos-blockchain-chain-network-service", - "logos-blockchain-chain-service", - "logos-blockchain-core", - "logos-blockchain-cryptarchia-engine", - "logos-blockchain-cryptarchia-sync", - "logos-blockchain-groth16", - "logos-blockchain-key-management-system-service", - "logos-blockchain-ledger", - "logos-blockchain-libp2p", - "logos-blockchain-node", - "logos-blockchain-sdp-service", - "logos-blockchain-time-service", - "logos-blockchain-tracing", - "logos-blockchain-tracing-service", - "logos-blockchain-utils", - "logos-blockchain-wallet-service", - "num-bigint", - "rand 0.8.5", - "serde", - "testing-framework-env", - "thiserror 2.0.18", - "time", - "tracing", -] - [[package]] name = "testing-framework-core" version = "0.1.0" dependencies = [ - "anyhow", "async-trait", "futures", - "hex", - "logos-blockchain-chain-service", - "logos-blockchain-common-http-client", - "logos-blockchain-core", - "logos-blockchain-groth16", - "logos-blockchain-http-api-common", - "logos-blockchain-key-management-system-service", - "logos-blockchain-libp2p", - "logos-blockchain-network-service", - "logos-blockchain-node", - "logos-blockchain-tracing", - "logos-blockchain-tracing-service", - "logos-blockchain-utils", + "parking_lot", "prometheus-http-query", "rand 0.8.5", "reqwest", - "serde", - "serde_json", - "serde_with", - "serde_yaml", - "tempfile", - "testing-framework-config", - "testing-framework-env", "thiserror 2.0.18", "tokio", "tracing", @@ -6436,23 +6512,18 @@ version = "0.1.0" dependencies = [ "anyhow", "async-trait", - "cfgsync_tf", - "logos-blockchain-core", "logos-blockchain-groth16", "logos-blockchain-key-management-system-service", - "logos-blockchain-ledger", - "logos-blockchain-tracing", - "logos-blockchain-tracing-service", "logos-blockchain-zksign", "reqwest", "serde", + "serde_json", "tempfile", "tera", - "testing-framework-config", "testing-framework-core", - "testing-framework-env", "thiserror 2.0.18", "tokio", + "tokio-retry", "tracing", "url", "uuid", @@ -6466,20 +6537,13 @@ dependencies = [ "async-trait", "k8s-openapi", "kube", - "logos-blockchain-tracing", - "logos-blockchain-tracing-service", "reqwest", - "serde", - "serde_yaml", - "tempfile", - "testing-framework-config", "testing-framework-core", - "testing-framework-env", "thiserror 2.0.18", "tokio", + "tokio-retry", "tracing", "url", - "uuid", ] [[package]] @@ -6487,31 +6551,51 @@ name = "testing-framework-runner-local" version = "0.1.0" dependencies = [ "async-trait", - "logos-blockchain-libp2p", - "logos-blockchain-network-service", - "logos-blockchain-node", - "logos-blockchain-utils", - "rand 0.8.5", - "testing-framework-config", + "tempfile", "testing-framework-core", "thiserror 2.0.18", "tokio", + "tokio-retry", "tracing", - "tracing-subscriber 0.3.22", ] [[package]] -name = "testing-framework-workflows" +name = "testing_framework" version = "0.1.0" +source = "git+https://github.com/logos-blockchain/logos-blockchain.git?rev=a4275d00eb3041ed6bfb394e0913cd1ad172224c#a4275d00eb3041ed6bfb394e0913cd1ad172224c" dependencies = [ + "anyhow", "async-trait", + "futures", + "hex", + "logos-blockchain-blend-service", + "logos-blockchain-chain-leader-service", + "logos-blockchain-chain-network-service", + "logos-blockchain-chain-service", + "logos-blockchain-common-http-client", "logos-blockchain-core", + "logos-blockchain-cryptarchia-engine", + "logos-blockchain-cryptarchia-sync", + "logos-blockchain-groth16", + "logos-blockchain-http-api-common", "logos-blockchain-key-management-system-service", + "logos-blockchain-ledger", + "logos-blockchain-libp2p", + "logos-blockchain-network-service", + "logos-blockchain-node", + "logos-blockchain-time-service", + "logos-blockchain-utils", + "num-bigint", "rand 0.8.5", "reqwest", - "testing-framework-config", + "serde", + "serde_json", + "serde_yaml", "testing-framework-core", + "testing-framework-env", + "testing-framework-runner-local", "thiserror 2.0.18", + "time", "tokio", "tracing", ] @@ -6678,6 +6762,17 @@ dependencies = [ "tokio", ] +[[package]] +name = "tokio-retry" +version = "0.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7f57eb36ecbe0fc510036adff84824dd3c24bb781e21bfa67b69d556aa85214f" +dependencies = [ + "pin-project", + "rand 0.8.5", + "tokio", +] + [[package]] name = "tokio-rustls" version = "0.24.1" @@ -7151,6 +7246,12 @@ version = "1.0.4" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "b6c140620e7ffbb22c2dee59cafe6084a59b5ffc27a8859a5f0d494b5d52b6be" +[[package]] +name = "utf8parse" +version = "0.2.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "06abde3611657adf66d383f00b093d7faecc7fa57071cce2578660c9f1010821" + [[package]] name = "utoipa" version = "4.2.3" @@ -7356,9 +7457,9 @@ dependencies = [ [[package]] name = "webpki-roots" -version = "1.0.5" +version = "1.0.6" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "12bed680863276c63889429bfd6cab3b99943659923822de1c8a39c49e4d722c" +checksum = "22cfaf3c063993ff62e73cb4311efde4db1efb31ab78a3e5c457939ad5cc0bed" dependencies = [ "rustls-pki-types", ] @@ -7391,7 +7492,7 @@ version = "0.1.11" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "c2a7b1c03c876122aa43f3020e6c3c3ee5c05081c9a00739faf7503aeba10d22" dependencies = [ - "windows-sys 0.61.2", + "windows-sys 0.48.0", ] [[package]] @@ -7862,18 +7963,18 @@ dependencies = [ [[package]] name = "zerocopy" -version = "0.8.34" +version = "0.8.39" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "71ddd76bcebeed25db614f82bf31a9f4222d3fbba300e6fb6c00afa26cbd4d9d" +checksum = "db6d35d663eadb6c932438e763b262fe1a70987f9ae936e60158176d710cae4a" dependencies = [ "zerocopy-derive", ] [[package]] name = "zerocopy-derive" -version = "0.8.34" +version = "0.8.39" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d8187381b52e32220d50b255276aa16a084ec0a9017a0ca2152a1f55c539758d" +checksum = "4122cd3169e94605190e77839c9a40d40ed048d305bfdc146e7df40ab0f3e517" dependencies = [ "proc-macro2", "quote", @@ -7972,6 +8073,6 @@ dependencies = [ [[package]] name = "zmij" -version = "1.0.17" +version = "1.0.19" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "02aae0f83f69aafc94776e879363e9771d7ecbffe2c7fbb6c14c5e00dfe88439" +checksum = "3ff05f8caa9038894637571ae6b9e29466c1f4f829d26c9b28f869a29cbe3445" diff --git a/Cargo.toml b/Cargo.toml index e05be81..528db3a 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,15 +1,15 @@ [workspace] members = [ - "examples", - "examples/doc-snippets", - "testing-framework/configs", + "logos/examples", + "logos/runtime/env", + "logos/runtime/ext", + "logos/runtime/workloads", "testing-framework/core", "testing-framework/deployers/compose", "testing-framework/deployers/k8s", "testing-framework/deployers/local", - "testing-framework/env", - "testing-framework/tools/cfgsync_tf", - "testing-framework/workflows", + "testing-framework/tools/cfgsync-core", + "testing-framework/tools/cfgsync-runtime", ] resolver = "2" @@ -31,61 +31,79 @@ all = "allow" [workspace.dependencies] # Local testing framework crates -testing-framework-config = { default-features = false, path = "testing-framework/configs" } +cfgsync-core = { default-features = false, path = "testing-framework/tools/cfgsync-core" } +lb-ext = { default-features = false, path = "logos/runtime/ext" } +lb-framework = { default-features = false, package = "testing_framework", git = "https://github.com/logos-blockchain/logos-blockchain.git", rev = "a4275d00eb3041ed6bfb394e0913cd1ad172224c" } +lb-workloads = { default-features = false, path = "logos/runtime/workloads" } testing-framework-core = { default-features = false, path = "testing-framework/core" } -testing-framework-env = { default-features = false, path = "testing-framework/env" } +testing-framework-env = { default-features = false, path = "logos/runtime/env" } testing-framework-runner-compose = { default-features = false, path = "testing-framework/deployers/compose" } testing-framework-runner-k8s = { default-features = false, path = "testing-framework/deployers/k8s" } testing-framework-runner-local = { default-features = false, path = "testing-framework/deployers/local" } -testing-framework-workflows = { default-features = false, path = "testing-framework/workflows" } - -# Logos git dependencies (pinned to latest master) -cfgsync_tf = { default-features = false, path = "testing-framework/tools/cfgsync_tf" } -lb-api-service = { default-features = false, git = "https://github.com/logos-co/nomos-node.git", package = "logos-blockchain-api-service", rev = "feac5ab97ef6dfcebcf6536363a5f330cb79b5e0" } -lb-blend-message = { default-features = false, git = "https://github.com/logos-co/nomos-node.git", package = "logos-blockchain-blend-message", rev = "feac5ab97ef6dfcebcf6536363a5f330cb79b5e0" } -lb-blend-service = { default-features = false, git = "https://github.com/logos-co/nomos-node.git", package = "logos-blockchain-blend-service", rev = "feac5ab97ef6dfcebcf6536363a5f330cb79b5e0" } -lb-chain-broadcast-service = { default-features = false, git = "https://github.com/logos-co/nomos-node.git", package = "logos-blockchain-chain-broadcast-service", rev = "feac5ab97ef6dfcebcf6536363a5f330cb79b5e0" } -lb-chain-leader-service = { default-features = false, git = "https://github.com/logos-co/nomos-node.git", package = "logos-blockchain-chain-leader-service", rev = "feac5ab97ef6dfcebcf6536363a5f330cb79b5e0" } -lb-chain-network = { default-features = false, git = "https://github.com/logos-co/nomos-node.git", package = "logos-blockchain-chain-network-service", rev = "feac5ab97ef6dfcebcf6536363a5f330cb79b5e0" } -lb-chain-service = { default-features = false, git = "https://github.com/logos-co/nomos-node.git", package = "logos-blockchain-chain-service", rev = "feac5ab97ef6dfcebcf6536363a5f330cb79b5e0" } -lb-common-http-client = { default-features = false, git = "https://github.com/logos-co/nomos-node.git", package = "logos-blockchain-common-http-client", rev = "feac5ab97ef6dfcebcf6536363a5f330cb79b5e0" } -lb-core = { default-features = false, git = "https://github.com/logos-co/nomos-node.git", package = "logos-blockchain-core", rev = "feac5ab97ef6dfcebcf6536363a5f330cb79b5e0" } -lb-cryptarchia-engine = { default-features = false, git = "https://github.com/logos-co/nomos-node.git", package = "logos-blockchain-cryptarchia-engine", rev = "feac5ab97ef6dfcebcf6536363a5f330cb79b5e0" } -lb-cryptarchia-sync = { default-features = false, git = "https://github.com/logos-co/nomos-node.git", package = "logos-blockchain-cryptarchia-sync", rev = "feac5ab97ef6dfcebcf6536363a5f330cb79b5e0" } -lb-groth16 = { default-features = false, git = "https://github.com/logos-co/nomos-node.git", package = "logos-blockchain-groth16", rev = "feac5ab97ef6dfcebcf6536363a5f330cb79b5e0" } -lb-http-api-common = { default-features = false, git = "https://github.com/logos-co/nomos-node.git", package = "logos-blockchain-http-api-common", rev = "feac5ab97ef6dfcebcf6536363a5f330cb79b5e0" } -lb-key-management-system-service = { default-features = false, git = "https://github.com/logos-co/nomos-node.git", package = "logos-blockchain-key-management-system-service", rev = "feac5ab97ef6dfcebcf6536363a5f330cb79b5e0" } -lb-ledger = { default-features = false, git = "https://github.com/logos-co/nomos-node.git", package = "logos-blockchain-ledger", rev = "feac5ab97ef6dfcebcf6536363a5f330cb79b5e0" } -lb-libp2p = { default-features = false, git = "https://github.com/logos-co/nomos-node.git", package = "logos-blockchain-libp2p", rev = "feac5ab97ef6dfcebcf6536363a5f330cb79b5e0" } -lb-network-service = { default-features = false, git = "https://github.com/logos-co/nomos-node.git", package = "logos-blockchain-network-service", rev = "feac5ab97ef6dfcebcf6536363a5f330cb79b5e0" } -lb-node = { default-features = false, git = "https://github.com/logos-co/nomos-node.git", package = "logos-blockchain-node", rev = "feac5ab97ef6dfcebcf6536363a5f330cb79b5e0" } -lb-poc = { default-features = false, git = "https://github.com/logos-co/nomos-node.git", package = "logos-blockchain-poc", rev = "feac5ab97ef6dfcebcf6536363a5f330cb79b5e0" } -lb-pol = { default-features = false, git = "https://github.com/logos-co/nomos-node.git", package = "logos-blockchain-pol", rev = "feac5ab97ef6dfcebcf6536363a5f330cb79b5e0" } -lb-sdp-service = { default-features = false, git = "https://github.com/logos-co/nomos-node.git", package = "logos-blockchain-sdp-service", rev = "feac5ab97ef6dfcebcf6536363a5f330cb79b5e0" } -lb-tests = { default-features = false, git = "https://github.com/logos-co/nomos-node.git", package = "logos-blockchain-tests", rev = "feac5ab97ef6dfcebcf6536363a5f330cb79b5e0" } -lb-time-service = { default-features = false, git = "https://github.com/logos-co/nomos-node.git", package = "logos-blockchain-time-service", rev = "feac5ab97ef6dfcebcf6536363a5f330cb79b5e0" } -lb-tracing = { default-features = false, git = "https://github.com/logos-co/nomos-node.git", package = "logos-blockchain-tracing", rev = "feac5ab97ef6dfcebcf6536363a5f330cb79b5e0" } -lb-tracing-service = { default-features = false, git = "https://github.com/logos-co/nomos-node.git", package = "logos-blockchain-tracing-service", rev = "feac5ab97ef6dfcebcf6536363a5f330cb79b5e0" } -lb-tx-service = { default-features = false, git = "https://github.com/logos-co/nomos-node.git", package = "logos-blockchain-tx-service", rev = "feac5ab97ef6dfcebcf6536363a5f330cb79b5e0" } -lb-utils = { default-features = false, git = "https://github.com/logos-co/nomos-node.git", package = "logos-blockchain-utils", rev = "feac5ab97ef6dfcebcf6536363a5f330cb79b5e0" } -lb-wallet = { default-features = false, git = "https://github.com/logos-co/nomos-node.git", package = "logos-blockchain-wallet", rev = "feac5ab97ef6dfcebcf6536363a5f330cb79b5e0" } -lb-wallet-service = { default-features = false, git = "https://github.com/logos-co/nomos-node.git", package = "logos-blockchain-wallet-service", rev = "feac5ab97ef6dfcebcf6536363a5f330cb79b5e0" } -lb-zksign = { default-features = false, git = "https://github.com/logos-co/nomos-node.git", package = "logos-blockchain-zksign", rev = "feac5ab97ef6dfcebcf6536363a5f330cb79b5e0" } +# Logos dependencies (from logos-blockchain master @ deccbb2d2) +broadcast-service = { default-features = false, package = "logos-blockchain-chain-broadcast-service", git = "https://github.com/logos-blockchain/logos-blockchain.git", rev = "a4275d00eb3041ed6bfb394e0913cd1ad172224c" } +cfgsync_runtime = { default-features = false, package = "cfgsync-runtime", path = "testing-framework/tools/cfgsync-runtime" } +chain-leader = { default-features = false, features = [ + "pol-dev-mode", +], package = "logos-blockchain-chain-leader-service", git = "https://github.com/logos-blockchain/logos-blockchain.git", rev = "a4275d00eb3041ed6bfb394e0913cd1ad172224c" } +chain-network = { default-features = false, package = "logos-blockchain-chain-network-service", git = "https://github.com/logos-blockchain/logos-blockchain.git", rev = "a4275d00eb3041ed6bfb394e0913cd1ad172224c" } +chain-service = { default-features = false, package = "logos-blockchain-chain-service", git = "https://github.com/logos-blockchain/logos-blockchain.git", rev = "a4275d00eb3041ed6bfb394e0913cd1ad172224c" } +common-http-client = { default-features = false, package = "logos-blockchain-common-http-client", git = "https://github.com/logos-blockchain/logos-blockchain.git", rev = "a4275d00eb3041ed6bfb394e0913cd1ad172224c" } +cryptarchia-engine = { default-features = false, package = "logos-blockchain-cryptarchia-engine", git = "https://github.com/logos-blockchain/logos-blockchain.git", rev = "a4275d00eb3041ed6bfb394e0913cd1ad172224c" } +cryptarchia-sync = { default-features = false, package = "logos-blockchain-cryptarchia-sync", git = "https://github.com/logos-blockchain/logos-blockchain.git", rev = "a4275d00eb3041ed6bfb394e0913cd1ad172224c" } +groth16 = { default-features = false, package = "logos-blockchain-groth16", git = "https://github.com/logos-blockchain/logos-blockchain.git", rev = "a4275d00eb3041ed6bfb394e0913cd1ad172224c" } +key-management-system-service = { default-features = false, package = "logos-blockchain-key-management-system-service", git = "https://github.com/logos-blockchain/logos-blockchain.git", rev = "a4275d00eb3041ed6bfb394e0913cd1ad172224c" } +nomos-api = { default-features = false, package = "logos-blockchain-api-service", git = "https://github.com/logos-blockchain/logos-blockchain.git", rev = "a4275d00eb3041ed6bfb394e0913cd1ad172224c" } +nomos-blend-message = { default-features = false, package = "logos-blockchain-blend-message", git = "https://github.com/logos-blockchain/logos-blockchain.git", rev = "a4275d00eb3041ed6bfb394e0913cd1ad172224c" } +nomos-blend-service = { default-features = false, package = "logos-blockchain-blend-service", git = "https://github.com/logos-blockchain/logos-blockchain.git", rev = "a4275d00eb3041ed6bfb394e0913cd1ad172224c" } +nomos-core = { default-features = false, package = "logos-blockchain-core", git = "https://github.com/logos-blockchain/logos-blockchain.git", rev = "a4275d00eb3041ed6bfb394e0913cd1ad172224c" } +nomos-http-api-common = { default-features = false, package = "logos-blockchain-http-api-common", git = "https://github.com/logos-blockchain/logos-blockchain.git", rev = "a4275d00eb3041ed6bfb394e0913cd1ad172224c" } +nomos-ledger = { default-features = false, package = "logos-blockchain-ledger", git = "https://github.com/logos-blockchain/logos-blockchain.git", rev = "a4275d00eb3041ed6bfb394e0913cd1ad172224c" } +nomos-libp2p = { default-features = false, package = "logos-blockchain-libp2p", git = "https://github.com/logos-blockchain/logos-blockchain.git", rev = "a4275d00eb3041ed6bfb394e0913cd1ad172224c" } +nomos-network = { default-features = false, package = "logos-blockchain-network-service", git = "https://github.com/logos-blockchain/logos-blockchain.git", rev = "a4275d00eb3041ed6bfb394e0913cd1ad172224c" } +nomos-node = { default-features = false, features = [ + "testing", +], package = "logos-blockchain-node", git = "https://github.com/logos-blockchain/logos-blockchain.git", rev = "a4275d00eb3041ed6bfb394e0913cd1ad172224c" } +nomos-sdp = { default-features = false, package = "logos-blockchain-sdp-service", git = "https://github.com/logos-blockchain/logos-blockchain.git", rev = "a4275d00eb3041ed6bfb394e0913cd1ad172224c" } +nomos-time = { default-features = false, package = "logos-blockchain-time-service", git = "https://github.com/logos-blockchain/logos-blockchain.git", rev = "a4275d00eb3041ed6bfb394e0913cd1ad172224c" } +nomos-tracing = { default-features = false, package = "logos-blockchain-tracing", git = "https://github.com/logos-blockchain/logos-blockchain.git", rev = "a4275d00eb3041ed6bfb394e0913cd1ad172224c" } +nomos-tracing-service = { default-features = false, package = "logos-blockchain-tracing-service", git = "https://github.com/logos-blockchain/logos-blockchain.git", rev = "a4275d00eb3041ed6bfb394e0913cd1ad172224c" } +nomos-utils = { default-features = false, package = "logos-blockchain-utils", git = "https://github.com/logos-blockchain/logos-blockchain.git", rev = "a4275d00eb3041ed6bfb394e0913cd1ad172224c" } +nomos-wallet = { default-features = false, package = "logos-blockchain-wallet-service", git = "https://github.com/logos-blockchain/logos-blockchain.git", rev = "a4275d00eb3041ed6bfb394e0913cd1ad172224c" } +poc = { default-features = false, package = "logos-blockchain-poc", git = "https://github.com/logos-blockchain/logos-blockchain.git", rev = "a4275d00eb3041ed6bfb394e0913cd1ad172224c" } +pol = { default-features = false, package = "logos-blockchain-pol", git = "https://github.com/logos-blockchain/logos-blockchain.git", rev = "a4275d00eb3041ed6bfb394e0913cd1ad172224c" } +tx-service = { default-features = false, package = "logos-blockchain-tx-service", git = "https://github.com/logos-blockchain/logos-blockchain.git", rev = "a4275d00eb3041ed6bfb394e0913cd1ad172224c" } +wallet = { default-features = false, package = "logos-blockchain-wallet", git = "https://github.com/logos-blockchain/logos-blockchain.git", rev = "a4275d00eb3041ed6bfb394e0913cd1ad172224c" } +zksign = { default-features = false, package = "logos-blockchain-zksign", git = "https://github.com/logos-blockchain/logos-blockchain.git", rev = "a4275d00eb3041ed6bfb394e0913cd1ad172224c" } +# lb_* aliases (nomos-node repo naming) +lb_http_api_common = { default-features = false, package = "logos-blockchain-http-api-common", git = "https://github.com/logos-blockchain/logos-blockchain.git", rev = "a4275d00eb3041ed6bfb394e0913cd1ad172224c" } +lb_tracing = { default-features = false, package = "logos-blockchain-tracing", git = "https://github.com/logos-blockchain/logos-blockchain.git", rev = "a4275d00eb3041ed6bfb394e0913cd1ad172224c" } +lb_tracing_service = { default-features = false, package = "logos-blockchain-tracing-service", git = "https://github.com/logos-blockchain/logos-blockchain.git", rev = "a4275d00eb3041ed6bfb394e0913cd1ad172224c" } # External crates -async-trait = { default-features = false, version = "0.1" } -bytes = { default-features = false, version = "1.3" } -hex = { default-features = false, version = "0.4.3" } -libp2p = { default-features = false, version = "0.55" } -overwatch = { default-features = false, git = "https://github.com/logos-co/Overwatch", rev = "f5a9902" } -overwatch-derive = { default-features = false, git = "https://github.com/logos-co/Overwatch", rev = "f5a9902" } -rand = { default-features = false, version = "0.8" } -reqwest = { default-features = false, version = "0.12" } -serde = { default-features = true, features = ["derive"], version = "1.0" } -serde_json = { default-features = false, version = "1.0" } -serde_with = { default-features = false, version = "3.14.0" } -serde_yaml = { default-features = false, version = "0.9.33" } -tempfile = { default-features = false, version = "3" } -thiserror = { default-features = false, version = "2.0" } -tokio = { default-features = false, version = "1" } -tracing = { default-features = false, version = "0.1" } +async-trait = { default-features = false, version = "0.1" } +bytes = { default-features = false, version = "1.3" } +hex = { default-features = false, version = "0.4.3" } +libp2p = { default-features = false, version = "0.55" } +num-bigint = { default-features = false, version = "0.4" } +overwatch = { default-features = false, git = "https://github.com/logos-co/Overwatch" } +overwatch-derive = { default-features = false, git = "https://github.com/logos-co/Overwatch" } +parking_lot = { default-features = false, version = "0.12" } +rand = { default-features = false, features = ["std", "std_rng"], version = "0.8" } +reqwest = { default-features = false, version = "0.12" } +serde = { default-features = true, features = ["derive"], version = "1.0" } +serde_json = { default-features = false, version = "1.0" } +serde_path_to_error = { default-features = false, version = "0.1" } +serde_with = { default-features = false, version = "3.14.0" } +serde_yaml = { default-features = false, version = "0.9.33" } +tempfile = { default-features = false, version = "3" } +thiserror = { default-features = false, version = "2.0" } +time = { default-features = false, version = "0.3" } +tokio = { default-features = false, version = "1" } +tracing = { default-features = false, version = "0.1" } +uuid = { default-features = false, version = "1", features = ["v4"] } + +[patch."https://github.com/logos-blockchain/logos-blockchain-testing.git"] +testing-framework-core = { path = "testing-framework/core" } +testing-framework-env = { path = "logos/runtime/env" } +testing-framework-runner-local = { path = "testing-framework/deployers/local" } diff --git a/examples/cucumber/features/auto_deployer_smoke.feature b/examples/cucumber/features/auto_deployer_smoke.feature deleted file mode 100644 index e69de29..0000000 diff --git a/examples/cucumber/features/compose_smoke.feature b/examples/cucumber/features/compose_smoke.feature deleted file mode 100644 index e69de29..0000000 diff --git a/examples/cucumber/features/local_smoke.feature b/examples/cucumber/features/local_smoke.feature deleted file mode 100644 index e69de29..0000000 diff --git a/examples/src/bin/compose_runner.rs b/examples/src/bin/compose_runner.rs deleted file mode 100644 index 70f2308..0000000 --- a/examples/src/bin/compose_runner.rs +++ /dev/null @@ -1,116 +0,0 @@ -use std::{process, time::Duration}; - -use anyhow::{Context as _, Result}; -use runner_examples::{ - ChaosBuilderExt as _, DeployerKind, ScenarioBuilderExt as _, demo, read_env_any, -}; -use testing_framework_core::scenario::{Deployer as _, Runner, ScenarioBuilder}; -use testing_framework_runner_compose::{ComposeDeployer, ComposeRunnerError}; -use tracing::{info, warn}; - -const MIXED_TXS_PER_BLOCK: u64 = 5; -const TOTAL_WALLETS: usize = 1000; -const TRANSACTION_WALLETS: usize = 500; - -// Chaos Testing Constants -const CHAOS_MIN_DELAY_SECS: u64 = 120; -const CHAOS_MAX_DELAY_SECS: u64 = 180; -const CHAOS_COOLDOWN_SECS: u64 = 240; -const CHAOS_DELAY_HEADROOM_SECS: u64 = 1; - -#[tokio::main] -async fn main() { - runner_examples::defaults::init_node_log_dir_defaults(DeployerKind::Compose); - - tracing_subscriber::fmt::init(); - - let nodes = read_env_any(&["LOGOS_BLOCKCHAIN_DEMO_NODES"], demo::DEFAULT_NODES); - - let run_secs = read_env_any(&["LOGOS_BLOCKCHAIN_DEMO_RUN_SECS"], demo::DEFAULT_RUN_SECS); - - info!(nodes, run_secs, "starting compose runner demo"); - - if let Err(err) = run_compose_case(nodes, Duration::from_secs(run_secs)).await { - warn!("compose runner demo failed: {err:#}"); - process::exit(1); - } -} - -async fn run_compose_case(nodes: usize, run_duration: Duration) -> Result<()> { - info!( - nodes, - duration_secs = run_duration.as_secs(), - "building scenario plan" - ); - - let scenario = - ScenarioBuilder::topology_with(|t| t.network_star().nodes(nodes)).enable_node_control(); - - let scenario = if let Some((chaos_min_delay, chaos_max_delay, chaos_target_cooldown)) = - chaos_timings(run_duration) - { - scenario.chaos_with(|c| { - c.restart() - .min_delay(chaos_min_delay) - .max_delay(chaos_max_delay) - .target_cooldown(chaos_target_cooldown) - .apply() - }) - } else { - scenario - }; - - let mut plan = scenario - .wallets(TOTAL_WALLETS) - .transactions_with(|txs| txs.rate(MIXED_TXS_PER_BLOCK).users(TRANSACTION_WALLETS)) - .with_run_duration(run_duration) - .expect_consensus_liveness() - .build()?; - - let deployer = ComposeDeployer::new(); - info!("deploying compose stack"); - - let runner: Runner = match deployer.deploy(&plan).await { - Ok(runner) => runner, - Err(ComposeRunnerError::DockerUnavailable) => { - warn!("Docker is unavailable; cannot run compose demo"); - return Ok(()); - } - Err(err) => return Err(anyhow::Error::new(err)).context("deploying compose stack failed"), - }; - - if !runner.context().telemetry().is_configured() { - warn!( - "metrics querying is disabled; set LOGOS_BLOCKCHAIN_METRICS_QUERY_URL to enable PromQL queries" - ); - } - - info!("running scenario"); - runner - .run(&mut plan) - .await - .context("running compose scenario failed")?; - Ok(()) -} - -fn chaos_timings(run_duration: Duration) -> Option<(Duration, Duration, Duration)> { - let headroom = Duration::from_secs(CHAOS_DELAY_HEADROOM_SECS); - let Some(max_allowed_delay) = run_duration.checked_sub(headroom) else { - return None; - }; - - let chaos_min_delay = Duration::from_secs(CHAOS_MIN_DELAY_SECS); - if max_allowed_delay <= chaos_min_delay { - return None; - } - - let chaos_max_delay = Duration::from_secs(CHAOS_MAX_DELAY_SECS) - .min(max_allowed_delay) - .max(chaos_min_delay); - - let chaos_target_cooldown = Duration::from_secs(CHAOS_COOLDOWN_SECS) - .min(max_allowed_delay) - .max(chaos_max_delay); - - Some((chaos_min_delay, chaos_max_delay, chaos_target_cooldown)) -} diff --git a/examples/src/demo.rs b/examples/src/demo.rs deleted file mode 100644 index cf56a9b..0000000 --- a/examples/src/demo.rs +++ /dev/null @@ -1,2 +0,0 @@ -pub const DEFAULT_NODES: usize = 2; -pub const DEFAULT_RUN_SECS: u64 = 60; diff --git a/examples/src/env.rs b/examples/src/env.rs deleted file mode 100644 index f6a1ed0..0000000 --- a/examples/src/env.rs +++ /dev/null @@ -1,10 +0,0 @@ -use std::{env, str::FromStr}; - -pub fn read_env_any(keys: &[&str], default: T) -> T -where - T: FromStr + Copy, -{ - keys.iter() - .find_map(|key| env::var(key).ok().and_then(|raw| raw.parse::().ok())) - .unwrap_or(default) -} diff --git a/examples/src/lib.rs b/examples/src/lib.rs deleted file mode 100644 index 7512183..0000000 --- a/examples/src/lib.rs +++ /dev/null @@ -1,13 +0,0 @@ -pub mod defaults; -pub mod demo; -pub mod env; - -pub use env::read_env_any; -pub use testing_framework_workflows::{ChaosBuilderExt, ScenarioBuilderExt}; - -#[derive(Clone, Copy, Debug, Default, Eq, PartialEq)] -pub enum DeployerKind { - #[default] - Local, - Compose, -} diff --git a/examples/tests/manual_cluster.rs b/examples/tests/manual_cluster.rs deleted file mode 100644 index a15ba3c..0000000 --- a/examples/tests/manual_cluster.rs +++ /dev/null @@ -1,139 +0,0 @@ -use std::time::Duration; - -use anyhow::Result; -use testing_framework_core::{ - scenario::{PeerSelection, StartNodeOptions}, - topology::config::TopologyConfig, -}; -use testing_framework_runner_local::LocalDeployer; -use tokio::time::sleep; -use tracing_subscriber::fmt::try_init; - -const MAX_HEIGHT_DIFF: u64 = 5; -const CONVERGENCE_TIMEOUT: Duration = Duration::from_secs(60); -const CONVERGENCE_POLL: Duration = Duration::from_secs(2); - -#[tokio::test] -#[ignore = "run manually with `cargo test -p runner-examples -- --ignored manual_cluster_two_clusters_merge`"] -async fn manual_cluster_two_clusters_merge() -> Result<()> { - let _ = try_init(); - // Required env vars (set on the command line when running this test): - // - `RUST_LOG=info` (optional) - let config = TopologyConfig::with_node_numbers(2); - let deployer = LocalDeployer::new(); - let cluster = deployer.manual_cluster(config)?; - // Nodes are stopped automatically when the cluster is dropped. - - println!("starting node a"); - - let node_a = cluster - .start_node_with( - "a", - StartNodeOptions { - peers: PeerSelection::None, - config_patch: None, - persist_dir: None, - }, - ) - .await? - .api; - - println!("waiting briefly before starting c"); - sleep(Duration::from_secs(30)).await; - - println!("starting node c -> a"); - let node_c = cluster - .start_node_with( - "c", - StartNodeOptions { - peers: PeerSelection::Named(vec!["node-a".to_owned()]), - config_patch: None, - persist_dir: None, - }, - ) - .await? - .api; - - println!("waiting for network readiness: cluster a,c"); - cluster.wait_network_ready().await?; - - let start = tokio::time::Instant::now(); - - loop { - let a_info = node_a.consensus_info().await?; - let c_info = node_c.consensus_info().await?; - let a_height = a_info.height; - let c_height = c_info.height; - let diff = a_height.abs_diff(c_height); - - if diff <= MAX_HEIGHT_DIFF { - println!( - "final heights: node-a={}, node-c={}, diff={}", - a_height, c_height, diff - ); - return Ok(()); - } - - if start.elapsed() >= CONVERGENCE_TIMEOUT { - return Err(anyhow::anyhow!( - "height diff too large after timeout: {diff} > {MAX_HEIGHT_DIFF} (node-a={a_height}, node-c={c_height})" - )); - } - - sleep(CONVERGENCE_POLL).await; - } -} - -#[tokio::test] -#[ignore = "run manually with `cargo test -p runner-examples -- --ignored manual_cluster_with_persist_dir`"] -async fn manual_cluster_with_persist_dir() -> Result<()> { - use std::path::PathBuf; - - let _ = try_init(); - // Required env vars (set on the command line when running this test): - // - `RUST_LOG=info` (optional) - let config = TopologyConfig::with_node_numbers(1); - let deployer = LocalDeployer::new(); - let cluster = deployer.manual_cluster(config)?; - - let persist_dir = PathBuf::from("/tmp/test-node-persist-dir"); - - println!("starting validator with persist_dir: {:?}", persist_dir); - - let _node = cluster - .start_node_with( - "test", - StartNodeOptions { - peers: PeerSelection::None, - config_patch: None, - persist_dir: Some(persist_dir.clone()), - }, - ) - .await? - .api; - - println!("validator started, waiting briefly"); - sleep(Duration::from_secs(5)).await; - - // Drop the cluster to trigger the persist logic - drop(cluster); - - println!("cluster dropped, checking if persist_dir exists"); - - // Verify the persist_dir was created - if !persist_dir.exists() { - return Err(anyhow::anyhow!( - "persist_dir was not created: {:?}", - persist_dir - )); - } - - println!("persist_dir verified: {:?}", persist_dir); - - // Clean up - if persist_dir.exists() { - std::fs::remove_dir_all(&persist_dir)?; - } - - Ok(()) -} diff --git a/examples/tests/node_config_override.rs b/examples/tests/node_config_override.rs deleted file mode 100644 index d16071c..0000000 --- a/examples/tests/node_config_override.rs +++ /dev/null @@ -1,114 +0,0 @@ -use std::{ - net::{SocketAddr, TcpListener}, - time::Duration, -}; - -use anyhow::Result; -use testing_framework_core::{ - nodes::ApiClient, - scenario::{Deployer, PeerSelection, ScenarioBuilder, StartNodeOptions}, - topology::config::TopologyConfig, -}; -use testing_framework_runner_local::LocalDeployer; -use tracing_subscriber::fmt::try_init; - -#[tokio::test] -#[ignore = "run manually with `cargo test -p runner-examples -- --ignored manual_cluster_api_port_override`"] -async fn manual_cluster_api_port_override() -> Result<()> { - let _ = try_init(); - // Required env vars (set on the command line when running this test): - // - `LOGOS_BLOCKCHAIN_NODE_BIN=...` - // - `LOGOS_BLOCKCHAIN_CIRCUITS=...` - // - `RUST_LOG=info` (optional) - - let api_port = random_api_port(); - - let deployer = LocalDeployer::new(); - let cluster = deployer.manual_cluster(TopologyConfig::with_node_numbers(1))?; - - let node = cluster - .start_node_with( - "override-api", - StartNodeOptions { - peers: PeerSelection::None, - config_patch: None, - persist_dir: None, - } - .create_patch(move |mut config| { - println!("overriding API port to {api_port}"); - - let current_addr = config.user.http.backend_settings.address; - - config.user.http.backend_settings.address = - SocketAddr::new(current_addr.ip(), api_port); - - Ok(config) - }), - ) - .await? - .api; - - node.consensus_info() - .await - .expect("consensus_info should succeed"); - - assert_eq!(resolved_port(&node), api_port); - - Ok(()) -} - -#[tokio::test] -#[ignore = "run manually with `cargo test -p runner-examples -- --ignored scenario_builder_api_port_override`"] -async fn scenario_builder_api_port_override() -> Result<()> { - let _ = try_init(); - // Required env vars (set on the command line when running this test): - // - `LOGOS_BLOCKCHAIN_NODE_BIN=...` - // - `LOGOS_BLOCKCHAIN_CIRCUITS=...` - // - `RUST_LOG=info` (optional) - let api_port = random_api_port(); - - let mut scenario = ScenarioBuilder::topology_with(|t| { - t.network_star() - .nodes(1) - .node_config_patch_with(0, move |mut config| { - println!("overriding API port to {api_port}"); - - let current_addr = config.user.http.backend_settings.address; - - config.user.http.backend_settings.address = - SocketAddr::new(current_addr.ip(), api_port); - - Ok(config) - }) - }) - .with_run_duration(Duration::from_secs(1)) - .build()?; - - let deployer = LocalDeployer::default(); - let runner = deployer.deploy(&scenario).await?; - let handle = runner.run(&mut scenario).await?; - - let client = handle - .context() - .node_clients() - .any_client() - .ok_or_else(|| anyhow::anyhow!("scenario did not expose any node clients"))?; - - client - .consensus_info() - .await - .expect("consensus_info should succeed"); - - assert_eq!(resolved_port(&client), api_port); - - Ok(()) -} - -fn random_api_port() -> u16 { - let listener = TcpListener::bind("127.0.0.1:0").expect("bind random API port"); - listener.local_addr().expect("read API port").port() -} - -fn resolved_port(client: &ApiClient) -> u16 { - client.base_url().port().unwrap_or_default() -} diff --git a/examples/tests/orphan_manual_cluster.rs b/examples/tests/orphan_manual_cluster.rs deleted file mode 100644 index 7c2ea0c..0000000 --- a/examples/tests/orphan_manual_cluster.rs +++ /dev/null @@ -1,110 +0,0 @@ -use std::time::Duration; - -use anyhow::{Result, anyhow}; -use testing_framework_core::{ - scenario::StartNodeOptions, - topology::{ - config::{TopologyBuilder, TopologyConfig}, - configs::network::Libp2pNetworkLayout, - }, -}; -use testing_framework_runner_local::LocalDeployer; -use testing_framework_workflows::{start_node_with_timeout, wait_for_min_height}; -use tokio::time::{sleep, timeout}; -use tracing_subscriber::fmt::try_init; - -const MIN_HEIGHT: u64 = 5; -const INITIAL_READY_TIMEOUT: Duration = Duration::from_secs(500); -const CATCH_UP_TIMEOUT: Duration = Duration::from_secs(300); -const START_NODE_TIMEOUT: Duration = Duration::from_secs(90); -const TEST_TIMEOUT: Duration = Duration::from_secs(600); -const POLL_INTERVAL: Duration = Duration::from_secs(1); - -#[tokio::test] -#[ignore = "run manually with `cargo test -p runner-examples -- --ignored orphan_manual_cluster`"] -async fn orphan_manual_cluster() -> Result<()> { - let _ = try_init(); - // Required env vars (set on the command line when running this test): - // - `LOGOS_BLOCKCHAIN_NODE_BIN=...` - // - `NOMOS_KZGRS_PARAMS_PATH=...` (path to KZG params directory/file) - // - `RUST_LOG=info` (optional; better visibility) - - let config = TopologyConfig::with_node_numbers(3); - timeout(TEST_TIMEOUT, async { - let builder = TopologyBuilder::new(config).with_network_layout(Libp2pNetworkLayout::Full); - - let deployer = LocalDeployer::new(); - let cluster = deployer.manual_cluster_with_builder(builder)?; - // Nodes are stopped automatically when the cluster is dropped. - - let node_a = start_node_with_timeout( - &cluster, - "a", - StartNodeOptions::default(), - START_NODE_TIMEOUT, - ) - .await? - .api; - - let node_b = start_node_with_timeout( - &cluster, - "b", - StartNodeOptions::default(), - START_NODE_TIMEOUT, - ) - .await? - .api; - - wait_for_min_height( - &[node_a.clone(), node_b.clone()], - MIN_HEIGHT, - INITIAL_READY_TIMEOUT, - POLL_INTERVAL, - ) - .await?; - - let behind_node = start_node_with_timeout( - &cluster, - "c", - StartNodeOptions::default(), - START_NODE_TIMEOUT, - ) - .await? - .api; - - timeout(CATCH_UP_TIMEOUT, async { - loop { - let node_a_info = node_a - .consensus_info() - .await - .map_err(|err| anyhow!("node-a consensus_info failed: {err}"))?; - - let node_b_info = node_b - .consensus_info() - .await - .map_err(|err| anyhow!("node-b consensus_info failed: {err}"))?; - - let behind_info = behind_node - .consensus_info() - .await - .map_err(|err| anyhow!("node-c consensus_info failed: {err}"))?; - - let initial_min_height = node_a_info.height.min(node_b_info.height); - - if behind_info.height >= initial_min_height.saturating_sub(1) { - return Ok::<(), anyhow::Error>(()); - } - - sleep(POLL_INTERVAL).await; - } - }) - .await - .map_err(|_| anyhow!("timeout waiting for behind node to catch up"))??; - - Ok::<(), anyhow::Error>(()) - }) - .await - .map_err(|_| anyhow!("test timeout exceeded"))??; - - Ok(()) -} diff --git a/logos/README.md b/logos/README.md new file mode 100644 index 0000000..a893e0a --- /dev/null +++ b/logos/README.md @@ -0,0 +1,33 @@ +# Logos Testing Framework Extension + +This directory contains the **Logos-specific extension layer** that plugs into the generic +`testing-framework` core. The goal is to keep all Nomos logic in one place with a clear +structure so it can be reviewed and moved into the `logos-blockchain-node` repo cleanly. + +## Layout + +- `runtime/env` + Logos implementation of the core `Application` trait and runtime wiring. + +- `runtime/ext` + Logos extension glue for compose/k8s/cfgsync integration and scenario helpers. + +- `runtime/workloads` + Logos workloads and expectations (e.g., transaction workload, consensus liveness). + +- `runtime/cfgsync` + Logos cfgsync server/client and config bundling. + +- `infra/assets/stack` + Docker stack assets, scripts, and monitoring bundles. + +- `infra/helm/logos-runner` + Helm chart used by the k8s deployer. + +## Extension Boundary + +The **core** (`testing-framework/*`) remains Logos-agnostic. All app assumptions should +live under `logos/runtime/*` and expose only the minimal surface needed by the core. + +If you need to introduce new core capabilities, add them to the core and keep the Logos +implementation in `logos/runtime/*`. diff --git a/examples/Cargo.toml b/logos/examples/Cargo.toml similarity index 83% rename from examples/Cargo.toml rename to logos/examples/Cargo.toml index 7fb410b..68b9a18 100644 --- a/examples/Cargo.toml +++ b/logos/examples/Cargo.toml @@ -11,11 +11,12 @@ version = "0.1.0" [dependencies] anyhow = "1" +lb-ext = { workspace = true } +lb-framework = { workspace = true } +lb-workloads = { workspace = true } testing-framework-core = { workspace = true } testing-framework-runner-compose = { workspace = true } testing-framework-runner-k8s = { workspace = true } -testing-framework-runner-local = { workspace = true } -testing-framework-workflows = { workspace = true } tokio = { features = ["macros", "net", "rt-multi-thread", "time"], workspace = true } tracing = { workspace = true } tracing-subscriber = { features = ["env-filter", "fmt"], version = "0.3" } diff --git a/examples/doc-snippets/Cargo.toml b/logos/examples/doc-snippets/Cargo.toml similarity index 100% rename from examples/doc-snippets/Cargo.toml rename to logos/examples/doc-snippets/Cargo.toml diff --git a/examples/doc-snippets/src/architecture_overview_builder_api.rs b/logos/examples/doc-snippets/src/architecture_overview_builder_api.rs similarity index 100% rename from examples/doc-snippets/src/architecture_overview_builder_api.rs rename to logos/examples/doc-snippets/src/architecture_overview_builder_api.rs diff --git a/examples/doc-snippets/src/chaos_workloads_random_restart.rs b/logos/examples/doc-snippets/src/chaos_workloads_random_restart.rs similarity index 100% rename from examples/doc-snippets/src/chaos_workloads_random_restart.rs rename to logos/examples/doc-snippets/src/chaos_workloads_random_restart.rs diff --git a/examples/doc-snippets/src/custom_workload_example_expectation.rs b/logos/examples/doc-snippets/src/custom_workload_example_expectation.rs similarity index 100% rename from examples/doc-snippets/src/custom_workload_example_expectation.rs rename to logos/examples/doc-snippets/src/custom_workload_example_expectation.rs diff --git a/examples/doc-snippets/src/custom_workload_example_workload.rs b/logos/examples/doc-snippets/src/custom_workload_example_workload.rs similarity index 100% rename from examples/doc-snippets/src/custom_workload_example_workload.rs rename to logos/examples/doc-snippets/src/custom_workload_example_workload.rs diff --git a/examples/doc-snippets/src/dsl_cheat_sheet_build.rs b/logos/examples/doc-snippets/src/dsl_cheat_sheet_build.rs similarity index 100% rename from examples/doc-snippets/src/dsl_cheat_sheet_build.rs rename to logos/examples/doc-snippets/src/dsl_cheat_sheet_build.rs diff --git a/examples/doc-snippets/src/dsl_cheat_sheet_build_complete_example.rs b/logos/examples/doc-snippets/src/dsl_cheat_sheet_build_complete_example.rs similarity index 100% rename from examples/doc-snippets/src/dsl_cheat_sheet_build_complete_example.rs rename to logos/examples/doc-snippets/src/dsl_cheat_sheet_build_complete_example.rs diff --git a/examples/doc-snippets/src/dsl_cheat_sheet_deployers.rs b/logos/examples/doc-snippets/src/dsl_cheat_sheet_deployers.rs similarity index 100% rename from examples/doc-snippets/src/dsl_cheat_sheet_deployers.rs rename to logos/examples/doc-snippets/src/dsl_cheat_sheet_deployers.rs diff --git a/examples/doc-snippets/src/dsl_cheat_sheet_expectations.rs b/logos/examples/doc-snippets/src/dsl_cheat_sheet_expectations.rs similarity index 100% rename from examples/doc-snippets/src/dsl_cheat_sheet_expectations.rs rename to logos/examples/doc-snippets/src/dsl_cheat_sheet_expectations.rs diff --git a/examples/doc-snippets/src/dsl_cheat_sheet_imports.rs b/logos/examples/doc-snippets/src/dsl_cheat_sheet_imports.rs similarity index 100% rename from examples/doc-snippets/src/dsl_cheat_sheet_imports.rs rename to logos/examples/doc-snippets/src/dsl_cheat_sheet_imports.rs diff --git a/examples/doc-snippets/src/dsl_cheat_sheet_run_duration.rs b/logos/examples/doc-snippets/src/dsl_cheat_sheet_run_duration.rs similarity index 100% rename from examples/doc-snippets/src/dsl_cheat_sheet_run_duration.rs rename to logos/examples/doc-snippets/src/dsl_cheat_sheet_run_duration.rs diff --git a/examples/doc-snippets/src/dsl_cheat_sheet_topology.rs b/logos/examples/doc-snippets/src/dsl_cheat_sheet_topology.rs similarity index 100% rename from examples/doc-snippets/src/dsl_cheat_sheet_topology.rs rename to logos/examples/doc-snippets/src/dsl_cheat_sheet_topology.rs diff --git a/examples/doc-snippets/src/dsl_cheat_sheet_transactions_workload.rs b/logos/examples/doc-snippets/src/dsl_cheat_sheet_transactions_workload.rs similarity index 100% rename from examples/doc-snippets/src/dsl_cheat_sheet_transactions_workload.rs rename to logos/examples/doc-snippets/src/dsl_cheat_sheet_transactions_workload.rs diff --git a/examples/doc-snippets/src/dsl_cheat_sheet_wallets.rs b/logos/examples/doc-snippets/src/dsl_cheat_sheet_wallets.rs similarity index 100% rename from examples/doc-snippets/src/dsl_cheat_sheet_wallets.rs rename to logos/examples/doc-snippets/src/dsl_cheat_sheet_wallets.rs diff --git a/examples/doc-snippets/src/dsl_cheat_sheet_workload_chaos.rs b/logos/examples/doc-snippets/src/dsl_cheat_sheet_workload_chaos.rs similarity index 100% rename from examples/doc-snippets/src/dsl_cheat_sheet_workload_chaos.rs rename to logos/examples/doc-snippets/src/dsl_cheat_sheet_workload_chaos.rs diff --git a/examples/doc-snippets/src/dsl_cheat_sheet_workload_execution.rs b/logos/examples/doc-snippets/src/dsl_cheat_sheet_workload_execution.rs similarity index 100% rename from examples/doc-snippets/src/dsl_cheat_sheet_workload_execution.rs rename to logos/examples/doc-snippets/src/dsl_cheat_sheet_workload_execution.rs diff --git a/examples/doc-snippets/src/examples_advanced_aggressive_chaos_test.rs b/logos/examples/doc-snippets/src/examples_advanced_aggressive_chaos_test.rs similarity index 100% rename from examples/doc-snippets/src/examples_advanced_aggressive_chaos_test.rs rename to logos/examples/doc-snippets/src/examples_advanced_aggressive_chaos_test.rs diff --git a/examples/doc-snippets/src/examples_advanced_load_progression_test.rs b/logos/examples/doc-snippets/src/examples_advanced_load_progression_test.rs similarity index 100% rename from examples/doc-snippets/src/examples_advanced_load_progression_test.rs rename to logos/examples/doc-snippets/src/examples_advanced_load_progression_test.rs diff --git a/examples/doc-snippets/src/examples_advanced_sustained_load_test.rs b/logos/examples/doc-snippets/src/examples_advanced_sustained_load_test.rs similarity index 100% rename from examples/doc-snippets/src/examples_advanced_sustained_load_test.rs rename to logos/examples/doc-snippets/src/examples_advanced_sustained_load_test.rs diff --git a/examples/doc-snippets/src/examples_chaos_resilience.rs b/logos/examples/doc-snippets/src/examples_chaos_resilience.rs similarity index 100% rename from examples/doc-snippets/src/examples_chaos_resilience.rs rename to logos/examples/doc-snippets/src/examples_chaos_resilience.rs diff --git a/examples/doc-snippets/src/examples_da_and_transactions.rs b/logos/examples/doc-snippets/src/examples_da_and_transactions.rs similarity index 100% rename from examples/doc-snippets/src/examples_da_and_transactions.rs rename to logos/examples/doc-snippets/src/examples_da_and_transactions.rs diff --git a/examples/doc-snippets/src/examples_simple_consensus.rs b/logos/examples/doc-snippets/src/examples_simple_consensus.rs similarity index 100% rename from examples/doc-snippets/src/examples_simple_consensus.rs rename to logos/examples/doc-snippets/src/examples_simple_consensus.rs diff --git a/examples/doc-snippets/src/examples_transaction_workload.rs b/logos/examples/doc-snippets/src/examples_transaction_workload.rs similarity index 100% rename from examples/doc-snippets/src/examples_transaction_workload.rs rename to logos/examples/doc-snippets/src/examples_transaction_workload.rs diff --git a/examples/doc-snippets/src/internal_crate_reference_add_deployer.rs b/logos/examples/doc-snippets/src/internal_crate_reference_add_deployer.rs similarity index 100% rename from examples/doc-snippets/src/internal_crate_reference_add_deployer.rs rename to logos/examples/doc-snippets/src/internal_crate_reference_add_deployer.rs diff --git a/examples/doc-snippets/src/internal_crate_reference_add_expectation_builder_ext.rs b/logos/examples/doc-snippets/src/internal_crate_reference_add_expectation_builder_ext.rs similarity index 100% rename from examples/doc-snippets/src/internal_crate_reference_add_expectation_builder_ext.rs rename to logos/examples/doc-snippets/src/internal_crate_reference_add_expectation_builder_ext.rs diff --git a/examples/doc-snippets/src/internal_crate_reference_add_expectation_trait.rs b/logos/examples/doc-snippets/src/internal_crate_reference_add_expectation_trait.rs similarity index 100% rename from examples/doc-snippets/src/internal_crate_reference_add_expectation_trait.rs rename to logos/examples/doc-snippets/src/internal_crate_reference_add_expectation_trait.rs diff --git a/examples/doc-snippets/src/internal_crate_reference_add_workload_builder_ext.rs b/logos/examples/doc-snippets/src/internal_crate_reference_add_workload_builder_ext.rs similarity index 100% rename from examples/doc-snippets/src/internal_crate_reference_add_workload_builder_ext.rs rename to logos/examples/doc-snippets/src/internal_crate_reference_add_workload_builder_ext.rs diff --git a/examples/doc-snippets/src/internal_crate_reference_add_workload_trait.rs b/logos/examples/doc-snippets/src/internal_crate_reference_add_workload_trait.rs similarity index 100% rename from examples/doc-snippets/src/internal_crate_reference_add_workload_trait.rs rename to logos/examples/doc-snippets/src/internal_crate_reference_add_workload_trait.rs diff --git a/examples/doc-snippets/src/internal_crate_reference_add_workload_use_in_examples.rs b/logos/examples/doc-snippets/src/internal_crate_reference_add_workload_use_in_examples.rs similarity index 100% rename from examples/doc-snippets/src/internal_crate_reference_add_workload_use_in_examples.rs rename to logos/examples/doc-snippets/src/internal_crate_reference_add_workload_use_in_examples.rs diff --git a/examples/doc-snippets/src/lib.rs b/logos/examples/doc-snippets/src/lib.rs similarity index 100% rename from examples/doc-snippets/src/lib.rs rename to logos/examples/doc-snippets/src/lib.rs diff --git a/examples/doc-snippets/src/manual_cluster_external_driver_example.rs b/logos/examples/doc-snippets/src/manual_cluster_external_driver_example.rs similarity index 100% rename from examples/doc-snippets/src/manual_cluster_external_driver_example.rs rename to logos/examples/doc-snippets/src/manual_cluster_external_driver_example.rs diff --git a/examples/doc-snippets/src/manual_cluster_validation_patterns.rs b/logos/examples/doc-snippets/src/manual_cluster_validation_patterns.rs similarity index 100% rename from examples/doc-snippets/src/manual_cluster_validation_patterns.rs rename to logos/examples/doc-snippets/src/manual_cluster_validation_patterns.rs diff --git a/examples/doc-snippets/src/node_control_accessing_control.rs b/logos/examples/doc-snippets/src/node_control_accessing_control.rs similarity index 100% rename from examples/doc-snippets/src/node_control_accessing_control.rs rename to logos/examples/doc-snippets/src/node_control_accessing_control.rs diff --git a/examples/doc-snippets/src/node_control_trait.rs b/logos/examples/doc-snippets/src/node_control_trait.rs similarity index 100% rename from examples/doc-snippets/src/node_control_trait.rs rename to logos/examples/doc-snippets/src/node_control_trait.rs diff --git a/examples/doc-snippets/src/quickstart_adjust_topology.rs b/logos/examples/doc-snippets/src/quickstart_adjust_topology.rs similarity index 100% rename from examples/doc-snippets/src/quickstart_adjust_topology.rs rename to logos/examples/doc-snippets/src/quickstart_adjust_topology.rs diff --git a/examples/doc-snippets/src/quickstart_core_api_pattern.rs b/logos/examples/doc-snippets/src/quickstart_core_api_pattern.rs similarity index 100% rename from examples/doc-snippets/src/quickstart_core_api_pattern.rs rename to logos/examples/doc-snippets/src/quickstart_core_api_pattern.rs diff --git a/examples/doc-snippets/src/quickstart_step_1_topology.rs b/logos/examples/doc-snippets/src/quickstart_step_1_topology.rs similarity index 100% rename from examples/doc-snippets/src/quickstart_step_1_topology.rs rename to logos/examples/doc-snippets/src/quickstart_step_1_topology.rs diff --git a/examples/doc-snippets/src/quickstart_step_2_wallets.rs b/logos/examples/doc-snippets/src/quickstart_step_2_wallets.rs similarity index 100% rename from examples/doc-snippets/src/quickstart_step_2_wallets.rs rename to logos/examples/doc-snippets/src/quickstart_step_2_wallets.rs diff --git a/examples/doc-snippets/src/quickstart_step_3_workloads.rs b/logos/examples/doc-snippets/src/quickstart_step_3_workloads.rs similarity index 100% rename from examples/doc-snippets/src/quickstart_step_3_workloads.rs rename to logos/examples/doc-snippets/src/quickstart_step_3_workloads.rs diff --git a/examples/doc-snippets/src/quickstart_step_4_expectation.rs b/logos/examples/doc-snippets/src/quickstart_step_4_expectation.rs similarity index 100% rename from examples/doc-snippets/src/quickstart_step_4_expectation.rs rename to logos/examples/doc-snippets/src/quickstart_step_4_expectation.rs diff --git a/examples/doc-snippets/src/quickstart_step_5_run_duration.rs b/logos/examples/doc-snippets/src/quickstart_step_5_run_duration.rs similarity index 100% rename from examples/doc-snippets/src/quickstart_step_5_run_duration.rs rename to logos/examples/doc-snippets/src/quickstart_step_5_run_duration.rs diff --git a/examples/doc-snippets/src/quickstart_step_6_deploy_and_execute.rs b/logos/examples/doc-snippets/src/quickstart_step_6_deploy_and_execute.rs similarity index 100% rename from examples/doc-snippets/src/quickstart_step_6_deploy_and_execute.rs rename to logos/examples/doc-snippets/src/quickstart_step_6_deploy_and_execute.rs diff --git a/examples/doc-snippets/src/quickstart_swap_deployer_compose.rs b/logos/examples/doc-snippets/src/quickstart_swap_deployer_compose.rs similarity index 100% rename from examples/doc-snippets/src/quickstart_swap_deployer_compose.rs rename to logos/examples/doc-snippets/src/quickstart_swap_deployer_compose.rs diff --git a/examples/doc-snippets/src/testing_philosophy_declarative_over_imperative.rs b/logos/examples/doc-snippets/src/testing_philosophy_declarative_over_imperative.rs similarity index 100% rename from examples/doc-snippets/src/testing_philosophy_declarative_over_imperative.rs rename to logos/examples/doc-snippets/src/testing_philosophy_declarative_over_imperative.rs diff --git a/examples/doc-snippets/src/testing_philosophy_determinism_first.rs b/logos/examples/doc-snippets/src/testing_philosophy_determinism_first.rs similarity index 100% rename from examples/doc-snippets/src/testing_philosophy_determinism_first.rs rename to logos/examples/doc-snippets/src/testing_philosophy_determinism_first.rs diff --git a/examples/doc-snippets/src/testing_philosophy_minimum_run_windows.rs b/logos/examples/doc-snippets/src/testing_philosophy_minimum_run_windows.rs similarity index 100% rename from examples/doc-snippets/src/testing_philosophy_minimum_run_windows.rs rename to logos/examples/doc-snippets/src/testing_philosophy_minimum_run_windows.rs diff --git a/examples/doc-snippets/src/testing_philosophy_protocol_time_not_wall_time.rs b/logos/examples/doc-snippets/src/testing_philosophy_protocol_time_not_wall_time.rs similarity index 100% rename from examples/doc-snippets/src/testing_philosophy_protocol_time_not_wall_time.rs rename to logos/examples/doc-snippets/src/testing_philosophy_protocol_time_not_wall_time.rs diff --git a/logos/examples/src/bin/compose_runner.rs b/logos/examples/src/bin/compose_runner.rs new file mode 100644 index 0000000..25cecf5 --- /dev/null +++ b/logos/examples/src/bin/compose_runner.rs @@ -0,0 +1,83 @@ +use std::{process, time::Duration}; + +use anyhow::{Context as _, Result}; +use lb_ext::{ + CoreBuilderExt as _, LbcComposeDeployer as ComposeDeployer, LbcExtEnv, ScenarioBuilder, + ScenarioBuilderExt as _, configs::network::Libp2pNetworkLayout, +}; +use runner_examples::{DeployerKind, demo, read_env_any, read_topology_seed_or_default}; +use testing_framework_core::scenario::{Deployer as _, Runner}; +use testing_framework_runner_compose::ComposeRunnerError; +use tracing::{info, warn}; + +#[tokio::main] +async fn main() { + runner_examples::defaults::init_node_log_dir_defaults(DeployerKind::Compose); + + tracing_subscriber::fmt::init(); + + let nodes = read_env_any(&["LOGOS_BLOCKCHAIN_DEMO_NODES"], demo::DEFAULT_NODES); + + let run_secs = read_env_any(&["LOGOS_BLOCKCHAIN_DEMO_RUN_SECS"], demo::DEFAULT_RUN_SECS); + + info!(nodes, run_secs, "starting compose runner demo"); + + if let Err(err) = run_compose_case(nodes, Duration::from_secs(run_secs)).await { + warn!("compose runner demo failed: {err:#}"); + process::exit(1); + } +} + +async fn run_compose_case(nodes: usize, run_duration: Duration) -> Result<()> { + info!( + nodes, + duration_secs = run_duration.as_secs(), + "building scenario plan" + ); + + let seed = read_topology_seed_or_default(); + + let scenario = ScenarioBuilder::deployment_with(|t| { + t.with_network_layout(Libp2pNetworkLayout::Star) + .with_node_count(nodes) + }) + .enable_node_control() + .with_run_duration(run_duration) + .with_deployment_seed(seed) + .initialize_wallet( + demo::DEFAULT_TOTAL_WALLETS as u64 * 100, + demo::DEFAULT_TOTAL_WALLETS, + ) + .transactions_with(|txs| { + txs.rate(demo::DEFAULT_MIXED_TXS_PER_BLOCK) + .users(demo::DEFAULT_TRANSACTION_WALLETS) + }) + .expect_consensus_liveness(); + + let mut plan = scenario.build()?; + + let deployer = ComposeDeployer::new(); + info!("deploying compose stack"); + + let runner: Runner = match deployer.deploy(&plan).await { + Ok(runner) => runner, + Err(ComposeRunnerError::DockerUnavailable) => { + warn!("Docker is unavailable; cannot run compose demo"); + return Ok(()); + } + Err(err) => return Err(anyhow::Error::new(err)).context("deploying compose stack failed"), + }; + + if !runner.context().telemetry().is_configured() { + warn!( + "metrics querying is disabled; set LOGOS_BLOCKCHAIN_METRICS_QUERY_URL to enable PromQL queries" + ); + } + + info!("running scenario"); + runner + .run(&mut plan) + .await + .context("running compose scenario failed")?; + Ok(()) +} diff --git a/examples/src/bin/k8s_runner.rs b/logos/examples/src/bin/k8s_runner.rs similarity index 53% rename from examples/src/bin/k8s_runner.rs rename to logos/examples/src/bin/k8s_runner.rs index d1d6439..ff0e554 100644 --- a/examples/src/bin/k8s_runner.rs +++ b/logos/examples/src/bin/k8s_runner.rs @@ -1,18 +1,15 @@ -use std::{env, process, time::Duration}; +use std::{process, time::Duration}; use anyhow::{Context as _, Result}; -use runner_examples::{ScenarioBuilderExt as _, demo, read_env_any}; -use testing_framework_core::scenario::{ - Deployer as _, ObservabilityCapability, Runner, ScenarioBuilder, +use lb_ext::{ + CoreBuilderExt as _, LbcExtEnv, LbcK8sDeployer as K8sDeployer, ScenarioBuilder, + ScenarioBuilderExt as _, configs::network::Libp2pNetworkLayout, }; -use testing_framework_runner_k8s::{K8sDeployer, K8sRunnerError}; -use testing_framework_workflows::ObservabilityBuilderExt as _; +use runner_examples::{demo, read_env_any, read_topology_seed_or_default}; +use testing_framework_core::scenario::{Deployer as _, Runner}; +use testing_framework_runner_k8s::K8sRunnerError; use tracing::{info, warn}; -const MIXED_TXS_PER_BLOCK: u64 = 2; -const TOTAL_WALLETS: usize = 200; -const TRANSACTION_WALLETS: usize = 50; - #[tokio::main] async fn main() { tracing_subscriber::fmt::init(); @@ -34,31 +31,31 @@ async fn run_k8s_case(nodes: usize, run_duration: Duration) -> Result<()> { "building scenario plan" ); - let mut scenario = ScenarioBuilder::topology_with(|t| t.network_star().nodes(nodes)) - .with_capabilities(ObservabilityCapability::default()) - .wallets(TOTAL_WALLETS) - .transactions_with(|txs| txs.rate(MIXED_TXS_PER_BLOCK).users(TRANSACTION_WALLETS)) - .with_run_duration(run_duration) - .expect_consensus_liveness(); + let seed = read_topology_seed_or_default(); - if let Ok(url) = env::var("LOGOS_BLOCKCHAIN_METRICS_QUERY_URL") { - if !url.trim().is_empty() { - scenario = scenario.with_metrics_query_url_str(url.trim()); - } - } - - if let Ok(url) = env::var("LOGOS_BLOCKCHAIN_METRICS_OTLP_INGEST_URL") { - if !url.trim().is_empty() { - scenario = scenario.with_metrics_otlp_ingest_url_str(url.trim()); - } - } + let scenario = ScenarioBuilder::deployment_with(|t| { + t.with_network_layout(Libp2pNetworkLayout::Star) + .with_node_count(nodes) + }) + .enable_observability() + .with_run_duration(run_duration) + .with_deployment_seed(seed) + .initialize_wallet( + demo::DEFAULT_TOTAL_WALLETS as u64 * 100, + demo::DEFAULT_TOTAL_WALLETS, + ) + .transactions_with(|txs| { + txs.rate(demo::DEFAULT_MIXED_TXS_PER_BLOCK) + .users(demo::DEFAULT_TRANSACTION_WALLETS) + }) + .expect_consensus_liveness(); let mut plan = scenario.build()?; let deployer = K8sDeployer::new(); info!("deploying k8s stack"); - let runner: Runner = match deployer.deploy(&plan).await { + let runner: Runner = match deployer.deploy(&plan).await { Ok(runner) => runner, Err(K8sRunnerError::ClientInit { source }) => { warn!("Kubernetes cluster unavailable ({source}); skipping"); diff --git a/examples/src/bin/local_runner.rs b/logos/examples/src/bin/local_runner.rs similarity index 55% rename from examples/src/bin/local_runner.rs rename to logos/examples/src/bin/local_runner.rs index 72a929e..ad03b7e 100644 --- a/examples/src/bin/local_runner.rs +++ b/logos/examples/src/bin/local_runner.rs @@ -1,16 +1,14 @@ use std::{process, time::Duration}; use anyhow::{Context as _, Result}; -use runner_examples::{DeployerKind, ScenarioBuilderExt as _, demo, read_env_any}; -use testing_framework_core::scenario::{Deployer as _, Runner, ScenarioBuilder}; -use testing_framework_runner_local::LocalDeployer; +use lb_framework::{ + CoreBuilderExt as _, LbcEnv, LbcLocalDeployer, ScenarioBuilder, ScenarioBuilderExt as _, + configs::network::Libp2pNetworkLayout, +}; +use runner_examples::{DeployerKind, demo, read_env_any, read_topology_seed_or_default}; +use testing_framework_core::scenario::{Deployer as _, Runner}; use tracing::{info, warn}; -const MIXED_TXS_PER_BLOCK: u64 = 5; -const TOTAL_WALLETS: usize = 1000; -const TRANSACTION_WALLETS: usize = 500; -const SMOKE_RUN_SECS_MAX: u64 = 30; - #[tokio::main] async fn main() { runner_examples::defaults::init_node_log_dir_defaults(DeployerKind::Local); @@ -35,24 +33,30 @@ async fn run_local_case(nodes: usize, run_duration: Duration) -> Result<()> { "building scenario plan" ); - let scenario = ScenarioBuilder::topology_with(|t| t.network_star().nodes(nodes)) - .wallets(TOTAL_WALLETS) - .with_run_duration(run_duration); + let seed = read_topology_seed_or_default(); - let scenario = if run_duration.as_secs() <= SMOKE_RUN_SECS_MAX { - scenario - } else { - scenario - .transactions_with(|txs| txs.rate(MIXED_TXS_PER_BLOCK).users(TRANSACTION_WALLETS)) - .expect_consensus_liveness() - }; + let scenario = ScenarioBuilder::deployment_with(|t| { + t.with_network_layout(Libp2pNetworkLayout::Star) + .with_node_count(nodes) + }) + .with_run_duration(run_duration) + .with_deployment_seed(seed) + .initialize_wallet( + demo::DEFAULT_TOTAL_WALLETS as u64 * 100, + demo::DEFAULT_TOTAL_WALLETS, + ) + .transactions_with(|txs| { + txs.rate(demo::DEFAULT_MIXED_TXS_PER_BLOCK) + .users(demo::DEFAULT_TRANSACTION_WALLETS) + }) + .expect_consensus_liveness(); let mut plan = scenario.build()?; - let deployer = LocalDeployer::default(); + let deployer = LbcLocalDeployer::default(); info!("deploying local nodes"); - let runner: Runner = deployer + let runner: Runner = deployer .deploy(&plan) .await .context("deploying local nodes failed")?; diff --git a/examples/src/defaults.rs b/logos/examples/src/defaults.rs similarity index 95% rename from examples/src/defaults.rs rename to logos/examples/src/defaults.rs index b441a37..8cf25e7 100644 --- a/examples/src/defaults.rs +++ b/logos/examples/src/defaults.rs @@ -11,11 +11,11 @@ const DEFAULT_NODE_LOG_DIR_REL: &str = ".tmp/node-logs"; const DEFAULT_CONTAINER_NODE_LOG_DIR: &str = "/tmp/node-logs"; fn set_default_env(key: &str, value: &str) { - if std::env::var_os(key).is_none() { + if env::var_os(key).is_none() { // SAFETY: Used as an early-run default. Prefer setting env vars in the // shell for multi-threaded runs. unsafe { - std::env::set_var(key, value); + env::set_var(key, value); } } } diff --git a/logos/examples/src/demo.rs b/logos/examples/src/demo.rs new file mode 100644 index 0000000..27420f9 --- /dev/null +++ b/logos/examples/src/demo.rs @@ -0,0 +1,6 @@ +pub const DEFAULT_NODES: usize = 2; +pub const DEFAULT_RUN_SECS: u64 = 60; + +pub const DEFAULT_TOTAL_WALLETS: usize = 200; +pub const DEFAULT_TRANSACTION_WALLETS: usize = 20; +pub const DEFAULT_MIXED_TXS_PER_BLOCK: u64 = 3; diff --git a/logos/examples/src/env.rs b/logos/examples/src/env.rs new file mode 100644 index 0000000..729fc58 --- /dev/null +++ b/logos/examples/src/env.rs @@ -0,0 +1,41 @@ +use std::{ + env, + str::{self, FromStr}, +}; + +use testing_framework_core::topology::DeploymentSeed; + +const DEFAULT_TOPOLOGY_SEED: [u8; 32] = { + let mut bytes = [0u8; 32]; + bytes[31] = 1; + bytes +}; + +pub fn read_env_any(keys: &[&str], default: T) -> T +where + T: FromStr + Copy, +{ + keys.iter() + .find_map(|key| env::var(key).ok().and_then(|raw| raw.parse::().ok())) + .unwrap_or(default) +} + +pub fn read_topology_seed() -> Option { + let raw = env::var("LOGOS_BLOCKCHAIN_TOPOLOGY_SEED").ok()?; + let raw = raw.strip_prefix("0x").unwrap_or(&raw); + if raw.len() != 64 { + return None; + } + + let mut bytes = [0u8; 32]; + for (idx, chunk) in raw.as_bytes().chunks(2).enumerate() { + let chunk = str::from_utf8(chunk).ok()?; + bytes[idx] = u8::from_str_radix(chunk, 16).ok()?; + } + + Some(DeploymentSeed::new(bytes)) +} + +pub fn read_topology_seed_or_default() -> DeploymentSeed { + read_topology_seed().unwrap_or_else(|| DeploymentSeed::new(DEFAULT_TOPOLOGY_SEED)) +} diff --git a/logos/examples/src/lib.rs b/logos/examples/src/lib.rs new file mode 100644 index 0000000..89fae9a --- /dev/null +++ b/logos/examples/src/lib.rs @@ -0,0 +1,14 @@ +pub mod defaults; +pub mod demo; +pub mod env; + +pub use env::{read_env_any, read_topology_seed, read_topology_seed_or_default}; +pub use lb_framework::ScenarioBuilderExt as NodeScenarioBuilderExt; +pub use lb_workloads::{ChaosBuilderExt, ScenarioBuilderExt}; + +#[derive(Clone, Copy, Debug, Default, Eq, PartialEq)] +pub enum DeployerKind { + #[default] + Local, + Compose, +} diff --git a/logos/examples/tests/deployer_parity.rs b/logos/examples/tests/deployer_parity.rs new file mode 100644 index 0000000..4f03cfb --- /dev/null +++ b/logos/examples/tests/deployer_parity.rs @@ -0,0 +1,120 @@ +use std::{env, time::Duration}; + +use anyhow::Result; +use lb_ext::{ + CoreBuilderExt as _, LbcComposeDeployer, LbcExtEnv, LbcK8sDeployer, + ScenarioBuilder as ExtScenarioBuilder, ScenarioBuilderExt as _, +}; +use lb_framework::{ + CoreBuilderExt as _, LbcEnv, LbcLocalDeployer, ScenarioBuilder as LocalScenarioBuilder, + ScenarioBuilderExt as _, configs::network::NetworkLayout, +}; +use testing_framework_core::{ + scenario::{Deployer as _, Runner}, + topology::DeploymentDescriptor, +}; + +#[derive(Clone, Copy)] +struct ScenarioSpec { + nodes: usize, + run_secs: u64, + tx_rate: u64, + tx_users: usize, + total_wallets: usize, +} + +fn shared_spec() -> ScenarioSpec { + ScenarioSpec { + nodes: 2, + run_secs: 30, + tx_rate: 5, + tx_users: 500, + total_wallets: 1000, + } +} + +fn build_local_scenario( + spec: ScenarioSpec, +) -> Result> { + LocalScenarioBuilder::deployment_with(|d| { + d.with_network_layout(NetworkLayout::Star) + .with_node_count(spec.nodes) + }) + .with_run_duration(Duration::from_secs(spec.run_secs)) + .initialize_wallet(spec.total_wallets as u64 * 100, spec.total_wallets) + .transactions_with(|txs| txs.rate(spec.tx_rate).users(spec.tx_users)) + .expect_consensus_liveness() + .build() + .map_err(Into::into) +} + +fn build_ext_scenario( + spec: ScenarioSpec, +) -> Result> { + ExtScenarioBuilder::deployment_with(|d| { + d.with_network_layout(NetworkLayout::Star) + .with_node_count(spec.nodes) + }) + .with_run_duration(Duration::from_secs(spec.run_secs)) + .initialize_wallet(spec.total_wallets as u64 * 100, spec.total_wallets) + .transactions_with(|txs| txs.rate(spec.tx_rate).users(spec.tx_users)) + .expect_consensus_liveness() + .build() + .map_err(Into::into) +} + +#[test] +fn parity_builds_have_same_shape() -> Result<()> { + let spec = shared_spec(); + let local = build_local_scenario(spec)?; + let ext = build_ext_scenario(spec)?; + + assert_eq!( + local.deployment().node_count(), + ext.deployment().node_count() + ); + assert_eq!(local.duration(), ext.duration()); + assert_eq!(local.workloads().len(), ext.workloads().len()); + assert_eq!(local.expectations().len(), ext.expectations().len()); + + Ok(()) +} + +#[tokio::test] +async fn local_parity_smoke_opt_in() -> Result<()> { + if env::var("TF_RUN_LOCAL_PARITY").is_err() { + return Ok(()); + } + + let mut scenario = build_local_scenario(shared_spec())?; + let deployer = LbcLocalDeployer::default(); + let runner: Runner = deployer.deploy(&scenario).await?; + runner.run(&mut scenario).await?; + Ok(()) +} + +#[tokio::test] +async fn compose_parity_smoke_opt_in() -> Result<()> { + if env::var("TF_RUN_COMPOSE_PARITY").is_err() { + return Ok(()); + } + + let mut scenario = build_ext_scenario(shared_spec())?; + let deployer = LbcComposeDeployer::default(); + let runner: Runner = deployer.deploy(&scenario).await?; + runner.run(&mut scenario).await?; + Ok(()) +} + +#[tokio::test] +async fn k8s_parity_smoke_opt_in() -> Result<()> { + if env::var("TF_RUN_K8S_PARITY").is_err() { + return Ok(()); + } + + let mut scenario = build_ext_scenario(shared_spec())?; + let deployer = LbcK8sDeployer::default(); + let runner: Runner = deployer.deploy(&scenario).await?; + runner.run(&mut scenario).await?; + Ok(()) +} diff --git a/examples/tests/dynamic_join.rs b/logos/examples/tests/dynamic_join.rs similarity index 64% rename from examples/tests/dynamic_join.rs rename to logos/examples/tests/dynamic_join.rs index 7ceea1f..ba88ee1 100644 --- a/examples/tests/dynamic_join.rs +++ b/logos/examples/tests/dynamic_join.rs @@ -2,11 +2,13 @@ use std::time::Duration; use anyhow::Result; use async_trait::async_trait; -use testing_framework_core::scenario::{ - Deployer, DynError, PeerSelection, RunContext, ScenarioBuilder, StartNodeOptions, Workload, +use lb_framework::{ + CoreBuilderExt as _, LbcEnv, LbcLocalDeployer, ScenarioBuilder, + configs::network::Libp2pNetworkLayout, +}; +use testing_framework_core::scenario::{ + Deployer, DynError, PeerSelection, RunContext, StartNodeOptions, Workload, }; -use testing_framework_runner_local::LocalDeployer; -use testing_framework_workflows::ScenarioBuilderExt; use tokio::time::{sleep, timeout}; use tracing_subscriber::fmt::try_init; @@ -25,12 +27,12 @@ impl JoinNodeWorkload { } #[async_trait] -impl Workload for JoinNodeWorkload { +impl Workload for JoinNodeWorkload { fn name(&self) -> &str { "dynamic_join" } - async fn start(&self, ctx: &RunContext) -> Result<(), DynError> { + async fn start(&self, ctx: &RunContext) -> Result<(), DynError> { let handle = ctx .node_control() .ok_or_else(|| "dynamic join workload requires node control".to_owned())?; @@ -38,7 +40,7 @@ impl Workload for JoinNodeWorkload { sleep(START_DELAY).await; let node = handle.start_node(&self.name).await?; - let client = node.api; + let client = node.client; timeout(READY_TIMEOUT, async { loop { @@ -71,25 +73,22 @@ impl JoinNodeWithPeersWorkload { } #[async_trait] -impl Workload for JoinNodeWithPeersWorkload { +impl Workload for JoinNodeWithPeersWorkload { fn name(&self) -> &str { "dynamic_join_with_peers" } - async fn start(&self, ctx: &RunContext) -> Result<(), DynError> { + async fn start(&self, ctx: &RunContext) -> Result<(), DynError> { let handle = ctx .node_control() .ok_or_else(|| "dynamic join workload requires node control".to_owned())?; sleep(START_DELAY).await; - let options = StartNodeOptions { - peers: PeerSelection::Named(self.peers.clone()), - config_patch: None, - persist_dir: None, - }; + let mut options = StartNodeOptions::::default(); + options.peers = PeerSelection::Named(self.peers.clone()); let node = handle.start_node_with(&self.name, options).await?; - let client = node.api; + let client = node.client; timeout(READY_TIMEOUT, async { loop { @@ -112,14 +111,17 @@ impl Workload for JoinNodeWithPeersWorkload { async fn dynamic_join_reaches_consensus_liveness() -> Result<()> { let _ = try_init(); - let mut scenario = ScenarioBuilder::topology_with(|t| t.network_star().nodes(2)) - .enable_node_control() - .with_workload(JoinNodeWorkload::new("joiner")) - .expect_consensus_liveness() - .with_run_duration(Duration::from_secs(60)) - .build()?; + let mut scenario = ScenarioBuilder::deployment_with(|t| { + t.with_network_layout(Libp2pNetworkLayout::Star) + .with_node_count(2) + }) + .enable_node_control() + .with_workload(JoinNodeWorkload::new("joiner")) + .with_expectation(lb_framework::workloads::ConsensusLiveness::::default()) + .with_run_duration(Duration::from_secs(60)) + .build()?; - let deployer = LocalDeployer::default(); + let deployer = LbcLocalDeployer::default(); let runner = deployer.deploy(&scenario).await?; let _handle = runner.run(&mut scenario).await?; @@ -129,17 +131,20 @@ async fn dynamic_join_reaches_consensus_liveness() -> Result<()> { #[tokio::test] #[ignore = "run manually with `cargo test -p runner-examples -- --ignored`"] async fn dynamic_join_with_peers_reaches_consensus_liveness() -> Result<()> { - let mut scenario = ScenarioBuilder::topology_with(|t| t.network_star().nodes(2)) - .enable_node_control() - .with_workload(JoinNodeWithPeersWorkload::new( - "joiner", - vec!["node-0".to_string()], - )) - .expect_consensus_liveness() - .with_run_duration(Duration::from_secs(60)) - .build()?; + let mut scenario = ScenarioBuilder::deployment_with(|t| { + t.with_network_layout(Libp2pNetworkLayout::Star) + .with_node_count(2) + }) + .enable_node_control() + .with_workload(JoinNodeWithPeersWorkload::new( + "joiner", + vec!["node-0".to_string()], + )) + .with_expectation(lb_framework::workloads::ConsensusLiveness::::default()) + .with_run_duration(Duration::from_secs(60)) + .build()?; - let deployer = LocalDeployer::default(); + let deployer = LbcLocalDeployer::default(); let runner = deployer.deploy(&scenario).await?; let _handle = runner.run(&mut scenario).await?; diff --git a/logos/examples/tests/local_deployer_restart.rs b/logos/examples/tests/local_deployer_restart.rs new file mode 100644 index 0000000..10649f3 --- /dev/null +++ b/logos/examples/tests/local_deployer_restart.rs @@ -0,0 +1,90 @@ +use std::time::Duration; + +use anyhow::{Result, anyhow}; +use lb_framework::{ + CoreBuilderExt as _, DeploymentBuilder, LbcLocalDeployer, ScenarioBuilder, TopologyConfig, +}; +use testing_framework_core::scenario::Deployer; +use tracing_subscriber::fmt::try_init; + +#[tokio::test] +#[ignore = "requires local node binary and open ports"] +async fn local_restart_node() -> Result<()> { + let _ = try_init(); + let mut scenario = ScenarioBuilder::deployment_with(|t| t.with_node_count(1)) + .enable_node_control() + .with_run_duration(Duration::from_secs(1)) + .build()?; + + let deployer = LbcLocalDeployer::default(); + let runner = deployer.deploy(&scenario).await?; + let context = runner.context(); + + let control = context + .node_control() + .ok_or_else(|| anyhow!("node control not available"))?; + + let node_name = "node-0"; + let old_pid = control + .node_pid(node_name) + .ok_or_else(|| anyhow!("missing node pid"))?; + + control + .restart_node(node_name) + .await + .map_err(|error| anyhow!("failed to restart {node_name}: {error}"))?; + + let new_pid = control + .node_pid(node_name) + .ok_or_else(|| anyhow!("missing node pid"))?; + assert_ne!(old_pid, new_pid, "expected a new process after restart"); + + control + .stop_node(node_name) + .await + .map_err(|error| anyhow!("failed to stop {node_name}: {error}"))?; + assert!( + control.node_pid(node_name).is_none(), + "expected node pid to be absent after stop" + ); + + let _handle = runner.run(&mut scenario).await?; + + Ok(()) +} + +#[tokio::test] +#[ignore = "requires local node binary and open ports"] +async fn manual_cluster_restart_node() -> Result<()> { + let _ = try_init(); + let deployer = LbcLocalDeployer::default(); + let descriptors = DeploymentBuilder::new(TopologyConfig::with_node_numbers(1)).build()?; + let cluster = deployer.manual_cluster_from_descriptors(descriptors); + + let node_name = cluster.start_node("a").await?.name; + + let old_pid = cluster + .node_pid(&node_name) + .ok_or_else(|| anyhow!("missing node pid"))?; + + cluster + .restart_node(&node_name) + .await + .map_err(|error| anyhow!("failed to restart {node_name}: {error}"))?; + + let new_pid = cluster + .node_pid(&node_name) + .ok_or_else(|| anyhow!("missing node pid"))?; + assert_ne!(old_pid, new_pid, "expected a new process after restart"); + + cluster + .stop_node(&node_name) + .await + .map_err(|error| anyhow!("failed to stop {node_name}: {error}"))?; + assert!( + cluster.node_pid(&node_name).is_none(), + "expected node pid to be absent after stop" + ); + + Ok(()) +} diff --git a/logos/examples/tests/manual_cluster.rs b/logos/examples/tests/manual_cluster.rs new file mode 100644 index 0000000..eeb9b27 --- /dev/null +++ b/logos/examples/tests/manual_cluster.rs @@ -0,0 +1,78 @@ +use std::time::Duration; + +use anyhow::{Result, anyhow}; +use lb_framework::{DeploymentBuilder, LbcEnv, LbcLocalDeployer, NodeHttpClient, TopologyConfig}; +use testing_framework_core::scenario::{PeerSelection, StartNodeOptions}; +use tokio::time::sleep; +use tracing_subscriber::fmt::try_init; + +const MAX_HEIGHT_DIFF: u64 = 5; +const CONVERGENCE_TIMEOUT: Duration = Duration::from_secs(60); +const CONVERGENCE_POLL: Duration = Duration::from_secs(2); + +#[tokio::test] +#[ignore = "run manually with `cargo test -p runner-examples -- --ignored manual_cluster_two_clusters_merge`"] +async fn manual_cluster_two_clusters_merge() -> Result<()> { + let _ = try_init(); + // Required env vars (set on the command line when running this test): + // - `POL_PROOF_DEV_MODE=true` + // - `RUST_LOG=info` (optional) + let config = TopologyConfig::with_node_numbers(2); + let deployer = LbcLocalDeployer::new(); + let descriptors = DeploymentBuilder::new(config).build()?; + let cluster = deployer.manual_cluster_from_descriptors(descriptors); + // Nodes are stopped automatically when the cluster is dropped. + + println!("starting node a"); + + let node_a = cluster + .start_node_with("a", node_start_options(PeerSelection::None)) + .await? + .client; + + println!("waiting briefly before starting c"); + sleep(Duration::from_secs(30)).await; + + println!("starting node c -> a"); + let node_c = cluster + .start_node_with( + "c", + node_start_options(PeerSelection::Named(vec!["node-a".to_owned()])), + ) + .await? + .client; + + println!("waiting for network readiness: cluster a,c"); + cluster.wait_network_ready().await?; + + wait_for_convergence(&node_a, &node_c).await +} + +async fn wait_for_convergence(node_a: &NodeHttpClient, node_c: &NodeHttpClient) -> Result<()> { + let start = tokio::time::Instant::now(); + + loop { + let a_height = node_a.consensus_info().await?.height; + let c_height = node_c.consensus_info().await?.height; + let diff = a_height.abs_diff(c_height); + + if diff <= MAX_HEIGHT_DIFF { + println!("final heights: node-a={a_height}, node-c={c_height}, diff={diff}"); + return Ok(()); + } + + if start.elapsed() >= CONVERGENCE_TIMEOUT { + return Err(anyhow!( + "height diff too large after timeout: {diff} > {MAX_HEIGHT_DIFF} (node-a={a_height}, node-c={c_height})" + )); + } + + sleep(CONVERGENCE_POLL).await; + } +} + +fn node_start_options(peers: PeerSelection) -> StartNodeOptions { + let mut options = StartNodeOptions::::default(); + options.peers = peers; + options +} diff --git a/logos/examples/tests/node_config_override.rs b/logos/examples/tests/node_config_override.rs new file mode 100644 index 0000000..5761eae --- /dev/null +++ b/logos/examples/tests/node_config_override.rs @@ -0,0 +1,128 @@ +use std::{ + net::{SocketAddr, TcpListener}, + time::Duration, +}; + +use anyhow::Result; +use lb_framework::{ + DeploymentBuilder, LbcEnv, LbcLocalDeployer, NodeHttpClient, ScenarioBuilder, TopologyConfig, + configs::build_node_run_config, +}; +use testing_framework_core::scenario::{Deployer, PeerSelection, StartNodeOptions}; +use tracing_subscriber::fmt::try_init; + +#[tokio::test] +#[ignore = "run manually with `cargo test -p runner-examples -- --ignored manual_cluster_api_port_override`"] +async fn manual_cluster_api_port_override() -> Result<()> { + let _ = try_init(); + // Required env vars (set on the command line when running this test): + // - `POL_PROOF_DEV_MODE=true` + // - `LOGOS_BLOCKCHAIN_NODE_BIN=...` + // - `LOGOS_BLOCKCHAIN_CIRCUITS=...` + // - `RUST_LOG=info` (optional) + + let api_port = random_api_port(); + + let deployer = LbcLocalDeployer::new(); + let descriptors = DeploymentBuilder::new(TopologyConfig::with_node_numbers(1)).build()?; + let cluster = deployer.manual_cluster_from_descriptors(descriptors.clone()); + + let node = cluster + .start_node_with( + "override-api", + StartNodeOptions::::default() + .with_peers(PeerSelection::None) + .create_patch(move |mut run_config| { + println!("overriding API port to {api_port}"); + let current_addr = run_config.user.api.backend.listen_address; + run_config.user.api.backend.listen_address = + SocketAddr::new(current_addr.ip(), api_port); + Ok(run_config) + }), + ) + .await? + .client; + + cluster.wait_network_ready().await?; + + wait_until_consensus_ready(&node).await?; + + assert_eq!(resolved_port(&node), api_port); + + Ok(()) +} + +#[tokio::test] +#[ignore = "run manually with `cargo test -p runner-examples -- --ignored scenario_builder_api_port_override`"] +async fn scenario_builder_api_port_override() -> Result<()> { + let _ = try_init(); + // Required env vars (set on the command line when running this test): + // - `POL_PROOF_DEV_MODE=true` + // - `LOGOS_BLOCKCHAIN_NODE_BIN=...` + // - `LOGOS_BLOCKCHAIN_CIRCUITS=...` + // - `RUST_LOG=info` (optional) + let api_port = random_api_port(); + + let base_builder = DeploymentBuilder::new(TopologyConfig::with_node_numbers(1)); + let base_descriptors = base_builder.clone().build()?; + let base_node = base_descriptors.nodes().first().expect("node 0 descriptor"); + let mut run_config = build_node_run_config( + &base_descriptors, + base_node, + base_descriptors + .config() + .node_config_override(base_node.index()), + ) + .expect("build run config"); + println!("overriding API port to {api_port}"); + let current_addr = run_config.user.api.backend.listen_address; + run_config.user.api.backend.listen_address = SocketAddr::new(current_addr.ip(), api_port); + + let mut scenario = ScenarioBuilder::new(Box::new( + base_builder.with_node_config_override(0, run_config), + )) + .with_run_duration(Duration::from_secs(1)) + .build()?; + + let deployer = LbcLocalDeployer::default(); + let runner = deployer.deploy(&scenario).await?; + let handle = runner.run(&mut scenario).await?; + + let client = handle + .context() + .random_node_client() + .ok_or_else(|| anyhow::anyhow!("scenario did not expose any node clients"))?; + + client + .consensus_info() + .await + .expect("consensus_info should succeed"); + + assert_eq!(resolved_port(&client), api_port); + + Ok(()) +} + +fn random_api_port() -> u16 { + let listener = TcpListener::bind("127.0.0.1:0").expect("bind random API port"); + listener.local_addr().expect("read API port").port() +} + +fn resolved_port(client: &NodeHttpClient) -> u16 { + client.base_url().port().unwrap_or_default() +} + +async fn wait_until_consensus_ready(client: &NodeHttpClient) -> Result<()> { + const RETRIES: usize = 120; + const DELAY_MS: u64 = 500; + + for _ in 0..RETRIES { + if client.consensus_info().await.is_ok() { + return Ok(()); + } + + tokio::time::sleep(Duration::from_millis(DELAY_MS)).await; + } + + anyhow::bail!("consensus_info did not become ready in time") +} diff --git a/logos/examples/tests/orphan_manual_cluster.rs b/logos/examples/tests/orphan_manual_cluster.rs new file mode 100644 index 0000000..66b244c --- /dev/null +++ b/logos/examples/tests/orphan_manual_cluster.rs @@ -0,0 +1,111 @@ +use std::time::Duration; + +use anyhow::{Result, anyhow}; +use lb_framework::{ + DeploymentBuilder, LbcEnv, LbcLocalDeployer, NodeHttpClient, TopologyConfig, + configs::network::NetworkLayout, +}; +use lb_workloads::{start_node_with_timeout, wait_for_min_height}; +use testing_framework_core::scenario::StartNodeOptions; +use tokio::time::{sleep, timeout}; +use tracing_subscriber::fmt::try_init; + +const MIN_HEIGHT: u64 = 5; +const INITIAL_READY_TIMEOUT: Duration = Duration::from_secs(500); +const CATCH_UP_TIMEOUT: Duration = Duration::from_secs(300); +const START_NODE_TIMEOUT: Duration = Duration::from_secs(90); +const TEST_TIMEOUT: Duration = Duration::from_secs(600); +const POLL_INTERVAL: Duration = Duration::from_secs(1); + +#[tokio::test] +#[ignore = "run manually with `cargo test -p runner-examples -- --ignored orphan_manual_cluster`"] +async fn orphan_manual_cluster() -> Result<()> { + let _ = try_init(); + // Required env vars (set on the command line when running this test): + // - `LOGOS_BLOCKCHAIN_NODE_BIN=...` + // - `NOMOS_KZGRS_PARAMS_PATH=...` (path to KZG params directory/file) + // - `RUST_LOG=info` (optional; better visibility) + + let config = TopologyConfig::with_node_numbers(3); + timeout(TEST_TIMEOUT, run_orphan_flow(config)) + .await + .map_err(|_| anyhow!("test timeout exceeded"))??; + + Ok(()) +} + +async fn run_orphan_flow(config: TopologyConfig) -> Result<()> { + let builder = DeploymentBuilder::new(config).with_network_layout(NetworkLayout::Full); + let deployer = LbcLocalDeployer::new(); + let descriptors = builder.build()?; + let cluster = deployer.manual_cluster_from_descriptors(descriptors); + + let node_a = start_node_with_timeout( + &cluster, + "a", + StartNodeOptions::::default(), + START_NODE_TIMEOUT, + ) + .await? + .client; + + let node_b = start_node_with_timeout( + &cluster, + "b", + StartNodeOptions::::default(), + START_NODE_TIMEOUT, + ) + .await? + .client; + + wait_for_min_height( + &[node_a.clone(), node_b.clone()], + MIN_HEIGHT, + INITIAL_READY_TIMEOUT, + POLL_INTERVAL, + ) + .await?; + + let behind_node = start_node_with_timeout( + &cluster, + "c", + StartNodeOptions::::default(), + START_NODE_TIMEOUT, + ) + .await? + .client; + + wait_for_catch_up(&node_a, &node_b, &behind_node).await +} + +async fn wait_for_catch_up( + node_a: &NodeHttpClient, + node_b: &NodeHttpClient, + behind_node: &NodeHttpClient, +) -> Result<()> { + timeout(CATCH_UP_TIMEOUT, async { + loop { + let node_a_height = node_height(node_a, "node-a").await?; + let node_b_height = node_height(node_b, "node-b").await?; + let behind_height = node_height(behind_node, "node-c").await?; + + let initial_min_height = node_a_height.min(node_b_height); + if behind_height >= initial_min_height.saturating_sub(1) { + return Ok::<(), anyhow::Error>(()); + } + + sleep(POLL_INTERVAL).await; + } + }) + .await + .map_err(|_| anyhow!("timeout waiting for behind node to catch up"))? +} + +async fn node_height(node: &NodeHttpClient, name: &str) -> Result { + let info = node + .consensus_info() + .await + .map_err(|error| anyhow!("{name} consensus_info failed: {error}"))?; + + Ok(info.height) +} diff --git a/logos/infra/assets/stack/Dockerfile.base b/logos/infra/assets/stack/Dockerfile.base new file mode 100644 index 0000000..08335af --- /dev/null +++ b/logos/infra/assets/stack/Dockerfile.base @@ -0,0 +1,106 @@ +# syntax=docker/dockerfile:1 +# check=skip=SecretsUsedInArgOrEnv +# Ignore warnings about sensitive information as this is test data. + +ARG VERSION +ARG LOGOS_BLOCKCHAIN_NODE_REV +ARG LOGOS_BLOCKCHAIN_NODE_USE_LOCAL_CONTEXT + +# =========================== +# BUILD IMAGE +# =========================== + +FROM rust:1.91.0-slim-bookworm AS builder + +ARG VERSION +ARG LOGOS_BLOCKCHAIN_NODE_REV +ARG LOGOS_BLOCKCHAIN_FORCE_BUILD +ARG LOGOS_BLOCKCHAIN_NODE_USE_LOCAL_CONTEXT + +LABEL maintainer="augustinas@status.im" \ + source="https://github.com/logos-co/nomos-node" \ + description="Logos testnet build image" + +WORKDIR /workspace +COPY . . + +# Reduce debug artifact size. +ENV CARGO_PROFILE_DEV_DEBUG=0 +ENV LOGOS_BLOCKCHAIN_NODE_REV=${LOGOS_BLOCKCHAIN_NODE_REV} +ENV LOGOS_BLOCKCHAIN_FORCE_BUILD=${LOGOS_BLOCKCHAIN_FORCE_BUILD} +ENV LOGOS_BLOCKCHAIN_NODE_USE_LOCAL_CONTEXT=${LOGOS_BLOCKCHAIN_NODE_USE_LOCAL_CONTEXT} + +# Install dependencies needed for building RocksDB and for circuit tooling. +RUN apt-get update && apt-get install -yq \ + git gcc g++ clang make cmake m4 xz-utils libgmp-dev libssl-dev pkg-config ca-certificates curl wget file \ + && rm -rf /var/lib/apt/lists/* + +RUN chmod +x \ + /workspace/logos/infra/assets/stack/scripts/docker/prepare_binaries.sh \ + /workspace/logos/infra/assets/stack/scripts/docker/build_cfgsync.sh \ + /workspace/logos/infra/assets/stack/scripts/setup-logos-blockchain-circuits.sh \ + || true + +RUN /workspace/logos/infra/assets/stack/scripts/setup-logos-blockchain-circuits.sh "${VERSION}" /opt/circuits + +ENV LOGOS_BLOCKCHAIN_CIRCUITS=/opt/circuits + +RUN /workspace/scripts/build/build-rapidsnark.sh /opt/circuits + +RUN --mount=type=bind,from=nomos_node,source=.,target=/nomos-node-local,ro \ + if [ "${LOGOS_BLOCKCHAIN_NODE_USE_LOCAL_CONTEXT}" = "1" ]; then \ + rm -rf /nomos-node && mkdir -p /nomos-node && cp -a /nomos-node-local/. /nomos-node/ ; \ + if grep -q 'file:///Users/.*nomos-node' /workspace/Cargo.toml; then \ + sed -i "s#git = \\\"file:///Users/[^\\\"]*nomos-node\\\"#path = \\\"/nomos-node\\\"#g" /workspace/Cargo.toml; \ + fi; \ + # Local checkout may reference ../nomos-testing paths; remap them to /workspace in container. + if [ -f /nomos-node/Cargo.toml ]; then \ + sed -i 's#\.\./nomos-testing/#../workspace/#g' /nomos-node/Cargo.toml; \ + fi; \ + if [ -f /nomos-node/tests/Cargo.toml ]; then \ + sed -i 's#\.\./\.\./nomos-testing/#../../workspace/#g' /nomos-node/tests/Cargo.toml; \ + fi; \ + else \ + if grep -q 'file:///Users/.*nomos-node' /workspace/Cargo.toml; then \ + sed -i "s#git = \\\"file:///Users/[^\\\"]*nomos-node\\\"#git = \\\"https://github.com/logos-co/nomos-node.git\\\", rev = \\\"${LOGOS_BLOCKCHAIN_NODE_REV}\\\"#g" /workspace/Cargo.toml; \ + fi; \ + rm -rf /nomos-node; \ + git clone https://github.com/logos-co/nomos-node.git /nomos-node; \ + cd /nomos-node; \ + git fetch origin "${LOGOS_BLOCKCHAIN_NODE_REV}"; \ + git checkout "${LOGOS_BLOCKCHAIN_NODE_REV}"; \ + fi + +RUN /workspace/logos/infra/assets/stack/scripts/docker/prepare_binaries.sh + +# Strip host-local patches to avoid unresolved absolute paths inside containers. +RUN sed -i '/^\[patch\."https:\/\/github.com\/logos-co\/nomos-node"\]/,/^$/d' /workspace/Cargo.toml + +RUN /workspace/logos/infra/assets/stack/scripts/docker/build_cfgsync.sh + +# =========================== +# BASE RUNTIME IMAGE +# =========================== + +FROM ubuntu:24.04 AS base + +LABEL maintainer="augustinas@status.im" \ + source="https://github.com/logos-co/nomos-node" \ + description="Logos base runtime image (testing)" + +RUN apt-get update && apt-get install -yq \ + libstdc++6 \ + libgmp10 \ + libgomp1 \ + libssl3 \ + ca-certificates \ + && rm -rf /var/lib/apt/lists/* + +COPY --from=builder /workspace/artifacts/logos-blockchain-node /usr/bin/logos-blockchain-node +COPY --from=builder /workspace/artifacts/cfgsync-server /usr/bin/cfgsync-server +COPY --from=builder /workspace/artifacts/cfgsync-client /usr/bin/cfgsync-client +COPY --from=builder /opt/circuits /opt/circuits + +ENV LOGOS_BLOCKCHAIN_CIRCUITS=/opt/circuits + +EXPOSE 3000 8080 9000 60000 diff --git a/testing-framework/assets/stack/Dockerfile.runtime b/logos/infra/assets/stack/Dockerfile.runtime similarity index 100% rename from testing-framework/assets/stack/Dockerfile.runtime rename to logos/infra/assets/stack/Dockerfile.runtime diff --git a/testing-framework/assets/stack/Dockerfile.testnet b/logos/infra/assets/stack/Dockerfile.testnet similarity index 100% rename from testing-framework/assets/stack/Dockerfile.testnet rename to logos/infra/assets/stack/Dockerfile.testnet diff --git a/testing-framework/assets/stack/cfgsync.yaml b/logos/infra/assets/stack/cfgsync.yaml similarity index 84% rename from testing-framework/assets/stack/cfgsync.yaml rename to logos/infra/assets/stack/cfgsync.yaml index 1aff4b9..23852a4 100644 --- a/testing-framework/assets/stack/cfgsync.yaml +++ b/logos/infra/assets/stack/cfgsync.yaml @@ -1,6 +1,9 @@ port: 4400 n_hosts: 4 timeout: 10 +global_params_path: "/etc/logos/global_params" +# Optional: serve prebuilt configs from a bundle file. +# bundle_path: cfgsync.bundle.yaml # ConsensusConfig related parameters security_param: 10 @@ -42,6 +45,7 @@ tracing_settings: filters: nomos: debug cryptarchia: debug + logos_blockchain_chain_leader_service: debug metrics: None console: None - level: INFO + level: DEBUG diff --git a/testing-framework/assets/stack/monitoring/grafana/dashboards.yml b/logos/infra/assets/stack/monitoring/grafana/dashboards.yml similarity index 100% rename from testing-framework/assets/stack/monitoring/grafana/dashboards.yml rename to logos/infra/assets/stack/monitoring/grafana/dashboards.yml diff --git a/testing-framework/assets/stack/monitoring/grafana/dashboards/api-dashboard.json b/logos/infra/assets/stack/monitoring/grafana/dashboards/api-dashboard.json similarity index 100% rename from testing-framework/assets/stack/monitoring/grafana/dashboards/api-dashboard.json rename to logos/infra/assets/stack/monitoring/grafana/dashboards/api-dashboard.json diff --git a/testing-framework/assets/stack/monitoring/grafana/dashboards/api-detailed-dashboard.json b/logos/infra/assets/stack/monitoring/grafana/dashboards/api-detailed-dashboard.json similarity index 100% rename from testing-framework/assets/stack/monitoring/grafana/dashboards/api-detailed-dashboard.json rename to logos/infra/assets/stack/monitoring/grafana/dashboards/api-detailed-dashboard.json diff --git a/testing-framework/assets/stack/monitoring/grafana/dashboards/blend-dashboard.json b/logos/infra/assets/stack/monitoring/grafana/dashboards/blend-dashboard.json similarity index 100% rename from testing-framework/assets/stack/monitoring/grafana/dashboards/blend-dashboard.json rename to logos/infra/assets/stack/monitoring/grafana/dashboards/blend-dashboard.json diff --git a/testing-framework/assets/stack/monitoring/grafana/dashboards/consensus-dashboard.json b/logos/infra/assets/stack/monitoring/grafana/dashboards/consensus-dashboard.json similarity index 100% rename from testing-framework/assets/stack/monitoring/grafana/dashboards/consensus-dashboard.json rename to logos/infra/assets/stack/monitoring/grafana/dashboards/consensus-dashboard.json diff --git a/testing-framework/assets/stack/monitoring/grafana/dashboards/da-dashboard.json b/logos/infra/assets/stack/monitoring/grafana/dashboards/da-dashboard.json similarity index 100% rename from testing-framework/assets/stack/monitoring/grafana/dashboards/da-dashboard.json rename to logos/infra/assets/stack/monitoring/grafana/dashboards/da-dashboard.json diff --git a/testing-framework/assets/stack/monitoring/grafana/dashboards/issues-diagnostics-dashboard.json b/logos/infra/assets/stack/monitoring/grafana/dashboards/issues-diagnostics-dashboard.json similarity index 100% rename from testing-framework/assets/stack/monitoring/grafana/dashboards/issues-diagnostics-dashboard.json rename to logos/infra/assets/stack/monitoring/grafana/dashboards/issues-diagnostics-dashboard.json diff --git a/testing-framework/assets/stack/monitoring/grafana/dashboards/kms-wallet-dashboard.json b/logos/infra/assets/stack/monitoring/grafana/dashboards/kms-wallet-dashboard.json similarity index 100% rename from testing-framework/assets/stack/monitoring/grafana/dashboards/kms-wallet-dashboard.json rename to logos/infra/assets/stack/monitoring/grafana/dashboards/kms-wallet-dashboard.json diff --git a/testing-framework/assets/stack/monitoring/grafana/dashboards/mempool-dashboard.json b/logos/infra/assets/stack/monitoring/grafana/dashboards/mempool-dashboard.json similarity index 100% rename from testing-framework/assets/stack/monitoring/grafana/dashboards/mempool-dashboard.json rename to logos/infra/assets/stack/monitoring/grafana/dashboards/mempool-dashboard.json diff --git a/testing-framework/assets/stack/monitoring/grafana/dashboards/overview-dashboard.json b/logos/infra/assets/stack/monitoring/grafana/dashboards/overview-dashboard.json similarity index 100% rename from testing-framework/assets/stack/monitoring/grafana/dashboards/overview-dashboard.json rename to logos/infra/assets/stack/monitoring/grafana/dashboards/overview-dashboard.json diff --git a/testing-framework/assets/stack/monitoring/grafana/dashboards/sdp-dashboard.json b/logos/infra/assets/stack/monitoring/grafana/dashboards/sdp-dashboard.json similarity index 100% rename from testing-framework/assets/stack/monitoring/grafana/dashboards/sdp-dashboard.json rename to logos/infra/assets/stack/monitoring/grafana/dashboards/sdp-dashboard.json diff --git a/testing-framework/assets/stack/monitoring/grafana/dashboards/storage-dashboard.json b/logos/infra/assets/stack/monitoring/grafana/dashboards/storage-dashboard.json similarity index 100% rename from testing-framework/assets/stack/monitoring/grafana/dashboards/storage-dashboard.json rename to logos/infra/assets/stack/monitoring/grafana/dashboards/storage-dashboard.json diff --git a/testing-framework/assets/stack/monitoring/grafana/dashboards/storage-detailed-dashboard.json b/logos/infra/assets/stack/monitoring/grafana/dashboards/storage-detailed-dashboard.json similarity index 100% rename from testing-framework/assets/stack/monitoring/grafana/dashboards/storage-detailed-dashboard.json rename to logos/infra/assets/stack/monitoring/grafana/dashboards/storage-detailed-dashboard.json diff --git a/testing-framework/assets/stack/monitoring/grafana/datasources.yaml b/logos/infra/assets/stack/monitoring/grafana/datasources.yaml similarity index 100% rename from testing-framework/assets/stack/monitoring/grafana/datasources.yaml rename to logos/infra/assets/stack/monitoring/grafana/datasources.yaml diff --git a/testing-framework/assets/stack/monitoring/grafana/grafana.ini b/logos/infra/assets/stack/monitoring/grafana/grafana.ini similarity index 100% rename from testing-framework/assets/stack/monitoring/grafana/grafana.ini rename to logos/infra/assets/stack/monitoring/grafana/grafana.ini diff --git a/testing-framework/assets/stack/monitoring/grafana/plugins.env b/logos/infra/assets/stack/monitoring/grafana/plugins.env similarity index 100% rename from testing-framework/assets/stack/monitoring/grafana/plugins.env rename to logos/infra/assets/stack/monitoring/grafana/plugins.env diff --git a/testing-framework/assets/stack/monitoring/prometheus.yml b/logos/infra/assets/stack/monitoring/prometheus.yml similarity index 100% rename from testing-framework/assets/stack/monitoring/prometheus.yml rename to logos/infra/assets/stack/monitoring/prometheus.yml diff --git a/testing-framework/assets/stack/monitoring/tempo.yaml b/logos/infra/assets/stack/monitoring/tempo.yaml similarity index 100% rename from testing-framework/assets/stack/monitoring/tempo.yaml rename to logos/infra/assets/stack/monitoring/tempo.yaml diff --git a/logos/infra/assets/stack/scripts/docker/build_cfgsync.sh b/logos/infra/assets/stack/scripts/docker/build_cfgsync.sh new file mode 100755 index 0000000..61a4ad5 --- /dev/null +++ b/logos/infra/assets/stack/scripts/docker/build_cfgsync.sh @@ -0,0 +1,13 @@ +#!/usr/bin/env bash +set -euo pipefail + +RUSTFLAGS='--cfg feature="pol-dev-mode"' \ + cargo build --manifest-path /workspace/testing-framework/tools/cfgsync-runtime/Cargo.toml --bin cfgsync-server + +RUSTFLAGS='--cfg feature="pol-dev-mode"' \ + cargo build --manifest-path /workspace/testing-framework/tools/cfgsync-runtime/Cargo.toml --bin cfgsync-client + +cp /workspace/target/debug/cfgsync-server /workspace/artifacts/cfgsync-server +cp /workspace/target/debug/cfgsync-client /workspace/artifacts/cfgsync-client + +rm -rf /workspace/target/debug/incremental diff --git a/logos/infra/assets/stack/scripts/docker/prepare_binaries.sh b/logos/infra/assets/stack/scripts/docker/prepare_binaries.sh new file mode 100755 index 0000000..bd3e153 --- /dev/null +++ b/logos/infra/assets/stack/scripts/docker/prepare_binaries.sh @@ -0,0 +1,67 @@ +#!/usr/bin/env bash +set -euo pipefail + +LOGOS_BLOCKCHAIN_NODE_REV="${LOGOS_BLOCKCHAIN_NODE_REV:?LOGOS_BLOCKCHAIN_NODE_REV build arg missing}" + +mkdir -p /workspace/artifacts + +TARGET_ARCH="$(uname -m)" + +have_prebuilt() { + [ -f logos/infra/assets/stack/bin/logos-blockchain-node ] && \ + [ -f logos/infra/assets/stack/bin/logos-blockchain-node ] +} + +bin_matches_arch() { + local info + info="$(file -b logos/infra/assets/stack/bin/logos-blockchain-node 2>/dev/null || true)" + case "${info}" in + *ELF*) : ;; + *) return 1 ;; + esac + + local pattern + case "${TARGET_ARCH}" in + x86_64) pattern="x86-64|x86_64" ;; + aarch64|arm64) pattern="arm64|aarch64" ;; + *) pattern="${TARGET_ARCH}" ;; + esac + + echo "${info}" | grep -Eqi "${pattern}" +} + +if [ -n "${LOGOS_BLOCKCHAIN_FORCE_BUILD:-}" ]; then + echo "LOGOS_BLOCKCHAIN_FORCE_BUILD is set; rebuilding logos-blockchain binaries from source" +elif have_prebuilt && bin_matches_arch; then + echo "Using prebuilt logos-blockchain binaries from logos/infra/assets/stack/bin" + cp logos/infra/assets/stack/bin/logos-blockchain-node /workspace/artifacts/logos-blockchain-node + exit 0 +fi + +if have_prebuilt; then + echo "Prebuilt logos-blockchain binaries do not match target architecture (${TARGET_ARCH}); rebuilding from source" +else + echo "Prebuilt logos-blockchain binaries missing; building from source" +fi + +echo "Building logos-blockchain binaries from source (rev ${LOGOS_BLOCKCHAIN_NODE_REV})" +if [ "${LOGOS_BLOCKCHAIN_NODE_USE_LOCAL_CONTEXT:-0}" = "1" ] && [ -d /nomos-node ]; then + echo "Using local nomos-node checkout from Docker build context" + cd /nomos-node +else + git clone https://github.com/logos-co/nomos-node.git /tmp/nomos-node + cd /tmp/nomos-node + git fetch --depth 1 origin "${LOGOS_BLOCKCHAIN_NODE_REV}" + git checkout "${LOGOS_BLOCKCHAIN_NODE_REV}" + git reset --hard + git clean -fdx +fi + +# Enable pol-dev-mode and embed verification keys for proof validation. +RUSTFLAGS='--cfg feature="pol-dev-mode" --cfg feature="build-verification-key"' \ + CARGO_FEATURE_BUILD_VERIFICATION_KEY=1 \ + cargo build --all-features -p logos-blockchain-node + +cp target/debug/logos-blockchain-node /workspace/artifacts/logos-blockchain-node + +rm -rf target/debug/incremental diff --git a/logos/infra/assets/stack/scripts/run_cfgsync.sh b/logos/infra/assets/stack/scripts/run_cfgsync.sh new file mode 100755 index 0000000..34cefec --- /dev/null +++ b/logos/infra/assets/stack/scripts/run_cfgsync.sh @@ -0,0 +1,6 @@ +#!/bin/sh + +set -e + +cd /etc/logos +exec /usr/bin/cfgsync-server /etc/logos/cfgsync.yaml diff --git a/testing-framework/assets/stack/scripts/run_nomos.sh b/logos/infra/assets/stack/scripts/run_logos.sh similarity index 81% rename from testing-framework/assets/stack/scripts/run_nomos.sh rename to logos/infra/assets/stack/scripts/run_logos.sh index d27213f..229bcf2 100755 --- a/testing-framework/assets/stack/scripts/run_nomos.sh +++ b/logos/infra/assets/stack/scripts/run_logos.sh @@ -4,9 +4,26 @@ set -e role="${1:-node}" +resolve_binary() { + name="$1" + if command -v "$name" >/dev/null 2>&1; then + command -v "$name" + return + fi + if [ -x "/usr/bin/$name" ]; then + echo "/usr/bin/$name" + return + fi + if [ -x "/usr/local/bin/$name" ]; then + echo "/usr/local/bin/$name" + return + fi + echo "/usr/bin/$name" +} + bin_for_role() { case "$1" in - node) echo "/usr/bin/logos-blockchain-node" ;; + node) resolve_binary "logos-blockchain-node" ;; *) echo "Unknown role: $1" >&2; exit 2 ;; esac } @@ -53,12 +70,13 @@ export CFG_FILE_PATH="/config.yaml" \ # persist state. mkdir -p /recovery -# cfgsync-server can start a little after the container; retry until it is -# reachable instead of exiting immediately and crash-looping. +# cfgsync-server can start a little after the container; retry until the client +# succeeds and writes /config.yaml. +cfgsync_bin="$(resolve_binary "cfgsync-client")" attempt=0 max_attempts=30 sleep_seconds=3 -until /usr/bin/cfgsync-client; do +until "${cfgsync_bin}"; do attempt=$((attempt + 1)) if [ "$attempt" -ge "$max_attempts" ]; then echo "cfgsync-client failed after ${max_attempts} attempts, giving up" diff --git a/logos/infra/assets/stack/scripts/run_logos_node.sh b/logos/infra/assets/stack/scripts/run_logos_node.sh new file mode 100755 index 0000000..eccb27e --- /dev/null +++ b/logos/infra/assets/stack/scripts/run_logos_node.sh @@ -0,0 +1,2 @@ +#!/bin/sh +exec /etc/logos/scripts/run_logos.sh node diff --git a/testing-framework/assets/stack/scripts/setup-logos-blockchain-circuits.sh b/logos/infra/assets/stack/scripts/setup-logos-blockchain-circuits.sh old mode 100644 new mode 100755 similarity index 97% rename from testing-framework/assets/stack/scripts/setup-logos-blockchain-circuits.sh rename to logos/infra/assets/stack/scripts/setup-logos-blockchain-circuits.sh index 8f33e65..005b2aa --- a/testing-framework/assets/stack/scripts/setup-logos-blockchain-circuits.sh +++ b/logos/infra/assets/stack/scripts/setup-logos-blockchain-circuits.sh @@ -13,7 +13,7 @@ set -euo pipefail -readonly DEFAULT_CIRCUITS_VERSION="v0.3.1" +readonly DEFAULT_CIRCUITS_VERSION="v0.4.1" readonly DEFAULT_INSTALL_SUBDIR=".logos-blockchain-circuits" readonly DEFAULT_CIRCUITS_REPO="logos-blockchain/logos-blockchain-circuits" @@ -108,7 +108,7 @@ download_release() { rm -rf "${temp_dir}" } -platform=$(detect_platform) +platform="${LOGOS_BLOCKCHAIN_CIRCUITS_PLATFORM:-$(detect_platform)}" echo "Setting up logos-blockchain-circuits ${VERSION} for ${platform}" echo "Installing to ${INSTALL_DIR}" diff --git a/testing-framework/deployers/k8s/helm/nomos-runner/Chart.yaml b/logos/infra/helm/logos-runner/Chart.yaml similarity index 87% rename from testing-framework/deployers/k8s/helm/nomos-runner/Chart.yaml rename to logos/infra/helm/logos-runner/Chart.yaml index 4174927..a35d6ad 100644 --- a/testing-framework/deployers/k8s/helm/nomos-runner/Chart.yaml +++ b/logos/infra/helm/logos-runner/Chart.yaml @@ -1,5 +1,5 @@ apiVersion: v2 -name: nomos-runner +name: logos-runner description: Helm chart for Logos integration test runner assets type: application version: 0.1.0 diff --git a/logos/infra/helm/logos-runner/templates/_helpers.tpl b/logos/infra/helm/logos-runner/templates/_helpers.tpl new file mode 100644 index 0000000..669aad3 --- /dev/null +++ b/logos/infra/helm/logos-runner/templates/_helpers.tpl @@ -0,0 +1,34 @@ +{{- define "logos-runner.chart" -}} +{{- .Chart.Name -}} +{{- end -}} + +{{- define "logos-runner.name" -}} +{{- include "logos-runner.chart" . -}} +{{- end -}} + +{{- define "logos-runner.fullname" -}} +{{- if .Values.fullnameOverride -}} +{{- printf "%s" .Values.fullnameOverride | trunc 63 | trimSuffix "-" -}} +{{- else -}} +{{- printf "%s" .Release.Name | trunc 63 | trimSuffix "-" -}} +{{- end -}} +{{- end -}} + +{{- define "logos-runner.labels" -}} +app.kubernetes.io/name: {{ include "logos-runner.chart" . }} +app.kubernetes.io/instance: {{ .Release.Name }} +{{- end -}} + +{{- define "logos-runner.selectorLabels" -}} +app.kubernetes.io/name: {{ include "logos-runner.chart" . }} +app.kubernetes.io/instance: {{ .Release.Name }} +{{- end -}} + +{{- define "logos-runner.nodeLabels" -}} +{{- $root := index . "root" -}} +{{- $index := index . "index" -}} +app.kubernetes.io/name: {{ include "logos-runner.chart" $root }} +app.kubernetes.io/instance: {{ $root.Release.Name }} +logos/logical-role: node +logos/node-index: "{{ $index }}" +{{- end -}} diff --git a/testing-framework/deployers/k8s/helm/nomos-runner/templates/cfgsync-deployment.yaml b/logos/infra/helm/logos-runner/templates/cfgsync-deployment.yaml similarity index 59% rename from testing-framework/deployers/k8s/helm/nomos-runner/templates/cfgsync-deployment.yaml rename to logos/infra/helm/logos-runner/templates/cfgsync-deployment.yaml index bc497dd..7362ab3 100644 --- a/testing-framework/deployers/k8s/helm/nomos-runner/templates/cfgsync-deployment.yaml +++ b/logos/infra/helm/logos-runner/templates/cfgsync-deployment.yaml @@ -1,26 +1,26 @@ apiVersion: apps/v1 kind: Deployment metadata: - name: {{ include "nomos-runner.fullname" . }}-cfgsync + name: {{ include "logos-runner.fullname" . }}-cfgsync labels: - {{- include "nomos-runner.labels" . | nindent 4 }} + {{- include "logos-runner.labels" . | nindent 4 }} spec: replicas: 1 selector: matchLabels: - {{- include "nomos-runner.selectorLabels" . | nindent 6 }} - nomos/component: cfgsync + {{- include "logos-runner.selectorLabels" . | nindent 6 }} + logos/component: cfgsync template: metadata: labels: - {{- include "nomos-runner.selectorLabels" . | nindent 8 }} - nomos/component: cfgsync + {{- include "logos-runner.selectorLabels" . | nindent 8 }} + logos/component: cfgsync spec: containers: - name: cfgsync image: {{ .Values.image }} imagePullPolicy: {{ .Values.imagePullPolicy }} - command: ["/etc/nomos/scripts/run_cfgsync.sh"] + command: ["/bin/sh", "/etc/logos/scripts/run_cfgsync.sh"] ports: - name: http containerPort: {{ .Values.cfgsync.port }} @@ -29,15 +29,17 @@ spec: value: debug volumeMounts: - name: assets - mountPath: /etc/nomos + mountPath: /etc/logos readOnly: true volumes: - name: assets configMap: - name: {{ include "nomos-runner.fullname" . }}-assets + name: {{ include "logos-runner.fullname" . }}-assets defaultMode: 0755 items: - key: cfgsync.yaml path: cfgsync.yaml + - key: cfgsync.bundle.yaml + path: cfgsync.bundle.yaml - key: run_cfgsync.sh path: scripts/run_cfgsync.sh diff --git a/logos/infra/helm/logos-runner/templates/cfgsync-service.yaml b/logos/infra/helm/logos-runner/templates/cfgsync-service.yaml new file mode 100644 index 0000000..626ae24 --- /dev/null +++ b/logos/infra/helm/logos-runner/templates/cfgsync-service.yaml @@ -0,0 +1,15 @@ +apiVersion: v1 +kind: Service +metadata: + name: {{ include "logos-runner.fullname" . }}-cfgsync + labels: + {{- include "logos-runner.labels" . | nindent 4 }} +spec: + type: ClusterIP + selector: + {{- include "logos-runner.selectorLabels" . | nindent 4 }} + logos/component: cfgsync + ports: + - name: http + port: {{ .Values.cfgsync.port }} + targetPort: http diff --git a/logos/infra/helm/logos-runner/templates/configmap.yaml b/logos/infra/helm/logos-runner/templates/configmap.yaml new file mode 100644 index 0000000..a962e6a --- /dev/null +++ b/logos/infra/helm/logos-runner/templates/configmap.yaml @@ -0,0 +1,37 @@ +apiVersion: v1 +kind: ConfigMap +metadata: + name: {{ include "logos-runner.fullname" . }}-assets + labels: + {{- include "logos-runner.labels" . | nindent 4 }} +data: + cfgsync.yaml: | +{{- if .Values.cfgsync.config }} +{{ .Values.cfgsync.config | indent 4 }} +{{- else }} +{{ "" | indent 4 }} +{{- end }} + cfgsync.bundle.yaml: | +{{- if .Values.cfgsync.bundle }} +{{ .Values.cfgsync.bundle | indent 4 }} +{{- else }} +{{ "" | indent 4 }} +{{- end }} + run_cfgsync.sh: | +{{- if .Values.scripts.runCfgsyncSh }} +{{ .Values.scripts.runCfgsyncSh | indent 4 }} +{{- else }} +{{ "" | indent 4 }} +{{- end }} + run_logos.sh: | +{{- if .Values.scripts.runLogosSh }} +{{ .Values.scripts.runLogosSh | indent 4 }} +{{- else }} +{{ "" | indent 4 }} +{{- end }} + run_logos_node.sh: | +{{- if .Values.scripts.runLogosNodeSh }} +{{ .Values.scripts.runLogosNodeSh | indent 4 }} +{{- else }} +{{ "" | indent 4 }} +{{- end }} diff --git a/testing-framework/deployers/k8s/helm/nomos-runner/templates/pv.yaml b/logos/infra/helm/logos-runner/templates/pv.yaml similarity index 77% rename from testing-framework/deployers/k8s/helm/nomos-runner/templates/pv.yaml rename to logos/infra/helm/logos-runner/templates/pv.yaml index 6372cde..4894614 100644 --- a/testing-framework/deployers/k8s/helm/nomos-runner/templates/pv.yaml +++ b/logos/infra/helm/logos-runner/templates/pv.yaml @@ -2,9 +2,9 @@ apiVersion: v1 kind: PersistentVolume metadata: - name: {{ include "nomos-runner.fullname" . }}-kzg + name: {{ include "logos-runner.fullname" . }}-kzg labels: - {{- include "nomos-runner.labels" . | nindent 4 }} + {{- include "logos-runner.labels" . | nindent 4 }} spec: capacity: storage: {{ .Values.kzg.storageSize }} diff --git a/testing-framework/deployers/k8s/helm/nomos-runner/templates/pvc.yaml b/logos/infra/helm/logos-runner/templates/pvc.yaml similarity index 60% rename from testing-framework/deployers/k8s/helm/nomos-runner/templates/pvc.yaml rename to logos/infra/helm/logos-runner/templates/pvc.yaml index 93f4081..08e5b9c 100644 --- a/testing-framework/deployers/k8s/helm/nomos-runner/templates/pvc.yaml +++ b/logos/infra/helm/logos-runner/templates/pvc.yaml @@ -2,14 +2,14 @@ apiVersion: v1 kind: PersistentVolumeClaim metadata: - name: {{ include "nomos-runner.fullname" . }}-kzg + name: {{ include "logos-runner.fullname" . }}-kzg labels: - {{- include "nomos-runner.labels" . | nindent 4 }} + {{- include "logos-runner.labels" . | nindent 4 }} spec: accessModes: - ReadOnlyMany storageClassName: manual - volumeName: {{ include "nomos-runner.fullname" . }}-kzg + volumeName: {{ include "logos-runner.fullname" . }}-kzg resources: requests: storage: {{ .Values.kzg.storageSize }} diff --git a/testing-framework/deployers/k8s/helm/nomos-runner/templates/validator-deployments.yaml b/logos/infra/helm/logos-runner/templates/validator-deployments.yaml similarity index 64% rename from testing-framework/deployers/k8s/helm/nomos-runner/templates/validator-deployments.yaml rename to logos/infra/helm/logos-runner/templates/validator-deployments.yaml index 2e5c08c..8b9b196 100644 --- a/testing-framework/deployers/k8s/helm/nomos-runner/templates/validator-deployments.yaml +++ b/logos/infra/helm/logos-runner/templates/validator-deployments.yaml @@ -5,32 +5,35 @@ apiVersion: apps/v1 kind: Deployment metadata: - name: {{ include "nomos-runner.fullname" $root }}-node-{{ $i }} + name: {{ include "logos-runner.fullname" $root }}-node-{{ $i }} labels: - {{- include "nomos-runner.nodeLabels" (dict "root" $root "index" $i) | nindent 4 }} + {{- include "logos-runner.nodeLabels" (dict "root" $root "index" $i) | nindent 4 }} spec: replicas: 1 selector: matchLabels: - {{- include "nomos-runner.nodeLabels" (dict "root" $root "index" $i) | nindent 6 }} + {{- include "logos-runner.nodeLabels" (dict "root" $root "index" $i) | nindent 6 }} template: metadata: labels: - {{- include "nomos-runner.nodeLabels" (dict "root" $root "index" $i) | nindent 8 }} + {{- include "logos-runner.nodeLabels" (dict "root" $root "index" $i) | nindent 8 }} spec: containers: - name: node image: {{ $root.Values.image }} imagePullPolicy: {{ $root.Values.imagePullPolicy }} - command: ["/etc/nomos/scripts/run_nomos_node.sh"] + command: ["/bin/sh", "/etc/logos/scripts/run_logos_node.sh"] ports: - name: http containerPort: {{ default 18080 $node.apiPort }} - name: testing-http containerPort: {{ default 18081 $node.testingHttpPort }} + - name: swarm-udp + containerPort: {{ default 60000 $node.networkPort }} + protocol: UDP env: - name: CFG_SERVER_ADDR - value: http://{{ include "nomos-runner.fullname" $root }}-cfgsync:{{ $root.Values.cfgsync.port }} + value: http://{{ include "logos-runner.fullname" $root }}-cfgsync:{{ $root.Values.cfgsync.port }} - name: LOGOS_BLOCKCHAIN_TIME_BACKEND value: {{ $root.Values.timeBackend | default "monotonic" | quote }} {{- range $key, $value := $node.env }} @@ -39,20 +42,20 @@ spec: {{- end }} volumeMounts: - name: assets - mountPath: /etc/nomos + mountPath: /etc/logos readOnly: true volumes: - name: assets configMap: - name: {{ include "nomos-runner.fullname" $root }}-assets + name: {{ include "logos-runner.fullname" $root }}-assets defaultMode: 0755 items: - key: cfgsync.yaml path: cfgsync.yaml - key: run_cfgsync.sh path: scripts/run_cfgsync.sh - - key: run_nomos.sh - path: scripts/run_nomos.sh - - key: run_nomos_node.sh - path: scripts/run_nomos_node.sh + - key: run_logos.sh + path: scripts/run_logos.sh + - key: run_logos_node.sh + path: scripts/run_logos_node.sh {{- end }} diff --git a/testing-framework/deployers/k8s/helm/nomos-runner/templates/validator-services.yaml b/logos/infra/helm/logos-runner/templates/validator-services.yaml similarity index 59% rename from testing-framework/deployers/k8s/helm/nomos-runner/templates/validator-services.yaml rename to logos/infra/helm/logos-runner/templates/validator-services.yaml index 62a0f30..a98490f 100644 --- a/testing-framework/deployers/k8s/helm/nomos-runner/templates/validator-services.yaml +++ b/logos/infra/helm/logos-runner/templates/validator-services.yaml @@ -5,13 +5,13 @@ apiVersion: v1 kind: Service metadata: - name: {{ include "nomos-runner.fullname" $root }}-node-{{ $i }} + name: {{ include "logos-runner.fullname" $root }}-node-{{ $i }} labels: - {{- include "nomos-runner.nodeLabels" (dict "root" $root "index" $i) | nindent 4 }} + {{- include "logos-runner.nodeLabels" (dict "root" $root "index" $i) | nindent 4 }} spec: type: NodePort selector: - {{- include "nomos-runner.nodeLabels" (dict "root" $root "index" $i) | nindent 4 }} + {{- include "logos-runner.nodeLabels" (dict "root" $root "index" $i) | nindent 4 }} ports: - name: http port: {{ default 18080 $node.apiPort }} @@ -19,4 +19,8 @@ spec: - name: testing-http port: {{ default 18081 $node.testingHttpPort }} targetPort: testing-http + - name: swarm-udp + port: {{ default 60000 $node.networkPort }} + targetPort: swarm-udp + protocol: UDP {{- end }} diff --git a/testing-framework/deployers/k8s/helm/nomos-runner/values.yaml b/logos/infra/helm/logos-runner/values.yaml similarity index 87% rename from testing-framework/deployers/k8s/helm/nomos-runner/values.yaml rename to logos/infra/helm/logos-runner/values.yaml index 260bbf9..5e3deb3 100644 --- a/testing-framework/deployers/k8s/helm/nomos-runner/values.yaml +++ b/logos/infra/helm/logos-runner/values.yaml @@ -8,11 +8,12 @@ timeBackend: "monotonic" cfgsync: port: 4400 config: "" + bundle: "" scripts: runCfgsyncSh: "" - runNomosSh: "" - runNomosNodeSh: "" + runLogosSh: "" + runLogosNodeSh: "" nodes: count: 1 diff --git a/testing-framework/env/Cargo.toml b/logos/runtime/env/Cargo.toml similarity index 100% rename from testing-framework/env/Cargo.toml rename to logos/runtime/env/Cargo.toml diff --git a/testing-framework/env/src/lib.rs b/logos/runtime/env/src/lib.rs similarity index 100% rename from testing-framework/env/src/lib.rs rename to logos/runtime/env/src/lib.rs diff --git a/logos/runtime/ext/Cargo.toml b/logos/runtime/ext/Cargo.toml new file mode 100644 index 0000000..e70629a --- /dev/null +++ b/logos/runtime/ext/Cargo.toml @@ -0,0 +1,33 @@ +[package] +description = { workspace = true } +edition = { workspace = true } +license = { workspace = true } +name = "lb-ext" +version = { workspace = true } + +[dependencies] +# Workspace crates +cfgsync_runtime = { workspace = true } +lb-framework = { workspace = true } +testing-framework-core = { workspace = true } +testing-framework-env = { workspace = true } +testing-framework-runner-compose = { workspace = true } +testing-framework-runner-k8s = { workspace = true } + +# Logos / Nomos deps +lb_http_api_common = { workspace = true } +lb_tracing = { workspace = true } +lb_tracing_service = { workspace = true } + +# External +anyhow = "1" +async-trait = { workspace = true } +kube = { default-features = false, features = ["client", "rustls-tls"], version = "0.87" } +reqwest = { features = ["json"], workspace = true } +serde = { workspace = true } +serde_yaml = { workspace = true } +tempfile = { workspace = true } +thiserror = { workspace = true } +tokio = { features = ["macros", "process", "rt-multi-thread", "time"], workspace = true } +tracing = { workspace = true } +uuid = { features = ["v4"], workspace = true } diff --git a/logos/runtime/ext/src/cfgsync/mod.rs b/logos/runtime/ext/src/cfgsync/mod.rs new file mode 100644 index 0000000..8271705 --- /dev/null +++ b/logos/runtime/ext/src/cfgsync/mod.rs @@ -0,0 +1,85 @@ +mod template; + +use std::path::Path; + +use anyhow::Result; +pub(crate) use cfgsync_runtime::render::CfgsyncOutputPaths; +use cfgsync_runtime::{ + bundle::build_cfgsync_bundle_with_hostnames, + render::{RenderedCfgsync, apply_timeout_floor, ensure_bundle_path, write_rendered_cfgsync}, +}; +use lb_tracing::metrics::otlp::OtlpMetricsConfig; +use lb_tracing_service::MetricsLayer; +use reqwest::Url; +use testing_framework_core::cfgsync::CfgsyncEnv; + +pub(crate) struct CfgsyncRenderOptions { + pub port: Option, + pub bundle_path: Option, + pub min_timeout_secs: Option, + pub metrics_otlp_ingest_url: Option, +} + +pub(crate) fn render_cfgsync_from_template( + template_path: &Path, + topology: &E::Deployment, + hostnames: &[String], + options: CfgsyncRenderOptions, +) -> Result { + let mut cfg = template::load_cfgsync_template(template_path)?; + apply_render_options::(&mut cfg, topology, options); + + let bundle = build_cfgsync_bundle_with_hostnames::(topology, hostnames)?; + let config_yaml = serde_yaml::to_string(&cfg)?; + let bundle_yaml = serde_yaml::to_string(&bundle)?; + + Ok(RenderedCfgsync { + config_yaml, + bundle_yaml, + }) +} + +pub(crate) fn render_and_write_cfgsync_from_template( + template_path: &Path, + topology: &E::Deployment, + hostnames: &[String], + mut options: CfgsyncRenderOptions, + output: CfgsyncOutputPaths<'_>, +) -> Result { + ensure_bundle_path(&mut options.bundle_path, output.bundle_path); + + let rendered = render_cfgsync_from_template::(template_path, topology, hostnames, options)?; + write_rendered_cfgsync(&rendered, output)?; + Ok(rendered) +} + +fn apply_render_options( + cfg: &mut template::CfgSyncConfig, + topology: &E::Deployment, + options: CfgsyncRenderOptions, +) { + let CfgsyncRenderOptions { + port, + bundle_path, + min_timeout_secs, + metrics_otlp_ingest_url, + } = options; + + if let Some(port) = port { + cfg.port = port; + } + + cfg.n_hosts = E::nodes(topology).len(); + cfg.bundle_path = bundle_path; + apply_metrics_endpoint(cfg, metrics_otlp_ingest_url); + apply_timeout_floor(&mut cfg.timeout, min_timeout_secs); +} + +fn apply_metrics_endpoint(cfg: &mut template::CfgSyncConfig, endpoint: Option) { + if let Some(endpoint) = endpoint { + cfg.tracing_settings.metrics = MetricsLayer::Otlp(OtlpMetricsConfig { + endpoint, + host_identifier: "node".into(), + }); + } +} diff --git a/logos/runtime/ext/src/cfgsync/template.rs b/logos/runtime/ext/src/cfgsync/template.rs new file mode 100644 index 0000000..4812101 --- /dev/null +++ b/logos/runtime/ext/src/cfgsync/template.rs @@ -0,0 +1,26 @@ +use std::{fs::File, path::Path}; + +use anyhow::{Context as _, Result}; +use lb_tracing_service::TracingSettings; +use serde::{Deserialize, Serialize}; +use tracing::debug; + +#[derive(Debug, Clone, Serialize, Deserialize)] +pub struct CfgSyncConfig { + pub port: u16, + #[serde(default)] + pub n_hosts: usize, + pub timeout: u64, + #[serde(default)] + #[serde(skip_serializing_if = "Option::is_none")] + pub bundle_path: Option, + #[serde(default)] + pub tracing_settings: TracingSettings, +} + +pub fn load_cfgsync_template(path: &Path) -> Result { + debug!(path = %path.display(), "loading cfgsync template"); + let file = File::open(path) + .with_context(|| format!("opening cfgsync template at {}", path.display()))?; + serde_yaml::from_reader(file).context("parsing cfgsync template") +} diff --git a/logos/runtime/ext/src/compose_env.rs b/logos/runtime/ext/src/compose_env.rs new file mode 100644 index 0000000..f2a2290 --- /dev/null +++ b/logos/runtime/ext/src/compose_env.rs @@ -0,0 +1,393 @@ +use std::{ + env, + path::{Path, PathBuf}, + process::Command as StdCommand, + time::Duration, +}; + +use anyhow::anyhow; +use async_trait::async_trait; +use lb_framework::{ + NodeHttpClient, + internal::{DeploymentPlan, NodePlan}, +}; +use lb_http_api_common::paths; +use reqwest::Url; +use testing_framework_core::{adjust_timeout, scenario::DynError}; +use testing_framework_env as tf_env; +use testing_framework_runner_compose::{ + ComposeDeployEnv, ComposeDescriptor, ConfigServerHandle, EnvEntry, NodeDescriptor, + docker::commands::run_docker_command, + infrastructure::{ports::NodeHostPorts, template::repository_root}, +}; +use tokio::process::Command; +use tracing::{debug, info, warn}; +use uuid::Uuid; + +use crate::{ + LbcExtEnv, + cfgsync::{CfgsyncOutputPaths, CfgsyncRenderOptions, render_and_write_cfgsync_from_template}, + constants::DEFAULT_CFGSYNC_PORT, +}; + +const NODE_ENTRYPOINT: &str = "/etc/logos/scripts/run_logos_node.sh"; +const CFGSYNC_START_TIMEOUT: Duration = Duration::from_secs(180); +const DEFAULT_COMPOSE_RUNNER_HOST: &str = "127.0.0.1"; +const DEFAULT_COMPOSE_TEST_IMAGE: &str = "logos-blockchain-testing:local"; +const GHCR_TESTNET_IMAGE: &str = "ghcr.io/logos-co/nomos:testnet"; +const DEFAULT_CFGSYNC_HOST: &str = "cfgsync"; + +#[derive(Debug)] +pub struct LbcCfgsyncHandle { + name: String, + stopped: bool, +} + +impl LbcCfgsyncHandle { + fn new(name: String) -> Self { + Self { + name, + stopped: false, + } + } +} + +impl ConfigServerHandle for LbcCfgsyncHandle { + fn shutdown(&mut self) { + if self.stopped { + return; + } + let name = self.name.clone(); + let status = StdCommand::new("docker") + .arg("rm") + .arg("-f") + .arg(&name) + .status(); + match status { + Ok(status) if status.success() => { + debug!(container = name, "removed cfgsync container"); + } + Ok(status) => { + warn!(container = name, status = ?status, "failed to remove cfgsync container"); + } + Err(err) => { + warn!(container = name, error = ?err, "failed to spawn docker rm for cfgsync container"); + } + } + self.stopped = true; + } + + fn mark_preserved(&mut self) { + self.stopped = true; + } + + fn container_name(&self) -> Option<&str> { + Some(self.name.as_str()) + } +} + +#[async_trait] +impl ComposeDeployEnv for LbcExtEnv { + type ConfigHandle = LbcCfgsyncHandle; + + fn compose_descriptor(topology: &Self::Deployment, cfgsync_port: u16) -> ComposeDescriptor { + let cfgsync_port = normalized_cfgsync_port(cfgsync_port); + let (image, platform) = resolve_image(); + let nodes = topology + .nodes() + .iter() + .enumerate() + .map(|(index, node)| { + build_compose_node_descriptor(index, node, cfgsync_port, &image, platform.clone()) + }) + .collect(); + + ComposeDescriptor::new(nodes) + } + + fn update_cfgsync_config( + path: &Path, + topology: &Self::Deployment, + port: u16, + metrics_otlp_ingest_url: Option<&Url>, + ) -> Result<(), DynError> { + debug!( + path = %path.display(), + port, + nodes = topology.nodes().len(), + "updating cfgsync template" + ); + let bundle_path = cfgsync_bundle_path(path); + let hostnames = topology_hostnames(topology); + let options = cfgsync_render_options(port, metrics_otlp_ingest_url); + render_and_write_cfgsync_from_template::( + path, + topology, + &hostnames, + options, + CfgsyncOutputPaths { + config_path: path, + bundle_path: &bundle_path, + }, + )?; + Ok(()) + } + + async fn start_cfgsync( + cfgsync_path: &Path, + port: u16, + network: &str, + ) -> Result { + let testnet_dir = cfgsync_dir(cfgsync_path)?; + let (image, _) = resolve_image(); + let container_name = cfgsync_container_name(); + + debug!( + container = %container_name, + image, + cfgsync = %cfgsync_path.display(), + port, + "starting cfgsync container" + ); + + let command = + build_cfgsync_docker_run_command(&container_name, network, port, testnet_dir, &image); + + run_docker_command( + command, + adjust_timeout(CFGSYNC_START_TIMEOUT), + "docker run cfgsync server", + ) + .await + .map_err(|err| anyhow!(err.to_string()))?; + + info!(container = %container_name, port, "cfgsync container started"); + + Ok(LbcCfgsyncHandle::new(container_name)) + } + + fn node_client_from_ports( + ports: &NodeHostPorts, + host: &str, + ) -> Result { + api_client_from_host_ports(ports, host) + } + + fn readiness_path() -> &'static str { + paths::CRYPTARCHIA_INFO + } + + fn compose_runner_host() -> String { + compose_runner_host() + } +} + +fn node_instance_name(index: usize) -> String { + format!("node-{index}") +} + +fn cfgsync_bundle_path(config_path: &Path) -> PathBuf { + config_path + .parent() + .unwrap_or(config_path) + .join("cfgsync.bundle.yaml") +} + +fn topology_hostnames(topology: &DeploymentPlan) -> Vec { + topology + .nodes() + .iter() + .map(|node| format!("node-{}", node.index())) + .collect() +} + +fn cfgsync_render_options( + port: u16, + metrics_otlp_ingest_url: Option<&Url>, +) -> CfgsyncRenderOptions { + CfgsyncRenderOptions { + port: Some(port), + bundle_path: None, + min_timeout_secs: None, + metrics_otlp_ingest_url: metrics_otlp_ingest_url.cloned(), + } +} + +fn cfgsync_dir(cfgsync_path: &Path) -> Result<&Path, DynError> { + cfgsync_path + .parent() + .ok_or_else(|| anyhow!("cfgsync path {cfgsync_path:?} has no parent directory").into()) +} + +fn normalized_cfgsync_port(port: u16) -> u16 { + if port == 0 { + DEFAULT_CFGSYNC_PORT + } else { + port + } +} + +fn build_compose_node_descriptor( + index: usize, + node: &NodePlan, + cfgsync_port: u16, + image: &str, + platform: Option, +) -> NodeDescriptor { + let mut environment = base_environment(cfgsync_port); + environment.push(EnvEntry::new( + "CFG_HOST_IDENTIFIER", + node_instance_name(index), + )); + + let api_port = node.general.api_config.address.port(); + let testing_port = node.general.api_config.testing_http_address.port(); + let ports = vec![ + format!("127.0.0.1::{api_port}"), + format!("127.0.0.1::{testing_port}"), + ]; + + NodeDescriptor::new( + node_instance_name(index), + image.to_owned(), + NODE_ENTRYPOINT, + base_volumes(), + default_extra_hosts(), + ports, + environment, + platform, + ) +} + +fn cfgsync_container_name() -> String { + format!("nomos-cfgsync-{}", Uuid::new_v4()) +} + +fn cfgsync_stack_volume_arg(testnet_dir: &Path) -> String { + let stack_dir = testnet_dir + .canonicalize() + .unwrap_or_else(|_| testnet_dir.to_path_buf()); + format!("{}:/etc/logos:ro", stack_dir.display()) +} + +fn maybe_add_circuits_mount(command: &mut Command) { + let circuits_dir = env::var("LOGOS_BLOCKCHAIN_CIRCUITS_DOCKER") + .ok() + .or_else(|| env::var("LOGOS_BLOCKCHAIN_CIRCUITS").ok()); + + let Some(circuits_dir) = circuits_dir else { + return; + }; + + let host_path = PathBuf::from(&circuits_dir); + if !host_path.exists() { + return; + } + + let resolved_host_path = host_path.canonicalize().unwrap_or(host_path); + command + .arg("-e") + .arg(format!("LOGOS_BLOCKCHAIN_CIRCUITS={circuits_dir}")) + .arg("-v") + .arg(format!( + "{}:{circuits_dir}:ro", + resolved_host_path.display() + )); +} + +fn build_cfgsync_docker_run_command( + container_name: &str, + network: &str, + port: u16, + testnet_dir: &Path, + image: &str, +) -> Command { + let mut command = Command::new("docker"); + command + .arg("run") + .arg("-d") + .arg("--name") + .arg(container_name) + .arg("--network") + .arg(network) + .arg("--network-alias") + .arg("cfgsync") + .arg("--workdir") + .arg("/etc/logos") + .arg("--entrypoint") + .arg("cfgsync-server") + .arg("-p") + .arg(format!("{port}:{port}")) + .arg("-v") + .arg(cfgsync_stack_volume_arg(testnet_dir)); + + maybe_add_circuits_mount(&mut command); + command.arg(image).arg("/etc/logos/cfgsync.yaml"); + command +} + +fn resolve_image() -> (String, Option) { + let image = + tf_env::nomos_testnet_image().unwrap_or_else(|| String::from(DEFAULT_COMPOSE_TEST_IMAGE)); + let platform = (image == GHCR_TESTNET_IMAGE).then(|| "linux/amd64".to_owned()); + debug!(image, platform = ?platform, "resolved compose image"); + (image, platform) +} + +fn base_volumes() -> Vec { + let mut volumes = vec!["./stack:/etc/logos".into()]; + if let Some(host_log_dir) = repository_root() + .ok() + .map(|root| root.join("tmp").join("node-logs")) + .map(|dir| dir.display().to_string()) + { + volumes.push(format!("{host_log_dir}:/tmp/node-logs")); + } + volumes +} + +fn default_extra_hosts() -> Vec { + testing_framework_runner_compose::docker::platform::host_gateway_entry() + .into_iter() + .collect() +} + +fn base_environment(cfgsync_port: u16) -> Vec { + let rust_log = env_value_or_default(tf_env::rust_log, "info"); + let nomos_log_level = env_value_or_default(tf_env::nomos_log_level, "info"); + let time_backend = env_value_or_default(tf_env::lb_time_service_backend, "monotonic"); + let cfgsync_host = env::var("LOGOS_BLOCKCHAIN_CFGSYNC_HOST") + .unwrap_or_else(|_| String::from(DEFAULT_CFGSYNC_HOST)); + vec![ + EnvEntry::new("RUST_LOG", rust_log), + EnvEntry::new("LOGOS_BLOCKCHAIN_LOG_LEVEL", nomos_log_level), + EnvEntry::new("LOGOS_BLOCKCHAIN_TIME_BACKEND", time_backend), + EnvEntry::new( + "CFG_SERVER_ADDR", + format!("http://{cfgsync_host}:{cfgsync_port}"), + ), + EnvEntry::new("OTEL_METRIC_EXPORT_INTERVAL", "5000"), + ] +} + +fn compose_runner_host() -> String { + env::var("COMPOSE_RUNNER_HOST").unwrap_or_else(|_| DEFAULT_COMPOSE_RUNNER_HOST.to_string()) +} + +fn api_client_from_host_ports( + ports: &NodeHostPorts, + host: &str, +) -> Result { + let base_url = url_for_host_port(host, ports.api)?; + let testing_url = url_for_host_port(host, ports.testing)?; + Ok(NodeHttpClient::from_urls(base_url, Some(testing_url))) +} + +fn env_value_or_default(getter: impl Fn() -> Option, default: &'static str) -> String { + getter().unwrap_or_else(|| String::from(default)) +} + +fn url_for_host_port(host: &str, port: u16) -> Result { + let url = Url::parse(&format!("http://{host}:{port}/"))?; + Ok(url) +} diff --git a/logos/runtime/ext/src/constants.rs b/logos/runtime/ext/src/constants.rs new file mode 100644 index 0000000..1e469d5 --- /dev/null +++ b/logos/runtime/ext/src/constants.rs @@ -0,0 +1,13 @@ +use testing_framework_env as tf_env; + +/// Default cfgsync port used across extension runners. +pub const DEFAULT_CFGSYNC_PORT: u16 = 4400; + +/// Default stack assets directory used by k8s compose assets discovery. +pub const DEFAULT_ASSETS_STACK_DIR: &str = "logos/infra/assets/stack"; + +/// Resolve cfgsync port from `LOGOS_BLOCKCHAIN_CFGSYNC_PORT`, falling back to +/// the default. +pub fn cfgsync_port() -> u16 { + tf_env::nomos_cfgsync_port().unwrap_or(DEFAULT_CFGSYNC_PORT) +} diff --git a/logos/runtime/ext/src/k8s_env.rs b/logos/runtime/ext/src/k8s_env.rs new file mode 100644 index 0000000..667b83c --- /dev/null +++ b/logos/runtime/ext/src/k8s_env.rs @@ -0,0 +1,666 @@ +use std::{ + collections::BTreeMap, + env, fs, io, + path::{Path, PathBuf}, + process::Output, +}; + +use anyhow::{Result as AnyhowResult, anyhow}; +use async_trait::async_trait; +use kube::Client; +use lb_framework::{ + NodeHttpClient, + internal::{DeploymentPlan, NodePlan}, +}; +use lb_http_api_common::paths; +use reqwest::Url; +use serde::Serialize; +use tempfile::TempDir; +use testing_framework_core::scenario::DynError; +use testing_framework_env as tf_env; +use testing_framework_runner_k8s::{K8sDeployEnv, PortSpecs, RunnerCleanup, wait::NodeConfigPorts}; +use thiserror::Error; +use tokio::process::Command; +use tracing::{debug, info}; + +use crate::{ + LbcExtEnv, + cfgsync::{CfgsyncOutputPaths, CfgsyncRenderOptions, render_and_write_cfgsync_from_template}, + constants::{DEFAULT_ASSETS_STACK_DIR, cfgsync_port}, +}; + +const CFGSYNC_K8S_TIMEOUT_SECS: u64 = 300; +const K8S_FULLNAME_OVERRIDE: &str = "logos-runner"; +const DEFAULT_K8S_TESTNET_IMAGE: &str = "public.ecr.aws/r4s5t9y4/logos/logos-blockchain:test"; + +/// Paths and image metadata required to deploy the Helm chart. +pub struct K8sAssets { + pub image: String, + pub chart_path: PathBuf, + pub cfgsync_file: PathBuf, + pub run_cfgsync_script: PathBuf, + pub run_logos_script: PathBuf, + pub run_logos_node_script: PathBuf, + pub values_file: PathBuf, + _tempdir: TempDir, +} + +#[derive(Debug, Error)] +/// Failures preparing Helm assets and rendered cfgsync configuration. +pub enum AssetsError { + #[error("failed to locate workspace root: {source}")] + WorkspaceRoot { + #[source] + source: anyhow::Error, + }, + #[error("failed to render cfgsync configuration: {source}")] + Cfgsync { + #[source] + source: anyhow::Error, + }, + #[error("missing required script at {path}")] + MissingScript { path: PathBuf }, + #[error("missing Helm chart at {path}; ensure the repository is up-to-date")] + MissingChart { path: PathBuf }, + #[error("failed to create temporary directory for rendered assets: {source}")] + TempDir { + #[source] + source: io::Error, + }, + #[error("failed to write asset at {path}: {source}")] + Io { + path: PathBuf, + #[source] + source: io::Error, + }, + #[error("failed to render Helm values: {source}")] + Values { + #[source] + source: serde_yaml::Error, + }, +} + +#[derive(Debug, Error)] +/// Errors returned from Helm invocations. +pub enum HelmError { + #[error("failed to spawn {command}: {source}")] + Spawn { + command: String, + #[source] + source: io::Error, + }, + #[error("{command} exited with status {status:?}\nstderr:\n{stderr}\nstdout:\n{stdout}")] + Failed { + command: String, + status: Option, + stdout: String, + stderr: String, + }, +} + +#[async_trait] +impl K8sDeployEnv for LbcExtEnv { + type Assets = K8sAssets; + + fn collect_port_specs(topology: &Self::Deployment) -> PortSpecs { + let nodes = topology + .nodes() + .iter() + .map(|node| NodeConfigPorts { + api: node.general.api_config.address.port(), + testing: node.general.api_config.testing_http_address.port(), + }) + .collect(); + PortSpecs { nodes } + } + + fn prepare_assets( + topology: &Self::Deployment, + metrics_otlp_ingest_url: Option<&Url>, + ) -> Result { + prepare_assets(topology, metrics_otlp_ingest_url).map_err(|err| err.into()) + } + + async fn install_stack( + client: &Client, + assets: &Self::Assets, + namespace: &str, + release: &str, + nodes: usize, + ) -> Result { + install_release(assets, release, namespace, nodes) + .await + .map_err(|err| -> DynError { Box::new(err) })?; + + let preserve = env::var("K8S_RUNNER_PRESERVE").is_ok(); + Ok(RunnerCleanup::new( + client.clone(), + namespace.to_owned(), + release.to_owned(), + preserve, + )) + } + + fn node_client_from_ports( + host: &str, + api_port: u16, + testing_port: u16, + ) -> Result { + let base_url = node_url(host, api_port)?; + let testing_url = Url::parse(&format!("http://{host}:{testing_port}")).ok(); + Ok(NodeHttpClient::from_urls(base_url, testing_url)) + } + + fn readiness_path() -> &'static str { + paths::CRYPTARCHIA_INFO + } + + fn node_base_url(client: &Self::NodeClient) -> Option { + Some(client.base_url().to_string()) + } + + fn node_deployment_name(_release: &str, index: usize) -> String { + format!("{K8S_FULLNAME_OVERRIDE}-node-{index}") + } + + fn node_service_name(_release: &str, index: usize) -> String { + format!("{K8S_FULLNAME_OVERRIDE}-node-{index}") + } +} + +fn node_url(host: &str, port: u16) -> Result { + let url = Url::parse(&format!("http://{host}:{port}"))?; + Ok(url) +} + +/// Render cfgsync config, Helm values, and locate scripts for a topology. +pub fn prepare_assets( + topology: &DeploymentPlan, + metrics_otlp_ingest_url: Option<&Url>, +) -> Result { + log_assets_prepare_start(topology); + let root = workspace_root().map_err(|source| AssetsError::WorkspaceRoot { source })?; + let tempdir = create_assets_tempdir()?; + + let (cfgsync_file, cfgsync_yaml, bundle_yaml) = + render_and_write_cfgsync(&root, topology, metrics_otlp_ingest_url, &tempdir)?; + let scripts = validate_scripts(&root)?; + let chart_path = helm_chart_path()?; + let values_file = render_and_write_values(topology, &tempdir, &cfgsync_yaml, &bundle_yaml)?; + let image = testnet_image(); + + log_assets_prepare_done(&cfgsync_file, &values_file, &chart_path, &image); + + Ok(K8sAssets { + image, + chart_path, + cfgsync_file, + run_logos_script: scripts.run_shared, + run_cfgsync_script: scripts.run_cfgsync, + run_logos_node_script: scripts.run_node, + values_file, + _tempdir: tempdir, + }) +} + +fn log_assets_prepare_start(topology: &DeploymentPlan) { + info!( + nodes = topology.nodes().len(), + "preparing k8s runner assets" + ); +} + +fn log_assets_prepare_done( + cfgsync_file: &Path, + values_file: &Path, + chart_path: &Path, + image: &str, +) { + debug!( + cfgsync = %cfgsync_file.display(), + values = %values_file.display(), + image, + chart = %chart_path.display(), + "k8s runner assets prepared" + ); +} + +async fn install_release( + assets: &K8sAssets, + release: &str, + namespace: &str, + nodes: usize, +) -> Result<(), HelmError> { + info!( + release, + namespace, + nodes, + image = %assets.image, + cfgsync_port = cfgsync_port(), + values = %assets.values_file.display(), + "installing helm release" + ); + + let command = format!("helm install {release}"); + let cmd = build_install_command(assets, release, namespace, nodes); + let output = run_helm_command(cmd, &command).await?; + + maybe_log_install_output(&command, &output); + + info!(release, namespace, "helm install completed"); + Ok(()) +} + +fn build_install_command( + assets: &K8sAssets, + release: &str, + namespace: &str, + nodes: usize, +) -> Command { + let mut cmd = Command::new("helm"); + cmd.arg("install").arg(release).arg(&assets.chart_path); + add_install_scoping_args(&mut cmd, namespace); + add_install_settings(&mut cmd, assets, nodes); + add_script_file_settings(&mut cmd, assets); + + if let Ok(root) = workspace_root() { + cmd.current_dir(root); + } + + cmd +} + +fn add_install_scoping_args(cmd: &mut Command, namespace: &str) { + cmd.arg("--namespace") + .arg(namespace) + .arg("--create-namespace") + .arg("--wait") + .arg("--timeout") + .arg("5m"); +} + +fn add_install_settings(cmd: &mut Command, assets: &K8sAssets, nodes: usize) { + cmd.arg("--set") + .arg(format!("image={}", assets.image)) + .arg("--set") + .arg(format!("nodes.count={nodes}")) + .arg("--set") + .arg(format!("cfgsync.port={}", cfgsync_port())) + .arg("-f") + .arg(&assets.values_file) + .arg("--set-file") + .arg(format!("cfgsync.config={}", assets.cfgsync_file.display())); +} + +fn add_script_file_settings(cmd: &mut Command, assets: &K8sAssets) { + add_set_file_arg(cmd, "scripts.runCfgsyncSh", &assets.run_cfgsync_script); + add_set_file_arg(cmd, "scripts.runLogosNodeSh", &assets.run_logos_node_script); + add_set_file_arg(cmd, "scripts.runLogosSh", &assets.run_logos_script); +} + +fn add_set_file_arg(cmd: &mut Command, key: &str, value: &Path) { + cmd.arg("--set-file") + .arg(format!("{key}={}", value.display())); +} + +fn maybe_log_install_output(command: &str, output: &Output) { + if env::var("K8S_RUNNER_DEBUG").is_err() { + return; + } + + debug!( + command, + stdout = %String::from_utf8_lossy(&output.stdout), + "helm install stdout" + ); + debug!( + command, + stderr = %String::from_utf8_lossy(&output.stderr), + "helm install stderr" + ); +} + +async fn run_helm_command(mut cmd: Command, command: &str) -> Result { + let output = cmd.output().await.map_err(|source| HelmError::Spawn { + command: command.to_owned(), + source, + })?; + + if output.status.success() { + Ok(output) + } else { + Err(HelmError::Failed { + command: command.to_owned(), + status: output.status.code(), + stdout: String::from_utf8_lossy(&output.stdout).into_owned(), + stderr: String::from_utf8_lossy(&output.stderr).into_owned(), + }) + } +} + +fn create_assets_tempdir() -> Result { + tempfile::Builder::new() + .prefix("nomos-helm-") + .tempdir() + .map_err(|source| AssetsError::TempDir { source }) +} + +fn render_and_write_cfgsync( + root: &Path, + topology: &DeploymentPlan, + metrics_otlp_ingest_url: Option<&Url>, + tempdir: &TempDir, +) -> Result<(PathBuf, String, String), AssetsError> { + let cfgsync_file = tempdir.path().join("cfgsync.yaml"); + let bundle_file = tempdir.path().join("cfgsync.bundle.yaml"); + let (cfgsync_yaml, bundle_yaml) = render_cfgsync_config( + root, + topology, + metrics_otlp_ingest_url, + &cfgsync_file, + &bundle_file, + )?; + Ok((cfgsync_file, cfgsync_yaml, bundle_yaml)) +} + +fn render_and_write_values( + topology: &DeploymentPlan, + tempdir: &TempDir, + cfgsync_yaml: &str, + bundle_yaml: &str, +) -> Result { + let values_yaml = render_values_yaml(topology, cfgsync_yaml, bundle_yaml)?; + write_temp_file(tempdir.path(), "values.yaml", values_yaml) +} + +fn testnet_image() -> String { + tf_env::nomos_testnet_image().unwrap_or_else(|| String::from(DEFAULT_K8S_TESTNET_IMAGE)) +} + +fn render_cfgsync_config( + root: &Path, + topology: &DeploymentPlan, + metrics_otlp_ingest_url: Option<&Url>, + cfgsync_file: &Path, + bundle_file: &Path, +) -> Result<(String, String), AssetsError> { + let cfgsync_template_path = cfgsync_template_path(root); + debug!(path = %cfgsync_template_path.display(), "loading cfgsync template"); + let hostnames = k8s_node_hostnames(topology); + let rendered = render_and_write_cfgsync_from_template::( + &cfgsync_template_path, + topology, + &hostnames, + CfgsyncRenderOptions { + port: Some(cfgsync_port()), + bundle_path: Some("cfgsync.bundle.yaml".to_string()), + min_timeout_secs: Some(CFGSYNC_K8S_TIMEOUT_SECS), + metrics_otlp_ingest_url: metrics_otlp_ingest_url.cloned(), + }, + CfgsyncOutputPaths { + config_path: cfgsync_file, + bundle_path: bundle_file, + }, + ) + .map_err(|source| AssetsError::Cfgsync { source })?; + + Ok((rendered.config_yaml, rendered.bundle_yaml)) +} + +fn cfgsync_template_path(root: &Path) -> PathBuf { + stack_assets_root(root).join("cfgsync.yaml") +} + +fn k8s_node_hostnames(topology: &DeploymentPlan) -> Vec { + topology + .nodes() + .iter() + .map(|node| format!("{K8S_FULLNAME_OVERRIDE}-node-{}", node.index())) + .collect() +} + +struct ScriptPaths { + run_cfgsync: PathBuf, + run_shared: PathBuf, + run_node: PathBuf, +} + +fn validate_scripts(root: &Path) -> Result { + let scripts_dir = stack_scripts_root(root); + let run_cfgsync = scripts_dir.join("run_cfgsync.sh"); + let run_shared = scripts_dir.join("run_logos.sh"); + let run_node = scripts_dir.join("run_logos_node.sh"); + + for path in [&run_cfgsync, &run_shared, &run_node] { + if !path.exists() { + return Err(AssetsError::MissingScript { path: path.clone() }); + } + } + + debug!( + run_cfgsync = %run_cfgsync.display(), + run_shared = %run_shared.display(), + run_node = %run_node.display(), + "validated runner scripts exist" + ); + + Ok(ScriptPaths { + run_cfgsync, + run_shared, + run_node, + }) +} + +fn helm_chart_path() -> Result { + let root = workspace_root().map_err(|source| AssetsError::WorkspaceRoot { source })?; + let path = if let Some(override_dir) = helm_override_dir(&root) { + override_dir + } else { + root.join("logos/infra/helm/logos-runner") + }; + if path.exists() { + Ok(path) + } else { + Err(AssetsError::MissingChart { path }) + } +} + +fn render_values_yaml( + topology: &DeploymentPlan, + cfgsync_yaml: &str, + bundle_yaml: &str, +) -> Result { + let values = build_values(topology, cfgsync_yaml, bundle_yaml); + serde_yaml::to_string(&values).map_err(|source| AssetsError::Values { source }) +} + +fn write_temp_file( + dir: &Path, + name: &str, + contents: impl AsRef<[u8]>, +) -> Result { + let path = dir.join(name); + fs::write(&path, contents).map_err(|source| AssetsError::Io { + path: path.clone(), + source, + })?; + Ok(path) +} + +/// Locate the workspace root, honoring `CARGO_WORKSPACE_DIR` overrides. +pub fn workspace_root() -> AnyhowResult { + if let Ok(var) = env::var("CARGO_WORKSPACE_DIR") { + return Ok(PathBuf::from(var)); + } + let manifest_dir = Path::new(env!("CARGO_MANIFEST_DIR")); + let candidate_roots = [ + manifest_dir + .parent() + .and_then(Path::parent) + .and_then(Path::parent), + manifest_dir.parent().and_then(Path::parent), + ]; + + for candidate in candidate_roots.iter().flatten() { + let stack_root = if let Some(override_dir) = assets_override_dir(candidate) { + override_dir + } else { + candidate.join(DEFAULT_ASSETS_STACK_DIR) + }; + if stack_root.exists() { + return Ok(candidate.to_path_buf()); + } + } + + Err(anyhow!( + "resolving workspace root from manifest dir: {manifest_dir:?}" + )) +} + +fn stack_assets_root(root: &Path) -> PathBuf { + if let Some(override_dir) = assets_override_dir(root) + && override_dir.exists() + { + return override_dir; + } + root.join(DEFAULT_ASSETS_STACK_DIR) +} + +fn stack_scripts_root(root: &Path) -> PathBuf { + if let Some(scripts) = override_scripts_dir(root) + && scripts.exists() + { + return scripts; + } + root.join(DEFAULT_ASSETS_STACK_DIR).join("scripts") +} + +fn assets_override_dir(root: &Path) -> Option { + env::var("REL_ASSETS_STACK_DIR").ok().map(|value| { + let path = PathBuf::from(value); + if path.is_absolute() { + path + } else { + root.join(path) + } + }) +} + +fn override_scripts_dir(root: &Path) -> Option { + assets_override_dir(root).map(|dir| dir.join("scripts")) +} + +fn helm_override_dir(root: &Path) -> Option { + env::var("REL_HELM_CHART_DIR").ok().map(|value| { + let path = PathBuf::from(value); + if path.is_absolute() { + path + } else { + root.join(path) + } + }) +} + +#[derive(Serialize)] +struct HelmValues { + #[serde(rename = "imagePullPolicy")] + image_pull_policy: String, + #[serde(rename = "fullnameOverride")] + fullname_override: String, + kzg: KzgValues, + cfgsync: CfgsyncValues, + nodes: NodeGroup, +} + +#[derive(Serialize)] +struct KzgValues { + mode: String, + #[serde(rename = "storageSize")] + storage_size: String, + #[serde(rename = "hostPath")] + host_path: String, + #[serde(rename = "hostPathType")] + host_path_type: String, +} + +#[derive(Serialize)] +struct CfgsyncValues { + port: u16, + config: String, + bundle: String, +} + +#[derive(Serialize)] +struct NodeGroup { + count: usize, + nodes: Vec, +} + +#[derive(Serialize)] +struct NodeValues { + #[serde(rename = "apiPort")] + api_port: u16, + #[serde(rename = "testingHttpPort")] + testing_http_port: u16, + #[serde(rename = "networkPort")] + network_port: u16, + env: BTreeMap, +} + +fn build_values(topology: &DeploymentPlan, cfgsync_yaml: &str, bundle_yaml: &str) -> HelmValues { + let cfgsync = CfgsyncValues { + port: cfgsync_port(), + config: cfgsync_yaml.to_string(), + bundle: bundle_yaml.to_string(), + }; + let kzg = KzgValues::disabled(); + let image_pull_policy = + tf_env::nomos_testnet_image_pull_policy().unwrap_or_else(|| "IfNotPresent".into()); + debug!("rendering Helm values for k8s stack"); + let nodes = build_node_group("node", topology.nodes()); + + HelmValues { + image_pull_policy, + fullname_override: K8S_FULLNAME_OVERRIDE.to_string(), + kzg, + cfgsync, + nodes, + } +} + +impl KzgValues { + fn disabled() -> Self { + Self { + mode: "disabled".to_string(), + storage_size: "1Gi".to_string(), + host_path: "/tmp/nomos-kzg".to_string(), + host_path_type: "DirectoryOrCreate".to_string(), + } + } +} + +fn build_node_group(kind: &'static str, nodes: &[NodePlan]) -> NodeGroup { + let node_values = nodes + .iter() + .enumerate() + .map(|(index, node)| build_node_values(kind, index, node)) + .collect(); + + NodeGroup { + count: nodes.len(), + nodes: node_values, + } +} + +fn build_node_values(kind: &'static str, index: usize, node: &NodePlan) -> NodeValues { + let mut env = BTreeMap::new(); + env.insert("CFG_HOST_KIND".into(), kind.to_string()); + env.insert("CFG_HOST_IDENTIFIER".into(), format!("{kind}-{index}")); + + NodeValues { + api_port: node.general.api_config.address.port(), + testing_http_port: node.general.api_config.testing_http_address.port(), + network_port: node.general.network_config.backend.swarm.port, + env, + } +} diff --git a/logos/runtime/ext/src/lib.rs b/logos/runtime/ext/src/lib.rs new file mode 100644 index 0000000..c849ceb --- /dev/null +++ b/logos/runtime/ext/src/lib.rs @@ -0,0 +1,46 @@ +use std::sync::Arc; + +use async_trait::async_trait; +pub use lb_framework::*; +use testing_framework_core::scenario::{Application, DynError, FeedRuntime, RunContext}; +use tokio::sync::broadcast; + +pub mod cfgsync; +mod compose_env; +pub mod constants; +mod k8s_env; +pub mod scenario; + +pub struct LbcExtEnv; + +#[async_trait] +impl Application for LbcExtEnv { + type Deployment = ::Deployment; + type NodeClient = ::NodeClient; + type NodeConfig = ::NodeConfig; + type FeedRuntime = ::FeedRuntime; + + async fn prepare_feed( + client: Self::NodeClient, + ) -> Result<(::Feed, Self::FeedRuntime), DynError> { + ::prepare_feed(client).await + } +} + +pub use scenario::{ + CoreBuilderExt, ObservabilityBuilderExt, ScenarioBuilder, ScenarioBuilderExt, + ScenarioBuilderWith, +}; + +pub type LbcComposeDeployer = testing_framework_runner_compose::ComposeDeployer; +pub type LbcK8sDeployer = testing_framework_runner_k8s::K8sDeployer; + +impl lb_framework::workloads::LbcScenarioEnv for LbcExtEnv {} + +impl lb_framework::workloads::LbcBlockFeedEnv for LbcExtEnv { + fn block_feed_subscription( + ctx: &RunContext, + ) -> broadcast::Receiver> { + ctx.feed().subscribe() + } +} diff --git a/logos/runtime/ext/src/scenario/mod.rs b/logos/runtime/ext/src/scenario/mod.rs new file mode 100644 index 0000000..a25297d --- /dev/null +++ b/logos/runtime/ext/src/scenario/mod.rs @@ -0,0 +1,212 @@ +use std::num::{NonZeroU64, NonZeroUsize}; + +use lb_framework::{ + configs::{ + deployment::{DeploymentBuilder, TopologyConfig}, + wallet::{WalletConfig, wallet_config_for_users}, + }, + internal::{DeploymentPlan, apply_wallet_config_to_deployment}, +}; +pub use testing_framework_core::scenario::ObservabilityBuilderExt; +use testing_framework_core::{ + scenario::{NodeControlScenarioBuilder, ObservabilityScenarioBuilder}, + topology::{DeploymentProvider, DeploymentSeed, DynTopologyError}, +}; +use tracing::warn; + +use crate::LbcExtEnv; + +pub type ScenarioBuilder = testing_framework_core::scenario::ScenarioBuilder; +pub type ScenarioBuilderWith = + testing_framework_core::scenario::CoreBuilder; + +pub trait CoreBuilderExt: Sized { + fn deployment_with(f: impl FnOnce(DeploymentBuilder) -> DeploymentBuilder) -> Self; + + fn with_wallet_config(self, wallet: WalletConfig) -> Self; + + fn wallets(self, users: usize) -> Self; +} + +pub trait ScenarioBuilderExt: Sized { + fn transactions(self) -> TransactionFlowBuilder; + + fn transactions_with( + self, + f: impl FnOnce(TransactionFlowBuilder) -> TransactionFlowBuilder, + ) -> Self; + + fn expect_consensus_liveness(self) -> Self; + + fn initialize_wallet(self, total_funds: u64, users: usize) -> Self; +} + +impl CoreBuilderExt for ScenarioBuilder { + fn deployment_with(f: impl FnOnce(DeploymentBuilder) -> DeploymentBuilder) -> Self { + let topology = f(DeploymentBuilder::new(TopologyConfig::empty())); + ScenarioBuilder::new(Box::new(topology)) + } + + fn with_wallet_config(self, wallet: WalletConfig) -> Self { + self.map_deployment_provider(|provider| { + Box::new(WalletConfigProvider { + inner: provider, + wallet, + }) + }) + } + + fn wallets(self, users: usize) -> Self { + with_wallets_or_warn(self, users, CoreBuilderExt::with_wallet_config) + } +} + +impl CoreBuilderExt for NodeControlScenarioBuilder { + fn deployment_with(f: impl FnOnce(DeploymentBuilder) -> DeploymentBuilder) -> Self { + ScenarioBuilder::deployment_with(f).enable_node_control() + } + + fn with_wallet_config(self, wallet: WalletConfig) -> Self { + self.map_deployment_provider(|provider| { + Box::new(WalletConfigProvider { + inner: provider, + wallet, + }) + }) + } + + fn wallets(self, users: usize) -> Self { + with_wallets_or_warn(self, users, CoreBuilderExt::with_wallet_config) + } +} + +impl CoreBuilderExt for ObservabilityScenarioBuilder { + fn deployment_with(f: impl FnOnce(DeploymentBuilder) -> DeploymentBuilder) -> Self { + ScenarioBuilder::deployment_with(f).enable_observability() + } + + fn with_wallet_config(self, wallet: WalletConfig) -> Self { + self.map_deployment_provider(|provider| { + Box::new(WalletConfigProvider { + inner: provider, + wallet, + }) + }) + } + + fn wallets(self, users: usize) -> Self { + with_wallets_or_warn(self, users, CoreBuilderExt::with_wallet_config) + } +} + +impl ScenarioBuilderExt for B +where + B: CoreBuilderExt + testing_framework_core::scenario::CoreBuilderExt + Sized, +{ + fn transactions(self) -> TransactionFlowBuilder { + TransactionFlowBuilder { + builder: self, + rate: NonZeroU64::MIN, + users: None, + } + } + + fn transactions_with( + self, + f: impl FnOnce(TransactionFlowBuilder) -> TransactionFlowBuilder, + ) -> Self { + f(self.transactions()).apply() + } + + fn expect_consensus_liveness(self) -> Self { + self.with_expectation(lb_framework::workloads::ConsensusLiveness::::default()) + } + + fn initialize_wallet(self, total_funds: u64, users: usize) -> Self { + let Some(user_count) = NonZeroUsize::new(users) else { + warn!( + users, + "wallet user count must be non-zero; ignoring initialize_wallet" + ); + return self; + }; + + match WalletConfig::uniform(total_funds, user_count) { + Ok(wallet) => self.with_wallet_config(wallet), + Err(error) => { + warn!( + users, + total_funds, + error = %error, + "invalid initialize_wallet input; ignoring initialize_wallet" + ); + self + } + } + } +} + +pub struct TransactionFlowBuilder { + builder: B, + rate: NonZeroU64, + users: Option, +} + +impl TransactionFlowBuilder +where + B: testing_framework_core::scenario::CoreBuilderExt + Sized, +{ + pub fn rate(mut self, rate: u64) -> Self { + match NonZeroU64::new(rate) { + Some(rate) => self.rate = rate, + None => warn!( + rate, + "transaction rate must be non-zero; keeping previous rate" + ), + } + self + } + + pub fn users(mut self, users: usize) -> Self { + match NonZeroUsize::new(users) { + Some(value) => self.users = Some(value), + None => warn!( + users, + "transaction user count must be non-zero; keeping previous setting" + ), + } + self + } + + pub fn apply(self) -> B { + let workload = lb_framework::workloads::transaction::Workload::::new(self.rate) + .with_user_limit(self.users); + self.builder.with_workload(workload) + } +} + +struct WalletConfigProvider { + inner: Box>, + wallet: WalletConfig, +} + +impl DeploymentProvider for WalletConfigProvider { + fn build(&self, seed: Option<&DeploymentSeed>) -> Result { + let mut deployment = self.inner.build(seed)?; + apply_wallet_config_to_deployment(&mut deployment, &self.wallet); + Ok(deployment) + } +} + +fn with_wallets_or_warn(builder: B, users: usize, apply: impl FnOnce(B, WalletConfig) -> B) -> B +where + B: CoreBuilderExt, +{ + match wallet_config_for_users(users) { + Ok(wallet) => apply(builder, wallet), + Err(error) => { + warn!(users, error = %error, "invalid wallets input; ignoring wallets"); + builder + } + } +} diff --git a/logos/runtime/workloads/Cargo.toml b/logos/runtime/workloads/Cargo.toml new file mode 100644 index 0000000..fb63f8c --- /dev/null +++ b/logos/runtime/workloads/Cargo.toml @@ -0,0 +1,19 @@ +[package] +description = { workspace = true } +edition = { workspace = true } +license = { workspace = true } +name = "lb-workloads" +version = { workspace = true } + +[lints] +workspace = true + +[dependencies] +# Workspace crates +lb-ext = { workspace = true } +lb-framework = { workspace = true } +testing-framework-core = { workspace = true } + +# External +thiserror = { workspace = true } +tokio = { features = ["time"], workspace = true } diff --git a/logos/runtime/workloads/src/lib.rs b/logos/runtime/workloads/src/lib.rs new file mode 100644 index 0000000..c8974ab --- /dev/null +++ b/logos/runtime/workloads/src/lib.rs @@ -0,0 +1,9 @@ +pub mod workflows; +pub mod workloads; + +pub use lb_ext::LbcExtEnv as LbcEnv; +pub use lb_framework::workloads::{ConsensusLiveness, transaction::TxInclusionExpectation}; +pub use testing_framework_core::{scenario::BuilderInputError, workloads::RandomRestartWorkload}; +pub use workflows::{ + ChaosBuilderExt, ScenarioBuilderExt, start_node_with_timeout, wait_for_min_height, +}; diff --git a/testing-framework/workflows/src/manual.rs b/logos/runtime/workloads/src/workflows/manual.rs similarity index 80% rename from testing-framework/workflows/src/manual.rs rename to logos/runtime/workloads/src/workflows/manual.rs index bbc0b9e..a11b86e 100644 --- a/testing-framework/workflows/src/manual.rs +++ b/logos/runtime/workloads/src/workflows/manual.rs @@ -1,8 +1,8 @@ use std::time::Duration; -use testing_framework_core::{ - nodes::ApiClient, - scenario::{DynError, NodeControlHandle, StartNodeOptions, StartedNode}, +use lb_framework::NodeHttpClient; +use testing_framework_core::scenario::{ + Application, DynError, NodeControlHandle, StartNodeOptions, StartedNode, }; use thiserror::Error; use tokio::time::{Instant, sleep, timeout}; @@ -14,18 +14,19 @@ pub enum ManualTestError { #[error("start node failed: {message}")] StartNode { message: String }, #[error("consensus_info failed: {source}")] - ConsensusInfo { - #[from] - source: reqwest::Error, - }, + ConsensusInfo { source: DynError }, } -pub async fn start_node_with_timeout( +pub async fn start_node_with_timeout( handle: &H, name: &str, - options: StartNodeOptions, + options: StartNodeOptions, timeout_duration: Duration, -) -> Result { +) -> Result, ManualTestError> +where + App: Application, + H: NodeControlHandle + ?Sized, +{ timeout(timeout_duration, handle.start_node_with(name, options)) .await .map_err(|_| ManualTestError::Timeout { @@ -37,7 +38,7 @@ pub async fn start_node_with_timeout( } pub async fn wait_for_min_height( - clients: &[ApiClient], + clients: &[NodeHttpClient], min_height: u64, timeout_duration: Duration, poll_interval: Duration, @@ -51,7 +52,7 @@ pub async fn wait_for_min_height( Ok(info) => heights.push(info.height), Err(err) => { if start.elapsed() >= timeout_duration { - return Err(ManualTestError::ConsensusInfo { source: err }); + return Err(ManualTestError::ConsensusInfo { source: err.into() }); } sleep(poll_interval).await; continue; diff --git a/logos/runtime/workloads/src/workflows/mod.rs b/logos/runtime/workloads/src/workflows/mod.rs new file mode 100644 index 0000000..052c1af --- /dev/null +++ b/logos/runtime/workloads/src/workflows/mod.rs @@ -0,0 +1,5 @@ +pub mod manual; + +pub use lb_framework::ScenarioBuilderExt; +pub use manual::{start_node_with_timeout, wait_for_min_height}; +pub use testing_framework_core::workloads::ChaosBuilderExt; diff --git a/logos/runtime/workloads/src/workloads/mod.rs b/logos/runtime/workloads/src/workloads/mod.rs new file mode 100644 index 0000000..ffa8b4a --- /dev/null +++ b/logos/runtime/workloads/src/workloads/mod.rs @@ -0,0 +1,7 @@ +pub mod chaos { + pub use testing_framework_core::workloads::RandomRestartWorkload; +} + +pub mod transaction { + pub use lb_framework::workloads::transaction::{TxInclusionExpectation, Workload}; +} diff --git a/runner-review-notes.md b/runner-review-notes.md new file mode 100644 index 0000000..e01c1cf --- /dev/null +++ b/runner-review-notes.md @@ -0,0 +1,17 @@ +# Runner review notes + +File: `testing-framework/core/src/scenario/runtime/runner.rs` + +## Concerns + +1) **Potential hang if workloads don’t self-terminate** +- `run_workloads` never cancels workloads after the duration expires. It waits for `drive_until_timer` to time out, then optionally does a cooldown, then `drain_workloads` which waits for all tasks to finish. If a workload is long-lived or never exits on its own, the scenario can hang indefinitely after the duration window. +- Suggestion: call `workloads.abort_all()` when `drive_until_timer` returns `Ok(false)` (timeout) or add a second timeout around `drain_workloads`. + +2) **Block-feed settle constants are inconsistent** +- `DEFAULT_BLOCK_FEED_SETTLE_WAIT` is 1s, but `MIN_BLOCK_FEED_SETTLE_WAIT` is 2s. In `settle_before_expectations`, any zero wait becomes 1s and then gets max’d to 2s, so the default is effectively unused. +- Suggestion: set default ≥ min, or remove the default constant and use min directly. + +## Questions + +- Are workloads guaranteed to finish on their own at the end of `scenario.duration()`? If not, the hang risk is real. diff --git a/rust-toolchain.toml b/rust-toolchain.toml index 1b79eb8..9655629 100644 --- a/rust-toolchain.toml +++ b/rust-toolchain.toml @@ -2,7 +2,7 @@ # Keep this version in sync also in the following places: # * Dockerfile # * flake.nix -# * testing-framework/assets/stack/Dockerfile +# * logos/infra/assets/stack/Dockerfile # Also, update the version of the nightly toolchain to the latest nightly of the new version specified in the following places: # * .github/workflows/code-check.yml (fmt job) # * .pre-commit-config.yml (fmt hook) diff --git a/scripts/build/build-bundle.sh b/scripts/build/build-bundle.sh index 6e0125a..ac67dcf 100755 --- a/scripts/build/build-bundle.sh +++ b/scripts/build/build-bundle.sh @@ -99,10 +99,24 @@ build_bundle::load_env() { ROOT_DIR="$(common::repo_root)" export ROOT_DIR + local env_version="${VERSION:-}" + local env_node_rev="${LOGOS_BLOCKCHAIN_NODE_REV:-}" + local env_node_path="${LOGOS_BLOCKCHAIN_NODE_PATH:-}" + common::require_file "${ROOT_DIR}/versions.env" # shellcheck disable=SC1091 . "${ROOT_DIR}/versions.env" + if [ -n "${env_version}" ]; then + VERSION="${env_version}" + fi + if [ -n "${env_node_rev}" ]; then + LOGOS_BLOCKCHAIN_NODE_REV="${env_node_rev}" + fi + if [ -n "${env_node_path}" ]; then + LOGOS_BLOCKCHAIN_NODE_PATH="${env_node_path}" + fi + DEFAULT_VERSION="${VERSION:?Missing VERSION in versions.env}" DEFAULT_NODE_REV="${LOGOS_BLOCKCHAIN_NODE_REV:-}" DEFAULT_NODE_PATH="${LOGOS_BLOCKCHAIN_NODE_PATH:-}" @@ -132,6 +146,23 @@ build_bundle::default_docker_platform() { esac } +build_bundle::ensure_circuits() { + if [ -n "${LOGOS_BLOCKCHAIN_CIRCUITS:-}" ]; then + [ -d "${LOGOS_BLOCKCHAIN_CIRCUITS}" ] || build_bundle::fail \ + "LOGOS_BLOCKCHAIN_CIRCUITS is set but missing: ${LOGOS_BLOCKCHAIN_CIRCUITS}" + return 0 + fi + + local default_dir="${HOME}/.logos-blockchain-circuits" + if [ ! -d "${default_dir}" ]; then + echo "==> Circuits not found; installing to ${default_dir}" + bash "${ROOT_DIR}/scripts/setup/setup-logos-blockchain-circuits.sh" "${VERSION}" "${default_dir}" + fi + + LOGOS_BLOCKCHAIN_CIRCUITS="${default_dir}" + export LOGOS_BLOCKCHAIN_CIRCUITS +} + build_bundle::parse_args() { PLATFORM="host" OUTPUT="" @@ -231,20 +262,27 @@ build_bundle::maybe_run_linux_build_in_docker() { node_path_env="/workspace${LOGOS_BLOCKCHAIN_NODE_PATH#"${ROOT_DIR}"}" ;; /*) - node_path_env="/external/logos-blockchain-node" + node_path_env="/external/nomos-node" extra_mounts+=("-v" "${LOGOS_BLOCKCHAIN_NODE_PATH}:${node_path_env}") + # Local node checkouts may reference this workspace via absolute + # /external/nomos-testing path dependencies. + extra_mounts+=("-v" "${ROOT_DIR}:/external/nomos-testing") ;; *) build_bundle::fail "--path must be absolute when cross-building in Docker" ;; esac fi + if [ -n "${LOGOS_BLOCKCHAIN_CIRCUITS:-}" ] && [ -d "${LOGOS_BLOCKCHAIN_CIRCUITS}" ]; then + extra_mounts+=("-v" "${LOGOS_BLOCKCHAIN_CIRCUITS}:/root/.logos-blockchain-circuits:ro") + fi echo "==> Building Linux bundle inside Docker" local container_output="/workspace${OUTPUT#"${ROOT_DIR}"}" local target_suffix target_suffix="$(build_bundle::docker_platform_suffix "${DOCKER_PLATFORM}")" - local host_target_dir="${ROOT_DIR}/.tmp/logos-blockchain-node-linux-target${target_suffix}" + local run_suffix="-$(date +%s)" + local host_target_dir="${ROOT_DIR}/.tmp/logos-blockchain-node-linux-target${target_suffix}${run_suffix}" mkdir -p "${ROOT_DIR}/.tmp/cargo-linux" "${host_target_dir}" local -a features_args=() @@ -263,13 +301,15 @@ build_bundle::maybe_run_linux_build_in_docker() { -e VERSION="${VERSION}" \ -e LOGOS_BLOCKCHAIN_NODE_REV="${LOGOS_BLOCKCHAIN_NODE_REV}" \ -e LOGOS_BLOCKCHAIN_NODE_PATH="${node_path_env}" \ + -e LOGOS_BLOCKCHAIN_CIRCUITS="/root/.logos-blockchain-circuits" \ -e LOGOS_BLOCKCHAIN_BUNDLE_DOCKER_PLATFORM="${DOCKER_PLATFORM}" \ + -e LOGOS_BLOCKCHAIN_NODE_TARGET_SUFFIX="${run_suffix}" \ -e LOGOS_BLOCKCHAIN_EXTRA_FEATURES="${LOGOS_BLOCKCHAIN_EXTRA_FEATURES:-}" \ -e BUNDLE_IN_CONTAINER=1 \ -e CARGO_HOME=/workspace/.tmp/cargo-linux \ - -e CARGO_TARGET_DIR="/workspace/.tmp/logos-blockchain-node-linux-target${target_suffix}" \ + -e CARGO_TARGET_DIR="/workspace/.tmp/logos-blockchain-node-linux-target${target_suffix}${run_suffix}" \ -v "${ROOT_DIR}/.tmp/cargo-linux":/workspace/.tmp/cargo-linux \ - -v "${host_target_dir}:/workspace/.tmp/logos-blockchain-node-linux-target${target_suffix}" \ + -v "${host_target_dir}:/workspace/.tmp/logos-blockchain-node-linux-target${target_suffix}${run_suffix}" \ -v "${ROOT_DIR}:/workspace" \ "${extra_mounts[@]}" \ -w /workspace \ @@ -291,6 +331,7 @@ build_bundle::prepare_circuits() { if [ -n "${BUNDLE_IN_CONTAINER:-}" ]; then target_suffix="$(build_bundle::docker_platform_suffix "${LOGOS_BLOCKCHAIN_BUNDLE_DOCKER_PLATFORM:-}")" fi + target_suffix="${target_suffix}${LOGOS_BLOCKCHAIN_NODE_TARGET_SUFFIX:-}" NODE_TARGET="${ROOT_DIR}/.tmp/logos-blockchain-node-linux-target${target_suffix}" fi @@ -315,7 +356,11 @@ build_bundle::build_binaries() { ( cd "${NODE_SRC}" if [ -d "${NODE_TARGET}" ]; then - rm -rf "${NODE_TARGET}" + if [ -n "${BUNDLE_IN_CONTAINER:-}" ]; then + find "${NODE_TARGET}" -mindepth 1 -maxdepth 1 -exec rm -rf {} + + else + rm -rf "${NODE_TARGET}" + fi fi if [ -n "${LOGOS_BLOCKCHAIN_NODE_PATH}" ]; then echo "Using local logos-blockchain-node checkout at ${NODE_SRC} (no fetch/checkout)" @@ -329,6 +374,9 @@ build_bundle::build_binaries() { git clean -fdx fi + if [ -z "${LOGOS_BLOCKCHAIN_NODE_PATH}" ]; then + build_bundle::apply_nomos_node_patches "${NODE_SRC}" + fi if [ -n "${BUNDLE_RUSTUP_TOOLCHAIN}" ]; then RUSTFLAGS='--cfg feature="high-active-slot-coefficient" --cfg feature="build-verification-key"' \ CARGO_FEATURE_BUILD_VERIFICATION_KEY=1 \ @@ -386,6 +434,7 @@ build_bundle::main() { build_bundle::clean_cargo_linux_cache build_bundle::parse_args "$@" build_bundle::validate_and_finalize + build_bundle::ensure_circuits build_bundle::maybe_run_linux_build_in_docker build_bundle::prepare_circuits build_bundle::build_binaries diff --git a/scripts/build/build-linux-binaries.sh b/scripts/build/build-linux-binaries.sh index 3d97948..7ea6995 100755 --- a/scripts/build/build-linux-binaries.sh +++ b/scripts/build/build-linux-binaries.sh @@ -13,7 +13,7 @@ build_linux_binaries::usage() { Usage: scripts/build/build-linux-binaries.sh [options] Builds a Linux bundle via scripts/build/build-bundle.sh, then stages artifacts into: - - testing-framework/assets/stack/bin + - logos/infra/assets/stack/bin Options: --rev REV logos-blockchain-node git revision to build (overrides LOGOS_BLOCKCHAIN_NODE_REV) @@ -124,15 +124,14 @@ build_linux_binaries::stage_from_bundle() { local tar_path="$1" local extract_dir extract_dir="$(common::tmpdir nomos-linux-bundle.XXXXXX)" - cleanup() { rm -rf "${extract_dir}" 2>/dev/null || true; } - trap cleanup EXIT + trap "rm -rf '${extract_dir}' 2>/dev/null || true" EXIT echo "==> Extracting ${tar_path}" tar -xzf "${tar_path}" -C "${extract_dir}" local artifacts="${extract_dir}/artifacts" [ -f "${artifacts}/logos-blockchain-node" ] || common::die "Missing logos-blockchain-node in bundle: ${tar_path}" - local bin_out="${ROOT_DIR}/testing-framework/assets/stack/bin" + local bin_out="${ROOT_DIR}/logos/infra/assets/stack/bin" echo "==> Staging binaries to ${bin_out}" mkdir -p "${bin_out}" @@ -148,7 +147,7 @@ build_linux_binaries::main() { build_linux_binaries::stage_from_bundle "${BUNDLE_TAR}" echo - echo "Binaries staged in ${ROOT_DIR}/testing-framework/assets/stack/bin" + echo "Binaries staged in ${ROOT_DIR}/logos/infra/assets/stack/bin" echo "Bundle tarball: ${BUNDLE_TAR}" } diff --git a/scripts/build/build_test_image.sh b/scripts/build/build_test_image.sh index 4e07960..2763df5 100755 --- a/scripts/build/build_test_image.sh +++ b/scripts/build/build_test_image.sh @@ -17,7 +17,7 @@ Builds the compose/k8s test image (bakes in binaries). Options: --tag TAG Docker image tag (default: logos-blockchain-testing:local; or env IMAGE_TAG) --version VERSION Bundle version tag (default: versions.env VERSION) - --dockerfile PATH Dockerfile path (default: testing-framework/assets/stack/Dockerfile.runtime) + --dockerfile PATH Dockerfile path (default: logos/infra/assets/stack/Dockerfile.runtime) --base-tag TAG Base image tag (default: logos-blockchain-testing:base) --bundle-tar PATH Bundle tar containing artifacts/{nomos-*} (default: .tmp/nomos-binaries-linux-.tar.gz; or env LOGOS_BLOCKCHAIN_BINARIES_TAR) --no-restore Do not restore binaries from bundle tar (forces Dockerfile to build/download as needed) @@ -46,8 +46,8 @@ build_test_image::load_env() { . "${ROOT_DIR}/versions.env" common::maybe_source "${ROOT_DIR}/paths.env" - DOCKERFILE_PATH_DEFAULT="${ROOT_DIR}/testing-framework/assets/stack/Dockerfile.runtime" - BASE_DOCKERFILE_PATH_DEFAULT="${ROOT_DIR}/testing-framework/assets/stack/Dockerfile.base" + DOCKERFILE_PATH_DEFAULT="${ROOT_DIR}/logos/infra/assets/stack/Dockerfile.runtime" + BASE_DOCKERFILE_PATH_DEFAULT="${ROOT_DIR}/logos/infra/assets/stack/Dockerfile.base" IMAGE_TAG_DEFAULT="logos-blockchain-testing:local" BASE_IMAGE_TAG_DEFAULT="logos-blockchain-testing:base" @@ -90,10 +90,23 @@ build_test_image::parse_args() { VERSION="${VERSION_DEFAULT}" fi - BIN_DST="${ROOT_DIR}/testing-framework/assets/stack/bin" + BIN_DST="${ROOT_DIR}/logos/infra/assets/stack/bin" DEFAULT_LINUX_TAR="${ROOT_DIR}/.tmp/nomos-binaries-linux-${VERSION}.tar.gz" TAR_PATH="${BUNDLE_TAR_PATH:-${DEFAULT_LINUX_TAR}}" + + LOGOS_BLOCKCHAIN_NODE_PATH="${LOGOS_BLOCKCHAIN_NODE_PATH:-}" + if [ -z "${LOGOS_BLOCKCHAIN_NODE_PATH}" ]; then + # Prefer local checkout when available: this repo currently depends on + # lb-framework from nomos-node/tests/testing_framework. + local sibling_node_path="${ROOT_DIR}/../nomos-node" + if [ -d "${sibling_node_path}/tests/testing_framework" ]; then + LOGOS_BLOCKCHAIN_NODE_PATH="${sibling_node_path}" + fi + fi + if [ -n "${LOGOS_BLOCKCHAIN_NODE_PATH}" ] && [ ! -d "${LOGOS_BLOCKCHAIN_NODE_PATH}" ]; then + build_test_image::fail "LOGOS_BLOCKCHAIN_NODE_PATH does not exist: ${LOGOS_BLOCKCHAIN_NODE_PATH}" + fi } build_test_image::print_config() { @@ -103,6 +116,7 @@ build_test_image::print_config() { echo "Base image tag: ${BASE_IMAGE_TAG}" echo "Base Dockerfile: ${BASE_DOCKERFILE_PATH}" echo "Logos node rev: ${LOGOS_BLOCKCHAIN_NODE_REV}" + echo "Logos node path: ${LOGOS_BLOCKCHAIN_NODE_PATH:-}" echo "Binaries dir: ${BIN_DST}" echo "Bundle tar (if used): ${TAR_PATH}" echo "Restore from tar: $([ "${NO_RESTORE}" -eq 1 ] && echo "disabled" || echo "enabled")" @@ -164,22 +178,32 @@ build_test_image::docker_build() { linux-aarch64) target_platform="linux/arm64" ;; esac fi + if [ -z "${target_platform}" ] && [ -n "${host_platform}" ]; then + # Prefer native builds to avoid emulation issues in rapidsnark/prover. + target_platform="${host_platform}" + fi local -a base_build_args=( -f "${BASE_DOCKERFILE_PATH}" -t "${BASE_IMAGE_TAG}" --build-arg "LOGOS_BLOCKCHAIN_NODE_REV=${LOGOS_BLOCKCHAIN_NODE_REV}" + --build-arg "LOGOS_BLOCKCHAIN_NODE_USE_LOCAL_CONTEXT=$([ -n "${LOGOS_BLOCKCHAIN_NODE_PATH}" ] && echo 1 || echo 0)" --build-arg "VERSION=${VERSION}" - "${ROOT_DIR}" ) + local node_context_path="${LOGOS_BLOCKCHAIN_NODE_PATH:-${ROOT_DIR}}" + base_build_args+=(--build-context "nomos_node=${node_context_path}") + if [ -n "${LOGOS_BLOCKCHAIN_FORCE_BUILD:-}" ]; then + base_build_args+=(--build-arg "LOGOS_BLOCKCHAIN_FORCE_BUILD=${LOGOS_BLOCKCHAIN_FORCE_BUILD}") + fi if [ -n "${host_platform}" ] && [ -n "${target_platform}" ] && [ "${host_platform}" != "${target_platform}" ]; then base_build_args+=(--platform "${target_platform}") fi + base_build_args+=("${ROOT_DIR}") printf "Running:" printf " %q" docker build "${base_build_args[@]}" echo - docker build "${base_build_args[@]}" + build_test_image::docker_build_with_retry "${base_build_args[@]}" local -a final_build_args=( -f "${DOCKERFILE_PATH}" @@ -193,7 +217,23 @@ build_test_image::docker_build() { printf "Running:" printf " %q" docker build "${final_build_args[@]}" echo - docker build "${final_build_args[@]}" + build_test_image::docker_build_with_retry "${final_build_args[@]}" +} + +build_test_image::docker_build_with_retry() { + local max_attempts=3 + local attempt + for attempt in $(seq 1 "${max_attempts}"); do + if docker build "$@"; then + return 0 + fi + if [ "${attempt}" -lt "${max_attempts}" ]; then + local backoff=$((attempt * 5)) + echo "docker build failed (attempt ${attempt}/${max_attempts}); retrying in ${backoff}s..." + sleep "${backoff}" + fi + done + build_test_image::fail "docker build failed after ${max_attempts} attempts" } build_test_image::main() { diff --git a/scripts/observability/compose/docker-compose.yml b/scripts/observability/compose/docker-compose.yml index 618944d..eb03ae6 100644 --- a/scripts/observability/compose/docker-compose.yml +++ b/scripts/observability/compose/docker-compose.yml @@ -19,17 +19,17 @@ services: depends_on: - prometheus env_file: - - ../../../testing-framework/assets/stack/monitoring/grafana/plugins.env + - ../../../logos/infra/assets/stack/monitoring/grafana/plugins.env environment: GF_SECURITY_ADMIN_USER: admin GF_SECURITY_ADMIN_PASSWORD: admin GF_USERS_ALLOW_SIGN_UP: "false" volumes: - grafana-data:/var/lib/grafana - - ../../../testing-framework/assets/stack/monitoring/grafana/grafana.ini:/etc/grafana/grafana.ini:ro - - ../../../testing-framework/assets/stack/monitoring/grafana/datasources.yaml:/etc/grafana/provisioning/datasources/datasources.yaml:ro - - ../../../testing-framework/assets/stack/monitoring/grafana/dashboards.yml:/etc/grafana/provisioning/dashboards/dashboards.yml:ro - - ../../../testing-framework/assets/stack/monitoring/grafana/dashboards:/var/lib/grafana/dashboards:ro + - ../../../logos/infra/assets/stack/monitoring/grafana/grafana.ini:/etc/grafana/grafana.ini:ro + - ../../../logos/infra/assets/stack/monitoring/grafana/datasources.yaml:/etc/grafana/provisioning/datasources/datasources.yaml:ro + - ../../../logos/infra/assets/stack/monitoring/grafana/dashboards.yml:/etc/grafana/provisioning/dashboards/dashboards.yml:ro + - ../../../logos/infra/assets/stack/monitoring/grafana/dashboards:/var/lib/grafana/dashboards:ro ports: - "3000:3000" diff --git a/scripts/ops/push-ecr-test.sh b/scripts/ops/push-ecr-test.sh index 172ad0c..67609a4 100755 --- a/scripts/ops/push-ecr-test.sh +++ b/scripts/ops/push-ecr-test.sh @@ -32,7 +32,7 @@ export DOCKER_DEFAULT_PLATFORM="${DEFAULT_DOCKER_PLATFORM}" export CIRCUITS_PLATFORM="${CIRCUITS_PLATFORM:-${DEFAULT_CIRCUITS_PLATFORM}}" export IMAGE_TAG="${REMOTE_IMAGE}" - "${ROOT_DIR}/scripts/build/build_test_image.sh" --dockerfile "${ROOT_DIR}/testing-framework/assets/stack/Dockerfile.testnet" + "${ROOT_DIR}/scripts/build/build_test_image.sh" --dockerfile "${ROOT_DIR}/logos/infra/assets/stack/Dockerfile.testnet" if [[ "${ECR_IMAGE_REPO}" == ${PUBLIC_ECR_HOST}/* ]]; then aws ecr-public get-login-password --region "${AWS_REGION}" \ diff --git a/scripts/run/check-boundaries.sh b/scripts/run/check-boundaries.sh new file mode 100755 index 0000000..53786cd --- /dev/null +++ b/scripts/run/check-boundaries.sh @@ -0,0 +1,30 @@ +#!/usr/bin/env bash +set -euo pipefail + +ROOT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")/../.." && pwd)" +cd "${ROOT_DIR}" + +echo "==> boundary check: lb-topology must stay local/topology-focused" + +violations=0 +topology_dir="${ROOT_DIR}/../nomos-node/tests/testing_framework/lb-topology" + +if [ ! -d "${topology_dir}" ]; then + echo "Boundary check failed: expected lb-topology at ${topology_dir}" + exit 1 +fi + +echo "==> checking ${topology_dir}" + +if rg -n "cfgsync|ComposeDeployEnv|K8sDeployEnv|runner-compose|runner-k8s|DEFAULT_CFGSYNC_PORT|DEFAULT_ASSETS_STACK_DIR" \ + "${topology_dir}/src" "${topology_dir}/Cargo.toml" >/tmp/lb-topology-boundary.out 2>&1; then + echo "Boundary violation: extension-specific references found in lb-topology:" + cat /tmp/lb-topology-boundary.out + violations=1 +fi + +if [ "${violations}" -ne 0 ]; then + exit 1 +fi + +echo "Boundary check passed." diff --git a/scripts/run/run-examples.sh b/scripts/run/run-examples.sh index 777a6b4..66ce750 100755 --- a/scripts/run/run-examples.sh +++ b/scripts/run/run-examples.sh @@ -33,9 +33,9 @@ run_examples::usage() { Usage: scripts/run/run-examples.sh [options] [compose|host|k8s] Modes: - compose Run examples/src/bin/compose_runner.rs (default) - host Run examples/src/bin/local_runner.rs - k8s Run examples/src/bin/k8s_runner.rs + compose Run logos/examples/src/bin/compose_runner.rs (default) + host Run logos/examples/src/bin/local_runner.rs + k8s Run logos/examples/src/bin/k8s_runner.rs Options: -t, --run-seconds N Duration to run the demo (required) @@ -85,16 +85,48 @@ run_examples::load_env() { ROOT_DIR="$(common::repo_root)" export ROOT_DIR + local env_version="${VERSION:-}" + local env_node_rev="${LOGOS_BLOCKCHAIN_NODE_REV:-}" + local env_node_path="${LOGOS_BLOCKCHAIN_NODE_PATH:-}" + common::require_file "${ROOT_DIR}/versions.env" # shellcheck disable=SC1091 . "${ROOT_DIR}/versions.env" common::maybe_source "${ROOT_DIR}/paths.env" + if [ -n "${env_version}" ]; then + VERSION="${env_version}" + fi + if [ -n "${env_node_rev}" ]; then + LOGOS_BLOCKCHAIN_NODE_REV="${env_node_rev}" + fi + if [ -n "${env_node_path}" ]; then + LOGOS_BLOCKCHAIN_NODE_PATH="${env_node_path}" + fi + DEFAULT_VERSION="${VERSION:?Missing VERSION in versions.env}" VERSION="${VERSION:-${DEFAULT_VERSION}}" } +run_examples::apply_platform_defaults() { + local arch + arch="$(uname -m)" + + case "${arch}" in + arm64|aarch64) + if [ "${MODE}" != "host" ]; then + if [ -z "${LOGOS_BLOCKCHAIN_BIN_PLATFORM:-}" ]; then + export LOGOS_BLOCKCHAIN_BIN_PLATFORM="linux/arm64" + fi + if [ -z "${COMPOSE_CIRCUITS_PLATFORM:-}" ]; then + export COMPOSE_CIRCUITS_PLATFORM="linux-aarch64" + fi + fi + ;; + esac +} + run_examples::select_bin() { case "${MODE}" in compose) BIN="compose_runner" ;; @@ -360,7 +392,7 @@ run_examples::restore_binaries_from_tar() { tar -xzf "${tar_path}" -C "${extract_dir}" || common::die "Failed to extract ${tar_path}" local src="${extract_dir}/artifacts" - local bin_dst="${ROOT_DIR}/testing-framework/assets/stack/bin" + local bin_dst="${ROOT_DIR}/logos/infra/assets/stack/bin" RESTORED_BIN_DIR="${src}" export RESTORED_BIN_DIR @@ -388,7 +420,11 @@ run_examples::ensure_binaries_tar() { local platform="$1" local tar_path="$2" echo "==> Building fresh binaries bundle (${platform}) at ${tar_path}" - "${ROOT_DIR}/scripts/build/build-bundle.sh" --platform "${platform}" --output "${tar_path}" --rev "${LOGOS_BLOCKCHAIN_NODE_REV}" + if [ -n "${LOGOS_BLOCKCHAIN_NODE_PATH:-}" ]; then + "${ROOT_DIR}/scripts/build/build-bundle.sh" --platform "${platform}" --output "${tar_path}" --path "${LOGOS_BLOCKCHAIN_NODE_PATH}" + else + "${ROOT_DIR}/scripts/build/build-bundle.sh" --platform "${platform}" --output "${tar_path}" --rev "${LOGOS_BLOCKCHAIN_NODE_REV}" + fi } run_examples::prepare_bundles() { @@ -403,6 +439,14 @@ run_examples::prepare_bundles() { return 0 fi + if [ -n "${LOGOS_BLOCKCHAIN_NODE_PATH:-}" ]; then + echo "==> Using local logos-blockchain-node checkout at ${LOGOS_BLOCKCHAIN_NODE_PATH}" + if [ "${LOGOS_BLOCKCHAIN_FORCE_BUNDLE_REBUILD:-0}" = "1" ]; then + echo "==> Forcing local bundle rebuild (LOGOS_BLOCKCHAIN_FORCE_BUNDLE_REBUILD=1)" + rm -f "${HOST_TAR}" "${LINUX_TAR}" + fi + fi + # On non-Linux compose/k8s runs, use the Linux bundle for image build, then restore host bundle for the runner. if [ "${MODE}" != "host" ] && [ "$(uname -s)" != "Linux" ] && [ "${LOGOS_BLOCKCHAIN_SKIP_IMAGE_BUILD:-0}" = "0" ] && [ -f "${LINUX_TAR}" ]; then NEED_HOST_RESTORE_AFTER_IMAGE=1 @@ -461,7 +505,7 @@ run_examples::maybe_restore_host_after_image() { run_examples::validate_restored_bundle() { if [ "${MODE}" = "host" ] && ! { [ -n "${LOGOS_BLOCKCHAIN_NODE_BIN:-}" ] && [ -x "${LOGOS_BLOCKCHAIN_NODE_BIN:-}" ]; }; then local tar_node - tar_node="${RESTORED_BIN_DIR:-${ROOT_DIR}/testing-framework/assets/stack/bin}/logos-blockchain-node" + tar_node="${RESTORED_BIN_DIR:-${ROOT_DIR}/logos/infra/assets/stack/bin}/logos-blockchain-node" [ -x "${tar_node}" ] || common::die \ "Restored tarball missing host executables; provide a host-compatible binaries tarball." @@ -475,24 +519,32 @@ run_examples::validate_restored_bundle() { } run_examples::ensure_circuits() { + local circuits_dir + local setup_script + local platform + if [ -n "${LOGOS_BLOCKCHAIN_CIRCUITS:-}" ]; then - if [ -d "${LOGOS_BLOCKCHAIN_CIRCUITS}" ]; then - return 0 - fi - common::die "LOGOS_BLOCKCHAIN_CIRCUITS is set to '${LOGOS_BLOCKCHAIN_CIRCUITS}', but the directory does not exist" + circuits_dir="${LOGOS_BLOCKCHAIN_CIRCUITS}" + else + circuits_dir="${ROOT_DIR}/${LOGOS_BLOCKCHAIN_CIRCUITS_HOST_DIR_REL:-.tmp/logos-blockchain-circuits-host}" + platform="${LOGOS_BLOCKCHAIN_CIRCUITS_PLATFORM:-}" + export LOGOS_BLOCKCHAIN_CIRCUITS="${circuits_dir}" fi - local default_dir="${HOME}/.logos-blockchain-circuits" - if [ -d "${default_dir}" ]; then - LOGOS_BLOCKCHAIN_CIRCUITS="${default_dir}" - export LOGOS_BLOCKCHAIN_CIRCUITS + if [ -d "${circuits_dir}" ]; then return 0 fi - echo "==> Circuits not found; installing to ${default_dir}" - bash "${ROOT_DIR}/scripts/setup/setup-logos-blockchain-circuits.sh" "${VERSION}" "${default_dir}" - LOGOS_BLOCKCHAIN_CIRCUITS="${default_dir}" - export LOGOS_BLOCKCHAIN_CIRCUITS + setup_script="${ROOT_DIR}/logos/infra/assets/stack/scripts/setup-logos-blockchain-circuits.sh" + if [ ! -x "${setup_script}" ]; then + common::die "Missing circuits setup script at ${setup_script}" + fi + + if [ -n "${platform:-}" ]; then + LOGOS_BLOCKCHAIN_CIRCUITS_PLATFORM="${platform}" "${setup_script}" "${VERSION}" "${circuits_dir}" + else + "${setup_script}" "${VERSION}" "${circuits_dir}" + fi } run_examples::run() { @@ -506,12 +558,6 @@ run_examples::run() { export LOGOS_BLOCKCHAIN_METRICS_OTLP_INGEST_URL="${METRICS_OTLP_INGEST_URL}" fi - if [ "${MODE}" = "host" ]; then - run_examples::ensure_circuits - # Ensure Groth16 verification keys are embedded when building local node binaries. - export CARGO_FEATURE_BUILD_VERIFICATION_KEY=1 - fi - echo "==> Running ${BIN} for ${RUN_SECS}s (mode=${MODE}, image=${IMAGE})" cd "${ROOT_DIR}" @@ -526,7 +572,7 @@ run_examples::main() { run_examples::parse_args "$@" run_examples::select_bin run_examples::select_image - + run_examples::apply_platform_defaults run_examples::prepare_bundles echo "==> Using restored binaries bundle" diff --git a/scripts/setup/setup-logos-blockchain-circuits.sh b/scripts/setup/setup-logos-blockchain-circuits.sh index 6a3d19e..532c7c1 100755 --- a/scripts/setup/setup-logos-blockchain-circuits.sh +++ b/scripts/setup/setup-logos-blockchain-circuits.sh @@ -18,7 +18,7 @@ fi # scripts/setup/setup-logos-blockchain-circuits.sh v0.3.2 # scripts/setup/setup-logos-blockchain-circuits.sh v0.3.2 /opt/circuits -DEFAULT_CIRCUITS_VERSION="v0.3.2" +DEFAULT_CIRCUITS_VERSION="v0.4.1" DEFAULT_INSTALL_DIR="${HOME}/.logos-blockchain-circuits" REPO="logos-blockchain/logos-blockchain-circuits" @@ -35,6 +35,9 @@ print_error() { echo -e "${RED}✗${NC} $1"; } VERSION="${1:-${DEFAULT_CIRCUITS_VERSION}}" INSTALL_DIR="${2:-${DEFAULT_INSTALL_DIR}}" +SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" +REPO_ROOT="$(cd "${SCRIPT_DIR}/../.." && pwd)" +RAPIDSNARK_SKIP="${RAPIDSNARK_SKIP:-0}" # Detect OS and architecture # Outputs: os-arch like linux-x86_64, macos-aarch64 @@ -170,6 +173,41 @@ print_circuits() { done } +ensure_rapidsnark() { + if [ -x "${INSTALL_DIR}/prover" ] && [ -x "${INSTALL_DIR}/verifier" ]; then + print_info "rapidsnark prover already present at ${INSTALL_DIR}" + return + fi + + if [ "${RAPIDSNARK_SKIP}" = "1" ]; then + print_warning "Skipping rapidsnark build (RAPIDSNARK_SKIP=1). Proofs may fail without prover/verifier." + return + fi + + local build_script="${REPO_ROOT}/scripts/build/build-rapidsnark.sh" + if [ ! -x "${build_script}" ]; then + print_error "rapidsnark build script not found or not executable: ${build_script}" + exit 1 + fi + + local missing=() + for tool in git cmake make gcc g++; do + if ! command -v "${tool}" >/dev/null 2>&1; then + missing+=("${tool}") + fi + done + + if [ "${#missing[@]}" -gt 0 ]; then + print_error "Missing build tools for rapidsnark: ${missing[*]}" + print_error "Install the required packages, or set RAPIDSNARK_SKIP=1 to skip the build." + exit 1 + fi + + print_info "Building rapidsnark prover into ${INSTALL_DIR}..." + bash "${build_script}" "${INSTALL_DIR}" + print_success "rapidsnark prover installed" +} + main() { print_info "Setting up logos-blockchain-circuits ${VERSION}" print_info "Installation directory: ${INSTALL_DIR}" @@ -187,6 +225,9 @@ main() { handle_macos_quarantine fi + echo + ensure_rapidsnark + echo print_success "Installation complete!" echo diff --git a/scripts/setup/setup-observability.sh b/scripts/setup/setup-observability.sh index 539c0b7..9229f09 100755 --- a/scripts/setup/setup-observability.sh +++ b/scripts/setup/setup-observability.sh @@ -96,7 +96,7 @@ k8s_apply_dashboards() { local ns dash_dir ns="$(k8s_namespace)" - dash_dir="${ROOT}/testing-framework/assets/stack/monitoring/grafana/dashboards" + dash_dir="${ROOT}/logos/infra/assets/stack/monitoring/grafana/dashboards" [ -d "${dash_dir}" ] || common::die "Missing dashboards directory: ${dash_dir}" diff --git a/testing-framework/assets/stack/Dockerfile.base b/testing-framework/assets/stack/Dockerfile.base deleted file mode 100644 index aeceff6..0000000 --- a/testing-framework/assets/stack/Dockerfile.base +++ /dev/null @@ -1,72 +0,0 @@ -# syntax=docker/dockerfile:1 -# check=skip=SecretsUsedInArgOrEnv -# Ignore warnings about sensitive information as this is test data. - -ARG VERSION -ARG LOGOS_BLOCKCHAIN_NODE_REV - -# =========================== -# BUILD IMAGE -# =========================== - -FROM rust:1.91.0-slim-bookworm AS builder - -ARG VERSION -ARG LOGOS_BLOCKCHAIN_NODE_REV - -LABEL maintainer="augustinas@status.im" \ - source="https://github.com/logos-co/nomos-node" \ - description="Logos testnet build image" - -WORKDIR /workspace -COPY . . - -# Reduce debug artifact size. -ENV CARGO_PROFILE_DEV_DEBUG=0 -ENV LOGOS_BLOCKCHAIN_NODE_REV=${LOGOS_BLOCKCHAIN_NODE_REV} - -# Install dependencies needed for building RocksDB and for circuit tooling. -RUN apt-get update && apt-get install -yq \ - git gcc g++ clang make cmake m4 xz-utils libgmp-dev libssl-dev pkg-config ca-certificates curl wget file \ - && rm -rf /var/lib/apt/lists/* - -RUN chmod +x \ - /workspace/testing-framework/assets/stack/scripts/docker/prepare_binaries.sh \ - /workspace/testing-framework/assets/stack/scripts/docker/build_cfgsync.sh \ - /workspace/testing-framework/assets/stack/scripts/setup-logos-blockchain-circuits.sh \ - || true - -RUN /workspace/testing-framework/assets/stack/scripts/setup-logos-blockchain-circuits.sh "${VERSION}" /opt/circuits - -ENV LOGOS_BLOCKCHAIN_CIRCUITS=/opt/circuits - -RUN /workspace/testing-framework/assets/stack/scripts/docker/prepare_binaries.sh - -# Strip local path patches so container builds use git sources. -RUN sed -i '/^\[patch\."https:\/\/github.com\/logos-co\/nomos-node"\]/,/^$/d' /workspace/Cargo.toml - -RUN /workspace/testing-framework/assets/stack/scripts/docker/build_cfgsync.sh - -# =========================== -# BASE RUNTIME IMAGE -# =========================== - -FROM ubuntu:24.04 AS base - -LABEL maintainer="augustinas@status.im" \ - source="https://github.com/logos-co/nomos-node" \ - description="Logos base runtime image (testing)" - -RUN apt-get update && apt-get install -yq \ - libstdc++6 \ - libgmp10 \ - libgomp1 \ - libssl3 \ - ca-certificates \ - && rm -rf /var/lib/apt/lists/* - -COPY --from=builder /workspace/artifacts/logos-blockchain-node /usr/bin/logos-blockchain-node -COPY --from=builder /workspace/artifacts/cfgsync-server /usr/bin/cfgsync-server -COPY --from=builder /workspace/artifacts/cfgsync-client /usr/bin/cfgsync-client - -EXPOSE 3000 8080 9000 60000 diff --git a/testing-framework/assets/stack/scripts/docker/build_cfgsync.sh b/testing-framework/assets/stack/scripts/docker/build_cfgsync.sh deleted file mode 100755 index aada5eb..0000000 --- a/testing-framework/assets/stack/scripts/docker/build_cfgsync.sh +++ /dev/null @@ -1,10 +0,0 @@ -#!/usr/bin/env bash -set -euo pipefail - -RUSTFLAGS='--cfg feature="high-active-slot-coefficient"' \ - cargo build --all-features --manifest-path /workspace/testing-framework/tools/cfgsync_tf/Cargo.toml --bins - -cp /workspace/target/debug/cfgsync-server /workspace/artifacts/cfgsync-server -cp /workspace/target/debug/cfgsync-client /workspace/artifacts/cfgsync-client - -rm -rf /workspace/target/debug/incremental diff --git a/testing-framework/assets/stack/scripts/docker/prepare_binaries.sh b/testing-framework/assets/stack/scripts/docker/prepare_binaries.sh deleted file mode 100755 index 03f61cf..0000000 --- a/testing-framework/assets/stack/scripts/docker/prepare_binaries.sh +++ /dev/null @@ -1,59 +0,0 @@ -#!/usr/bin/env bash -set -euo pipefail - -LOGOS_BLOCKCHAIN_NODE_REV="${LOGOS_BLOCKCHAIN_NODE_REV:?LOGOS_BLOCKCHAIN_NODE_REV build arg missing}" - -mkdir -p /workspace/artifacts - -TARGET_ARCH="$(uname -m)" - -have_prebuilt() { - [ -f testing-framework/assets/stack/bin/logos-blockchain-node ] && \ - [ -f testing-framework/assets/stack/bin/logos-blockchain-node ] -} - -bin_matches_arch() { - local info - info="$(file -b testing-framework/assets/stack/bin/logos-blockchain-node 2>/dev/null || true)" - case "${info}" in - *ELF*) : ;; - *) return 1 ;; - esac - - local pattern - case "${TARGET_ARCH}" in - x86_64) pattern="x86-64|x86_64" ;; - aarch64|arm64) pattern="arm64|aarch64" ;; - *) pattern="${TARGET_ARCH}" ;; - esac - - echo "${info}" | grep -Eqi "${pattern}" -} - -if have_prebuilt && bin_matches_arch; then - echo "Using prebuilt logos-blockchain binaries from testing-framework/assets/stack/bin" - cp testing-framework/assets/stack/bin/logos-blockchain-node /workspace/artifacts/logos-blockchain-node - exit 0 -fi - -if have_prebuilt; then - echo "Prebuilt logos-blockchain binaries do not match target architecture (${TARGET_ARCH}); rebuilding from source" -else - echo "Prebuilt logos-blockchain binaries missing; building from source" -fi - -echo "Building logos-blockchain binaries from source (rev ${LOGOS_BLOCKCHAIN_NODE_REV})" -git clone https://github.com/logos-co/nomos-node.git /tmp/nomos-node -cd /tmp/nomos-node -git fetch --depth 1 origin "${LOGOS_BLOCKCHAIN_NODE_REV}" -git checkout "${LOGOS_BLOCKCHAIN_NODE_REV}" -git reset --hard -git clean -fdx - -# Enable high-active-slot-coefficient via cfg to keep test blocks frequent. -RUSTFLAGS='--cfg feature="high-active-slot-coefficient"' \ - cargo build --features "testing" -p logos-blockchain-node - -cp /tmp/nomos-node/target/debug/logos-blockchain-node /workspace/artifacts/logos-blockchain-node - -rm -rf /tmp/nomos-node/target/debug/incremental diff --git a/testing-framework/assets/stack/scripts/run_cfgsync.sh b/testing-framework/assets/stack/scripts/run_cfgsync.sh deleted file mode 100755 index d0f2659..0000000 --- a/testing-framework/assets/stack/scripts/run_cfgsync.sh +++ /dev/null @@ -1,5 +0,0 @@ -#!/bin/sh - -set -e - -exec /usr/bin/cfgsync-server /etc/nomos/cfgsync.yaml diff --git a/testing-framework/assets/stack/scripts/run_nomos_node.sh b/testing-framework/assets/stack/scripts/run_nomos_node.sh deleted file mode 100755 index 94a970c..0000000 --- a/testing-framework/assets/stack/scripts/run_nomos_node.sh +++ /dev/null @@ -1,2 +0,0 @@ -#!/bin/sh -exec /etc/nomos/scripts/run_nomos.sh node diff --git a/testing-framework/configs/Cargo.toml b/testing-framework/configs/Cargo.toml deleted file mode 100644 index 993fea9..0000000 --- a/testing-framework/configs/Cargo.toml +++ /dev/null @@ -1,42 +0,0 @@ -[package] -categories.workspace = true -description.workspace = true -edition.workspace = true -keywords.workspace = true -license.workspace = true -name = "testing-framework-config" -readme.workspace = true -repository.workspace = true -version = "0.1.0" - -[dependencies] -hex = { default-features = false, version = "0.4.3" } -lb-api-service = { workspace = true } -lb-blend-service = { features = ["libp2p"], workspace = true } -lb-chain-leader-service = { workspace = true } -lb-chain-network = { workspace = true } -lb-chain-service = { workspace = true } -lb-core = { workspace = true } -lb-cryptarchia-engine = { features = ["serde"], workspace = true } -lb-cryptarchia-sync = { workspace = true } -lb-groth16 = { workspace = true } -lb-key-management-system-service = { workspace = true } -lb-ledger = { features = ["serde"], workspace = true } -lb-libp2p = { workspace = true } -lb-node = { default-features = false, features = ["testing"], workspace = true } -lb-sdp-service = { workspace = true } -lb-time-service = { workspace = true } -lb-tracing = { workspace = true } -lb-tracing-service = { workspace = true } -lb-utils = { workspace = true } -lb-wallet-service = { workspace = true } -num-bigint = { default-features = false, version = "0.4" } -rand = { workspace = true } -serde = { features = ["derive"], workspace = true } -testing-framework-env = { workspace = true } -thiserror = { workspace = true } -time = { default-features = true, version = "0.3" } -tracing = { workspace = true } - -[lints] -workspace = true diff --git a/testing-framework/configs/src/constants.rs b/testing-framework/configs/src/constants.rs deleted file mode 100644 index 39dd889..0000000 --- a/testing-framework/configs/src/constants.rs +++ /dev/null @@ -1,42 +0,0 @@ -use std::time::Duration; - -use testing_framework_env as tf_env; - -/// Default cfgsync port used across runners. -pub const DEFAULT_CFGSYNC_PORT: u16 = 4400; - -/// Default HTTP probe interval used across readiness checks. -pub const DEFAULT_HTTP_POLL_INTERVAL: Duration = Duration::from_secs(1); - -/// Default node HTTP timeout when probing endpoints. -pub const DEFAULT_NODE_HTTP_TIMEOUT: Duration = Duration::from_secs(240); - -/// Default node HTTP timeout when probing NodePort endpoints. -pub const DEFAULT_NODE_HTTP_PROBE_TIMEOUT: Duration = Duration::from_secs(30); - -/// Default Kubernetes deployment readiness timeout. -pub const DEFAULT_K8S_DEPLOYMENT_TIMEOUT: Duration = Duration::from_secs(180); - -/// Default API port used by nodes. -pub const DEFAULT_API_PORT: u16 = 18080; - -/// Default testing HTTP port used by nodes. -pub const DEFAULT_TESTING_HTTP_PORT: u16 = 18081; - -/// Default libp2p network port. -pub const DEFAULT_LIBP2P_NETWORK_PORT: u16 = 3000; - -/// Default DA network port. -pub const DEFAULT_DA_NETWORK_PORT: u16 = 3300; - -/// Default blend network port. -pub const DEFAULT_BLEND_NETWORK_PORT: u16 = 3400; //4401; - -/// Resolve cfgsync port from `LOGOS_BLOCKCHAIN_CFGSYNC_PORT`, falling back to -/// the default. -pub fn cfgsync_port() -> u16 { - tf_env::nomos_cfgsync_port().unwrap_or(DEFAULT_CFGSYNC_PORT) -} - -/// Default stack assets directory. -pub const DEFAULT_ASSETS_STACK_DIR: &str = "testing-framework/assets/stack"; diff --git a/testing-framework/configs/src/lib.rs b/testing-framework/configs/src/lib.rs deleted file mode 100644 index f2002a2..0000000 --- a/testing-framework/configs/src/lib.rs +++ /dev/null @@ -1,52 +0,0 @@ -use std::{net::Ipv4Addr, ops::Mul as _, sync::LazyLock, time::Duration}; - -use lb_core::sdp::ProviderId; -use lb_libp2p::{Multiaddr, PeerId, multiaddr}; -use testing_framework_env as tf_env; - -pub mod constants; -pub mod nodes; -pub mod timeouts; -pub mod topology; - -static IS_SLOW_TEST_ENV: LazyLock = LazyLock::new(tf_env::slow_test_env); - -pub static IS_DEBUG_TRACING: LazyLock = LazyLock::new(tf_env::debug_tracing); - -const SLOW_ENV_TIMEOUT_MULTIPLIER: u32 = 2; - -/// In slow test environments like Codecov, use 2x timeout. -#[must_use] -pub fn adjust_timeout(d: Duration) -> Duration { - if *IS_SLOW_TEST_ENV { - d.mul(SLOW_ENV_TIMEOUT_MULTIPLIER) - } else { - d - } -} - -#[must_use] -pub fn node_address_from_port(port: u16) -> Multiaddr { - multiaddr(Ipv4Addr::LOCALHOST, port) -} - -#[must_use] -pub fn secret_key_to_peer_id(node_key: lb_libp2p::ed25519::SecretKey) -> PeerId { - PeerId::from_public_key(&lb_libp2p::ed25519::Keypair::from(node_key).public().into()) -} - -#[must_use] -pub fn secret_key_to_provider_id(node_key: lb_libp2p::ed25519::SecretKey) -> ProviderId { - let bytes = lb_libp2p::ed25519::Keypair::from(node_key) - .public() - .to_bytes(); - match ProviderId::try_from(bytes) { - Ok(value) => value, - Err(_) => unsafe { - // Safety: `bytes` is a 32-byte ed25519 public key, matching `ProviderId`'s - // expected width; failure would indicate a broken invariant in the - // dependency. - std::hint::unreachable_unchecked() - }, - } -} diff --git a/testing-framework/configs/src/nodes/blend.rs b/testing-framework/configs/src/nodes/blend.rs deleted file mode 100644 index 44f4775..0000000 --- a/testing-framework/configs/src/nodes/blend.rs +++ /dev/null @@ -1,159 +0,0 @@ -use std::{num::NonZeroU64, path::PathBuf, time::Duration}; - -use blend_serde::Config as BlendUserConfig; -use lb_blend_service::{ - core::settings::{CoverTrafficSettings, MessageDelayerSettings, SchedulerSettings, ZkSettings}, - settings::TimingSettings, -}; -use lb_key_management_system_service::keys::Key; -use lb_node::config::{ - blend::{ - deployment::{self as blend_deployment, Settings as BlendDeploymentSettings}, - serde as blend_serde, - }, - network::deployment::Settings as NetworkDeploymentSettings, -}; -use lb_utils::math::NonNegativeF64; - -use crate::{ - nodes::kms::key_id_for_preload_backend, - topology::configs::blend::GeneralBlendConfig as TopologyBlendConfig, -}; - -// Blend service constants -const BLEND_LAYERS_COUNT: u64 = 1; -const MINIMUM_NETWORK_SIZE: u64 = 1; -const ROUND_DURATION_SECS: u64 = 1; -const ROUNDS_PER_INTERVAL: u64 = 30; -const ROUNDS_PER_SESSION: u64 = 648_000; -const ROUNDS_PER_OBSERVATION_WINDOW: u64 = 30; -const ROUNDS_PER_SESSION_TRANSITION: u64 = 30; -const EPOCH_TRANSITION_SLOTS: u64 = 2_600; -const SAFETY_BUFFER_INTERVALS: u64 = 100; -const MESSAGE_FREQUENCY_PER_ROUND: f64 = 1.0; -const MAX_RELEASE_DELAY_ROUNDS: u64 = 3; -const DATA_REPLICATION_FACTOR: u64 = 0; -pub const ACTIVITY_THRESHOLD_SENSITIVITY: u64 = 1; - -pub(crate) fn build_blend_service_config( - config: &TopologyBlendConfig, -) -> ( - BlendUserConfig, - BlendDeploymentSettings, - NetworkDeploymentSettings, -) { - let message_frequency_per_round = message_frequency_per_round(); - let zk_key_id = key_id_for_preload_backend(&Key::from(config.secret_zk_key.clone())); - let signing_key_id = key_id_for_preload_backend(&Key::from(config.signer.clone())); - - let user = build_blend_user_config(config, zk_key_id, signing_key_id); - let deployment_settings = build_blend_deployment_settings(config, message_frequency_per_round); - let network_deployment = build_network_deployment_settings(); - - (user, deployment_settings, network_deployment) -} - -fn message_frequency_per_round() -> NonNegativeF64 { - match NonNegativeF64::try_from(MESSAGE_FREQUENCY_PER_ROUND) { - Ok(value) => value, - Err(_) => unsafe { - // Safety: `MESSAGE_FREQUENCY_PER_ROUND` is a finite non-negative constant. - std::hint::unreachable_unchecked() - }, - } -} - -fn build_blend_user_config( - config: &TopologyBlendConfig, - zk_key_id: String, - signing_key_id: String, -) -> BlendUserConfig { - let backend_core = &config.backend_core; - let backend_edge = &config.backend_edge; - - BlendUserConfig { - non_ephemeral_signing_key_id: signing_key_id, - // Persist recovery data under the tempdir so components expecting it - // can start cleanly. - recovery_path_prefix: PathBuf::from("./recovery/blend"), - core: blend_serde::core::Config { - backend: blend_serde::core::BackendConfig { - listening_address: backend_core.listening_address.clone(), - core_peering_degree: backend_core.core_peering_degree.clone(), - edge_node_connection_timeout: backend_core.edge_node_connection_timeout, - max_edge_node_incoming_connections: backend_core.max_edge_node_incoming_connections, - max_dial_attempts_per_peer: backend_core.max_dial_attempts_per_peer, - }, - zk: ZkSettings { - secret_key_kms_id: zk_key_id, - }, - }, - edge: blend_serde::edge::Config { - backend: blend_serde::edge::BackendConfig { - max_dial_attempts_per_peer_per_message: backend_edge - .max_dial_attempts_per_peer_per_message, - replication_factor: backend_edge.replication_factor, - }, - }, - } -} - -fn build_blend_deployment_settings( - config: &TopologyBlendConfig, - message_frequency_per_round: NonNegativeF64, -) -> BlendDeploymentSettings { - let backend_core = &config.backend_core; - - BlendDeploymentSettings { - common: blend_deployment::CommonSettings { - num_blend_layers: unsafe { NonZeroU64::new_unchecked(BLEND_LAYERS_COUNT) }, - minimum_network_size: unsafe { NonZeroU64::new_unchecked(MINIMUM_NETWORK_SIZE) }, - data_replication_factor: DATA_REPLICATION_FACTOR, - timing: TimingSettings { - round_duration: Duration::from_secs(ROUND_DURATION_SECS), - rounds_per_interval: unsafe { NonZeroU64::new_unchecked(ROUNDS_PER_INTERVAL) }, - rounds_per_session: unsafe { NonZeroU64::new_unchecked(ROUNDS_PER_SESSION) }, - rounds_per_observation_window: unsafe { - NonZeroU64::new_unchecked(ROUNDS_PER_OBSERVATION_WINDOW) - }, - rounds_per_session_transition_period: unsafe { - NonZeroU64::new_unchecked(ROUNDS_PER_SESSION_TRANSITION) - }, - epoch_transition_period_in_slots: unsafe { - NonZeroU64::new_unchecked(EPOCH_TRANSITION_SLOTS) - }, - }, - protocol_name: backend_core.protocol_name.clone(), - }, - core: blend_deployment::CoreSettings { - scheduler: SchedulerSettings { - cover: CoverTrafficSettings { - intervals_for_safety_buffer: SAFETY_BUFFER_INTERVALS, - message_frequency_per_round, - }, - delayer: MessageDelayerSettings { - maximum_release_delay_in_rounds: unsafe { - NonZeroU64::new_unchecked(MAX_RELEASE_DELAY_ROUNDS) - }, - }, - }, - minimum_messages_coefficient: backend_core.minimum_messages_coefficient, - normalization_constant: backend_core.normalization_constant, - activity_threshold_sensitivity: ACTIVITY_THRESHOLD_SENSITIVITY, - }, - } -} - -fn build_network_deployment_settings() -> NetworkDeploymentSettings { - NetworkDeploymentSettings { - identify_protocol_name: lb_libp2p::protocol_name::StreamProtocol::new( - "/integration/nomos/identify/1.0.0", - ), - kademlia_protocol_name: lb_libp2p::protocol_name::StreamProtocol::new( - "/integration/nomos/kad/1.0.0", - ), - chain_sync_protocol_name: lb_libp2p::protocol_name::StreamProtocol::new( - "/integration/nomos/cryptarchia/sync/1.0.0", - ), - } -} diff --git a/testing-framework/configs/src/nodes/common.rs b/testing-framework/configs/src/nodes/common.rs deleted file mode 100644 index b153d46..0000000 --- a/testing-framework/configs/src/nodes/common.rs +++ /dev/null @@ -1,208 +0,0 @@ -use std::{ - collections::{HashMap, HashSet}, - num::NonZeroUsize, - path::PathBuf, - time::Duration, -}; - -use lb_api_service::ApiServiceSettings; -use lb_chain_leader_service::LeaderWalletConfig; -use lb_chain_network::{BootstrapConfig as ChainBootstrapConfig, OrphanConfig, SyncConfig}; -use lb_core::mantle::Value; -use lb_key_management_system_service::keys::{Key, secured_key::SecuredKey}; -use lb_node::{ - api::backend::AxumBackendSettings as NodeAxumBackendSettings, - config::{ - cryptarchia::{ - deployment::{ - SdpConfig as DeploymentSdpConfig, ServiceParameters, - Settings as CryptarchiaDeploymentSettings, - }, - serde::{ - Config as CryptarchiaConfig, LeaderConfig, - NetworkConfig as CryptarchiaNetworkConfig, - ServiceConfig as CryptarchiaServiceConfig, - }, - }, - mempool::deployment::Settings as MempoolDeploymentSettings, - time::{deployment::Settings as TimeDeploymentSettings, serde::Config as TimeConfig}, - }, -}; -use lb_wallet_service::WalletServiceSettings; -use time::OffsetDateTime; - -use crate::{nodes::kms::key_id_for_preload_backend, timeouts, topology::configs::GeneralConfig}; - -// Configuration constants -const CRYPTARCHIA_GOSSIPSUB_PROTOCOL: &str = "/cryptarchia/proto"; -const MEMPOOL_PUBSUB_TOPIC: &str = "mantle"; -const STATE_RECORDING_INTERVAL_SECS: u64 = 60; -const IBD_DOWNLOAD_DELAY_SECS: u64 = 10; -const MAX_ORPHAN_CACHE_SIZE: NonZeroUsize = unsafe { NonZeroUsize::new_unchecked(5) }; -const API_MAX_CONCURRENT_REQUESTS: usize = 1000; - -pub(crate) fn cryptarchia_deployment(config: &GeneralConfig) -> CryptarchiaDeploymentSettings { - let mantle_service_params = &config - .consensus_config - .ledger_config - .sdp_config - .service_params; - let node_service_params = mantle_service_params - .iter() - .map(|(service_type, service_parameters)| { - ( - service_type.clone(), - ServiceParameters { - lock_period: service_parameters.lock_period, - inactivity_period: service_parameters.inactivity_period, - retention_period: service_parameters.retention_period, - timestamp: service_parameters.timestamp, - }, - ) - }) - .collect::>(); - - CryptarchiaDeploymentSettings { - epoch_config: config.consensus_config.ledger_config.epoch_config, - security_param: config - .consensus_config - .ledger_config - .consensus_config - .security_param(), - sdp_config: DeploymentSdpConfig { - service_params: node_service_params, - min_stake: config.consensus_config.ledger_config.sdp_config.min_stake, - }, - gossipsub_protocol: CRYPTARCHIA_GOSSIPSUB_PROTOCOL.to_owned(), - genesis_state: config.consensus_config.genesis_tx.clone(), - } -} - -pub(crate) fn time_deployment(config: &GeneralConfig) -> TimeDeploymentSettings { - TimeDeploymentSettings { - slot_duration: config.time_config.slot_duration, - chain_start_time: OffsetDateTime::now_utc(), - } -} - -pub(crate) fn mempool_deployment() -> MempoolDeploymentSettings { - MempoolDeploymentSettings { - pubsub_topic: MEMPOOL_PUBSUB_TOPIC.to_owned(), - } -} - -pub(crate) fn cryptarchia_config(config: &GeneralConfig) -> CryptarchiaConfig { - CryptarchiaConfig { - service: CryptarchiaServiceConfig { - recovery_file: PathBuf::from("recovery/cryptarchia.json"), - bootstrap: lb_chain_service::BootstrapConfig { - prolonged_bootstrap_period: config.bootstrapping_config.prolonged_bootstrap_period, - force_bootstrap: false, - offline_grace_period: lb_chain_service::OfflineGracePeriodConfig { - grace_period: timeouts::grace_period(), - state_recording_interval: Duration::from_secs(STATE_RECORDING_INTERVAL_SECS), - }, - }, - }, - network: CryptarchiaNetworkConfig { - bootstrap: ChainBootstrapConfig { - ibd: lb_chain_network::IbdConfig { - peers: HashSet::new(), - delay_before_new_download: Duration::from_secs(IBD_DOWNLOAD_DELAY_SECS), - }, - }, - sync: SyncConfig { - orphan: OrphanConfig { - max_orphan_cache_size: MAX_ORPHAN_CACHE_SIZE, - }, - }, - }, - leader: LeaderConfig { - wallet: LeaderWalletConfig { - max_tx_fee: Value::MAX, - funding_pk: config.consensus_config.funding_sk.as_public_key(), - }, - }, - } -} - -pub(crate) fn time_config(config: &GeneralConfig) -> TimeConfig { - TimeConfig { - backend: lb_time_service::backends::NtpTimeBackendSettings { - ntp_server: config.time_config.ntp_server.clone(), - ntp_client_settings: lb_time_service::backends::ntp::async_client::NTPClientSettings { - timeout: config.time_config.timeout, - listening_interface: config.time_config.interface.clone(), - }, - update_interval: config.time_config.update_interval, - }, - } -} - -pub(crate) fn mempool_config() -> lb_node::config::mempool::serde::Config { - lb_node::config::mempool::serde::Config { - recovery_path: PathBuf::from("recovery/mempool.json"), - } -} - -pub(crate) fn tracing_settings(config: &GeneralConfig) -> lb_tracing_service::TracingSettings { - config.tracing_config.tracing_settings.clone() -} - -pub(crate) fn http_config(config: &GeneralConfig) -> ApiServiceSettings { - ApiServiceSettings { - backend_settings: NodeAxumBackendSettings { - address: config.api_config.address, - max_concurrent_requests: API_MAX_CONCURRENT_REQUESTS, - ..Default::default() - }, - } -} - -pub(crate) fn testing_http_config( - config: &GeneralConfig, -) -> ApiServiceSettings { - ApiServiceSettings { - backend_settings: NodeAxumBackendSettings { - address: config.api_config.testing_http_address, - max_concurrent_requests: API_MAX_CONCURRENT_REQUESTS, - ..Default::default() - }, - } -} - -pub(crate) fn wallet_settings(config: &GeneralConfig) -> WalletServiceSettings { - wallet_settings_with_leader(config, true) -} - -fn wallet_settings_with_leader( - config: &GeneralConfig, - include_leader: bool, -) -> WalletServiceSettings { - let mut keys = HashMap::new(); - - if include_leader { - let leader_key = Key::Zk(config.consensus_config.leader_sk.clone().into()); - let leader_key_id = key_id_for_preload_backend(&leader_key); - keys.insert(leader_key_id, config.consensus_config.leader_pk); - } - - let funding_key = Key::Zk(config.consensus_config.funding_sk.clone()); - let funding_key_id = key_id_for_preload_backend(&funding_key); - keys.insert( - funding_key_id, - config.consensus_config.funding_sk.to_public_key(), - ); - - // Note: wallet accounts are used by the transaction workload directly and - // don't need to be registered for leader eligibility. - - let voucher_master_key_id = - key_id_for_preload_backend(&Key::Zk(config.consensus_config.leader_sk.clone().into())); - - WalletServiceSettings { - known_keys: keys, - voucher_master_key_id, - recovery_path: PathBuf::from("recovery/wallet.json"), - } -} diff --git a/testing-framework/configs/src/nodes/kms.rs b/testing-framework/configs/src/nodes/kms.rs deleted file mode 100644 index a0bfd39..0000000 --- a/testing-framework/configs/src/nodes/kms.rs +++ /dev/null @@ -1,14 +0,0 @@ -use lb_groth16::fr_to_bytes; -use lb_key_management_system_service::{ - backend::preload::KeyId, - keys::{Key, secured_key::SecuredKey as _}, -}; - -#[must_use] -pub fn key_id_for_preload_backend(key: &Key) -> KeyId { - let key_id_bytes = match key { - Key::Ed25519(ed25519_secret_key) => ed25519_secret_key.as_public_key().to_bytes(), - Key::Zk(zk_secret_key) => fr_to_bytes(zk_secret_key.as_public_key().as_fr()), - }; - hex::encode(key_id_bytes) -} diff --git a/testing-framework/configs/src/nodes/mod.rs b/testing-framework/configs/src/nodes/mod.rs deleted file mode 100644 index 085d7d8..0000000 --- a/testing-framework/configs/src/nodes/mod.rs +++ /dev/null @@ -1,4 +0,0 @@ -pub(crate) mod blend; -pub(crate) mod common; -pub mod kms; -pub mod node; diff --git a/testing-framework/configs/src/nodes/node.rs b/testing-framework/configs/src/nodes/node.rs deleted file mode 100644 index 6a7f7af..0000000 --- a/testing-framework/configs/src/nodes/node.rs +++ /dev/null @@ -1,77 +0,0 @@ -use lb_core::mantle::Value; -use lb_key_management_system_service::keys::secured_key::SecuredKey as _; -use lb_node::{ - RocksBackendSettings, UserConfig, - config::{RunConfig, deployment::DeploymentSettings}, -}; -use lb_sdp_service::{SdpSettings, wallet::SdpWalletConfig}; - -use crate::{ - nodes::{ - blend::build_blend_service_config, - common::{ - cryptarchia_config, cryptarchia_deployment, http_config, mempool_config, - mempool_deployment, testing_http_config, time_config, time_deployment, - tracing_settings, wallet_settings, - }, - }, - topology::configs::GeneralConfig, -}; - -#[must_use] -pub fn create_node_config(config: GeneralConfig) -> RunConfig { - let network_config = config.network_config.clone(); - let (blend_user_config, blend_deployment, network_deployment) = - build_blend_service_config(&config.blend_config); - - let deployment_settings = - build_node_deployment_settings(&config, blend_deployment, network_deployment); - - let user_settings = UserConfig { - network: network_config, - blend: blend_user_config, - cryptarchia: cryptarchia_config(&config), - tracing: tracing_settings(&config), - http: http_config(&config), - storage: rocks_storage_settings(), - time: time_config(&config), - mempool: mempool_config(), - sdp: SdpSettings { - declaration: None, - wallet_config: SdpWalletConfig { - max_tx_fee: Value::MAX, - funding_pk: config.consensus_config.funding_sk.as_public_key(), - }, - }, - testing_http: testing_http_config(&config), - wallet: wallet_settings(&config), - key_management: config.kms_config.clone(), - }; - - RunConfig { - deployment: deployment_settings, - user: user_settings, - } -} - -fn build_node_deployment_settings( - config: &GeneralConfig, - blend_deployment: lb_node::config::blend::deployment::Settings, - network_deployment: lb_node::config::network::deployment::Settings, -) -> DeploymentSettings { - DeploymentSettings { - blend: blend_deployment, - network: network_deployment, - cryptarchia: cryptarchia_deployment(config), - time: time_deployment(config), - mempool: mempool_deployment(), - } -} - -fn rocks_storage_settings() -> RocksBackendSettings { - RocksBackendSettings { - db_path: "./db".into(), - read_only: false, - column_family: Some("blocks".into()), - } -} diff --git a/testing-framework/configs/src/timeouts.rs b/testing-framework/configs/src/timeouts.rs deleted file mode 100644 index 58fd667..0000000 --- a/testing-framework/configs/src/timeouts.rs +++ /dev/null @@ -1,59 +0,0 @@ -use std::{env, time::Duration}; - -pub const DISPERSAL_TIMEOUT_SECS: u64 = 20; -pub const RETRY_COOLDOWN_SECS: u64 = 3; -pub const GRACE_PERIOD_SECS: u64 = 20 * 60; -pub const PRUNE_DURATION_SECS: u64 = 30; -pub const PRUNE_INTERVAL_SECS: u64 = 5; -pub const SHARE_DURATION_SECS: u64 = 5; -pub const COMMITMENTS_WAIT_SECS: u64 = 1; -pub const SDP_TRIGGER_DELAY_SECS: u64 = 5; - -fn env_duration(key: &str, default: u64) -> Duration { - env::var(key) - .ok() - .and_then(|v| v.parse::().ok()) - .map(Duration::from_secs) - .unwrap_or_else(|| Duration::from_secs(default)) -} - -pub fn dispersal_timeout() -> Duration { - env_duration( - "LOGOS_BLOCKCHAIN_DISPERSAL_TIMEOUT_SECS", - DISPERSAL_TIMEOUT_SECS, - ) -} - -pub fn retry_cooldown() -> Duration { - env_duration("LOGOS_BLOCKCHAIN_RETRY_COOLDOWN_SECS", RETRY_COOLDOWN_SECS) -} - -pub fn grace_period() -> Duration { - env_duration("LOGOS_BLOCKCHAIN_GRACE_PERIOD_SECS", GRACE_PERIOD_SECS) -} - -pub fn prune_duration() -> Duration { - env_duration("LOGOS_BLOCKCHAIN_PRUNE_DURATION_SECS", PRUNE_DURATION_SECS) -} - -pub fn prune_interval() -> Duration { - env_duration("LOGOS_BLOCKCHAIN_PRUNE_INTERVAL_SECS", PRUNE_INTERVAL_SECS) -} - -pub fn share_duration() -> Duration { - env_duration("LOGOS_BLOCKCHAIN_SHARE_DURATION_SECS", SHARE_DURATION_SECS) -} - -pub fn commitments_wait() -> Duration { - env_duration( - "LOGOS_BLOCKCHAIN_COMMITMENTS_WAIT_SECS", - COMMITMENTS_WAIT_SECS, - ) -} - -pub fn sdp_trigger_delay() -> Duration { - env_duration( - "LOGOS_BLOCKCHAIN_SDP_TRIGGER_DELAY_SECS", - SDP_TRIGGER_DELAY_SECS, - ) -} diff --git a/testing-framework/configs/src/topology/configs/api.rs b/testing-framework/configs/src/topology/configs/api.rs deleted file mode 100644 index 13bf132..0000000 --- a/testing-framework/configs/src/topology/configs/api.rs +++ /dev/null @@ -1,33 +0,0 @@ -use std::net::SocketAddr; - -use lb_utils::net::get_available_tcp_port; -use thiserror::Error; - -const LOCALHOST: [u8; 4] = [127, 0, 0, 1]; - -#[derive(Clone)] -pub struct GeneralApiConfig { - pub address: SocketAddr, - pub testing_http_address: SocketAddr, -} - -#[derive(Debug, Error)] -pub enum ApiConfigError { - #[error("failed to allocate a free TCP port for API config")] - PortAllocationFailed, -} - -pub fn create_api_configs(ids: &[[u8; 32]]) -> Result, ApiConfigError> { - ids.iter() - .map(|_| { - let address_port = - get_available_tcp_port().ok_or(ApiConfigError::PortAllocationFailed)?; - let testing_port = - get_available_tcp_port().ok_or(ApiConfigError::PortAllocationFailed)?; - Ok(GeneralApiConfig { - address: SocketAddr::from((LOCALHOST, address_port)), - testing_http_address: SocketAddr::from((LOCALHOST, testing_port)), - }) - }) - .collect() -} diff --git a/testing-framework/configs/src/topology/configs/base.rs b/testing-framework/configs/src/topology/configs/base.rs deleted file mode 100644 index 959efc3..0000000 --- a/testing-framework/configs/src/topology/configs/base.rs +++ /dev/null @@ -1,43 +0,0 @@ -use thiserror::Error; - -use super::{ - blend, bootstrap, bootstrap::SHORT_PROLONGED_BOOTSTRAP_PERIOD, consensus, - consensus::ConsensusParams, network, network::NetworkParams, wallet::WalletConfig, -}; - -#[derive(Debug, Error)] -pub enum BaseConfigError { - #[error(transparent)] - Consensus(#[from] consensus::ConsensusConfigError), - #[error(transparent)] - Network(#[from] network::NetworkConfigError), -} - -pub struct BaseConfigs { - pub consensus_configs: Vec, - pub bootstrap_configs: Vec, - pub network_configs: Vec, - pub blend_configs: Vec, -} - -pub fn build_base_configs( - ids: &[[u8; 32]], - consensus_params: &ConsensusParams, - network_params: &NetworkParams, - wallet_config: &WalletConfig, - blend_ports: &[u16], -) -> Result { - Ok(BaseConfigs { - consensus_configs: consensus::create_consensus_configs( - ids, - consensus_params, - wallet_config, - )?, - bootstrap_configs: bootstrap::create_bootstrap_configs( - ids, - SHORT_PROLONGED_BOOTSTRAP_PERIOD, - ), - network_configs: network::create_network_configs(ids, network_params)?, - blend_configs: blend::create_blend_configs(ids, blend_ports), - }) -} diff --git a/testing-framework/configs/src/topology/configs/blend.rs b/testing-framework/configs/src/topology/configs/blend.rs deleted file mode 100644 index 2a4be29..0000000 --- a/testing-framework/configs/src/topology/configs/blend.rs +++ /dev/null @@ -1,92 +0,0 @@ -use core::time::Duration; -use std::{net::Ipv4Addr, num::NonZeroU64}; - -use lb_blend_service::{ - core::backends::libp2p::Libp2pBlendBackendSettings as Libp2pCoreBlendBackendSettings, - edge::backends::libp2p::Libp2pBlendBackendSettings as Libp2pEdgeBlendBackendSettings, -}; -use lb_key_management_system_service::keys::{Ed25519Key, UnsecuredEd25519Key, ZkKey}; -use lb_libp2p::{Multiaddr, Protocol, protocol_name::StreamProtocol}; -use lb_utils::math::NonNegativeF64; -use num_bigint::BigUint; - -const EDGE_NODE_CONNECTION_TIMEOUT: Duration = Duration::from_secs(1); -const LOCALHOST: Ipv4Addr = Ipv4Addr::new(127, 0, 0, 1); - -#[derive(Clone)] -pub struct GeneralBlendConfig { - pub backend_core: Libp2pCoreBlendBackendSettings, - pub backend_edge: Libp2pEdgeBlendBackendSettings, - pub private_key: UnsecuredEd25519Key, - pub secret_zk_key: ZkKey, - pub signer: Ed25519Key, -} - -/// Builds blend configs for each node. -#[must_use] -pub fn create_blend_configs(ids: &[[u8; 32]], ports: &[u16]) -> Vec { - ids.iter() - .zip(ports) - .map(|(id, port)| { - let signer = Ed25519Key::from_bytes(id); - let private_key = UnsecuredEd25519Key::from_bytes(id); - // We need unique ZK secret keys, so we just derive them deterministically from - // the generated Ed25519 public keys, which are guaranteed to be unique because - // they are in turned derived from node ID. - let secret_zk_key = - ZkKey::from(BigUint::from_bytes_le(private_key.public_key().as_bytes())); - let listening_address = localhost_quic_address(*port); - let minimum_messages_coefficient = unsafe { NonZeroU64::new_unchecked(1) }; - let normalization_constant = match NonNegativeF64::try_from(1.03f64) { - Ok(value) => value, - Err(_) => unsafe { - // Safety: normalization constant is a finite non-negative constant. - std::hint::unreachable_unchecked() - }, - }; - let max_dial_attempts_per_peer = unsafe { NonZeroU64::new_unchecked(3) }; - let max_dial_attempts_per_peer_per_message = match 1.try_into() { - Ok(value) => value, - Err(_) => unsafe { - // Safety: the constant 1 must fit the target type and be non-zero. - std::hint::unreachable_unchecked() - }, - }; - let replication_factor = match 1.try_into() { - Ok(value) => value, - Err(_) => unsafe { - // Safety: the constant 1 must fit the target type and be non-zero. - std::hint::unreachable_unchecked() - }, - }; - GeneralBlendConfig { - backend_core: Libp2pCoreBlendBackendSettings { - listening_address, - core_peering_degree: 1..=3, - minimum_messages_coefficient, - normalization_constant, - edge_node_connection_timeout: EDGE_NODE_CONNECTION_TIMEOUT, - max_edge_node_incoming_connections: 300, - max_dial_attempts_per_peer, - protocol_name: StreamProtocol::new("/blend/integration-tests"), - }, - backend_edge: Libp2pEdgeBlendBackendSettings { - max_dial_attempts_per_peer_per_message, - protocol_name: StreamProtocol::new("/blend/integration-tests"), - replication_factor, - }, - private_key, - secret_zk_key, - signer, - } - }) - .collect() -} - -fn localhost_quic_address(port: u16) -> Multiaddr { - let mut addr = Multiaddr::empty(); - addr.push(Protocol::Ip4(LOCALHOST)); - addr.push(Protocol::Udp(port)); - addr.push(Protocol::QuicV1); - addr -} diff --git a/testing-framework/configs/src/topology/configs/bootstrap.rs b/testing-framework/configs/src/topology/configs/bootstrap.rs deleted file mode 100644 index 14e51a5..0000000 --- a/testing-framework/configs/src/topology/configs/bootstrap.rs +++ /dev/null @@ -1,20 +0,0 @@ -use std::time::Duration; - -#[derive(Clone)] -pub struct GeneralBootstrapConfig { - pub prolonged_bootstrap_period: Duration, -} - -pub const SHORT_PROLONGED_BOOTSTRAP_PERIOD: Duration = Duration::from_secs(1); - -#[must_use] -pub fn create_bootstrap_configs( - ids: &[[u8; 32]], - prolonged_bootstrap_period: Duration, -) -> Vec { - ids.iter() - .map(|_| GeneralBootstrapConfig { - prolonged_bootstrap_period, - }) - .collect() -} diff --git a/testing-framework/configs/src/topology/configs/consensus.rs b/testing-framework/configs/src/topology/configs/consensus.rs deleted file mode 100644 index 0082c37..0000000 --- a/testing-framework/configs/src/topology/configs/consensus.rs +++ /dev/null @@ -1,488 +0,0 @@ -use std::{ - env, - num::{NonZero, NonZeroU64}, - str::FromStr as _, - sync::Arc, -}; - -use lb_core::{ - mantle::{ - GenesisTx as GenesisTxTrait, MantleTx, Note, OpProof, Utxo, - genesis_tx::GenesisTx, - ledger::Tx as LedgerTx, - ops::{ - Op, - channel::{ChannelId, Ed25519PublicKey, MsgId, inscribe::InscriptionOp}, - }, - }, - sdp::{DeclarationMessage, Locator, ProviderId, ServiceParameters, ServiceType}, -}; -use lb_cryptarchia_engine::EpochConfig; -use lb_groth16::CompressedGroth16Proof; -use lb_key_management_system_service::keys::{ - Ed25519Key, UnsecuredZkKey, ZkKey, ZkPublicKey, ZkSignature, -}; -use lb_node::{SignedMantleTx, Transaction as _}; -use lb_utils::math::NonNegativeF64; -use num_bigint::BigUint; - -use super::wallet::{WalletAccount, WalletConfig}; -use crate::nodes::blend::ACTIVITY_THRESHOLD_SENSITIVITY; - -#[derive(Debug, thiserror::Error)] -pub enum ConsensusConfigError { - #[error("failed to build genesis inscription signer: {message}")] - InscriptionSigner { message: String }, - #[error("failed to build genesis transaction: {message}")] - GenesisTx { message: String }, - #[error("failed to build ledger config: {message}")] - LedgerConfig { message: String }, - #[error("failed to sign genesis declarations: {message}")] - DeclarationSignature { message: String }, - #[error("genesis ledger is missing expected utxo note: {note}")] - MissingGenesisUtxo { note: String }, -} - -#[derive(Clone)] -pub struct ConsensusParams { - pub n_participants: usize, - pub security_param: NonZero, - pub active_slot_coeff: f64, -} - -impl ConsensusParams { - const DEFAULT_ACTIVE_SLOT_COEFF: f64 = 1.0; - const CONSENSUS_ACTIVE_SLOT_COEFF_VAR: &str = "CONSENSUS_ACTIVE_SLOT_COEFF"; - - #[must_use] - pub fn default_for_participants(n_participants: usize) -> Self { - let active_slot_coeff = env::var(Self::CONSENSUS_ACTIVE_SLOT_COEFF_VAR) - .ok() - .and_then(|raw| f64::from_str(&raw).ok()) - .filter(|value| (0.0..=1.0).contains(value) && *value > 0.0) - .unwrap_or(Self::DEFAULT_ACTIVE_SLOT_COEFF); - - Self { - n_participants, - // by setting the slot coeff to 1, we also increase the probability of multiple blocks - // (forks) being produced in the same slot (epoch). Setting the security - // parameter to some value > 1 ensures nodes have some time to sync before - // deciding on the longest chain. - security_param: unsafe { NonZero::new_unchecked(10) }, - // a block should be produced (on average) every slot - active_slot_coeff, - } - } -} - -#[derive(Clone)] -pub struct ProviderInfo { - pub service_type: ServiceType, - pub provider_sk: Ed25519Key, - pub zk_sk: ZkKey, - pub locator: Locator, - pub note: ServiceNote, -} - -impl ProviderInfo { - #[must_use] - pub fn provider_id(&self) -> ProviderId { - ProviderId(self.provider_sk.public_key()) - } - - #[must_use] - pub fn zk_id(&self) -> ZkPublicKey { - self.zk_sk.to_public_key() - } -} - -/// General consensus configuration for a chosen participant, that later could -/// be converted into a specific service or services configuration. -#[derive(Clone)] -pub struct GeneralConsensusConfig { - pub leader_pk: ZkPublicKey, - pub leader_sk: UnsecuredZkKey, - pub ledger_config: lb_ledger::Config, - pub genesis_tx: GenesisTx, - pub utxos: Vec, - pub blend_notes: Vec, - pub wallet_accounts: Vec, - pub funding_sk: ZkKey, -} - -#[derive(Clone)] -pub struct ServiceNote { - pub pk: ZkPublicKey, - pub sk: ZkKey, - pub note: Note, - pub output_index: usize, -} - -fn create_genesis_tx(utxos: &mut [Utxo]) -> Result { - // Create a genesis inscription op (similar to config.yaml) - let inscription = InscriptionOp { - channel_id: ChannelId::from([0; 32]), - inscription: vec![103, 101, 110, 101, 115, 105, 115], // "genesis" in bytes - parent: MsgId::root(), - signer: Ed25519PublicKey::from_bytes(&[0; 32]).map_err(|err| { - ConsensusConfigError::InscriptionSigner { - message: err.to_string(), - } - })?, - }; - - // Create ledger transaction with the utxos as outputs - let outputs: Vec = utxos.iter().map(|u| u.note).collect(); - let ledger_tx = LedgerTx::new(vec![], outputs); - let ledger_tx_hash = ledger_tx.hash(); - - // Ensure utxo IDs match the ledger tx hash used at genesis. - for utxo in utxos { - utxo.tx_hash = ledger_tx_hash; - } - - // Create the mantle transaction - let mantle_tx = MantleTx { - ops: vec![Op::ChannelInscribe(inscription)], - ledger_tx, - execution_gas_price: 0, - storage_gas_price: 0, - }; - let signed_mantle_tx = SignedMantleTx { - mantle_tx, - ops_proofs: vec![OpProof::NoProof], - ledger_tx_proof: ZkSignature::new(CompressedGroth16Proof::from_bytes(&[0u8; 128])), - }; - - // Wrap in GenesisTx - GenesisTx::from_tx(signed_mantle_tx).map_err(|err| ConsensusConfigError::GenesisTx { - message: err.to_string(), - }) -} - -fn build_ledger_config( - consensus_params: &ConsensusParams, -) -> Result { - Ok(lb_ledger::Config { - epoch_config: EpochConfig { - epoch_stake_distribution_stabilization: unsafe { NonZero::new_unchecked(3) }, - epoch_period_nonce_buffer: unsafe { NonZero::new_unchecked(3) }, - epoch_period_nonce_stabilization: unsafe { NonZero::new_unchecked(4) }, - }, - consensus_config: lb_cryptarchia_engine::Config::new( - consensus_params.security_param, - consensus_params.active_slot_coeff, - ), - sdp_config: lb_ledger::mantle::sdp::Config { - service_params: Arc::new( - [( - ServiceType::BlendNetwork, - ServiceParameters { - lock_period: 10, - inactivity_period: 20, - retention_period: 100, - timestamp: 0, - session_duration: 1000, - }, - )] - .into(), - ), - min_stake: lb_core::sdp::MinStake { - threshold: 1, - timestamp: 0, - }, - service_rewards_params: lb_ledger::mantle::sdp::ServiceRewardsParameters { - blend: lb_ledger::mantle::sdp::rewards::blend::RewardsParameters { - rounds_per_session: unsafe { NonZeroU64::new_unchecked(10) }, - message_frequency_per_round: NonNegativeF64::try_from(1.0).map_err(|_| { - ConsensusConfigError::LedgerConfig { - message: "message_frequency_per_round must be non-negative".to_owned(), - } - })?, - num_blend_layers: unsafe { NonZeroU64::new_unchecked(3) }, - minimum_network_size: unsafe { NonZeroU64::new_unchecked(1) }, - data_replication_factor: 0, - activity_threshold_sensitivity: ACTIVITY_THRESHOLD_SENSITIVITY, - }, - }, - }, - }) -} - -#[must_use] -pub fn create_consensus_configs( - ids: &[[u8; 32]], - consensus_params: &ConsensusParams, - wallet: &WalletConfig, -) -> Result, ConsensusConfigError> { - let mut leader_keys = Vec::new(); - let mut blend_notes = Vec::new(); - let mut sdp_notes = Vec::new(); - - let leader_stake = leader_stake_amount(wallet, ids.len()); - let utxos = create_utxos_for_leader_and_services( - ids, - &mut leader_keys, - &mut blend_notes, - &mut sdp_notes, - leader_stake, - ); - let mut utxos = append_wallet_utxos(utxos, wallet); - let genesis_tx = create_genesis_tx(&mut utxos)?; - let ledger_config = build_ledger_config(consensus_params)?; - - Ok(leader_keys - .into_iter() - .enumerate() - .map(|(i, (pk, sk))| GeneralConsensusConfig { - leader_pk: pk, - leader_sk: sk, - ledger_config: ledger_config.clone(), - genesis_tx: genesis_tx.clone(), - utxos: utxos.clone(), - blend_notes: blend_notes.clone(), - wallet_accounts: wallet.accounts.clone(), - funding_sk: sdp_notes[i].sk.clone(), - }) - .collect()) -} - -fn leader_stake_amount(wallet: &WalletConfig, n_participants: usize) -> u64 { - // Minimum leader stake (legacy baseline) so small test wallets still - // have a viable leader in low-fund scenarios. - const MIN_LEADER_STAKE: u64 = 100_000; - - // Leader stake multiplier relative to average wallet allocation per validator. - // Keeps the leader stake competitive when wallet-funded UTXOs dominate total - // stake. - const LEADER_STAKE_MULTIPLIER: u64 = 10; - - let total_wallet_funds: u64 = wallet.accounts.iter().map(|account| account.value).sum(); - if total_wallet_funds == 0 { - return MIN_LEADER_STAKE; - } - - let n = n_participants.max(1) as u64; - - // Scale leader stake to stay competitive with large wallet-funded UTXOs. - // We use LEADER_STAKE_MULTIPLIER × (total_wallet_funds / n) to keep - // block production likely even when wallets dominate total stake. - let scaled = total_wallet_funds - .saturating_mul(LEADER_STAKE_MULTIPLIER) - .saturating_div(n) - .max(1); - - // Floor to preserve the prior baseline leader stake and avoid too-small values. - scaled.max(MIN_LEADER_STAKE) -} - -fn create_utxos_for_leader_and_services( - ids: &[[u8; 32]], - leader_keys: &mut Vec<(ZkPublicKey, UnsecuredZkKey)>, - blend_notes: &mut Vec, - sdp_notes: &mut Vec, - leader_stake: u64, -) -> Vec { - let mut utxos = Vec::new(); - - // Create notes for leader, Blend and DA declarations. - let mut output_index = 0; - for &id in ids { - output_index = push_leader_utxo(id, leader_keys, &mut utxos, output_index, leader_stake); - output_index = push_service_note(b"bn", id, blend_notes, &mut utxos, output_index); - output_index = push_service_note(b"sdp", id, sdp_notes, &mut utxos, output_index); - } - - utxos -} - -fn derive_key_material(prefix: &[u8], id_bytes: &[u8; 32]) -> [u8; 16] { - let mut sk_data = [0; 16]; - let prefix_len = prefix.len(); - - sk_data[..prefix_len].copy_from_slice(prefix); - let remaining_len = 16 - prefix_len; - sk_data[prefix_len..].copy_from_slice(&id_bytes[..remaining_len]); - - sk_data -} - -fn push_leader_utxo( - id: [u8; 32], - leader_keys: &mut Vec<(ZkPublicKey, UnsecuredZkKey)>, - utxos: &mut Vec, - output_index: usize, - leader_stake: u64, -) -> usize { - let sk_data = derive_key_material(b"ld", &id); - let sk = UnsecuredZkKey::from(BigUint::from_bytes_le(&sk_data)); - let pk = sk.to_public_key(); - leader_keys.push((pk, sk)); - utxos.push(Utxo { - note: Note::new(leader_stake, pk), - tx_hash: BigUint::from(0u8).into(), - output_index, - }); - output_index + 1 -} - -fn push_service_note( - prefix: &[u8], - id: [u8; 32], - notes: &mut Vec, - utxos: &mut Vec, - output_index: usize, -) -> usize { - let sk_data = derive_key_material(prefix, &id); - let sk = ZkKey::from(BigUint::from_bytes_le(&sk_data)); - let pk = sk.to_public_key(); - let note = Note::new(1, pk); - notes.push(ServiceNote { - pk, - sk, - note, - output_index, - }); - utxos.push(Utxo { - note, - tx_hash: BigUint::from(0u8).into(), - output_index, - }); - output_index + 1 -} - -fn append_wallet_utxos(mut utxos: Vec, wallet: &WalletConfig) -> Vec { - for account in &wallet.accounts { - let output_index = utxos.len(); - utxos.push(Utxo { - note: Note::new(account.value, account.public_key()), - tx_hash: BigUint::from(0u8).into(), - output_index, - }); - } - - utxos -} - -#[must_use] -pub fn create_genesis_tx_with_declarations( - ledger_tx: LedgerTx, - providers: Vec, -) -> Result { - let inscription = build_genesis_inscription()?; - let ledger_tx_hash = ledger_tx.hash(); - - let ops = build_genesis_ops(inscription, ledger_tx_hash, &providers); - let mantle_tx = MantleTx { - ops, - ledger_tx, - execution_gas_price: 0, - storage_gas_price: 0, - }; - - let ops_proofs = build_genesis_ops_proofs(mantle_tx.hash(), providers)?; - build_genesis_tx(mantle_tx, ops_proofs) -} - -fn build_genesis_inscription() -> Result { - Ok(InscriptionOp { - channel_id: ChannelId::from([0; 32]), - inscription: vec![103, 101, 110, 101, 115, 105, 115], // "genesis" in bytes - parent: MsgId::root(), - signer: Ed25519PublicKey::from_bytes(&[0; 32]).map_err(|err| { - ConsensusConfigError::InscriptionSigner { - message: err.to_string(), - } - })?, - }) -} - -fn build_genesis_ops( - inscription: InscriptionOp, - ledger_tx_hash: lb_core::mantle::TxHash, - providers: &[ProviderInfo], -) -> Vec { - let mut ops = Vec::with_capacity(1 + providers.len()); - ops.push(Op::ChannelInscribe(inscription)); - - for provider in providers { - let utxo = Utxo { - tx_hash: ledger_tx_hash, - output_index: provider.note.output_index, - note: provider.note.note, - }; - let declaration = DeclarationMessage { - service_type: provider.service_type, - locators: vec![provider.locator.clone()], - provider_id: provider.provider_id(), - zk_id: provider.zk_id(), - locked_note_id: utxo.id(), - }; - ops.push(Op::SDPDeclare(declaration)); - } - - ops -} - -fn build_genesis_ops_proofs( - mantle_tx_hash: lb_core::mantle::TxHash, - providers: Vec, -) -> Result, ConsensusConfigError> { - let mut ops_proofs = Vec::with_capacity(1 + providers.len()); - ops_proofs.push(OpProof::NoProof); - - for provider in providers { - let zk_sig = - ZkKey::multi_sign(&[provider.note.sk, provider.zk_sk], mantle_tx_hash.as_ref()) - .map_err(|err| ConsensusConfigError::DeclarationSignature { - message: format!("{err:?}"), - })?; - let ed25519_sig = provider - .provider_sk - .sign_payload(mantle_tx_hash.as_signing_bytes().as_ref()); - - ops_proofs.push(OpProof::ZkAndEd25519Sigs { - zk_sig, - ed25519_sig, - }); - } - - Ok(ops_proofs) -} - -fn build_genesis_tx( - mantle_tx: MantleTx, - ops_proofs: Vec, -) -> Result { - let signed_mantle_tx = SignedMantleTx { - mantle_tx, - ops_proofs, - ledger_tx_proof: ZkSignature::new(CompressedGroth16Proof::from_bytes(&[0u8; 128])), - }; - - GenesisTx::from_tx(signed_mantle_tx).map_err(|err| ConsensusConfigError::GenesisTx { - message: err.to_string(), - }) -} - -pub fn sync_utxos_with_genesis( - utxos: &mut [Utxo], - genesis_tx: &GenesisTx, -) -> Result<(), ConsensusConfigError> { - let ledger_tx = genesis_tx.mantle_tx().ledger_tx.clone(); - let ledger_tx_hash = ledger_tx.hash(); - let outputs = &ledger_tx.outputs; - - for utxo in utxos { - let output_index = outputs - .iter() - .position(|note| note == &utxo.note) - .ok_or_else(|| ConsensusConfigError::MissingGenesisUtxo { - note: format!("{:?}", utxo.note), - })?; - utxo.output_index = output_index; - utxo.tx_hash = ledger_tx_hash; - } - - Ok(()) -} diff --git a/testing-framework/configs/src/topology/configs/deployment.rs b/testing-framework/configs/src/topology/configs/deployment.rs deleted file mode 100644 index 1c7df23..0000000 --- a/testing-framework/configs/src/topology/configs/deployment.rs +++ /dev/null @@ -1,71 +0,0 @@ -use core::{num::NonZeroU64, time::Duration}; - -use lb_blend_service::{ - core::settings::{CoverTrafficSettings, MessageDelayerSettings, SchedulerSettings}, - settings::TimingSettings, -}; -use lb_libp2p::protocol_name::StreamProtocol; -use lb_node::config::{ - blend::deployment::{ - CommonSettings as BlendCommonSettings, CoreSettings as BlendCoreSettings, - Settings as BlendDeploymentSettings, - }, - deployment::{CustomDeployment, Settings as DeploymentSettings}, - network::deployment::Settings as NetworkDeploymentSettings, -}; -use lb_utils::math::NonNegativeF64; - -const DEFAULT_ROUND_DURATION: Duration = Duration::from_secs(1); - -#[must_use] -pub fn default_e2e_deployment_settings() -> DeploymentSettings { - let normalization_constant = match NonNegativeF64::try_from(1.03f64) { - Ok(value) => value, - Err(_) => unsafe { - // Safety: normalization constant is a finite non-negative constant. - std::hint::unreachable_unchecked() - }, - }; - let message_frequency_per_round = match NonNegativeF64::try_from(1f64) { - Ok(value) => value, - Err(_) => unsafe { - // Safety: message frequency is a finite non-negative constant. - std::hint::unreachable_unchecked() - }, - }; - DeploymentSettings::Custom(CustomDeployment { - blend: BlendDeploymentSettings { - common: BlendCommonSettings { - minimum_network_size: unsafe { NonZeroU64::new_unchecked(30) }, - num_blend_layers: unsafe { NonZeroU64::new_unchecked(3) }, - timing: TimingSettings { - round_duration: DEFAULT_ROUND_DURATION, - rounds_per_interval: unsafe { NonZeroU64::new_unchecked(30) }, - // (21,600 blocks * 30s per block) / 1s per round = 648,000 rounds - rounds_per_session: unsafe { NonZeroU64::new_unchecked(648_000) }, - rounds_per_observation_window: unsafe { NonZeroU64::new_unchecked(30) }, - rounds_per_session_transition_period: unsafe { NonZeroU64::new_unchecked(30) }, - epoch_transition_period_in_slots: unsafe { NonZeroU64::new_unchecked(2_600) }, - }, - protocol_name: StreamProtocol::new("/blend/integration-tests"), - }, - core: BlendCoreSettings { - minimum_messages_coefficient: unsafe { NonZeroU64::new_unchecked(1) }, - normalization_constant, - scheduler: SchedulerSettings { - cover: CoverTrafficSettings { - intervals_for_safety_buffer: 100, - message_frequency_per_round, - }, - delayer: MessageDelayerSettings { - maximum_release_delay_in_rounds: unsafe { NonZeroU64::new_unchecked(3) }, - }, - }, - }, - }, - network: NetworkDeploymentSettings { - identify_protocol_name: StreamProtocol::new("/integration/nomos/identify/1.0.0"), - kademlia_protocol_name: StreamProtocol::new("/integration/nomos/kad/1.0.0"), - }, - }) -} diff --git a/testing-framework/configs/src/topology/configs/mod.rs b/testing-framework/configs/src/topology/configs/mod.rs deleted file mode 100644 index 198f58e..0000000 --- a/testing-framework/configs/src/topology/configs/mod.rs +++ /dev/null @@ -1,279 +0,0 @@ -pub mod api; -pub mod base; -pub mod blend; -pub mod bootstrap; -pub mod consensus; -pub mod network; -pub mod runtime; -pub mod time; -pub mod tracing; -pub mod wallet; - -use std::cmp; - -use blend::GeneralBlendConfig; -use consensus::{ - ConsensusConfigError, GeneralConsensusConfig, ProviderInfo, - create_genesis_tx_with_declarations, sync_utxos_with_genesis, -}; -use lb_core::{ - mantle::GenesisTx as _, - sdp::{Locator, ServiceType}, -}; -use lb_key_management_system_service::{backend::preload::PreloadKMSBackendSettings, keys::Key}; -use lb_utils::net::get_available_udp_port; -use network::GeneralNetworkConfig; -use rand::{Rng as _, thread_rng}; -use tracing::GeneralTracingConfig; -use wallet::WalletConfig; - -use crate::{ - nodes::kms::key_id_for_preload_backend, - topology::{ - configs::{ - api::GeneralApiConfig, - bootstrap::{GeneralBootstrapConfig, SHORT_PROLONGED_BOOTSTRAP_PERIOD}, - consensus::ConsensusParams, - network::NetworkParams, - time::GeneralTimeConfig, - }, - invariants::validate_generated_vectors, - }, -}; - -#[derive(Debug, thiserror::Error)] -pub enum GeneralConfigError { - #[error("participant count must be > 0")] - EmptyParticipants, - #[error("blend core subset {blend_core} exceeds participants {participants}")] - BlendCoreSubsetTooLarge { - blend_core: usize, - participants: usize, - }, - #[error("failed to allocate a free UDP port for {label}")] - PortAllocationFailed { label: &'static str }, - #[error(transparent)] - Invariants(#[from] crate::topology::invariants::TopologyInvariantError), - #[error(transparent)] - Consensus(#[from] ConsensusConfigError), - #[error(transparent)] - Network(#[from] network::NetworkConfigError), - #[error(transparent)] - Api(#[from] api::ApiConfigError), -} - -#[derive(Clone)] -pub struct GeneralConfig { - pub api_config: GeneralApiConfig, - pub consensus_config: GeneralConsensusConfig, - pub bootstrapping_config: GeneralBootstrapConfig, - pub network_config: GeneralNetworkConfig, - pub blend_config: GeneralBlendConfig, - pub tracing_config: GeneralTracingConfig, - pub time_config: GeneralTimeConfig, - pub kms_config: PreloadKMSBackendSettings, -} - -pub fn create_general_configs(n_nodes: usize) -> Result, GeneralConfigError> { - create_general_configs_with_network(n_nodes, &NetworkParams::default()) -} - -pub fn create_general_configs_with_network( - n_nodes: usize, - network_params: &NetworkParams, -) -> Result, GeneralConfigError> { - create_general_configs_with_blend_core_subset(n_nodes, n_nodes, network_params) -} - -pub fn create_general_configs_with_blend_core_subset( - n_nodes: usize, - // TODO: Instead of this, define a config struct for each node. - // That would be also useful for non-even token distributions: https://github.com/logos-co/nomos/issues/1888 - n_blend_core_nodes: usize, - network_params: &NetworkParams, -) -> Result, GeneralConfigError> { - validate_node_counts(n_nodes, n_blend_core_nodes)?; - - let (ids, blend_ports) = generate_ids_and_ports(n_nodes)?; - - validate_generated_vectors(n_nodes, &ids, &blend_ports)?; - - let consensus_params = ConsensusParams::default_for_participants(n_nodes); - let mut consensus_configs = - consensus::create_consensus_configs(&ids, &consensus_params, &WalletConfig::default())?; - let bootstrap_config = - bootstrap::create_bootstrap_configs(&ids, SHORT_PROLONGED_BOOTSTRAP_PERIOD); - let network_configs = network::create_network_configs(&ids, network_params)?; - let api_configs = api::create_api_configs(&ids)?; - let blend_configs = blend::create_blend_configs(&ids, &blend_ports); - let tracing_configs = tracing::create_tracing_configs(&ids); - let time_config = time::default_time_config(); - - let Some(first_consensus) = consensus_configs.first() else { - return Err(GeneralConfigError::EmptyParticipants); - }; - - let providers = - collect_blend_core_providers(first_consensus, &blend_configs, n_blend_core_nodes)?; - let ledger_tx = first_consensus.genesis_tx.mantle_tx().ledger_tx.clone(); - let genesis_tx = create_genesis_tx_with_declarations(ledger_tx, providers)?; - apply_consensus_genesis_tx(&mut consensus_configs, &genesis_tx)?; - - // Set Blend and DA keys in KMS of each node config. - let kms_configs = build_kms_configs(&blend_configs); - - build_general_configs( - n_nodes, - &api_configs, - &consensus_configs, - &bootstrap_config, - &network_configs, - &blend_configs, - &tracing_configs, - &kms_configs, - &time_config, - ) -} - -fn validate_node_counts( - n_nodes: usize, - n_blend_core_nodes: usize, -) -> Result<(), GeneralConfigError> { - if n_nodes == 0 { - return Err(GeneralConfigError::EmptyParticipants); - } - - if n_blend_core_nodes > n_nodes { - return Err(GeneralConfigError::BlendCoreSubsetTooLarge { - blend_core: n_blend_core_nodes, - participants: n_nodes, - }); - } - - Ok(()) -} - -fn generate_ids_and_ports(n_nodes: usize) -> Result<(Vec<[u8; 32]>, Vec), GeneralConfigError> { - // Blend relies on each node declaring a different ZK public key, so we need - // different IDs to generate different keys. - let mut ids: Vec<_> = (0..n_nodes).map(|i| [i as u8; 32]).collect(); - let mut blend_ports = Vec::with_capacity(n_nodes); - - for id in &mut ids { - thread_rng().fill(id); - - blend_ports.push( - get_available_udp_port() - .ok_or(GeneralConfigError::PortAllocationFailed { label: "Blend" })?, - ); - } - - Ok((ids, blend_ports)) -} - -fn collect_blend_core_providers( - first_consensus: &GeneralConsensusConfig, - blend_configs: &[GeneralBlendConfig], - n_blend_core_nodes: usize, -) -> Result, GeneralConfigError> { - let n_core = cmp::min(n_blend_core_nodes, blend_configs.len()); - let mut providers = Vec::with_capacity(n_core); - - for (i, blend_conf) in blend_configs.iter().enumerate().take(n_core) { - let note = first_consensus - .blend_notes - .get(i) - .ok_or(GeneralConfigError::EmptyParticipants)? - .clone(); - - providers.push(ProviderInfo { - service_type: ServiceType::BlendNetwork, - provider_sk: blend_conf.signer.clone(), - zk_sk: blend_conf.secret_zk_key.clone(), - locator: Locator(blend_conf.backend_core.listening_address.clone()), - note, - }); - } - - Ok(providers) -} - -fn apply_consensus_genesis_tx( - consensus_configs: &mut [GeneralConsensusConfig], - genesis_tx: &lb_core::mantle::genesis_tx::GenesisTx, -) -> Result<(), ConsensusConfigError> { - for c in consensus_configs { - c.genesis_tx = genesis_tx.clone(); - sync_utxos_with_genesis(&mut c.utxos, genesis_tx)?; - } - - Ok(()) -} - -fn build_kms_configs(blend_configs: &[GeneralBlendConfig]) -> Vec { - blend_configs - .iter() - .map(|blend_conf| { - let ed_key = blend_conf.signer.clone(); - let zk_key = blend_conf.secret_zk_key.clone(); - PreloadKMSBackendSettings { - keys: [ - ( - key_id_for_preload_backend(&Key::from(ed_key.clone())), - Key::from(ed_key), - ), - ( - key_id_for_preload_backend(&Key::from(zk_key.clone())), - Key::from(zk_key), - ), - ] - .into(), - } - }) - .collect() -} - -#[allow(clippy::too_many_arguments)] -fn build_general_configs( - n_nodes: usize, - api_configs: &[GeneralApiConfig], - consensus_configs: &[GeneralConsensusConfig], - bootstrap_config: &[GeneralBootstrapConfig], - network_configs: &[GeneralNetworkConfig], - blend_configs: &[GeneralBlendConfig], - tracing_configs: &[GeneralTracingConfig], - kms_configs: &[PreloadKMSBackendSettings], - time_config: &GeneralTimeConfig, -) -> Result, GeneralConfigError> { - let mut general_configs = Vec::with_capacity(n_nodes); - - for i in 0..n_nodes { - let api_config = get_cloned_or_empty(api_configs, i)?; - let consensus_config = get_cloned_or_empty(consensus_configs, i)?; - let bootstrapping_config = get_cloned_or_empty(bootstrap_config, i)?; - let network_config = get_cloned_or_empty(network_configs, i)?; - let blend_config = get_cloned_or_empty(blend_configs, i)?; - let tracing_config = get_cloned_or_empty(tracing_configs, i)?; - let kms_config = get_cloned_or_empty(kms_configs, i)?; - - general_configs.push(GeneralConfig { - api_config, - consensus_config, - bootstrapping_config, - network_config, - blend_config, - tracing_config, - time_config: time_config.clone(), - kms_config, - }); - } - - Ok(general_configs) -} - -fn get_cloned_or_empty(items: &[T], index: usize) -> Result { - items - .get(index) - .cloned() - .ok_or(GeneralConfigError::EmptyParticipants) -} diff --git a/testing-framework/configs/src/topology/configs/network.rs b/testing-framework/configs/src/topology/configs/network.rs deleted file mode 100644 index 8f189ae..0000000 --- a/testing-framework/configs/src/topology/configs/network.rs +++ /dev/null @@ -1,192 +0,0 @@ -use std::time::Duration; - -use lb_libp2p::{ - IdentifySettings, KademliaSettings, Multiaddr, NatSettings, Protocol, ed25519, gossipsub, -}; -use lb_node::config::network::serde::{BackendSettings, Config, SwarmConfig}; -use lb_utils::net::get_available_udp_port; -use testing_framework_env as tf_env; -use thiserror::Error; - -use crate::{node_address_from_port, secret_key_to_peer_id}; - -const PEER_RESPONSE_TIMEOUT: Duration = Duration::from_secs(60); - -#[derive(Default, Clone)] -pub enum Libp2pNetworkLayout { - #[default] - Star, - Chain, - Full, -} - -#[derive(Default, Clone)] -pub struct NetworkParams { - pub libp2p_network_layout: Libp2pNetworkLayout, -} - -pub type GeneralNetworkConfig = Config; - -#[derive(Debug, Error)] -pub enum NetworkConfigError { - #[error("failed to allocate a free UDP port for libp2p swarm")] - PortAllocationFailed, - #[error("failed to derive node key from bytes: {message}")] - NodeKeyFromBytes { message: String }, - #[error("failed to build loopback multiaddr for NAT settings: {message}")] - LoopbackMultiaddr { message: String }, -} - -fn default_swarm_config() -> SwarmConfig { - SwarmConfig { - host: std::net::Ipv4Addr::UNSPECIFIED, - port: 60000, - node_key: ed25519::SecretKey::generate(), - gossipsub_config: gossipsub::Config::default(), - kademlia_config: KademliaSettings::default(), - identify_config: IdentifySettings::default(), - chain_sync_config: lb_cryptarchia_sync::Config::default(), - nat_config: NatSettings::default(), - } -} - -fn nat_settings(port: u16) -> Result { - if tf_env::nomos_use_autonat() { - return Ok(NatSettings::default()); - } - - let addr: Multiaddr = format!("/ip4/127.0.0.1/udp/{port}/quic-v1") - .parse::() - .map_err(|err| NetworkConfigError::LoopbackMultiaddr { - message: err.to_string(), - })?; - Ok(NatSettings::Static { - external_address: addr, - }) -} - -#[must_use] -pub fn create_network_configs( - ids: &[[u8; 32]], - network_params: &NetworkParams, -) -> Result, NetworkConfigError> { - let swarm_configs: Vec = ids - .iter() - .map(|id| { - let mut node_key_bytes = *id; - let node_key = - ed25519::SecretKey::try_from_bytes(&mut node_key_bytes).map_err(|err| { - NetworkConfigError::NodeKeyFromBytes { - message: err.to_string(), - } - })?; - - let port = get_available_udp_port().ok_or(NetworkConfigError::PortAllocationFailed)?; - Ok(SwarmConfig { - node_key, - port, - chain_sync_config: lb_cryptarchia_sync::Config { - peer_response_timeout: PEER_RESPONSE_TIMEOUT, - }, - nat_config: nat_settings(port)?, - ..default_swarm_config() - }) - }) - .collect::>()?; - - let all_initial_peers = initial_peers_by_network_layout(&swarm_configs, network_params); - - Ok(swarm_configs - .iter() - .zip(all_initial_peers) - .map(|(swarm_config, initial_peers)| GeneralNetworkConfig { - backend: BackendSettings { - initial_peers, - swarm: swarm_config.to_owned(), - }, - }) - .collect()) -} - -pub fn build_network_config_for_node( - id: [u8; 32], - port: u16, - initial_peers: Vec, -) -> Result { - let mut node_key_bytes = id; - let node_key = ed25519::SecretKey::try_from_bytes(&mut node_key_bytes).map_err(|err| { - NetworkConfigError::NodeKeyFromBytes { - message: err.to_string(), - } - })?; - - let swarm_config = SwarmConfig { - node_key, - port, - chain_sync_config: lb_cryptarchia_sync::Config { - peer_response_timeout: PEER_RESPONSE_TIMEOUT, - }, - nat_config: nat_settings(port)?, - ..default_swarm_config() - }; - - Ok(GeneralNetworkConfig { - backend: BackendSettings { - initial_peers, - swarm: swarm_config, - }, - }) -} - -fn initial_peers_by_network_layout( - swarm_configs: &[SwarmConfig], - network_params: &NetworkParams, -) -> Vec> { - let Some(first_swarm) = swarm_configs.first() else { - return Vec::new(); - }; - - let mut all_initial_peers = vec![]; - - match network_params.libp2p_network_layout { - Libp2pNetworkLayout::Star => { - // First node is the hub - has no initial peers - all_initial_peers.push(vec![]); - let first_addr = node_address_with_peer(first_swarm); - - // All other nodes connect to the first node - for _ in 1..swarm_configs.len() { - all_initial_peers.push(vec![first_addr.clone()]); - } - } - Libp2pNetworkLayout::Chain => { - // First node has no initial peers - all_initial_peers.push(vec![]); - - // Each subsequent node connects to the previous one - for i in 1..swarm_configs.len() { - let prev_addr = node_address_with_peer(&swarm_configs[i - 1]); - all_initial_peers.push(vec![prev_addr]); - } - } - Libp2pNetworkLayout::Full => { - // Each node connects to all previous nodes, unidirectional connections - for i in 0..swarm_configs.len() { - let mut peers = vec![]; - for swarm_config in swarm_configs.iter().take(i) { - peers.push(node_address_with_peer(swarm_config)); - } - all_initial_peers.push(peers); - } - } - } - - all_initial_peers -} - -fn node_address_with_peer(swarm_config: &SwarmConfig) -> Multiaddr { - let mut addr = node_address_from_port(swarm_config.port); - let peer_id = secret_key_to_peer_id(swarm_config.node_key.clone()); - addr.push(Protocol::P2p(peer_id.into())); - addr -} diff --git a/testing-framework/configs/src/topology/configs/runtime.rs b/testing-framework/configs/src/topology/configs/runtime.rs deleted file mode 100644 index 76bbdec..0000000 --- a/testing-framework/configs/src/topology/configs/runtime.rs +++ /dev/null @@ -1,136 +0,0 @@ -use std::collections::HashMap; - -use lb_key_management_system_service::{backend::preload::PreloadKMSBackendSettings, keys::Key}; -use lb_libp2p::Multiaddr; - -use crate::{ - node_address_from_port, - nodes::kms::key_id_for_preload_backend, - topology::configs::{ - GeneralConfig, GeneralConfigError, api, blend, bootstrap, consensus, - consensus::{ConsensusParams, GeneralConsensusConfig}, - network, - network::{Libp2pNetworkLayout, NetworkParams}, - time, tracing, - wallet::WalletConfig, - }, -}; - -pub fn build_general_config_for_node( - id: [u8; 32], - network_port: u16, - initial_peers: Vec, - blend_port: u16, - consensus_params: &ConsensusParams, - wallet_config: &WalletConfig, - base_consensus: &GeneralConsensusConfig, - time_config: &time::GeneralTimeConfig, -) -> Result { - let consensus_config = - build_consensus_config_for_node(id, consensus_params, wallet_config, base_consensus)?; - - let bootstrap_config = - bootstrap::create_bootstrap_configs(&[id], bootstrap::SHORT_PROLONGED_BOOTSTRAP_PERIOD) - .into_iter() - .next() - .ok_or(GeneralConfigError::EmptyParticipants)?; - - let blend_config = blend::create_blend_configs(&[id], &[blend_port]) - .into_iter() - .next() - .ok_or(GeneralConfigError::EmptyParticipants)?; - - let network_config = network::build_network_config_for_node(id, network_port, initial_peers)?; - - let api_config = api::create_api_configs(&[id])? - .into_iter() - .next() - .ok_or(GeneralConfigError::EmptyParticipants)?; - - let tracing_config = tracing::create_tracing_configs(&[id]) - .into_iter() - .next() - .ok_or(GeneralConfigError::EmptyParticipants)?; - - let kms_config = build_kms_config_for_node(&blend_config, wallet_config, &consensus_config); - - Ok(GeneralConfig { - consensus_config, - bootstrapping_config: bootstrap_config, - network_config, - blend_config, - api_config, - tracing_config, - time_config: time_config.clone(), - kms_config, - }) -} - -pub fn build_consensus_config_for_node( - id: [u8; 32], - consensus_params: &ConsensusParams, - wallet_config: &WalletConfig, - base: &GeneralConsensusConfig, -) -> Result { - let mut config = consensus::create_consensus_configs(&[id], consensus_params, wallet_config)? - .into_iter() - .next() - .ok_or(GeneralConfigError::EmptyParticipants)?; - - config.genesis_tx = base.genesis_tx.clone(); - config.utxos = base.utxos.clone(); - config.blend_notes = base.blend_notes.clone(); - config.wallet_accounts = base.wallet_accounts.clone(); - - Ok(config) -} - -pub fn build_initial_peers(network_params: &NetworkParams, peer_ports: &[u16]) -> Vec { - match network_params.libp2p_network_layout { - Libp2pNetworkLayout::Star => peer_ports - .first() - .map(|port| vec![node_address_from_port(*port)]) - .unwrap_or_default(), - Libp2pNetworkLayout::Chain => peer_ports - .last() - .map(|port| vec![node_address_from_port(*port)]) - .unwrap_or_default(), - Libp2pNetworkLayout::Full => peer_ports - .iter() - .map(|port| node_address_from_port(*port)) - .collect(), - } -} - -fn build_kms_config_for_node( - blend_config: &blend::GeneralBlendConfig, - wallet_config: &WalletConfig, - consensus_config: &GeneralConsensusConfig, -) -> PreloadKMSBackendSettings { - let mut keys = HashMap::from([ - ( - key_id_for_preload_backend(&Key::Ed25519(blend_config.signer.clone())), - Key::Ed25519(blend_config.signer.clone()), - ), - ( - key_id_for_preload_backend(&Key::Zk(blend_config.secret_zk_key.clone())), - Key::Zk(blend_config.secret_zk_key.clone()), - ), - ( - key_id_for_preload_backend(&Key::Zk(consensus_config.leader_sk.clone().into())), - Key::Zk(consensus_config.leader_sk.clone().into()), - ), - ( - key_id_for_preload_backend(&Key::Zk(consensus_config.funding_sk.clone())), - Key::Zk(consensus_config.funding_sk.clone()), - ), - ]); - - for account in &wallet_config.accounts { - let key = Key::Zk(account.secret_key.clone()); - let key_id = key_id_for_preload_backend(&key); - keys.entry(key_id).or_insert(key); - } - - PreloadKMSBackendSettings { keys } -} diff --git a/testing-framework/configs/src/topology/configs/time.rs b/testing-framework/configs/src/topology/configs/time.rs deleted file mode 100644 index 921393c..0000000 --- a/testing-framework/configs/src/topology/configs/time.rs +++ /dev/null @@ -1,37 +0,0 @@ -use std::{ - net::{IpAddr, Ipv4Addr}, - str::FromStr as _, - time::Duration, -}; - -const DEFAULT_SLOT_TIME: u64 = 1; -const CONSENSUS_SLOT_TIME_VAR: &str = "CONSENSUS_SLOT_TIME"; -const DEFAULT_NTP_SERVER: &str = "pool.ntp.org:123"; -const DEFAULT_NTP_TIMEOUT: Duration = Duration::from_secs(5); -const DEFAULT_NTP_UPDATE_INTERVAL: Duration = Duration::from_secs(16); - -#[derive(Clone, Debug)] -pub struct GeneralTimeConfig { - pub slot_duration: Duration, - pub ntp_server: String, - pub timeout: Duration, - pub interface: IpAddr, - pub update_interval: Duration, -} - -#[must_use] -pub fn default_time_config() -> GeneralTimeConfig { - let slot_duration = std::env::var(CONSENSUS_SLOT_TIME_VAR) - .ok() - .and_then(|raw| u64::from_str(&raw).ok()) - .filter(|value| *value > 0) - .unwrap_or(DEFAULT_SLOT_TIME); - - GeneralTimeConfig { - slot_duration: Duration::from_secs(slot_duration), - ntp_server: String::from(DEFAULT_NTP_SERVER), - timeout: DEFAULT_NTP_TIMEOUT, - interface: IpAddr::V4(Ipv4Addr::UNSPECIFIED), - update_interval: DEFAULT_NTP_UPDATE_INTERVAL, - } -} diff --git a/testing-framework/configs/src/topology/configs/tracing.rs b/testing-framework/configs/src/topology/configs/tracing.rs deleted file mode 100644 index 9d6e309..0000000 --- a/testing-framework/configs/src/topology/configs/tracing.rs +++ /dev/null @@ -1,146 +0,0 @@ -use lb_tracing::{ - logging::{local::FileConfig, loki::LokiConfig}, - metrics::otlp::OtlpMetricsConfig, - tracing::otlp::OtlpTracingConfig, -}; -use lb_tracing_service::{ - ConsoleLayer, FilterLayer, LoggerLayer, MetricsLayer, TracingLayer, TracingSettings, -}; -use testing_framework_env as tf_env; -use tracing::Level; - -use crate::IS_DEBUG_TRACING; - -#[derive(Clone, Default)] -pub struct GeneralTracingConfig { - pub tracing_settings: TracingSettings, -} - -impl GeneralTracingConfig { - fn local_debug_tracing(id: usize) -> Self { - let host_identifier = format!("node-{id}"); - let otlp_tracing = otlp_tracing_endpoint() - .and_then(|endpoint| endpoint.parse().ok()) - .map(|endpoint| { - TracingLayer::Otlp(OtlpTracingConfig { - endpoint, - sample_ratio: 0.5, - service_name: host_identifier.clone(), - }) - }) - .unwrap_or(TracingLayer::None); - let otlp_metrics = otlp_metrics_endpoint() - .and_then(|endpoint| endpoint.parse().ok()) - .map(|endpoint| { - MetricsLayer::Otlp(OtlpMetricsConfig { - endpoint, - host_identifier: host_identifier.clone(), - }) - }) - .unwrap_or(MetricsLayer::None); - - let filter = file_filter_override().unwrap_or_else(|| { - lb_tracing::filter::envfilter::EnvFilterConfig { - filters: std::iter::once(&("nomos", "debug")) - .map(|(k, v)| ((*k).to_owned(), (*v).to_owned())) - .collect(), - } - }); - - Self { - tracing_settings: TracingSettings { - logger: LoggerLayer::Loki(LokiConfig { - endpoint: "http://localhost:3100".parse().unwrap_or_else(|_| unsafe { - // Safety: the URL is a valid constant. - std::hint::unreachable_unchecked() - }), - host_identifier: host_identifier.clone(), - }), - tracing: otlp_tracing, - filter: FilterLayer::EnvFilter(filter), - metrics: otlp_metrics, - console: ConsoleLayer::None, - level: Level::DEBUG, - }, - } - } -} - -fn otlp_tracing_endpoint() -> Option { - tf_env::nomos_otlp_endpoint() -} - -fn otlp_metrics_endpoint() -> Option { - tf_env::nomos_otlp_metrics_endpoint() -} - -#[must_use] -pub fn create_tracing_configs(ids: &[[u8; 32]]) -> Vec { - if *IS_DEBUG_TRACING { - create_debug_configs(ids) - } else { - create_default_configs(ids) - } -} - -fn create_debug_configs(ids: &[[u8; 32]]) -> Vec { - ids.iter() - .enumerate() - .map(|(i, _)| (i, GeneralTracingConfig::local_debug_tracing(i))) - .map(|(i, cfg)| apply_file_logger_override(cfg, i)) - .map(maybe_disable_otlp_layers) - .collect() -} - -fn create_default_configs(ids: &[[u8; 32]]) -> Vec { - ids.iter() - .enumerate() - .map(|(i, _)| (i, GeneralTracingConfig::default())) - .map(|(i, cfg)| apply_file_logger_override(cfg, i)) - .map(maybe_disable_otlp_layers) - .collect() -} - -fn apply_file_logger_override( - mut cfg: GeneralTracingConfig, - node_index: usize, -) -> GeneralTracingConfig { - if let Some(directory) = tf_env::nomos_log_dir() { - cfg.tracing_settings.logger = LoggerLayer::File(FileConfig { - directory, - prefix: Some(format!("logos-blockchain-node-{node_index}").into()), - }); - cfg.tracing_settings.level = file_log_level(); - } - cfg -} - -fn file_log_level() -> Level { - tf_env::nomos_log_level() - .and_then(|raw| raw.parse::().ok()) - .unwrap_or(Level::INFO) -} - -fn file_filter_override() -> Option { - tf_env::nomos_log_filter().map(|raw| lb_tracing::filter::envfilter::EnvFilterConfig { - filters: raw - .split(',') - .filter_map(|pair| { - let mut parts = pair.splitn(2, '='); - let target = parts.next()?.trim().to_string(); - let level = parts.next()?.trim().to_string(); - (!target.is_empty() && !level.is_empty()).then_some((target, level)) - }) - .collect(), - }) -} - -fn maybe_disable_otlp_layers(mut cfg: GeneralTracingConfig) -> GeneralTracingConfig { - if otlp_tracing_endpoint().is_none() { - cfg.tracing_settings.tracing = TracingLayer::None; - } - if otlp_metrics_endpoint().is_none() { - cfg.tracing_settings.metrics = MetricsLayer::None; - } - cfg -} diff --git a/testing-framework/configs/src/topology/configs/wallet.rs b/testing-framework/configs/src/topology/configs/wallet.rs deleted file mode 100644 index a20d6eb..0000000 --- a/testing-framework/configs/src/topology/configs/wallet.rs +++ /dev/null @@ -1,79 +0,0 @@ -use std::num::NonZeroUsize; - -use lb_key_management_system_service::keys::{ZkKey, ZkPublicKey}; -use num_bigint::BigUint; - -/// Collection of wallet accounts that should be funded at genesis. -#[derive(Clone, Default, Debug, serde::Serialize, serde::Deserialize)] -pub struct WalletConfig { - pub accounts: Vec, -} - -impl WalletConfig { - #[must_use] - pub const fn new(accounts: Vec) -> Self { - Self { accounts } - } - - #[must_use] - pub fn uniform(total_funds: u64, users: NonZeroUsize) -> Self { - let user_count = users.get() as u64; - assert!(user_count > 0, "wallet user count must be non-zero"); - assert!( - total_funds >= user_count, - "wallet funds must allocate at least 1 token per user" - ); - - let base_allocation = total_funds / user_count; - let mut remainder = total_funds % user_count; - - let accounts = (0..users.get()) - .map(|idx| { - let mut amount = base_allocation; - if remainder > 0 { - amount += 1; - remainder -= 1; - } - - WalletAccount::deterministic(idx as u64, amount) - }) - .collect(); - - Self { accounts } - } -} - -/// Wallet account that holds funds in the genesis state. -#[derive(Clone, Debug, serde::Serialize, serde::Deserialize)] -pub struct WalletAccount { - pub label: String, - pub secret_key: ZkKey, - pub value: u64, -} - -impl WalletAccount { - #[must_use] - pub fn new(label: impl Into, secret_key: ZkKey, value: u64) -> Self { - assert!(value > 0, "wallet account value must be positive"); - Self { - label: label.into(), - secret_key, - value, - } - } - - #[must_use] - pub fn deterministic(index: u64, value: u64) -> Self { - let mut seed = [0u8; 32]; - seed[..2].copy_from_slice(b"wl"); - seed[2..10].copy_from_slice(&index.to_le_bytes()); - - let secret_key = ZkKey::from(BigUint::from_bytes_le(&seed)); - Self::new(format!("wallet-user-{index}"), secret_key, value) - } - - #[must_use] - pub fn public_key(&self) -> ZkPublicKey { - self.secret_key.to_public_key() - } -} diff --git a/testing-framework/configs/src/topology/invariants.rs b/testing-framework/configs/src/topology/invariants.rs deleted file mode 100644 index 7b02618..0000000 --- a/testing-framework/configs/src/topology/invariants.rs +++ /dev/null @@ -1,73 +0,0 @@ -use thiserror::Error; - -#[derive(Debug, Error, PartialEq, Eq)] -pub enum TopologyInvariantError { - #[error("participant count must be > 0")] - EmptyParticipants, - #[error("id count {actual} does not match participants {expected}")] - IdCountMismatch { actual: usize, expected: usize }, - #[error("da port count {actual} does not match participants {expected}")] - DaPortCountMismatch { actual: usize, expected: usize }, - #[error("blend port count {actual} does not match participants {expected}")] - BlendPortCountMismatch { actual: usize, expected: usize }, -} - -/// Validate basic invariants shared across all config generation pipelines. -/// -/// This intentionally focuses on "shape" invariants (counts, non-empty) and -/// avoids opinionated checks so behavior stays unchanged. -pub fn validate_node_vectors( - participants: usize, - ids: Option<&Vec<[u8; 32]>>, - blend_ports: Option<&Vec>, -) -> Result<(), TopologyInvariantError> { - if participants == 0 { - return Err(TopologyInvariantError::EmptyParticipants); - } - - if let Some(ids) = ids { - if ids.len() != participants { - return Err(TopologyInvariantError::IdCountMismatch { - actual: ids.len(), - expected: participants, - }); - } - } - - if let Some(ports) = blend_ports { - if ports.len() != participants { - return Err(TopologyInvariantError::BlendPortCountMismatch { - actual: ports.len(), - expected: participants, - }); - } - } - - Ok(()) -} - -pub fn validate_generated_vectors( - participants: usize, - ids: &[[u8; 32]], - blend_ports: &[u16], -) -> Result<(), TopologyInvariantError> { - if participants == 0 { - return Err(TopologyInvariantError::EmptyParticipants); - } - - if ids.len() != participants { - return Err(TopologyInvariantError::IdCountMismatch { - actual: ids.len(), - expected: participants, - }); - } - - if blend_ports.len() != participants { - return Err(TopologyInvariantError::BlendPortCountMismatch { - actual: blend_ports.len(), - expected: participants, - }); - } - - Ok(()) -} diff --git a/testing-framework/configs/src/topology/mod.rs b/testing-framework/configs/src/topology/mod.rs deleted file mode 100644 index 3017c02..0000000 --- a/testing-framework/configs/src/topology/mod.rs +++ /dev/null @@ -1,2 +0,0 @@ -pub mod configs; -pub mod invariants; diff --git a/testing-framework/core/Cargo.toml b/testing-framework/core/Cargo.toml index 8b902b7..00e0658 100644 --- a/testing-framework/core/Cargo.toml +++ b/testing-framework/core/Cargo.toml @@ -16,32 +16,12 @@ workspace = true default = [] [dependencies] -anyhow = "1" -async-trait = "0.1" -futures = { default-features = false, version = "0.3" } -hex = { default-features = false, version = "0.4.3" } -lb-chain-service = { workspace = true } -lb-common-http-client = { workspace = true } -lb-core = { workspace = true } -lb-groth16 = { workspace = true } -lb-http-api-common = { workspace = true } -lb-key-management-system-service = { workspace = true } -lb-libp2p = { workspace = true } -lb-network-service = { features = ["libp2p"], workspace = true } -lb-node = { default-features = false, features = ["testing"], workspace = true } -lb-tracing = { workspace = true } -lb-tracing-service = { workspace = true } -lb-utils = { workspace = true } -prometheus-http-query = "0.8" -rand = { workspace = true } -reqwest = { features = ["json"], workspace = true } -serde = { workspace = true } -serde_json = { workspace = true } -serde_with = { workspace = true } -serde_yaml = { workspace = true } -tempfile = { workspace = true } -testing-framework-config = { workspace = true } -testing-framework-env = { workspace = true } -thiserror = { workspace = true } -tokio = { features = ["macros", "process", "rt-multi-thread", "time"], workspace = true } -tracing = { workspace = true } +async-trait = "0.1" +futures = { default-features = false, features = ["std"], version = "0.3" } +parking_lot = { workspace = true } +prometheus-http-query = "0.8" +rand = { workspace = true } +reqwest = { features = ["json"], workspace = true } +thiserror = { workspace = true } +tokio = { features = ["macros", "process", "rt-multi-thread", "time"], workspace = true } +tracing = { workspace = true } diff --git a/testing-framework/core/src/cfgsync/mod.rs b/testing-framework/core/src/cfgsync/mod.rs new file mode 100644 index 0000000..8f0ad34 --- /dev/null +++ b/testing-framework/core/src/cfgsync/mod.rs @@ -0,0 +1,84 @@ +use std::error::Error; + +use thiserror::Error; + +pub type DynCfgsyncError = Box; + +#[derive(Debug, Clone)] +pub struct CfgsyncNodeConfig { + pub identifier: String, + pub config_yaml: String, +} + +pub trait CfgsyncEnv { + type Deployment; + type Node; + type NodeConfig; + type Error: Error + Send + Sync + 'static; + + fn nodes(deployment: &Self::Deployment) -> &[Self::Node]; + + fn node_identifier(index: usize, node: &Self::Node) -> String; + + fn build_node_config( + deployment: &Self::Deployment, + node: &Self::Node, + ) -> Result; + + fn rewrite_for_hostnames( + deployment: &Self::Deployment, + node_index: usize, + hostnames: &[String], + config: &mut Self::NodeConfig, + ) -> Result<(), Self::Error>; + + fn serialize_node_config(config: &Self::NodeConfig) -> Result; +} + +#[derive(Debug, Error)] +pub enum BuildCfgsyncNodesError { + #[error("cfgsync hostnames mismatch (nodes={nodes}, hostnames={hostnames})")] + HostnameCountMismatch { nodes: usize, hostnames: usize }, + #[error("cfgsync adapter failed: {source}")] + Adapter { + #[source] + source: DynCfgsyncError, + }, +} + +fn adapter_error(source: E) -> BuildCfgsyncNodesError +where + E: Error + Send + Sync + 'static, +{ + BuildCfgsyncNodesError::Adapter { + source: Box::new(source), + } +} + +pub fn build_cfgsync_node_configs( + deployment: &E::Deployment, + hostnames: &[String], +) -> Result, BuildCfgsyncNodesError> { + let nodes = E::nodes(deployment); + if nodes.len() != hostnames.len() { + return Err(BuildCfgsyncNodesError::HostnameCountMismatch { + nodes: nodes.len(), + hostnames: hostnames.len(), + }); + } + + let mut output = Vec::with_capacity(nodes.len()); + for (index, node) in nodes.iter().enumerate() { + let mut node_config = E::build_node_config(deployment, node).map_err(adapter_error)?; + E::rewrite_for_hostnames(deployment, index, hostnames, &mut node_config) + .map_err(adapter_error)?; + let config_yaml = E::serialize_node_config(&node_config).map_err(adapter_error)?; + + output.push(CfgsyncNodeConfig { + identifier: E::node_identifier(index, node), + config_yaml, + }); + } + + Ok(output) +} diff --git a/testing-framework/core/src/env.rs b/testing-framework/core/src/env.rs new file mode 100644 index 0000000..e8b59e0 --- /dev/null +++ b/testing-framework/core/src/env.rs @@ -0,0 +1,22 @@ +use async_trait::async_trait; + +use crate::{ + scenario::{DynError, FeedRuntime}, + topology::DeploymentDescriptor, +}; + +/// Bundles all backend-specific types used by the core scenario engine. +#[async_trait] +pub trait Application: Send + Sync + 'static { + type Deployment: DeploymentDescriptor + Clone + 'static; + + type NodeClient: Clone + Send + Sync + 'static; + + type NodeConfig: Clone + Send + Sync + 'static; + + type FeedRuntime: FeedRuntime; + + async fn prepare_feed( + client: Self::NodeClient, + ) -> Result<(::Feed, Self::FeedRuntime), DynError>; +} diff --git a/testing-framework/core/src/lib.rs b/testing-framework/core/src/lib.rs index 244b161..5cbdb97 100644 --- a/testing-framework/core/src/lib.rs +++ b/testing-framework/core/src/lib.rs @@ -1,16 +1,16 @@ -pub mod manual; -pub mod nodes; +pub mod cfgsync; +pub mod env; +pub mod runtime; pub mod scenario; pub mod topology; +pub mod workloads; -use std::{env, ops::Mul as _, sync::LazyLock, time::Duration}; +use std::{env as std_env, ops::Mul as _, sync::LazyLock, time::Duration}; -pub use testing_framework_config::{ - IS_DEBUG_TRACING, node_address_from_port, secret_key_to_peer_id, secret_key_to_provider_id, -}; +pub use runtime::{manual, process, retry}; static IS_SLOW_TEST_ENV: LazyLock = - LazyLock::new(|| env::var("SLOW_TEST_ENV").is_ok_and(|s| s == "true")); + LazyLock::new(|| std_env::var("SLOW_TEST_ENV").is_ok_and(|s| s == "true")); /// In slow test environments like Codecov, use 2x timeout. #[must_use] diff --git a/testing-framework/core/src/manual.rs b/testing-framework/core/src/manual.rs deleted file mode 100644 index a0fec65..0000000 --- a/testing-framework/core/src/manual.rs +++ /dev/null @@ -1,15 +0,0 @@ -use async_trait::async_trait; - -use crate::scenario::{DynError, NodeControlHandle, StartNodeOptions, StartedNode}; - -/// Interface for imperative, deployer-backed manual clusters. -#[async_trait] -pub trait ManualClusterHandle: NodeControlHandle { - async fn start_node_with( - &self, - name: &str, - options: StartNodeOptions, - ) -> Result; - - async fn wait_network_ready(&self) -> Result<(), DynError>; -} diff --git a/testing-framework/core/src/nodes/api_client.rs b/testing-framework/core/src/nodes/api_client.rs deleted file mode 100644 index e5e6671..0000000 --- a/testing-framework/core/src/nodes/api_client.rs +++ /dev/null @@ -1,334 +0,0 @@ -use std::net::SocketAddr; - -use hex; -use lb_chain_service::CryptarchiaInfo; -use lb_common_http_client::CommonHttpClient; -use lb_core::{block::Block, mantle::SignedMantleTx}; -use lb_http_api_common::paths::{ - CRYPTARCHIA_HEADERS, CRYPTARCHIA_INFO, MEMPOOL_ADD_TX, NETWORK_INFO, STORAGE_BLOCK, -}; -use lb_network_service::backends::libp2p::Libp2pInfo; -use lb_node::HeaderId; -use reqwest::{Client, RequestBuilder, Response, Url}; -use serde::{Serialize, de::DeserializeOwned}; -use serde_json::Value; -use tracing::error; - -pub const DA_GET_TESTING_ENDPOINT_ERROR: &str = "Failed to connect to testing endpoint. The binary was likely built without the 'testing' \ - feature. Try: cargo build --workspace --all-features"; - -#[derive(Debug, thiserror::Error)] -pub enum ApiClientError { - #[error("{DA_GET_TESTING_ENDPOINT_ERROR}")] - TestingEndpointUnavailable, - #[error(transparent)] - Request(#[from] reqwest::Error), -} - -/// Thin async client for node HTTP/testing endpoints. -#[derive(Clone)] -pub struct ApiClient { - pub(crate) base_url: Url, - pub(crate) testing_url: Option, - client: Client, - pub(crate) http_client: CommonHttpClient, -} - -impl ApiClient { - #[must_use] - /// Construct from socket addresses. - pub fn new(base_addr: SocketAddr, testing_addr: Option) -> Self { - let base_url = Url::parse(&format!("http://{base_addr}")).unwrap_or_else(|_| unsafe { - // Safety: `SocketAddr` formatting yields a valid host:port pair. - std::hint::unreachable_unchecked() - }); - let testing_url = testing_addr.map(|addr| { - Url::parse(&format!("http://{addr}")).unwrap_or_else(|_| unsafe { - // Safety: `SocketAddr` formatting yields a valid host:port pair. - std::hint::unreachable_unchecked() - }) - }); - Self::from_urls(base_url, testing_url) - } - - #[must_use] - /// Construct from prebuilt URLs. - pub fn from_urls(base_url: Url, testing_url: Option) -> Self { - let client = Client::new(); - Self { - base_url, - testing_url, - http_client: CommonHttpClient::new_with_client(client.clone(), None), - client, - } - } - - #[must_use] - /// Testing URL, when built with testing features. - pub fn testing_url(&self) -> Option { - self.testing_url.clone() - } - - /// Build a GET request against the base API. - pub fn get_builder(&self, path: &str) -> RequestBuilder { - self.client.get(self.join_base(path)) - } - - /// Issue a GET request against the base API. - pub async fn get_response(&self, path: &str) -> reqwest::Result { - self.client.get(self.join_base(path)).send().await - } - - /// GET and decode JSON from the base API. - pub async fn get_json(&self, path: &str) -> reqwest::Result - where - T: DeserializeOwned, - { - self.get_response(path) - .await? - .error_for_status()? - .json() - .await - } - - /// POST JSON to the base API and decode a response. - pub async fn post_json_decode(&self, path: &str, body: &T) -> reqwest::Result - where - T: Serialize + Sync + ?Sized, - R: DeserializeOwned, - { - self.post_json_response(path, body) - .await? - .error_for_status()? - .json() - .await - } - - /// POST JSON to the base API and return the raw response. - pub async fn post_json_response(&self, path: &str, body: &T) -> reqwest::Result - where - T: Serialize + Sync + ?Sized, - { - self.client - .post(self.join_base(path)) - .json(body) - .send() - .await - } - - /// POST JSON to the base API and expect a success status. - pub async fn post_json_unit(&self, path: &str, body: &T) -> reqwest::Result<()> - where - T: Serialize + Sync + ?Sized, - { - self.post_json_response(path, body) - .await? - .error_for_status()?; - Ok(()) - } - - /// GET and decode JSON from the testing API. - pub async fn get_testing_json(&self, path: &str) -> Result - where - T: DeserializeOwned, - { - self.get_testing_response_checked(path) - .await? - .error_for_status() - .map_err(ApiClientError::Request)? - .json() - .await - .map_err(ApiClientError::Request) - } - - /// POST JSON to the testing API and decode a response. - pub async fn post_testing_json_decode( - &self, - path: &str, - body: &T, - ) -> Result - where - T: Serialize + Sync + ?Sized, - R: DeserializeOwned, - { - self.post_testing_json_response_checked(path, body) - .await? - .error_for_status() - .map_err(ApiClientError::Request)? - .json() - .await - .map_err(ApiClientError::Request) - } - - /// POST JSON to the testing API and expect a success status. - pub async fn post_testing_json_unit( - &self, - path: &str, - body: &T, - ) -> Result<(), ApiClientError> - where - T: Serialize + Sync + ?Sized, - { - self.post_testing_json_response_checked(path, body) - .await? - .error_for_status() - .map_err(ApiClientError::Request)?; - Ok(()) - } - - /// POST JSON to the testing API and return the raw response. - pub async fn post_testing_json_response_checked( - &self, - path: &str, - body: &T, - ) -> Result - where - T: Serialize + Sync + ?Sized, - { - let testing_url = self - .testing_url - .as_ref() - .ok_or(ApiClientError::TestingEndpointUnavailable)?; - self.client - .post(Self::join_url(testing_url, path)) - .json(body) - .send() - .await - .map_err(ApiClientError::Request) - } - - pub async fn post_testing_json_response( - &self, - path: &str, - body: &T, - ) -> Result - where - T: Serialize + Sync + ?Sized, - { - self.post_testing_json_response_checked(path, body).await - } - - /// GET from the testing API and return the raw response. - pub async fn get_testing_response_checked( - &self, - path: &str, - ) -> Result { - let testing_url = self - .testing_url - .as_ref() - .ok_or(ApiClientError::TestingEndpointUnavailable)?; - self.client - .get(Self::join_url(testing_url, path)) - .send() - .await - .map_err(ApiClientError::Request) - } - - pub async fn get_testing_response(&self, path: &str) -> Result { - self.get_testing_response_checked(path).await - } - - /// Fetch consensus info from the base API. - pub async fn consensus_info(&self) -> reqwest::Result { - self.get_json(CRYPTARCHIA_INFO).await - } - - /// Fetch libp2p network info. - pub async fn network_info(&self) -> reqwest::Result { - self.get_json(NETWORK_INFO).await - } - - /// Fetch a block by hash from storage. - pub async fn storage_block( - &self, - id: &HeaderId, - ) -> reqwest::Result>> { - self.post_json_decode(STORAGE_BLOCK, id).await - } - - /// Fetch header ids between optional bounds. - /// When `from` is None, defaults to tip; when `to` is None, defaults to - /// LIB. - pub async fn consensus_headers( - &self, - from: Option, - to: Option, - ) -> reqwest::Result> { - let mut url = self.join_base(CRYPTARCHIA_HEADERS); - { - let mut pairs = url.query_pairs_mut(); - if let Some(from) = from { - let bytes: [u8; 32] = from.into(); - pairs.append_pair("from", &hex::encode(bytes)); - } - if let Some(to) = to { - let bytes: [u8; 32] = to.into(); - pairs.append_pair("to", &hex::encode(bytes)); - } - } - self.client - .get(url) - .send() - .await? - .error_for_status()? - .json() - .await - } - - /// Submit a mantle transaction through the base API. - pub async fn submit_transaction(&self, tx: &SignedMantleTx) -> reqwest::Result<()> { - let res = self.post_json_response(MEMPOOL_ADD_TX, tx).await?; - if let Err(status_err) = res.error_for_status_ref() { - let status = res.status(); - let body = res - .text() - .await - .unwrap_or_else(|_| "".to_string()); - error!(%status, %body, "submit_transaction request failed"); - return Err(status_err); - } - Ok(()) - } - - /// Execute a custom request built by the caller. - pub async fn get_headers_raw(&self, builder: RequestBuilder) -> reqwest::Result { - builder.send().await - } - - /// Fetch raw mempool metrics from the testing endpoint. - pub async fn mempool_metrics(&self, pool: &str) -> reqwest::Result { - self.get_json(&format!("/{pool}/metrics")).await - } - - #[must_use] - /// Base API URL. - pub const fn base_url(&self) -> &Url { - &self.base_url - } - - #[must_use] - /// Underlying common HTTP client wrapper. - pub const fn http_client(&self) -> &CommonHttpClient { - &self.http_client - } - - fn join_base(&self, path: &str) -> Url { - Self::join_url(&self.base_url, path) - } - - fn join_url(base: &Url, path: &str) -> Url { - let trimmed = path.trim_start_matches('/'); - match base.join(trimmed) { - Ok(url) => url, - Err(err) => { - error!( - error = %err, - base = %base, - path, - "failed to join url; falling back to base url" - ); - base.clone() - } - } - } -} diff --git a/testing-framework/core/src/nodes/common/binary.rs b/testing-framework/core/src/nodes/common/binary.rs deleted file mode 100644 index 642eacf..0000000 --- a/testing-framework/core/src/nodes/common/binary.rs +++ /dev/null @@ -1,61 +0,0 @@ -use std::{env, path::PathBuf}; - -use tracing::{debug, info}; - -pub struct BinaryConfig { - pub env_var: &'static str, - pub binary_name: &'static str, - pub fallback_path: &'static str, - pub shared_bin_subpath: &'static str, -} - -pub struct BinaryResolver; - -impl BinaryResolver { - pub fn resolve_path(config: &BinaryConfig) -> PathBuf { - if let Some(path) = env::var_os(config.env_var) { - let resolved = PathBuf::from(path); - - info!( - env = config.env_var, - binary = config.binary_name, - path = %resolved.display(), - "resolved binary from env override" - ); - return resolved; - } - if let Some(path) = Self::which_on_path(config.binary_name) { - info!( - binary = config.binary_name, - path = %path.display(), - "resolved binary from PATH" - ); - return path; - } - let shared_bin = PathBuf::from(env!("CARGO_MANIFEST_DIR")).join(config.shared_bin_subpath); - if shared_bin.exists() { - info!( - binary = config.binary_name, - path = %shared_bin.display(), - "resolved binary from shared assets" - ); - return shared_bin; - } - let root = PathBuf::from(env!("CARGO_MANIFEST_DIR")).join("../../"); - let fallback = root.join(config.fallback_path); - - debug!( - binary = config.binary_name, - path = %fallback.display(), - "falling back to binary path" - ); - fallback - } - - fn which_on_path(bin: &str) -> Option { - let path_env = env::var_os("PATH")?; - env::split_paths(&path_env) - .map(|p| p.join(bin)) - .find(|candidate| candidate.is_file()) - } -} diff --git a/testing-framework/core/src/nodes/common/config/injection.rs b/testing-framework/core/src/nodes/common/config/injection.rs deleted file mode 100644 index 18172a8..0000000 --- a/testing-framework/core/src/nodes/common/config/injection.rs +++ /dev/null @@ -1,133 +0,0 @@ -use hex; -use lb_key_management_system_service::keys::{Ed25519Key, Key}; -use serde_yaml::{Mapping, Number as YamlNumber, Value}; -use testing_framework_config::nodes::kms::key_id_for_preload_backend; - -pub fn normalize_ed25519_sigs(_value: &mut Value) {} - -/// Inject cryptarchia/IBD defaults into a YAML config in-place. -pub fn inject_ibd_into_cryptarchia(yaml_value: &mut Value) { - let Some(cryptarchia) = cryptarchia_section(yaml_value) else { - return; - }; - ensure_network_adapter(cryptarchia); - ensure_sync_defaults(cryptarchia); - ensure_ibd_bootstrap(cryptarchia); -} - -/// Inject blend non-ephemeral signing key id when missing. -pub fn inject_blend_non_ephemeral_signing_key_id(yaml_value: &mut Value) { - let Some(blend) = blend_section(yaml_value) else { - return; - }; - - let key_id_key = Value::String("non_ephemeral_signing_key_id".into()); - if blend.contains_key(&key_id_key) { - return; - } - - let Some(key_str) = blend - .get(&Value::String("non_ephemeral_signing_key".into())) - .and_then(Value::as_str) - else { - return; - }; - - let Ok(bytes) = hex::decode(key_str) else { - return; - }; - let Ok(raw) = <[u8; 32]>::try_from(bytes.as_slice()) else { - return; - }; - - let key_id = key_id_for_preload_backend(&Key::Ed25519(Ed25519Key::from_bytes(&raw))); - blend.insert(key_id_key, Value::String(key_id)); -} - -/// Inject deployment chain sync protocol name when missing. -pub fn inject_chain_sync_protocol_name(yaml_value: &mut Value) { - let Some(network) = deployment_network_section(yaml_value) else { - return; - }; - - let key = Value::String("chain_sync_protocol_name".into()); - if network.contains_key(&key) { - return; - } - - network.insert( - key, - Value::String("/integration/nomos/cryptarchia/sync/1.0.0".into()), - ); -} - -fn cryptarchia_section(yaml_value: &mut Value) -> Option<&mut Mapping> { - yaml_value - .as_mapping_mut() - .and_then(|root| root.get_mut(&Value::String("cryptarchia".into()))) - .and_then(Value::as_mapping_mut) -} - -fn blend_section(yaml_value: &mut Value) -> Option<&mut Mapping> { - yaml_value - .as_mapping_mut() - .and_then(|root| root.get_mut(&Value::String("blend".into()))) - .and_then(Value::as_mapping_mut) -} - -fn deployment_network_section(yaml_value: &mut Value) -> Option<&mut Mapping> { - yaml_value - .as_mapping_mut() - .and_then(|root| root.get_mut(&Value::String("deployment".into()))) - .and_then(Value::as_mapping_mut) - .and_then(|deployment| deployment.get_mut(&Value::String("network".into()))) - .and_then(Value::as_mapping_mut) -} - -fn ensure_network_adapter(cryptarchia: &mut Mapping) { - if cryptarchia.contains_key(&Value::String("network_adapter_settings".into())) { - return; - } - let mut network = Mapping::new(); - network.insert( - Value::String("topic".into()), - Value::String("/cryptarchia/proto".into()), - ); - cryptarchia.insert( - Value::String("network_adapter_settings".into()), - Value::Mapping(network), - ); -} - -fn ensure_sync_defaults(cryptarchia: &mut Mapping) { - if cryptarchia.contains_key(&Value::String("sync".into())) { - return; - } - let mut orphan = Mapping::new(); - orphan.insert( - Value::String("max_orphan_cache_size".into()), - Value::Number(YamlNumber::from(5)), - ); - let mut sync = Mapping::new(); - sync.insert(Value::String("orphan".into()), Value::Mapping(orphan)); - cryptarchia.insert(Value::String("sync".into()), Value::Mapping(sync)); -} - -fn ensure_ibd_bootstrap(cryptarchia: &mut Mapping) { - let Some(bootstrap) = cryptarchia - .get_mut(&Value::String("bootstrap".into())) - .and_then(Value::as_mapping_mut) - else { - return; - }; - - let ibd_key = Value::String("ibd".into()); - if bootstrap.contains_key(&ibd_key) { - return; - } - - let mut ibd = Mapping::new(); - ibd.insert(Value::String("peers".into()), Value::Sequence(vec![])); - - bootstrap.insert(ibd_key, Value::Mapping(ibd)); -} diff --git a/testing-framework/core/src/nodes/common/config/mod.rs b/testing-framework/core/src/nodes/common/config/mod.rs deleted file mode 100644 index 1501330..0000000 --- a/testing-framework/core/src/nodes/common/config/mod.rs +++ /dev/null @@ -1,2 +0,0 @@ -pub mod injection; -pub mod paths; diff --git a/testing-framework/core/src/nodes/common/config/paths.rs b/testing-framework/core/src/nodes/common/config/paths.rs deleted file mode 100644 index 01a499d..0000000 --- a/testing-framework/core/src/nodes/common/config/paths.rs +++ /dev/null @@ -1,33 +0,0 @@ -use std::{fs, io, path::Path}; - -/// Ensure recovery-related directories and placeholder files exist under the -/// given base dir. -pub fn ensure_recovery_paths(base_dir: &Path) -> io::Result<()> { - let recovery_dir = base_dir.join("recovery"); - fs::create_dir_all(&recovery_dir)?; - - let mempool_path = recovery_dir.join("mempool.json"); - if !mempool_path.exists() { - fs::write(&mempool_path, "{}")?; - } - - let cryptarchia_path = recovery_dir.join("cryptarchia.json"); - if !cryptarchia_path.exists() { - fs::write(&cryptarchia_path, "{}")?; - } - - let wallet_path = recovery_dir.join("wallet.json"); - if !wallet_path.exists() { - fs::write(&wallet_path, "{}")?; - } - - let blend_core_path = recovery_dir.join("blend").join("core.json"); - if let Some(parent) = blend_core_path.parent() { - fs::create_dir_all(parent)?; - } - if !blend_core_path.exists() { - fs::write(&blend_core_path, "{}")?; - } - - Ok(()) -} diff --git a/testing-framework/core/src/nodes/common/lifecycle/kill.rs b/testing-framework/core/src/nodes/common/lifecycle/kill.rs deleted file mode 100644 index 8ebeba6..0000000 --- a/testing-framework/core/src/nodes/common/lifecycle/kill.rs +++ /dev/null @@ -1,6 +0,0 @@ -use std::process::Child; - -/// Shared cleanup helpers for child processes. -pub fn kill_child(child: &mut Child) { - let _ = child.kill(); -} diff --git a/testing-framework/core/src/nodes/common/lifecycle/mod.rs b/testing-framework/core/src/nodes/common/lifecycle/mod.rs deleted file mode 100644 index 9188bed..0000000 --- a/testing-framework/core/src/nodes/common/lifecycle/mod.rs +++ /dev/null @@ -1,3 +0,0 @@ -pub mod kill; -pub mod monitor; -pub mod spawn; diff --git a/testing-framework/core/src/nodes/common/lifecycle/monitor.rs b/testing-framework/core/src/nodes/common/lifecycle/monitor.rs deleted file mode 100644 index b21abdf..0000000 --- a/testing-framework/core/src/nodes/common/lifecycle/monitor.rs +++ /dev/null @@ -1,21 +0,0 @@ -use std::process::Child; - -use tracing::debug; - -/// Check if a child process is still running. -pub fn is_running(child: &mut Child) -> bool { - match child.try_wait() { - Ok(None) => { - debug!("process still running"); - true - } - Ok(Some(status)) => { - debug!(?status, "process exited"); - false - } - Err(err) => { - debug!(error = ?err, "process state check failed"); - false - } - } -} diff --git a/testing-framework/core/src/nodes/common/lifecycle/spawn.rs b/testing-framework/core/src/nodes/common/lifecycle/spawn.rs deleted file mode 100644 index 86d5121..0000000 --- a/testing-framework/core/src/nodes/common/lifecycle/spawn.rs +++ /dev/null @@ -1,56 +0,0 @@ -use std::{ - fs::{self, File}, - io, - path::Path, -}; - -use lb_tracing::logging::local::FileConfig; -use serde::Serialize; -use serde_yaml::Value; -use testing_framework_env as tf_env; -use tracing::debug; - -use crate::nodes::common::config::injection::normalize_ed25519_sigs; - -/// Configure tracing logger to write into `LOGOS_BLOCKCHAIN_LOG_DIR` if set, -/// else into the provided base dir. -pub fn configure_logging(base_dir: &Path, prefix: &str, set_logger: F) -where - F: FnOnce(FileConfig), -{ - debug!(prefix, base_dir = %base_dir.display(), "configuring node logging"); - - if let Some(log_dir) = tf_env::nomos_log_dir() { - let _ = fs::create_dir_all(&log_dir); - - set_logger(FileConfig { - directory: log_dir, - prefix: Some(prefix.into()), - }); - } else { - set_logger(FileConfig { - directory: base_dir.to_owned(), - prefix: Some(prefix.into()), - }); - } -} - -/// Write a YAML config file, allowing a caller-provided injection hook to -/// mutate the serialized value before it is written. -pub fn write_config_with_injection(config: &T, path: &Path, inject: F) -> io::Result<()> -where - T: Serialize, - F: FnOnce(&mut Value), -{ - debug!(path = %path.display(), "writing node config with injection"); - - let mut yaml_value = - serde_yaml::to_value(config).map_err(|err| io::Error::new(io::ErrorKind::Other, err))?; - - inject(&mut yaml_value); - normalize_ed25519_sigs(&mut yaml_value); - - let file = File::create(path)?; - serde_yaml::to_writer(file, &yaml_value) - .map_err(|err| io::Error::new(io::ErrorKind::Other, err)) -} diff --git a/testing-framework/core/src/nodes/common/mod.rs b/testing-framework/core/src/nodes/common/mod.rs deleted file mode 100644 index 01a8462..0000000 --- a/testing-framework/core/src/nodes/common/mod.rs +++ /dev/null @@ -1,4 +0,0 @@ -pub mod binary; -pub mod config; -pub mod lifecycle; -pub mod node; diff --git a/testing-framework/core/src/nodes/common/node.rs b/testing-framework/core/src/nodes/common/node.rs deleted file mode 100644 index 0955d1c..0000000 --- a/testing-framework/core/src/nodes/common/node.rs +++ /dev/null @@ -1,273 +0,0 @@ -use std::{ - fs, io, - net::SocketAddr, - path::{Path, PathBuf}, - process::{Child, Command, Stdio}, - time::Duration, -}; - -use lb_tracing_service::LoggerLayer; -use reqwest::Url; -use serde::Serialize; -use serde_yaml::Value; -use tempfile::TempDir; -use tokio::time; -use tracing::{debug, info}; - -use super::lifecycle::monitor::is_running; -use crate::nodes::{ - ApiClient, - common::{config::paths::ensure_recovery_paths, lifecycle::spawn::configure_logging}, - create_tempdir, persist_tempdir, -}; - -const EXIT_POLL_INTERVAL: Duration = Duration::from_millis(100); -const STARTUP_POLL_INTERVAL: Duration = Duration::from_millis(100); -const STARTUP_TIMEOUT: Duration = Duration::from_secs(60); - -pub type NodeAddresses = (SocketAddr, Option); -pub type PreparedNodeConfig = (TempDir, T, SocketAddr, Option); - -#[derive(Debug, thiserror::Error)] -pub enum SpawnNodeError { - #[error("failed to create node tempdir: {source}")] - TempDir { - #[source] - source: io::Error, - }, - #[error("failed to prepare node recovery paths: {source}")] - RecoveryPaths { - #[source] - source: io::Error, - }, - #[error("failed to write node config at {path}: {source}")] - WriteConfig { - path: PathBuf, - #[source] - source: io::Error, - }, - #[error("failed to spawn node process '{binary}': {source}")] - Spawn { - binary: PathBuf, - #[source] - source: io::Error, - }, - #[error("node did not become ready before timeout: {source}")] - Readiness { - #[source] - source: tokio::time::error::Elapsed, - }, -} - -/// Minimal interface to apply common node setup. -pub trait NodeConfigCommon { - fn set_logger(&mut self, logger: LoggerLayer); - fn set_paths(&mut self, base: &Path); - fn addresses(&self) -> NodeAddresses; -} - -/// Shared handle for spawned nodes that exposes common operations. -pub struct NodeHandle { - pub(crate) child: Child, - pub(crate) tempdir: TempDir, - pub(crate) config: T, - pub(crate) api: ApiClient, -} - -impl NodeHandle { - pub fn new(child: Child, tempdir: TempDir, config: T, api: ApiClient) -> Self { - Self { - child, - tempdir, - config, - api, - } - } - - #[must_use] - pub fn url(&self) -> Url { - self.api.base_url().clone() - } - - #[must_use] - pub fn testing_url(&self) -> Option { - self.api.testing_url() - } - - #[must_use] - pub fn api(&self) -> &ApiClient { - &self.api - } - - #[must_use] - pub const fn config(&self) -> &T { - &self.config - } - - /// Returns true if the process exited within the timeout, false otherwise. - pub async fn wait_for_exit(&mut self, timeout: Duration) -> bool { - time::timeout(timeout, async { - loop { - if !is_running(&mut self.child) { - return; - } - time::sleep(EXIT_POLL_INTERVAL).await; - } - }) - .await - .is_ok() - } -} - -/// Apply common setup (recovery paths, logging, data dirs) and return a ready -/// config plus API addrs. -pub fn prepare_node_config( - mut config: T, - log_prefix: &str, - enable_logging: bool, - persist_dir: Option, -) -> Result, SpawnNodeError> { - let dir = create_tempdir(persist_dir).map_err(|source| SpawnNodeError::TempDir { source })?; - - debug!(dir = %dir.path().display(), log_prefix, enable_logging, "preparing node config"); - - // Ensure recovery files/dirs exist so services that persist state do not fail - // on startup. - ensure_recovery_paths(dir.path()).map_err(|source| SpawnNodeError::RecoveryPaths { source })?; - - if enable_logging { - configure_logging(dir.path(), log_prefix, |file_cfg| { - config.set_logger(LoggerLayer::File(file_cfg)); - }); - } - - config.set_paths(dir.path()); - let (addr, testing_addr) = config.addresses(); - - debug!(addr = %addr, testing_addr = ?testing_addr, "configured node addresses"); - - Ok((dir, config, addr, testing_addr)) -} - -/// Spawn a node with shared setup, config writing, and readiness wait. -pub async fn spawn_node( - config: C, - log_prefix: &str, - config_filename: &str, - binary_path: PathBuf, - enable_logging: bool, - persist_dir: Option, -) -> Result, SpawnNodeError> -where - C: NodeConfigCommon + Serialize, -{ - let (dir, config, addr, testing_addr) = - prepare_node_config(config, log_prefix, enable_logging, persist_dir)?; - - let config_path = dir.path().join(config_filename); - write_node_config(&config, &config_path)?; - - debug!(config_file = %config_path.display(), binary = %binary_path.display(), "spawning node process"); - - let child = spawn_node_process(&binary_path, &config_path, dir.path())?; - - let mut handle = NodeHandle::new(child, dir, config, ApiClient::new(addr, testing_addr)); - - // Wait for readiness via consensus_info - let ready = wait_for_consensus_readiness(&handle.api).await; - - if let Err(err) = ready { - // Persist tempdir to aid debugging if readiness fails. - let _ = persist_tempdir(&mut handle.tempdir, "logos-blockchain-node"); - return Err(SpawnNodeError::Readiness { source: err }); - } - - info!("node readiness confirmed via consensus_info"); - Ok(handle) -} - -fn write_node_config(config: &C, config_path: &Path) -> Result<(), SpawnNodeError> { - super::lifecycle::spawn::write_config_with_injection(config, config_path, |yaml| { - crate::nodes::common::config::injection::inject_ibd_into_cryptarchia(yaml); - crate::nodes::common::config::injection::inject_blend_non_ephemeral_signing_key_id(yaml); - crate::nodes::common::config::injection::inject_chain_sync_protocol_name(yaml); - }) - .map_err(|source| SpawnNodeError::WriteConfig { - path: config_path.to_path_buf(), - source, - })?; - - write_deployment_config_if_present(config_path).map_err(|source| { - SpawnNodeError::WriteConfig { - path: config_path.to_path_buf(), - source, - } - })?; - - Ok(()) -} - -fn write_deployment_config_if_present(config_path: &Path) -> io::Result<()> { - let config_contents = fs::read_to_string(config_path)?; - let yaml_value: Value = serde_yaml::from_str(&config_contents).map_err(io::Error::other)?; - - let Value::Mapping(mut root) = yaml_value else { - return Ok(()); - }; - - let deployment_key = Value::String("deployment".into()); - - let Some(deployment) = root.remove(&deployment_key) else { - return Ok(()); - }; - - let Some(config_dir) = config_path.parent() else { - return Ok(()); - }; - - let deployment_path = config_dir.join("deployment.yaml"); - let deployment_contents = serde_yaml::to_string(&deployment).map_err(io::Error::other)?; - fs::write(deployment_path, deployment_contents)?; - - let updated_config = Value::Mapping(root); - let updated_contents = serde_yaml::to_string(&updated_config).map_err(io::Error::other)?; - fs::write(config_path, updated_contents) -} - -pub(crate) fn spawn_node_process( - binary_path: &Path, - config_path: &Path, - workdir: &Path, -) -> Result { - let mut cmd = Command::new(binary_path); - cmd.arg(config_path); - - let deployment_path = workdir.join("deployment.yaml"); - if deployment_path.is_file() { - cmd.arg("--deployment").arg(deployment_path); - } - - cmd.current_dir(workdir) - .stdin(Stdio::null()) - .stdout(Stdio::inherit()) - .stderr(Stdio::inherit()) - .spawn() - .map_err(|source| SpawnNodeError::Spawn { - binary: binary_path.to_path_buf(), - source, - }) -} - -pub(crate) async fn wait_for_consensus_readiness( - api: &ApiClient, -) -> Result<(), time::error::Elapsed> { - time::timeout(STARTUP_TIMEOUT, async { - loop { - if api.consensus_info().await.is_ok() { - break; - } - time::sleep(STARTUP_POLL_INTERVAL).await; - } - }) - .await -} diff --git a/testing-framework/core/src/nodes/mod.rs b/testing-framework/core/src/nodes/mod.rs deleted file mode 100644 index 7b2ad55..0000000 --- a/testing-framework/core/src/nodes/mod.rs +++ /dev/null @@ -1,73 +0,0 @@ -mod api_client; -pub mod common; -pub mod node; - -use std::{ - fs, - io::{Error, ErrorKind}, - path::PathBuf, - sync::LazyLock, -}; - -pub use api_client::{ApiClient, ApiClientError}; -use tempfile::TempDir; -use testing_framework_env as tf_env; - -pub(crate) const LOGS_PREFIX: &str = "__logs"; -static KEEP_NODE_TEMPDIRS: LazyLock = LazyLock::new(tf_env::nomos_tests_keep_logs); - -pub(crate) fn create_tempdir(custom_work_dir: Option) -> std::io::Result { - if let Some(dir) = custom_work_dir { - let final_dir_name = dir - .components() - .last() - .ok_or(Error::new(ErrorKind::Other, "Invalid final directory"))? - .as_os_str() - .display() - .to_string() - .to_owned() - + "_"; - let parent_dir = dir - .parent() - .ok_or(Error::new(ErrorKind::Other, "Invalid parent directory"))?; - fs::create_dir_all(parent_dir).map_err(|e| { - Error::new( - ErrorKind::Other, - format!("Error creating parent dir: {}", e), - ) - })?; - let mut temp_dir = TempDir::with_prefix_in(final_dir_name, parent_dir)?; - if should_persist_tempdir() { - temp_dir.disable_cleanup(true); - } - Ok(temp_dir) - } else { - // It's easier to use the current location instead of OS-default tempfile - // location because Github Actions can easily access files in the current - // location using wildcard to upload them as artifacts. - let mut temp_dir = TempDir::new_in(std::env::current_dir()?)?; - if should_persist_tempdir() { - temp_dir.disable_cleanup(true); - } - Ok(temp_dir) - } -} - -fn persist_tempdir(tempdir: &mut TempDir, label: &str) -> std::io::Result<()> { - println!( - "{}: persisting directory at {}", - label, - tempdir.path().display() - ); - if should_persist_tempdir() { - return Ok(()); - } - // we need ownership of the dir to persist it - let dir = std::mem::replace(tempdir, tempfile::tempdir()?); - let _ = dir.keep(); - Ok(()) -} - -pub(crate) fn should_persist_tempdir() -> bool { - std::thread::panicking() || *KEEP_NODE_TEMPDIRS -} diff --git a/testing-framework/core/src/nodes/node.rs b/testing-framework/core/src/nodes/node.rs deleted file mode 100644 index f97c706..0000000 --- a/testing-framework/core/src/nodes/node.rs +++ /dev/null @@ -1,163 +0,0 @@ -use std::{ops::Deref, path::PathBuf, time::Duration}; - -use lb_node::config::RunConfig; -use lb_tracing_service::LoggerLayer; -pub use testing_framework_config::nodes::node::create_node_config; -use tracing::{debug, info}; - -use crate::{ - IS_DEBUG_TRACING, - nodes::{ - LOGS_PREFIX, - common::{ - binary::{BinaryConfig, BinaryResolver}, - lifecycle::{kill::kill_child, monitor::is_running}, - node::{ - NodeAddresses, NodeConfigCommon, NodeHandle, SpawnNodeError, spawn_node, - spawn_node_process, wait_for_consensus_readiness, - }, - }, - }, - scenario::DynError, - topology::config::NodeConfigPatch, -}; - -const BIN_PATH: &str = "target/debug/logos-blockchain-node"; -const RESTART_SHUTDOWN_TIMEOUT: Duration = Duration::from_secs(10); - -fn binary_path() -> PathBuf { - let cfg = BinaryConfig { - env_var: "LOGOS_BLOCKCHAIN_NODE_BIN", - binary_name: "logos-blockchain-node", - fallback_path: BIN_PATH, - shared_bin_subpath: "../assets/stack/bin/logos-blockchain-node", - }; - BinaryResolver::resolve_path(&cfg) -} - -pub struct Node { - handle: NodeHandle, -} - -pub fn apply_node_config_patches<'a>( - mut config: RunConfig, - patches: impl IntoIterator, -) -> Result { - for patch in patches { - config = patch(config)?; - } - Ok(config) -} - -pub fn apply_node_config_patch( - config: RunConfig, - patch: &NodeConfigPatch, -) -> Result { - apply_node_config_patches(config, [patch]) -} - -impl Deref for Node { - type Target = NodeHandle; - - fn deref(&self) -> &Self::Target { - &self.handle - } -} - -impl Drop for Node { - fn drop(&mut self) { - debug!("stopping node process"); - kill_child(&mut self.handle.child); - } -} - -impl Node { - /// Return the current process id for the running node. - #[must_use] - pub fn pid(&self) -> u32 { - self.handle.child.id() - } - - /// Check if the node process is still running - pub fn is_running(&mut self) -> bool { - is_running(&mut self.handle.child) - } - - /// Wait for the node process to exit, with a timeout - /// Returns true if the process exited within the timeout, false otherwise - pub async fn wait_for_exit(&mut self, timeout: Duration) -> bool { - self.handle.wait_for_exit(timeout).await - } - - pub async fn spawn( - config: RunConfig, - label: &str, - persist_dir: Option, - ) -> Result { - let log_prefix = format!("{LOGS_PREFIX}-{label}"); - let handle = spawn_node( - config, - &log_prefix, - "node.yaml", - binary_path(), - !*IS_DEBUG_TRACING, - persist_dir, - ) - .await?; - - info!("node spawned and ready"); - - Ok(Self { handle }) - } - - /// Restart the node process using the existing config and data directory. - pub async fn restart(&mut self) -> Result<(), SpawnNodeError> { - let old_pid = self.pid(); - debug!(old_pid, "restarting node process"); - - kill_child(&mut self.handle.child); - let _ = self.wait_for_exit(RESTART_SHUTDOWN_TIMEOUT).await; - - let config_path = self.handle.tempdir.path().join("node.yaml"); - let child = spawn_node_process(&binary_path(), &config_path, self.handle.tempdir.path())?; - self.handle.child = child; - - let new_pid = self.pid(); - wait_for_consensus_readiness(&self.handle.api) - .await - .map_err(|source| SpawnNodeError::Readiness { source })?; - - info!( - old_pid, - new_pid, "node restart readiness confirmed via consensus_info" - ); - - Ok(()) - } - - /// Stop the node process without restarting it. - pub async fn stop(&mut self) { - let pid = self.pid(); - debug!(pid, "stopping node process"); - - kill_child(&mut self.handle.child); - let _ = self.wait_for_exit(RESTART_SHUTDOWN_TIMEOUT).await; - } -} - -impl NodeConfigCommon for RunConfig { - fn set_logger(&mut self, logger: LoggerLayer) { - self.user.tracing.logger = logger; - } - - fn set_paths(&mut self, base: &std::path::Path) { - self.user.storage.db_path = base.join("db"); - } - - fn addresses(&self) -> NodeAddresses { - ( - self.user.http.backend_settings.address, - Some(self.user.testing_http.backend_settings.address), - ) - } -} diff --git a/testing-framework/core/src/runtime/manual.rs b/testing-framework/core/src/runtime/manual.rs new file mode 100644 index 0000000..dfa0292 --- /dev/null +++ b/testing-framework/core/src/runtime/manual.rs @@ -0,0 +1,15 @@ +use async_trait::async_trait; + +use crate::scenario::{Application, DynError, NodeControlHandle, StartNodeOptions, StartedNode}; + +/// Interface for imperative, deployer-backed manual clusters. +#[async_trait] +pub trait ManualClusterHandle: NodeControlHandle { + async fn start_node_with( + &self, + name: &str, + options: StartNodeOptions, + ) -> Result, DynError>; + + async fn wait_network_ready(&self) -> Result<(), DynError>; +} diff --git a/testing-framework/core/src/runtime/mod.rs b/testing-framework/core/src/runtime/mod.rs new file mode 100644 index 0000000..2498238 --- /dev/null +++ b/testing-framework/core/src/runtime/mod.rs @@ -0,0 +1,3 @@ +pub mod manual; +pub mod process; +pub mod retry; diff --git a/testing-framework/core/src/runtime/process.rs b/testing-framework/core/src/runtime/process.rs new file mode 100644 index 0000000..81d4863 --- /dev/null +++ b/testing-framework/core/src/runtime/process.rs @@ -0,0 +1,20 @@ +use std::error::Error; + +use async_trait::async_trait; + +use crate::env::Application; + +#[async_trait] +pub trait RuntimeNode: Send { + type SpawnError: Error + Send + Sync + 'static; + + fn client(&self) -> E::NodeClient; + + fn is_running(&mut self) -> bool; + + fn pid(&self) -> u32; + + async fn stop(&mut self); + + async fn restart(&mut self) -> Result<(), Self::SpawnError>; +} diff --git a/testing-framework/core/src/runtime/retry.rs b/testing-framework/core/src/runtime/retry.rs new file mode 100644 index 0000000..5bbbfb6 --- /dev/null +++ b/testing-framework/core/src/runtime/retry.rs @@ -0,0 +1,58 @@ +use std::{future::Future, time::Duration}; + +use tokio::time::sleep; + +#[derive(Clone, Copy, Debug)] +pub struct RetryConfig { + pub max_attempts: usize, + pub initial_delay: Duration, + pub max_delay: Duration, + pub backoff_factor: u32, +} + +impl RetryConfig { + #[must_use] + pub const fn bounded( + max_attempts: usize, + initial_delay: Duration, + max_delay: Duration, + ) -> Self { + Self { + max_attempts, + initial_delay, + max_delay, + backoff_factor: 2, + } + } + + #[must_use] + pub fn delay_for_attempt(self, attempt: usize) -> Duration { + let mut delay = self.initial_delay; + for _ in 1..attempt { + delay = delay.saturating_mul(self.backoff_factor); + if delay >= self.max_delay { + return self.max_delay; + } + } + delay.min(self.max_delay) + } +} + +pub async fn retry_async(config: RetryConfig, mut op: Op) -> Result +where + Op: FnMut(usize) -> Fut, + Fut: Future>, +{ + let mut attempt = 1usize; + loop { + match op(attempt).await { + Ok(value) => return Ok(value), + Err(_) if attempt < config.max_attempts => { + sleep(config.delay_for_attempt(attempt)).await; + attempt += 1; + } + + Err(err) => return Err(err), + } + } +} diff --git a/testing-framework/core/src/scenario/builder_ext.rs b/testing-framework/core/src/scenario/builder_ext.rs new file mode 100644 index 0000000..9ffca85 --- /dev/null +++ b/testing-framework/core/src/scenario/builder_ext.rs @@ -0,0 +1,205 @@ +use reqwest::Url; + +use super::{Application, ObservabilityCapability, ObservabilityScenarioBuilder, ScenarioBuilder}; + +const METRICS_QUERY_URL_FIELD: &str = "metrics_query_url"; +const METRICS_OTLP_INGEST_URL_FIELD: &str = "metrics_otlp_ingest_url"; +const GRAFANA_URL_FIELD: &str = "grafana_url"; + +#[derive(Debug, thiserror::Error)] +pub enum BuilderInputError { + #[error("invalid url for {field}: '{value}': {message}")] + InvalidUrl { + field: &'static str, + value: String, + message: String, + }, +} + +fn parse_url_or_panic(field: &'static str, value: &str) -> Url { + parse_url_field(field, value).unwrap_or_else(|error| panic!("{error}")) +} + +fn parse_url_field(field: &'static str, value: &str) -> Result { + Url::parse(value).map_err(|error| BuilderInputError::InvalidUrl { + field, + value: value.to_string(), + message: error.to_string(), + }) +} + +fn apply_parsed_url( + field: &'static str, + value: &str, + apply: impl FnOnce(Url) -> T, +) -> Result { + let parsed = parse_url_field(field, value)?; + Ok(apply(parsed)) +} + +fn single_url_observability( + metrics_query_url: Option, + metrics_otlp_ingest_url: Option, + grafana_url: Option, +) -> ObservabilityCapability { + ObservabilityCapability { + metrics_query_url, + metrics_otlp_ingest_url, + grafana_url, + } +} + +/// Observability helpers for scenarios that want to reuse external telemetry. +pub trait ObservabilityBuilderExt: Sized { + type Env: Application; + + /// Reuse an existing Prometheus endpoint. + fn with_metrics_query_url(self, url: Url) -> ObservabilityScenarioBuilder; + + /// Parse and set the Prometheus endpoint (panics on invalid URL). + fn with_metrics_query_url_str(self, url: &str) -> ObservabilityScenarioBuilder; + + /// Parse and set the Prometheus endpoint. + fn try_with_metrics_query_url_str( + self, + url: &str, + ) -> Result, BuilderInputError>; + + /// Set the OTLP HTTP ingest endpoint used by nodes. + fn with_metrics_otlp_ingest_url(self, url: Url) -> ObservabilityScenarioBuilder; + + /// Parse and set the OTLP ingest endpoint (panics on invalid URL). + fn with_metrics_otlp_ingest_url_str(self, url: &str) + -> ObservabilityScenarioBuilder; + + /// Parse and set the OTLP ingest endpoint. + fn try_with_metrics_otlp_ingest_url_str( + self, + url: &str, + ) -> Result, BuilderInputError>; + + /// Set an optional Grafana base URL. + fn with_grafana_url(self, url: Url) -> ObservabilityScenarioBuilder; + + /// Parse and set the Grafana URL (panics on invalid URL). + fn with_grafana_url_str(self, url: &str) -> ObservabilityScenarioBuilder; + + /// Parse and set the Grafana URL. + fn try_with_grafana_url_str( + self, + url: &str, + ) -> Result, BuilderInputError>; +} + +impl ObservabilityBuilderExt for ScenarioBuilder { + type Env = E; + + fn with_metrics_query_url(self, url: Url) -> ObservabilityScenarioBuilder { + self.with_observability(single_url_observability(Some(url), None, None)) + } + + fn with_metrics_query_url_str(self, url: &str) -> ObservabilityScenarioBuilder { + self.with_metrics_query_url(parse_url_or_panic(METRICS_QUERY_URL_FIELD, url)) + } + + fn try_with_metrics_query_url_str( + self, + url: &str, + ) -> Result, BuilderInputError> { + apply_parsed_url(METRICS_QUERY_URL_FIELD, url, |parsed| { + self.with_metrics_query_url(parsed) + }) + } + + fn with_metrics_otlp_ingest_url(self, url: Url) -> ObservabilityScenarioBuilder { + self.with_observability(single_url_observability(None, Some(url), None)) + } + + fn with_metrics_otlp_ingest_url_str(self, url: &str) -> ObservabilityScenarioBuilder { + self.with_metrics_otlp_ingest_url(parse_url_or_panic(METRICS_OTLP_INGEST_URL_FIELD, url)) + } + + fn try_with_metrics_otlp_ingest_url_str( + self, + url: &str, + ) -> Result, BuilderInputError> { + apply_parsed_url(METRICS_OTLP_INGEST_URL_FIELD, url, |parsed| { + self.with_metrics_otlp_ingest_url(parsed) + }) + } + + fn with_grafana_url(self, url: Url) -> ObservabilityScenarioBuilder { + self.with_observability(single_url_observability(None, None, Some(url))) + } + + fn with_grafana_url_str(self, url: &str) -> ObservabilityScenarioBuilder { + self.with_grafana_url(parse_url_or_panic(GRAFANA_URL_FIELD, url)) + } + + fn try_with_grafana_url_str( + self, + url: &str, + ) -> Result, BuilderInputError> { + apply_parsed_url(GRAFANA_URL_FIELD, url, |parsed| { + self.with_grafana_url(parsed) + }) + } +} + +impl ObservabilityBuilderExt for ObservabilityScenarioBuilder { + type Env = E; + + fn with_metrics_query_url(mut self, url: Url) -> ObservabilityScenarioBuilder { + self.capabilities_mut().metrics_query_url = Some(url); + self + } + + fn with_metrics_query_url_str(self, url: &str) -> ObservabilityScenarioBuilder { + self.with_metrics_query_url(parse_url_or_panic(METRICS_QUERY_URL_FIELD, url)) + } + + fn try_with_metrics_query_url_str( + self, + url: &str, + ) -> Result, BuilderInputError> { + apply_parsed_url(METRICS_QUERY_URL_FIELD, url, |parsed| { + self.with_metrics_query_url(parsed) + }) + } + + fn with_metrics_otlp_ingest_url(mut self, url: Url) -> ObservabilityScenarioBuilder { + self.capabilities_mut().metrics_otlp_ingest_url = Some(url); + self + } + + fn with_metrics_otlp_ingest_url_str(self, url: &str) -> ObservabilityScenarioBuilder { + self.with_metrics_otlp_ingest_url(parse_url_or_panic(METRICS_OTLP_INGEST_URL_FIELD, url)) + } + + fn try_with_metrics_otlp_ingest_url_str( + self, + url: &str, + ) -> Result, BuilderInputError> { + apply_parsed_url(METRICS_OTLP_INGEST_URL_FIELD, url, |parsed| { + self.with_metrics_otlp_ingest_url(parsed) + }) + } + + fn with_grafana_url(mut self, url: Url) -> ObservabilityScenarioBuilder { + self.capabilities_mut().grafana_url = Some(url); + self + } + + fn with_grafana_url_str(self, url: &str) -> ObservabilityScenarioBuilder { + self.with_grafana_url(parse_url_or_panic(GRAFANA_URL_FIELD, url)) + } + + fn try_with_grafana_url_str( + self, + url: &str, + ) -> Result, BuilderInputError> { + apply_parsed_url(GRAFANA_URL_FIELD, url, |parsed| { + self.with_grafana_url(parsed) + }) + } +} diff --git a/testing-framework/core/src/scenario/builder_ops.rs b/testing-framework/core/src/scenario/builder_ops.rs new file mode 100644 index 0000000..7cf3482 --- /dev/null +++ b/testing-framework/core/src/scenario/builder_ops.rs @@ -0,0 +1,36 @@ +use super::Application; +use crate::scenario::definition::Builder; + +/// Accessor trait for wrapper builders that delegate generic scenario behavior +/// to the core `Builder`. +#[doc(hidden)] +pub trait CoreBuilderAccess: Sized { + type Env: Application; + type Caps; + + fn map_core_builder( + self, + f: impl FnOnce(Builder) -> Builder, + ) -> Self; + + fn core_builder_ref(&self) -> &Builder; + + fn core_builder_mut(&mut self) -> &mut Builder; +} + +impl CoreBuilderAccess for Builder { + type Env = E; + type Caps = Caps; + + fn map_core_builder(self, f: impl FnOnce(Builder) -> Builder) -> Self { + f(self) + } + + fn core_builder_ref(&self) -> &Builder { + self + } + + fn core_builder_mut(&mut self) -> &mut Builder { + self + } +} diff --git a/testing-framework/core/src/scenario/capabilities.rs b/testing-framework/core/src/scenario/capabilities.rs index ca89fea..e0220ea 100644 --- a/testing-framework/core/src/scenario/capabilities.rs +++ b/testing-framework/core/src/scenario/capabilities.rs @@ -1,75 +1,103 @@ -use std::{path::PathBuf, sync::Arc}; +use std::{fmt, marker::PhantomData, path::PathBuf, sync::Arc}; use reqwest::Url; -use super::DynError; -use crate::{nodes::ApiClient, topology::config::NodeConfigPatch}; +use super::{Application, DynError}; -/// Marker type used by scenario builders to request node control support. +/// Marker enabling node control support. #[derive(Clone, Copy, Debug, Default)] pub struct NodeControlCapability; -/// Optional observability settings attached to a scenario. +/// Observability settings attached to a scenario. #[derive(Clone, Debug, Default)] pub struct ObservabilityCapability { - /// Prometheus-compatible base URL used by the *runner process* to query - /// metrics (commonly a localhost port-forward, but can be any reachable - /// endpoint). + /// Base URL used by the runner to query Prometheus metrics. pub metrics_query_url: Option, - /// Full OTLP HTTP metrics ingest endpoint used by *nodes* to export metrics - /// (backend-specific host and path). + /// OTLP HTTP endpoint used by nodes to export metrics. pub metrics_otlp_ingest_url: Option, - /// Optional Grafana base URL for printing/logging (human access). + /// Optional Grafana URL for logs/output. pub grafana_url: Option, } /// Peer selection strategy for dynamically started nodes. #[derive(Clone, Debug)] pub enum PeerSelection { - /// Use the topology default (star/chain/full). + /// Use topology defaults. DefaultLayout, - /// Start without any initial peers. + /// Start without initial peers. None, - /// Connect to the named peers. + /// Connect to named peers. Named(Vec), } /// Options for dynamically starting a node. #[derive(Clone)] -pub struct StartNodeOptions { +pub struct StartNodeOptions { /// How to select initial peers on startup. pub peers: PeerSelection, - /// Optional node config patch applied before spawn. - pub config_patch: Option, - /// Optional directory to persist node's tempdir to on stop. + /// Optional backend-specific initial config override. + pub config_override: Option, + /// Optional patch callback applied to generated node config before spawn. + pub config_patch: + Option Result + Send + Sync>>, + /// Optional persistent working directory for this node process. pub persist_dir: Option, + _phantom: PhantomData, } -impl Default for StartNodeOptions { +impl fmt::Debug for StartNodeOptions { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + f.debug_struct("StartNodeOptions") + .field("peers", &self.peers) + .field("config_override", &self.config_override.is_some()) + .field("config_patch", &self.config_patch.is_some()) + .field("persist_dir", &self.persist_dir) + .finish() + } +} + +impl Default for StartNodeOptions { fn default() -> Self { Self { peers: PeerSelection::DefaultLayout, + config_override: None, config_patch: None, persist_dir: None, + _phantom: PhantomData, } } } -impl StartNodeOptions { - pub fn create_patch(mut self, f: F) -> Self - where - F: Fn(lb_node::config::RunConfig) -> Result - + Send - + Sync - + 'static, - { - self.config_patch = Some(Arc::new(f)); +impl StartNodeOptions { + #[must_use] + pub fn with_peers(mut self, peers: PeerSelection) -> Self { + self.peers = peers; + self + } + + #[must_use] + pub fn with_config_override(mut self, config_override: E::NodeConfig) -> Self { + self.config_override = Some(config_override); + self + } + + #[must_use] + pub fn create_patch( + mut self, + config_patch: impl Fn(E::NodeConfig) -> Result + Send + Sync + 'static, + ) -> Self { + self.config_patch = Some(Arc::new(config_patch)); + self + } + + #[must_use] + pub fn with_persist_dir(mut self, persist_dir: PathBuf) -> Self { + self.persist_dir = Some(persist_dir); self } } -/// Trait implemented by scenario capability markers to signal whether node -/// control is required. +/// Indicates whether a capability requires node control. pub trait RequiresNodeControl { const REQUIRED: bool; } @@ -87,7 +115,7 @@ impl RequiresNodeControl for ObservabilityCapability { } #[derive(Clone)] -pub struct StartedNode { +pub struct StartedNode { pub name: String, - pub api: ApiClient, + pub client: E::NodeClient, } diff --git a/testing-framework/core/src/scenario/cfgsync.rs b/testing-framework/core/src/scenario/cfgsync.rs deleted file mode 100644 index 53ab636..0000000 --- a/testing-framework/core/src/scenario/cfgsync.rs +++ /dev/null @@ -1,148 +0,0 @@ -use std::{fs::File, num::NonZero, path::Path, time::Duration}; - -use anyhow::{Context as _, Result}; -use lb_tracing_service::TracingSettings; -use lb_utils::bounded_duration::{MinimalBoundedDuration, SECOND}; -use serde::{Deserialize, Serialize}; -use serde_with::serde_as; -use tracing::debug; - -use crate::topology::{configs::wallet::WalletConfig, generation::GeneratedTopology}; - -#[serde_as] -#[derive(Debug, Clone, Serialize, Deserialize)] -pub struct CfgSyncConfig { - pub port: u16, - pub n_hosts: usize, - pub timeout: u64, - pub security_param: NonZero, - pub active_slot_coeff: f64, - #[serde(default)] - pub wallet: WalletConfig, - #[serde(default)] - pub ids: Option>, - #[serde(default)] - pub blend_ports: Option>, - pub subnetwork_size: usize, - pub dispersal_factor: usize, - pub num_samples: u16, - pub num_subnets: u16, - #[serde_as(as = "MinimalBoundedDuration<0, SECOND>")] - pub old_blobs_check_interval: Duration, - #[serde_as(as = "MinimalBoundedDuration<0, SECOND>")] - pub blobs_validity_duration: Duration, - pub min_dispersal_peers: usize, - pub min_replication_peers: usize, - #[serde_as(as = "MinimalBoundedDuration<0, SECOND>")] - pub monitor_failure_time_window: Duration, - #[serde_as(as = "MinimalBoundedDuration<0, SECOND>")] - pub balancer_interval: Duration, - pub retry_shares_limit: usize, - pub retry_commitments_limit: usize, - pub tracing_settings: TracingSettings, -} - -pub fn load_cfgsync_template(path: &Path) -> Result { - debug!(path = %path.display(), "loading cfgsync template"); - let file = File::open(path) - .with_context(|| format!("opening cfgsync template at {}", path.display()))?; - serde_yaml::from_reader(file).context("parsing cfgsync template") -} - -pub fn write_cfgsync_template(path: &Path, cfg: &CfgSyncConfig) -> Result<()> { - debug!(path = %path.display(), "writing cfgsync template"); - let file = File::create(path) - .with_context(|| format!("writing cfgsync template to {}", path.display()))?; - let serializable = SerializableCfgSyncConfig::from(cfg); - serde_yaml::to_writer(file, &serializable).context("serializing cfgsync template") -} - -pub fn render_cfgsync_yaml(cfg: &CfgSyncConfig) -> Result { - debug!("rendering cfgsync yaml"); - let serializable = SerializableCfgSyncConfig::from(cfg); - serde_yaml::to_string(&serializable).context("rendering cfgsync yaml") -} - -pub fn apply_topology_overrides(cfg: &mut CfgSyncConfig, topology: &GeneratedTopology) { - debug!( - nodes = topology.nodes().len(), - "applying topology overrides to cfgsync config" - ); - let hosts = topology.nodes().len(); - cfg.n_hosts = hosts; - - let consensus = &topology.config().consensus_params; - cfg.security_param = consensus.security_param; - cfg.active_slot_coeff = consensus.active_slot_coeff; - - let config = topology.config(); - cfg.wallet = config.wallet_config.clone(); - cfg.ids = Some(topology.nodes().iter().map(|node| node.id).collect()); - cfg.blend_ports = Some( - topology - .nodes() - .iter() - .map(|node| node.blend_port) - .collect(), - ); -} - -#[serde_as] -#[derive(Serialize)] -struct SerializableCfgSyncConfig { - port: u16, - n_hosts: usize, - timeout: u64, - security_param: NonZero, - active_slot_coeff: f64, - wallet: WalletConfig, - #[serde(skip_serializing_if = "Option::is_none")] - ids: Option>, - #[serde(skip_serializing_if = "Option::is_none")] - blend_ports: Option>, - subnetwork_size: usize, - dispersal_factor: usize, - num_samples: u16, - num_subnets: u16, - #[serde_as(as = "MinimalBoundedDuration<0, SECOND>")] - old_blobs_check_interval: Duration, - #[serde_as(as = "MinimalBoundedDuration<0, SECOND>")] - blobs_validity_duration: Duration, - min_dispersal_peers: usize, - min_replication_peers: usize, - #[serde_as(as = "MinimalBoundedDuration<0, SECOND>")] - monitor_failure_time_window: Duration, - #[serde_as(as = "MinimalBoundedDuration<0, SECOND>")] - balancer_interval: Duration, - retry_shares_limit: usize, - retry_commitments_limit: usize, - tracing_settings: TracingSettings, -} - -impl From<&CfgSyncConfig> for SerializableCfgSyncConfig { - fn from(cfg: &CfgSyncConfig) -> Self { - Self { - port: cfg.port, - n_hosts: cfg.n_hosts, - timeout: cfg.timeout, - security_param: cfg.security_param, - active_slot_coeff: cfg.active_slot_coeff, - wallet: cfg.wallet.clone(), - ids: cfg.ids.clone(), - blend_ports: cfg.blend_ports.clone(), - subnetwork_size: cfg.subnetwork_size, - dispersal_factor: cfg.dispersal_factor, - num_samples: cfg.num_samples, - num_subnets: cfg.num_subnets, - old_blobs_check_interval: cfg.old_blobs_check_interval, - blobs_validity_duration: cfg.blobs_validity_duration, - min_dispersal_peers: cfg.min_dispersal_peers, - min_replication_peers: cfg.min_replication_peers, - monitor_failure_time_window: cfg.monitor_failure_time_window, - balancer_interval: cfg.balancer_interval, - retry_shares_limit: cfg.retry_shares_limit, - retry_commitments_limit: cfg.retry_commitments_limit, - tracing_settings: cfg.tracing_settings.clone(), - } - } -} diff --git a/testing-framework/core/src/scenario/common_builder_ext.rs b/testing-framework/core/src/scenario/common_builder_ext.rs new file mode 100644 index 0000000..76b7f49 --- /dev/null +++ b/testing-framework/core/src/scenario/common_builder_ext.rs @@ -0,0 +1,132 @@ +use std::time::Duration; + +use super::{ + Application, CleanupPolicy, CoreBuilderAccess, DeploymentPolicy, Expectation, + HttpReadinessRequirement, RetryPolicy, Workload, +}; +use crate::topology::{DeploymentProvider, DeploymentSeed}; + +type DeploymentProviderHandle = Box::Deployment>>; + +/// Common fluent builder methods shared by all wrappers around `CoreBuilder`. +/// +/// This lets app integrations reuse generic scenario behavior without +/// re-implementing forwarding methods in each app-specific builder. +pub trait CoreBuilderExt: CoreBuilderAccess + Sized { + #[must_use] + fn with_deployment_provider( + self, + deployment_provider: DeploymentProviderHandle, + ) -> Self { + self.map_core_builder(|builder| builder.with_deployment_provider(deployment_provider)) + } + + #[must_use] + fn with_deployment_seed(self, seed: DeploymentSeed) -> Self { + self.map_core_builder(|builder| builder.with_deployment_seed(seed)) + } + + #[must_use] + fn with_workload(self, workload: W) -> Self + where + W: Workload + 'static, + { + self.map_core_builder(|builder| builder.with_workload(workload)) + } + + #[must_use] + fn with_workload_boxed(self, workload: Box>) -> Self { + self.map_core_builder(|builder| builder.with_workload_boxed(workload)) + } + + #[must_use] + fn with_expectation(self, expectation: Exp) -> Self + where + Exp: Expectation + 'static, + { + self.map_core_builder(|builder| builder.with_expectation(expectation)) + } + + #[must_use] + fn with_expectation_boxed(self, expectation: Box>) -> Self { + self.map_core_builder(|builder| builder.with_expectation_boxed(expectation)) + } + + #[must_use] + fn with_run_duration(self, duration: Duration) -> Self { + self.map_core_builder(|builder| builder.with_run_duration(duration)) + } + + #[must_use] + fn with_expectation_cooldown(self, cooldown: Duration) -> Self { + self.map_core_builder(|builder| builder.with_expectation_cooldown(cooldown)) + } + + #[must_use] + fn with_http_readiness_requirement(self, requirement: HttpReadinessRequirement) -> Self { + self.map_core_builder(|builder| builder.with_http_readiness_requirement(requirement)) + } + + #[must_use] + fn with_deployment_policy(self, policy: DeploymentPolicy) -> Self { + self.map_core_builder(|builder| builder.with_deployment_policy(policy)) + } + + #[must_use] + fn with_readiness_enabled(self, enabled: bool) -> Self { + self.map_core_builder(|builder| { + let mut policy = builder.deployment_policy(); + policy.readiness_enabled = enabled; + builder.with_deployment_policy(policy) + }) + } + + #[must_use] + fn with_http_readiness_all(self) -> Self { + self.with_http_readiness_requirement(HttpReadinessRequirement::AllNodesReady) + } + + #[must_use] + fn with_http_readiness_any(self) -> Self { + self.with_http_readiness_requirement(HttpReadinessRequirement::AnyNodeReady) + } + + #[must_use] + fn with_http_readiness_at_least(self, min_ready_nodes: usize) -> Self { + self.with_http_readiness_requirement(HttpReadinessRequirement::AtLeast(min_ready_nodes)) + } + + #[must_use] + fn with_retry_policy(self, retry: RetryPolicy) -> Self { + self.map_core_builder(|builder| { + let mut policy = builder.deployment_policy(); + policy.retry_policy = Some(retry); + builder.with_deployment_policy(policy) + }) + } + + #[must_use] + fn without_retry_policy(self) -> Self { + self.map_core_builder(|builder| { + let mut policy = builder.deployment_policy(); + policy.retry_policy = None; + builder.with_deployment_policy(policy) + }) + } + + #[must_use] + fn with_preserve_artifacts(self, preserve: bool) -> Self { + self.map_core_builder(|builder| { + let mut policy = builder.deployment_policy(); + policy.cleanup_policy = CleanupPolicy::new(preserve); + builder.with_deployment_policy(policy) + }) + } + + #[must_use] + fn run_duration(&self) -> Duration { + self.core_builder_ref().run_duration() + } +} + +impl CoreBuilderExt for T {} diff --git a/testing-framework/core/src/scenario/control.rs b/testing-framework/core/src/scenario/control.rs index 682937d..d19f871 100644 --- a/testing-framework/core/src/scenario/control.rs +++ b/testing-framework/core/src/scenario/control.rs @@ -1,26 +1,23 @@ use async_trait::async_trait; -use crate::{ - nodes::ApiClient, - scenario::{DynError, StartNodeOptions, StartedNode}, -}; +use crate::scenario::{Application, DynError, StartNodeOptions, StartedNode}; /// Deployer-agnostic control surface for runtime node operations. #[async_trait] -pub trait NodeControlHandle: Send + Sync { +pub trait NodeControlHandle: Send + Sync { async fn restart_node(&self, _name: &str) -> Result<(), DynError> { Err("restart_node not supported by this deployer".into()) } - async fn start_node(&self, _name: &str) -> Result { + async fn start_node(&self, _name: &str) -> Result, DynError> { Err("start_node not supported by this deployer".into()) } async fn start_node_with( &self, _name: &str, - _options: StartNodeOptions, - ) -> Result { + _options: StartNodeOptions, + ) -> Result, DynError> { Err("start_node_with not supported by this deployer".into()) } @@ -28,7 +25,7 @@ pub trait NodeControlHandle: Send + Sync { Err("stop_node not supported by this deployer".into()) } - fn node_client(&self, _name: &str) -> Option { + fn node_client(&self, _name: &str) -> Option { None } diff --git a/testing-framework/core/src/scenario/definition.rs b/testing-framework/core/src/scenario/definition.rs index da0f144..a34ccc4 100644 --- a/testing-framework/core/src/scenario/definition.rs +++ b/testing-framework/core/src/scenario/definition.rs @@ -1,81 +1,78 @@ -use std::{num::NonZeroUsize, path::PathBuf, sync::Arc, time::Duration}; +use std::{sync::Arc, time::Duration}; -use lb_node::config::RunConfig; use thiserror::Error; use tracing::{debug, info}; use super::{ - DynError, NodeControlCapability, expectation::Expectation, runtime::context::RunMetrics, - workload::Workload, -}; -use crate::topology::{ - config::{NodeConfigPatch, TopologyBuildError, TopologyBuilder, TopologyConfig}, - configs::{network::Libp2pNetworkLayout, wallet::WalletConfig}, - generation::GeneratedTopology, + Application, DeploymentPolicy, DynError, HttpReadinessRequirement, NodeControlCapability, + ObservabilityCapability, builder_ops::CoreBuilderAccess, expectation::Expectation, + runtime::context::RunMetrics, workload::Workload, }; +use crate::topology::{DeploymentDescriptor, DeploymentProvider, DeploymentSeed, DynTopologyError}; -const DEFAULT_FUNDS_PER_WALLET: u64 = 100; -const MIN_EXPECTATION_BLOCKS: u32 = 2; const MIN_EXPECTATION_FALLBACK_SECS: u64 = 10; +const MIN_RUN_DURATION_SECS: u64 = 10; #[derive(Debug, Error)] pub enum ScenarioBuildError { - #[error(transparent)] - Topology(#[from] TopologyBuildError), - #[error("wallet user count must be non-zero (got {users})")] - WalletUsersZero { users: usize }, - #[error("wallet funds overflow for {users} users at {per_wallet} per wallet")] - WalletFundsOverflow { users: usize, per_wallet: u64 }, + #[error("topology build failed: {0}")] + Topology(#[source] DynTopologyError), #[error("workload '{name}' failed to initialize")] WorkloadInit { name: String, source: DynError }, #[error("expectation '{name}' failed to initialize")] ExpectationInit { name: String, source: DynError }, } -/// Immutable scenario definition shared between the runner, workloads, and +/// Immutable scenario definition used by the runner, workloads, and /// expectations. -pub struct Scenario { - topology: GeneratedTopology, - workloads: Vec>, - expectations: Vec>, +pub struct Scenario { + deployment: E::Deployment, + workloads: Vec>>, + expectations: Vec>>, duration: Duration, + expectation_cooldown: Duration, + deployment_policy: DeploymentPolicy, capabilities: Caps, } -impl Scenario { +impl Scenario { fn new( - topology: GeneratedTopology, - workloads: Vec>, - expectations: Vec>, + deployment: E::Deployment, + workloads: Vec>>, + expectations: Vec>>, duration: Duration, + expectation_cooldown: Duration, + deployment_policy: DeploymentPolicy, capabilities: Caps, ) -> Self { Self { - topology, + deployment, workloads, expectations, duration, + expectation_cooldown, + deployment_policy, capabilities, } } #[must_use] - pub const fn topology(&self) -> &GeneratedTopology { - &self.topology + pub fn deployment(&self) -> &E::Deployment { + &self.deployment } #[must_use] - pub fn workloads(&self) -> &[Arc] { + pub fn workloads(&self) -> &[Arc>] { &self.workloads } #[must_use] - pub fn expectations(&self) -> &[Box] { + pub fn expectations(&self) -> &[Box>] { &self.expectations } #[must_use] - pub fn expectations_mut(&mut self) -> &mut [Box] { + pub fn expectations_mut(&mut self) -> &mut [Box>] { &mut self.expectations } @@ -84,88 +81,325 @@ impl Scenario { self.duration } + #[must_use] + pub const fn expectation_cooldown(&self) -> Duration { + self.expectation_cooldown + } + + #[must_use] + pub const fn http_readiness_requirement(&self) -> HttpReadinessRequirement { + self.deployment_policy.readiness_requirement + } + + #[must_use] + pub const fn deployment_policy(&self) -> DeploymentPolicy { + self.deployment_policy + } + #[must_use] pub const fn capabilities(&self) -> &Caps { &self.capabilities } } -/// Builder used by callers to describe the desired scenario. -pub struct Builder { - topology: TopologyBuilder, - workloads: Vec>, - expectations: Vec>, +/// Scenario builder entry point. +pub struct Builder { + deployment_provider: Box>, + topology_seed: Option, + workloads: Vec>>, + expectations: Vec>>, duration: Duration, - wallet_users: Option, + expectation_cooldown: Option, + deployment_policy: DeploymentPolicy, capabilities: Caps, } -pub type ScenarioBuilder = Builder<()>; - -/// Builder for shaping the scenario topology. -pub struct TopologyConfigurator { - builder: Builder, - nodes: usize, - network_star: bool, - scenario_base_dir: Option, +pub struct ScenarioBuilder { + inner: Builder, } -impl Builder { +pub struct NodeControlScenarioBuilder { + inner: Builder, +} + +pub struct ObservabilityScenarioBuilder { + inner: Builder, +} + +macro_rules! impl_common_builder_methods { + ($builder:ident) => { + impl $builder { + #[must_use] + pub fn map_deployment_provider( + self, + f: impl FnOnce( + Box>, + ) -> Box>, + ) -> Self { + self.map_core_builder(|builder| builder.map_deployment_provider(f)) + } + + #[must_use] + pub fn with_deployment_provider( + self, + deployment_provider: Box>, + ) -> Self { + self.map_core_builder(|builder| { + builder.with_deployment_provider(deployment_provider) + }) + } + + #[must_use] + pub fn with_deployment_seed(self, seed: DeploymentSeed) -> Self { + self.map_core_builder(|builder| builder.with_deployment_seed(seed)) + } + + #[must_use] + pub fn with_workload(self, workload: W) -> Self + where + W: Workload + 'static, + { + self.map_core_builder(|builder| builder.with_workload(workload)) + } + + #[must_use] + pub fn with_workload_boxed(self, workload: Box>) -> Self { + self.map_core_builder(|builder| builder.with_workload_boxed(workload)) + } + + #[must_use] + pub fn with_expectation(self, expectation: Exp) -> Self + where + Exp: Expectation + 'static, + { + self.map_core_builder(|builder| builder.with_expectation(expectation)) + } + + #[must_use] + pub fn with_expectation_boxed(self, expectation: Box>) -> Self { + self.map_core_builder(|builder| builder.with_expectation_boxed(expectation)) + } + + #[must_use] + pub fn with_run_duration(self, duration: Duration) -> Self { + self.map_core_builder(|builder| builder.with_run_duration(duration)) + } + + #[must_use] + pub fn with_expectation_cooldown(self, cooldown: Duration) -> Self { + self.map_core_builder(|builder| builder.with_expectation_cooldown(cooldown)) + } + + #[must_use] + pub fn with_http_readiness_requirement( + self, + requirement: HttpReadinessRequirement, + ) -> Self { + self.map_core_builder(|builder| { + builder.with_http_readiness_requirement(requirement) + }) + } + + #[must_use] + pub fn with_deployment_policy(self, policy: DeploymentPolicy) -> Self { + self.map_core_builder(|builder| builder.with_deployment_policy(policy)) + } + + #[must_use] + pub fn run_duration(&self) -> Duration { + self.core_builder_ref().run_duration() + } + } + }; +} + +impl CoreBuilderAccess for ScenarioBuilder { + type Env = E; + type Caps = (); + + fn map_core_builder( + mut self, + f: impl FnOnce(Builder) -> Builder, + ) -> Self { + self.inner = f(self.inner); + self + } + + fn core_builder_ref(&self) -> &Builder { + &self.inner + } + + fn core_builder_mut(&mut self) -> &mut Builder { + &mut self.inner + } +} + +impl CoreBuilderAccess for NodeControlScenarioBuilder { + type Env = E; + type Caps = NodeControlCapability; + + fn map_core_builder( + mut self, + f: impl FnOnce(Builder) -> Builder, + ) -> Self { + self.inner = f(self.inner); + self + } + + fn core_builder_ref(&self) -> &Builder { + &self.inner + } + + fn core_builder_mut(&mut self) -> &mut Builder { + &mut self.inner + } +} + +impl CoreBuilderAccess for ObservabilityScenarioBuilder { + type Env = E; + type Caps = ObservabilityCapability; + + fn map_core_builder( + mut self, + f: impl FnOnce(Builder) -> Builder, + ) -> Self { + self.inner = f(self.inner); + self + } + + fn core_builder_ref(&self) -> &Builder { + &self.inner + } + + fn core_builder_mut(&mut self) -> &mut Builder { + &mut self.inner + } +} + +impl Builder { #[must_use] - /// Start a builder from a topology description. - pub fn new(topology: TopologyBuilder) -> Self { + /// Start a builder from a topology provider. + pub fn new(deployment_provider: Box>) -> Self { Self { - topology, + deployment_provider, + topology_seed: None, workloads: Vec::new(), expectations: Vec::new(), duration: Duration::ZERO, - wallet_users: None, + expectation_cooldown: None, + deployment_policy: DeploymentPolicy::default(), capabilities: Caps::default(), } } +} + +impl ScenarioBuilder { + #[must_use] + pub fn new(deployment_provider: Box>) -> Self { + Self { + inner: Builder::new(deployment_provider), + } + } #[must_use] - pub fn with_node_counts(nodes: usize) -> Self { - Self::new(TopologyBuilder::new(TopologyConfig::with_node_numbers( - nodes, - ))) + pub fn enable_node_control(self) -> NodeControlScenarioBuilder { + NodeControlScenarioBuilder { + inner: self.inner.with_capabilities(NodeControlCapability), + } } - /// Convenience constructor that immediately enters topology configuration, - /// letting callers set counts via `nodes`. - pub fn topology() -> TopologyConfigurator { - TopologyConfigurator::new(Self::new(TopologyBuilder::new(TopologyConfig::empty()))) - } - - /// Configure topology via a closure and return the scenario builder. #[must_use] - pub fn topology_with( - f: impl FnOnce(TopologyConfigurator) -> TopologyConfigurator, - ) -> Builder { - let configurator = Self::topology(); - f(configurator).apply() + pub fn enable_observability(self) -> ObservabilityScenarioBuilder { + ObservabilityScenarioBuilder { + inner: self + .inner + .with_capabilities(ObservabilityCapability::default()), + } + } + + pub fn build(self) -> Result, ScenarioBuildError> { + self.inner.build() + } + + pub(crate) fn with_observability( + self, + observability: ObservabilityCapability, + ) -> ObservabilityScenarioBuilder { + ObservabilityScenarioBuilder { + inner: self.inner.with_capabilities(observability), + } } } -impl Builder { +impl_common_builder_methods!(ScenarioBuilder); + +impl NodeControlScenarioBuilder { + pub fn build(self) -> Result, ScenarioBuildError> { + self.inner.build() + } +} + +impl_common_builder_methods!(NodeControlScenarioBuilder); + +impl ObservabilityScenarioBuilder { + pub fn build(self) -> Result, ScenarioBuildError> { + self.inner.build() + } + + pub(crate) fn capabilities_mut(&mut self) -> &mut ObservabilityCapability { + self.inner.capabilities_mut() + } +} + +impl_common_builder_methods!(ObservabilityScenarioBuilder); + +impl Builder { #[must_use] - /// Swap capabilities type carried with the scenario. - pub fn with_capabilities(self, capabilities: NewCaps) -> Builder { + /// Transform the existing deployment provider while preserving all + /// accumulated builder state. + pub fn map_deployment_provider( + mut self, + f: impl FnOnce( + Box>, + ) -> Box>, + ) -> Self { + self.deployment_provider = f(self.deployment_provider); + self + } + + #[must_use] + /// Replace the topology provider while preserving all accumulated builder + /// state. + pub fn with_deployment_provider( + mut self, + deployment_provider: Box>, + ) -> Self { + self.deployment_provider = deployment_provider; + self + } + + #[must_use] + /// Internal capability transition helper. + pub(crate) fn with_capabilities(self, capabilities: NewCaps) -> Builder { let Self { - topology, + deployment_provider, + topology_seed, workloads, expectations, duration, - wallet_users, + expectation_cooldown, + deployment_policy, .. } = self; Builder { - topology, + deployment_provider, + topology_seed, workloads, expectations, duration, - wallet_users, + expectation_cooldown, + deployment_policy, capabilities, } } @@ -180,23 +414,60 @@ impl Builder { &mut self.capabilities } + #[must_use] + pub const fn run_duration(&self) -> Duration { + self.duration + } + + #[must_use] + pub const fn expectation_cooldown_override(&self) -> Option { + self.expectation_cooldown + } + + #[must_use] + pub const fn http_readiness_requirement(&self) -> HttpReadinessRequirement { + self.deployment_policy.readiness_requirement + } + + #[must_use] + pub const fn deployment_policy(&self) -> DeploymentPolicy { + self.deployment_policy + } + + #[must_use] + pub fn with_deployment_seed(mut self, seed: DeploymentSeed) -> Self { + self.topology_seed = Some(seed); + self + } + #[must_use] pub fn with_workload(mut self, workload: W) -> Self where - W: Workload + 'static, + W: Workload + 'static, { - self.expectations.extend(workload.expectations()); - self.workloads.push(Box::new(workload)); + self.add_workload(Box::new(workload)); + self + } + + #[must_use] + pub fn with_workload_boxed(mut self, workload: Box>) -> Self { + self.add_workload(workload); self } #[must_use] /// Add a standalone expectation not tied to a workload. - pub fn with_expectation(mut self, expectation: E) -> Self + pub fn with_expectation(mut self, expectation: Exp) -> Self where - E: Expectation + 'static, + Exp: Expectation + 'static, { - self.expectations.push(Box::new(expectation)); + self.add_expectation(Box::new(expectation)); + self + } + + #[must_use] + pub fn with_expectation_boxed(mut self, expectation: Box>) -> Self { + self.add_expectation(expectation); self } @@ -208,205 +479,211 @@ impl Builder { } #[must_use] - /// Transform the topology builder. - pub fn map_topology(mut self, f: impl FnOnce(TopologyBuilder) -> TopologyBuilder) -> Self { - self.topology = f(self.topology); + /// Override the expectation cooldown used by the runner. + pub const fn with_expectation_cooldown(mut self, cooldown: Duration) -> Self { + self.expectation_cooldown = Some(cooldown); self } #[must_use] - /// Override wallet config for the topology. - pub fn with_wallet_config(mut self, wallet: WalletConfig) -> Self { - self.topology = self.topology.with_wallet_config(wallet); - self.wallet_users = None; + pub const fn with_http_readiness_requirement( + mut self, + requirement: HttpReadinessRequirement, + ) -> Self { + self.deployment_policy.readiness_requirement = requirement; self } #[must_use] - pub fn wallets(self, users: usize) -> Self { - let mut builder = self; - builder.wallet_users = Some(users); - builder + pub const fn with_deployment_policy(mut self, policy: DeploymentPolicy) -> Self { + self.deployment_policy = policy; + self + } + + fn add_workload(&mut self, workload: Box>) { + self.expectations.extend(workload.expectations()); + self.workloads.push(workload); + } + + fn add_expectation(&mut self, expectation: Box>) { + self.expectations.push(expectation); } #[must_use] /// Finalize the scenario, computing run metrics and initializing /// components. - pub fn build(self) -> Result, ScenarioBuildError> { - let Self { - mut topology, - mut workloads, - mut expectations, - duration, - wallet_users, - capabilities, - .. - } = self; + pub fn build(self) -> Result, ScenarioBuildError> { + let mut parts = BuilderParts::from_builder(self); + let descriptors = parts.resolve_deployment()?; + let run_plan = parts.run_plan(); + let run_metrics = RunMetrics::new(run_plan.duration); - if let Some(users) = wallet_users { - let user_count = - NonZeroUsize::new(users).ok_or(ScenarioBuildError::WalletUsersZero { users })?; - let total_funds = DEFAULT_FUNDS_PER_WALLET.checked_mul(users as u64).ok_or( - ScenarioBuildError::WalletFundsOverflow { - users, - per_wallet: DEFAULT_FUNDS_PER_WALLET, - }, - )?; - - let wallet = WalletConfig::uniform(total_funds, user_count); - topology = topology.with_wallet_config(wallet); - } - - let generated = topology.build()?; - let duration = enforce_min_duration(&generated, duration); - let run_metrics = RunMetrics::from_topology(&generated, duration); - initialize_components(&generated, &run_metrics, &mut workloads, &mut expectations)?; - let workloads: Vec> = workloads.into_iter().map(Arc::from).collect(); + initialize_components( + &descriptors, + &run_metrics, + &mut parts.workloads, + &mut parts.expectations, + )?; + let workloads: Vec>> = + parts.workloads.into_iter().map(Arc::from).collect(); info!( - nodes = generated.nodes().len(), - duration_secs = duration.as_secs(), + nodes = descriptors.node_count(), + duration_secs = run_plan.duration.as_secs(), workloads = workloads.len(), - expectations = expectations.len(), + expectations = parts.expectations.len(), "scenario built" ); Ok(Scenario::new( - generated, + descriptors, workloads, - expectations, - duration, - capabilities, + parts.expectations, + run_plan.duration, + run_plan.expectation_cooldown, + parts.deployment_policy, + parts.capabilities, )) } } -impl TopologyConfigurator { - const fn new(builder: Builder) -> Self { +struct RunPlan { + duration: Duration, + expectation_cooldown: Duration, +} + +struct BuilderParts { + deployment_provider: Box>, + topology_seed: Option, + workloads: Vec>>, + expectations: Vec>>, + duration: Duration, + expectation_cooldown: Option, + deployment_policy: DeploymentPolicy, + capabilities: Caps, +} + +impl BuilderParts { + fn from_builder(builder: Builder) -> Self { + let Builder { + deployment_provider, + topology_seed, + workloads, + expectations, + duration, + expectation_cooldown, + deployment_policy, + capabilities, + .. + } = builder; + Self { - builder, - nodes: 0, - network_star: false, - scenario_base_dir: None, + deployment_provider, + topology_seed, + workloads, + expectations, + duration, + expectation_cooldown, + deployment_policy, + capabilities, } } - /// Set the number of nodes. - #[must_use] - pub fn nodes(mut self, count: usize) -> Self { - self.nodes = count; - self + fn resolve_deployment(&self) -> Result { + self.deployment_provider + .build(self.topology_seed.as_ref()) + .map_err(ScenarioBuildError::Topology) } - /// Set a base scenario directory for nodes to persist data. If not set, - /// nodes will use - pub fn scenario_base_dir(mut self, path: PathBuf) -> Self { - self.scenario_base_dir = Some(path); - self - } - - /// Use a star libp2p network layout. - #[must_use] - pub fn network_star(mut self) -> Self { - self.network_star = true; - self - } - - /// Apply a config patch for a specific node index. - #[must_use] - pub fn node_config_patch(mut self, index: usize, patch: NodeConfigPatch) -> Self { - self.builder.topology = self.builder.topology.with_node_config_patch(index, patch); - self - } - - /// Apply a config patch for a specific node index. - #[must_use] - pub fn node_config_patch_with(mut self, index: usize, f: F) -> Self - where - F: Fn(RunConfig) -> Result + Send + Sync + 'static, - { - self.builder.topology = self - .builder - .topology - .with_node_config_patch(index, Arc::new(f)); - self - } - - /// Finalize and return the underlying scenario builder. - #[must_use] - pub fn apply(self) -> Builder { - let mut builder = self.builder; - builder.topology = builder - .topology - .with_node_count(self.nodes) - .with_scenario_base_dir(self.scenario_base_dir); - - if self.network_star { - builder.topology = builder - .topology - .with_network_layout(Libp2pNetworkLayout::Star); + fn run_plan(&self) -> RunPlan { + RunPlan { + duration: enforce_min_duration(self.duration), + expectation_cooldown: expectation_cooldown_for(self.expectation_cooldown), } - builder } } -impl Builder<()> { +impl Builder { #[must_use] - pub fn enable_node_control(self) -> Builder { + pub fn enable_node_control(self) -> Builder { self.with_capabilities(NodeControlCapability) } + + #[must_use] + pub fn enable_observability(self) -> Builder { + self.with_capabilities(ObservabilityCapability::default()) + } } -fn initialize_components( - descriptors: &GeneratedTopology, +fn initialize_components( + descriptors: &E::Deployment, run_metrics: &RunMetrics, - workloads: &mut [Box], - expectations: &mut [Box], + workloads: &mut [Box>], + expectations: &mut [Box>], ) -> Result<(), ScenarioBuildError> { initialize_workloads(descriptors, run_metrics, workloads)?; + initialize_expectations(descriptors, run_metrics, expectations)?; + Ok(()) } -fn initialize_workloads( - descriptors: &GeneratedTopology, +fn initialize_workloads( + descriptors: &E::Deployment, run_metrics: &RunMetrics, - workloads: &mut [Box], + workloads: &mut [Box>], ) -> Result<(), ScenarioBuildError> { for workload in workloads { debug!(workload = workload.name(), "initializing workload"); - workload.init(descriptors, run_metrics).map_err(|source| { - ScenarioBuildError::WorkloadInit { - name: workload.name().to_owned(), - source, - } - })?; + let name = workload.name().to_owned(); + + workload + .init(descriptors, run_metrics) + .map_err(|source| workload_init_error(name, source))?; } + Ok(()) } -fn initialize_expectations( - descriptors: &GeneratedTopology, +fn initialize_expectations( + descriptors: &E::Deployment, run_metrics: &RunMetrics, - expectations: &mut [Box], + expectations: &mut [Box>], ) -> Result<(), ScenarioBuildError> { for expectation in expectations { debug!(expectation = expectation.name(), "initializing expectation"); + let name = expectation.name().to_owned(); + expectation .init(descriptors, run_metrics) - .map_err(|source| ScenarioBuildError::ExpectationInit { - name: expectation.name().to_owned(), - source, - })?; + .map_err(|source| expectation_init_error(name, source))?; } + Ok(()) } -fn enforce_min_duration(descriptors: &GeneratedTopology, requested: Duration) -> Duration { - let min_duration = descriptors.slot_duration().map_or_else( - || Duration::from_secs(MIN_EXPECTATION_FALLBACK_SECS), - |slot| slot * MIN_EXPECTATION_BLOCKS, - ); +fn workload_init_error(name: String, source: DynError) -> ScenarioBuildError { + ScenarioBuildError::WorkloadInit { name, source } +} + +fn expectation_init_error(name: String, source: DynError) -> ScenarioBuildError { + ScenarioBuildError::ExpectationInit { name, source } +} + +fn enforce_min_duration(requested: Duration) -> Duration { + let min_duration = min_run_duration(); requested.max(min_duration) } + +fn default_expectation_cooldown() -> Duration { + Duration::from_secs(MIN_EXPECTATION_FALLBACK_SECS) +} + +fn expectation_cooldown_for(override_value: Option) -> Duration { + override_value.unwrap_or_else(default_expectation_cooldown) +} + +fn min_run_duration() -> Duration { + Duration::from_secs(MIN_RUN_DURATION_SECS) +} diff --git a/testing-framework/core/src/scenario/deployment_policy.rs b/testing-framework/core/src/scenario/deployment_policy.rs new file mode 100644 index 0000000..689014a --- /dev/null +++ b/testing-framework/core/src/scenario/deployment_policy.rs @@ -0,0 +1,52 @@ +use std::time::Duration; + +use super::HttpReadinessRequirement; + +#[derive(Clone, Copy, Debug, Eq, PartialEq)] +pub struct RetryPolicy { + pub max_attempts: usize, + pub base_delay: Duration, + pub max_delay: Duration, +} + +impl RetryPolicy { + #[must_use] + pub const fn new(max_attempts: usize, base_delay: Duration, max_delay: Duration) -> Self { + Self { + max_attempts, + base_delay, + max_delay, + } + } +} + +#[derive(Clone, Copy, Debug, Eq, PartialEq)] +pub struct CleanupPolicy { + pub preserve_artifacts: bool, +} + +impl CleanupPolicy { + #[must_use] + pub const fn new(preserve_artifacts: bool) -> Self { + Self { preserve_artifacts } + } +} + +#[derive(Clone, Copy, Debug, Eq, PartialEq)] +pub struct DeploymentPolicy { + pub readiness_enabled: bool, + pub readiness_requirement: HttpReadinessRequirement, + pub retry_policy: Option, + pub cleanup_policy: CleanupPolicy, +} + +impl Default for DeploymentPolicy { + fn default() -> Self { + Self { + readiness_enabled: true, + readiness_requirement: HttpReadinessRequirement::AllNodesReady, + retry_policy: None, + cleanup_policy: CleanupPolicy::new(false), + } + } +} diff --git a/testing-framework/core/src/scenario/expectation.rs b/testing-framework/core/src/scenario/expectation.rs index 5634ab9..c304582 100644 --- a/testing-framework/core/src/scenario/expectation.rs +++ b/testing-framework/core/src/scenario/expectation.rs @@ -1,24 +1,23 @@ use async_trait::async_trait; -use super::{DynError, RunContext, runtime::context::RunMetrics}; -use crate::topology::generation::GeneratedTopology; +use super::{Application, DynError, RunContext, runtime::context::RunMetrics}; #[async_trait] /// Defines a check evaluated during or after a scenario run. -pub trait Expectation: Send + Sync { +pub trait Expectation: Send + Sync { fn name(&self) -> &str; fn init( &mut self, - _descriptors: &GeneratedTopology, + _descriptors: &E::Deployment, _run_metrics: &RunMetrics, ) -> Result<(), DynError> { Ok(()) } - async fn start_capture(&mut self, _ctx: &RunContext) -> Result<(), DynError> { + async fn start_capture(&mut self, _ctx: &RunContext) -> Result<(), DynError> { Ok(()) } - async fn evaluate(&mut self, ctx: &RunContext) -> Result<(), DynError>; + async fn evaluate(&mut self, ctx: &RunContext) -> Result<(), DynError>; } diff --git a/testing-framework/core/src/scenario/http_probe.rs b/testing-framework/core/src/scenario/http_probe.rs deleted file mode 100644 index 835d7c5..0000000 --- a/testing-framework/core/src/scenario/http_probe.rs +++ /dev/null @@ -1,161 +0,0 @@ -use std::time::Duration; - -use futures::future::try_join_all; -use lb_http_api_common::paths; -use reqwest::Client as ReqwestClient; -use thiserror::Error; -use tokio::time::{Instant, sleep}; -use tracing::{debug, info}; - -/// Label used for readiness probes. -pub const NODE_ROLE: &str = "node"; - -/// Error raised when HTTP readiness checks time out. -#[derive(Clone, Copy, Debug, Error)] -#[error("timeout waiting for {role} HTTP endpoint on port {port} after {timeout:?}")] -pub struct HttpReadinessError { - role: &'static str, - port: u16, - timeout: Duration, -} - -impl HttpReadinessError { - #[must_use] - pub const fn new(role: &'static str, port: u16, timeout: Duration) -> Self { - Self { - role, - port, - timeout, - } - } - - #[must_use] - pub const fn role(&self) -> &'static str { - self.role - } - - #[must_use] - pub const fn port(&self) -> u16 { - self.port - } - - #[must_use] - pub const fn timeout(&self) -> Duration { - self.timeout - } -} - -/// Wait for HTTP readiness on the provided ports against localhost. -pub async fn wait_for_http_ports( - ports: &[u16], - role: &'static str, - timeout_duration: Duration, - poll_interval: Duration, -) -> Result<(), HttpReadinessError> { - wait_for_http_ports_with_host(ports, role, "127.0.0.1", timeout_duration, poll_interval).await -} - -/// Wait for HTTP readiness on the provided ports against a specific host. -pub async fn wait_for_http_ports_with_host( - ports: &[u16], - role: &'static str, - host: &str, - timeout_duration: Duration, - poll_interval: Duration, -) -> Result<(), HttpReadinessError> { - if ports.is_empty() { - return Ok(()); - } - - info!( - role, - ?ports, - host, - timeout_secs = timeout_duration.as_secs_f32(), - poll_ms = poll_interval.as_millis(), - "waiting for HTTP readiness" - ); - - let client = ReqwestClient::new(); - let probes = ports.iter().copied().map(|port| { - wait_for_single_port( - client.clone(), - port, - role, - host, - timeout_duration, - poll_interval, - ) - }); - - try_join_all(probes).await.map(|_| ()) -} - -async fn wait_for_single_port( - client: ReqwestClient, - port: u16, - role: &'static str, - host: &str, - timeout_duration: Duration, - poll_interval: Duration, -) -> Result<(), HttpReadinessError> { - let url = format!("http://{host}:{port}{}", paths::CRYPTARCHIA_INFO); - debug!(role, %url, "probing HTTP endpoint"); - let start = Instant::now(); - let deadline = start + timeout_duration; - let mut attempts: u64 = 0; - - loop { - attempts += 1; - - let last_failure: Option = match client.get(&url).send().await { - Ok(response) if response.status().is_success() => { - info!( - role, - port, - host, - %url, - attempts, - elapsed_ms = start.elapsed().as_millis(), - "HTTP readiness confirmed" - ); - return Ok(()); - } - Ok(response) => { - let status = response.status(); - Some(format!("HTTP {status}")) - } - Err(error) => Some(format!("request error: {error}")), - }; - - if attempts == 1 || attempts % 10 == 0 { - debug!( - role, - port, - host, - %url, - attempts, - elapsed_ms = start.elapsed().as_millis(), - last_failure = last_failure.as_deref().unwrap_or(""), - "HTTP readiness not yet available" - ); - } - - if Instant::now() >= deadline { - info!( - role, - port, - host, - %url, - attempts, - elapsed_ms = start.elapsed().as_millis(), - timeout_secs = timeout_duration.as_secs_f32(), - last_failure = last_failure.as_deref().unwrap_or(""), - "HTTP readiness timed out" - ); - return Err(HttpReadinessError::new(role, port, timeout_duration)); - } - - sleep(poll_interval).await; - } -} diff --git a/testing-framework/core/src/scenario/mod.rs b/testing-framework/core/src/scenario/mod.rs index 6e348c6..9867694 100644 --- a/testing-framework/core/src/scenario/mod.rs +++ b/testing-framework/core/src/scenario/mod.rs @@ -1,34 +1,51 @@ //! Scenario orchestration primitives shared by integration tests and runners. +use std::error::Error; + +mod builder_ext; +mod builder_ops; mod capabilities; -pub mod cfgsync; +mod common_builder_ext; mod control; mod definition; +mod deployment_policy; mod expectation; -pub mod http_probe; mod observability; mod runtime; mod workload; -pub type DynError = Box; +pub type DynError = Box; +pub use builder_ext::{BuilderInputError, ObservabilityBuilderExt}; +#[doc(hidden)] +pub use builder_ops::CoreBuilderAccess; pub use capabilities::{ NodeControlCapability, ObservabilityCapability, PeerSelection, RequiresNodeControl, StartNodeOptions, StartedNode, }; +pub use common_builder_ext::CoreBuilderExt; pub use control::NodeControlHandle; +#[doc(hidden)] pub use definition::{ - Builder, Scenario, ScenarioBuildError, ScenarioBuilder, TopologyConfigurator, + Builder as CoreBuilder, // internal adapter-facing core builder + NodeControlScenarioBuilder, + ObservabilityScenarioBuilder, }; +pub use definition::{Scenario, ScenarioBuildError, ScenarioBuilder}; +pub use deployment_policy::{CleanupPolicy, DeploymentPolicy, RetryPolicy}; pub use expectation::Expectation; pub use observability::{ObservabilityCapabilityProvider, ObservabilityInputs}; pub use runtime::{ - BlockFeed, BlockFeedTask, BlockRecord, BlockStats, CleanupGuard, Deployer, NodeClients, - RunContext, RunHandle, RunMetrics, Runner, ScenarioError, + CleanupGuard, Deployer, Feed, FeedHandle, FeedRuntime, HttpReadinessRequirement, NodeClients, + ReadinessError, RunContext, RunHandle, RunMetrics, Runner, ScenarioError, StabilizationConfig, metrics::{ CONSENSUS_PROCESSED_BLOCKS, CONSENSUS_TRANSACTIONS_TOTAL, Metrics, MetricsError, PrometheusEndpoint, PrometheusInstantSample, }, - spawn_block_feed, + spawn_feed, wait_for_http_ports, wait_for_http_ports_with_host, + wait_for_http_ports_with_host_and_requirement, wait_for_http_ports_with_requirement, + wait_http_readiness, wait_until_stable, }; pub use workload::Workload; + +pub use crate::env::Application; diff --git a/testing-framework/core/src/scenario/observability.rs b/testing-framework/core/src/scenario/observability.rs index dd7f12e..7284f05 100644 --- a/testing-framework/core/src/scenario/observability.rs +++ b/testing-framework/core/src/scenario/observability.rs @@ -4,24 +4,18 @@ use reqwest::Url; use super::{Metrics, MetricsError, NodeControlCapability, ObservabilityCapability}; -/// Observability configuration inputs shared by deployers/runners. -/// -/// All fields are optional; missing values only matter when a caller needs the -/// corresponding capability (e.g. querying metrics from the runner process). +/// Optional observability endpoints used by deployers and runners. #[derive(Clone, Debug, Default)] pub struct ObservabilityInputs { - /// Prometheus-compatible base URL used by the runner process to query - /// metrics (PromQL API endpoints). + /// Base URL used by the runner to query Prometheus. pub metrics_query_url: Option, - /// Full OTLP HTTP metrics ingest endpoint used by nodes to export metrics - /// (backend-specific host and path). + /// OTLP HTTP endpoint used by nodes to export metrics. pub metrics_otlp_ingest_url: Option, - /// Optional Grafana base URL for printing/logging (human access). + /// Optional Grafana URL for logs/output. pub grafana_url: Option, } -/// Capability helper for deployers that are generic over scenario capability -/// markers. +/// Exposes observability capability from scenario capability markers. pub trait ObservabilityCapabilityProvider { fn observability_capability(&self) -> Option<&ObservabilityCapability>; } @@ -54,10 +48,7 @@ impl ObservabilityInputs { } } - /// Load observability inputs from environment variables. - /// - /// The `NOMOS_*` namespace applies to all deployers. Runner-specific env - /// vars are also accepted as aliases for backwards compatibility. + /// Load observability inputs from `LOGOS_BLOCKCHAIN_*` environment vars. pub fn from_env() -> Result { Ok(Self { metrics_query_url: read_url_var(&["LOGOS_BLOCKCHAIN_METRICS_QUERY_URL"])?, @@ -66,22 +57,24 @@ impl ObservabilityInputs { }) } - /// Overlay non-empty values from `overrides` onto `self`. + /// Override `self` values with non-empty values from `overrides`. #[must_use] pub fn with_overrides(mut self, overrides: Self) -> Self { if overrides.metrics_query_url.is_some() { self.metrics_query_url = overrides.metrics_query_url; } + if overrides.metrics_otlp_ingest_url.is_some() { self.metrics_otlp_ingest_url = overrides.metrics_otlp_ingest_url; } + if overrides.grafana_url.is_some() { self.grafana_url = overrides.grafana_url; } self } - /// Build the telemetry handle exposed in `RunContext::telemetry()`. + /// Build the telemetry handle used in `RunContext`. pub fn telemetry_handle(&self) -> Result { match self.metrics_query_url.clone() { Some(url) => Metrics::from_prometheus(url), @@ -99,9 +92,11 @@ fn read_url_var(keys: &[&'static str]) -> Result, MetricsError> { if raw.is_empty() { continue; } + return Url::parse(raw) .map(Some) .map_err(|err| MetricsError::new(format!("invalid {key}: {err}"))); } + Ok(None) } diff --git a/testing-framework/core/src/scenario/runtime/block_feed.rs b/testing-framework/core/src/scenario/runtime/block_feed.rs deleted file mode 100644 index e30ed3a..0000000 --- a/testing-framework/core/src/scenario/runtime/block_feed.rs +++ /dev/null @@ -1,197 +0,0 @@ -use std::{ - collections::HashSet, - sync::{ - Arc, - atomic::{AtomicU64, Ordering}, - }, - time::Duration, -}; - -use anyhow::{Context as _, Result}; -use lb_core::{block::Block, mantle::SignedMantleTx}; -use lb_http_api_common::paths::STORAGE_BLOCK; -use lb_node::HeaderId; -use tokio::{sync::broadcast, task::JoinHandle, time::sleep}; -use tracing::{debug, error}; - -use super::context::CleanupGuard; -use crate::nodes::ApiClient; - -const POLL_INTERVAL: Duration = Duration::from_secs(1); - -/// Broadcasts observed blocks to subscribers while tracking simple stats. -#[derive(Clone)] -pub struct BlockFeed { - inner: Arc, -} - -struct BlockFeedInner { - sender: broadcast::Sender>, - stats: Arc, -} - -/// Block header + payload snapshot emitted by the feed. -#[derive(Clone)] -pub struct BlockRecord { - pub header: HeaderId, - pub block: Arc>, -} - -/// Join handle for the background block feed task. -pub struct BlockFeedTask { - handle: JoinHandle<()>, -} - -impl BlockFeed { - #[must_use] - pub fn subscribe(&self) -> broadcast::Receiver> { - self.inner.sender.subscribe() - } - - #[must_use] - pub fn stats(&self) -> Arc { - Arc::clone(&self.inner.stats) - } - - fn ingest(&self, header: HeaderId, block: Block) { - self.inner.stats.record_block(&block); - let record = Arc::new(BlockRecord { - header, - block: Arc::new(block), - }); - - let _ = self.inner.sender.send(record); - } -} - -impl BlockFeedTask { - #[must_use] - /// Create a task handle wrapper for the block scanner. - pub const fn new(handle: JoinHandle<()>) -> Self { - Self { handle } - } -} - -/// Spawn a background task to poll blocks from the given client and broadcast -/// them. -pub async fn spawn_block_feed(client: ApiClient) -> Result<(BlockFeed, BlockFeedTask)> { - let (sender, _) = broadcast::channel(1024); - let feed = BlockFeed { - inner: Arc::new(BlockFeedInner { - sender, - stats: Arc::new(BlockStats::default()), - }), - }; - - let mut scanner = BlockScanner::new(client, feed.clone()); - scanner.catch_up().await?; - - let handle = tokio::spawn(async move { scanner.run().await }); - - Ok((feed, BlockFeedTask::new(handle))) -} - -struct BlockScanner { - client: ApiClient, - feed: BlockFeed, - seen: HashSet, -} - -impl BlockScanner { - fn new(client: ApiClient, feed: BlockFeed) -> Self { - Self { - client, - feed, - seen: HashSet::new(), - } - } - - async fn run(&mut self) { - loop { - if let Err(err) = self.catch_up().await { - error!(error = %err, error_debug = ?err, "block feed catch up failed"); - } - sleep(POLL_INTERVAL).await; - } - } - - async fn catch_up(&mut self) -> Result<()> { - let info = self.client.consensus_info().await?; - let tip = info.tip; - let mut remaining_height = info.height; - let mut stack = Vec::new(); - let mut cursor = tip; - - loop { - if self.seen.contains(&cursor) { - break; - } - - if remaining_height == 0 { - self.seen.insert(cursor); - break; - } - - let block = match self.client.storage_block(&cursor).await { - Ok(block) => block, - Err(err) => { - if err.is_decode() { - if let Ok(resp) = - self.client.post_json_response(STORAGE_BLOCK, &cursor).await - { - if let Ok(body) = resp.text().await { - error!(header = ?cursor, %body, "failed to decode block response"); - } - } - } - return Err(err.into()); - } - } - .context("missing block while catching up")?; - - let parent = block.header().parent(); - stack.push((cursor, block)); - - if self.seen.contains(&parent) || parent == cursor { - break; - } - - cursor = parent; - remaining_height = remaining_height.saturating_sub(1); - } - - let mut processed = 0usize; - while let Some((header, block)) = stack.pop() { - self.feed.ingest(header, block); - self.seen.insert(header); - processed += 1; - } - - debug!(processed, "block feed processed catch up batch"); - Ok(()) - } -} - -impl CleanupGuard for BlockFeedTask { - fn cleanup(self: Box) { - self.handle.abort(); - } -} - -/// Accumulates simple counters over observed blocks. -#[derive(Default)] -pub struct BlockStats { - total_transactions: AtomicU64, -} - -impl BlockStats { - fn record_block(&self, block: &Block) { - self.total_transactions - .fetch_add(block.transactions().len() as u64, Ordering::Relaxed); - } - - #[must_use] - pub fn total_transactions(&self) -> u64 { - self.total_transactions.load(Ordering::Relaxed) - } -} diff --git a/testing-framework/core/src/scenario/runtime/context.rs b/testing-framework/core/src/scenario/runtime/context.rs index 5a40f39..441d608 100644 --- a/testing-framework/core/src/scenario/runtime/context.rs +++ b/testing-framework/core/src/scenario/runtime/context.rs @@ -1,80 +1,62 @@ use std::{sync::Arc, time::Duration}; -use super::{block_feed::BlockFeed, metrics::Metrics, node_clients::ClusterClient}; -use crate::{ - nodes::ApiClient, - scenario::{NodeClients, NodeControlHandle}, - topology::{ - configs::wallet::WalletAccount, deployment::Topology, generation::GeneratedTopology, - }, -}; +use super::{metrics::Metrics, node_clients::ClusterClient}; +use crate::scenario::{Application, NodeClients, NodeControlHandle}; /// Shared runtime context available to workloads and expectations. -pub struct RunContext { - descriptors: GeneratedTopology, - cluster: Option, - node_clients: NodeClients, +pub struct RunContext { + descriptors: E::Deployment, + node_clients: NodeClients, metrics: RunMetrics, + expectation_cooldown: Duration, telemetry: Metrics, - block_feed: BlockFeed, - node_control: Option>, + feed: ::Feed, + node_control: Option>>, } -impl RunContext { - /// Builds a run context, clamping the requested duration so we always run - /// for at least a couple of slot lengths (or a fallback window if slots are - /// unknown). +impl RunContext { + /// Builds a run context from prepared deployment/runtime artifacts. #[must_use] pub fn new( - descriptors: GeneratedTopology, - cluster: Option, - node_clients: NodeClients, + descriptors: E::Deployment, + node_clients: NodeClients, run_duration: Duration, + expectation_cooldown: Duration, telemetry: Metrics, - block_feed: BlockFeed, - node_control: Option>, + feed: ::Feed, + node_control: Option>>, ) -> Self { - let metrics = RunMetrics::new(&descriptors, run_duration); + let metrics = RunMetrics::new(run_duration); Self { descriptors, - cluster, node_clients, metrics, + expectation_cooldown, telemetry, - block_feed, + feed, node_control, } } #[must_use] - pub const fn descriptors(&self) -> &GeneratedTopology { + pub fn descriptors(&self) -> &E::Deployment { &self.descriptors } #[must_use] - pub const fn topology(&self) -> Option<&Topology> { - self.cluster.as_ref() - } - - #[must_use] - pub const fn node_clients(&self) -> &NodeClients { + pub const fn node_clients(&self) -> &NodeClients { &self.node_clients } #[must_use] - pub fn random_node_client(&self) -> Option { - self.node_clients.any_client() + pub fn random_node_client(&self) -> Option { + self.node_clients.random_client() } #[must_use] - pub fn block_feed(&self) -> BlockFeed { - self.block_feed.clone() - } - - #[must_use] - pub fn wallet_accounts(&self) -> &[WalletAccount] { - self.descriptors.wallet_accounts() + pub fn feed(&self) -> ::Feed { + self.feed.clone() } #[must_use] @@ -88,8 +70,8 @@ impl RunContext { } #[must_use] - pub const fn expected_blocks(&self) -> u64 { - self.metrics.expected_consensus_blocks() + pub const fn expectation_cooldown(&self) -> Duration { + self.expectation_cooldown } #[must_use] @@ -98,23 +80,28 @@ impl RunContext { } #[must_use] - pub fn node_control(&self) -> Option> { + pub fn node_control(&self) -> Option>> { self.node_control.clone() } #[must_use] - pub const fn cluster_client(&self) -> ClusterClient<'_> { + pub const fn controls_nodes(&self) -> bool { + self.node_control.is_some() + } + + #[must_use] + pub const fn cluster_client(&self) -> ClusterClient<'_, E> { self.node_clients.cluster_client() } } /// Handle returned by the runner to control the lifecycle of the run. -pub struct RunHandle { - run_context: Arc, +pub struct RunHandle { + run_context: Arc>, cleanup_guard: Option>, } -impl Drop for RunHandle { +impl Drop for RunHandle { fn drop(&mut self) { if let Some(guard) = self.cleanup_guard.take() { guard.cleanup(); @@ -122,10 +109,10 @@ impl Drop for RunHandle { } } -impl RunHandle { +impl RunHandle { #[must_use] /// Build a handle from owned context and optional cleanup guard. - pub fn new(context: RunContext, cleanup_guard: Option>) -> Self { + pub fn new(context: RunContext, cleanup_guard: Option>) -> Self { Self { run_context: Arc::new(context), cleanup_guard, @@ -135,7 +122,7 @@ impl RunHandle { #[must_use] /// Build a handle from a shared context reference. pub(crate) fn from_shared( - context: Arc, + context: Arc>, cleanup_guard: Option>, ) -> Self { Self { @@ -146,7 +133,7 @@ impl RunHandle { #[must_use] /// Access the shared run context. - pub fn context(&self) -> &RunContext { + pub fn context(&self) -> &RunContext { &self.run_context } } @@ -155,68 +142,20 @@ impl RunHandle { #[derive(Clone, Copy)] pub struct RunMetrics { run_duration: Duration, - expected_blocks: u64, - block_interval_hint: Option, } impl RunMetrics { #[must_use] - pub fn new(descriptors: &GeneratedTopology, run_duration: Duration) -> Self { - Self::from_topology(descriptors, run_duration) - } - - #[must_use] - pub fn from_topology(descriptors: &GeneratedTopology, run_duration: Duration) -> Self { - let slot_duration = descriptors.slot_duration(); - - let active_slot_coeff = descriptors.config().consensus_params.active_slot_coeff; - let expected_blocks = - calculate_expected_blocks(run_duration, slot_duration, active_slot_coeff); - - let block_interval_hint = - slot_duration.map(|duration| duration.mul_f64(active_slot_coeff.clamp(0.0, 1.0))); - - Self { - run_duration, - expected_blocks, - block_interval_hint, - } + pub const fn new(run_duration: Duration) -> Self { + Self { run_duration } } #[must_use] pub const fn run_duration(&self) -> Duration { self.run_duration } - - #[must_use] - pub const fn expected_consensus_blocks(&self) -> u64 { - self.expected_blocks - } - - #[must_use] - pub const fn block_interval_hint(&self) -> Option { - self.block_interval_hint - } } pub trait CleanupGuard: Send { fn cleanup(self: Box); } - -/// Computes the minimum duration we’ll allow for a scenario run so that the -/// scheduler can observe a few block opportunities even if the caller -/// requested an extremely short window. -fn calculate_expected_blocks( - run_duration: Duration, - slot_duration: Option, - active_slot_coeff: f64, -) -> u64 { - let Some(slot_duration) = slot_duration else { - return 0; - }; - let slot_secs = slot_duration.as_secs_f64(); - let run_secs = run_duration.as_secs_f64(); - let expected = run_secs / slot_secs * active_slot_coeff; - - expected.ceil().clamp(0.0, u64::MAX as f64) as u64 -} diff --git a/testing-framework/core/src/scenario/runtime/deployer.rs b/testing-framework/core/src/scenario/runtime/deployer.rs index dfed4b1..0151327 100644 --- a/testing-framework/core/src/scenario/runtime/deployer.rs +++ b/testing-framework/core/src/scenario/runtime/deployer.rs @@ -1,7 +1,7 @@ use async_trait::async_trait; use super::runner::Runner; -use crate::scenario::{DynError, Scenario}; +use crate::scenario::{Application, DynError, Scenario}; /// Error returned when executing workloads or expectations. #[derive(Debug, thiserror::Error)] @@ -16,8 +16,8 @@ pub enum ScenarioError { /// Deploys a scenario into a target environment and returns a `Runner`. #[async_trait] -pub trait Deployer: Send + Sync { +pub trait Deployer: Send + Sync { type Error; - async fn deploy(&self, scenario: &Scenario) -> Result; + async fn deploy(&self, scenario: &Scenario) -> Result, Self::Error>; } diff --git a/testing-framework/core/src/scenario/runtime/metrics.rs b/testing-framework/core/src/scenario/runtime/metrics.rs index 7c6656f..d94d4cb 100644 --- a/testing-framework/core/src/scenario/runtime/metrics.rs +++ b/testing-framework/core/src/scenario/runtime/metrics.rs @@ -1,13 +1,18 @@ -use std::{collections::HashMap, sync::Arc}; +use std::{collections::HashMap, sync::Arc, thread}; -use prometheus_http_query::{Client as PrometheusClient, response::Data as PrometheusData}; +use prometheus_http_query::{ + Client as PrometheusClient, + response::{Data as PrometheusData, PromqlResult, Sample}, +}; use reqwest::Url; +use tokio::runtime::Runtime; use tracing::warn; pub const CONSENSUS_PROCESSED_BLOCKS: &str = "consensus_processed_blocks"; pub const CONSENSUS_TRANSACTIONS_TOTAL: &str = "consensus_transactions_total"; const CONSENSUS_TRANSACTIONS_NODE_QUERY: &str = r#"sum(consensus_transactions_total{job=~"node-.*"})"#; +const NODE_QUERY_FALLBACK_MESSAGE: &str = "falling back to aggregate consensus transaction counter"; /// Telemetry handles available during a run. #[derive(Clone, Default)] @@ -49,17 +54,11 @@ impl Metrics { } pub fn instant_values(&self, query: &str) -> Result, MetricsError> { - let handle = self - .prometheus() - .ok_or_else(|| MetricsError::new("prometheus endpoint unavailable"))?; - handle.instant_values(query) + self.require_prometheus()?.instant_values(query) } pub fn counter_value(&self, query: &str) -> Result { - let handle = self - .prometheus() - .ok_or_else(|| MetricsError::new("prometheus endpoint unavailable"))?; - handle.counter_value(query) + self.require_prometheus()?.counter_value(query) } pub fn consensus_processed_blocks(&self) -> Result { @@ -67,31 +66,52 @@ impl Metrics { } pub fn consensus_transactions_total(&self) -> Result { - let handle = self - .prometheus() - .ok_or_else(|| MetricsError::new("prometheus endpoint unavailable"))?; + let handle = self.require_prometheus()?; - match handle.instant_samples(CONSENSUS_TRANSACTIONS_NODE_QUERY) { - Ok(samples) if !samples.is_empty() => { - return Ok(samples.into_iter().map(|sample| sample.value).sum()); - } - Ok(_) => { - warn!( - query = CONSENSUS_TRANSACTIONS_NODE_QUERY, - "node-specific consensus transaction metric returned no samples; falling back to aggregate counter" - ); - } - Err(err) => { - warn!( - query = CONSENSUS_TRANSACTIONS_NODE_QUERY, - error = %err, - "failed to query node-specific consensus transaction metric; falling back to aggregate counter" - ); - } + if let Some(total) = query_node_transactions_total(&handle) { + return Ok(total); } handle.counter_value(CONSENSUS_TRANSACTIONS_TOTAL) } + + fn require_prometheus(&self) -> Result, MetricsError> { + self.prometheus() + .ok_or_else(|| MetricsError::new("prometheus endpoint unavailable")) + } +} + +fn query_node_transactions_total(handle: &PrometheusEndpoint) -> Option { + match handle.instant_samples(CONSENSUS_TRANSACTIONS_NODE_QUERY) { + Ok(samples) => samples_total_or_warn(samples), + Err(error) => { + warn_node_query_fallback(Some(&error)); + None + } + } +} + +fn samples_total_or_warn(samples: Vec) -> Option { + if samples.is_empty() { + warn_node_query_fallback(None); + return None; + } + + Some(samples.into_iter().map(|sample| sample.value).sum()) +} + +fn warn_node_query_fallback(error: Option<&MetricsError>) { + match error { + Some(error) => warn!( + query = CONSENSUS_TRANSACTIONS_NODE_QUERY, + error = %error, + "{NODE_QUERY_FALLBACK_MESSAGE}" + ), + None => warn!( + query = CONSENSUS_TRANSACTIONS_NODE_QUERY, + "{NODE_QUERY_FALLBACK_MESSAGE}" + ), + } } #[derive(Debug, thiserror::Error)] @@ -144,52 +164,8 @@ impl PrometheusEndpoint { &self, query: &str, ) -> Result, MetricsError> { - let query = query.to_owned(); - let client = self.client.clone(); - - let response = std::thread::spawn(move || -> Result<_, MetricsError> { - let runtime = tokio::runtime::Runtime::new() - .map_err(|err| MetricsError::new(format!("failed to create runtime: {err}")))?; - runtime - .block_on(async { client.query(&query).get().await }) - .map_err(|err| MetricsError::new(format!("prometheus query failed: {err}"))) - }) - .join() - .map_err(|_| MetricsError::new("prometheus query thread panicked"))??; - - let mut samples = Vec::new(); - match response.data() { - PrometheusData::Vector(vectors) => { - for vector in vectors { - samples.push(PrometheusInstantSample { - labels: vector.metric().clone(), - timestamp: vector.sample().timestamp(), - value: vector.sample().value(), - }); - } - } - PrometheusData::Matrix(ranges) => { - for range in ranges { - let labels = range.metric().clone(); - for sample in range.samples() { - samples.push(PrometheusInstantSample { - labels: labels.clone(), - timestamp: sample.timestamp(), - value: sample.value(), - }); - } - } - } - PrometheusData::Scalar(sample) => { - samples.push(PrometheusInstantSample { - labels: HashMap::new(), - timestamp: sample.timestamp(), - value: sample.value(), - }); - } - } - - Ok(samples) + query_prometheus(self.client.clone(), query.to_owned()) + .map(|response| samples_from_prometheus_data(response.data())) } pub fn instant_values(&self, query: &str) -> Result, MetricsError> { @@ -202,3 +178,55 @@ impl PrometheusEndpoint { .map(|values| values.into_iter().sum()) } } + +fn query_prometheus(client: PrometheusClient, query: String) -> Result { + thread::spawn(move || -> Result<_, MetricsError> { + let runtime = Runtime::new() + .map_err(|error| MetricsError::new(format!("failed to create runtime: {error}")))?; + runtime + .block_on(async { client.query(&query).get().await }) + .map_err(|error| MetricsError::new(format!("prometheus query failed: {error}"))) + }) + .join() + .map_err(|_| MetricsError::new("prometheus query thread panicked"))? +} + +fn samples_from_prometheus_data(data: &PrometheusData) -> Vec { + let mut samples = Vec::new(); + + match data { + PrometheusData::Vector(vectors) => { + samples.extend(vectors.iter().map(|vector| PrometheusInstantSample { + labels: vector.metric().clone(), + timestamp: vector.sample().timestamp(), + value: vector.sample().value(), + })); + } + PrometheusData::Matrix(ranges) => { + for range in ranges { + let labels = range.metric().clone(); + samples.extend( + range + .samples() + .iter() + .map(|sample| PrometheusInstantSample { + labels: labels.clone(), + timestamp: sample.timestamp(), + value: sample.value(), + }), + ); + } + } + PrometheusData::Scalar(sample) => samples.push(scalar_sample(sample)), + } + + samples +} + +fn scalar_sample(sample: &Sample) -> PrometheusInstantSample { + PrometheusInstantSample { + labels: HashMap::new(), + timestamp: sample.timestamp(), + value: sample.value(), + } +} diff --git a/testing-framework/core/src/scenario/runtime/mod.rs b/testing-framework/core/src/scenario/runtime/mod.rs index 82c060b..6b969c7 100644 --- a/testing-framework/core/src/scenario/runtime/mod.rs +++ b/testing-framework/core/src/scenario/runtime/mod.rs @@ -1,12 +1,65 @@ -mod block_feed; pub mod context; mod deployer; pub mod metrics; mod node_clients; +pub mod readiness; mod runner; -pub use block_feed::{BlockFeed, BlockFeedTask, BlockRecord, BlockStats, spawn_block_feed}; +use async_trait::async_trait; pub use context::{CleanupGuard, RunContext, RunHandle, RunMetrics}; pub use deployer::{Deployer, ScenarioError}; pub use node_clients::NodeClients; +pub use readiness::{ + HttpReadinessRequirement, ReadinessError, StabilizationConfig, wait_for_http_ports, + wait_for_http_ports_with_host, wait_for_http_ports_with_host_and_requirement, + wait_for_http_ports_with_requirement, wait_http_readiness, wait_until_stable, +}; pub use runner::Runner; +use tokio::task::JoinHandle; + +use crate::{env::Application, scenario::DynError}; + +/// Cloneable feed handle exposed to workloads and expectations. +pub trait Feed: Clone + Send + Sync + 'static { + type Subscription: Send + 'static; + + fn subscribe(&self) -> Self::Subscription; +} + +/// Background worker driving a cluster feed. +#[async_trait] +pub trait FeedRuntime: Send + 'static { + type Feed: Feed; + + async fn run(self: Box); +} + +/// Cleanup guard for a spawned feed worker. +pub struct FeedHandle { + handle: JoinHandle<()>, +} + +impl FeedHandle { + pub const fn new(handle: JoinHandle<()>) -> Self { + Self { handle } + } +} + +impl CleanupGuard for FeedHandle { + fn cleanup(self: Box) { + self.handle.abort(); + } +} + +/// Spawn a background task that drives the environment-provided feed. +pub async fn spawn_feed( + client: E::NodeClient, +) -> Result<(::Feed, FeedHandle), DynError> { + let (feed, worker) = E::prepare_feed(client).await?; + + let handle = tokio::spawn(async move { + Box::new(worker).run().await; + }); + + Ok((feed, FeedHandle::new(handle))) +} diff --git a/testing-framework/core/src/scenario/runtime/node_clients.rs b/testing-framework/core/src/scenario/runtime/node_clients.rs index c5cbc56..9f31122 100644 --- a/testing-framework/core/src/scenario/runtime/node_clients.rs +++ b/testing-framework/core/src/scenario/runtime/node_clients.rs @@ -1,158 +1,137 @@ -use std::{ - pin::Pin, - sync::{Arc, RwLock}, -}; +use std::sync::Arc; -use rand::{Rng as _, seq::SliceRandom as _, thread_rng}; +use parking_lot::RwLock; +use rand::{seq::SliceRandom as _, thread_rng}; -use crate::{ - nodes::ApiClient, - scenario::DynError, - topology::{deployment::Topology, generation::GeneratedTopology}, -}; +use crate::scenario::{Application, DynError}; /// Collection of API clients for the node set. -#[derive(Clone, Default)] -pub struct NodeClients { - inner: Arc>, +pub struct NodeClients { + inner: Arc>>, } -#[derive(Default)] -struct NodeClientsInner { - nodes: Vec, +struct NodeClientsInner { + nodes: Vec, } -impl NodeClients { +impl Default for NodeClients { + fn default() -> Self { + Self { + inner: Arc::new(RwLock::new(NodeClientsInner { nodes: Vec::new() })), + } + } +} + +impl Clone for NodeClients { + fn clone(&self) -> Self { + Self { + inner: Arc::clone(&self.inner), + } + } +} + +impl NodeClients { #[must_use] /// Build clients from preconstructed vectors. - pub fn new(nodes: Vec) -> Self { + pub fn new(nodes: Vec) -> Self { Self { inner: Arc::new(RwLock::new(NodeClientsInner { nodes })), } } #[must_use] - /// Derive clients from a spawned topology. - pub fn from_topology(_descriptors: &GeneratedTopology, topology: &Topology) -> Self { - let node_clients = topology.nodes().iter().map(|node| { - let testing = node.testing_url(); - ApiClient::from_urls(node.url(), testing) - }); + /// Immutable client snapshot at the current moment. + /// + /// This clones the current vector so callers can iterate across `.await` + /// boundaries without holding the internal lock. + pub fn snapshot(&self) -> Vec { + self.inner.read().nodes.clone() + } - Self::new(node_clients.collect()) + /// Execute a synchronous read against the current client slice. + /// + /// Prefer this over `snapshot()` when no async boundary is involved. + pub fn with_clients(&self, f: impl FnOnce(&[E::NodeClient]) -> R) -> R { + let guard = self.inner.read(); + f(&guard.nodes) } #[must_use] - /// Node API clients. - pub fn node_clients(&self) -> Vec { - self.inner - .read() - .unwrap_or_else(|poisoned| poisoned.into_inner()) - .nodes - .clone() + /// Choose a random client from the current snapshot. + pub fn random_client(&self) -> Option { + self.with_clients(|clients| clients.choose(&mut thread_rng()).cloned()) } #[must_use] - /// Choose a random node client if present. - pub fn random_node(&self) -> Option { - let nodes = self.node_clients(); - if nodes.is_empty() { - return None; - } - let mut rng = thread_rng(); - let idx = rng.gen_range(0..nodes.len()); - nodes.get(idx).cloned() - } - - /// Iterator over all clients. - pub fn all_clients(&self) -> Vec { - let guard = self - .inner - .read() - .unwrap_or_else(|poisoned| poisoned.into_inner()); - - guard.nodes.iter().cloned().collect() + pub fn len(&self) -> usize { + self.inner.read().nodes.len() } #[must_use] - /// Choose any random client from nodes. - pub fn any_client(&self) -> Option { - let guard = self - .inner - .read() - .unwrap_or_else(|poisoned| poisoned.into_inner()); - - let total = guard.nodes.len(); - if total == 0 { - return None; - } - let mut rng = thread_rng(); - let choice = rng.gen_range(0..total); - guard.nodes.get(choice).cloned() + pub fn is_empty(&self) -> bool { + self.len() == 0 } #[must_use] /// Convenience wrapper for fan-out queries. - pub const fn cluster_client(&self) -> ClusterClient<'_> { + pub const fn cluster_client(&self) -> ClusterClient<'_, E> { ClusterClient::new(self) } - pub fn add_node(&self, client: ApiClient) { - let mut guard = self - .inner - .write() - .unwrap_or_else(|poisoned| poisoned.into_inner()); - + pub fn add_node(&self, client: E::NodeClient) { + let mut guard = self.inner.write(); guard.nodes.push(client); } pub fn clear(&self) { - let mut guard = self - .inner - .write() - .unwrap_or_else(|poisoned| poisoned.into_inner()); - + let mut guard = self.inner.write(); guard.nodes.clear(); } + + fn shuffled_snapshot(&self) -> Vec { + let mut clients = self.snapshot(); + clients.shuffle(&mut thread_rng()); + clients + } } -pub struct ClusterClient<'a> { - node_clients: &'a NodeClients, +pub struct ClusterClient<'a, E: Application> { + node_clients: &'a NodeClients, } -impl<'a> ClusterClient<'a> { +impl<'a, E: Application> ClusterClient<'a, E> { #[must_use] - /// Build a cluster client that can try multiple nodes. - pub const fn new(node_clients: &'a NodeClients) -> Self { + pub const fn new(node_clients: &'a NodeClients) -> Self { Self { node_clients } } - /// Try all node clients until one call succeeds, shuffling order each time. - pub async fn try_all_clients( + pub async fn try_all_clients( &self, - mut f: impl for<'b> FnMut( - &'b ApiClient, - ) -> Pin> + Send + 'b>> - + Send, - ) -> Result + mut f: impl for<'b> FnMut(&'b E::NodeClient) -> Fut + Send, + ) -> Result where - E: Into, + for<'b> Fut: Future> + Send + 'b, + ErrorType: Into, { - let mut clients = self.node_clients.all_clients(); + // Snapshot once so retries can await without holding the internal lock. + let clients = self.node_clients.shuffled_snapshot(); + if clients.is_empty() { return Err("cluster client has no api clients".into()); } - clients.shuffle(&mut thread_rng()); - - let mut last_err = None; + let mut last_error = None; for client in &clients { match f(client).await { Ok(value) => return Ok(value), - Err(err) => last_err = Some(err.into()), + Err(error) => last_error = Some(error.into()), } } - Err(last_err.unwrap_or_else(|| "cluster client exhausted all nodes".into())) + if let Some(error) = last_error { + return Err(error); + } + + Err("cluster client exhausted all nodes".into()) } } diff --git a/testing-framework/core/src/scenario/runtime/readiness.rs b/testing-framework/core/src/scenario/runtime/readiness.rs new file mode 100644 index 0000000..f44e929 --- /dev/null +++ b/testing-framework/core/src/scenario/runtime/readiness.rs @@ -0,0 +1,311 @@ +use std::{ + future::Future, + time::{Duration, Instant}, +}; + +use reqwest::{Client, Url}; +use thiserror::Error; + +use crate::{ + adjust_timeout, + retry::{RetryConfig, retry_async}, + scenario::DynError, +}; + +const DEFAULT_POLL_INTERVAL: Duration = Duration::from_millis(200); +const DEFAULT_TIMEOUT: Duration = Duration::from_secs(60); +const LOCALHOST: &str = "127.0.0.1"; +const NO_STABILIZATION_DETAILS: &str = "no probe details reported"; +const NO_FAILING_ENDPOINTS: &str = ""; + +#[derive(Debug, Error)] +pub enum ReadinessError { + #[error("readiness probe timed out: {message}")] + ProbeTimeout { message: String }, + #[error("invalid readiness endpoint '{endpoint}': {reason}")] + InvalidEndpoint { endpoint: String, reason: String }, + #[error("cluster stabilization failed: {source}")] + ClusterStable { + #[source] + source: DynError, + }, + #[error("cluster stabilization timed out after {timeout:?}: {details}")] + StabilizationTimeout { timeout: Duration, details: String }, + #[error("cluster stabilization probe failed: {source}")] + StabilizationProbe { + #[source] + source: DynError, + }, +} + +#[derive(Debug, Clone, Copy, Eq, PartialEq)] +pub enum HttpReadinessRequirement { + AllNodesReady, + AnyNodeReady, + AtLeast(usize), +} + +#[derive(Debug)] +struct HttpProbeStatus { + endpoint: Url, + ok: bool, + detail: String, +} + +#[derive(Debug, Clone, Copy)] +pub struct StabilizationConfig { + pub timeout: Duration, + pub poll_interval: Duration, +} + +impl StabilizationConfig { + #[must_use] + pub const fn new(timeout: Duration, poll_interval: Duration) -> Self { + Self { + timeout, + poll_interval, + } + } +} + +fn normalize_endpoint_path(endpoint_path: &str) -> String { + if endpoint_path.starts_with('/') { + endpoint_path.to_string() + } else { + format!("/{endpoint_path}") + } +} + +fn build_local_endpoints(ports: &[u16], endpoint_path: &str) -> Result, ReadinessError> { + build_endpoints_with_host(ports, LOCALHOST, endpoint_path) +} + +fn build_endpoints_with_host( + ports: &[u16], + host: &str, + endpoint_path: &str, +) -> Result, ReadinessError> { + let endpoint_path = normalize_endpoint_path(endpoint_path); + + ports + .iter() + .map(|port| format!("http://{host}:{port}{endpoint_path}")) + .map(|endpoint| { + Url::parse(&endpoint).map_err(|source| ReadinessError::InvalidEndpoint { + endpoint, + reason: source.to_string(), + }) + }) + .collect() +} + +fn requirement_satisfied( + statuses: &[HttpProbeStatus], + requirement: HttpReadinessRequirement, +) -> bool { + let ready = ready_count(statuses); + match requirement { + HttpReadinessRequirement::AllNodesReady => ready == statuses.len(), + HttpReadinessRequirement::AnyNodeReady => ready >= 1, + HttpReadinessRequirement::AtLeast(min_ready) => ready >= min_ready, + } +} + +fn ready_count(statuses: &[HttpProbeStatus]) -> usize { + statuses.iter().filter(|status| status.ok).count() +} + +fn format_http_timeout_message( + statuses: &[HttpProbeStatus], + requirement: HttpReadinessRequirement, +) -> String { + let summary = timeout_summary(statuses); + let required = required_ready_nodes(requirement, summary.total); + + format!( + "timed out waiting for readiness {:?}; ready={}, required={}, total={}, failing endpoints: {}", + requirement, summary.ready, required, summary.total, summary.failed_list + ) +} + +struct TimeoutSummary { + ready: usize, + total: usize, + failed_list: String, +} + +fn timeout_summary(statuses: &[HttpProbeStatus]) -> TimeoutSummary { + let total = statuses.len(); + + TimeoutSummary { + ready: ready_count(statuses), + total, + failed_list: format_failed_endpoints(statuses), + } +} + +fn failed_endpoints(statuses: &[HttpProbeStatus]) -> Vec { + statuses + .iter() + .filter(|status| !status.ok) + .map(|status| format!("{} ({})", status.endpoint, status.detail)) + .collect() +} + +fn format_failed_endpoints(statuses: &[HttpProbeStatus]) -> String { + let failed = failed_endpoints(statuses); + if failed.is_empty() { + return NO_FAILING_ENDPOINTS.to_string(); + } + + failed.join(", ") +} + +fn required_ready_nodes(requirement: HttpReadinessRequirement, total: usize) -> usize { + match requirement { + HttpReadinessRequirement::AllNodesReady => total, + HttpReadinessRequirement::AnyNodeReady => usize::from(total > 0), + HttpReadinessRequirement::AtLeast(min_ready) => min_ready, + } +} + +fn stabilization_details(failures: &[String]) -> String { + if failures.is_empty() { + NO_STABILIZATION_DETAILS.to_string() + } else { + failures.join(", ") + } +} + +async fn collect_http_statuses(client: &Client, endpoints: &[Url]) -> Vec { + let futures = endpoints.iter().map(|endpoint| async move { + match client.get(endpoint.clone()).send().await { + Ok(response) => { + let status = response.status(); + HttpProbeStatus { + endpoint: endpoint.clone(), + ok: status.is_success(), + detail: format!("status {}", status.as_u16()), + } + } + + Err(err) => HttpProbeStatus { + endpoint: endpoint.clone(), + ok: false, + detail: err.to_string(), + }, + } + }); + futures::future::join_all(futures).await +} + +pub async fn wait_until_stable( + config: StabilizationConfig, + mut probe: F, +) -> Result<(), ReadinessError> +where + F: FnMut() -> Fut, + Fut: Future, DynError>>, +{ + let timeout = adjust_timeout(config.timeout); + let poll_interval = config.poll_interval; + let deadline = Instant::now() + timeout; + + loop { + let failures = probe() + .await + .map_err(|source| ReadinessError::StabilizationProbe { source })?; + if failures.is_empty() { + return Ok(()); + } + + if Instant::now() >= deadline { + let details = stabilization_details(&failures); + + return Err(ReadinessError::StabilizationTimeout { timeout, details }); + } + + tokio::time::sleep(poll_interval).await; + } +} + +pub async fn wait_http_readiness( + endpoints: &[Url], + requirement: HttpReadinessRequirement, +) -> Result<(), ReadinessError> { + if endpoints.is_empty() { + return Ok(()); + } + + let (poll_interval, max_attempts) = http_retry_plan(); + let client = Client::new(); + let retry = RetryConfig::bounded(max_attempts, poll_interval, poll_interval); + retry_async(retry, |_| async { + let statuses = collect_http_statuses(&client, endpoints).await; + if requirement_satisfied(&statuses, requirement) { + Ok(()) + } else { + Err(statuses) + } + }) + .await + .map_err(|statuses| ReadinessError::ProbeTimeout { + message: format_http_timeout_message(&statuses, requirement), + }) +} + +fn http_retry_plan() -> (Duration, usize) { + let timeout_duration = adjust_timeout(DEFAULT_TIMEOUT); + let poll_interval = DEFAULT_POLL_INTERVAL; + let max_attempts = retry_attempts(timeout_duration, poll_interval); + (poll_interval, max_attempts) +} + +fn retry_attempts(timeout: Duration, interval: Duration) -> usize { + let timeout_ms = timeout.as_millis(); + let interval_ms = interval.as_millis(); + + timeout_ms.div_ceil(interval_ms).max(1).saturating_add(1) as usize +} + +pub async fn wait_for_http_ports(ports: &[u16], endpoint_path: &str) -> Result<(), ReadinessError> { + wait_for_http_ports_with_requirement(ports, endpoint_path, default_readiness_requirement()) + .await +} + +pub async fn wait_for_http_ports_with_requirement( + ports: &[u16], + endpoint_path: &str, + requirement: HttpReadinessRequirement, +) -> Result<(), ReadinessError> { + let endpoints = build_local_endpoints(ports, endpoint_path)?; + wait_http_readiness(&endpoints, requirement).await +} + +pub async fn wait_for_http_ports_with_host( + ports: &[u16], + host: &str, + endpoint_path: &str, +) -> Result<(), ReadinessError> { + wait_for_http_ports_with_host_and_requirement( + ports, + host, + endpoint_path, + default_readiness_requirement(), + ) + .await +} + +pub async fn wait_for_http_ports_with_host_and_requirement( + ports: &[u16], + host: &str, + endpoint_path: &str, + requirement: HttpReadinessRequirement, +) -> Result<(), ReadinessError> { + let endpoints = build_endpoints_with_host(ports, host, endpoint_path)?; + wait_http_readiness(&endpoints, requirement).await +} + +const fn default_readiness_requirement() -> HttpReadinessRequirement { + HttpReadinessRequirement::AllNodesReady +} diff --git a/testing-framework/core/src/scenario/runtime/runner.rs b/testing-framework/core/src/scenario/runtime/runner.rs index 663c6d7..c1502d0 100644 --- a/testing-framework/core/src/scenario/runtime/runner.rs +++ b/testing-framework/core/src/scenario/runtime/runner.rs @@ -1,34 +1,34 @@ -use std::{any::Any, panic::AssertUnwindSafe, sync::Arc, time::Duration}; +use std::{any::Any, future::Future, panic::AssertUnwindSafe, sync::Arc, time::Duration}; -use futures::FutureExt as _; +use futures::{FutureExt as _, future}; use tokio::{ - task::JoinSet, + task::{JoinError, JoinSet}, time::{sleep, timeout}, }; use super::deployer::ScenarioError; use crate::scenario::{ - DynError, Expectation, Scenario, + Application, DynError, Expectation, Scenario, runtime::context::{CleanupGuard, RunContext, RunHandle}, }; type WorkloadOutcome = Result<(), DynError>; -const COOLDOWN_BLOCK_INTERVAL_MULTIPLIER: f64 = 5.0; const MIN_NODE_CONTROL_COOLDOWN: Duration = Duration::from_secs(30); const DEFAULT_BLOCK_FEED_SETTLE_WAIT: Duration = Duration::from_secs(1); const MIN_BLOCK_FEED_SETTLE_WAIT: Duration = Duration::from_secs(2); +const UNKNOWN_PANIC: &str = ""; /// Represents a fully prepared environment capable of executing a scenario. -pub struct Runner { - context: Arc, +pub struct Runner { + context: Arc>, cleanup_guard: Option>, } -impl Runner { +impl Runner { /// Construct a runner from the run context and optional cleanup guard. #[must_use] - pub fn new(context: RunContext, cleanup_guard: Option>) -> Self { + pub fn new(context: RunContext, cleanup_guard: Option>) -> Self { Self { context: Arc::new(context), cleanup_guard, @@ -37,7 +37,7 @@ impl Runner { /// Access the underlying run context. #[must_use] - pub fn context(&self) -> Arc { + pub fn context(&self) -> Arc> { Arc::clone(&self.context) } @@ -47,124 +47,129 @@ impl Runner { } } - pub(crate) fn into_run_handle(mut self) -> RunHandle { + pub(crate) fn into_run_handle(mut self) -> RunHandle { RunHandle::from_shared(Arc::clone(&self.context), self.cleanup_guard.take()) } - /// Executes the scenario by driving workloads first and then evaluating all - /// expectations. On any failure it cleans up resources and propagates the - /// error to the caller. + /// Execute workloads and evaluate expectations. pub async fn run( mut self, - scenario: &mut Scenario, - ) -> Result + scenario: &mut Scenario, + ) -> Result, ScenarioError> where Caps: Send + Sync, { let context = self.context(); - if let Err(error) = - Self::prepare_expectations(scenario.expectations_mut(), context.as_ref()).await - { - self.cleanup(); - return Err(error); - } - if let Err(error) = Self::run_workloads(&context, scenario).await { - self.cleanup(); - return Err(error); - } + self.run_step(Self::prepare_expectations( + scenario.expectations_mut(), + context.as_ref(), + )) + .await?; - Self::settle_before_expectations(&context).await; + self.run_step(Self::run_workloads(Arc::clone(&context), scenario)) + .await?; - if let Err(error) = - Self::run_expectations(scenario.expectations_mut(), context.as_ref()).await - { - self.cleanup(); - return Err(error); - } + Self::settle_before_expectations(context.as_ref()).await; + + self.run_step(Self::run_expectations( + scenario.expectations_mut(), + context.as_ref(), + )) + .await?; Ok(self.into_run_handle()) } - async fn prepare_expectations( - expectations: &mut [Box], - context: &RunContext, + async fn run_step( + &mut self, + step: impl Future>, ) -> Result<(), ScenarioError> { - for expectation in expectations { - if let Err(source) = expectation.start_capture(context).await { - return Err(ScenarioError::ExpectationCapture(source)); + match step.await { + Ok(()) => Ok(()), + Err(error) => { + self.cleanup(); + Err(error) } } + } + + async fn prepare_expectations( + expectations: &mut [Box>], + context: &RunContext, + ) -> Result<(), ScenarioError> { + for expectation in expectations { + expectation + .start_capture(context) + .await + .map_err(ScenarioError::ExpectationCapture)?; + } + Ok(()) } - /// Spawns every workload, waits until the configured duration elapses (or a - /// workload fails), and then aborts the remaining tasks. async fn run_workloads( - context: &Arc, - scenario: &Scenario, + context: Arc>, + scenario: &Scenario, ) -> Result<(), ScenarioError> where Caps: Send + Sync, { if scenario.workloads().is_empty() { - let duration = scenario.duration(); - if !duration.is_zero() { - sleep(duration).await; - } - - if let Some(cooldown) = Self::cooldown_duration(context.as_ref()) { - if !cooldown.is_zero() { - sleep(cooldown).await; - } - } - - return Ok(()); + return idle_until_duration(scenario.duration()).await; } - let mut workloads = Self::spawn_workloads(scenario, context); - let _ = Self::drive_until_timer(&mut workloads, scenario.duration()).await?; + let mut workloads = Self::spawn_workloads(scenario, Arc::clone(&context)); + Self::run_workload_window(&mut workloads, scenario.duration()).await?; - // Keep workloads running during the cooldown window so that late - // inclusions (especially DA parent-linked ops) still have a chance to - // land before expectations evaluate. We still abort everything at the - // end of cooldown to prevent leaking tasks across runs. - if let Some(cooldown) = Self::cooldown_duration(context.as_ref()) { - if !cooldown.is_zero() { - if workloads.is_empty() { - sleep(cooldown).await; - } else { - let _ = Self::drive_until_timer(&mut workloads, cooldown).await?; - } - } + if let Some(cooldown) = nonzero_cooldown(Self::cooldown_duration(context.as_ref())) { + Self::run_workload_window(&mut workloads, cooldown).await?; } Self::drain_workloads(&mut workloads).await } - async fn settle_before_expectations(context: &Arc) { - // `BlockFeed` polls node storage on an interval. After we abort workloads - // we give the feed a moment to catch up with the last blocks that might - // include workload operations so expectations evaluate on a more stable - // snapshot. - let has_node_control = context.node_control().is_some(); - let hint = context.run_metrics().block_interval_hint(); - if !has_node_control && hint.is_none() { + async fn run_workload_window( + workloads: &mut JoinSet, + duration: Duration, + ) -> Result<(), ScenarioError> { + let _completed = Self::drive_until_timer(workloads, duration).await?; + Ok(()) + } + + async fn settle_before_expectations(context: &RunContext) { + // Give the feed a short catch-up window before evaluating expectations. + let Some(wait) = Self::settle_wait_duration(context) else { return; + }; + + sleep(wait).await; + } + + fn settle_wait_duration(context: &RunContext) -> Option { + let has_node_control = context.controls_nodes(); + let configured_wait = context.expectation_cooldown(); + + if configured_wait.is_zero() && !has_node_control { + return None; } - let mut wait = hint.unwrap_or(DEFAULT_BLOCK_FEED_SETTLE_WAIT); - wait = wait.max(MIN_BLOCK_FEED_SETTLE_WAIT); - sleep(wait).await; + let wait = if configured_wait.is_zero() { + DEFAULT_BLOCK_FEED_SETTLE_WAIT + } else { + configured_wait + }; + + Some(wait.max(MIN_BLOCK_FEED_SETTLE_WAIT)) } /// Evaluates every registered expectation, aggregating failures so callers /// can see all missing conditions in a single report. async fn run_expectations( - expectations: &mut [Box], - context: &RunContext, + expectations: &mut [Box>], + context: &RunContext, ) -> Result<(), ScenarioError> { - let mut failures: Vec<(String, DynError)> = Vec::new(); + let mut failures = Vec::new(); for expectation in expectations { if let Err(source) = expectation.evaluate(context).await { failures.push((expectation.name().to_owned(), source)); @@ -175,52 +180,32 @@ impl Runner { return Ok(()); } - let summary = failures - .into_iter() - .map(|(name, source)| format!("{name}: {source}")) - .collect::>() - .join("\n"); - - Err(ScenarioError::Expectations(summary.into())) + Err(ScenarioError::Expectations( + expectation_failure_summary(failures).into(), + )) } - fn cooldown_duration(context: &RunContext) -> Option { - let metrics = context.run_metrics(); - let needs_stabilization = context.node_control().is_some(); - if let Some(interval) = metrics.block_interval_hint() { - if interval.is_zero() { - return None; - } - let mut wait = interval.mul_f64(COOLDOWN_BLOCK_INTERVAL_MULTIPLIER); - // Expectations observe blocks via `BlockFeed`, which ultimately - // follows the chain information returned by `consensus_info`. - // When the consensus uses a security parameter (finality depth), - // newly included operations can take ~k blocks to show up in the - // observable chain. Short smoke runs otherwise end up evaluating - // before finality catches up, systematically failing inclusion - // expectations (especially for DA, where ops are parent-linked). - let security_param = context - .descriptors() - .config() - .consensus_params - .security_param; - wait = wait.max(interval.mul_f64(security_param.get() as f64)); - if needs_stabilization { - wait = wait.max(MIN_NODE_CONTROL_COOLDOWN); - } - Some(wait) - } else if needs_stabilization { - Some(MIN_NODE_CONTROL_COOLDOWN) - } else { - None + fn cooldown_duration(context: &RunContext) -> Option { + // Managed environments need a minimum cooldown so feed and expectations + // observe stabilized state. + let needs_stabilization = context.controls_nodes(); + + let mut wait = context.expectation_cooldown(); + + if wait.is_zero() { + return needs_stabilization.then_some(MIN_NODE_CONTROL_COOLDOWN); } + + if needs_stabilization { + wait = wait.max(MIN_NODE_CONTROL_COOLDOWN); + } + Some(wait) } - /// Spawns each workload inside its own task and returns the join set for - /// cooperative management. + /// Spawn each workload in its own task. fn spawn_workloads( - scenario: &Scenario, - context: &Arc, + scenario: &Scenario, + context: Arc>, ) -> JoinSet where Caps: Send + Sync, @@ -228,9 +213,11 @@ impl Runner { let mut workloads = JoinSet::new(); for workload in scenario.workloads() { let workload = Arc::clone(workload); - let ctx = Arc::clone(context); + let ctx = Arc::clone(&context); workloads.spawn(async move { + // Convert panics into workload errors so the runner can report + // them instead of aborting the process. let outcome = AssertUnwindSafe(async { workload.start(ctx.as_ref()).await }) .catch_unwind() .await; @@ -244,7 +231,7 @@ impl Runner { workloads } - /// Polls workload tasks until the timeout fires or one reports an error. + /// Drive workload tasks until timeout or failure. async fn drive_until_timer( workloads: &mut JoinSet, duration: Duration, @@ -253,61 +240,65 @@ impl Runner { while let Some(result) = workloads.join_next().await { Self::map_join_result(result)?; } + Ok(()) }; - timeout(duration, run_future) - .await - .map_or(Ok(true), |result| { + match timeout(duration, run_future).await { + Ok(result) => { result?; - Ok(false) - }) + Ok(true) + } + + Err(_) => Ok(false), + } } - /// Aborts and drains any remaining workload tasks so we do not leak work - /// across scenario runs. + fn map_join_result(result: Result) -> Result<(), ScenarioError> { + match result { + Ok(Ok(())) => Ok(()), + Ok(Err(err)) => Err(ScenarioError::Workload(err)), + Err(join_err) => Err(ScenarioError::Workload(join_err.into())), + } + } + + /// Wait for all workloads to exit. async fn drain_workloads( workloads: &mut JoinSet, ) -> Result<(), ScenarioError> { - workloads.abort_all(); - while let Some(result) = workloads.join_next().await { Self::map_join_result(result)?; } Ok(()) } - - /// Converts the outcome of a workload task into the canonical scenario - /// error, tolerating cancellation when the runner aborts unfinished tasks. - fn map_join_result( - result: Result, - ) -> Result<(), ScenarioError> { - match result { - Ok(outcome) => outcome.map_err(ScenarioError::Workload), - Err(join_err) if join_err.is_cancelled() => Ok(()), - Err(join_err) => Err(ScenarioError::Workload( - format!("workload task failed: {join_err}").into(), - )), - } - } } -/// Attempts to turn a panic payload into a readable string for diagnostics. +async fn idle_until_duration(duration: Duration) -> Result<(), ScenarioError> { + if duration.is_zero() { + return Ok(()); + } + + let _ = timeout(duration, async { future::pending::<()>().await }).await; + Ok(()) +} + +fn nonzero_cooldown(cooldown: Option) -> Option { + cooldown.filter(|duration| !duration.is_zero()) +} + fn panic_message(panic: Box) -> String { - panic.downcast::().map_or_else( - |panic| { - panic.downcast::<&'static str>().map_or_else( - |_| "unknown panic".to_owned(), - |message| (*message).to_owned(), - ) - }, - |message| *message, - ) + panic + .downcast_ref::<&str>() + .map(|message| (*message).to_string()) + .or_else(|| panic.downcast_ref::().cloned()) + .unwrap_or_else(|| UNKNOWN_PANIC.to_owned()) } -impl Drop for Runner { - fn drop(&mut self) { - self.cleanup(); - } +fn expectation_failure_summary(failures: Vec<(String, DynError)>) -> String { + failures + .into_iter() + .map(|(name, source)| format!("{name}: {source}")) + .collect::>() + .join("\n") } diff --git a/testing-framework/core/src/scenario/workload.rs b/testing-framework/core/src/scenario/workload.rs index 872584e..949aab9 100644 --- a/testing-framework/core/src/scenario/workload.rs +++ b/testing-framework/core/src/scenario/workload.rs @@ -1,24 +1,23 @@ use async_trait::async_trait; -use super::{DynError, Expectation, RunContext, runtime::context::RunMetrics}; -use crate::topology::generation::GeneratedTopology; +use super::{Application, DynError, Expectation, RunContext, runtime::context::RunMetrics}; #[async_trait] /// Describes an action sequence executed during a scenario run. -pub trait Workload: Send + Sync { +pub trait Workload: Send + Sync { fn name(&self) -> &str; - fn expectations(&self) -> Vec> { + fn expectations(&self) -> Vec>> { Vec::new() } fn init( &mut self, - _descriptors: &GeneratedTopology, + _descriptors: &E::Deployment, _run_metrics: &RunMetrics, ) -> Result<(), DynError> { Ok(()) } - async fn start(&self, ctx: &RunContext) -> Result<(), DynError>; + async fn start(&self, ctx: &RunContext) -> Result<(), DynError>; } diff --git a/testing-framework/core/src/topology/config.rs b/testing-framework/core/src/topology/config.rs deleted file mode 100644 index 0d9a903..0000000 --- a/testing-framework/core/src/topology/config.rs +++ /dev/null @@ -1,437 +0,0 @@ -use std::{collections::HashMap, path::PathBuf, sync::Arc}; - -use lb_core::{ - mantle::GenesisTx as _, - sdp::{Locator, ServiceType}, -}; -use lb_node::config::RunConfig; -use testing_framework_config::topology::{ - configs::{ - api::{ApiConfigError, create_api_configs}, - base::{BaseConfigError, BaseConfigs, build_base_configs}, - consensus::{ - ConsensusConfigError, ConsensusParams, ProviderInfo, - create_genesis_tx_with_declarations, sync_utxos_with_genesis, - }, - network::{Libp2pNetworkLayout, NetworkParams}, - tracing::create_tracing_configs, - wallet::WalletConfig, - }, - invariants::TopologyInvariantError, -}; -use thiserror::Error; - -use crate::{ - scenario::DynError, - topology::{ - configs::{GeneralConfig, time::default_time_config}, - generation::{GeneratedNodeConfig, GeneratedTopology}, - utils::{TopologyResolveError, create_kms_configs, resolve_ids, resolve_ports}, - }, -}; - -/// Per-node config patch applied after the default node config is generated. -pub type NodeConfigPatch = Arc Result + Send + Sync>; - -#[derive(Debug, Error)] -pub enum TopologyBuildError { - #[error("topology must include at least one node")] - EmptyParticipants, - #[error(transparent)] - Invariants(#[from] TopologyInvariantError), - #[error(transparent)] - Resolve(#[from] TopologyResolveError), - #[error(transparent)] - Base(#[from] BaseConfigError), - #[error(transparent)] - Api(#[from] ApiConfigError), - #[error(transparent)] - Genesis(#[from] ConsensusConfigError), - #[error("config generation requires at least one consensus config")] - MissingConsensusConfig, - #[error("internal config vector mismatch for {label} (expected {expected}, got {actual})")] - VectorLenMismatch { - label: &'static str, - expected: usize, - actual: usize, - }, -} - -/// High-level topology settings used to generate node configs for a scenario. -#[derive(Clone)] -pub struct TopologyConfig { - pub n_nodes: usize, - pub consensus_params: ConsensusParams, - pub network_params: NetworkParams, - pub wallet_config: WalletConfig, - pub node_config_patches: HashMap, - pub persist_dirs: HashMap, -} - -impl TopologyConfig { - /// Create a config with zero nodes; counts must be set before building. - #[must_use] - pub fn empty() -> Self { - Self { - n_nodes: 0, - consensus_params: ConsensusParams::default_for_participants(1), - network_params: NetworkParams::default(), - wallet_config: WalletConfig::default(), - node_config_patches: HashMap::new(), - persist_dirs: HashMap::new(), - } - } - - #[must_use] - /// Convenience config with two nodes for consensus-only scenarios. - pub fn two_nodes() -> Self { - Self { - n_nodes: 2, - consensus_params: ConsensusParams::default_for_participants(2), - network_params: NetworkParams::default(), - wallet_config: WalletConfig::default(), - node_config_patches: HashMap::new(), - persist_dirs: HashMap::new(), - } - } - - #[must_use] - /// Build a topology with explicit node counts. - pub fn with_node_numbers(nodes: usize) -> Self { - let participants = nodes; - - Self { - n_nodes: nodes, - consensus_params: ConsensusParams::default_for_participants(participants), - network_params: NetworkParams::default(), - wallet_config: WalletConfig::default(), - node_config_patches: HashMap::new(), - persist_dirs: HashMap::new(), - } - } - - #[must_use] - pub const fn wallet(&self) -> &WalletConfig { - &self.wallet_config - } - - #[must_use] - pub fn node_config_patch(&self, index: usize) -> Option<&NodeConfigPatch> { - self.node_config_patches.get(&index) - } - - #[must_use] - pub fn with_node_config_patch(mut self, index: usize, patch: NodeConfigPatch) -> Self { - self.node_config_patches.insert(index, patch); - self - } - - #[must_use] - pub fn with_persist_dir(mut self, index: usize, dir: PathBuf) -> Self { - self.persist_dirs.insert(index, dir); - self - } -} - -/// Builder that produces `GeneratedTopology` instances from a `TopologyConfig`. -#[derive(Clone)] -pub struct TopologyBuilder { - config: TopologyConfig, - ids: Option>, - blend_ports: Option>, - scenario_base_dir: Option, -} - -impl TopologyBuilder { - #[must_use] - /// Create a builder from a base topology config. - pub const fn new(config: TopologyConfig) -> Self { - Self { - config, - ids: None, - blend_ports: None, - scenario_base_dir: None, - } - } - - #[must_use] - /// Provide deterministic node IDs. - pub fn with_ids(mut self, ids: Vec<[u8; 32]>) -> Self { - self.ids = Some(ids); - self - } - - #[must_use] - /// Override blend ports for nodes in order. - pub fn with_blend_ports(mut self, ports: Vec) -> Self { - self.blend_ports = Some(ports); - self - } - - #[must_use] - /// Apply a config patch for a specific node index. - pub fn with_node_config_patch(mut self, index: usize, patch: NodeConfigPatch) -> Self { - self.config.node_config_patches.insert(index, patch); - self - } - - #[must_use] - /// Apply a persist dir for a specific node index. - pub fn with_persist_dir(mut self, index: usize, dir: PathBuf) -> Self { - self.config.persist_dirs.insert(index, dir); - self - } - - #[must_use] - /// Set node counts. - pub const fn with_node_count(mut self, nodes: usize) -> Self { - self.config.n_nodes = nodes; - self - } - - pub fn with_scenario_base_dir(mut self, scenario_base_dir: Option) -> Self { - self.scenario_base_dir = scenario_base_dir; - self - } - - #[must_use] - /// Configure the libp2p network layout. - pub const fn with_network_layout(mut self, layout: Libp2pNetworkLayout) -> Self { - self.config.network_params.libp2p_network_layout = layout; - self - } - - /// Override wallet configuration used in genesis. - pub fn with_wallet_config(mut self, wallet: WalletConfig) -> Self { - self.config.wallet_config = wallet; - self - } - - /// Finalize and generate topology and node descriptors. - pub fn build(self) -> Result { - let Self { - mut config, - ids, - blend_ports, - scenario_base_dir, - } = self; - - let n_participants = participant_count(&config)?; - if let Some(base_dir) = scenario_base_dir { - for i in 0..n_participants { - let dir = base_dir.join(format!("node_{i}")); - config = config.with_persist_dir(i, dir); - } - } - - let (ids, blend_ports) = resolve_and_validate_vectors(ids, blend_ports, n_participants)?; - - let BaseConfigs { - mut consensus_configs, - bootstrap_configs: bootstrapping_config, - network_configs, - blend_configs, - } = build_base_configs( - &ids, - &config.consensus_params, - &config.network_params, - &config.wallet_config, - &blend_ports, - )?; - - let api_configs = create_api_configs(&ids)?; - let tracing_configs = create_tracing_configs(&ids); - let time_config = default_time_config(); - - let first_consensus = consensus_configs - .first() - .ok_or(TopologyBuildError::MissingConsensusConfig)?; - let providers = collect_provider_infos(first_consensus, &blend_configs)?; - - let genesis_tx = create_consensus_genesis_tx(first_consensus, providers)?; - apply_consensus_genesis_tx(&mut consensus_configs, &genesis_tx)?; - - let kms_configs = create_kms_configs( - &blend_configs, - &consensus_configs, - &config.wallet_config.accounts, - ); - - let nodes = build_node_descriptors( - &config, - n_participants, - &ids, - &blend_ports, - &consensus_configs, - &bootstrapping_config, - &network_configs, - &blend_configs, - &api_configs, - &tracing_configs, - &kms_configs, - &time_config, - &config.node_config_patches, - )?; - - Ok(GeneratedTopology { config, nodes }) - } - - #[must_use] - pub const fn config(&self) -> &TopologyConfig { - &self.config - } -} - -fn participant_count(config: &TopologyConfig) -> Result { - let n_participants = config.n_nodes; - if n_participants == 0 { - return Err(TopologyBuildError::EmptyParticipants); - } - - Ok(n_participants) -} - -fn resolve_and_validate_vectors( - ids: Option>, - blend_ports: Option>, - n_participants: usize, -) -> Result<(Vec<[u8; 32]>, Vec), TopologyBuildError> { - let ids = resolve_ids(ids, n_participants)?; - let blend_ports = resolve_ports(blend_ports, n_participants, "Blend")?; - - Ok((ids, blend_ports)) -} - -fn collect_provider_infos( - first_consensus: &testing_framework_config::topology::configs::consensus::GeneralConsensusConfig, - blend_configs: &[testing_framework_config::topology::configs::blend::GeneralBlendConfig], -) -> Result, TopologyBuildError> { - let mut providers = Vec::with_capacity(blend_configs.len()); - - for (i, blend_conf) in blend_configs.iter().enumerate() { - let note = get_cloned( - "blend_notes", - &first_consensus.blend_notes, - i, - blend_configs.len(), - )?; - providers.push(ProviderInfo { - service_type: ServiceType::BlendNetwork, - provider_sk: blend_conf.signer.clone(), - zk_sk: blend_conf.secret_zk_key.clone(), - locator: Locator(blend_conf.backend_core.listening_address.clone()), - note, - }); - } - - Ok(providers) -} - -fn create_consensus_genesis_tx( - first_consensus: &testing_framework_config::topology::configs::consensus::GeneralConsensusConfig, - providers: Vec, -) -> Result { - let ledger_tx = first_consensus.genesis_tx.mantle_tx().ledger_tx.clone(); - Ok(create_genesis_tx_with_declarations(ledger_tx, providers)?) -} - -fn apply_consensus_genesis_tx( - consensus_configs: &mut [testing_framework_config::topology::configs::consensus::GeneralConsensusConfig], - genesis_tx: &lb_core::mantle::genesis_tx::GenesisTx, -) -> Result<(), TopologyBuildError> { - for c in consensus_configs { - c.genesis_tx = genesis_tx.clone(); - sync_utxos_with_genesis(&mut c.utxos, genesis_tx)?; - } - Ok(()) -} - -#[allow(clippy::too_many_arguments)] -fn build_node_descriptors( - config: &TopologyConfig, - n_participants: usize, - ids: &[[u8; 32]], - blend_ports: &[u16], - consensus_configs: &[testing_framework_config::topology::configs::consensus::GeneralConsensusConfig], - bootstrapping_config: &[testing_framework_config::topology::configs::bootstrap::GeneralBootstrapConfig], - network_configs: &[testing_framework_config::topology::configs::network::GeneralNetworkConfig], - blend_configs: &[testing_framework_config::topology::configs::blend::GeneralBlendConfig], - api_configs: &[testing_framework_config::topology::configs::api::GeneralApiConfig], - tracing_configs: &[testing_framework_config::topology::configs::tracing::GeneralTracingConfig], - kms_configs: &[lb_key_management_system_service::backend::preload::PreloadKMSBackendSettings], - time_config: &testing_framework_config::topology::configs::time::GeneralTimeConfig, - node_config_patches: &HashMap, -) -> Result, TopologyBuildError> { - let mut nodes = Vec::with_capacity(config.n_nodes); - - for i in 0..n_participants { - let consensus_config = - get_cloned("consensus_configs", consensus_configs, i, n_participants)?; - let bootstrapping_config = - get_cloned("bootstrap_configs", bootstrapping_config, i, n_participants)?; - let network_config = get_cloned("network_configs", network_configs, i, n_participants)?; - let blend_config = get_cloned("blend_configs", blend_configs, i, n_participants)?; - let api_config = get_cloned("api_configs", api_configs, i, n_participants)?; - let tracing_config = get_cloned("tracing_configs", tracing_configs, i, n_participants)?; - let kms_config = get_cloned("kms_configs", kms_configs, i, n_participants)?; - - let id = get_copied("ids", ids, i, n_participants)?; - let blend_port = get_copied("blend_ports", blend_ports, i, n_participants)?; - - let general = GeneralConfig { - consensus_config, - bootstrapping_config, - network_config, - blend_config, - api_config, - tracing_config, - time_config: time_config.clone(), - kms_config, - }; - - let descriptor = GeneratedNodeConfig { - index: i, - id, - general, - blend_port, - config_patch: node_config_patches.get(&i).cloned(), - persist_dir: config.persist_dirs.get(&i).cloned(), - }; - - nodes.push(descriptor); - } - - Ok(nodes) -} - -fn get_cloned( - label: &'static str, - items: &[T], - index: usize, - expected: usize, -) -> Result { - items - .get(index) - .cloned() - .ok_or(TopologyBuildError::VectorLenMismatch { - label, - expected, - actual: items.len(), - }) -} - -fn get_copied( - label: &'static str, - items: &[T], - index: usize, - expected: usize, -) -> Result { - items - .get(index) - .copied() - .ok_or(TopologyBuildError::VectorLenMismatch { - label, - expected, - actual: items.len(), - }) -} diff --git a/testing-framework/core/src/topology/deployment.rs b/testing-framework/core/src/topology/deployment.rs deleted file mode 100644 index 360e698..0000000 --- a/testing-framework/core/src/topology/deployment.rs +++ /dev/null @@ -1,89 +0,0 @@ -use std::collections::HashSet; - -use crate::{ - nodes::node::Node, - topology::{ - generation::find_expected_peer_counts, - readiness::{NetworkReadiness, ReadinessCheck, ReadinessError}, - utils::multiaddr_port, - }, -}; - -/// Runtime representation of a spawned topology with running nodes. -pub struct Topology { - pub(crate) nodes: Vec, -} - -impl Topology { - /// Construct a topology from already-spawned nodes. - #[must_use] - pub fn from_nodes(nodes: Vec) -> Self { - Self { nodes } - } - - #[must_use] - pub fn nodes(&self) -> &[Node] { - &self.nodes - } - - #[must_use] - pub fn into_nodes(self) -> Vec { - self.nodes - } - - pub async fn wait_network_ready(&self) -> Result<(), ReadinessError> { - let listen_ports = self.node_listen_ports(); - if listen_ports.len() <= 1 { - return Ok(()); - } - - let initial_peer_ports = self.node_initial_peer_ports(); - let expected_peer_counts = find_expected_peer_counts(&listen_ports, &initial_peer_ports); - let labels = self.node_labels(); - - let check = NetworkReadiness { - topology: self, - expected_peer_counts: &expected_peer_counts, - labels: &labels, - }; - - check.wait().await?; - Ok(()) - } - - fn node_listen_ports(&self) -> Vec { - self.nodes - .iter() - .map(|node| node.config().user.network.backend.swarm.port) - .collect() - } - - fn node_initial_peer_ports(&self) -> Vec> { - self.nodes - .iter() - .map(|node| { - node.config() - .user - .network - .backend - .initial_peers - .iter() - .filter_map(multiaddr_port) - .collect::>() - }) - .collect() - } - - fn node_labels(&self) -> Vec { - self.nodes - .iter() - .enumerate() - .map(|(idx, node)| { - format!( - "node#{idx}@{}", - node.config().user.network.backend.swarm.port - ) - }) - .collect() - } -} diff --git a/testing-framework/core/src/topology/generated.rs b/testing-framework/core/src/topology/generated.rs new file mode 100644 index 0000000..c204856 --- /dev/null +++ b/testing-framework/core/src/topology/generated.rs @@ -0,0 +1,94 @@ +use std::sync::Arc; + +use super::DeploymentDescriptor; + +#[derive(Clone)] +pub struct NodePlan { + pub index: usize, + pub id: [u8; 32], + pub general: NodeConfig, +} + +impl NodePlan { + #[must_use] + pub const fn index(&self) -> usize { + self.index + } +} + +#[derive(Clone)] +pub struct DeploymentPlan { + pub config: TopologyShape, + pub plans: Vec>, +} + +impl DeploymentPlan { + #[must_use] + pub fn new(config: TopologyShape, plans: Vec>) -> Self { + Self { config, plans } + } + + #[must_use] + pub const fn config(&self) -> &TopologyShape { + &self.config + } + + #[must_use] + pub fn nodes(&self) -> &[NodePlan] { + &self.plans + } + + pub fn iter(&self) -> impl Iterator> { + self.plans.iter() + } +} + +impl DeploymentDescriptor for DeploymentPlan +where + TopologyShape: Send + Sync + 'static, + NodeConfig: Send + Sync + 'static, +{ + fn node_count(&self) -> usize { + self.plans.len() + } +} + +pub struct RuntimeTopology { + nodes: Vec, +} + +impl RuntimeTopology { + #[must_use] + pub fn from_nodes(nodes: Vec) -> Self { + Self { nodes } + } + + #[must_use] + pub fn nodes(&self) -> &[Node] { + &self.nodes + } + + #[must_use] + pub fn into_nodes(self) -> Vec { + self.nodes + } +} + +impl Clone for RuntimeTopology +where + Node: Clone, +{ + fn clone(&self) -> Self { + Self { + nodes: self.nodes.clone(), + } + } +} + +impl From> for RuntimeTopology { + fn from(nodes: Vec) -> Self { + Self::from_nodes(nodes) + } +} + +pub type SharedTopology = Arc; diff --git a/testing-framework/core/src/topology/generation.rs b/testing-framework/core/src/topology/generation.rs deleted file mode 100644 index 4aa779a..0000000 --- a/testing-framework/core/src/topology/generation.rs +++ /dev/null @@ -1,202 +0,0 @@ -use std::{collections::HashSet, path::PathBuf, time::Duration}; - -use reqwest::{Client, Url}; - -use crate::topology::{ - config::{NodeConfigPatch, TopologyConfig}, - configs::{GeneralConfig, wallet::WalletAccount}, - readiness::{HttpNetworkReadiness, ReadinessCheck, ReadinessError}, -}; - -/// Fully generated configuration for an individual node. -#[derive(Clone)] -pub struct GeneratedNodeConfig { - pub index: usize, - pub id: [u8; 32], - pub general: GeneralConfig, - pub blend_port: u16, - pub config_patch: Option, - pub persist_dir: Option, -} - -impl GeneratedNodeConfig { - #[must_use] - /// Zero-based index within the topology. - pub const fn index(&self) -> usize { - self.index - } - - #[must_use] - pub const fn network_port(&self) -> u16 { - self.general.network_config.backend.swarm.port - } - - #[must_use] - pub const fn api_port(&self) -> u16 { - self.general.api_config.address.port() - } - - #[must_use] - pub const fn testing_http_port(&self) -> u16 { - self.general.api_config.testing_http_address.port() - } -} - -/// Collection of generated node configs and helpers to spawn or probe the -/// stack. -#[derive(Clone)] -pub struct GeneratedTopology { - pub(crate) config: TopologyConfig, - pub(crate) nodes: Vec, -} - -impl GeneratedTopology { - #[must_use] - /// Underlying configuration used to derive the generated nodes. - pub const fn config(&self) -> &TopologyConfig { - &self.config - } - - #[must_use] - /// All node configs. - pub fn nodes(&self) -> &[GeneratedNodeConfig] { - &self.nodes - } - - /// Iterator over all node configs in topology order. - pub fn iter(&self) -> impl Iterator { - self.nodes.iter() - } - - #[must_use] - /// Slot duration from the first node (assumes homogeneous configs). - pub fn slot_duration(&self) -> Option { - self.nodes - .first() - .map(|node| node.general.time_config.slot_duration) - } - - #[must_use] - /// Wallet accounts configured for this topology. - pub fn wallet_accounts(&self) -> &[WalletAccount] { - &self.config.wallet_config.accounts - } - - pub async fn wait_remote_readiness( - &self, - // Node endpoints - node_endpoints: &[Url], - ) -> Result<(), ReadinessError> { - let total_nodes = self.nodes.len(); - if total_nodes == 0 { - return Ok(()); - } - - let labels = self.labels(); - let client = Client::new(); - - let endpoints = collect_node_endpoints(self, node_endpoints, total_nodes); - - wait_for_network_readiness(self, &client, &endpoints, &labels).await - } - - fn listen_ports(&self) -> Vec { - self.nodes - .iter() - .map(|node| node.general.network_config.backend.swarm.port) - .collect() - } - - fn initial_peer_ports(&self) -> Vec> { - self.nodes - .iter() - .map(|node| { - node.general - .network_config - .backend - .initial_peers - .iter() - .filter_map(crate::topology::utils::multiaddr_port) - .collect::>() - }) - .collect() - } - - fn labels(&self) -> Vec { - self.nodes - .iter() - .enumerate() - .map(|(idx, node)| { - format!( - "node#{idx}@{}", - node.general.network_config.backend.swarm.port - ) - }) - .collect() - } -} - -fn collect_node_endpoints( - topology: &GeneratedTopology, - node_endpoints: &[Url], - total_nodes: usize, -) -> Vec { - assert_eq!( - topology.nodes.len(), - node_endpoints.len(), - "node endpoints must match topology" - ); - - let mut endpoints = Vec::with_capacity(total_nodes); - endpoints.extend_from_slice(node_endpoints); - endpoints -} - -async fn wait_for_network_readiness( - topology: &GeneratedTopology, - client: &Client, - endpoints: &[Url], - labels: &[String], -) -> Result<(), ReadinessError> { - if endpoints.len() <= 1 { - return Ok(()); - } - - let listen_ports = topology.listen_ports(); - let initial_peer_ports = topology.initial_peer_ports(); - let expected_peer_counts = - crate::topology::generation::find_expected_peer_counts(&listen_ports, &initial_peer_ports); - - let network_check = HttpNetworkReadiness { - client, - endpoints, - expected_peer_counts: &expected_peer_counts, - labels, - }; - - network_check.wait().await -} - -pub fn find_expected_peer_counts( - listen_ports: &[u16], - initial_peer_ports: &[HashSet], -) -> Vec { - let mut expected: Vec> = vec![HashSet::new(); initial_peer_ports.len()]; - - for (idx, ports) in initial_peer_ports.iter().enumerate() { - for port in ports { - let Some(peer_idx) = listen_ports.iter().position(|p| p == port) else { - continue; - }; - - if peer_idx == idx { - continue; - } - - expected[idx].insert(peer_idx); - expected[peer_idx].insert(idx); - } - } - - expected.into_iter().map(|set| set.len()).collect() -} diff --git a/testing-framework/core/src/topology/mod.rs b/testing-framework/core/src/topology/mod.rs index 34afbe8..d6a9491 100644 --- a/testing-framework/core/src/topology/mod.rs +++ b/testing-framework/core/src/topology/mod.rs @@ -1,9 +1,34 @@ -pub mod configs { - pub use testing_framework_config::topology::configs::*; +use std::error::Error; + +#[derive(Clone, Debug, Eq, PartialEq)] +pub struct DeploymentSeed([u8; 32]); + +impl DeploymentSeed { + #[must_use] + pub const fn new(bytes: [u8; 32]) -> Self { + Self(bytes) + } + + #[must_use] + pub const fn bytes(&self) -> &[u8; 32] { + &self.0 + } } -pub mod config; -pub mod deployment; -pub mod generation; -pub mod readiness; -pub mod utils; +pub type DynTopologyError = Box; + +pub mod generated; +pub mod shape; +pub use generated::{DeploymentPlan, RuntimeTopology, SharedTopology}; +pub use shape::TopologyShapeBuilder; + +pub trait DeploymentDescriptor: Send + Sync { + fn node_count(&self) -> usize; +} + +pub trait DeploymentProvider: Send + Sync +where + D: DeploymentDescriptor, +{ + fn build(&self, seed: Option<&DeploymentSeed>) -> Result; +} diff --git a/testing-framework/core/src/topology/readiness/mod.rs b/testing-framework/core/src/topology/readiness/mod.rs deleted file mode 100644 index 7d2e8e1..0000000 --- a/testing-framework/core/src/topology/readiness/mod.rs +++ /dev/null @@ -1,59 +0,0 @@ -pub mod network; - -use std::time::Duration; - -pub use network::{HttpNetworkReadiness, NetworkReadiness}; -use thiserror::Error; -use tokio::time::{sleep, timeout}; - -use crate::adjust_timeout; - -const DEFAULT_POLL_INTERVAL: Duration = Duration::from_millis(200); -const DEFAULT_TIMEOUT: Duration = Duration::from_secs(60); - -#[derive(Debug, Error)] -pub enum ReadinessError { - #[error("{message}")] - Timeout { message: String }, -} - -#[async_trait::async_trait] -pub trait ReadinessCheck<'a> { - type Data: Send; - - async fn collect(&'a self) -> Self::Data; - - fn is_ready(&self, data: &Self::Data) -> bool; - - fn timeout_message(&self, data: Self::Data) -> String; - - fn poll_interval(&self) -> Duration { - DEFAULT_POLL_INTERVAL - } - - async fn wait(&'a self) -> Result<(), ReadinessError> { - let timeout_duration = adjust_timeout(DEFAULT_TIMEOUT); - let poll_interval = self.poll_interval(); - let mut data = self.collect().await; - - let wait_result = timeout(timeout_duration, async { - loop { - if self.is_ready(&data) { - return; - } - - sleep(poll_interval).await; - - data = self.collect().await; - } - }) - .await; - - if wait_result.is_err() { - let message = self.timeout_message(data); - return Err(ReadinessError::Timeout { message }); - } - - Ok(()) - } -} diff --git a/testing-framework/core/src/topology/readiness/network.rs b/testing-framework/core/src/topology/readiness/network.rs deleted file mode 100644 index 24751d8..0000000 --- a/testing-framework/core/src/topology/readiness/network.rs +++ /dev/null @@ -1,208 +0,0 @@ -use lb_libp2p::PeerId; -use lb_network_service::backends::libp2p::Libp2pInfo; -use reqwest::{Client, Url}; -use thiserror::Error; -use tracing::warn; - -use super::ReadinessCheck; -use crate::topology::deployment::Topology; - -#[derive(Debug, Error)] -pub enum NetworkInfoError { - #[error("failed to join url {base} with path {path}: {message}")] - JoinUrl { - base: Url, - path: &'static str, - message: String, - }, - #[error(transparent)] - Request(#[from] reqwest::Error), -} - -#[derive(Debug)] -pub struct NodeNetworkStatus { - label: String, - expected_peers: Option, - result: Result, -} - -pub struct NetworkReadiness<'a> { - pub(crate) topology: &'a Topology, - pub(crate) expected_peer_counts: &'a [usize], - pub(crate) labels: &'a [String], -} - -#[async_trait::async_trait] -impl<'a> ReadinessCheck<'a> for NetworkReadiness<'a> { - type Data = Vec; - - async fn collect(&'a self) -> Self::Data { - collect_node_statuses(self).await - } - - fn is_ready(&self, data: &Self::Data) -> bool { - data.iter().all( - |status| match (status.expected_peers, status.result.as_ref()) { - (Some(expected), Ok(info)) => info.n_peers >= expected, - _ => false, - }, - ) - } - - fn timeout_message(&self, data: Self::Data) -> String { - let summary = build_timeout_summary(&data); - format!("timed out waiting for network readiness: {summary}") - } -} - -pub struct HttpNetworkReadiness<'a> { - pub(crate) client: &'a Client, - pub(crate) endpoints: &'a [Url], - pub(crate) expected_peer_counts: &'a [usize], - pub(crate) labels: &'a [String], -} - -#[async_trait::async_trait] -impl<'a> ReadinessCheck<'a> for HttpNetworkReadiness<'a> { - type Data = Vec; - - async fn collect(&'a self) -> Self::Data { - let futures = self.endpoints.iter().enumerate().map(|(idx, endpoint)| { - let label = self - .labels - .get(idx) - .cloned() - .unwrap_or_else(|| format!("endpoint#{idx}")); - let expected_peers = self.expected_peer_counts.get(idx).copied(); - async move { - let result = try_fetch_network_info(self.client, endpoint).await; - NodeNetworkStatus { - label, - expected_peers, - result, - } - } - }); - futures::future::join_all(futures).await - } - - fn is_ready(&self, data: &Self::Data) -> bool { - data.iter().all( - |status| match (status.expected_peers, status.result.as_ref()) { - (Some(expected), Ok(info)) => info.n_peers >= expected, - _ => false, - }, - ) - } - - fn timeout_message(&self, data: Self::Data) -> String { - let summary = build_timeout_summary(&data); - format!("timed out waiting for network readiness: {summary}") - } -} - -async fn collect_node_statuses(readiness: &NetworkReadiness<'_>) -> Vec { - let node_futures = readiness - .topology - .nodes - .iter() - .enumerate() - .map(|(idx, node)| { - let label = readiness - .labels - .get(idx) - .cloned() - .unwrap_or_else(|| format!("node#{idx}")); - let expected_peers = readiness.expected_peer_counts.get(idx).copied(); - async move { - let result = node - .api() - .network_info() - .await - .map_err(NetworkInfoError::from); - NodeNetworkStatus { - label, - expected_peers, - result, - } - } - }); - - futures::future::join_all(node_futures).await -} - -pub async fn try_fetch_network_info( - client: &Client, - base: &Url, -) -> Result { - let path = lb_http_api_common::paths::NETWORK_INFO.trim_start_matches('/'); - let url = base - .join(path) - .map_err(|source| NetworkInfoError::JoinUrl { - base: base.clone(), - path: lb_http_api_common::paths::NETWORK_INFO, - message: source.to_string(), - })?; - - let response = client - .get(url) - .send() - .await - .map_err(NetworkInfoError::Request)? - .error_for_status() - .map_err(NetworkInfoError::Request)?; - - response - .json::() - .await - .map_err(NetworkInfoError::Request) -} - -#[deprecated(note = "use try_fetch_network_info to avoid panics and preserve error details")] -pub async fn fetch_network_info(client: &Client, base: &Url) -> Libp2pInfo { - match try_fetch_network_info(client, base).await { - Ok(info) => info, - Err(err) => log_network_warning(base, &err), - } -} - -fn log_network_warning(base: &Url, err: &NetworkInfoError) -> Libp2pInfo { - warn!( - target: "readiness", - url = %base, - error = %err, - "network readiness: failed to fetch network info" - ); - empty_libp2p_info() -} - -fn empty_libp2p_info() -> Libp2pInfo { - Libp2pInfo { - listen_addresses: Vec::with_capacity(0), - peer_id: PeerId::random(), - n_peers: 0, - n_connections: 0, - n_pending_connections: 0, - } -} - -fn build_timeout_summary(statuses: &[NodeNetworkStatus]) -> String { - statuses - .iter() - .map( - |status| match (status.expected_peers, status.result.as_ref()) { - (None, _) => format!("{}: missing expected peer count", status.label), - (Some(expected), Ok(info)) => { - format!( - "{}: peers={}, expected={}", - status.label, info.n_peers, expected - ) - } - (Some(expected), Err(err)) => { - format!("{}: error={err}, expected_peers={expected}", status.label) - } - }, - ) - .collect::>() - .join(", ") -} diff --git a/testing-framework/core/src/topology/shape.rs b/testing-framework/core/src/topology/shape.rs new file mode 100644 index 0000000..7deb23d --- /dev/null +++ b/testing-framework/core/src/topology/shape.rs @@ -0,0 +1,37 @@ +#[derive(Clone, Debug, Default)] +pub struct TopologyShapeBuilder { + node_count: Option, + star_network: bool, +} + +impl TopologyShapeBuilder { + #[must_use] + pub const fn new() -> Self { + Self { + node_count: None, + star_network: false, + } + } + + #[must_use] + pub const fn with_nodes(mut self, count: usize) -> Self { + self.node_count = Some(count); + self + } + + #[must_use] + pub const fn with_star_network(mut self) -> Self { + self.star_network = true; + self + } + + #[must_use] + pub fn node_count_or(&self, fallback: usize) -> usize { + self.node_count.unwrap_or(fallback) + } + + #[must_use] + pub fn star_network_enabled(&self) -> bool { + self.star_network + } +} diff --git a/testing-framework/core/src/topology/utils.rs b/testing-framework/core/src/topology/utils.rs deleted file mode 100644 index b2d65da..0000000 --- a/testing-framework/core/src/topology/utils.rs +++ /dev/null @@ -1,133 +0,0 @@ -use std::{collections::HashMap, iter}; - -use lb_groth16::fr_to_bytes; -use lb_key_management_system_service::{backend::preload::PreloadKMSBackendSettings, keys::Key}; -use lb_utils::net::get_available_udp_port; -use rand::{Rng, thread_rng}; -use thiserror::Error; - -use crate::topology::configs::{ - blend::GeneralBlendConfig, consensus::GeneralConsensusConfig, wallet::WalletAccount, -}; - -#[must_use] -/// Build preload KMS configs for blend/DA and wallet keys for every node. -pub fn create_kms_configs( - blend_configs: &[GeneralBlendConfig], - consensus_configs: &[GeneralConsensusConfig], - wallet_accounts: &[WalletAccount], -) -> Vec { - blend_configs - .iter() - .zip(consensus_configs.iter()) - .map(|(blend_conf, consensus_conf)| { - let mut keys = HashMap::from([ - ( - hex::encode(blend_conf.signer.public_key().to_bytes()), - Key::Ed25519(blend_conf.signer.clone()), - ), - ( - hex::encode(fr_to_bytes( - blend_conf.secret_zk_key.to_public_key().as_fr(), - )), - Key::Zk(blend_conf.secret_zk_key.clone()), - ), - ( - hex::encode(fr_to_bytes( - consensus_conf.leader_sk.to_public_key().as_fr(), - )), - Key::Zk(consensus_conf.leader_sk.clone().into()), - ), - ( - hex::encode(fr_to_bytes( - consensus_conf.funding_sk.to_public_key().as_fr(), - )), - Key::Zk(consensus_conf.funding_sk.clone()), - ), - ]); - - for account in wallet_accounts { - let key_id = hex::encode(fr_to_bytes(account.public_key().as_fr())); - keys.entry(key_id) - .or_insert_with(|| Key::Zk(account.secret_key.clone())); - } - - PreloadKMSBackendSettings { keys } - }) - .collect() -} - -#[derive(Debug, Error)] -pub enum TopologyResolveError { - #[error("expected {expected} ids but got {actual}")] - IdCountMismatch { expected: usize, actual: usize }, - #[error("expected {expected} {label} ports but got {actual}")] - PortCountMismatch { - label: &'static str, - expected: usize, - actual: usize, - }, - #[error("failed to allocate a free UDP port for {label}")] - PortAllocationFailed { label: &'static str }, -} - -pub fn resolve_ids( - ids: Option>, - count: usize, -) -> Result, TopologyResolveError> { - match ids { - Some(ids) => { - if ids.len() != count { - return Err(TopologyResolveError::IdCountMismatch { - expected: count, - actual: ids.len(), - }); - } - Ok(ids) - } - None => { - let mut generated = vec![[0; 32]; count]; - for id in &mut generated { - thread_rng().fill(id); - } - Ok(generated) - } - } -} - -pub fn resolve_ports( - ports: Option>, - count: usize, - label: &'static str, -) -> Result, TopologyResolveError> { - let resolved = match ports { - Some(ports) => ports, - None => iter::repeat_with(|| { - get_available_udp_port().ok_or(TopologyResolveError::PortAllocationFailed { label }) - }) - .take(count) - .collect::, _>>()?, - }; - - if resolved.len() != count { - return Err(TopologyResolveError::PortCountMismatch { - label, - expected: count, - actual: resolved.len(), - }); - } - - Ok(resolved) -} - -pub fn multiaddr_port(addr: &lb_libp2p::Multiaddr) -> Option { - for protocol in addr { - match protocol { - lb_libp2p::Protocol::Udp(port) | lb_libp2p::Protocol::Tcp(port) => { - return Some(port); - } - _ => {} - } - } - None -} diff --git a/testing-framework/core/src/workloads/chaos.rs b/testing-framework/core/src/workloads/chaos.rs new file mode 100644 index 0000000..38fba98 --- /dev/null +++ b/testing-framework/core/src/workloads/chaos.rs @@ -0,0 +1,273 @@ +use std::{collections::HashMap, mem::swap, time::Duration}; + +use async_trait::async_trait; +use rand::{Rng as _, seq::SliceRandom as _, thread_rng}; +use tokio::time::{Instant, sleep}; + +use crate::{ + scenario::{Application, CoreBuilder, DynError, NodeControlCapability, RunContext, Workload}, + topology::DeploymentDescriptor, +}; + +const MIN_DELAY_SPREAD_FALLBACK: Duration = Duration::from_millis(1); +const DEFAULT_CHAOS_MIN_DELAY: Duration = Duration::from_secs(10); +const DEFAULT_CHAOS_MAX_DELAY: Duration = Duration::from_secs(30); +const DEFAULT_CHAOS_TARGET_COOLDOWN: Duration = Duration::from_secs(60); +const NO_ELIGIBLE_TARGETS: &str = "chaos restart workload has no eligible targets"; + +/// Chaos helpers for scenarios that can control nodes. +pub trait ChaosBuilderExt: Sized { + fn chaos(self) -> ChaosBuilder; + + fn chaos_with( + self, + f: impl FnOnce(ChaosBuilder) -> CoreBuilder, + ) -> CoreBuilder; +} + +impl ChaosBuilderExt for CoreBuilder { + fn chaos(self) -> ChaosBuilder { + ChaosBuilder { builder: self } + } + + fn chaos_with( + self, + f: impl FnOnce(ChaosBuilder) -> CoreBuilder, + ) -> CoreBuilder { + f(self.chaos()) + } +} + +pub struct ChaosBuilder { + builder: CoreBuilder, +} + +impl ChaosBuilder { + #[must_use] + pub fn apply(self) -> CoreBuilder { + self.builder + } + + #[must_use] + pub fn restart(self) -> ChaosRestartBuilder { + ChaosRestartBuilder { + builder: self.builder, + min_delay: DEFAULT_CHAOS_MIN_DELAY, + max_delay: DEFAULT_CHAOS_MAX_DELAY, + target_cooldown: DEFAULT_CHAOS_TARGET_COOLDOWN, + } + } +} + +pub struct ChaosRestartBuilder { + builder: CoreBuilder, + min_delay: Duration, + max_delay: Duration, + target_cooldown: Duration, +} + +impl ChaosRestartBuilder { + #[must_use] + pub fn min_delay(mut self, delay: Duration) -> Self { + if !delay.is_zero() { + self.min_delay = delay; + } + self + } + + #[must_use] + pub fn max_delay(mut self, delay: Duration) -> Self { + if !delay.is_zero() { + self.max_delay = delay; + } + self + } + + #[must_use] + pub fn target_cooldown(mut self, cooldown: Duration) -> Self { + if !cooldown.is_zero() { + self.target_cooldown = cooldown; + } + self + } + + #[must_use] + pub fn apply(mut self) -> CoreBuilder { + if self.min_delay > self.max_delay { + swap(&mut self.min_delay, &mut self.max_delay); + } + + if self.target_cooldown < self.min_delay { + self.target_cooldown = self.min_delay; + } + + self.builder.with_workload(RandomRestartWorkload::new( + self.min_delay, + self.max_delay, + self.target_cooldown, + )) + } +} + +#[derive(Debug)] +pub struct RandomRestartWorkload { + min_delay: Duration, + max_delay: Duration, + target_cooldown: Duration, +} + +impl RandomRestartWorkload { + #[must_use] + pub const fn new(min_delay: Duration, max_delay: Duration, target_cooldown: Duration) -> Self { + Self { + min_delay, + max_delay, + target_cooldown, + } + } + + fn random_delay(&self) -> Duration { + if self.max_delay <= self.min_delay { + return self.min_delay; + } + + let spread = self.max_delay.saturating_sub(self.min_delay); + let spread = if spread.is_zero() { + MIN_DELAY_SPREAD_FALLBACK + } else { + spread + }; + + let spread_secs = spread.as_secs_f64(); + let offset = thread_rng().gen_range(0.0..=spread_secs); + + self.min_delay + .checked_add(Duration::from_secs_f64(offset)) + .unwrap_or(self.max_delay) + } + + fn initialize_cooldowns(&self, targets: &[Target]) -> HashMap { + let now = Instant::now(); + let ready = now.checked_sub(self.target_cooldown).unwrap_or(now); + + targets + .iter() + .cloned() + .map(|target| (target, ready)) + .collect() + } + + fn targets(&self, ctx: &RunContext) -> Vec { + let node_count = ctx.descriptors().node_count(); + if node_count <= 1 { + return Vec::new(); + } + + (0..node_count).map(node_target).collect() + } + + async fn pick_target( + &self, + targets: &[Target], + cooldowns: &HashMap, + ) -> Result { + ensure_targets_exist(targets)?; + + loop { + let now = Instant::now(); + if let Some(wait) = next_target_wait(now, cooldowns) { + sleep(wait).await; + continue; + } + + return select_target(targets, cooldowns, now); + } + } +} + +fn ensure_targets_exist(targets: &[Target]) -> Result<(), DynError> { + if targets.is_empty() { + return Err(NO_ELIGIBLE_TARGETS.into()); + } + + Ok(()) +} + +fn next_target_wait(now: Instant, cooldowns: &HashMap) -> Option { + let next_ready = cooldowns + .values() + .copied() + .filter(|ready| *ready > now) + .min()?; + let wait = next_ready.saturating_duration_since(now); + if wait.is_zero() { None } else { Some(wait) } +} + +fn pick_available_target( + targets: &[Target], + cooldowns: &HashMap, + now: Instant, +) -> Option { + let available: Vec = targets + .iter() + .cloned() + .filter(|target| cooldowns.get(target).is_none_or(|ready| *ready <= now)) + .collect(); + available.choose(&mut thread_rng()).cloned() +} + +fn select_target( + targets: &[Target], + cooldowns: &HashMap, + now: Instant, +) -> Result { + if let Some(target) = pick_available_target(targets, cooldowns, now) { + return Ok(target); + } + + targets + .choose(&mut thread_rng()) + .cloned() + .ok_or_else(|| NO_ELIGIBLE_TARGETS.into()) +} + +fn node_target(index: usize) -> Target { + Target::Node(format!("node-{index}")) +} + +#[async_trait] +impl Workload for RandomRestartWorkload { + fn name(&self) -> &'static str { + "chaos_restart" + } + + async fn start(&self, ctx: &RunContext) -> Result<(), DynError> { + let Some(handle) = ctx.node_control() else { + return Err("chaos restart workload requires node control".into()); + }; + + let targets = self.targets(ctx); + ensure_targets_exist(&targets)?; + + let mut cooldowns = self.initialize_cooldowns(&targets); + + loop { + sleep(self.random_delay()).await; + let target = self.pick_target(&targets, &cooldowns).await?; + + match target { + Target::Node(ref name) => handle + .restart_node(name) + .await + .map_err(|err| format!("node restart failed: {err}"))?, + } + + cooldowns.insert(target, Instant::now() + self.target_cooldown); + } + } +} + +#[derive(Clone, PartialEq, Eq, Hash, Debug)] +enum Target { + Node(String), +} diff --git a/testing-framework/core/src/workloads/mod.rs b/testing-framework/core/src/workloads/mod.rs new file mode 100644 index 0000000..c1813d2 --- /dev/null +++ b/testing-framework/core/src/workloads/mod.rs @@ -0,0 +1,3 @@ +mod chaos; + +pub use chaos::{ChaosBuilderExt, RandomRestartWorkload}; diff --git a/testing-framework/cucumber/src/steps/run.rs b/testing-framework/cucumber/src/steps/run.rs deleted file mode 100644 index e69de29..0000000 diff --git a/testing-framework/cucumber/src/steps/workloads.rs b/testing-framework/cucumber/src/steps/workloads.rs deleted file mode 100644 index e69de29..0000000 diff --git a/testing-framework/cucumber/src/world.rs b/testing-framework/cucumber/src/world.rs deleted file mode 100644 index e69de29..0000000 diff --git a/testing-framework/deployers/compose/Cargo.toml b/testing-framework/deployers/compose/Cargo.toml index 9af9ea3..4880401 100644 --- a/testing-framework/deployers/compose/Cargo.toml +++ b/testing-framework/deployers/compose/Cargo.toml @@ -13,27 +13,22 @@ version = "0.1.0" workspace = true [dependencies] -anyhow = "1" -async-trait = { workspace = true } -cfgsync_tf = { workspace = true } -lb-tracing = { workspace = true } -lb-tracing-service = { workspace = true } -reqwest = { features = ["json"], workspace = true } -serde = { features = ["derive"], workspace = true } -tempfile = { workspace = true } -tera = "1.19" -testing-framework-config = { workspace = true } -testing-framework-core = { path = "../../core" } -testing-framework-env = { workspace = true } -thiserror = { workspace = true } -tokio = { features = ["macros", "net", "process", "rt-multi-thread", "sync", "time"], workspace = true } -tracing = { workspace = true } -url = { version = "2" } -uuid = { features = ["v4"], version = "1" } +anyhow = "1" +async-trait = { workspace = true } +reqwest = { features = ["json"], workspace = true } +serde = { features = ["derive"], workspace = true } +tempfile = { workspace = true } +tera = "1.19" +testing-framework-core = { path = "../../core" } +thiserror = { workspace = true } +tokio = { features = ["macros", "net", "process", "rt-multi-thread", "sync", "time"], workspace = true } +tokio-retry = "0.3" +tracing = { workspace = true } +url = { version = "2" } +uuid = { features = ["v4"], version = "1" } [dev-dependencies] -lb-core = { workspace = true } -lb-groth16 = { workspace = true } -lb-key-management-system-service = { workspace = true } -lb-ledger = { workspace = true } -lb-zksign = { workspace = true } +groth16 = { workspace = true } +key-management-system-service = { workspace = true } +serde_json = { workspace = true } +zksign = { workspace = true } diff --git a/testing-framework/deployers/compose/assets/docker-compose.yml.tera b/testing-framework/deployers/compose/assets/docker-compose.yml.tera index 13bedfd..ba21922 100644 --- a/testing-framework/deployers/compose/assets/docker-compose.yml.tera +++ b/testing-framework/deployers/compose/assets/docker-compose.yml.tera @@ -29,31 +29,4 @@ services: - seccomp=unconfined restart: on-failure -{% if node.platform %} platform: {{ node.platform }} -{% endif %} entrypoint: {{ node.entrypoint }} - volumes: -{% for volume in node.volumes %} - - {{ volume }} -{% endfor %} -{% if node.extra_hosts | length > 0 %} - extra_hosts: -{% for host in node.extra_hosts %} - - {{ host }} -{% endfor %} -{% endif %} - ports: -{% for port in node.ports %} - - {{ port }} -{% endfor %} - environment: -{% for env in node.environment %} - {{ env.key }}: "{{ env.value }}" -{% endfor %} - cap_add: - - SYS_ADMIN - - SYS_PTRACE - security_opt: - - seccomp=unconfined - restart: on-failure - {% endfor %} diff --git a/testing-framework/deployers/compose/src/deployer/clients.rs b/testing-framework/deployers/compose/src/deployer/clients.rs index 10840dc..665acc3 100644 --- a/testing-framework/deployers/compose/src/deployer/clients.rs +++ b/testing-framework/deployers/compose/src/deployer/clients.rs @@ -1,10 +1,10 @@ -use testing_framework_core::{ - scenario::{BlockFeed, BlockFeedTask, NodeClients}, - topology::generation::GeneratedTopology, -}; -use tracing::info; +use std::{fmt::Debug, marker::PhantomData}; + +use testing_framework_core::scenario::{Application, FeedHandle, FeedRuntime, NodeClients}; +use tracing::{info, warn}; use crate::{ + env::ComposeDeployEnv, errors::ComposeRunnerError, infrastructure::{environment::StackEnvironment, ports::HostPortMapping}, lifecycle::{ @@ -12,68 +12,73 @@ use crate::{ }, }; -pub struct ClientBuilder; +pub struct ClientBuilder { + _env: PhantomData, +} -impl ClientBuilder { +impl ClientBuilder { #[must_use] pub const fn new() -> Self { - Self + Self { _env: PhantomData } } pub async fn build_node_clients( &self, - descriptors: &GeneratedTopology, + descriptors: &E::Deployment, host_ports: &HostPortMapping, host: &str, environment: &mut StackEnvironment, - ) -> Result { - let clients = match build_node_clients_with_ports(descriptors, host_ports, host) { - Ok(clients) => clients, - Err(err) => { - return Err(fail_deploy_step( - environment, - "failed to construct node api clients", - "failed to build node clients", - err, - ) - .await); - } - }; - Ok(clients) + ) -> Result, ComposeRunnerError> { + ensure_step( + environment, + build_node_clients_with_ports::(descriptors, host_ports, host), + "failed to construct node api clients", + "failed to build node clients", + ) + .await } pub async fn start_block_feed( &self, - node_clients: &NodeClients, + node_clients: &NodeClients, environment: &mut StackEnvironment, - ) -> Result<(BlockFeed, BlockFeedTask), ComposeRunnerError> { - let pair = match spawn_block_feed_with_retry(node_clients).await { - Ok(pair) => pair, - Err(err) => { - return Err(fail_deploy_step( - environment, - "failed to initialize block feed", - "block feed initialization failed", - err, - ) - .await); - } - }; + ) -> Result< + ( + <::FeedRuntime as FeedRuntime>::Feed, + FeedHandle, + ), + ComposeRunnerError, + > { + let pair = ensure_step( + environment, + spawn_block_feed_with_retry::(node_clients).await, + "failed to initialize block feed", + "block feed initialization failed", + ) + .await?; + info!("block feed connected to node"); Ok(pair) } } -async fn fail_deploy_step( +async fn ensure_step( environment: &mut StackEnvironment, - reason: &str, + result: Result, + fail_reason: &str, log_message: &str, - error: E, -) -> ComposeRunnerError +) -> Result where - E: std::fmt::Debug + Into, + E: Debug + Into, { - environment.fail(reason).await; - tracing::warn!(error = ?error, "{log_message}"); - error.into() + let value = match result { + Ok(value) => value, + Err(error) => { + environment.fail(fail_reason).await; + warn!(error = ?error, "{log_message}"); + return Err(error.into()); + } + }; + + Ok(value) } diff --git a/testing-framework/deployers/compose/src/deployer/mod.rs b/testing-framework/deployers/compose/src/deployer/mod.rs index 833ce28..e32acae 100644 --- a/testing-framework/deployers/compose/src/deployer/mod.rs +++ b/testing-framework/deployers/compose/src/deployer/mod.rs @@ -4,31 +4,35 @@ pub mod ports; pub mod readiness; pub mod setup; +use std::marker::PhantomData; + use async_trait::async_trait; use testing_framework_core::scenario::{ - BlockFeedTask, CleanupGuard, Deployer, ObservabilityCapabilityProvider, RequiresNodeControl, + CleanupGuard, Deployer, FeedHandle, ObservabilityCapabilityProvider, RequiresNodeControl, Runner, Scenario, }; -use crate::{errors::ComposeRunnerError, lifecycle::cleanup::RunnerCleanup}; +use crate::{env::ComposeDeployEnv, errors::ComposeRunnerError, lifecycle::cleanup::RunnerCleanup}; -/// Docker Compose-based deployer for Logos test scenarios. +/// Docker Compose-based deployer for test scenarios. #[derive(Clone, Copy)] -pub struct ComposeDeployer { +pub struct ComposeDeployer { readiness_checks: bool, + _env: PhantomData, } -impl Default for ComposeDeployer { +impl Default for ComposeDeployer { fn default() -> Self { Self::new() } } -impl ComposeDeployer { +impl ComposeDeployer { #[must_use] pub const fn new() -> Self { Self { readiness_checks: true, + _env: PhantomData, } } @@ -40,14 +44,19 @@ impl ComposeDeployer { } #[async_trait] -impl Deployer for ComposeDeployer +impl Deployer for ComposeDeployer where Caps: RequiresNodeControl + ObservabilityCapabilityProvider + Send + Sync, + E: ComposeDeployEnv, { type Error = ComposeRunnerError; - async fn deploy(&self, scenario: &Scenario) -> Result { - orchestrator::DeploymentOrchestrator::new(*self) + async fn deploy(&self, scenario: &Scenario) -> Result, Self::Error> { + let deployer = Self { + readiness_checks: self.readiness_checks, + _env: PhantomData, + }; + orchestrator::DeploymentOrchestrator::new(deployer) .deploy(scenario) .await } @@ -55,11 +64,11 @@ where pub(super) struct ComposeCleanupGuard { environment: RunnerCleanup, - block_feed: Option, + block_feed: Option, } impl ComposeCleanupGuard { - const fn new(environment: RunnerCleanup, block_feed: BlockFeedTask) -> Self { + const fn new(environment: RunnerCleanup, block_feed: FeedHandle) -> Self { Self { environment, block_feed: Some(block_feed), @@ -78,305 +87,10 @@ impl CleanupGuard for ComposeCleanupGuard { pub(super) fn make_cleanup_guard( environment: RunnerCleanup, - block_feed: BlockFeedTask, + block_feed: FeedHandle, ) -> Box { Box::new(ComposeCleanupGuard::new(environment, block_feed)) } #[cfg(test)] -mod tests { - use std::{collections::HashMap, net::Ipv4Addr}; - - use cfgsync_tf::{ - config::builder::create_node_configs, - host::{Host, PortOverrides}, - }; - use lb_core::{ - mantle::{GenesisTx as GenesisTxTrait, ledger::NoteId}, - sdp::{ProviderId, ServiceType}, - }; - use lb_groth16::Fr; - use lb_key_management_system_service::keys::ZkPublicKey; - use lb_ledger::LedgerState; - use lb_tracing_service::TracingSettings; - use testing_framework_core::{ - scenario::ScenarioBuilder, - topology::{ - generation::{GeneratedNodeConfig, GeneratedTopology}, - utils::multiaddr_port, - }, - }; - - #[test] - fn cfgsync_prebuilt_configs_preserve_genesis() { - let scenario = ScenarioBuilder::topology_with(|t| t.nodes(1)) - .build() - .expect("scenario build should succeed"); - let topology = scenario.topology().clone(); - let hosts = hosts_from_topology(&topology); - let tracing_settings = tracing_settings(&topology); - - let configs = create_node_configs( - &topology.config().consensus_params, - &tracing_settings, - &topology.config().wallet_config, - Some(topology.nodes().iter().map(|node| node.id).collect()), - Some( - topology - .nodes() - .iter() - .map(|node| node.blend_port) - .collect(), - ), - hosts, - ) - .expect("cfgsync config generation should succeed"); - let configs_by_identifier: HashMap<_, _> = configs - .into_iter() - .map(|(host, config)| (host.identifier, config)) - .collect(); - - for node in topology.nodes() { - let identifier = identifier_for(node.index()); - let cfgsync_config = configs_by_identifier - .get(&identifier) - .unwrap_or_else(|| panic!("missing cfgsync config for {identifier}")); - let expected_genesis = &node.general.consensus_config.genesis_tx; - let actual_genesis = &cfgsync_config.consensus_config.genesis_tx; - if std::env::var("PRINT_GENESIS").is_ok() { - println!( - "[fingerprint {identifier}] expected={:?}", - declaration_fingerprint(expected_genesis) - ); - println!( - "[fingerprint {identifier}] actual={:?}", - declaration_fingerprint(actual_genesis) - ); - } - assert_eq!( - expected_genesis.mantle_tx().ledger_tx, - actual_genesis.mantle_tx().ledger_tx, - "ledger tx mismatch for {identifier}" - ); - assert_eq!( - declaration_fingerprint(expected_genesis), - declaration_fingerprint(actual_genesis), - "declaration entries mismatch for {identifier}" - ); - } - } - - #[test] - fn cfgsync_genesis_proofs_verify_against_ledger() { - let scenario = ScenarioBuilder::topology_with(|t| t.nodes(1)) - .build() - .expect("scenario build should succeed"); - let topology = scenario.topology().clone(); - let hosts = hosts_from_topology(&topology); - let tracing_settings = tracing_settings(&topology); - - let configs = create_node_configs( - &topology.config().consensus_params, - &tracing_settings, - &topology.config().wallet_config, - Some(topology.nodes().iter().map(|node| node.id).collect()), - Some( - topology - .nodes() - .iter() - .map(|node| node.blend_port) - .collect(), - ), - hosts, - ) - .expect("cfgsync config generation should succeed"); - let configs_by_identifier: HashMap<_, _> = configs - .into_iter() - .map(|(host, config)| (host.identifier, config)) - .collect(); - - for node in topology.nodes() { - let identifier = identifier_for(node.index()); - let cfgsync_config = configs_by_identifier - .get(&identifier) - .unwrap_or_else(|| panic!("missing cfgsync config for {identifier}")); - LedgerState::from_genesis_tx::<()>( - cfgsync_config.consensus_config.genesis_tx.clone(), - &cfgsync_config.consensus_config.ledger_config, - Fr::from(0u64), - ) - .unwrap_or_else(|err| panic!("ledger rejected genesis for {identifier}: {err:?}")); - } - } - - #[test] - fn cfgsync_docker_overrides_produce_valid_genesis() { - let scenario = ScenarioBuilder::topology_with(|t| t.nodes(3)) - .build() - .expect("scenario build should succeed"); - let topology = scenario.topology().clone(); - let tracing_settings = tracing_settings(&topology); - let hosts = docker_style_hosts(&topology); - - let configs = create_node_configs( - &topology.config().consensus_params, - &tracing_settings, - &topology.config().wallet_config, - Some(topology.nodes().iter().map(|node| node.id).collect()), - Some( - topology - .nodes() - .iter() - .map(|node| node.blend_port) - .collect(), - ), - hosts, - ) - .expect("cfgsync config generation should succeed"); - - for (host, config) in configs { - let genesis = &config.consensus_config.genesis_tx; - LedgerState::from_genesis_tx::<()>( - genesis.clone(), - &config.consensus_config.ledger_config, - Fr::from(0u64), - ) - .unwrap_or_else(|err| { - panic!("ledger rejected genesis for {}: {err:?}", host.identifier) - }); - } - } - - #[test] - fn cfgsync_configs_match_topology_ports_and_genesis() { - let scenario = ScenarioBuilder::topology_with(|t| t.nodes(2)) - .build() - .expect("scenario build should succeed"); - let topology = scenario.topology().clone(); - let hosts = hosts_from_topology(&topology); - let tracing_settings = tracing_settings(&topology); - - let configs = create_node_configs( - &topology.config().consensus_params, - &tracing_settings, - &topology.config().wallet_config, - Some(topology.nodes().iter().map(|node| node.id).collect()), - Some( - topology - .nodes() - .iter() - .map(|node| node.blend_port) - .collect(), - ), - hosts, - ) - .expect("cfgsync config generation should succeed"); - let configs_by_identifier: HashMap<_, _> = configs - .into_iter() - .map(|(host, config)| (host.identifier, config)) - .collect(); - - for node in topology.nodes() { - let identifier = identifier_for(node.index()); - let cfg = configs_by_identifier - .get(&identifier) - .unwrap_or_else(|| panic!("missing cfgsync config for {identifier}")); - - assert_eq!( - declaration_fingerprint(&node.general.consensus_config.genesis_tx), - declaration_fingerprint(&cfg.consensus_config.genesis_tx), - "genesis declaration mismatch for {identifier}" - ); - - let expected_net_port = node.network_port(); - assert_eq!( - cfg.network_config.backend.swarm.port, expected_net_port, - "network port mismatch for {identifier}" - ); - - assert_eq!( - multiaddr_port(&cfg.blend_config.backend_core.listening_address), - Some(node.blend_port), - "blend listening port mismatch for {identifier}" - ); - - assert_eq!( - cfg.api_config.address.port(), - node.general.api_config.address.port(), - "api port mismatch for {identifier}" - ); - assert_eq!( - cfg.api_config.testing_http_address.port(), - node.general.api_config.testing_http_address.port(), - "testing http port mismatch for {identifier}" - ); - } - } - - fn hosts_from_topology(topology: &GeneratedTopology) -> Vec { - topology.nodes().iter().map(host_from_node).collect() - } - - fn docker_style_hosts(topology: &GeneratedTopology) -> Vec { - topology - .nodes() - .iter() - .map(|node| docker_host(node, 10 + node.index() as u8)) - .collect() - } - - fn host_from_node(node: &GeneratedNodeConfig) -> Host { - let identifier = identifier_for(node.index()); - let ip = Ipv4Addr::LOCALHOST; - let mut host = make_host(ip, identifier); - host.network_port = node.network_port(); - host.blend_port = node.blend_port; - host - } - - fn docker_host(node: &GeneratedNodeConfig, octet: u8) -> Host { - let identifier = identifier_for(node.index()); - let ip = Ipv4Addr::new(172, 23, 0, octet); - let mut host = make_host(ip, identifier); - host.network_port = node.network_port().saturating_add(1000); - host.blend_port = node.blend_port.saturating_add(1000); - host - } - - fn tracing_settings(topology: &GeneratedTopology) -> TracingSettings { - topology - .nodes() - .first() - .expect("topology must contain at least one node") - .general - .tracing_config - .tracing_settings - .clone() - } - - fn identifier_for(index: usize) -> String { - format!("node-{index}") - } - - fn make_host(ip: Ipv4Addr, identifier: String) -> Host { - let ports = PortOverrides { - network_port: None, - blend_port: None, - api_port: None, - testing_http_port: None, - }; - Host::node_from_ip(ip, identifier, ports) - } - - fn declaration_fingerprint( - genesis: &G, - ) -> Vec<(ServiceType, ProviderId, NoteId, ZkPublicKey)> - where - G: GenesisTxTrait, - { - genesis - .sdp_declarations() - .map(|(op, _)| (op.service_type, op.provider_id, op.locked_note_id, op.zk_id)) - .collect() - } -} +mod tests {} diff --git a/testing-framework/deployers/compose/src/deployer/orchestrator.rs b/testing-framework/deployers/compose/src/deployer/orchestrator.rs index 40c2fc3..1e25b03 100644 --- a/testing-framework/deployers/compose/src/deployer/orchestrator.rs +++ b/testing-framework/deployers/compose/src/deployer/orchestrator.rs @@ -1,8 +1,13 @@ -use std::sync::Arc; +use std::{env, sync::Arc, time::Duration}; -use testing_framework_core::scenario::{ - NodeControlHandle, ObservabilityCapabilityProvider, ObservabilityInputs, RequiresNodeControl, - RunContext, Runner, Scenario, +use reqwest::Url; +use testing_framework_core::{ + scenario::{ + DeploymentPolicy, FeedHandle, FeedRuntime, HttpReadinessRequirement, Metrics, NodeClients, + NodeControlHandle, ObservabilityCapabilityProvider, ObservabilityInputs, + RequiresNodeControl, RunContext, Runner, Scenario, + }, + topology::DeploymentDescriptor, }; use tracing::info; @@ -16,6 +21,7 @@ use super::{ }; use crate::{ docker::control::ComposeNodeControl, + env::ComposeDeployEnv, errors::ComposeRunnerError, infrastructure::{ environment::StackEnvironment, @@ -23,36 +29,142 @@ use crate::{ }, }; -pub struct DeploymentOrchestrator { - deployer: ComposeDeployer, +const PRINT_ENDPOINTS_ENV: &str = "TESTNET_PRINT_ENDPOINTS"; + +pub struct DeploymentOrchestrator { + deployer: ComposeDeployer, } -impl DeploymentOrchestrator { - pub const fn new(deployer: ComposeDeployer) -> Self { +impl DeploymentOrchestrator { + pub const fn new(deployer: ComposeDeployer) -> Self { Self { deployer } } pub async fn deploy( &self, - scenario: &Scenario, - ) -> Result + scenario: &Scenario, + ) -> Result, ComposeRunnerError> where Caps: RequiresNodeControl + ObservabilityCapabilityProvider + Send + Sync, { - let setup = DeploymentSetup::new(scenario.topology()); + let deployment = scenario.deployment(); + let setup = DeploymentSetup::::new(deployment); setup.validate_environment().await?; let observability = resolve_observability_inputs(scenario)?; + let mut prepared = prepare_deployment::(setup, &observability).await?; + let deployment_policy = scenario.deployment_policy(); + let readiness_enabled = + self.deployer.readiness_checks && deployment_policy.readiness_enabled; - let DeploymentContext { - mut environment, - descriptors, - } = setup.prepare_workspace(&observability).await?; + self.log_deploy_start( + scenario, + &prepared.descriptors, + deployment_policy, + &observability, + ); - tracing::info!( - nodes = descriptors.nodes().len(), + let deployed = deploy_nodes::( + &mut prepared.environment, + &prepared.descriptors, + readiness_enabled, + deployment_policy.readiness_requirement, + ) + .await?; + + let runner = self + .build_runner::( + scenario, + prepared, + deployed, + observability, + readiness_enabled, + ) + .await?; + + self.log_deploy_ready( + scenario, + deployment_policy, + deployment.node_count(), + &compose_runner_host(), + readiness_enabled, + ); + + Ok(runner) + } + + async fn build_runner( + &self, + scenario: &Scenario, + mut prepared: PreparedDeployment, + deployed: DeployedNodes, + observability: ObservabilityInputs, + readiness_enabled: bool, + ) -> Result, ComposeRunnerError> + where + Caps: RequiresNodeControl + ObservabilityCapabilityProvider + Send + Sync, + { + let telemetry = observability.telemetry_handle()?; + let node_control = self.maybe_node_control::(&prepared.environment); + + log_observability_endpoints(&observability); + log_profiling_urls(&deployed.host, &deployed.host_ports); + maybe_print_endpoints(&observability, &deployed.host, &deployed.host_ports); + + let input = RuntimeBuildInput { + deployed: &deployed, + descriptors: prepared.descriptors.clone(), + duration: scenario.duration(), + expectation_cooldown: scenario.expectation_cooldown(), + telemetry, + environment: &mut prepared.environment, + node_control, + }; + let runtime = build_compose_runtime::(input).await?; + let cleanup_guard = + make_cleanup_guard(prepared.environment.into_cleanup()?, runtime.feed_task); + + info!( + effective_readiness = readiness_enabled, + host = deployed.host, + "compose runtime prepared" + ); + + Ok(Runner::new(runtime.context, Some(cleanup_guard))) + } + + fn maybe_node_control( + &self, + environment: &StackEnvironment, + ) -> Option>> + where + Caps: RequiresNodeControl + Send + Sync, + { + Caps::REQUIRED.then(|| { + Arc::new(ComposeNodeControl { + compose_file: environment.compose_path().to_path_buf(), + project_name: environment.project_name().to_owned(), + }) as Arc> + }) + } + + fn log_deploy_start( + &self, + scenario: &Scenario, + descriptors: &E::Deployment, + deployment_policy: DeploymentPolicy, + observability: &ObservabilityInputs, + ) { + let effective_readiness = + self.deployer.readiness_checks && deployment_policy.readiness_enabled; + + info!( + nodes = descriptors.node_count(), duration_secs = scenario.duration().as_secs(), readiness_checks = self.deployer.readiness_checks, + readiness_enabled = deployment_policy.readiness_enabled, + readiness_requirement = ?deployment_policy.readiness_requirement, + effective_readiness, metrics_query_url = observability.metrics_query_url.as_ref().map(|u| u.as_str()), metrics_otlp_ingest_url = observability .metrics_otlp_ingest_url @@ -61,78 +173,130 @@ impl DeploymentOrchestrator { grafana_url = observability.grafana_url.as_ref().map(|u| u.as_str()), "compose deployment starting" ); + } - let node_count = descriptors.nodes().len(); - let host_ports = PortManager::prepare(&mut environment, &descriptors).await?; - - wait_for_readiness_or_grace_period( - self.deployer.readiness_checks, - &descriptors, - &host_ports, - &mut environment, - ) - .await?; - - let host = compose_runner_host(); - let client_builder = ClientBuilder::new(); - let node_clients = client_builder - .build_node_clients(&descriptors, &host_ports, &host, &mut environment) - .await?; - let telemetry = observability.telemetry_handle()?; - let node_control = self.maybe_node_control::(&environment); - - log_observability_endpoints(&observability); - log_profiling_urls(&host, &host_ports); - - maybe_print_endpoints(&observability, &host, &host_ports); - - let (block_feed, block_feed_guard) = client_builder - .start_block_feed(&node_clients, &mut environment) - .await?; - let cleanup_guard = make_cleanup_guard(environment.into_cleanup()?, block_feed_guard); - - let context = RunContext::new( - descriptors, - None, - node_clients, - scenario.duration(), - telemetry, - block_feed, - node_control, - ); - + fn log_deploy_ready( + &self, + scenario: &Scenario, + deployment_policy: DeploymentPolicy, + node_count: usize, + host: &str, + readiness_enabled: bool, + ) { info!( nodes = node_count, duration_secs = scenario.duration().as_secs(), readiness_checks = self.deployer.readiness_checks, + readiness_enabled = deployment_policy.readiness_enabled, + readiness_requirement = ?deployment_policy.readiness_requirement, + effective_readiness = readiness_enabled, host, "compose deployment ready; handing control to scenario runner" ); - - Ok(Runner::new(context, Some(cleanup_guard))) - } - - fn maybe_node_control( - &self, - environment: &StackEnvironment, - ) -> Option> - where - Caps: RequiresNodeControl + Send + Sync, - { - Caps::REQUIRED.then(|| { - Arc::new(ComposeNodeControl { - compose_file: environment.compose_path().to_path_buf(), - project_name: environment.project_name().to_owned(), - }) as Arc - }) } } -fn resolve_observability_inputs( - scenario: &Scenario, +struct DeployedNodes { + host_ports: HostPortMapping, + host: String, + node_clients: NodeClients, + client_builder: ClientBuilder, +} + +struct ComposeRuntime { + context: RunContext, + feed_task: FeedHandle, +} + +struct RuntimeBuildInput<'a, E: ComposeDeployEnv> { + deployed: &'a DeployedNodes, + descriptors: E::Deployment, + duration: Duration, + expectation_cooldown: Duration, + telemetry: Metrics, + environment: &'a mut StackEnvironment, + node_control: Option>>, +} + +async fn build_compose_runtime( + input: RuntimeBuildInput<'_, E>, +) -> Result, ComposeRunnerError> { + let node_clients = input.deployed.node_clients.clone(); + let (feed, feed_task) = input + .deployed + .client_builder + .start_block_feed(&node_clients, input.environment) + .await?; + + let context = build_run_context( + input.descriptors, + node_clients, + input.duration, + input.expectation_cooldown, + input.telemetry, + feed, + input.node_control, + ); + + Ok(ComposeRuntime { context, feed_task }) +} + +async fn deploy_nodes( + environment: &mut StackEnvironment, + descriptors: &E::Deployment, + readiness_enabled: bool, + readiness_requirement: HttpReadinessRequirement, +) -> Result, ComposeRunnerError> { + let host_ports = PortManager::::prepare(environment, descriptors).await?; + wait_for_readiness_or_grace_period::( + readiness_enabled, + descriptors, + readiness_requirement, + &host_ports, + environment, + ) + .await?; + + let host = compose_runner_host(); + let client_builder = ClientBuilder::::new(); + let node_clients = client_builder + .build_node_clients(descriptors, &host_ports, &host, environment) + .await?; + + Ok(DeployedNodes { + host_ports, + host, + node_clients, + client_builder, + }) +} + +fn build_run_context( + descriptors: E::Deployment, + node_clients: NodeClients, + run_duration: Duration, + expectation_cooldown: Duration, + telemetry: Metrics, + feed: ::Feed, + node_control: Option>>, +) -> RunContext { + RunContext::new( + descriptors, + node_clients, + run_duration, + expectation_cooldown, + telemetry, + feed, + node_control, + ) +} + +fn resolve_observability_inputs( + scenario: &Scenario, ) -> Result where Caps: ObservabilityCapabilityProvider, + E: ComposeDeployEnv, { let env_inputs = ObservabilityInputs::from_env()?; let cap_inputs = scenario @@ -140,17 +304,25 @@ where .observability_capability() .map(ObservabilityInputs::from_capability) .unwrap_or_default(); + Ok(env_inputs.with_overrides(cap_inputs)) } -async fn wait_for_readiness_or_grace_period( +async fn wait_for_readiness_or_grace_period( readiness_checks: bool, - descriptors: &testing_framework_core::topology::generation::GeneratedTopology, + descriptors: &E::Deployment, + readiness_requirement: HttpReadinessRequirement, host_ports: &HostPortMapping, environment: &mut StackEnvironment, ) -> Result<(), ComposeRunnerError> { if readiness_checks { - ReadinessChecker::wait_all(descriptors, host_ports, environment).await?; + ReadinessChecker::::wait_all( + descriptors, + host_ports, + readiness_requirement, + environment, + ) + .await?; return Ok(()); } @@ -166,42 +338,41 @@ fn log_observability_endpoints(observability: &ObservabilityInputs) { "metrics query endpoint configured" ); } + if let Some(url) = observability.grafana_url.as_ref() { info!(grafana_url = %url.as_str(), "grafana url configured"); } } fn maybe_print_endpoints(observability: &ObservabilityInputs, host: &str, ports: &HostPortMapping) { - if std::env::var("TESTNET_PRINT_ENDPOINTS").is_err() { + if !should_print_endpoints() { return; } - let prometheus = observability - .metrics_query_url - .as_ref() - .map(|u| u.as_str().to_string()) - .unwrap_or_else(|| "".to_string()); - let grafana = observability - .grafana_url - .as_ref() - .map(|u| u.as_str().to_string()) - .unwrap_or_else(|| "".to_string()); + let prometheus = endpoint_or_disabled(observability.metrics_query_url.as_ref()); + let grafana = endpoint_or_disabled(observability.grafana_url.as_ref()); println!( "TESTNET_ENDPOINTS prometheus={} grafana={}", prometheus, grafana ); + print_profiling_urls(host, ports); } +fn should_print_endpoints() -> bool { + env::var(PRINT_ENDPOINTS_ENV).is_ok() +} + +fn endpoint_or_disabled(endpoint: Option<&Url>) -> String { + endpoint.map_or_else(|| "".to_string(), |url| url.as_str().to_string()) +} + fn log_profiling_urls(host: &str, ports: &HostPortMapping) { for (idx, node) in ports.nodes.iter().enumerate() { - tracing::info!( + info!( node = idx, - profiling_url = %format!( - "http://{}:{}/debug/pprof/profile?seconds=15&format=proto", - host, node.api - ), + profiling_url = %profiling_url(host, node.api), "node profiling endpoint (profiling feature required)" ); } @@ -210,8 +381,33 @@ fn log_profiling_urls(host: &str, ports: &HostPortMapping) { fn print_profiling_urls(host: &str, ports: &HostPortMapping) { for (idx, node) in ports.nodes.iter().enumerate() { println!( - "TESTNET_PPROF node_{}=http://{}:{}/debug/pprof/profile?seconds=15&format=proto", - idx, host, node.api + "TESTNET_PPROF node_{}={}", + idx, + profiling_url(host, node.api) ); } } + +fn profiling_url(host: &str, api_port: u16) -> String { + format!("http://{host}:{api_port}/debug/pprof/profile?seconds=15&format=proto") +} + +struct PreparedDeployment { + environment: StackEnvironment, + descriptors: E::Deployment, +} + +async fn prepare_deployment( + setup: DeploymentSetup<'_, E>, + observability: &ObservabilityInputs, +) -> Result, ComposeRunnerError> { + let DeploymentContext { + environment, + descriptors, + } = setup.prepare_workspace(observability).await?; + + Ok(PreparedDeployment { + environment, + descriptors: descriptors.clone(), + }) +} diff --git a/testing-framework/deployers/compose/src/deployer/ports.rs b/testing-framework/deployers/compose/src/deployer/ports.rs index eb076f5..ea46aff 100644 --- a/testing-framework/deployers/compose/src/deployer/ports.rs +++ b/testing-framework/deployers/compose/src/deployer/ports.rs @@ -1,7 +1,9 @@ -use testing_framework_core::topology::generation::GeneratedTopology; -use tracing::{debug, info}; +use std::marker::PhantomData; + +use tracing::{debug, info, warn}; use crate::{ + env::ComposeDeployEnv, errors::ComposeRunnerError, infrastructure::{ environment::StackEnvironment, @@ -9,33 +11,41 @@ use crate::{ }, }; -pub struct PortManager; +pub struct PortManager { + _env: PhantomData, +} -impl PortManager { +impl PortManager { pub async fn prepare( environment: &mut StackEnvironment, - descriptors: &GeneratedTopology, + descriptors: &E::Deployment, ) -> Result { + let nodes = E::node_container_ports(descriptors); debug!( - nodes = descriptors.nodes().len(), + nodes = nodes.len(), "resolving host ports for compose services" ); - match discover_host_ports(environment, descriptors).await { - Ok(mapping) => { - info!( - node_ports = ?mapping.node_api_ports(), - "resolved container host ports" - ); - Ok(mapping) - } - Err(err) => { - environment - .fail("failed to determine container host ports") - .await; + let mapping = match discover_host_ports(environment, &nodes).await { + Ok(mapping) => mapping, + Err(error) => return Err(fail_host_port_resolution(environment, error).await), + }; - tracing::warn!(%err, "failed to resolve host ports"); - Err(err) - } - } + info!( + node_ports = ?mapping.node_api_ports(), + "resolved container host ports" + ); + + Ok(mapping) } } + +async fn fail_host_port_resolution( + environment: &mut StackEnvironment, + error: ComposeRunnerError, +) -> ComposeRunnerError { + environment + .fail("failed to determine container host ports") + .await; + warn!(%error, "failed to resolve host ports"); + error +} diff --git a/testing-framework/deployers/compose/src/deployer/readiness.rs b/testing-framework/deployers/compose/src/deployer/readiness.rs index f31f1c7..0faef3c 100644 --- a/testing-framework/deployers/compose/src/deployer/readiness.rs +++ b/testing-framework/deployers/compose/src/deployer/readiness.rs @@ -1,61 +1,79 @@ -use testing_framework_core::topology::generation::GeneratedTopology; -use tracing::info; +use std::marker::PhantomData; + +use testing_framework_core::scenario::HttpReadinessRequirement; +use tracing::{info, warn}; use crate::{ + env::ComposeDeployEnv, errors::ComposeRunnerError, - infrastructure::{ - environment::StackEnvironment, - ports::{HostPortMapping, ensure_remote_readiness_with_ports}, - }, + infrastructure::{environment::StackEnvironment, ports::HostPortMapping}, lifecycle::readiness::ensure_nodes_ready_with_ports, }; -pub struct ReadinessChecker; +pub struct ReadinessChecker { + _env: PhantomData, +} -impl ReadinessChecker { +impl ReadinessChecker { pub async fn wait_all( - descriptors: &GeneratedTopology, + descriptors: &E::Deployment, host_ports: &HostPortMapping, + requirement: HttpReadinessRequirement, environment: &mut StackEnvironment, ) -> Result<(), ComposeRunnerError> { let node_ports = host_ports.node_api_ports(); info!(ports = ?node_ports, "waiting for node HTTP endpoints"); - if let Err(err) = ensure_nodes_ready_with_ports(&node_ports).await { - return fail_readiness_step( - environment, - "node readiness failed", - "node readiness failed", - err, - ) - .await; - } + + wait_local_readiness::(environment, &node_ports, requirement).await?; info!("waiting for remote service readiness"); - if let Err(err) = ensure_remote_readiness_with_ports(descriptors, host_ports).await { - return fail_readiness_step( - environment, - "remote readiness probe failed", - "remote readiness probe failed", - err, - ) - .await; - } + wait_remote_readiness::(environment, descriptors, host_ports, requirement).await?; info!("compose readiness checks passed"); Ok(()) } } -async fn fail_readiness_step( +async fn wait_local_readiness( environment: &mut StackEnvironment, - reason: &str, - log_message: &str, - error: E, -) -> Result<(), ComposeRunnerError> -where - E: std::fmt::Debug + Into, -{ - environment.fail(reason).await; - tracing::warn!(error = ?error, "{log_message}"); - Err(error.into()) + node_ports: &[u16], + requirement: HttpReadinessRequirement, +) -> Result<(), ComposeRunnerError> { + let result = ensure_nodes_ready_with_ports::(node_ports, requirement) + .await + .map_err(ComposeRunnerError::from); + + run_readiness_check(environment, "node readiness failed", result).await +} + +async fn wait_remote_readiness( + environment: &mut StackEnvironment, + descriptors: &E::Deployment, + host_ports: &HostPortMapping, + requirement: HttpReadinessRequirement, +) -> Result<(), ComposeRunnerError> { + run_readiness_check( + environment, + "remote readiness probe failed", + E::wait_remote_readiness(descriptors, host_ports, requirement) + .await + .map_err(|source| { + ComposeRunnerError::Readiness(crate::errors::StackReadinessError::Remote { source }) + }), + ) + .await +} + +async fn run_readiness_check( + environment: &mut StackEnvironment, + fail_reason: &str, + result: Result<(), ComposeRunnerError>, +) -> Result<(), ComposeRunnerError> { + if let Err(error) = result { + environment.fail(fail_reason).await; + warn!(error = ?error, "{fail_reason}"); + return Err(error); + } + + Ok(()) } diff --git a/testing-framework/deployers/compose/src/deployer/setup.rs b/testing-framework/deployers/compose/src/deployer/setup.rs index ac0e664..71b565f 100644 --- a/testing-framework/deployers/compose/src/deployer/setup.rs +++ b/testing-framework/deployers/compose/src/deployer/setup.rs @@ -1,40 +1,34 @@ -use testing_framework_core::{ - scenario::ObservabilityInputs, topology::generation::GeneratedTopology, -}; +use testing_framework_core::{scenario::ObservabilityInputs, topology::DeploymentDescriptor}; use tracing::info; use crate::{ docker::ensure_docker_available, + env::ComposeDeployEnv, errors::ComposeRunnerError, infrastructure::environment::{ StackEnvironment, ensure_supported_topology, prepare_environment, }, }; -pub struct DeploymentSetup { - descriptors: GeneratedTopology, +pub struct DeploymentSetup<'a, E: ComposeDeployEnv> { + descriptors: &'a E::Deployment, } -pub struct DeploymentContext { - pub descriptors: GeneratedTopology, +pub struct DeploymentContext<'a, E: ComposeDeployEnv> { + pub descriptors: &'a E::Deployment, pub environment: StackEnvironment, } -impl DeploymentSetup { - pub fn new(descriptors: &GeneratedTopology) -> Self { - Self { - descriptors: descriptors.clone(), - } +impl<'a, E: ComposeDeployEnv> DeploymentSetup<'a, E> { + pub fn new(descriptors: &'a E::Deployment) -> Self { + Self { descriptors } } pub async fn validate_environment(&self) -> Result<(), ComposeRunnerError> { ensure_docker_available().await?; - ensure_supported_topology(&self.descriptors)?; + ensure_supported_topology::(self.descriptors)?; - info!( - nodes = self.descriptors.nodes().len(), - "starting compose deployment" - ); + log_deployment_start(self.descriptors.node_count()); Ok(()) } @@ -42,19 +36,12 @@ impl DeploymentSetup { pub async fn prepare_workspace( self, observability: &ObservabilityInputs, - ) -> Result { - let environment = prepare_environment( - &self.descriptors, - observability.metrics_otlp_ingest_url.as_ref(), - ) - .await?; + ) -> Result, ComposeRunnerError> { + let metrics_otlp_ingest_url = observability.metrics_otlp_ingest_url.as_ref(); + let environment = + prepare_environment::(self.descriptors, metrics_otlp_ingest_url).await?; - info!( - compose_file = %environment.compose_path().display(), - project = environment.project_name(), - root = %environment.root().display(), - "compose workspace prepared" - ); + log_workspace_prepared(&environment); Ok(DeploymentContext { descriptors: self.descriptors, @@ -62,3 +49,16 @@ impl DeploymentSetup { }) } } + +fn log_deployment_start(nodes: usize) { + info!(nodes, "starting compose deployment"); +} + +fn log_workspace_prepared(environment: &StackEnvironment) { + info!( + compose_file = %environment.compose_path().display(), + project = environment.project_name(), + root = %environment.root().display(), + "compose workspace prepared" + ); +} diff --git a/testing-framework/deployers/compose/src/descriptor/mod.rs b/testing-framework/deployers/compose/src/descriptor/mod.rs index 8d93e41..92523ef 100644 --- a/testing-framework/deployers/compose/src/descriptor/mod.rs +++ b/testing-framework/deployers/compose/src/descriptor/mod.rs @@ -1,135 +1,29 @@ -use std::{ - env, - path::{Path, PathBuf}, -}; - use serde::Serialize; -use testing_framework_core::topology::generation::{GeneratedNodeConfig, GeneratedTopology}; -use testing_framework_env as tf_env; - -use crate::docker::platform::{host_gateway_entry, resolve_image}; mod node; pub use node::{EnvEntry, NodeDescriptor}; -use testing_framework_config::constants::DEFAULT_CFGSYNC_PORT; -/// Top-level docker-compose descriptor built from a GeneratedTopology. +/// Top-level docker-compose descriptor built from an environment-specific +/// topology. #[derive(Clone, Debug, Serialize)] pub struct ComposeDescriptor { nodes: Vec, } impl ComposeDescriptor { - /// Start building a descriptor from a generated topology. #[must_use] - pub const fn builder(topology: &GeneratedTopology) -> ComposeDescriptorBuilder<'_> { - ComposeDescriptorBuilder::new(topology) + pub fn new(nodes: Vec) -> Self { + Self { nodes } } - #[cfg(test)] + #[must_use] pub fn nodes(&self) -> &[NodeDescriptor] { &self.nodes } -} -/// Builder for `ComposeDescriptor` that plugs topology values into the -/// template. -pub struct ComposeDescriptorBuilder<'a> { - topology: &'a GeneratedTopology, - cfgsync_port: Option, -} - -impl<'a> ComposeDescriptorBuilder<'a> { - const fn new(topology: &'a GeneratedTopology) -> Self { - Self { - topology, - cfgsync_port: None, - } - } - - #[must_use] - /// Set cfgsync port for nodes. - pub const fn with_cfgsync_port(mut self, port: u16) -> Self { - self.cfgsync_port = Some(port); - self - } - - /// Finish building the descriptor. - #[must_use] - pub fn build(self) -> ComposeDescriptor { - let cfgsync_port = self.cfgsync_port.unwrap_or(DEFAULT_CFGSYNC_PORT); - - let (image, platform) = resolve_image(); - - let nodes = build_nodes( - self.topology.nodes(), - &image, - platform.as_deref(), - cfgsync_port, - ); - - ComposeDescriptor { nodes } + #[cfg(test)] + pub fn test_nodes(&self) -> &[NodeDescriptor] { + self.nodes() } } - -const NODE_ENTRYPOINT: &str = "/etc/nomos/scripts/run_nomos_node.sh"; - -pub(crate) fn node_instance_name(index: usize) -> String { - format!("node-{index}") -} - -fn build_nodes( - nodes: &[GeneratedNodeConfig], - image: &str, - platform: Option<&str>, - cfgsync_port: u16, -) -> Vec { - nodes - .iter() - .enumerate() - .map(|(index, node)| NodeDescriptor::from_node(index, node, image, platform, cfgsync_port)) - .collect() -} - -fn base_volumes() -> Vec { - let mut volumes = vec!["./stack:/etc/nomos".into()]; - if let Some(host_log_dir) = repo_root() - .map(|root| root.join("tmp").join("node-logs")) - .map(|dir| dir.display().to_string()) - { - volumes.push(format!("{host_log_dir}:/tmp/node-logs")); - } - volumes -} - -fn repo_root() -> Option { - if let Ok(root) = env::var("CARGO_WORKSPACE_DIR") { - return Some(PathBuf::from(root)); - } - Path::new(env!("CARGO_MANIFEST_DIR")) - .parent() - .and_then(Path::parent) - .and_then(Path::parent) - .map(Path::to_path_buf) -} - -fn default_extra_hosts() -> Vec { - host_gateway_entry().into_iter().collect() -} - -fn base_environment(cfgsync_port: u16) -> Vec { - let rust_log = tf_env::rust_log().unwrap_or_else(|| "info".to_string()); - let nomos_log_level = tf_env::nomos_log_level().unwrap_or_else(|| "info".to_string()); - let time_backend = tf_env::lb_time_service_backend().unwrap_or_else(|| "monotonic".into()); - vec![ - EnvEntry::new("RUST_LOG", rust_log), - EnvEntry::new("LOGOS_BLOCKCHAIN_LOG_LEVEL", nomos_log_level), - EnvEntry::new("LOGOS_BLOCKCHAIN_TIME_BACKEND", time_backend), - EnvEntry::new( - "CFG_SERVER_ADDR", - format!("http://host.docker.internal:{cfgsync_port}"), - ), - EnvEntry::new("OTEL_METRIC_EXPORT_INTERVAL", "5000"), - ] -} diff --git a/testing-framework/deployers/compose/src/descriptor/node.rs b/testing-framework/deployers/compose/src/descriptor/node.rs index 15c88ed..c5f769b 100644 --- a/testing-framework/deployers/compose/src/descriptor/node.rs +++ b/testing-framework/deployers/compose/src/descriptor/node.rs @@ -1,9 +1,4 @@ use serde::Serialize; -use testing_framework_core::topology::generation::GeneratedNodeConfig; - -use super::{ - NODE_ENTRYPOINT, base_environment, base_volumes, default_extra_hosts, node_instance_name, -}; /// Describes a node container in the compose stack. #[derive(Clone, Debug, Serialize)] @@ -27,7 +22,7 @@ pub struct EnvEntry { } impl EnvEntry { - pub(crate) fn new(key: impl Into, value: impl Into) -> Self { + pub fn new(key: impl Into, value: impl Into) -> Self { Self { key: key.into(), value: value.into(), @@ -46,53 +41,38 @@ impl EnvEntry { } impl NodeDescriptor { - pub(crate) fn from_node( - index: usize, - node: &GeneratedNodeConfig, - image: &str, - platform: Option<&str>, - cfgsync_port: u16, + #[allow(clippy::too_many_arguments)] + pub fn new( + name: impl Into, + image: impl Into, + entrypoint: impl Into, + volumes: Vec, + extra_hosts: Vec, + ports: Vec, + environment: Vec, + platform: Option, ) -> Self { - let mut environment = base_environment(cfgsync_port); - let identifier = node_instance_name(index); - let api_port = node.general.api_config.address.port(); - let testing_port = node.general.api_config.testing_http_address.port(); - environment.extend([ - EnvEntry::new( - "CFG_NETWORK_PORT", - node.general.network_config.backend.swarm.port.to_string(), - ), - EnvEntry::new("CFG_BLEND_PORT", node.blend_port.to_string()), - EnvEntry::new("CFG_API_PORT", api_port.to_string()), - EnvEntry::new("CFG_TESTING_HTTP_PORT", testing_port.to_string()), - EnvEntry::new("CFG_HOST_IDENTIFIER", identifier), - ]); - - // Publish container ports on random host ports to avoid collisions with - // local services and allow multiple compose stacks to run concurrently. - // The runner discovers the chosen host ports via `docker compose port`. - let ports = vec![ - format!("127.0.0.1::{api_port}"), - format!("127.0.0.1::{testing_port}"), - ]; - Self { - name: node_instance_name(index), - image: image.to_owned(), - entrypoint: NODE_ENTRYPOINT.to_owned(), - volumes: base_volumes(), - extra_hosts: default_extra_hosts(), + name: name.into(), + image: image.into(), + entrypoint: entrypoint.into(), + volumes, + extra_hosts, ports, environment, - platform: platform.map(ToOwned::to_owned), + platform, } } - #[cfg(test)] pub fn ports(&self) -> &[String] { &self.ports } + #[cfg(test)] + pub fn test_ports(&self) -> &[String] { + self.ports() + } + #[cfg(test)] pub fn environment(&self) -> &[EnvEntry] { &self.environment diff --git a/testing-framework/deployers/compose/src/docker/commands.rs b/testing-framework/deployers/compose/src/docker/commands.rs index 9386754..a840c74 100644 --- a/testing-framework/deployers/compose/src/docker/commands.rs +++ b/testing-framework/deployers/compose/src/docker/commands.rs @@ -1,4 +1,9 @@ -use std::{io, path::Path, process, time::Duration}; +use std::{ + io, + path::Path, + process::{self, ExitStatus}, + time::Duration, +}; use testing_framework_core::adjust_timeout; use tokio::{process::Command, time::timeout}; @@ -31,14 +36,7 @@ pub async fn run_docker_command( description: &str, ) -> Result<(), ComposeCommandError> { debug!(description, ?command, "running docker command"); - let result = timeout(timeout_duration, command.status()).await; - match result { - Ok(status) => handle_compose_status(status, description), - Err(_) => Err(ComposeCommandError::Timeout { - command: description.to_owned(), - timeout: timeout_duration, - }), - } + run_command_status(&mut command, timeout_duration, description).await } /// Runs `docker compose up -d` for the generated stack. @@ -47,24 +45,51 @@ pub async fn compose_up( project_name: &str, root: &Path, ) -> Result<(), ComposeCommandError> { - let mut cmd = Command::new("docker"); - cmd.arg("compose") - .arg("-f") - .arg(compose_path) - .arg("-p") - .arg(project_name) - .arg("up") - .arg("-d") - .current_dir(root); + run_compose_action( + compose_path, + project_name, + root, + ["up", "-d"], + adjust_timeout(COMPOSE_UP_TIMEOUT), + "docker compose up", + ) + .await +} - info!( - compose_file = %compose_path.display(), - project = project_name, - root = %root.display(), - "running docker compose up" - ); +/// Runs `docker compose up --no-start` for the generated stack. +pub async fn compose_create( + compose_path: &Path, + project_name: &str, + root: &Path, +) -> Result<(), ComposeCommandError> { + run_compose_action( + compose_path, + project_name, + root, + ["up", "--no-start"], + adjust_timeout(COMPOSE_UP_TIMEOUT), + "docker compose create", + ) + .await +} - run_compose_command(cmd, adjust_timeout(COMPOSE_UP_TIMEOUT), "docker compose up").await +/// Runs `docker compose up -d --no-deps ` for a single service. +pub async fn compose_up_service( + compose_path: &Path, + project_name: &str, + root: &Path, + service: &str, +) -> Result<(), ComposeCommandError> { + run_compose_action_with_service( + compose_path, + project_name, + root, + ["up", "-d", "--no-deps"], + service, + adjust_timeout(COMPOSE_UP_TIMEOUT), + "docker compose up service", + ) + .await } /// Runs `docker compose down --volumes` for the generated stack. @@ -73,25 +98,11 @@ pub async fn compose_down( project_name: &str, root: &Path, ) -> Result<(), ComposeCommandError> { - let mut cmd = Command::new("docker"); - cmd.arg("compose") - .arg("-f") - .arg(compose_path) - .arg("-p") - .arg(project_name) - .arg("down") - .arg("--volumes") - .current_dir(root); - - info!( - compose_file = %compose_path.display(), - project = project_name, - root = %root.display(), - "running docker compose down" - ); - - run_compose_command( - cmd, + run_compose_action( + compose_path, + project_name, + root, + ["down", "--volumes"], adjust_timeout(COMPOSE_UP_TIMEOUT), "docker compose down", ) @@ -100,15 +111,8 @@ pub async fn compose_down( /// Dump docker compose logs to stderr for debugging failures. pub async fn dump_compose_logs(compose_file: &Path, project: &str, root: &Path) { - let mut cmd = Command::new("docker"); - cmd.arg("compose") - .arg("-f") - .arg(compose_file) - .arg("-p") - .arg(project) - .arg("logs") - .arg("--no-color") - .current_dir(root); + let mut cmd = compose_command(compose_file, project, root); + cmd.arg("logs").arg("--no-color"); match cmd.output().await { Ok(output) => print_logs(&output.stdout, &output.stderr), @@ -123,6 +127,7 @@ fn print_logs(stdout: &[u8], stderr: &[u8]) { "docker compose stdout" ); } + if !stderr.is_empty() { warn!( logs = %String::from_utf8_lossy(stderr), @@ -131,23 +136,65 @@ fn print_logs(stdout: &[u8], stderr: &[u8]) { } } -async fn run_compose_command( - mut command: Command, +fn compose_command(compose_path: &Path, project_name: &str, root: &Path) -> Command { + let mut cmd = Command::new("docker"); + cmd.arg("compose") + .arg("-f") + .arg(compose_path) + .arg("-p") + .arg(project_name) + .current_dir(root); + cmd +} + +async fn run_compose_action( + compose_path: &Path, + project_name: &str, + root: &Path, + args: [&str; N], timeout_duration: Duration, description: &str, ) -> Result<(), ComposeCommandError> { - let result = timeout(timeout_duration, command.status()).await; - match result { - Ok(status) => handle_compose_status(status, description), - Err(_) => Err(ComposeCommandError::Timeout { - command: description.to_owned(), - timeout: timeout_duration, - }), - } + let mut cmd = compose_command(compose_path, project_name, root); + cmd.args(args); + + info!( + compose_file = %compose_path.display(), + project = project_name, + root = %root.display(), + description, + "running docker compose command" + ); + + run_command_status(&mut cmd, timeout_duration, description).await +} + +async fn run_compose_action_with_service( + compose_path: &Path, + project_name: &str, + root: &Path, + args: [&str; N], + service: &str, + timeout_duration: Duration, + description: &str, +) -> Result<(), ComposeCommandError> { + let mut cmd = compose_command(compose_path, project_name, root); + cmd.args(args).arg(service); + + info!( + compose_file = %compose_path.display(), + project = project_name, + root = %root.display(), + service, + description, + "running docker compose command" + ); + + run_command_status(&mut cmd, timeout_duration, description).await } fn handle_compose_status( - status: std::io::Result, + status: io::Result, description: &str, ) -> Result<(), ComposeCommandError> { match status { @@ -155,6 +202,7 @@ fn handle_compose_status( debug!(description, "docker command succeeded"); Ok(()) } + Ok(code) => { warn!(description, status = ?code, "docker command failed"); Err(ComposeCommandError::Failed { @@ -162,6 +210,7 @@ fn handle_compose_status( status: code, }) } + Err(err) => { warn!(description, error = ?err, "failed to spawn docker command"); Err(ComposeCommandError::Spawn { @@ -171,3 +220,17 @@ fn handle_compose_status( } } } + +async fn run_command_status( + command: &mut Command, + timeout_duration: Duration, + description: &str, +) -> Result<(), ComposeCommandError> { + match timeout(timeout_duration, command.status()).await { + Ok(status) => handle_compose_status(status, description), + Err(_) => Err(ComposeCommandError::Timeout { + command: description.to_owned(), + timeout: timeout_duration, + }), + } +} diff --git a/testing-framework/deployers/compose/src/docker/control.rs b/testing-framework/deployers/compose/src/docker/control.rs index 24d924f..0541e42 100644 --- a/testing-framework/deployers/compose/src/docker/control.rs +++ b/testing-framework/deployers/compose/src/docker/control.rs @@ -3,19 +3,42 @@ use std::{ time::Duration, }; -use testing_framework_core::scenario::{DynError, NodeControlHandle}; +use testing_framework_core::{ + adjust_timeout, + scenario::{Application, DynError, NodeControlHandle}, +}; use tokio::process::Command; use tracing::info; use crate::{docker::commands::run_docker_command, errors::ComposeRunnerError}; const COMPOSE_RESTART_TIMEOUT: Duration = Duration::from_secs(120); +const COMPOSE_RESTART_DESCRIPTION: &str = "docker compose restart"; pub async fn restart_compose_service( compose_file: &Path, project_name: &str, service: &str, ) -> Result<(), ComposeRunnerError> { + let command = compose_restart_command(compose_file, project_name, service); + + info!( + service, + project = project_name, + compose_file = %compose_file.display(), + "restarting compose service" + ); + + run_docker_command( + command, + adjust_timeout(COMPOSE_RESTART_TIMEOUT), + COMPOSE_RESTART_DESCRIPTION, + ) + .await + .map_err(ComposeRunnerError::Compose) +} + +fn compose_restart_command(compose_file: &Path, project_name: &str, service: &str) -> Command { let mut command = Command::new("docker"); command .arg("compose") @@ -25,16 +48,7 @@ pub async fn restart_compose_service( .arg(project_name) .arg("restart") .arg(service); - - let description = "docker compose restart"; - info!(service, project = project_name, compose_file = %compose_file.display(), "restarting compose service"); - run_docker_command( - command, - testing_framework_core::adjust_timeout(COMPOSE_RESTART_TIMEOUT), - description, - ) - .await - .map_err(ComposeRunnerError::Compose) + command } /// Compose-specific node control handle for restarting nodes. @@ -44,7 +58,7 @@ pub struct ComposeNodeControl { } #[async_trait::async_trait] -impl NodeControlHandle for ComposeNodeControl { +impl NodeControlHandle for ComposeNodeControl { async fn restart_node(&self, name: &str) -> Result<(), DynError> { restart_compose_service(&self.compose_file, &self.project_name, name) .await diff --git a/testing-framework/deployers/compose/src/docker/mod.rs b/testing-framework/deployers/compose/src/docker/mod.rs index 4dc6c45..3696492 100644 --- a/testing-framework/deployers/compose/src/docker/mod.rs +++ b/testing-framework/deployers/compose/src/docker/mod.rs @@ -3,9 +3,14 @@ pub mod control; pub mod platform; pub mod workspace; -use std::{env, process::Stdio, time::Duration}; +use std::{ + env, io, + path::{Path, PathBuf}, + process::{ExitStatus, Stdio}, + time::Duration, +}; -use testing_framework_config::constants::DEFAULT_ASSETS_STACK_DIR; +use testing_framework_core::adjust_timeout; use tokio::{process::Command, time::timeout}; use tracing::{debug, info, warn}; @@ -17,6 +22,7 @@ use crate::{ const IMAGE_BUILD_TIMEOUT: Duration = Duration::from_secs(600); const DOCKER_INFO_TIMEOUT: Duration = Duration::from_secs(15); const IMAGE_INSPECT_TIMEOUT: Duration = Duration::from_secs(60); +pub(super) const DEFAULT_ASSETS_STACK_DIR: &str = "logos/infra/assets/stack"; /// Checks that `docker info` succeeds within a timeout. pub async fn ensure_docker_available() -> Result<(), ComposeRunnerError> { @@ -26,15 +32,10 @@ pub async fn ensure_docker_available() -> Result<(), ComposeRunnerError> { .stdout(Stdio::null()) .stderr(Stdio::null()); - let available = timeout( - testing_framework_core::adjust_timeout(DOCKER_INFO_TIMEOUT), - command.status(), - ) - .await - .ok() - .and_then(Result::ok) - .map(|status| status.success()) - .unwrap_or(false); + let available = match timeout(adjust_timeout(DOCKER_INFO_TIMEOUT), command.status()).await { + Ok(Ok(status)) => status.success(), + Ok(Err(_)) | Err(_) => false, + }; if available { debug!("docker info succeeded"); @@ -45,13 +46,6 @@ pub async fn ensure_docker_available() -> Result<(), ComposeRunnerError> { } } -/// Ensure the configured compose image exists, building a local one if needed. -pub async fn ensure_compose_image() -> Result<(), ComposeRunnerError> { - let (image, platform) = crate::docker::platform::resolve_image(); - info!(image, platform = ?platform, "ensuring compose image is present"); - ensure_image_present(&image, platform.as_deref()).await -} - /// Verify an image exists locally, optionally building it for the default tag. pub async fn ensure_image_present( image: &str, @@ -62,7 +56,7 @@ pub async fn ensure_image_present( return Ok(()); } - if image != "logos-blockchain-testing:local" { + if !is_local_test_image(image) { return Err(ComposeRunnerError::MissingImage { image: image.to_owned(), }); @@ -80,12 +74,7 @@ pub async fn docker_image_exists(image: &str) -> Result Ok(status.success()), Ok(Err(source)) => Err(ComposeRunnerError::Compose(ComposeCommandError::Spawn { command: format!("docker image inspect {image}"), @@ -93,7 +82,7 @@ pub async fn docker_image_exists(image: &str) -> Result Err(ComposeRunnerError::Compose(ComposeCommandError::Timeout { command: format!("docker image inspect {image}"), - timeout: testing_framework_core::adjust_timeout(IMAGE_INSPECT_TIMEOUT), + timeout: adjust_timeout(IMAGE_INSPECT_TIMEOUT), })), } } @@ -105,66 +94,18 @@ pub async fn build_local_image( ) -> Result<(), ComposeRunnerError> { let repo_root = repository_root().map_err(|source| ComposeRunnerError::ImageBuild { source })?; - let runtime_dockerfile = repo_root - .join(DEFAULT_ASSETS_STACK_DIR) - .join("Dockerfile.runtime"); - - tracing::info!( + info!( image, "building compose test image via scripts/build/build_test_image.sh" ); + let mut cmd = build_local_image_command(&repo_root, image, platform)?; - let mut cmd = Command::new("bash"); - cmd.arg(repo_root.join("scripts/build/build_test_image.sh")) - .arg("--tag") - .arg(image) - .arg("--dockerfile") - .arg(runtime_dockerfile) - // Make the build self-contained (don't require a local bundle tar). - .arg("--no-restore"); - - if let Some(build_platform) = select_build_platform(platform)? { - cmd.env("DOCKER_DEFAULT_PLATFORM", build_platform); - } - - if let Some(circuits_platform) = env::var("COMPOSE_CIRCUITS_PLATFORM") - .ok() - .filter(|value| !value.is_empty()) - { - cmd.arg("--circuits-platform").arg(circuits_platform); - } - - if let Some(value) = env::var("CIRCUITS_OVERRIDE") - .ok() - .filter(|val| !val.is_empty()) - { - cmd.arg("--circuits-override").arg(value); - } - - cmd.current_dir(&repo_root); - - let status = timeout( - testing_framework_core::adjust_timeout(IMAGE_BUILD_TIMEOUT), - cmd.status(), - ) - .await - .map_err(|_| { - warn!( - image, - timeout = ?IMAGE_BUILD_TIMEOUT, - "test image build timed out" - ); - ComposeRunnerError::Compose(ComposeCommandError::Timeout { - command: String::from("scripts/build/build_test_image.sh"), - timeout: testing_framework_core::adjust_timeout(IMAGE_BUILD_TIMEOUT), - }) - })?; - - match status { + match run_build_command_with_timeout(image, &mut cmd).await? { Ok(code) if code.success() => { info!(image, platform = ?platform, "test image build completed"); Ok(()) } + Ok(code) => { warn!(image, status = ?code, "test image build failed"); Err(ComposeRunnerError::Compose(ComposeCommandError::Failed { @@ -172,6 +113,7 @@ pub async fn build_local_image( status: code, })) } + Err(err) => { warn!(image, error = ?err, "test image build spawn failed"); Err(ComposeRunnerError::ImageBuild { source: err.into() }) @@ -179,13 +121,97 @@ pub async fn build_local_image( } } -fn select_build_platform(platform: Option<&str>) -> Result, ComposeRunnerError> { - Ok(platform.map(String::from).or_else(|| { - let host_arch = std::env::consts::ARCH; +fn build_local_image_command( + repo_root: &Path, + image: &str, + platform: Option<&str>, +) -> Result { + let runtime_dockerfile = stack_assets_root(repo_root).join("Dockerfile.runtime"); + let mut cmd = Command::new("bash"); + + cmd.arg(repo_root.join("scripts/build/build_test_image.sh")) + .arg("--tag") + .arg(image) + .arg("--dockerfile") + .arg(runtime_dockerfile) + // Make the build self-contained (don't require a local bundle tar). + .arg("--no-restore") + .current_dir(repo_root); + + if let Some(build_platform) = select_build_platform(platform) { + cmd.env("DOCKER_DEFAULT_PLATFORM", build_platform); + } + + apply_optional_circuits_flags(&mut cmd); + + Ok(cmd) +} + +async fn run_build_command_with_timeout( + image: &str, + cmd: &mut Command, +) -> Result, ComposeRunnerError> { + let timeout_duration = adjust_timeout(IMAGE_BUILD_TIMEOUT); + + timeout(timeout_duration, cmd.status()).await.map_err(|_| { + warn!( + image, + timeout = ?IMAGE_BUILD_TIMEOUT, + "test image build timed out" + ); + ComposeRunnerError::Compose(ComposeCommandError::Timeout { + command: String::from("scripts/build/build_test_image.sh"), + timeout: timeout_duration, + }) + }) +} + +fn select_build_platform(platform: Option<&str>) -> Option { + platform.map(String::from).or_else(|| { + let host_arch = env::consts::ARCH; match host_arch { "aarch64" | "arm64" => Some(String::from("linux/arm64")), "x86_64" => Some(String::from("linux/amd64")), _ => None, } - })) + }) +} + +fn apply_optional_circuits_flags(cmd: &mut Command) { + if let Some(circuits_platform) = nonempty_env("COMPOSE_CIRCUITS_PLATFORM") { + cmd.arg("--circuits-platform").arg(circuits_platform); + } + + if let Some(value) = nonempty_env("CIRCUITS_OVERRIDE") { + cmd.arg("--circuits-override").arg(value); + } +} + +fn nonempty_env(key: &str) -> Option { + env::var(key).ok().filter(|value| !value.is_empty()) +} + +fn stack_assets_root(repo_root: &Path) -> PathBuf { + if let Some(override_dir) = assets_override_dir(repo_root) + && override_dir.exists() + { + return override_dir; + } + + repo_root.join(DEFAULT_ASSETS_STACK_DIR) +} + +fn is_local_test_image(image: &str) -> bool { + image == "logos-blockchain-testing:local" +} + +fn assets_override_dir(repo_root: &Path) -> Option { + env::var("REL_ASSETS_STACK_DIR").ok().map(|value| { + let path = PathBuf::from(value); + if path.is_absolute() { + path + } else { + repo_root.join(path) + } + }) } diff --git a/testing-framework/deployers/compose/src/docker/platform.rs b/testing-framework/deployers/compose/src/docker/platform.rs index 473b47b..86f03c3 100644 --- a/testing-framework/deployers/compose/src/docker/platform.rs +++ b/testing-framework/deployers/compose/src/docker/platform.rs @@ -1,24 +1,12 @@ use std::env; -use testing_framework_env as tf_env; -use tracing::debug; - -/// Select the compose image and optional platform, honoring -/// LOGOS_BLOCKCHAIN_TESTNET_IMAGE. -pub fn resolve_image() -> (String, Option) { - let image = tf_env::nomos_testnet_image() - .unwrap_or_else(|| String::from("logos-blockchain-testing:local")); - let platform = (image == "ghcr.io/logos-co/nomos:testnet").then(|| "linux/amd64".to_owned()); - debug!(image, platform = ?platform, "resolved compose image"); - (image, platform) -} - /// Optional extra hosts entry for host networking. pub fn host_gateway_entry() -> Option { if let Ok(value) = env::var("COMPOSE_RUNNER_HOST_GATEWAY") { if value.eq_ignore_ascii_case("disable") || value.is_empty() { return None; } + return Some(value); } diff --git a/testing-framework/deployers/compose/src/docker/workspace.rs b/testing-framework/deployers/compose/src/docker/workspace.rs index 65fe782..0ac767d 100644 --- a/testing-framework/deployers/compose/src/docker/workspace.rs +++ b/testing-framework/deployers/compose/src/docker/workspace.rs @@ -5,9 +5,10 @@ use std::{ use anyhow::{Context as _, Result}; use tempfile::TempDir; -use testing_framework_config::constants::DEFAULT_ASSETS_STACK_DIR; use tracing::{debug, info}; +use super::DEFAULT_ASSETS_STACK_DIR; + /// Copy the repository stack assets into a scenario-specific temp dir. #[derive(Debug)] pub struct ComposeWorkspace { @@ -17,39 +18,17 @@ pub struct ComposeWorkspace { impl ComposeWorkspace { /// Clone the stack assets into a temporary directory. pub fn create() -> Result { - let repo_root = env::var("REPO_ROOT_OVERRIDE_DIR") - .or_else(|_| env::var("CARGO_WORKSPACE_DIR")) - .map(PathBuf::from) - .or_else(|_| { - Path::new(env!("CARGO_MANIFEST_DIR")) - .parent() - .and_then(Path::parent) - .and_then(Path::parent) - .map(Path::to_path_buf) - .context("resolving workspace root from manifest dir") - }) - .context("locating repository root")?; - let temp = tempfile::Builder::new() - .prefix("nomos-testnet-") - .tempdir() - .context("creating testnet temp dir")?; + let repo_root = resolve_repo_root()?; + let temp = create_temp_workspace()?; let stack_source = stack_assets_root(&repo_root); - if !stack_source.exists() { - anyhow::bail!( - "stack assets directory not found at {}", - stack_source.display() - ); - } + ensure_stack_source_exists(&stack_source)?; + debug!( repo_root = %repo_root.display(), stack_source = %stack_source.display(), "copying stack assets into temporary workspace" ); - copy_dir_recursive(&stack_source, &temp.path().join("stack"))?; - let scripts_source = stack_scripts_root(&repo_root); - if scripts_source.exists() { - copy_dir_recursive(&scripts_source, &temp.path().join("stack/scripts"))?; - } + copy_stack_assets(&repo_root, &stack_source, temp.path())?; info!(root = %temp.path().display(), "compose workspace created"); Ok(Self { root: temp }) @@ -74,42 +53,100 @@ impl ComposeWorkspace { } } -fn stack_assets_root(repo_root: &Path) -> PathBuf { - let new_layout = if let Some(rel_stack_dir) = env::var("REL_ASSETS_STACK_DIR").ok() { - repo_root.join(rel_stack_dir) - } else { - repo_root.join(DEFAULT_ASSETS_STACK_DIR) - }; - if new_layout.exists() { - new_layout - } else { - repo_root.join("testnet") - } +fn resolve_repo_root() -> Result { + env::var("REPO_ROOT_OVERRIDE_DIR") + .or_else(|_| env::var("CARGO_WORKSPACE_DIR")) + .map(PathBuf::from) + .or_else(|_| { + Path::new(env!("CARGO_MANIFEST_DIR")) + .parent() + .and_then(Path::parent) + .and_then(Path::parent) + .map(Path::to_path_buf) + .context("resolving workspace root from manifest dir") + }) + .context("locating repository root") } -fn stack_scripts_root(repo_root: &Path) -> PathBuf { - let new_layout = repo_root.join(DEFAULT_ASSETS_STACK_DIR).join("scripts"); - if new_layout.exists() { - new_layout - } else { - repo_root.join("testnet/scripts") +fn create_temp_workspace() -> Result { + tempfile::Builder::new() + .prefix("compose-stack-") + .tempdir() + .context("creating testnet temp dir") +} + +fn ensure_stack_source_exists(stack_source: &Path) -> Result<()> { + if !stack_source.exists() { + anyhow::bail!( + "stack assets directory not found at {}", + stack_source.display() + ); } + Ok(()) +} + +fn copy_stack_assets(repo_root: &Path, stack_source: &Path, target_root: &Path) -> Result<()> { + copy_dir_recursive(&stack_source, &target_root.join("stack"))?; + + let scripts_source = stack_scripts_root(repo_root, stack_source); + if scripts_source.exists() { + copy_dir_recursive(&scripts_source, &target_root.join("stack/scripts"))?; + } + + Ok(()) +} + +fn stack_assets_root(repo_root: &Path) -> PathBuf { + if let Some(override_dir) = assets_override_dir(repo_root) + && override_dir.exists() + { + return override_dir; + } + + repo_root.join(DEFAULT_ASSETS_STACK_DIR) +} + +fn stack_scripts_root(repo_root: &Path, stack_source: &Path) -> PathBuf { + let scripts = stack_source.join("scripts"); + if scripts.exists() { + return scripts; + } + + repo_root.join(DEFAULT_ASSETS_STACK_DIR).join("scripts") +} + +fn assets_override_dir(repo_root: &Path) -> Option { + env::var("REL_ASSETS_STACK_DIR") + .ok() + .map(PathBuf::from) + .map(|path| resolve_workspace_relative_path(repo_root, path)) +} + +fn resolve_workspace_relative_path(repo_root: &Path, path: PathBuf) -> PathBuf { + if path.is_absolute() { + return path; + } + + repo_root.join(path) } fn copy_dir_recursive(source: &Path, target: &Path) -> Result<()> { fs::create_dir_all(target) .with_context(|| format!("creating target dir {}", target.display()))?; + for entry in fs::read_dir(source).with_context(|| format!("reading {}", source.display()))? { let entry = entry?; let file_type = entry.file_type()?; let dest = target.join(entry.file_name()); + if file_type.is_dir() { copy_dir_recursive(&entry.path(), &dest)?; - } else if !file_type.is_dir() { + } else { fs::copy(entry.path(), &dest).with_context(|| { format!("copying {} -> {}", entry.path().display(), dest.display()) })?; } } + Ok(()) } diff --git a/testing-framework/deployers/compose/src/env.rs b/testing-framework/deployers/compose/src/env.rs new file mode 100644 index 0000000..42de069 --- /dev/null +++ b/testing-framework/deployers/compose/src/env.rs @@ -0,0 +1,181 @@ +use std::{env, path::Path}; + +use async_trait::async_trait; +use reqwest::Url; +use testing_framework_core::scenario::{ + Application, DynError, HttpReadinessRequirement, NodeClients, + wait_for_http_ports_with_host_and_requirement, wait_http_readiness, +}; + +use crate::{ + descriptor::{ComposeDescriptor, NodeDescriptor}, + infrastructure::ports::{HostPortMapping, NodeContainerPorts, NodeHostPorts}, +}; + +/// Handle returned by a compose config server (cfgsync or equivalent). +pub trait ConfigServerHandle: Send + Sync { + fn shutdown(&mut self); + fn mark_preserved(&mut self); + fn container_name(&self) -> Option<&str> { + None + } +} + +/// Compose-specific topology surface needed by the runner. +#[async_trait] +pub trait ComposeDeployEnv: Application { + type ConfigHandle: ConfigServerHandle; + + /// Produce the compose descriptor for the given topology. + fn compose_descriptor(topology: &Self::Deployment, cfgsync_port: u16) -> ComposeDescriptor; + + /// Container ports (API/testing) per node, used for docker-compose port + /// discovery. + fn node_container_ports(topology: &Self::Deployment) -> Vec { + let descriptor = Self::compose_descriptor(topology, 0); + descriptor + .nodes() + .iter() + .enumerate() + .filter_map(|(index, node)| parse_node_container_ports(index, node)) + .collect() + } + + /// Update the config server template based on topology. + fn update_cfgsync_config( + path: &Path, + topology: &Self::Deployment, + port: u16, + metrics_otlp_ingest_url: Option<&Url>, + ) -> Result<(), DynError>; + + /// Start the config server and return its handle. + async fn start_cfgsync( + cfgsync_path: &Path, + port: u16, + network: &str, + ) -> Result; + + /// Build node clients from discovered host ports. + fn node_client_from_ports( + ports: &NodeHostPorts, + host: &str, + ) -> Result; + + /// Build node clients from discovered host ports. + fn build_node_clients( + _topology: &Self::Deployment, + host_ports: &HostPortMapping, + host: &str, + ) -> Result, DynError> + where + Self: Sized, + { + let clients = host_ports + .nodes + .iter() + .map(|ports| Self::node_client_from_ports(ports, host)) + .collect::>()?; + Ok(NodeClients::new(clients)) + } + + /// Return the compose image name and optional platform override. + /// + /// Defaults: + /// - image: `COMPOSE_RUNNER_IMAGE` or `logos-blockchain-testing:local` + /// - platform: `COMPOSE_RUNNER_PLATFORM` when set + fn compose_image() -> (String, Option) { + let image = compose_image_from_env(); + let platform = env::var("COMPOSE_RUNNER_PLATFORM").ok(); + (image, platform) + } + + /// Path used by default readiness checks. + fn readiness_path() -> &'static str { + "/" + } + + /// Host used by default remote readiness checks. + fn compose_runner_host() -> String { + "127.0.0.1".to_string() + } + + /// Remote readiness probe for node APIs. + async fn wait_remote_readiness( + _topology: &Self::Deployment, + mapping: &HostPortMapping, + requirement: HttpReadinessRequirement, + ) -> Result<(), DynError> { + let host = Self::compose_runner_host(); + let urls = readiness_urls(&host, mapping, Self::readiness_path())?; + wait_http_readiness(&urls, requirement).await?; + Ok(()) + } + + /// Wait for HTTP readiness on node ports. + async fn wait_for_nodes( + ports: &[u16], + host: &str, + requirement: HttpReadinessRequirement, + ) -> Result<(), DynError> { + wait_for_http_ports_with_host_and_requirement( + ports, + host, + Self::readiness_path(), + requirement, + ) + .await?; + Ok(()) + } +} + +fn parse_container_port(entry: &str) -> Option { + entry.rsplit(':').next()?.parse().ok() +} + +fn compose_image_from_env() -> String { + env::var("COMPOSE_RUNNER_IMAGE") + .unwrap_or_else(|_| String::from("logos-blockchain-testing:local")) +} + +fn parse_node_container_ports(index: usize, node: &NodeDescriptor) -> Option { + let mut ports = node + .ports() + .iter() + .filter_map(|entry| parse_container_port(entry)); + let api = ports.next()?; + let testing = ports.next()?; + + Some(NodeContainerPorts { + index, + api, + testing, + }) +} + +fn readiness_urls( + host: &str, + mapping: &HostPortMapping, + endpoint_path: &str, +) -> Result, DynError> { + let endpoint_path = normalize_endpoint_path(endpoint_path); + + mapping + .nodes + .iter() + .map(|ports| readiness_url(host, ports.api, &endpoint_path)) + .collect::>() +} + +fn normalize_endpoint_path(endpoint_path: &str) -> String { + if endpoint_path.starts_with('/') { + endpoint_path.to_string() + } else { + format!("/{endpoint_path}") + } +} + +fn readiness_url(host: &str, api_port: u16, endpoint_path: &str) -> Result { + let url = Url::parse(&format!("http://{host}:{api_port}{endpoint_path}"))?; + Ok(url) +} diff --git a/testing-framework/deployers/compose/src/errors.rs b/testing-framework/deployers/compose/src/errors.rs index 3b0e6c1..28b222b 100644 --- a/testing-framework/deployers/compose/src/errors.rs +++ b/testing-framework/deployers/compose/src/errors.rs @@ -1,9 +1,6 @@ use std::path::PathBuf; -use testing_framework_core::{ - scenario::{MetricsError, http_probe::HttpReadinessError}, - topology::readiness::ReadinessError, -}; +use testing_framework_core::scenario::{DynError, MetricsError}; use url::ParseError; use crate::{docker::commands::ComposeCommandError, infrastructure::template::TemplateError}; @@ -34,16 +31,14 @@ pub enum ComposeRunnerError { NodeClients(#[from] NodeClientError), #[error(transparent)] Telemetry(#[from] MetricsError), - #[error("block feed requires at least one node client")] + #[error("feed requires at least one node client")] BlockFeedMissing, - #[error("failed to start block feed: {source}")] + #[error("failed to start feed: {source}")] BlockFeed { #[source] - source: anyhow::Error, + source: DynError, }, - #[error( - "docker image '{image}' is not available; set LOGOS_BLOCKCHAIN_TESTNET_IMAGE or build the image manually" - )] + #[error("docker image '{image}' is not available; build or load it locally")] MissingImage { image: String }, #[error("failed to prepare docker image: {source}")] ImageBuild { @@ -71,11 +66,11 @@ impl WorkspaceError { #[derive(Debug, thiserror::Error)] /// Configuration-related failures while preparing compose runs. pub enum ConfigError { - #[error("failed to update cfgsync configuration at {path}: {source}")] + #[error("failed to update cfgsync configuration at {path:?}: {source}")] Cfgsync { path: PathBuf, #[source] - source: anyhow::Error, + source: DynError, }, #[error("failed to allocate cfgsync port: {source}")] Port { @@ -86,7 +81,7 @@ pub enum ConfigError { CfgsyncStart { port: u16, #[source] - source: anyhow::Error, + source: DynError, }, #[error("failed to render compose template: {source}")] Template { @@ -98,8 +93,6 @@ pub enum ConfigError { #[derive(Debug, thiserror::Error)] /// Readiness probe failures surfaced to callers. pub enum StackReadinessError { - #[error(transparent)] - Http(#[from] HttpReadinessError), #[error("failed to build readiness URL for {role} port {port}: {source}")] Endpoint { role: &'static str, @@ -110,8 +103,10 @@ pub enum StackReadinessError { #[error("remote readiness probe failed: {source}")] Remote { #[source] - source: ReadinessError, + source: DynError, }, + #[error("expected readiness URLs for {nodes} nodes but none were provided")] + MissingUrls { nodes: usize }, } #[derive(Debug, thiserror::Error)] @@ -125,4 +120,9 @@ pub enum NodeClientError { #[source] source: ParseError, }, + #[error("failed to build node clients: {source}")] + Build { + #[source] + source: DynError, + }, } diff --git a/testing-framework/deployers/compose/src/infrastructure/cfgsync.rs b/testing-framework/deployers/compose/src/infrastructure/cfgsync.rs deleted file mode 100644 index ebd7f17..0000000 --- a/testing-framework/deployers/compose/src/infrastructure/cfgsync.rs +++ /dev/null @@ -1,84 +0,0 @@ -use std::{path::Path, process::Command as StdCommand}; - -use lb_tracing::metrics::otlp::OtlpMetricsConfig; -use lb_tracing_service::MetricsLayer; -use reqwest::Url; -use testing_framework_core::{ - scenario::cfgsync::{apply_topology_overrides, load_cfgsync_template, write_cfgsync_template}, - topology::generation::GeneratedTopology, -}; -use tracing::{debug, info, warn}; - -/// Handle that tracks a cfgsync server started for compose runs. -#[derive(Debug)] -pub enum CfgsyncServerHandle { - Container { name: String, stopped: bool }, -} - -impl CfgsyncServerHandle { - /// Stop the backing container if still running. - pub fn shutdown(&mut self) { - match self { - Self::Container { name, stopped } if !*stopped => { - info!(container = name, "stopping cfgsync container"); - remove_container(name); - *stopped = true; - } - _ => {} - } - } -} - -fn remove_container(name: &str) { - match StdCommand::new("docker") - .arg("rm") - .arg("-f") - .arg(name) - .status() - { - Ok(status) if status.success() => { - debug!(container = name, "removed cfgsync container"); - } - Ok(status) => { - warn!(container = name, status = ?status, "failed to remove cfgsync container"); - } - Err(_) => { - warn!( - container = name, - "failed to spawn docker rm for cfgsync container" - ); - } - } -} - -impl Drop for CfgsyncServerHandle { - fn drop(&mut self) { - self.shutdown(); - } -} - -/// Updates the cfgsync template on disk with topology-driven overrides. -pub fn update_cfgsync_config( - path: &Path, - topology: &GeneratedTopology, - port: u16, - metrics_otlp_ingest_url: Option<&Url>, -) -> anyhow::Result<()> { - debug!( - path = %path.display(), - port, - nodes = topology.nodes().len(), - "updating cfgsync template" - ); - let mut cfg = load_cfgsync_template(path)?; - cfg.port = port; - apply_topology_overrides(&mut cfg, topology); - if let Some(endpoint) = metrics_otlp_ingest_url.cloned() { - cfg.tracing_settings.metrics = MetricsLayer::Otlp(OtlpMetricsConfig { - endpoint, - host_identifier: "node".into(), - }); - } - write_cfgsync_template(path, &cfg)?; - Ok(()) -} diff --git a/testing-framework/deployers/compose/src/infrastructure/environment.rs b/testing-framework/deployers/compose/src/infrastructure/environment.rs index fbf491a..2c98c50 100644 --- a/testing-framework/deployers/compose/src/infrastructure/environment.rs +++ b/testing-framework/deployers/compose/src/infrastructure/environment.rs @@ -6,54 +6,58 @@ use std::{ use anyhow::anyhow; use reqwest::Url; -use testing_framework_core::{ - adjust_timeout, scenario::CleanupGuard, topology::generation::GeneratedTopology, -}; -use tokio::process::Command; -use tracing::{debug, error, info}; +use testing_framework_core::{scenario::CleanupGuard, topology::DeploymentDescriptor}; +use tokio::{net::TcpStream, process::Command}; +use tokio_retry::{Retry, strategy::FixedInterval}; +use tracing::{debug, error, info, warn}; use uuid::Uuid; use crate::{ - descriptor::ComposeDescriptor, docker::{ - commands::{compose_up, dump_compose_logs, run_docker_command}, - ensure_compose_image, - platform::resolve_image, + commands::{compose_create, compose_up, dump_compose_logs}, + ensure_image_present, workspace::ComposeWorkspace, }, + env::{ComposeDeployEnv, ConfigServerHandle}, errors::{ComposeRunnerError, ConfigError, WorkspaceError}, - infrastructure::{ - cfgsync::{CfgsyncServerHandle, update_cfgsync_config}, - template::write_compose_file, - }, + infrastructure::template::write_compose_file, lifecycle::cleanup::RunnerCleanup, }; -const CFGSYNC_START_TIMEOUT: Duration = Duration::from_secs(180); +const CFGSYNC_READY_TIMEOUT: Duration = Duration::from_secs(60); +const CFGSYNC_READY_POLL: Duration = Duration::from_secs(2); +const CFGSYNC_REACHABILITY_ADDR: &str = "127.0.0.1"; -/// Paths and flags describing the prepared compose workspace. +/// Prepared workspace paths. pub struct WorkspaceState { pub workspace: ComposeWorkspace, pub root: PathBuf, pub cfgsync_path: PathBuf, } -/// Holds paths and handles for a running docker-compose stack. +struct PreparedEnvironment { + workspace: WorkspaceState, + cfgsync_port: u16, + compose_path: PathBuf, + project_name: String, +} + +/// Runtime handles for a compose stack. pub struct StackEnvironment { compose_path: PathBuf, project_name: String, root: PathBuf, workspace: Option, - cfgsync_handle: Option, + cfgsync_handle: Option>, } impl StackEnvironment { - /// Builds an environment from the prepared workspace and compose artifacts. + /// Build from prepared workspace artifacts. pub fn from_workspace( state: WorkspaceState, compose_path: PathBuf, project_name: String, - cfgsync_handle: Option, + cfgsync_handle: Option>, ) -> Self { let WorkspaceState { workspace, root, .. @@ -72,26 +76,21 @@ impl StackEnvironment { &self.compose_path } - /// Docker compose project name. + /// Compose project name. pub fn project_name(&self) -> &str { &self.project_name } - /// Root directory that contains generated assets. + /// Root directory with generated assets. pub fn root(&self) -> &Path { &self.root } - /// Convert into a cleanup guard while keeping the environment borrowed. + /// Build a cleanup guard without consuming the environment. pub fn take_cleanup(&mut self) -> Result { - let workspace = self - .workspace - .take() - .ok_or(ComposeRunnerError::InternalInvariant { - message: "workspace must be available while cleaning up", - })?; + let workspace = self.workspace.take().ok_or_else(missing_workspace_error)?; - Ok(RunnerCleanup::new( + Ok(build_runner_cleanup( self.compose_path.clone(), self.project_name.clone(), self.root.clone(), @@ -100,15 +99,11 @@ impl StackEnvironment { )) } - /// Convert into a cleanup guard, consuming the environment. + /// Build a cleanup guard and consume the environment. pub fn into_cleanup(self) -> Result { - let workspace = self - .workspace - .ok_or(ComposeRunnerError::InternalInvariant { - message: "workspace must be available while cleaning up", - })?; + let workspace = self.workspace.ok_or_else(missing_workspace_error)?; - Ok(RunnerCleanup::new( + Ok(build_runner_cleanup( self.compose_path, self.project_name, self.root, @@ -117,32 +112,49 @@ impl StackEnvironment { )) } - /// Dump compose logs and trigger cleanup after a failure. + /// Dump logs and trigger cleanup after failure. pub async fn fail(&mut self, reason: &str) { error!( reason = reason, "compose stack failure; dumping docker logs" ); dump_compose_logs(self.compose_path(), self.project_name(), self.root()).await; - match self.take_cleanup() { - Ok(cleanup) => Box::new(cleanup).cleanup(), - Err(err) => error!(error = %err, "failed to acquire cleanup guard"), - } + self.cleanup_after_failure(); + } + + fn cleanup_after_failure(&mut self) { + let cleanup = match self.take_cleanup() { + Ok(cleanup) => cleanup, + Err(err) => { + error!(error = %err, "failed to acquire cleanup guard"); + return; + } + }; + + Box::new(cleanup).cleanup(); } } -/// Verifies the topology has at least one node so compose can start. -pub fn ensure_supported_topology( - descriptors: &GeneratedTopology, +fn missing_workspace_error() -> ComposeRunnerError { + ComposeRunnerError::InternalInvariant { + message: "workspace must be available while cleaning up", + } +} + +/// Ensure topology has at least one node. +pub fn ensure_supported_topology( + descriptors: &E::Deployment, ) -> Result<(), ComposeRunnerError> { - let nodes = descriptors.nodes().len(); + let nodes = descriptors.node_count(); + if nodes == 0 { return Err(ComposeRunnerError::MissingNode { nodes }); } + Ok(()) } -/// Create a temporary workspace with copied testnet assets and derived paths. +/// Create a temporary workspace and derive key paths. pub fn prepare_workspace_state() -> Result { let workspace = ComposeWorkspace::create().map_err(WorkspaceError::new)?; let root = workspace.root_path().to_path_buf(); @@ -162,51 +174,65 @@ pub fn prepare_workspace_state() -> Result { Ok(state) } -/// Log wrapper for `prepare_workspace_state`. +/// Prepare the workspace and emit setup logs. pub fn prepare_workspace_logged() -> Result { info!("preparing compose workspace"); - prepare_workspace_state().map_err(Into::into) + let workspace = prepare_workspace_state()?; + Ok(workspace) } -/// Render cfgsync config based on the topology and chosen port, logging -/// progress. -pub fn update_cfgsync_logged( +/// Update cfgsync config and emit setup logs. +pub fn update_cfgsync_logged( workspace: &WorkspaceState, - descriptors: &GeneratedTopology, + descriptors: &E::Deployment, cfgsync_port: u16, metrics_otlp_ingest_url: Option<&Url>, ) -> Result<(), ComposeRunnerError> { info!(cfgsync_port, "updating cfgsync configuration"); - configure_cfgsync( + configure_cfgsync::( workspace, descriptors, cfgsync_port, metrics_otlp_ingest_url, - ) - .map_err(Into::into) + )?; + + Ok(()) } -/// Start the cfgsync server container using the generated config. -pub async fn start_cfgsync_stage( +/// Start cfgsync using generated config. +pub async fn start_cfgsync_stage( workspace: &WorkspaceState, cfgsync_port: u16, -) -> Result { + project_name: &str, +) -> Result, ComposeRunnerError> { info!(cfgsync_port = cfgsync_port, "launching cfgsync server"); - let handle = launch_cfgsync(&workspace.cfgsync_path, cfgsync_port).await?; - debug!(container = ?handle, "cfgsync server launched"); - Ok(handle) + + let network = compose_network_name(project_name); + let handle = E::start_cfgsync(&workspace.cfgsync_path, cfgsync_port, &network) + .await + .map_err(|source| { + ComposeRunnerError::Config(ConfigError::CfgsyncStart { + port: cfgsync_port, + source, + }) + })?; + + wait_for_cfgsync_ready(cfgsync_port, Some(&handle)).await?; + log_cfgsync_started(&handle); + + Ok(Box::new(handle)) } -/// Update cfgsync YAML on disk with topology-derived values. -pub fn configure_cfgsync( +/// Write cfgsync YAML from topology data. +pub fn configure_cfgsync( workspace: &WorkspaceState, - descriptors: &GeneratedTopology, + descriptors: &E::Deployment, cfgsync_port: u16, metrics_otlp_ingest_url: Option<&Url>, ) -> Result<(), ConfigError> { - update_cfgsync_config( + E::update_cfgsync_config( &workspace.cfgsync_path, descriptors, cfgsync_port, @@ -218,7 +244,7 @@ pub fn configure_cfgsync( }) } -/// Bind an ephemeral port for cfgsync, returning the chosen value. +/// Allocate an ephemeral cfgsync port. pub fn allocate_cfgsync_port() -> Result { let listener = StdTcpListener::bind((Ipv4Addr::UNSPECIFIED, 0)).map_err(|source| ConfigError::Port { @@ -231,75 +257,16 @@ pub fn allocate_cfgsync_port() -> Result { source: source.into(), })? .port(); + debug!(port, "allocated cfgsync port"); + Ok(port) } -/// Launch cfgsync in a detached docker container on the provided port. -pub async fn launch_cfgsync( - cfgsync_path: &Path, - port: u16, -) -> Result { - let testnet_dir = cfgsync_path - .parent() - .ok_or_else(|| ConfigError::CfgsyncStart { - port, - source: anyhow!("cfgsync path {cfgsync_path:?} has no parent directory"), - })?; - let (image, _) = resolve_image(); - let container_name = format!("nomos-cfgsync-{}", Uuid::new_v4()); - debug!( - container = %container_name, - image, - cfgsync = %cfgsync_path.display(), - port, - "starting cfgsync container" - ); - - let mut command = Command::new("docker"); - command - .arg("run") - .arg("-d") - .arg("--name") - .arg(&container_name) - .arg("--entrypoint") - .arg("cfgsync-server") - .arg("-p") - .arg(format!("{port}:{port}")) - .arg("-v") - .arg(format!( - "{}:/etc/nomos:ro", - testnet_dir - .canonicalize() - .unwrap_or_else(|_| testnet_dir.to_path_buf()) - .display() - )) - .arg(&image) - .arg("/etc/nomos/cfgsync.yaml"); - - run_docker_command( - command, - adjust_timeout(CFGSYNC_START_TIMEOUT), - "docker run cfgsync server", - ) - .await - .map_err(|source| ConfigError::CfgsyncStart { - port, - source: anyhow!(source), - })?; - - info!(container = %container_name, port, "cfgsync container started"); - - Ok(CfgsyncServerHandle::Container { - name: container_name, - stopped: false, - }) -} - -/// Render compose file and associated assets for the current topology. -pub fn write_compose_artifacts( +/// Render compose file for the current topology. +pub fn write_compose_artifacts( workspace: &WorkspaceState, - descriptors: &GeneratedTopology, + descriptors: &E::Deployment, cfgsync_port: u16, ) -> Result { debug!( @@ -307,9 +274,7 @@ pub fn write_compose_artifacts( workspace_root = %workspace.root.display(), "building compose descriptor" ); - let descriptor = ComposeDescriptor::builder(descriptors) - .with_cfgsync_port(cfgsync_port) - .build(); + let descriptor = E::compose_descriptor(descriptors, cfgsync_port); let compose_path = workspace.root.join("compose.generated.yml"); write_compose_file(&descriptor, &compose_path) @@ -319,22 +284,24 @@ pub fn write_compose_artifacts( Ok(compose_path) } -/// Log and wrap `write_compose_artifacts` errors for the runner. -pub fn render_compose_logged( +/// Logged wrapper for `write_compose_artifacts`. +pub fn render_compose_logged( workspace: &WorkspaceState, - descriptors: &GeneratedTopology, + descriptors: &E::Deployment, cfgsync_port: u16, ) -> Result { info!(cfgsync_port, "rendering compose file"); - write_compose_artifacts(workspace, descriptors, cfgsync_port).map_err(Into::into) + + let compose_path = write_compose_artifacts::(workspace, descriptors, cfgsync_port)?; + Ok(compose_path) } -/// Bring up docker compose; shut down cfgsync if start-up fails. +/// Run `docker compose up`; stop cfgsync on failure. pub async fn bring_up_stack( compose_path: &Path, project_name: &str, workspace_root: &Path, - cfgsync_handle: &mut CfgsyncServerHandle, + cfgsync_handle: &mut dyn ConfigServerHandle, ) -> Result<(), ComposeRunnerError> { if let Err(err) = compose_up(compose_path, project_name, workspace_root).await { cfgsync_handle.shutdown(); @@ -344,60 +311,231 @@ pub async fn bring_up_stack( Ok(()) } -/// Log compose bring-up with context. +/// Logged compose bring-up. pub async fn bring_up_stack_logged( compose_path: &Path, project_name: &str, workspace_root: &Path, - cfgsync_handle: &mut CfgsyncServerHandle, + cfgsync_handle: &mut dyn ConfigServerHandle, ) -> Result<(), ComposeRunnerError> { info!(project = %project_name, "bringing up docker compose stack"); bring_up_stack(compose_path, project_name, workspace_root, cfgsync_handle).await } /// Prepare workspace, cfgsync, compose artifacts, and launch the stack. -pub async fn prepare_environment( - descriptors: &GeneratedTopology, +pub async fn prepare_environment( + descriptors: &E::Deployment, metrics_otlp_ingest_url: Option<&Url>, ) -> Result { + let prepared = prepare_stack_artifacts::(descriptors, metrics_otlp_ingest_url).await?; + let mut cfgsync_handle = start_cfgsync_for_prepared::(&prepared).await?; + start_compose_stack(&prepared, cfgsync_handle.as_mut()).await?; + log_compose_environment_ready(&prepared, "compose stack is up"); + + Ok(stack_environment_from_prepared( + prepared, + Some(cfgsync_handle), + )) +} + +/// Prepare workspace, cfgsync, and compose artifacts without starting services. +pub async fn prepare_environment_manual( + descriptors: &E::Deployment, + metrics_otlp_ingest_url: Option<&Url>, +) -> Result { + let prepared = prepare_stack_artifacts::(descriptors, metrics_otlp_ingest_url).await?; + let cfgsync_handle = start_cfgsync_for_prepared::(&prepared).await?; + + log_compose_environment_ready(&prepared, "compose manual environment prepared"); + + Ok(stack_environment_from_prepared( + prepared, + Some(cfgsync_handle), + )) +} + +async fn prepare_stack_artifacts( + descriptors: &E::Deployment, + metrics_otlp_ingest_url: Option<&Url>, +) -> Result { let workspace = prepare_workspace_logged()?; let cfgsync_port = allocate_cfgsync_port()?; - update_cfgsync_logged( + update_cfgsync_logged::( &workspace, descriptors, cfgsync_port, metrics_otlp_ingest_url, )?; - ensure_compose_image().await?; - let compose_path = render_compose_logged(&workspace, descriptors, cfgsync_port)?; + ensure_compose_image_present::().await?; + let compose_path = render_compose_logged::(&workspace, descriptors, cfgsync_port)?; + let project_name = create_project_name(); + compose_create(&compose_path, &project_name, &workspace.root).await?; - let project_name = format!("nomos-compose-{}", Uuid::new_v4()); - let mut cfgsync_handle = start_cfgsync_stage(&workspace, cfgsync_port).await?; + Ok(PreparedEnvironment { + workspace, + cfgsync_port, + compose_path, + project_name, + }) +} - if let Err(err) = bring_up_stack_logged( - &compose_path, - &project_name, - &workspace.root, - &mut cfgsync_handle, +async fn ensure_compose_image_present() -> Result<(), ComposeRunnerError> { + let (image, platform) = E::compose_image(); + ensure_image_present(&image, platform.as_deref()).await +} + +fn create_project_name() -> String { + format!("compose-stack-{}", Uuid::new_v4()) +} + +async fn start_cfgsync_for_prepared( + prepared: &PreparedEnvironment, +) -> Result, ComposeRunnerError> { + start_cfgsync_stage::( + &prepared.workspace, + prepared.cfgsync_port, + &prepared.project_name, + ) + .await +} + +async fn handle_compose_start_failure( + prepared: &PreparedEnvironment, + cfgsync_handle: &mut dyn ConfigServerHandle, +) { + dump_compose_logs( + &prepared.compose_path, + &prepared.project_name, + &prepared.workspace.root, + ) + .await; + cfgsync_handle.shutdown(); +} + +fn stack_environment_from_prepared( + prepared: PreparedEnvironment, + cfgsync_handle: Option>, +) -> StackEnvironment { + StackEnvironment::from_workspace( + prepared.workspace, + prepared.compose_path, + prepared.project_name, + cfgsync_handle, + ) +} + +async fn start_compose_stack( + prepared: &PreparedEnvironment, + cfgsync_handle: &mut dyn ConfigServerHandle, +) -> Result<(), ComposeRunnerError> { + if let Err(error) = bring_up_stack_logged( + &prepared.compose_path, + &prepared.project_name, + &prepared.workspace.root, + cfgsync_handle, ) .await { - dump_compose_logs(&compose_path, &project_name, &workspace.root).await; - cfgsync_handle.shutdown(); - return Err(err); + handle_compose_start_failure(prepared, cfgsync_handle).await; + return Err(error); } - info!( - project = %project_name, - compose_file = %compose_path.display(), - cfgsync_port, - "compose stack is up" - ); - - Ok(StackEnvironment::from_workspace( - workspace, - compose_path, - project_name, - Some(cfgsync_handle), - )) + Ok(()) +} + +fn log_compose_environment_ready(prepared: &PreparedEnvironment, message: &str) { + info!( + project = %prepared.project_name, + compose_file = %prepared.compose_path.display(), + cfgsync_port = prepared.cfgsync_port, + status = message, + "compose environment prepared" + ); +} + +async fn wait_for_cfgsync_ready( + port: u16, + handle: Option<&dyn ConfigServerHandle>, +) -> Result<(), ComposeRunnerError> { + let addr = format!("{CFGSYNC_REACHABILITY_ADDR}:{port}"); + let strategy = cfgsync_retry_strategy(); + + let result = Retry::spawn(strategy, || async { TcpStream::connect(&addr).await }).await; + + if let Err(error) = result { + dump_cfgsync_logs(handle).await; + return Err(cfgsync_reachability_error(port, &error.to_string())); + } + + info!(port, "cfgsync server is reachable"); + Ok(()) +} + +fn cfgsync_reachability_error(port: u16, details: &str) -> ComposeRunnerError { + ComposeRunnerError::Config(ConfigError::CfgsyncStart { + port, + source: anyhow!("cfgsync not reachable: {details}").into(), + }) +} + +fn cfgsync_retry_strategy() -> impl Iterator { + let timeout_ms = CFGSYNC_READY_TIMEOUT.as_millis(); + let poll_ms = CFGSYNC_READY_POLL.as_millis(); + let max_attempts = timeout_ms.div_ceil(poll_ms).max(1) as usize; + + FixedInterval::from_millis(CFGSYNC_READY_POLL.as_millis() as u64).take(max_attempts) +} + +async fn dump_cfgsync_logs(handle: Option<&dyn ConfigServerHandle>) { + let Some(name) = handle.and_then(|handle| handle.container_name()) else { + return; + }; + + let mut cmd = Command::new("docker"); + cmd.arg("logs").arg(name); + + match cmd.output().await { + Ok(output) => { + if !output.stdout.is_empty() { + warn!( + logs = %String::from_utf8_lossy(&output.stdout), + container = name, + "cfgsync stdout" + ); + } + + if !output.stderr.is_empty() { + warn!( + logs = %String::from_utf8_lossy(&output.stderr), + container = name, + "cfgsync stderr" + ); + } + } + + Err(err) => warn!(error = ?err, container = name, "failed to collect cfgsync logs"), + } +} + +fn compose_network_name(project_name: &str) -> String { + format!("{project_name}_default") +} + +fn log_cfgsync_started(handle: &impl ConfigServerHandle) { + if let Some(name) = handle.container_name() { + debug!(container = name, "cfgsync server launched"); + return; + } + + debug!("cfgsync server launched"); +} + +fn build_runner_cleanup( + compose_path: PathBuf, + project_name: String, + root: PathBuf, + workspace: ComposeWorkspace, + cfgsync_handle: Option>, +) -> RunnerCleanup { + RunnerCleanup::new(compose_path, project_name, root, workspace, cfgsync_handle) } diff --git a/testing-framework/deployers/compose/src/infrastructure/mod.rs b/testing-framework/deployers/compose/src/infrastructure/mod.rs index 7d8e66b..d9d382f 100644 --- a/testing-framework/deployers/compose/src/infrastructure/mod.rs +++ b/testing-framework/deployers/compose/src/infrastructure/mod.rs @@ -1,4 +1,3 @@ -pub mod cfgsync; pub mod environment; pub mod ports; pub mod template; diff --git a/testing-framework/deployers/compose/src/infrastructure/ports.rs b/testing-framework/deployers/compose/src/infrastructure/ports.rs index 03db06c..e6e7bbd 100644 --- a/testing-framework/deployers/compose/src/infrastructure/ports.rs +++ b/testing-framework/deployers/compose/src/infrastructure/ports.rs @@ -1,18 +1,11 @@ -use std::time::Duration; +use std::{env, path::Path, process::Output, time::Duration}; use anyhow::{Context as _, anyhow}; -use reqwest::Url; -use testing_framework_core::{ - adjust_timeout, scenario::http_probe::NODE_ROLE, topology::generation::GeneratedTopology, -}; +use testing_framework_core::adjust_timeout; use tokio::{process::Command, time::timeout}; use tracing::{debug, info}; -use url::ParseError; -use crate::{ - errors::{ComposeRunnerError, StackReadinessError}, - infrastructure::environment::StackEnvironment, -}; +use crate::{errors::ComposeRunnerError, infrastructure::environment::StackEnvironment}; const COMPOSE_PORT_DISCOVERY_TIMEOUT: Duration = Duration::from_secs(30); @@ -23,6 +16,14 @@ pub struct NodeHostPorts { pub testing: u16, } +/// Container ports for a single node. +#[derive(Clone, Debug)] +pub struct NodeContainerPorts { + pub index: usize, + pub api: u16, + pub testing: u16, +} + /// All host port mappings for nodes. #[derive(Clone, Debug)] pub struct HostPortMapping { @@ -39,23 +40,20 @@ impl HostPortMapping { /// Resolve host ports for all nodes from docker compose. pub async fn discover_host_ports( environment: &StackEnvironment, - descriptors: &GeneratedTopology, + nodes: &[NodeContainerPorts], ) -> Result { debug!( compose_file = %environment.compose_path().display(), project = environment.project_name(), - nodes = descriptors.nodes().len(), + nodes = nodes.len(), "resolving compose host ports" ); - let mut nodes = Vec::new(); - for node in descriptors.nodes() { - let service = node_identifier(node.index()); - let api = resolve_service_port(environment, &service, node.api_port()).await?; - let testing = resolve_service_port(environment, &service, node.testing_http_port()).await?; - nodes.push(NodeHostPorts { api, testing }); + let mut host_nodes = Vec::with_capacity(nodes.len()); + for node in nodes { + host_nodes.push(resolve_node_ports(environment, node).await?); } - let mapping = HostPortMapping { nodes }; + let mapping = HostPortMapping { nodes: host_nodes }; info!( node_ports = ?mapping.nodes, @@ -65,95 +63,136 @@ pub async fn discover_host_ports( Ok(mapping) } +async fn resolve_node_ports( + environment: &StackEnvironment, + node: &NodeContainerPorts, +) -> Result { + let service = node_identifier(node.index); + let api = resolve_service_port(environment, &service, node.api).await?; + let testing = resolve_service_port(environment, &service, node.testing).await?; + + Ok(NodeHostPorts { api, testing }) +} + async fn resolve_service_port( environment: &StackEnvironment, service: &str, container_port: u16, ) -> Result { - let mut cmd = Command::new("docker"); - cmd.arg("compose") - .arg("-f") - .arg(environment.compose_path()) - .arg("-p") - .arg(environment.project_name()) - .arg("port") - .arg(service) - .arg(container_port.to_string()) - .current_dir(environment.root()); - - let output = timeout(adjust_timeout(COMPOSE_PORT_DISCOVERY_TIMEOUT), cmd.output()) - .await - .map_err(|_| ComposeRunnerError::PortDiscovery { - service: service.to_owned(), - container_port, - source: anyhow!("docker compose port timed out"), - })? - .with_context(|| format!("running docker compose port {service} {container_port}")) - .map_err(|source| ComposeRunnerError::PortDiscovery { - service: service.to_owned(), - container_port, - source, - })?; - - if !output.status.success() { - return Err(ComposeRunnerError::PortDiscovery { - service: service.to_owned(), - container_port, - source: anyhow!("docker compose port exited with {}", output.status), - }); - } - - let stdout = String::from_utf8_lossy(&output.stdout); - for line in stdout.lines() { - let line = line.trim(); - if line.is_empty() { - continue; - } - if let Some(port_str) = line.rsplit(':').next() - && let Ok(port) = port_str.trim().parse::() - { - return Ok(port); - } - } - - Err(ComposeRunnerError::PortDiscovery { - service: service.to_owned(), + resolve_service_port_with( + environment.compose_path(), + environment.project_name(), + environment.root(), + service, container_port, - source: anyhow!("unable to parse docker compose port output: {stdout}"), - }) + ) + .await } -/// Wait for remote readiness using mapped host ports. -pub async fn ensure_remote_readiness_with_ports( - descriptors: &GeneratedTopology, - mapping: &HostPortMapping, -) -> Result<(), StackReadinessError> { - let node_urls = mapping - .nodes - .iter() - .map(|ports| readiness_url(NODE_ROLE, ports.api)) - .collect::, _>>()?; - - descriptors - .wait_remote_readiness(&node_urls) - .await - .map_err(|source| StackReadinessError::Remote { source }) +pub(crate) async fn resolve_service_port_with( + compose_path: &Path, + project_name: &str, + root: &Path, + service: &str, + container_port: u16, +) -> Result { + let mut cmd = + docker_compose_port_command(compose_path, project_name, root, service, container_port); + let output = run_port_discovery_command(&mut cmd, service, container_port).await?; + parse_port_from_output(service, container_port, &output) } -fn readiness_url(role: &'static str, port: u16) -> Result { - localhost_url(port).map_err(|source| StackReadinessError::Endpoint { role, port, source }) -} - -fn localhost_url(port: u16) -> Result { - Url::parse(&format!("http://{}:{port}/", compose_runner_host())) -} - -fn node_identifier(index: usize) -> String { +pub(crate) fn node_identifier(index: usize) -> String { format!("node-{index}") } pub(crate) fn compose_runner_host() -> String { - let host = std::env::var("COMPOSE_RUNNER_HOST").unwrap_or_else(|_| "127.0.0.1".to_string()); + let host = env::var("COMPOSE_RUNNER_HOST").unwrap_or_else(|_| "127.0.0.1".to_string()); debug!(host, "compose runner host resolved for readiness URLs"); host } + +fn docker_compose_port_command( + compose_path: &Path, + project_name: &str, + root: &Path, + service: &str, + container_port: u16, +) -> Command { + let mut cmd = Command::new("docker"); + cmd.arg("compose") + .arg("-f") + .arg(compose_path) + .arg("-p") + .arg(project_name) + .arg("port") + .arg(service) + .arg(container_port.to_string()) + .current_dir(root); + cmd +} + +async fn run_port_discovery_command( + cmd: &mut Command, + service: &str, + container_port: u16, +) -> Result { + timeout(adjust_timeout(COMPOSE_PORT_DISCOVERY_TIMEOUT), cmd.output()) + .await + .map_err(|_| { + port_discovery_error( + service, + container_port, + anyhow!("docker compose port timed out"), + ) + })? + .with_context(|| format!("running docker compose port {service} {container_port}")) + .map_err(|source| port_discovery_error(service, container_port, source)) +} + +fn parse_port_from_output( + service: &str, + container_port: u16, + output: &Output, +) -> Result { + if !output.status.success() { + return Err(port_discovery_error( + service, + container_port, + anyhow!("docker compose port exited with {}", output.status), + )); + } + + let stdout = String::from_utf8_lossy(&output.stdout); + parse_mapped_port(&stdout).ok_or_else(|| { + port_discovery_error( + service, + container_port, + anyhow!("unable to parse docker compose port output: {stdout}"), + ) + }) +} + +fn port_discovery_error( + service: &str, + container_port: u16, + source: anyhow::Error, +) -> ComposeRunnerError { + ComposeRunnerError::PortDiscovery { + service: service.to_owned(), + container_port, + source, + } +} + +fn parse_mapped_port(stdout: &str) -> Option { + stdout.lines().map(str::trim).find_map(parse_port_line) +} + +fn parse_port_line(line: &str) -> Option { + if line.is_empty() { + return None; + } + + line.rsplit(':').next()?.trim().parse::().ok() +} diff --git a/testing-framework/deployers/compose/src/infrastructure/template.rs b/testing-framework/deployers/compose/src/infrastructure/template.rs index b98a74c..2c17891 100644 --- a/testing-framework/deployers/compose/src/infrastructure/template.rs +++ b/testing-framework/deployers/compose/src/infrastructure/template.rs @@ -1,5 +1,5 @@ use std::{ - env, fs, + env, fs, io, path::{Path, PathBuf}, }; @@ -20,28 +20,28 @@ pub enum TemplateError { #[source] source: anyhow::Error, }, - #[error("failed to read compose template at {path}: {source}")] + #[error("failed to read compose template at {path:?}: {source}")] Read { path: PathBuf, #[source] - source: std::io::Error, + source: io::Error, }, #[error("failed to serialise compose descriptor for templating: {source}")] Serialize { #[source] source: tera::Error, }, - #[error("failed to render compose template at {path}: {source}")] + #[error("failed to render compose template at {path:?}: {source}")] Render { path: PathBuf, #[source] source: tera::Error, }, - #[error("failed to write compose file at {path}: {source}")] + #[error("failed to write compose file at {path:?}: {source}")] Write { path: PathBuf, #[source] - source: std::io::Error, + source: io::Error, }, } diff --git a/testing-framework/deployers/compose/src/lib.rs b/testing-framework/deployers/compose/src/lib.rs index aabf08d..c75890f 100644 --- a/testing-framework/deployers/compose/src/lib.rs +++ b/testing-framework/deployers/compose/src/lib.rs @@ -1,16 +1,18 @@ pub mod deployer; pub mod descriptor; pub mod docker; +pub mod env; pub mod errors; pub mod infrastructure; pub mod lifecycle; pub use deployer::ComposeDeployer; -pub use descriptor::{ComposeDescriptor, ComposeDescriptorBuilder, EnvEntry, NodeDescriptor}; +pub use descriptor::{ComposeDescriptor, EnvEntry, NodeDescriptor}; pub use docker::{ commands::{ComposeCommandError, compose_down, compose_up, dump_compose_logs}, - platform::{host_gateway_entry, resolve_image}, + platform::host_gateway_entry, }; +pub use env::{ComposeDeployEnv, ConfigServerHandle}; pub use errors::ComposeRunnerError; pub use infrastructure::{ ports::{HostPortMapping, NodeHostPorts}, diff --git a/testing-framework/deployers/compose/src/lifecycle/block_feed.rs b/testing-framework/deployers/compose/src/lifecycle/block_feed.rs index ec18b06..3898d2b 100644 --- a/testing-framework/deployers/compose/src/lifecycle/block_feed.rs +++ b/testing-framework/deployers/compose/src/lifecycle/block_feed.rs @@ -1,6 +1,8 @@ use std::time::Duration; -use testing_framework_core::scenario::{BlockFeed, BlockFeedTask, NodeClients, spawn_block_feed}; +use testing_framework_core::scenario::{ + Application, FeedHandle, FeedRuntime, NodeClients, spawn_feed, +}; use tokio::time::sleep; use tracing::{debug, info, warn}; @@ -9,27 +11,38 @@ use crate::errors::ComposeRunnerError; const BLOCK_FEED_MAX_ATTEMPTS: usize = 5; const BLOCK_FEED_RETRY_DELAY: Duration = Duration::from_secs(1); -async fn spawn_block_feed_with( - node_clients: &NodeClients, -) -> Result<(BlockFeed, BlockFeedTask), ComposeRunnerError> { +async fn spawn_block_feed_with( + node_clients: &NodeClients, +) -> Result< + ( + <::FeedRuntime as FeedRuntime>::Feed, + FeedHandle, + ), + ComposeRunnerError, +> { debug!( - nodes = node_clients.node_clients().len(), + nodes = node_clients.len(), "selecting node client for block feed" ); let block_source_client = node_clients - .random_node() + .random_client() .ok_or(ComposeRunnerError::BlockFeedMissing)?; - spawn_block_feed(block_source_client) + spawn_feed::(block_source_client) .await .map_err(|source| ComposeRunnerError::BlockFeed { source }) } -pub async fn spawn_block_feed_with_retry( - node_clients: &NodeClients, -) -> Result<(BlockFeed, BlockFeedTask), ComposeRunnerError> { - let mut last_err = None; +pub async fn spawn_block_feed_with_retry( + node_clients: &NodeClients, +) -> Result< + ( + <::FeedRuntime as FeedRuntime>::Feed, + FeedHandle, + ), + ComposeRunnerError, +> { for attempt in 1..=BLOCK_FEED_MAX_ATTEMPTS { info!(attempt, "starting block feed"); match spawn_block_feed_with(node_clients).await { @@ -37,17 +50,17 @@ pub async fn spawn_block_feed_with_retry( info!(attempt, "block feed established"); return Ok(result); } - Err(err) => { - last_err = Some(err); - if attempt < BLOCK_FEED_MAX_ATTEMPTS { - warn!(attempt, "block feed initialization failed; retrying"); - sleep(BLOCK_FEED_RETRY_DELAY).await; + + Err(error) => { + if attempt == BLOCK_FEED_MAX_ATTEMPTS { + return Err(error); } + + warn!(attempt, "block feed initialization failed; retrying"); + sleep(BLOCK_FEED_RETRY_DELAY).await; } } } - Err(last_err.unwrap_or(ComposeRunnerError::InternalInvariant { - message: "block feed retry exhausted without capturing an error", - })) + unreachable!("retry loop always returns on success or final failure") } diff --git a/testing-framework/deployers/compose/src/lifecycle/cleanup.rs b/testing-framework/deployers/compose/src/lifecycle/cleanup.rs index dfc8bfc..33e9cdc 100644 --- a/testing-framework/deployers/compose/src/lifecycle/cleanup.rs +++ b/testing-framework/deployers/compose/src/lifecycle/cleanup.rs @@ -1,4 +1,8 @@ -use std::{env, path::PathBuf, thread}; +use std::{ + env, io, + path::{Path, PathBuf}, + thread, +}; use testing_framework_core::scenario::CleanupGuard; use tracing::{debug, info, warn}; @@ -8,7 +12,7 @@ use crate::{ commands::{ComposeCommandError, compose_down}, workspace::ComposeWorkspace, }, - infrastructure::cfgsync::CfgsyncServerHandle, + env::ConfigServerHandle, }; /// Cleans up a compose deployment and associated cfgsync container. @@ -17,7 +21,7 @@ pub struct RunnerCleanup { pub project_name: String, pub root: PathBuf, workspace: Option, - cfgsync: Option, + cfgsync: Option>, } impl RunnerCleanup { @@ -27,7 +31,7 @@ impl RunnerCleanup { project_name: String, root: PathBuf, workspace: ComposeWorkspace, - cfgsync: Option, + cfgsync: Option>, ) -> Self { debug_assert!( !compose_file.as_os_str().is_empty() && !project_name.is_empty(), @@ -52,13 +56,13 @@ impl RunnerCleanup { } fn run_compose_down_blocking( - compose_file: &PathBuf, + compose_file: &Path, project_name: &str, - root: &PathBuf, + root: &Path, ) -> Result<(), ComposeCommandError> { - let compose_file = compose_file.clone(); + let compose_file = compose_file.to_path_buf(); let project_name = project_name.to_owned(); - let root = root.clone(); + let root = root.to_path_buf(); let handle = thread::spawn(move || { tokio::runtime::Builder::new_current_thread() @@ -66,36 +70,36 @@ fn run_compose_down_blocking( .build() .map_err(|err| ComposeCommandError::Spawn { command: "docker compose down".into(), - source: std::io::Error::new(std::io::ErrorKind::Other, err), + source: io::Error::other(err), })? .block_on(compose_down(&compose_file, &project_name, &root)) }); handle.join().map_err(|_| ComposeCommandError::Spawn { command: "docker compose down".into(), - source: std::io::Error::new( - std::io::ErrorKind::Other, - "join failure running compose down", - ), + source: io::Error::other("join failure running compose down"), })? } impl CleanupGuard for RunnerCleanup { fn cleanup(mut self: Box) { + let preserve = self.should_preserve(); + debug!( compose_file = %self.compose_file.display(), project = %self.project_name, root = %self.root.display(), - preserve = self.should_preserve(), + preserve, "compose cleanup started" ); - if self.should_preserve() { + + if preserve { self.persist_workspace(); return; } - self.teardown_compose(); - self.shutdown_cfgsync(); + + self.teardown_compose(); } } @@ -110,6 +114,11 @@ impl RunnerCleanup { info!(path = %keep.display(), "preserving docker state"); } + if let Some(mut cfgsync) = self.cfgsync.take() { + cfgsync.mark_preserved(); + self.cfgsync = Some(cfgsync); + } + info!("compose preserve flag set; skipping docker compose down"); } diff --git a/testing-framework/deployers/compose/src/lifecycle/mod.rs b/testing-framework/deployers/compose/src/lifecycle/mod.rs index 11abac9..d3cf980 100644 --- a/testing-framework/deployers/compose/src/lifecycle/mod.rs +++ b/testing-framework/deployers/compose/src/lifecycle/mod.rs @@ -1,4 +1,3 @@ pub mod block_feed; pub mod cleanup; pub mod readiness; -pub mod wait; diff --git a/testing-framework/deployers/compose/src/lifecycle/readiness.rs b/testing-framework/deployers/compose/src/lifecycle/readiness.rs index c8e041a..a3ab08b 100644 --- a/testing-framework/deployers/compose/src/lifecycle/readiness.rs +++ b/testing-framework/deployers/compose/src/lifecycle/readiness.rs @@ -1,78 +1,46 @@ use std::time::Duration; -use reqwest::Url; -use testing_framework_core::{ - nodes::ApiClient, - scenario::{NodeClients, http_probe::NODE_ROLE}, - topology::generation::GeneratedTopology, -}; +use testing_framework_core::scenario::{HttpReadinessRequirement, NodeClients}; use tokio::time::sleep; use crate::{ + env::ComposeDeployEnv, errors::{NodeClientError, StackReadinessError}, - infrastructure::ports::{HostPortMapping, NodeHostPorts}, - lifecycle::wait::wait_for_nodes, + infrastructure::ports::{HostPortMapping, compose_runner_host}, }; const DISABLED_READINESS_SLEEP: Duration = Duration::from_secs(5); /// Wait until all nodes respond on their API ports. -pub async fn ensure_nodes_ready_with_ports(ports: &[u16]) -> Result<(), StackReadinessError> { +pub async fn ensure_nodes_ready_with_ports( + ports: &[u16], + requirement: HttpReadinessRequirement, +) -> Result<(), StackReadinessError> { if ports.is_empty() { return Ok(()); } - wait_for_nodes(ports).await.map_err(Into::into) + let host = compose_runner_host(); + E::wait_for_nodes(ports, &host, requirement) + .await + .map_err(|source| StackReadinessError::Remote { source }) } /// Allow a brief pause when readiness probes are disabled. pub async fn maybe_sleep_for_disabled_readiness(readiness_enabled: bool) { - if !readiness_enabled { - sleep(DISABLED_READINESS_SLEEP).await; + if readiness_enabled { + return; } + + sleep(DISABLED_READINESS_SLEEP).await; } /// Construct API clients using the mapped host ports. -pub fn build_node_clients_with_ports( - descriptors: &GeneratedTopology, +pub fn build_node_clients_with_ports( + descriptors: &E::Deployment, mapping: &HostPortMapping, host: &str, -) -> Result { - let nodes = descriptors - .nodes() - .iter() - .zip(mapping.nodes.iter()) - .map(|(_node, ports)| api_client_from_host_ports(NODE_ROLE, ports, host)) - .collect::, _>>()?; - - Ok(NodeClients::new(nodes)) -} - -fn api_client_from_host_ports( - role: &'static str, - ports: &NodeHostPorts, - host: &str, -) -> Result { - let base_url = localhost_url(ports.api, host).map_err(|source| NodeClientError::Endpoint { - role, - endpoint: "api", - port: ports.api, - source, - })?; - - let testing_url = - Some( - localhost_url(ports.testing, host).map_err(|source| NodeClientError::Endpoint { - role, - endpoint: "testing", - port: ports.testing, - source, - })?, - ); - - Ok(ApiClient::from_urls(base_url, testing_url)) -} - -fn localhost_url(port: u16, host: &str) -> Result { - Url::parse(&format!("http://{host}:{port}/")) +) -> Result, NodeClientError> { + E::build_node_clients(descriptors, mapping, host) + .map_err(|source| NodeClientError::Build { source }) } diff --git a/testing-framework/deployers/compose/src/lifecycle/wait.rs b/testing-framework/deployers/compose/src/lifecycle/wait.rs deleted file mode 100644 index dffa4a2..0000000 --- a/testing-framework/deployers/compose/src/lifecycle/wait.rs +++ /dev/null @@ -1,49 +0,0 @@ -use std::{env, time::Duration}; - -use testing_framework_core::{ - adjust_timeout, - scenario::http_probe::{self, HttpReadinessError, NODE_ROLE}, -}; -use tracing::{debug, info}; - -const DEFAULT_WAIT_TIMEOUT_SECS: u64 = 180; -const POLL_INTERVAL_MILLIS: u64 = 250; - -const DEFAULT_WAIT: Duration = Duration::from_secs(DEFAULT_WAIT_TIMEOUT_SECS); -const POLL_INTERVAL: Duration = Duration::from_millis(POLL_INTERVAL_MILLIS); - -pub async fn wait_for_nodes(ports: &[u16]) -> Result<(), HttpReadinessError> { - wait_for_ports(ports, NODE_ROLE).await -} - -async fn wait_for_ports(ports: &[u16], role: &'static str) -> Result<(), HttpReadinessError> { - let host = compose_runner_host(); - let timeout = compose_http_timeout(); - - info!(role, ports = ?ports, host, "waiting for compose HTTP readiness"); - - http_probe::wait_for_http_ports_with_host( - ports, - role, - &host, - adjust_timeout(timeout), - POLL_INTERVAL, - ) - .await -} - -const DEFAULT_COMPOSE_HOST: &str = "127.0.0.1"; - -fn compose_runner_host() -> String { - let host = env::var("COMPOSE_RUNNER_HOST").unwrap_or_else(|_| DEFAULT_COMPOSE_HOST.to_string()); - debug!(host, "compose runner host resolved"); - host -} - -fn compose_http_timeout() -> Duration { - env::var("COMPOSE_RUNNER_HTTP_TIMEOUT_SECS") - .ok() - .and_then(|raw| raw.parse::().ok()) - .map(Duration::from_secs) - .unwrap_or(DEFAULT_WAIT) -} diff --git a/testing-framework/deployers/k8s/Cargo.toml b/testing-framework/deployers/k8s/Cargo.toml index 89b9056..b629a1b 100644 --- a/testing-framework/deployers/k8s/Cargo.toml +++ b/testing-framework/deployers/k8s/Cargo.toml @@ -13,21 +13,14 @@ version = "0.1.0" workspace = true [dependencies] -anyhow = "1" -async-trait = { workspace = true } -k8s-openapi = { features = ["latest"], version = "0.20" } -kube = { default-features = false, features = ["client", "runtime", "rustls-tls"], version = "0.87" } -lb-tracing = { workspace = true } -lb-tracing-service = { workspace = true } -reqwest = { features = ["json"], workspace = true } -serde = { features = ["derive"], version = "1" } -serde_yaml = { workspace = true } -tempfile = { workspace = true } -testing-framework-config = { workspace = true } -testing-framework-core = { path = "../../core" } -testing-framework-env = { workspace = true } -thiserror = { workspace = true } -tokio = { features = ["macros", "net", "process", "rt-multi-thread", "sync", "time"], workspace = true } -tracing = { workspace = true } -url = { version = "2" } -uuid = { features = ["v4"], version = "1" } +anyhow = "1" +async-trait = { workspace = true } +k8s-openapi = { features = ["latest"], version = "0.20" } +kube = { default-features = false, features = ["client", "runtime", "rustls-tls"], version = "0.87" } +reqwest = { features = ["json"], workspace = true } +testing-framework-core = { path = "../../core" } +thiserror = { workspace = true } +tokio = { features = ["macros", "net", "process", "rt-multi-thread", "sync", "time"], workspace = true } +tokio-retry = "0.3" +tracing = { workspace = true } +url = { version = "2" } diff --git a/testing-framework/deployers/k8s/helm/nomos-runner/templates/_helpers.tpl b/testing-framework/deployers/k8s/helm/nomos-runner/templates/_helpers.tpl deleted file mode 100644 index 92c9987..0000000 --- a/testing-framework/deployers/k8s/helm/nomos-runner/templates/_helpers.tpl +++ /dev/null @@ -1,30 +0,0 @@ -{{- define "nomos-runner.chart" -}} -{{- .Chart.Name -}} -{{- end -}} - -{{- define "nomos-runner.name" -}} -{{- include "nomos-runner.chart" . -}} -{{- end -}} - -{{- define "nomos-runner.fullname" -}} -{{- printf "%s" .Release.Name | trunc 63 | trimSuffix "-" -}} -{{- end -}} - -{{- define "nomos-runner.labels" -}} -app.kubernetes.io/name: {{ include "nomos-runner.chart" . }} -app.kubernetes.io/instance: {{ .Release.Name }} -{{- end -}} - -{{- define "nomos-runner.selectorLabels" -}} -app.kubernetes.io/name: {{ include "nomos-runner.chart" . }} -app.kubernetes.io/instance: {{ .Release.Name }} -{{- end -}} - -{{- define "nomos-runner.nodeLabels" -}} -{{- $root := index . "root" -}} -{{- $index := index . "index" -}} -app.kubernetes.io/name: {{ include "nomos-runner.chart" $root }} -app.kubernetes.io/instance: {{ $root.Release.Name }} -nomos/logical-role: node -nomos/node-index: "{{ $index }}" -{{- end -}} diff --git a/testing-framework/deployers/k8s/helm/nomos-runner/templates/cfgsync-service.yaml b/testing-framework/deployers/k8s/helm/nomos-runner/templates/cfgsync-service.yaml deleted file mode 100644 index db09c16..0000000 --- a/testing-framework/deployers/k8s/helm/nomos-runner/templates/cfgsync-service.yaml +++ /dev/null @@ -1,15 +0,0 @@ -apiVersion: v1 -kind: Service -metadata: - name: {{ include "nomos-runner.fullname" . }}-cfgsync - labels: - {{- include "nomos-runner.labels" . | nindent 4 }} -spec: - type: ClusterIP - selector: - {{- include "nomos-runner.selectorLabels" . | nindent 4 }} - nomos/component: cfgsync - ports: - - name: http - port: {{ .Values.cfgsync.port }} - targetPort: http diff --git a/testing-framework/deployers/k8s/helm/nomos-runner/templates/configmap.yaml b/testing-framework/deployers/k8s/helm/nomos-runner/templates/configmap.yaml deleted file mode 100644 index b47e88b..0000000 --- a/testing-framework/deployers/k8s/helm/nomos-runner/templates/configmap.yaml +++ /dev/null @@ -1,32 +0,0 @@ -apiVersion: v1 -kind: ConfigMap -metadata: - name: {{ include "nomos-runner.fullname" . }}-assets - labels: - {{- include "nomos-runner.labels" . | nindent 4 }} -data: - cfgsync.yaml: | -{{- if .Values.cfgsync.config }} -{{ .Values.cfgsync.config | indent 4 }} -{{- else }} -{{ "" | indent 4 }} -{{- end }} - run_cfgsync.sh: | -{{- if .Values.scripts.runCfgsyncSh }} -{{ .Values.scripts.runCfgsyncSh | indent 4 }} -{{- else }} -{{ "" | indent 4 }} -{{- end }} - run_nomos.sh: | -{{- if .Values.scripts.runNomosSh }} -{{ .Values.scripts.runNomosSh | indent 4 }} -{{- else }} -{{ "" | indent 4 }} -{{- end }} - run_nomos_node.sh: | -{{- if .Values.scripts.runNomosNodeSh }} -{{ .Values.scripts.runNomosNodeSh | indent 4 }} -{{- else }} -{{ "" | indent 4 }} -{{- end }} - diff --git a/testing-framework/deployers/k8s/src/deployer/orchestrator.rs b/testing-framework/deployers/k8s/src/deployer/orchestrator.rs index 2245318..f784777 100644 --- a/testing-framework/deployers/k8s/src/deployer/orchestrator.rs +++ b/testing-framework/deployers/k8s/src/deployer/orchestrator.rs @@ -1,47 +1,52 @@ -use anyhow::Error; +use std::{env, fmt::Debug, marker::PhantomData, time::Duration}; + use async_trait::async_trait; use kube::Client; +use reqwest::Url; use testing_framework_core::{ scenario::{ - BlockFeedTask, CleanupGuard, Deployer, MetricsError, ObservabilityCapability, - ObservabilityInputs, RunContext, Runner, Scenario, + Application, CleanupGuard, Deployer, DynError, FeedHandle, FeedRuntime, + HttpReadinessRequirement, Metrics, MetricsError, NodeClients, + ObservabilityCapabilityProvider, ObservabilityInputs, RequiresNodeControl, RunContext, + Runner, Scenario, }, - topology::generation::GeneratedTopology, + topology::DeploymentDescriptor, }; use tracing::{error, info}; use crate::{ - infrastructure::{ - assets::{AssetsError, prepare_assets}, - cluster::{ - ClusterEnvironment, ClusterEnvironmentError, NodeClientError, PortSpecs, - RemoteReadinessError, build_node_clients, cluster_identifiers, collect_port_specs, - ensure_cluster_readiness, install_stack, kill_port_forwards, wait_for_ports_or_cleanup, - }, - helm::HelmError, + env::K8sDeployEnv, + infrastructure::cluster::{ + ClusterEnvironment, ClusterEnvironmentError, NodeClientError, PortSpecs, + RemoteReadinessError, build_node_clients, collect_port_specs, ensure_cluster_readiness, + kill_port_forwards, wait_for_ports_or_cleanup, }, lifecycle::{block_feed::spawn_block_feed_with, cleanup::RunnerCleanup}, - wait::{ClusterWaitError, PortForwardHandle}, + wait::{ClusterReady, ClusterWaitError, PortForwardHandle}, }; -/// Deploys a scenario into Kubernetes using Helm charts and port-forwards. +const DISABLED_ENDPOINT: &str = ""; + +/// Deploys a scenario into Kubernetes using an environment-specific stack. #[derive(Clone, Copy)] -pub struct K8sDeployer { +pub struct K8sDeployer { readiness_checks: bool, + _marker: PhantomData, } -impl Default for K8sDeployer { +impl Default for K8sDeployer { fn default() -> Self { Self::new() } } -impl K8sDeployer { +impl K8sDeployer { #[must_use] /// Create a k8s deployer with readiness checks enabled. pub const fn new() -> Self { Self { readiness_checks: true, + _marker: PhantomData, } } @@ -63,10 +68,16 @@ pub enum K8sRunnerError { #[source] source: kube::Error, }, - #[error(transparent)] - Assets(#[from] AssetsError), - #[error(transparent)] - Helm(#[from] HelmError), + #[error("failed to prepare k8s assets: {source}")] + Assets { + #[source] + source: testing_framework_core::scenario::DynError, + }, + #[error("failed to install k8s stack: {source}")] + InstallStack { + #[source] + source: testing_framework_core::scenario::DynError, + }, #[error(transparent)] ClusterEnvironment(#[from] ClusterEnvironmentError), #[error(transparent)] @@ -79,33 +90,25 @@ pub enum K8sRunnerError { Telemetry(#[from] MetricsError), #[error("internal invariant violated: {message}")] InternalInvariant { message: String }, - #[error("k8s runner requires at least one node client to follow blocks")] + #[error("k8s runner requires at least one node client for feed data")] BlockFeedMissing, - #[error("failed to initialize block feed: {source}")] + #[error("failed to initialize feed: {source}")] BlockFeed { #[source] - source: Error, + source: DynError, }, } #[async_trait] -impl Deployer for K8sDeployer { +impl Deployer for K8sDeployer +where + E: K8sDeployEnv, + Caps: RequiresNodeControl + ObservabilityCapabilityProvider + Send + Sync, +{ type Error = K8sRunnerError; - async fn deploy(&self, scenario: &Scenario) -> Result { - deploy_with_observability(self, scenario, None).await - } -} - -#[async_trait] -impl Deployer for K8sDeployer { - type Error = K8sRunnerError; - - async fn deploy( - &self, - scenario: &Scenario, - ) -> Result { - deploy_with_observability(self, scenario, Some(scenario.capabilities())).await + async fn deploy(&self, scenario: &Scenario) -> Result, Self::Error> { + deploy_with_observability(self, scenario).await } } @@ -121,131 +124,166 @@ impl From for K8sRunnerError { } } -fn ensure_supported_topology(descriptors: &GeneratedTopology) -> Result<(), K8sRunnerError> { - let nodes = descriptors.nodes().len(); +type Feed = <::FeedRuntime as FeedRuntime>::Feed; + +fn ensure_supported_topology( + descriptors: &E::Deployment, +) -> Result<(), K8sRunnerError> { + let nodes = descriptors.node_count(); if nodes == 0 { return Err(K8sRunnerError::UnsupportedTopology { nodes }); } + Ok(()) } -async fn deploy_with_observability( - deployer: &K8sDeployer, - scenario: &Scenario, - observability: Option<&ObservabilityCapability>, -) -> Result { - let observability = resolve_observability_inputs(observability)?; +async fn deploy_with_observability( + deployer: &K8sDeployer, + scenario: &Scenario, +) -> Result, K8sRunnerError> +where + E: K8sDeployEnv, + Caps: ObservabilityCapabilityProvider, +{ + let observability = resolve_observability_inputs(scenario.capabilities())?; + let deployment = build_k8s_deployment::(deployer, scenario, &observability).await?; + let mut cluster = Some(deployment.cluster); + let runtime = build_runtime_artifacts::(&mut cluster, &observability).await?; + let parts = build_runner_parts(scenario, deployment.node_count, runtime); - let descriptors = scenario.topology().clone(); - let node_count = descriptors.nodes().len(); - ensure_supported_topology(&descriptors)?; - - let client = init_kube_client().await?; - - info!( - nodes = node_count, - duration_secs = scenario.duration().as_secs(), - readiness_checks = deployer.readiness_checks, - metrics_query_url = observability.metrics_query_url.as_ref().map(|u| u.as_str()), - metrics_otlp_ingest_url = observability - .metrics_otlp_ingest_url - .as_ref() - .map(|u| u.as_str()), - grafana_url = observability.grafana_url.as_ref().map(|u| u.as_str()), - "starting k8s deployment" - ); - - let port_specs = collect_port_specs(&descriptors); - let mut cluster = Some( - setup_cluster( - &client, - &port_specs, - &descriptors, - deployer.readiness_checks, - &observability, - ) - .await?, - ); - - info!("building node clients"); - let node_clients = build_node_clients_or_fail(&mut cluster).await?; - - let telemetry = build_telemetry_or_fail(&mut cluster, &observability).await?; - - let (block_feed, block_feed_guard) = - spawn_block_feed_or_fail(&mut cluster, &node_clients).await?; - - if let Some(url) = observability.metrics_query_url.as_ref() { - info!( - metrics_query_url = %url.as_str(), - "metrics query endpoint configured" - ); - } - if let Some(url) = observability.grafana_url.as_ref() { - info!(grafana_url = %url.as_str(), "grafana url configured"); - } - - maybe_print_endpoints(&observability, &node_clients); - - finalize_runner( - &mut cluster, - descriptors, - node_clients, - scenario.duration(), - telemetry, - block_feed, - block_feed_guard, - node_count, - ) + log_configured_observability(&observability); + maybe_print_endpoints::(&observability, &parts.node_clients); + finalize_runner::(&mut cluster, parts) } -async fn setup_cluster( +struct BuiltK8sDeployment { + cluster: ClusterEnvironment, + node_count: usize, +} + +async fn build_k8s_deployment( + deployer: &K8sDeployer, + scenario: &Scenario, + observability: &ObservabilityInputs, +) -> Result +where + E: K8sDeployEnv, + Caps: ObservabilityCapabilityProvider, +{ + let descriptors = scenario.deployment(); + let node_count = descriptors.node_count(); + let deployment_policy = scenario.deployment_policy(); + ensure_supported_topology::(descriptors)?; + let client = init_kube_client().await?; + + log_k8s_deploy_start( + deployer, + scenario.duration(), + node_count, + deployment_policy.readiness_enabled, + deployment_policy.readiness_requirement, + observability, + ); + + let port_specs = collect_port_specs::(descriptors); + let cluster = setup_cluster::( + &client, + &port_specs, + descriptors, + deployer.readiness_checks && deployment_policy.readiness_enabled, + deployment_policy.readiness_requirement, + observability, + ) + .await?; + + Ok(BuiltK8sDeployment { + cluster, + node_count, + }) +} + +async fn setup_cluster( client: &Client, specs: &PortSpecs, - descriptors: &GeneratedTopology, + descriptors: &E::Deployment, readiness_checks: bool, + readiness_requirement: HttpReadinessRequirement, observability: &ObservabilityInputs, ) -> Result { - let assets = prepare_assets(descriptors, observability.metrics_otlp_ingest_url.as_ref())?; - let nodes = descriptors.nodes().len(); - - let (namespace, release) = cluster_identifiers(); - info!(%namespace, %release, nodes, "preparing k8s assets and namespace"); - - let mut cleanup_guard = - Some(install_stack(client, &assets, &namespace, &release, nodes).await?); + let (setup, cleanup) = prepare_cluster_setup::(client, descriptors, observability).await?; + let mut cleanup_guard = Some(cleanup); info!("waiting for helm-managed services to become ready"); - let cluster_ready = - wait_for_ports_or_cleanup(client, &namespace, &release, specs, &mut cleanup_guard).await?; - - let environment = ClusterEnvironment::new( - client.clone(), - namespace, - release, - cleanup_guard - .take() - .ok_or_else(|| K8sRunnerError::InternalInvariant { - message: "cleanup guard must exist after successful cluster startup".to_owned(), - })?, - &cluster_ready.ports, - cluster_ready.port_forwards, - ); + let cluster_ready = wait_for_ports_or_cleanup::( + client, + &setup.namespace, + &setup.release, + specs, + &mut cleanup_guard, + ) + .await?; + let environment = build_cluster_environment(client, setup, cleanup_guard, cluster_ready)?; if readiness_checks { info!("probing cluster readiness"); - ensure_cluster_readiness(descriptors, &environment).await?; + ensure_cluster_readiness::(descriptors, &environment, readiness_requirement).await?; info!("cluster readiness probes passed"); } Ok(environment) } +struct ClusterSetup { + namespace: String, + release: String, +} + +async fn prepare_cluster_setup( + client: &Client, + descriptors: &E::Deployment, + observability: &ObservabilityInputs, +) -> Result<(ClusterSetup, RunnerCleanup), K8sRunnerError> { + let assets = E::prepare_assets(descriptors, observability.metrics_otlp_ingest_url.as_ref()) + .map_err(|source| K8sRunnerError::Assets { source })?; + let nodes = descriptors.node_count(); + let (namespace, release) = E::cluster_identifiers(); + info!(%namespace, %release, nodes, "preparing k8s assets and namespace"); + + let cleanup = E::install_stack(client, &assets, &namespace, &release, nodes) + .await + .map_err(|source| K8sRunnerError::InstallStack { source })?; + + Ok((ClusterSetup { namespace, release }, cleanup)) +} + +fn build_cluster_environment( + client: &Client, + setup: ClusterSetup, + mut cleanup_guard: Option, + cluster_ready: ClusterReady, +) -> Result { + let cleanup = cleanup_guard + .take() + .ok_or_else(|| K8sRunnerError::InternalInvariant { + message: "cleanup guard must exist after successful cluster startup".to_owned(), + })?; + + Ok(ClusterEnvironment::new( + client.clone(), + setup.namespace, + setup.release, + cleanup, + &cluster_ready.ports, + cluster_ready.port_forwards, + )) +} + fn resolve_observability_inputs( - observability: Option<&ObservabilityCapability>, + observability: &impl ObservabilityCapabilityProvider, ) -> Result { let env_inputs = ObservabilityInputs::from_env()?; let cap_inputs = observability + .observability_capability() .map(ObservabilityInputs::from_capability) .unwrap_or_default(); Ok(env_inputs.with_overrides(cap_inputs)) @@ -257,16 +295,16 @@ async fn init_kube_client() -> Result { .map_err(|source| K8sRunnerError::ClientInit { source }) } -async fn build_node_clients_or_fail( +async fn build_node_clients_or_fail( cluster: &mut Option, -) -> Result { +) -> Result, K8sRunnerError> { let environment = cluster .as_ref() .ok_or_else(|| K8sRunnerError::InternalInvariant { message: "cluster must be available while building clients".to_owned(), })?; - match build_node_clients(environment) { + match build_node_clients::(environment) { Ok(clients) => Ok(clients), Err(err) => { fail_cluster(cluster, "failed to construct node api clients").await; @@ -276,126 +314,254 @@ async fn build_node_clients_or_fail( } } +struct RuntimeArtifacts { + node_clients: NodeClients, + telemetry: Metrics, + feed: Feed, + feed_task: FeedHandle, +} + +fn build_runner_parts( + scenario: &Scenario, + node_count: usize, + runtime: RuntimeArtifacts, +) -> K8sRunnerParts { + K8sRunnerParts { + descriptors: scenario.deployment().clone(), + node_clients: runtime.node_clients, + duration: scenario.duration(), + expectation_cooldown: scenario.expectation_cooldown(), + telemetry: runtime.telemetry, + feed: runtime.feed, + feed_task: runtime.feed_task, + node_count, + } +} + +async fn build_runtime_artifacts( + cluster: &mut Option, + observability: &ObservabilityInputs, +) -> Result, K8sRunnerError> { + info!("building node clients"); + let node_clients = build_node_clients_or_fail::(cluster).await?; + let telemetry = build_telemetry_or_fail(cluster, observability).await?; + let (feed, feed_task) = spawn_block_feed_or_fail::(cluster, &node_clients).await?; + + Ok(RuntimeArtifacts { + node_clients, + telemetry, + feed, + feed_task, + }) +} + async fn build_telemetry_or_fail( cluster: &mut Option, observability: &ObservabilityInputs, -) -> Result { +) -> Result { match observability.telemetry_handle() { Ok(handle) => Ok(handle), Err(err) => { - fail_cluster(cluster, "failed to configure metrics telemetry handle").await; - error!(error = ?err, "failed to configure metrics telemetry handle"); + fail_cluster_with_log( + cluster, + "failed to configure metrics telemetry handle", + &err, + ) + .await; Err(err.into()) } } } -async fn spawn_block_feed_or_fail( +async fn spawn_block_feed_or_fail( cluster: &mut Option, - node_clients: &testing_framework_core::scenario::NodeClients, -) -> Result<(testing_framework_core::scenario::BlockFeed, BlockFeedTask), K8sRunnerError> { - match spawn_block_feed_with(node_clients).await { + node_clients: &NodeClients, +) -> Result<(Feed, FeedHandle), K8sRunnerError> { + match spawn_block_feed_with::(node_clients).await { Ok(pair) => Ok(pair), Err(err) => { - fail_cluster(cluster, "failed to initialize block feed").await; - error!(error = ?err, "failed to initialize block feed"); + fail_cluster_with_log(cluster, "failed to initialize block feed", &err).await; Err(err) } } } -fn maybe_print_endpoints( - observability: &ObservabilityInputs, - node_clients: &testing_framework_core::scenario::NodeClients, +async fn fail_cluster_with_log( + cluster: &mut Option, + reason: &str, + error_value: &ErrorValue, ) { - if std::env::var("TESTNET_PRINT_ENDPOINTS").is_err() { + fail_cluster(cluster, reason).await; + error!(error = ?error_value, "{reason}"); +} + +fn maybe_print_endpoints( + observability: &ObservabilityInputs, + node_clients: &NodeClients, +) { + if env::var("TESTNET_PRINT_ENDPOINTS").is_err() { return; } - let prometheus = observability - .metrics_query_url - .as_ref() - .map(|u| u.as_str().to_string()) - .unwrap_or_else(|| "".to_string()); - println!( "TESTNET_ENDPOINTS prometheus={} grafana={}", - prometheus, - observability - .grafana_url - .as_ref() - .map(|u| u.as_str().to_string()) - .unwrap_or_else(|| "".to_string()) + endpoint_or_disabled(observability.metrics_query_url.as_ref()), + endpoint_or_disabled(observability.grafana_url.as_ref()) ); - let nodes = node_clients.node_clients(); - for (idx, client) in nodes.iter().enumerate() { - println!( - "TESTNET_PPROF node_{}={}/debug/pprof/profile?seconds=15&format=proto", - idx, - client.base_url() - ); - } + print_node_pprof_endpoints::(node_clients); } -#[allow(clippy::too_many_arguments)] -fn finalize_runner( - cluster: &mut Option, - descriptors: GeneratedTopology, - node_clients: testing_framework_core::scenario::NodeClients, - duration: std::time::Duration, - telemetry: testing_framework_core::scenario::Metrics, - block_feed: testing_framework_core::scenario::BlockFeed, - block_feed_guard: BlockFeedTask, +struct K8sRunnerParts { + descriptors: E::Deployment, + node_clients: NodeClients, + duration: Duration, + expectation_cooldown: Duration, + telemetry: Metrics, + feed: Feed, + feed_task: FeedHandle, node_count: usize, -) -> Result { - let environment = cluster - .take() - .ok_or_else(|| K8sRunnerError::InternalInvariant { - message: "cluster should still be available".to_owned(), - })?; +} + +fn finalize_runner( + cluster: &mut Option, + parts: K8sRunnerParts, +) -> Result, K8sRunnerError> { + let environment = take_ready_cluster(cluster)?; let (cleanup, port_forwards) = environment.into_cleanup()?; - let cleanup_guard: Box = Box::new(K8sCleanupGuard::new( - cleanup, - block_feed_guard, - port_forwards, - )); - - let context = RunContext::new( + let K8sRunnerParts { descriptors, - None, node_clients, duration, + expectation_cooldown, telemetry, - block_feed, - None, + feed, + feed_task, + node_count, + } = parts; + let duration_secs = duration.as_secs(); + + let cleanup_guard: Box = + Box::new(K8sCleanupGuard::new(cleanup, feed_task, port_forwards)); + let context = build_k8s_run_context( + descriptors, + node_clients, + duration, + expectation_cooldown, + telemetry, + feed, ); info!( nodes = node_count, - duration_secs = duration.as_secs(), - "k8s deployment ready; handing control to scenario runner" + duration_secs, "k8s deployment ready; handing control to scenario runner" ); Ok(Runner::new(context, Some(cleanup_guard))) } +fn take_ready_cluster( + cluster: &mut Option, +) -> Result { + cluster + .take() + .ok_or_else(|| K8sRunnerError::InternalInvariant { + message: "cluster should still be available".to_owned(), + }) +} + +fn build_k8s_run_context( + descriptors: E::Deployment, + node_clients: NodeClients, + duration: Duration, + expectation_cooldown: Duration, + telemetry: Metrics, + feed: Feed, +) -> RunContext { + RunContext::new( + descriptors, + node_clients, + duration, + expectation_cooldown, + telemetry, + feed, + None, + ) +} + +fn endpoint_or_disabled(endpoint: Option<&Url>) -> String { + endpoint.map_or_else( + || String::from(DISABLED_ENDPOINT), + |url| String::from(url.as_str()), + ) +} + +fn log_configured_observability(observability: &ObservabilityInputs) { + if let Some(url) = observability.metrics_query_url.as_ref() { + info!(metrics_query_url = %url.as_str(), "metrics query endpoint configured"); + } + + if let Some(url) = observability.grafana_url.as_ref() { + info!(grafana_url = %url.as_str(), "grafana url configured"); + } +} + +fn print_node_pprof_endpoints(node_clients: &NodeClients) { + let nodes = node_clients.snapshot(); + + for (idx, client) in nodes.iter().enumerate() { + if let Some(base_url) = E::node_base_url(client) { + println!( + "TESTNET_PPROF node_{}={}/debug/pprof/profile?seconds=15&format=proto", + idx, base_url + ); + } + } +} + +fn log_k8s_deploy_start( + deployer: &K8sDeployer, + duration: Duration, + node_count: usize, + readiness_enabled: bool, + readiness_requirement: HttpReadinessRequirement, + observability: &ObservabilityInputs, +) where + E: K8sDeployEnv, +{ + info!( + nodes = node_count, + duration_secs = duration.as_secs(), + readiness_checks = deployer.readiness_checks, + readiness_enabled, + readiness_requirement = ?readiness_requirement, + effective_readiness = deployer.readiness_checks && readiness_enabled, + metrics_query_url = observability.metrics_query_url.as_ref().map(|u| u.as_str()), + metrics_otlp_ingest_url = observability + .metrics_otlp_ingest_url + .as_ref() + .map(|u| u.as_str()), + grafana_url = observability.grafana_url.as_ref().map(|u| u.as_str()), + "starting k8s deployment" + ); +} + struct K8sCleanupGuard { cleanup: RunnerCleanup, - block_feed: Option, + feed_task: Option, port_forwards: Vec, } impl K8sCleanupGuard { const fn new( cleanup: RunnerCleanup, - block_feed: BlockFeedTask, + feed_task: FeedHandle, port_forwards: Vec, ) -> Self { Self { cleanup, - block_feed: Some(block_feed), + feed_task: Some(feed_task), port_forwards, } } @@ -403,8 +569,8 @@ impl K8sCleanupGuard { impl CleanupGuard for K8sCleanupGuard { fn cleanup(mut self: Box) { - if let Some(block_feed) = self.block_feed.take() { - CleanupGuard::cleanup(Box::new(block_feed)); + if let Some(feed_task) = self.feed_task.take() { + CleanupGuard::cleanup(Box::new(feed_task)); } kill_port_forwards(&mut self.port_forwards); CleanupGuard::cleanup(Box::new(self.cleanup)); diff --git a/testing-framework/deployers/k8s/src/env.rs b/testing-framework/deployers/k8s/src/env.rs new file mode 100644 index 0000000..ecd7972 --- /dev/null +++ b/testing-framework/deployers/k8s/src/env.rs @@ -0,0 +1,135 @@ +use std::{ + process, + time::{Duration, SystemTime, UNIX_EPOCH}, +}; + +use async_trait::async_trait; +use kube::Client; +use reqwest::Url; +use testing_framework_core::scenario::{ + Application, DynError, HttpReadinessRequirement, wait_for_http_ports_with_host_and_requirement, + wait_http_readiness, +}; + +use crate::{infrastructure::cluster::PortSpecs, lifecycle::cleanup::RunnerCleanup}; + +#[async_trait] +pub trait K8sDeployEnv: Application { + type Assets: Send + Sync; + + /// Collect container port specs from the topology. + fn collect_port_specs(topology: &Self::Deployment) -> PortSpecs; + + /// Build deploy-time assets (charts, cfgsync config, scripts). + fn prepare_assets( + topology: &Self::Deployment, + metrics_otlp_ingest_url: Option<&Url>, + ) -> Result; + + /// Install the k8s stack using the prepared assets. + async fn install_stack( + client: &Client, + assets: &Self::Assets, + namespace: &str, + release: &str, + nodes: usize, + ) -> Result; + + /// Provide a namespace/release identifier pair. + fn cluster_identifiers() -> (String, String) { + let stamp = SystemTime::now() + .duration_since(UNIX_EPOCH) + .map(|d| d.as_millis()) + .unwrap_or_default(); + let suffix = format!("{stamp:x}-{:x}", process::id()); + (format!("tf-testnet-{suffix}"), String::from("tf-runner")) + } + + /// Build a single node client from forwarded ports. + fn node_client_from_ports( + host: &str, + api_port: u16, + testing_port: u16, + ) -> Result; + + /// Build node clients from forwarded ports. + fn build_node_clients( + host: &str, + node_api_ports: &[u16], + node_testing_ports: &[u16], + ) -> Result, DynError> { + node_api_ports + .iter() + .zip(node_testing_ports.iter()) + .map(|(&api_port, &testing_port)| { + Self::node_client_from_ports(host, api_port, testing_port) + }) + .collect() + } + + /// Path appended to readiness probe URLs. + fn readiness_path() -> &'static str { + "/" + } + + /// Wait for remote readiness using topology + URLs. + async fn wait_remote_readiness( + topology: &Self::Deployment, + urls: &[Url], + requirement: HttpReadinessRequirement, + ) -> Result<(), DynError> { + let _ = topology; + let readiness_urls: Vec<_> = urls + .iter() + .map(|url| { + let mut endpoint = url.clone(); + endpoint.set_path(Self::readiness_path()); + endpoint + }) + .collect(); + wait_http_readiness(&readiness_urls, requirement).await?; + Ok(()) + } + + /// Label used for readiness probe logging. + fn node_role() -> &'static str { + "node" + } + + /// Deployment resource name for a node index. + fn node_deployment_name(release: &str, index: usize) -> String { + format!("{release}-node-{index}") + } + + /// Service resource name for a node index. + fn node_service_name(release: &str, index: usize) -> String { + format!("{release}-node-{index}") + } + + /// Wait for HTTP readiness on provided ports for a given host. + async fn wait_for_node_http( + ports: &[u16], + role: &'static str, + host: &str, + timeout: Duration, + poll_interval: Duration, + requirement: HttpReadinessRequirement, + ) -> Result<(), DynError> { + let _ = role; + let _ = timeout; + let _ = poll_interval; + wait_for_http_ports_with_host_and_requirement( + ports, + host, + Self::readiness_path(), + requirement, + ) + .await?; + Ok(()) + } + + /// Optional base URL for node client diagnostics. + fn node_base_url(_client: &Self::NodeClient) -> Option { + None + } +} diff --git a/testing-framework/deployers/k8s/src/host.rs b/testing-framework/deployers/k8s/src/host.rs index 0401262..f03e98c 100644 --- a/testing-framework/deployers/k8s/src/host.rs +++ b/testing-framework/deployers/k8s/src/host.rs @@ -13,6 +13,7 @@ pub fn node_host() -> String { debug!(host, env = NODE_HOST_ENV, "using node host override"); return host; } + if let Ok(host) = env::var(KUBE_SERVICE_HOST_ENV) && !host.is_empty() { diff --git a/testing-framework/deployers/k8s/src/infrastructure/assets.rs b/testing-framework/deployers/k8s/src/infrastructure/assets.rs deleted file mode 100644 index 624d5f5..0000000 --- a/testing-framework/deployers/k8s/src/infrastructure/assets.rs +++ /dev/null @@ -1,349 +0,0 @@ -use std::{ - collections::BTreeMap, - env, fs, io, - path::{Path, PathBuf}, -}; - -use anyhow::{Context as _, Result as AnyResult}; -use lb_tracing::metrics::otlp::OtlpMetricsConfig; -use lb_tracing_service::MetricsLayer; -use reqwest::Url; -use serde::Serialize; -use tempfile::TempDir; -use testing_framework_config::constants::{DEFAULT_ASSETS_STACK_DIR, cfgsync_port}; -use testing_framework_core::{ - scenario::cfgsync::{apply_topology_overrides, load_cfgsync_template, render_cfgsync_yaml}, - topology::generation::GeneratedTopology, -}; -use testing_framework_env as tf_env; -use thiserror::Error; -use tracing::{debug, info}; - -/// Paths and image metadata required to deploy the Helm chart. -pub struct RunnerAssets { - pub image: String, - pub chart_path: PathBuf, - pub cfgsync_file: PathBuf, - pub run_cfgsync_script: PathBuf, - pub run_nomos_script: PathBuf, - pub run_nomos_node_script: PathBuf, - pub values_file: PathBuf, - _tempdir: TempDir, -} - -pub fn cfgsync_port_value() -> u16 { - cfgsync_port() -} - -#[derive(Debug, Error)] -/// Failures preparing Helm assets and rendered cfgsync configuration. -pub enum AssetsError { - #[error("failed to locate workspace root: {source}")] - WorkspaceRoot { - #[source] - source: anyhow::Error, - }, - #[error("failed to render cfgsync configuration: {source}")] - Cfgsync { - #[source] - source: anyhow::Error, - }, - #[error("missing required script at {path}")] - MissingScript { path: PathBuf }, - #[error("missing Helm chart at {path}; ensure the repository is up-to-date")] - MissingChart { path: PathBuf }, - #[error("failed to create temporary directory for rendered assets: {source}")] - TempDir { - #[source] - source: io::Error, - }, - #[error("failed to write asset at {path}: {source}")] - Io { - path: PathBuf, - #[source] - source: io::Error, - }, - #[error("failed to render Helm values: {source}")] - Values { - #[source] - source: serde_yaml::Error, - }, -} - -/// Render cfgsync config, Helm values, and locate scripts for a -/// topology. -pub fn prepare_assets( - topology: &GeneratedTopology, - metrics_otlp_ingest_url: Option<&Url>, -) -> Result { - info!( - nodes = topology.nodes().len(), - "preparing k8s runner assets" - ); - - let root = workspace_root().map_err(|source| AssetsError::WorkspaceRoot { source })?; - let tempdir = create_assets_tempdir()?; - - let cfgsync_file = - render_and_write_cfgsync(&root, topology, metrics_otlp_ingest_url, &tempdir)?; - let scripts = validate_scripts(&root)?; - let chart_path = helm_chart_path()?; - let values_file = render_and_write_values(topology, &tempdir)?; - let image = testnet_image(); - - debug!( - cfgsync = %cfgsync_file.display(), - values = %values_file.display(), - image, - chart = %chart_path.display(), - "k8s runner assets prepared" - ); - - Ok(RunnerAssets { - image, - chart_path, - cfgsync_file, - run_nomos_script: scripts.run_shared, - run_cfgsync_script: scripts.run_cfgsync, - run_nomos_node_script: scripts.run_node, - values_file, - _tempdir: tempdir, - }) -} - -fn create_assets_tempdir() -> Result { - tempfile::Builder::new() - .prefix("nomos-helm-") - .tempdir() - .map_err(|source| AssetsError::TempDir { source }) -} - -fn render_and_write_cfgsync( - root: &Path, - topology: &GeneratedTopology, - metrics_otlp_ingest_url: Option<&Url>, - tempdir: &TempDir, -) -> Result { - let cfgsync_yaml = render_cfgsync_config(root, topology, metrics_otlp_ingest_url)?; - write_temp_file(tempdir.path(), "cfgsync.yaml", cfgsync_yaml) -} - -fn render_and_write_values( - topology: &GeneratedTopology, - tempdir: &TempDir, -) -> Result { - let values_yaml = render_values_yaml(topology)?; - write_temp_file(tempdir.path(), "values.yaml", values_yaml) -} - -fn testnet_image() -> String { - tf_env::nomos_testnet_image() - .unwrap_or_else(|| String::from("public.ecr.aws/r4s5t9y4/logos/logos-blockchain:test")) -} - -const CFGSYNC_K8S_TIMEOUT_SECS: u64 = 300; - -fn render_cfgsync_config( - root: &Path, - topology: &GeneratedTopology, - metrics_otlp_ingest_url: Option<&Url>, -) -> Result { - let cfgsync_template_path = stack_assets_root(root).join("cfgsync.yaml"); - debug!(path = %cfgsync_template_path.display(), "loading cfgsync template"); - - let mut cfg = load_cfgsync_template(&cfgsync_template_path) - .map_err(|source| AssetsError::Cfgsync { source })?; - - apply_topology_overrides(&mut cfg, topology); - - if let Some(endpoint) = metrics_otlp_ingest_url.cloned() { - cfg.tracing_settings.metrics = MetricsLayer::Otlp(OtlpMetricsConfig { - endpoint, - host_identifier: "node".into(), - }); - } - - cfg.timeout = cfg.timeout.max(CFGSYNC_K8S_TIMEOUT_SECS); - - render_cfgsync_yaml(&cfg).map_err(|source| AssetsError::Cfgsync { source }) -} - -struct ScriptPaths { - run_cfgsync: PathBuf, - run_shared: PathBuf, - run_node: PathBuf, -} - -fn validate_scripts(root: &Path) -> Result { - let scripts_dir = stack_scripts_root(root); - let run_cfgsync = scripts_dir.join("run_cfgsync.sh"); - let run_shared = scripts_dir.join("run_nomos.sh"); - let run_node = scripts_dir.join("run_nomos_node.sh"); - - for path in [&run_cfgsync, &run_shared, &run_node] { - if !path.exists() { - return Err(AssetsError::MissingScript { path: path.clone() }); - } - } - - debug!( - run_cfgsync = %run_cfgsync.display(), - run_shared = %run_shared.display(), - run_node = %run_node.display(), - "validated runner scripts exist" - ); - - Ok(ScriptPaths { - run_cfgsync, - run_shared, - run_node, - }) -} - -fn helm_chart_path() -> Result { - let path = Path::new(env!("CARGO_MANIFEST_DIR")).join("helm/nomos-runner"); - if path.exists() { - Ok(path) - } else { - Err(AssetsError::MissingChart { path }) - } -} - -fn render_values_yaml(topology: &GeneratedTopology) -> Result { - let values = build_values(topology); - serde_yaml::to_string(&values).map_err(|source| AssetsError::Values { source }) -} - -fn write_temp_file( - dir: &Path, - name: &str, - contents: impl AsRef<[u8]>, -) -> Result { - let path = dir.join(name); - fs::write(&path, contents).map_err(|source| AssetsError::Io { - path: path.clone(), - source, - })?; - Ok(path) -} - -/// Locate the workspace root, honoring `CARGO_WORKSPACE_DIR` overrides. -pub fn workspace_root() -> AnyResult { - if let Ok(var) = env::var("CARGO_WORKSPACE_DIR") { - return Ok(PathBuf::from(var)); - } - let manifest_dir = Path::new(env!("CARGO_MANIFEST_DIR")); - manifest_dir - .parent() - .and_then(Path::parent) - .and_then(Path::parent) - .map(Path::to_path_buf) - .context("resolving workspace root from manifest dir") -} - -fn stack_assets_root(root: &Path) -> PathBuf { - let new_layout = root.join(DEFAULT_ASSETS_STACK_DIR); - if new_layout.exists() { - new_layout - } else { - root.join("testnet") - } -} - -fn stack_scripts_root(root: &Path) -> PathBuf { - let new_layout = root.join(DEFAULT_ASSETS_STACK_DIR).join("scripts"); - if new_layout.exists() { - new_layout - } else { - root.join("testnet/scripts") - } -} - -#[derive(Serialize)] -struct HelmValues { - #[serde(rename = "imagePullPolicy")] - image_pull_policy: String, - cfgsync: CfgsyncValues, - nodes: NodeGroup, -} - -#[derive(Serialize)] -struct CfgsyncValues { - port: u16, -} - -#[derive(Serialize)] -struct NodeGroup { - count: usize, - nodes: Vec, -} - -#[derive(Serialize)] -struct NodeValues { - #[serde(rename = "apiPort")] - api_port: u16, - #[serde(rename = "testingHttpPort")] - testing_http_port: u16, - env: BTreeMap, -} - -fn build_values(topology: &GeneratedTopology) -> HelmValues { - let cfgsync = CfgsyncValues { - port: cfgsync_port(), - }; - let image_pull_policy = - tf_env::nomos_testnet_image_pull_policy().unwrap_or_else(|| "IfNotPresent".into()); - let nodes = build_node_group("node", topology.nodes()); - - HelmValues { - image_pull_policy, - cfgsync, - nodes, - } -} - -fn build_node_group( - kind: &'static str, - nodes: &[testing_framework_core::topology::generation::GeneratedNodeConfig], -) -> NodeGroup { - let node_values = nodes - .iter() - .enumerate() - .map(|(index, node)| build_node_values(kind, index, node)) - .collect(); - - NodeGroup { - count: nodes.len(), - nodes: node_values, - } -} - -fn build_node_values( - kind: &'static str, - index: usize, - node: &testing_framework_core::topology::generation::GeneratedNodeConfig, -) -> NodeValues { - let mut env = BTreeMap::new(); - env.insert("CFG_NETWORK_PORT".into(), node.network_port().to_string()); - env.insert("CFG_BLEND_PORT".into(), node.blend_port.to_string()); - env.insert( - "CFG_API_PORT".into(), - node.general.api_config.address.port().to_string(), - ); - env.insert( - "CFG_TESTING_HTTP_PORT".into(), - node.general - .api_config - .testing_http_address - .port() - .to_string(), - ); - env.insert("CFG_HOST_KIND".into(), kind.to_string()); - env.insert("CFG_HOST_IDENTIFIER".into(), format!("{kind}-{index}")); - - NodeValues { - api_port: node.general.api_config.address.port(), - testing_http_port: node.general.api_config.testing_http_address.port(), - env, - } -} diff --git a/testing-framework/deployers/k8s/src/infrastructure/cluster.rs b/testing-framework/deployers/k8s/src/infrastructure/cluster.rs index e313b02..d1048fd 100644 --- a/testing-framework/deployers/k8s/src/infrastructure/cluster.rs +++ b/testing-framework/deployers/k8s/src/infrastructure/cluster.rs @@ -1,18 +1,13 @@ -use std::env; - use kube::Client; use reqwest::Url; -use testing_framework_core::{ - nodes::ApiClient, - scenario::{CleanupGuard, NodeClients, http_probe::NODE_ROLE}, - topology::{generation::GeneratedTopology, readiness::ReadinessError}, +use testing_framework_core::scenario::{ + CleanupGuard, DynError, HttpReadinessRequirement, NodeClients, }; use tracing::{debug, info}; use url::ParseError; -use uuid::Uuid; use crate::{ - infrastructure::assets::RunnerAssets, + env::K8sDeployEnv, lifecycle::{cleanup::RunnerCleanup, logs::dump_namespace_logs}, wait::{ ClusterPorts, ClusterReady, NodeConfigPorts, PortForwardHandle, wait_for_cluster_ready, @@ -107,13 +102,10 @@ impl ClusterEnvironment { #[derive(Debug, thiserror::Error)] /// Failures while building node clients against forwarded ports. pub enum NodeClientError { - #[error("failed to build {endpoint} client URL for {role} port {port}: {source}")] - Endpoint { - role: &'static str, - endpoint: &'static str, - port: u16, + #[error("failed to build node clients: {source}")] + Build { #[source] - source: ParseError, + source: DynError, }, } @@ -130,54 +122,42 @@ pub enum RemoteReadinessError { #[error("remote readiness probe failed: {source}")] Remote { #[source] - source: ReadinessError, + source: DynError, }, } -pub fn collect_port_specs(descriptors: &GeneratedTopology) -> PortSpecs { - let nodes = descriptors - .nodes() - .iter() - .map(|node| NodeConfigPorts { - api: node.general.api_config.address.port(), - testing: node.general.api_config.testing_http_address.port(), - }) - .collect(); - - let specs = PortSpecs { nodes }; - +pub fn collect_port_specs(descriptors: &E::Deployment) -> PortSpecs { + let specs = E::collect_port_specs(descriptors); debug!(nodes = specs.nodes.len(), "collected k8s port specs"); - specs } -pub fn build_node_clients(cluster: &ClusterEnvironment) -> Result { - let nodes = cluster - .node_api_ports - .iter() - .copied() - .zip(cluster.node_testing_ports.iter().copied()) - .map(|(api_port, testing_port)| { - api_client_from_ports(&cluster.node_host, NODE_ROLE, api_port, testing_port) - }) - .collect::, _>>()?; +pub fn build_node_clients( + cluster: &ClusterEnvironment, +) -> Result, NodeClientError> { + let nodes = E::build_node_clients( + &cluster.node_host, + &cluster.node_api_ports, + &cluster.node_testing_ports, + ) + .map_err(|source| NodeClientError::Build { source })?; debug!(nodes = nodes.len(), "built k8s node clients"); Ok(NodeClients::new(nodes)) } -pub async fn ensure_cluster_readiness( - descriptors: &GeneratedTopology, +pub async fn ensure_cluster_readiness( + descriptors: &E::Deployment, cluster: &ClusterEnvironment, + requirement: HttpReadinessRequirement, ) -> Result<(), RemoteReadinessError> { info!("waiting for remote readiness (API + membership)"); let (node_api, _node_testing) = cluster.node_ports(); - let node_urls = readiness_urls(node_api, NODE_ROLE, &cluster.node_host)?; + let node_urls = readiness_urls(node_api, E::node_role(), &cluster.node_host)?; - descriptors - .wait_remote_readiness(&node_urls) + E::wait_remote_readiness(descriptors, &node_urls, requirement) .await .map_err(|source| RemoteReadinessError::Remote { source })?; @@ -189,47 +169,7 @@ pub async fn ensure_cluster_readiness( Ok(()) } -pub fn cluster_identifiers() -> (String, String) { - if let Ok(namespace) = env::var("K8S_RUNNER_NAMESPACE") - && !namespace.is_empty() - { - let release = env::var("K8S_RUNNER_RELEASE") - .ok() - .filter(|value| !value.is_empty()) - .unwrap_or_else(|| namespace.clone()); - return (namespace, release); - } - - let run_id = Uuid::new_v4().simple().to_string(); - let namespace = format!("nomos-k8s-{run_id}"); - (namespace.clone(), namespace) -} - -pub async fn install_stack( - client: &Client, - assets: &RunnerAssets, - namespace: &str, - release: &str, - nodes: usize, -) -> Result { - tracing::info!( - release = %release, - namespace = %namespace, - "installing helm release" - ); - crate::infrastructure::helm::install_release(assets, release, namespace, nodes).await?; - tracing::info!(release = %release, "helm install succeeded"); - - let preserve = env::var("K8S_RUNNER_PRESERVE").is_ok(); - Ok(RunnerCleanup::new( - client.clone(), - namespace.to_owned(), - release.to_owned(), - preserve, - )) -} - -pub async fn wait_for_ports_or_cleanup( +pub async fn wait_for_ports_or_cleanup( client: &Client, namespace: &str, release: &str, @@ -242,7 +182,7 @@ pub async fn wait_for_ports_or_cleanup( %release, "waiting for cluster port-forwards" ); - match wait_for_cluster_ready(client, namespace, release, &specs.nodes).await { + match wait_for_cluster_ready::(client, namespace, release, &specs.nodes).await { Ok(ports) => { info!( node_ports = ?ports.ports.nodes, @@ -250,6 +190,7 @@ pub async fn wait_for_ports_or_cleanup( ); Ok(ports) } + Err(err) => { cleanup_pending(client, namespace, cleanup_guard).await; Err(err.into()) @@ -294,28 +235,3 @@ fn readiness_url(host: &str, role: &'static str, port: u16) -> Result Result { Url::parse(&format!("http://{host}:{port}/")) } - -fn api_client_from_ports( - host: &str, - role: &'static str, - api_port: u16, - testing_port: u16, -) -> Result { - let base_endpoint = - cluster_host_url(host, api_port).map_err(|source| NodeClientError::Endpoint { - role, - endpoint: "api", - port: api_port, - source, - })?; - let testing_endpoint = - Some( - cluster_host_url(host, testing_port).map_err(|source| NodeClientError::Endpoint { - role, - endpoint: "testing", - port: testing_port, - source, - })?, - ); - Ok(ApiClient::from_urls(base_endpoint, testing_endpoint)) -} diff --git a/testing-framework/deployers/k8s/src/infrastructure/helm.rs b/testing-framework/deployers/k8s/src/infrastructure/helm.rs index add216d..36c05ef 100644 --- a/testing-framework/deployers/k8s/src/infrastructure/helm.rs +++ b/testing-framework/deployers/k8s/src/infrastructure/helm.rs @@ -1,10 +1,11 @@ -use std::{io, process::Stdio}; +use std::{ + io, + process::{Output, Stdio}, +}; use thiserror::Error; use tokio::process::Command; -use tracing::{debug, info}; - -use crate::infrastructure::assets::{RunnerAssets, cfgsync_port_value, workspace_root}; +use tracing::info; /// Errors returned from Helm invocations. #[derive(Debug, Error)] @@ -24,104 +25,6 @@ pub enum HelmError { }, } -/// Install the Helm release for the provided topology counts. -pub async fn install_release( - assets: &RunnerAssets, - release: &str, - namespace: &str, - nodes: usize, -) -> Result<(), HelmError> { - info!( - release, - namespace, - nodes, - image = %assets.image, - cfgsync_port = cfgsync_port_value(), - values = %assets.values_file.display(), - "installing helm release" - ); - - let command = format!("helm install {release}"); - let cmd = build_install_command(assets, release, namespace, nodes, &command); - let output = run_helm_command(cmd, &command).await?; - - maybe_log_install_output(&command, &output); - - info!(release, namespace, "helm install completed"); - Ok(()) -} - -fn build_install_command( - assets: &RunnerAssets, - release: &str, - namespace: &str, - nodes: usize, - command: &str, -) -> Command { - let mut cmd = Command::new("helm"); - cmd.arg("install") - .arg(release) - .arg(&assets.chart_path) - .arg("--namespace") - .arg(namespace) - .arg("--create-namespace") - .arg("--wait") - .arg("--timeout") - .arg("5m") - .arg("--set") - .arg(format!("image={}", assets.image)) - .arg("--set") - .arg(format!("nodes.count={nodes}")) - .arg("--set") - .arg(format!("cfgsync.port={}", cfgsync_port_value())) - .arg("-f") - .arg(&assets.values_file) - .arg("--set-file") - .arg(format!("cfgsync.config={}", assets.cfgsync_file.display())) - .arg("--set-file") - .arg(format!( - "scripts.runCfgsyncSh={}", - assets.run_cfgsync_script.display() - )) - .arg("--set-file") - .arg(format!( - "scripts.runNomosNodeSh={}", - assets.run_nomos_node_script.display() - )) - .arg("--set-file") - .arg(format!( - "scripts.runNomosSh={}", - assets.run_nomos_script.display() - )) - .stdout(Stdio::piped()) - .stderr(Stdio::piped()); - - if let Ok(root) = workspace_root() { - cmd.current_dir(root); - } - - cmd.stdout(Stdio::piped()).stderr(Stdio::piped()); - debug!(command, "prepared helm install command"); - cmd -} - -fn maybe_log_install_output(command: &str, output: &std::process::Output) { - if std::env::var("K8S_RUNNER_DEBUG").is_err() { - return; - } - - debug!( - command, - stdout = %String::from_utf8_lossy(&output.stdout), - "helm install stdout" - ); - debug!( - command, - stderr = %String::from_utf8_lossy(&output.stderr), - "helm install stderr" - ); -} - /// Uninstall the release and namespace resources. pub async fn uninstall_release(release: &str, namespace: &str) -> Result<(), HelmError> { let mut cmd = Command::new("helm"); @@ -138,10 +41,7 @@ pub async fn uninstall_release(release: &str, namespace: &str) -> Result<(), Hel Ok(()) } -async fn run_helm_command( - mut cmd: Command, - command: &str, -) -> Result { +async fn run_helm_command(mut cmd: Command, command: &str) -> Result { cmd.stdout(Stdio::piped()).stderr(Stdio::piped()); let output = cmd.output().await.map_err(|source| HelmError::Spawn { command: command.to_owned(), diff --git a/testing-framework/deployers/k8s/src/infrastructure/mod.rs b/testing-framework/deployers/k8s/src/infrastructure/mod.rs index 6bd03c5..2ff4fb3 100644 --- a/testing-framework/deployers/k8s/src/infrastructure/mod.rs +++ b/testing-framework/deployers/k8s/src/infrastructure/mod.rs @@ -1,3 +1,2 @@ -pub mod assets; pub mod cluster; pub mod helm; diff --git a/testing-framework/deployers/k8s/src/lib.rs b/testing-framework/deployers/k8s/src/lib.rs index 4cf8f5d..90251c3 100644 --- a/testing-framework/deployers/k8s/src/lib.rs +++ b/testing-framework/deployers/k8s/src/lib.rs @@ -1,4 +1,5 @@ mod deployer; +mod env; mod host; mod infrastructure; mod lifecycle; @@ -7,3 +8,6 @@ pub mod wait { } pub use deployer::{K8sDeployer, K8sRunnerError}; +pub use env::K8sDeployEnv; +pub use infrastructure::cluster::PortSpecs; +pub use lifecycle::cleanup::RunnerCleanup; diff --git a/testing-framework/deployers/k8s/src/lifecycle/block_feed.rs b/testing-framework/deployers/k8s/src/lifecycle/block_feed.rs index 054924d..554a7fd 100644 --- a/testing-framework/deployers/k8s/src/lifecycle/block_feed.rs +++ b/testing-framework/deployers/k8s/src/lifecycle/block_feed.rs @@ -1,25 +1,30 @@ -use testing_framework_core::scenario::{BlockFeed, BlockFeedTask, NodeClients, spawn_block_feed}; +use testing_framework_core::scenario::{ + Application, FeedHandle, FeedRuntime, NodeClients, spawn_feed, +}; use tracing::{debug, info}; use crate::deployer::K8sRunnerError; -pub async fn spawn_block_feed_with( - node_clients: &NodeClients, -) -> Result<(BlockFeed, BlockFeedTask), K8sRunnerError> { +pub async fn spawn_block_feed_with( + node_clients: &NodeClients, +) -> Result< + ( + <::FeedRuntime as FeedRuntime>::Feed, + FeedHandle, + ), + K8sRunnerError, +> { debug!( - nodes = node_clients.node_clients().len(), + nodes = node_clients.len(), "selecting node client for block feed" ); let block_source_client = node_clients - .node_clients() - .into_iter() - .next() - .or_else(|| node_clients.any_client()) + .random_client() .ok_or(K8sRunnerError::BlockFeedMissing)?; info!("starting block feed"); - spawn_block_feed(block_source_client) + spawn_feed::(block_source_client) .await .map_err(|source| K8sRunnerError::BlockFeed { source }) } diff --git a/testing-framework/deployers/k8s/src/lifecycle/cleanup.rs b/testing-framework/deployers/k8s/src/lifecycle/cleanup.rs index 9068f26..6d64ebe 100644 --- a/testing-framework/deployers/k8s/src/lifecycle/cleanup.rs +++ b/testing-framework/deployers/k8s/src/lifecycle/cleanup.rs @@ -1,11 +1,12 @@ -use std::thread; +use std::{io, process::Output, thread}; use k8s_openapi::api::core::v1::Namespace; use kube::{Api, Client, api::DeleteParams}; use testing_framework_core::scenario::CleanupGuard; use tokio::{ process::Command, - time::{Duration, sleep}, + runtime::{Handle, Runtime}, + time::{Duration, error::Elapsed, sleep, timeout}, }; use tracing::{info, warn}; @@ -52,25 +53,10 @@ impl RunnerCleanup { } fn blocking_cleanup_success(&self) -> bool { - match tokio::runtime::Runtime::new() { - Ok(rt) => match rt.block_on(async { - tokio::time::timeout(CLEANUP_TIMEOUT, self.cleanup_async()).await - }) { - Ok(()) => true, - Err(err) => { - warn!( - error = ?err, - "cleanup timed out after {}s; falling back to background thread", - CLEANUP_TIMEOUT.as_secs() - ); - false - } - }, - Err(err) => { - warn!(error = ?err, "unable to create cleanup runtime; falling back to background thread"); - false - } - } + run_cleanup_with_runtime(self).unwrap_or_else(|error| { + warn!(error = ?error, "unable to complete blocking cleanup; falling back to background thread"); + false + }) } fn spawn_cleanup_thread(self: Box) { @@ -83,11 +69,25 @@ impl RunnerCleanup { warn!(error = ?err, "cleanup thread panicked"); } } + Err(err) => warn!(error = ?err, "failed to spawn cleanup thread"), } } } +#[derive(Debug, thiserror::Error)] +enum BlockingCleanupError { + #[error("failed to create tokio runtime: {source}")] + RuntimeInit { source: io::Error }, +} + +fn run_cleanup_with_runtime(cleanup: &RunnerCleanup) -> Result { + let runtime = Runtime::new().map_err(|source| BlockingCleanupError::RuntimeInit { source })?; + let result = + runtime.block_on(async { timeout(CLEANUP_TIMEOUT, cleanup.cleanup_async()).await }); + Ok(cleanup_completed(result)) +} + async fn uninstall_release_and_namespace(client: &Client, release: &str, namespace: &str) { if let Err(err) = uninstall_release(release, namespace).await { warn!(release, namespace, error = ?err, "helm uninstall failed during cleanup"); @@ -99,27 +99,24 @@ async fn uninstall_release_and_namespace(client: &Client, release: &str, namespa } fn run_background_cleanup(cleanup: Box) { - match tokio::runtime::Runtime::new() { + match Runtime::new() { Ok(rt) => { - if let Err(err) = rt.block_on(async { - tokio::time::timeout(CLEANUP_TIMEOUT, cleanup.cleanup_async()).await - }) { - warn!("[k8s-runner] background cleanup timed out: {err}"); + if let Err(err) = + rt.block_on(async { timeout(CLEANUP_TIMEOUT, cleanup.cleanup_async()).await }) + { + warn!(error = ?err, "background cleanup timed out"); } } - Err(err) => warn!("[k8s-runner] unable to create cleanup runtime: {err}"), + + Err(err) => warn!(error = ?err, "unable to create cleanup runtime"), } } async fn delete_namespace(client: &Client, namespace: &str) { let namespaces: Api = Api::all(client.clone()); + let deleted = try_delete_namespace(&namespaces, namespace).await; - if delete_namespace_via_api(&namespaces, namespace).await { - wait_for_namespace_termination(&namespaces, namespace).await; - return; - } - - if delete_namespace_via_cli(namespace).await { + if deleted { wait_for_namespace_termination(&namespaces, namespace).await; } else { warn!( @@ -129,9 +126,17 @@ async fn delete_namespace(client: &Client, namespace: &str) { } } +async fn try_delete_namespace(namespaces: &Api, namespace: &str) -> bool { + if delete_namespace_via_api(namespaces, namespace).await { + return true; + } + + delete_namespace_via_cli(namespace).await +} + async fn delete_namespace_via_api(namespaces: &Api, namespace: &str) -> bool { info!(namespace, "invoking kubernetes API to delete namespace"); - match tokio::time::timeout( + match timeout( NAMESPACE_DELETE_TIMEOUT, namespaces.delete(namespace, &DeleteParams::default()), ) @@ -144,10 +149,12 @@ async fn delete_namespace_via_api(namespaces: &Api, namespace: &str) ); true } + Ok(Err(err)) => { warn!(namespace, error = ?err, "failed to delete namespace via API"); false } + Err(_) => { warn!( namespace, @@ -160,28 +167,19 @@ async fn delete_namespace_via_api(namespaces: &Api, namespace: &str) async fn delete_namespace_via_cli(namespace: &str) -> bool { info!(namespace, "invoking kubectl delete namespace fallback"); - let output = Command::new("kubectl") - .arg("delete") - .arg("namespace") - .arg(namespace) - .arg("--wait=true") - .output() - .await; + let output = run_kubectl_delete_namespace(namespace).await; match output { Ok(result) if result.status.success() => { info!(namespace, "kubectl delete namespace completed successfully"); true } + Ok(result) => { - warn!( - namespace, - stderr = %String::from_utf8_lossy(&result.stderr), - stdout = %String::from_utf8_lossy(&result.stdout), - "kubectl delete namespace failed" - ); + log_kubectl_delete_failure(namespace, &result.stdout, &result.stderr); false } + Err(err) => { warn!(namespace, error = ?err, "failed to spawn kubectl delete namespace"); false @@ -189,6 +187,25 @@ async fn delete_namespace_via_cli(namespace: &str) -> bool { } } +async fn run_kubectl_delete_namespace(namespace: &str) -> Result { + Command::new("kubectl") + .arg("delete") + .arg("namespace") + .arg(namespace) + .arg("--wait=true") + .output() + .await +} + +fn log_kubectl_delete_failure(namespace: &str, stdout: &[u8], stderr: &[u8]) { + warn!( + namespace, + stderr = %String::from_utf8_lossy(stderr), + stdout = %String::from_utf8_lossy(stdout), + "kubectl delete namespace failed" + ); +} + async fn wait_for_namespace_termination(namespaces: &Api, namespace: &str) { const NAMESPACE_TERMINATION_POLL_ATTEMPTS: u32 = 60; const NAMESPACE_TERMINATION_POLL_INTERVAL: Duration = Duration::from_secs(1); @@ -200,10 +217,7 @@ async fn wait_for_namespace_termination(namespaces: &Api, namespace: sleep(NAMESPACE_TERMINATION_POLL_INTERVAL).await; } - warn!( - "[k8s-runner] namespace `{}` still present after waiting for deletion", - namespace - ); + warn_namespace_still_present(namespace); } async fn namespace_deleted(namespaces: &Api, namespace: &str, attempt: u32) -> bool { @@ -219,10 +233,12 @@ async fn namespace_deleted(namespaces: &Api, namespace: &str, attempt } false } + Ok(None) => { info!(namespace, "namespace deleted"); true } + Err(err) => { warn!(namespace, error = ?err, "namespace poll failed"); true @@ -230,9 +246,30 @@ async fn namespace_deleted(namespaces: &Api, namespace: &str, attempt } } +fn warn_namespace_still_present(namespace: &str) { + warn!( + namespace, + "namespace still present after waiting for deletion" + ); +} + +fn cleanup_completed(result: Result<(), Elapsed>) -> bool { + match result { + Ok(()) => true, + Err(error) => { + warn!( + error = ?error, + timeout_secs = CLEANUP_TIMEOUT.as_secs(), + "cleanup timed out; falling back to background thread" + ); + false + } + } +} + impl CleanupGuard for RunnerCleanup { fn cleanup(self: Box) { - if tokio::runtime::Handle::try_current().is_err() && self.blocking_cleanup_success() { + if Handle::try_current().is_err() && self.blocking_cleanup_success() { return; } self.spawn_cleanup_thread(); diff --git a/testing-framework/deployers/k8s/src/lifecycle/wait/deployment.rs b/testing-framework/deployers/k8s/src/lifecycle/wait/deployment.rs index 027fd57..1e68736 100644 --- a/testing-framework/deployers/k8s/src/lifecycle/wait/deployment.rs +++ b/testing-framework/deployers/k8s/src/lifecycle/wait/deployment.rs @@ -1,57 +1,101 @@ +use std::time::Duration; + use k8s_openapi::api::apps::v1::Deployment; use kube::{Api, Client}; -use tokio::time::sleep; +use tokio_retry::{RetryIf, strategy::FixedInterval}; use super::{ClusterWaitError, deployment_timeout}; -const DEPLOYMENT_POLL_INTERVAL_SECS: u64 = 2; +const DEPLOYMENT_POLL_INTERVAL: Duration = Duration::from_secs(2); +const MILLIS_PER_SECOND: u64 = 1_000; + +#[derive(Debug)] +enum DeploymentWaitError { + NotReady, + Fatal(ClusterWaitError), +} pub async fn wait_for_deployment_ready( client: &Client, namespace: &str, name: &str, ) -> Result<(), ClusterWaitError> { - let mut elapsed = std::time::Duration::ZERO; - let interval = std::time::Duration::from_secs(DEPLOYMENT_POLL_INTERVAL_SECS); - let timeout = deployment_timeout(); + let strategy = deployment_retry_strategy(timeout); + let deployments = Api::::namespaced(client.clone(), namespace); + let result = RetryIf::spawn( + strategy, + || check_deployment_ready(&deployments, name), + |error: &DeploymentWaitError| matches!(error, DeploymentWaitError::NotReady), + ) + .await; - while elapsed <= timeout { - match Api::::namespaced(client.clone(), namespace) - .get(name) - .await - { - Ok(deployment) => { - let desired = deployment - .spec - .as_ref() - .and_then(|spec| spec.replicas) - .unwrap_or(1); - let ready = deployment - .status - .as_ref() - .and_then(|status| status.ready_replicas) - .unwrap_or(0); + map_deployment_wait_result(result, name, namespace, timeout) +} - if ready >= desired { - return Ok(()); - } - } - Err(err) => { - return Err(ClusterWaitError::DeploymentFetch { - name: name.to_owned(), - source: err, - }); - } - } +fn deployment_retry_strategy(timeout: Duration) -> impl Iterator { + let max_attempts = max_attempts_for_timeout(timeout); + FixedInterval::from_millis(retry_interval_millis()).take(max_attempts) +} - sleep(interval).await; - elapsed += interval; +const fn retry_interval_millis() -> u64 { + DEPLOYMENT_POLL_INTERVAL.as_secs() * MILLIS_PER_SECOND +} + +fn max_attempts_for_timeout(timeout: Duration) -> usize { + let timeout_ms = timeout.as_millis(); + let interval_ms = DEPLOYMENT_POLL_INTERVAL.as_millis(); + + timeout_ms.div_ceil(interval_ms).max(1) as usize +} + +async fn check_deployment_ready( + deployments: &Api, + name: &str, +) -> Result<(), DeploymentWaitError> { + match deployments.get(name).await { + Ok(deployment) => ensure_ready_replicas(deployment), + Err(source) => Err(DeploymentWaitError::Fatal( + ClusterWaitError::DeploymentFetch { + name: name.to_owned(), + source, + }, + )), + } +} + +fn ensure_ready_replicas(deployment: Deployment) -> Result<(), DeploymentWaitError> { + let desired = deployment + .spec + .as_ref() + .and_then(|spec| spec.replicas) + .unwrap_or(1); + let ready = deployment + .status + .as_ref() + .and_then(|status| status.ready_replicas) + .unwrap_or(0); + + if ready >= desired { + return Ok(()); } - Err(ClusterWaitError::DeploymentTimeout { - name: name.to_owned(), - namespace: namespace.to_owned(), - timeout, - }) + Err(DeploymentWaitError::NotReady) +} + +fn map_deployment_wait_result( + result: Result<(), DeploymentWaitError>, + name: &str, + namespace: &str, + timeout: Duration, +) -> Result<(), ClusterWaitError> { + match result { + Ok(()) => Ok(()), + Err(DeploymentWaitError::Fatal(error)) => Err(error), + Err(DeploymentWaitError::NotReady) => Err(ClusterWaitError::DeploymentTimeout { + name: name.to_owned(), + namespace: namespace.to_owned(), + timeout, + }), + } } diff --git a/testing-framework/deployers/k8s/src/lifecycle/wait/forwarding.rs b/testing-framework/deployers/k8s/src/lifecycle/wait/forwarding.rs index a120959..7fd3c67 100644 --- a/testing-framework/deployers/k8s/src/lifecycle/wait/forwarding.rs +++ b/testing-framework/deployers/k8s/src/lifecycle/wait/forwarding.rs @@ -1,13 +1,14 @@ use std::{ + fmt, io, net::{Ipv4Addr, TcpListener, TcpStream}, - process::{Child, Command as StdCommand, Stdio}, + process::{Child, Command as StdCommand, ExitStatus, Stdio}, thread, time::Duration, }; -use anyhow::{Result as AnyhowResult, anyhow}; +use anyhow::anyhow; -use super::{ClusterWaitError, NodeConfigPorts, NodePortAllocation}; +use super::ClusterWaitError; const PORT_FORWARD_READY_ATTEMPTS: u32 = 240; const PORT_FORWARD_READY_POLL_INTERVAL: Duration = Duration::from_millis(250); @@ -16,8 +17,8 @@ pub struct PortForwardHandle { child: Child, } -impl std::fmt::Debug for PortForwardHandle { - fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { +impl fmt::Debug for PortForwardHandle { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { f.debug_struct("PortForwardHandle").finish_non_exhaustive() } } @@ -40,94 +41,21 @@ pub struct PortForwardSpawn { pub handle: PortForwardHandle, } -pub fn port_forward_group( - namespace: &str, - release: &str, - kind: &str, - ports: &[NodeConfigPorts], - allocations: &mut Vec, -) -> Result, ClusterWaitError> { - let mut forwards = Vec::new(); - for (index, ports) in ports.iter().enumerate() { - let service = format!("{release}-{kind}-{index}"); - let PortForwardSpawn { - local_port: api_port, - handle: api_forward, - } = match port_forward_service(namespace, &service, ports.api) { - Ok(forward) => forward, - Err(err) => { - kill_port_forwards(&mut forwards); - return Err(err); - } - }; - let PortForwardSpawn { - local_port: testing_port, - handle: testing_forward, - } = match port_forward_service(namespace, &service, ports.testing) { - Ok(forward) => forward, - Err(err) => { - kill_port_forwards(&mut forwards); - return Err(err); - } - }; - allocations.push(NodePortAllocation { - api: api_port, - testing: testing_port, - }); - forwards.push(api_forward); - forwards.push(testing_forward); - } - Ok(forwards) -} - pub fn port_forward_service( namespace: &str, service: &str, remote_port: u16, ) -> Result { - let local_port = allocate_local_port().map_err(|source| ClusterWaitError::PortForward { - service: service.to_owned(), - port: remote_port, - source, - })?; + let local_port = + allocate_local_port().map_err(|source| port_forward_error(service, remote_port, source))?; + let mut child = spawn_kubectl_port_forward(namespace, service, local_port, remote_port) + .map_err(|source| port_forward_error(service, remote_port, source.into()))?; - let mut child = StdCommand::new("kubectl") - .arg("port-forward") - .arg("-n") - .arg(namespace) - .arg(format!("svc/{service}")) - .arg(format!("{local_port}:{remote_port}")) - .stdout(Stdio::null()) - .stderr(Stdio::null()) - .spawn() - .map_err(|source| ClusterWaitError::PortForward { - service: service.to_owned(), - port: remote_port, - source: source.into(), - })?; + wait_until_port_forward_ready(&mut child, local_port, service, remote_port)?; - for _ in 0..PORT_FORWARD_READY_ATTEMPTS { - if let Ok(Some(status)) = child.try_wait() { - return Err(ClusterWaitError::PortForward { - service: service.to_owned(), - port: remote_port, - source: anyhow!("kubectl exited with {status}"), - }); - } - if TcpStream::connect((Ipv4Addr::LOCALHOST, local_port)).is_ok() { - return Ok(PortForwardSpawn { - local_port, - handle: PortForwardHandle { child }, - }); - } - thread::sleep(PORT_FORWARD_READY_POLL_INTERVAL); - } - - let _ = child.kill(); - Err(ClusterWaitError::PortForward { - service: service.to_owned(), - port: remote_port, - source: anyhow!("port-forward did not become ready"), + Ok(PortForwardSpawn { + local_port, + handle: PortForwardHandle { child }, }) } @@ -138,9 +66,90 @@ pub fn kill_port_forwards(handles: &mut Vec) { handles.clear(); } -fn allocate_local_port() -> AnyhowResult { - let listener = TcpListener::bind((Ipv4Addr::LOCALHOST, 0))?; +fn allocate_local_port() -> anyhow::Result { + let listener = TcpListener::bind(localhost_addr(0))?; let port = listener.local_addr()?.port(); drop(listener); Ok(port) } + +fn spawn_kubectl_port_forward( + namespace: &str, + service: &str, + local_port: u16, + remote_port: u16, +) -> io::Result { + StdCommand::new("kubectl") + .arg("port-forward") + .arg("-n") + .arg(namespace) + .arg(format!("svc/{service}")) + .arg(format!("{local_port}:{remote_port}")) + .stdout(Stdio::null()) + .stderr(Stdio::null()) + .spawn() +} + +fn wait_until_port_forward_ready( + child: &mut Child, + local_port: u16, + service: &str, + remote_port: u16, +) -> Result<(), ClusterWaitError> { + for _ in 0..PORT_FORWARD_READY_ATTEMPTS { + ensure_port_forward_running(child, service, remote_port)?; + + if local_port_reachable(local_port) { + return Ok(()); + } + + thread::sleep(PORT_FORWARD_READY_POLL_INTERVAL); + } + + let _ = child.kill(); + Err(port_forward_ready_timeout_error(service, remote_port)) +} + +fn ensure_port_forward_running( + child: &mut Child, + service: &str, + remote_port: u16, +) -> Result<(), ClusterWaitError> { + let Some(status) = exited_status(child) else { + return Ok(()); + }; + + Err(port_forward_error( + service, + remote_port, + anyhow!("kubectl exited with {status}"), + )) +} + +fn port_forward_error(service: &str, remote_port: u16, source: anyhow::Error) -> ClusterWaitError { + ClusterWaitError::PortForward { + service: service.to_owned(), + port: remote_port, + source, + } +} + +fn port_forward_ready_timeout_error(service: &str, remote_port: u16) -> ClusterWaitError { + port_forward_error( + service, + remote_port, + anyhow!("port-forward did not become ready"), + ) +} + +fn exited_status(child: &mut Child) -> Option { + child.try_wait().ok().flatten() +} + +fn local_port_reachable(local_port: u16) -> bool { + TcpStream::connect(localhost_addr(local_port)).is_ok() +} + +const fn localhost_addr(port: u16) -> (Ipv4Addr, u16) { + (Ipv4Addr::LOCALHOST, port) +} diff --git a/testing-framework/deployers/k8s/src/lifecycle/wait/http_probe.rs b/testing-framework/deployers/k8s/src/lifecycle/wait/http_probe.rs index 9608f2a..585c34c 100644 --- a/testing-framework/deployers/k8s/src/lifecycle/wait/http_probe.rs +++ b/testing-framework/deployers/k8s/src/lifecycle/wait/http_probe.rs @@ -1,40 +1,42 @@ -use testing_framework_core::scenario::http_probe::{self, HttpReadinessError}; +use std::time::Duration; + +use testing_framework_core::scenario::HttpReadinessRequirement; use super::{ClusterWaitError, http_poll_interval, node_http_probe_timeout, node_http_timeout}; -use crate::host::node_host; +use crate::{env::K8sDeployEnv, host::node_host}; -pub async fn wait_for_node_http_nodeport( +const LOCALHOST: &str = "127.0.0.1"; +const READINESS_REQUIREMENT: HttpReadinessRequirement = HttpReadinessRequirement::AllNodesReady; + +pub async fn wait_for_node_http_nodeport( ports: &[u16], role: &'static str, ) -> Result<(), ClusterWaitError> { let host = node_host(); - wait_for_node_http_on_host(ports, role, &host, node_http_probe_timeout()).await + wait_for_node_http_on_host::(ports, role, &host, node_http_probe_timeout()).await } -const LOCALHOST: &str = "127.0.0.1"; - -pub async fn wait_for_node_http_port_forward( +pub async fn wait_for_node_http_port_forward( ports: &[u16], role: &'static str, ) -> Result<(), ClusterWaitError> { - wait_for_node_http_on_host(ports, role, LOCALHOST, node_http_timeout()).await + wait_for_node_http_on_host::(ports, role, LOCALHOST, node_http_timeout()).await } -async fn wait_for_node_http_on_host( +async fn wait_for_node_http_on_host( ports: &[u16], role: &'static str, host: &str, - timeout: std::time::Duration, + timeout: Duration, ) -> Result<(), ClusterWaitError> { - http_probe::wait_for_http_ports_with_host(ports, role, host, timeout, http_poll_interval()) - .await - .map_err(map_http_error) -} - -const fn map_http_error(error: HttpReadinessError) -> ClusterWaitError { - ClusterWaitError::NodeHttpTimeout { - role: error.role(), - port: error.port(), - timeout: error.timeout(), - } + E::wait_for_node_http( + ports, + role, + host, + timeout, + http_poll_interval(), + READINESS_REQUIREMENT, + ) + .await + .map_err(|source| ClusterWaitError::NodeHttp { role, source }) } diff --git a/testing-framework/deployers/k8s/src/lifecycle/wait/mod.rs b/testing-framework/deployers/k8s/src/lifecycle/wait/mod.rs index 584fe2f..ecbcfbb 100644 --- a/testing-framework/deployers/k8s/src/lifecycle/wait/mod.rs +++ b/testing-framework/deployers/k8s/src/lifecycle/wait/mod.rs @@ -10,11 +10,11 @@ mod orchestrator; mod ports; pub use forwarding::PortForwardHandle; +const DEFAULT_HTTP_POLL_INTERVAL: Duration = Duration::from_secs(1); +const DEFAULT_NODE_HTTP_TIMEOUT: Duration = Duration::from_secs(240); +const DEFAULT_NODE_HTTP_PROBE_TIMEOUT: Duration = Duration::from_secs(30); +const DEFAULT_K8S_DEPLOYMENT_TIMEOUT: Duration = Duration::from_secs(180); pub use orchestrator::wait_for_cluster_ready; -use testing_framework_config::constants::{ - DEFAULT_HTTP_POLL_INTERVAL, DEFAULT_K8S_DEPLOYMENT_TIMEOUT, DEFAULT_NODE_HTTP_PROBE_TIMEOUT, - DEFAULT_NODE_HTTP_TIMEOUT, -}; /// Container and host-side HTTP ports for a node in the Helm chart values. #[derive(Clone, Copy, Debug)] @@ -82,6 +82,12 @@ pub enum ClusterWaitError { port: u16, timeout: Duration, }, + #[error("failed to reach {role} HTTP endpoints: {source}")] + NodeHttp { + role: &'static str, + #[source] + source: testing_framework_core::scenario::DynError, + }, #[error("failed to start port-forward for service {service} port {port}: {source}")] PortForward { service: String, diff --git a/testing-framework/deployers/k8s/src/lifecycle/wait/orchestrator.rs b/testing-framework/deployers/k8s/src/lifecycle/wait/orchestrator.rs index b8962c4..858c957 100644 --- a/testing-framework/deployers/k8s/src/lifecycle/wait/orchestrator.rs +++ b/testing-framework/deployers/k8s/src/lifecycle/wait/orchestrator.rs @@ -1,15 +1,21 @@ use kube::Client; -use testing_framework_core::scenario::http_probe::NODE_ROLE; -use super::{ClusterPorts, ClusterReady, ClusterWaitError, NodeConfigPorts}; -use crate::lifecycle::wait::{ - deployment::wait_for_deployment_ready, - forwarding::{PortForwardHandle, kill_port_forwards, port_forward_group}, - http_probe::{wait_for_node_http_nodeport, wait_for_node_http_port_forward}, - ports::discover_node_ports, +use super::{ClusterPorts, ClusterReady, ClusterWaitError, NodeConfigPorts, NodePortAllocation}; +use crate::{ + env::K8sDeployEnv, + lifecycle::wait::{ + deployment::wait_for_deployment_ready, + forwarding::{ + PortForwardHandle, PortForwardSpawn, kill_port_forwards, port_forward_service, + }, + http_probe::{wait_for_node_http_nodeport, wait_for_node_http_port_forward}, + ports::discover_node_ports, + }, }; -pub async fn wait_for_cluster_ready( +const LOCALHOST: &str = "127.0.0.1"; + +pub async fn wait_for_cluster_ready( client: &Client, namespace: &str, release: &str, @@ -19,52 +25,158 @@ pub async fn wait_for_cluster_ready( return Err(ClusterWaitError::MissingNode); } - let mut node_allocations = Vec::with_capacity(node_ports.len()); - let mut node_host = crate::host::node_host(); + let node_allocations = + discover_ready_node_allocations::(client, namespace, release, node_ports).await?; + let role = E::node_role(); - for (index, ports) in node_ports.iter().enumerate() { - let name = format!("{release}-node-{index}"); - wait_for_deployment_ready(client, namespace, &name).await?; - let allocation = discover_node_ports(client, namespace, &name, *ports).await?; - node_allocations.push(allocation); - } + let (readiness, node_allocations) = + if needs_port_forward_fallback::(&node_allocations, role).await { + fallback_readiness::(namespace, release, node_ports, role).await? + } else { + (direct_nodeport_readiness(), node_allocations) + }; - let mut port_forwards: Vec = Vec::new(); + Ok(cluster_ready(readiness, node_allocations)) +} - let node_api_ports: Vec = node_allocations.iter().map(|ports| ports.api).collect(); - if wait_for_node_http_nodeport(&node_api_ports, NODE_ROLE) +struct ReadinessResolution { + node_host: String, + port_forwards: Vec, +} + +async fn needs_port_forward_fallback( + allocations: &[NodePortAllocation], + role: &'static str, +) -> bool { + let ports = api_ports(allocations); + + wait_for_node_http_nodeport::(&ports, role) .await .is_err() +} + +async fn resolve_with_port_forwards( + namespace: &str, + release: &str, + node_ports: &[NodeConfigPorts], + role: &'static str, +) -> Result<(Vec, Vec), ClusterWaitError> { + let (mut port_forwards, node_allocations) = spawn_port_forwards::( + namespace.to_owned(), + release.to_owned(), + node_ports.to_vec(), + ) + .await?; + + if let Err(error) = + wait_for_node_http_port_forward::(&api_ports(&node_allocations), role).await { - node_allocations.clear(); - node_host = "127.0.0.1".to_owned(); - let namespace = namespace.to_owned(); - let release = release.to_owned(); - let ports = node_ports.to_vec(); - let (forwards, allocations) = tokio::task::spawn_blocking(move || { - let mut allocations = Vec::with_capacity(ports.len()); - let forwards = - port_forward_group(&namespace, &release, "node", &ports, &mut allocations)?; - Ok::<_, ClusterWaitError>((forwards, allocations)) - }) - .await - .map_err(|source| ClusterWaitError::PortForwardTask { - source: source.into(), - })??; - port_forwards = forwards; - node_allocations = allocations; - let node_api_ports: Vec = node_allocations.iter().map(|ports| ports.api).collect(); - if let Err(err) = wait_for_node_http_port_forward(&node_api_ports, NODE_ROLE).await { - kill_port_forwards(&mut port_forwards); - return Err(err); - } + kill_port_forwards(&mut port_forwards); + return Err(error); } - Ok(ClusterReady { + Ok((port_forwards, node_allocations)) +} + +async fn fallback_readiness( + namespace: &str, + release: &str, + node_ports: &[NodeConfigPorts], + role: &'static str, +) -> Result<(ReadinessResolution, Vec), ClusterWaitError> { + let (port_forwards, allocations) = + resolve_with_port_forwards::(namespace, release, node_ports, role).await?; + + Ok(( + ReadinessResolution { + node_host: LOCALHOST.to_owned(), + port_forwards, + }, + allocations, + )) +} + +fn direct_nodeport_readiness() -> ReadinessResolution { + ReadinessResolution { + node_host: crate::host::node_host(), + port_forwards: Vec::new(), + } +} + +async fn discover_ready_node_allocations( + client: &Client, + namespace: &str, + release: &str, + node_ports: &[NodeConfigPorts], +) -> Result, ClusterWaitError> { + let mut allocations = Vec::with_capacity(node_ports.len()); + + for (index, ports) in node_ports.iter().enumerate() { + let deployment_name = E::node_deployment_name(release, index); + wait_for_deployment_ready(client, namespace, &deployment_name).await?; + let allocation = discover_node_ports(client, namespace, &deployment_name, *ports).await?; + allocations.push(allocation); + } + + Ok(allocations) +} + +async fn spawn_port_forwards( + namespace: String, + release: String, + node_ports: Vec, +) -> Result<(Vec, Vec), ClusterWaitError> { + tokio::task::spawn_blocking(move || { + let mut allocations = Vec::with_capacity(node_ports.len()); + let mut forwards = Vec::new(); + + for (index, ports) in node_ports.iter().enumerate() { + let service = E::node_service_name(&release, index); + let api_forward = port_forward_service(&namespace, &service, ports.api)?; + let testing_forward = port_forward_service(&namespace, &service, ports.testing)?; + register_forward_pair( + &mut allocations, + &mut forwards, + api_forward, + testing_forward, + ); + } + + Ok::<_, ClusterWaitError>((forwards, allocations)) + }) + .await + .map_err(|source| ClusterWaitError::PortForwardTask { + source: source.into(), + })? +} + +fn api_ports(nodes: &[NodePortAllocation]) -> Vec { + nodes.iter().map(|ports| ports.api).collect() +} + +fn cluster_ready( + readiness: ReadinessResolution, + node_allocations: Vec, +) -> ClusterReady { + ClusterReady { ports: ClusterPorts { nodes: node_allocations, - node_host, + node_host: readiness.node_host, }, - port_forwards, - }) + port_forwards: readiness.port_forwards, + } +} + +fn register_forward_pair( + allocations: &mut Vec, + forwards: &mut Vec, + api_forward: PortForwardSpawn, + testing_forward: PortForwardSpawn, +) { + allocations.push(NodePortAllocation { + api: api_forward.local_port, + testing: testing_forward.local_port, + }); + forwards.push(api_forward.handle); + forwards.push(testing_forward.handle); } diff --git a/testing-framework/deployers/k8s/src/lifecycle/wait/ports.rs b/testing-framework/deployers/k8s/src/lifecycle/wait/ports.rs index 2e045df..5b7bb25 100644 --- a/testing-framework/deployers/k8s/src/lifecycle/wait/ports.rs +++ b/testing-framework/deployers/k8s/src/lifecycle/wait/ports.rs @@ -1,11 +1,20 @@ -use k8s_openapi::api::core::v1::Service; +use std::time::Duration; + +use k8s_openapi::api::core::v1::{Service, ServicePort}; use kube::{Api, Client}; -use tokio::time::sleep; +use tokio_retry::{RetryIf, strategy::FixedInterval}; use super::{ClusterWaitError, NodeConfigPorts, NodePortAllocation}; const NODE_PORT_LOOKUP_ATTEMPTS: u32 = 120; -const NODE_PORT_LOOKUP_INTERVAL: std::time::Duration = std::time::Duration::from_secs(1); +const NODE_PORT_LOOKUP_ATTEMPTS_USIZE: usize = NODE_PORT_LOOKUP_ATTEMPTS as usize; +const NODE_PORT_LOOKUP_INTERVAL: Duration = Duration::from_secs(1); + +#[derive(Debug)] +enum NodePortLookupError { + NotAvailable, + Fatal(ClusterWaitError), +} pub async fn find_node_port( client: &Client, @@ -13,38 +22,16 @@ pub async fn find_node_port( service_name: &str, service_port: u16, ) -> Result { - for _ in 0..NODE_PORT_LOOKUP_ATTEMPTS { - match Api::::namespaced(client.clone(), namespace) - .get(service_name) - .await - { - Ok(service) => { - if let Some(spec) = service.spec.clone() - && let Some(ports) = spec.ports - { - for port in ports { - if port.port == i32::from(service_port) - && let Some(node_port) = port.node_port - { - return Ok(node_port as u16); - } - } - } - } - Err(err) => { - return Err(ClusterWaitError::ServiceFetch { - service: service_name.to_owned(), - source: err, - }); - } - } - sleep(NODE_PORT_LOOKUP_INTERVAL).await; - } + let services = Api::::namespaced(client.clone(), namespace); + let strategy = port_lookup_retry_strategy(); + let result = RetryIf::spawn( + strategy, + || query_node_port(&services, service_name, service_port), + |error: &NodePortLookupError| matches!(error, NodePortLookupError::NotAvailable), + ) + .await; - Err(ClusterWaitError::NodePortUnavailable { - service: service_name.to_owned(), - port: service_port, - }) + map_node_port_lookup_result(result, service_name, service_port) } pub async fn discover_node_ports( @@ -62,3 +49,60 @@ pub async fn discover_node_ports( testing: testing_port, }) } + +fn port_lookup_retry_strategy() -> impl Iterator { + FixedInterval::from_millis(NODE_PORT_LOOKUP_INTERVAL.as_millis() as u64) + .take(NODE_PORT_LOOKUP_ATTEMPTS_USIZE) +} + +async fn query_node_port( + services: &Api, + service_name: &str, + service_port: u16, +) -> Result { + match services.get(service_name).await { + Ok(service) => lookup_service_node_port(service, service_port), + Err(source) => Err(NodePortLookupError::Fatal(ClusterWaitError::ServiceFetch { + service: service_name.to_owned(), + source, + })), + } +} + +fn lookup_service_node_port( + service: Service, + service_port: u16, +) -> Result { + let ports = service.spec.and_then(|spec| spec.ports).unwrap_or_default(); + + for port in ports { + if let Some(node_port) = matching_node_port(&port, service_port) { + return Ok(node_port as u16); + } + } + + Err(NodePortLookupError::NotAvailable) +} + +fn matching_node_port(port: &ServicePort, service_port: u16) -> Option { + if port.port != i32::from(service_port) { + return None; + } + + port.node_port +} + +fn map_node_port_lookup_result( + result: Result, + service_name: &str, + service_port: u16, +) -> Result { + match result { + Ok(port) => Ok(port), + Err(NodePortLookupError::Fatal(error)) => Err(error), + Err(NodePortLookupError::NotAvailable) => Err(ClusterWaitError::NodePortUnavailable { + service: service_name.to_owned(), + port: service_port, + }), + } +} diff --git a/testing-framework/deployers/local/Cargo.toml b/testing-framework/deployers/local/Cargo.toml index 1ea16bd..ce7c833 100644 --- a/testing-framework/deployers/local/Cargo.toml +++ b/testing-framework/deployers/local/Cargo.toml @@ -13,17 +13,10 @@ version = "0.1.0" workspace = true [dependencies] -async-trait = "0.1" -lb-libp2p = { workspace = true } -lb-network-service = { workspace = true } -lb-node = { workspace = true } -lb-utils = { workspace = true } -rand = { workspace = true } -testing-framework-config = { workspace = true } -testing-framework-core = { path = "../../core" } -thiserror = { workspace = true } -tokio = { workspace = true } -tracing = { workspace = true } - -[dev-dependencies] -tracing-subscriber = "0.3" +async-trait = "0.1" +tempfile = { workspace = true } +testing-framework-core = { path = "../../core" } +thiserror = { workspace = true } +tokio = { workspace = true } +tokio-retry = "0.3" +tracing = { workspace = true } diff --git a/testing-framework/deployers/local/src/binary.rs b/testing-framework/deployers/local/src/binary.rs new file mode 100644 index 0000000..b023b07 --- /dev/null +++ b/testing-framework/deployers/local/src/binary.rs @@ -0,0 +1,73 @@ +use std::{env, path::PathBuf}; + +use tracing::{debug, info}; + +pub struct BinaryConfig { + /// Env var that overrides binary path. + pub env_var: &'static str, + /// Binary name expected on PATH. + pub binary_name: &'static str, + /// Repository-local fallback path when PATH lookup fails. + pub fallback_path: &'static str, +} + +pub struct BinaryResolver; + +impl BinaryResolver { + #[must_use] + pub fn resolve_path(config: &BinaryConfig) -> PathBuf { + if let Some(path) = Self::resolve_from_env(config) { + return path; + } + + if let Some(path) = Self::resolve_from_path(config.binary_name) { + return path; + } + + Self::fallback_path(config.binary_name, config.fallback_path) + } + + fn which_on_path(bin: &str) -> Option { + let path_env = env::var_os("PATH")?; + env::split_paths(&path_env) + .map(|p| p.join(bin)) + .find(|candidate| candidate.is_file()) + } + + fn resolve_from_env(config: &BinaryConfig) -> Option { + let path = env::var_os(config.env_var).map(PathBuf::from)?; + + info!( + env = config.env_var, + binary = config.binary_name, + path = %path.display(), + "resolved binary from env override" + ); + + Some(path) + } + + fn resolve_from_path(binary_name: &str) -> Option { + let path = Self::which_on_path(binary_name)?; + + info!( + binary = binary_name, + path = %path.display(), + "resolved binary from PATH" + ); + + Some(path) + } + + fn fallback_path(binary_name: &str, fallback_path: &str) -> PathBuf { + let fallback = PathBuf::from(fallback_path); + + debug!( + binary = binary_name, + path = %fallback.display(), + "falling back to binary path" + ); + + fallback + } +} diff --git a/testing-framework/deployers/local/src/deployer.rs b/testing-framework/deployers/local/src/deployer.rs new file mode 100644 index 0000000..d0006c1 --- /dev/null +++ b/testing-framework/deployers/local/src/deployer.rs @@ -0,0 +1,463 @@ +use std::{ + marker::PhantomData, + sync::{ + Arc, + atomic::{AtomicUsize, Ordering}, + }, + time::Duration, +}; + +use async_trait::async_trait; +use testing_framework_core::{ + scenario::{ + Application, CleanupGuard, Deployer, DeploymentPolicy, DynError, FeedHandle, FeedRuntime, + HttpReadinessRequirement, Metrics, NodeClients, NodeControlCapability, NodeControlHandle, + RetryPolicy, RunContext, Runner, Scenario, ScenarioError, spawn_feed, + }, + topology::DeploymentDescriptor, +}; +use thiserror::Error; +use tokio_retry::{ + RetryIf, + strategy::{ExponentialBackoff, jitter}, +}; +use tracing::{debug, info, warn}; + +use crate::{ + env::{LocalDeployerEnv, Node, wait_local_http_readiness}, + manual::ManualCluster, + node_control::{NodeManager, NodeManagerSeed}, +}; + +const READINESS_ATTEMPTS: usize = 3; +const READINESS_BACKOFF_BASE_MS: u64 = 250; +const READINESS_BACKOFF_MAX_SECS: u64 = 2; + +struct LocalProcessGuard { + nodes: Vec>, + feed_task: Option, +} + +impl LocalProcessGuard { + fn new(nodes: Vec>, feed_task: FeedHandle) -> Self { + Self { + nodes, + feed_task: Some(feed_task), + } + } +} + +impl CleanupGuard for LocalProcessGuard { + fn cleanup(mut self: Box) { + if let Some(feed_task) = self.feed_task.take() { + CleanupGuard::cleanup(Box::new(feed_task)); + } + // Nodes own local processes; dropping them stops the processes. + drop(self.nodes); + } +} +/// Spawns nodes as local processes. +#[derive(Clone)] +pub struct ProcessDeployer { + membership_check: bool, + _env: PhantomData, +} + +/// Errors returned by the local deployer. +#[derive(Debug, Error)] +pub enum ProcessDeployerError { + #[error("failed to spawn local topology: {source}")] + Spawn { + #[source] + source: DynError, + }, + #[error("readiness probe failed: {source}")] + ReadinessFailed { + #[source] + source: DynError, + }, + #[error("scenario topology is not supported by the local deployer")] + UnsupportedTopology, + #[error("workload failed: {source}")] + WorkloadFailed { + #[source] + source: DynError, + }, + #[error("expectations failed: {source}")] + ExpectationsFailed { + #[source] + source: DynError, + }, +} + +#[derive(Debug, Error)] +enum RetryAttemptError { + #[error("failed to spawn local topology: {source}")] + Spawn { + #[source] + source: DynError, + }, + #[error("readiness probe failed: {source}")] + Readiness { + #[source] + source: DynError, + }, +} + +impl From for ProcessDeployerError { + fn from(value: RetryAttemptError) -> Self { + match value { + RetryAttemptError::Spawn { source } => Self::Spawn { source }, + RetryAttemptError::Readiness { source } => Self::ReadinessFailed { source }, + } + } +} + +#[derive(Clone, Copy)] +struct RetryExecutionConfig { + max_attempts: usize, + keep_tempdir: bool, + readiness_enabled: bool, + readiness_requirement: HttpReadinessRequirement, +} + +impl From for ProcessDeployerError { + fn from(value: ScenarioError) -> Self { + match value { + ScenarioError::Workload(source) => Self::WorkloadFailed { source }, + ScenarioError::ExpectationCapture(source) | ScenarioError::Expectations(source) => { + Self::ExpectationsFailed { source } + } + } + } +} + +#[async_trait] +impl Deployer for ProcessDeployer { + type Error = ProcessDeployerError; + + async fn deploy(&self, scenario: &Scenario) -> Result, Self::Error> { + self.deploy_without_node_control(scenario).await + } +} + +#[async_trait] +impl Deployer for ProcessDeployer { + type Error = ProcessDeployerError; + + async fn deploy( + &self, + scenario: &Scenario, + ) -> Result, Self::Error> { + self.deploy_with_node_control(scenario).await + } +} + +impl ProcessDeployer { + /// Construct a local deployer. + #[must_use] + pub fn new() -> Self { + Self::default() + } + + /// Enable or disable membership readiness checks. + #[must_use] + pub fn with_membership_check(mut self, enabled: bool) -> Self { + self.membership_check = enabled; + self + } + + /// Build a manual cluster from a prepared topology descriptor. + #[must_use] + pub fn manual_cluster_from_descriptors(&self, descriptors: E::Deployment) -> ManualCluster { + ManualCluster::from_topology(descriptors) + } + + async fn deploy_without_node_control( + &self, + scenario: &Scenario, + ) -> Result, ProcessDeployerError> { + log_local_deploy_start( + scenario.deployment().node_count(), + scenario.deployment_policy(), + false, + ); + + let nodes = Self::spawn_nodes_for_scenario(scenario, self.membership_check).await?; + let node_clients = NodeClients::::new(nodes.iter().map(|node| node.client()).collect()); + let runtime = run_context_for( + scenario.deployment().clone(), + node_clients, + scenario.duration(), + scenario.expectation_cooldown(), + None, + ) + .await?; + + let cleanup_guard: Box = + Box::new(LocalProcessGuard::::new(nodes, runtime.feed_task)); + + Ok(Runner::new(runtime.context, Some(cleanup_guard))) + } + + async fn deploy_with_node_control( + &self, + scenario: &Scenario, + ) -> Result, ProcessDeployerError> { + log_local_deploy_start( + scenario.deployment().node_count(), + scenario.deployment_policy(), + true, + ); + + let nodes = Self::spawn_nodes_for_scenario(scenario, self.membership_check).await?; + let node_control = self.node_control_from(scenario, nodes); + let node_clients = node_control.node_clients(); + let runtime = run_context_for( + scenario.deployment().clone(), + node_clients, + scenario.duration(), + scenario.expectation_cooldown(), + Some(node_control), + ) + .await?; + + Ok(Runner::new( + runtime.context, + Some(Box::new(runtime.feed_task)), + )) + } + + fn node_control_from( + &self, + scenario: &Scenario, + nodes: Vec>, + ) -> Arc> { + let node_control = Arc::new(NodeManager::new_with_seed( + scenario.deployment().clone(), + NodeClients::default(), + keep_tempdir(scenario.deployment_policy()), + NodeManagerSeed::default(), + )); + node_control.initialize_with_nodes(nodes); + node_control + } + + async fn spawn_nodes_for_scenario( + scenario: &Scenario, + membership_check: bool, + ) -> Result>, ProcessDeployerError> { + info!( + nodes = scenario.deployment().node_count(), + "spawning local nodes" + ); + Self::spawn_with_readiness_retry( + scenario.deployment(), + membership_check, + scenario.deployment_policy(), + ) + .await + } + + async fn spawn_with_readiness_retry( + descriptors: &E::Deployment, + membership_check: bool, + deployment_policy: DeploymentPolicy, + ) -> Result>, ProcessDeployerError> { + let (retry_policy, execution) = + build_retry_execution_config(deployment_policy, membership_check); + let attempts = Arc::new(AtomicUsize::new(0)); + let strategy = retry_backoff_strategy(retry_policy, execution.max_attempts); + let operation = { + let attempts = Arc::clone(&attempts); + move || { + let attempts = Arc::clone(&attempts); + async move { run_retry_attempt::(descriptors, execution, attempts).await } + } + }; + let should_retry = retry_decision(Arc::clone(&attempts), execution.max_attempts); + + let nodes = RetryIf::spawn(strategy, operation, should_retry).await?; + Ok(nodes) + } +} + +fn build_retry_execution_config( + deployment_policy: DeploymentPolicy, + membership_check: bool, +) -> (RetryPolicy, RetryExecutionConfig) { + let retry_policy = retry_policy_from(deployment_policy); + let execution = RetryExecutionConfig { + max_attempts: retry_policy.max_attempts.max(1), + keep_tempdir: keep_tempdir(deployment_policy), + readiness_enabled: deployment_policy.readiness_enabled && membership_check, + readiness_requirement: deployment_policy.readiness_requirement, + }; + + (retry_policy, execution) +} + +async fn run_retry_attempt( + descriptors: &E::Deployment, + execution: RetryExecutionConfig, + attempts: Arc, +) -> Result>, RetryAttemptError> { + let attempt = attempts.fetch_add(1, Ordering::Relaxed) + 1; + let nodes = spawn_nodes_for_attempt::(descriptors, execution.keep_tempdir).await?; + run_readiness_for_attempt::(attempt, nodes, execution).await +} + +fn retry_policy_from(deployment_policy: DeploymentPolicy) -> RetryPolicy { + deployment_policy + .retry_policy + .unwrap_or_else(default_local_retry_policy) +} + +fn retry_backoff_strategy( + retry_policy: RetryPolicy, + max_attempts: usize, +) -> impl Iterator { + ExponentialBackoff::from_millis(retry_policy.base_delay.as_millis() as u64) + .max_delay(retry_policy.max_delay) + .map(jitter) + .take(max_attempts.saturating_sub(1)) +} + +async fn spawn_nodes_for_attempt( + descriptors: &E::Deployment, + keep_tempdir: bool, +) -> Result>, RetryAttemptError> { + NodeManager::::spawn_initial_nodes(descriptors, keep_tempdir) + .await + .map_err(|source| RetryAttemptError::Spawn { + source: source.into(), + }) +} + +async fn run_readiness_for_attempt( + attempt: usize, + nodes: Vec>, + execution: RetryExecutionConfig, +) -> Result>, RetryAttemptError> { + if !execution.readiness_enabled { + info!("skipping local readiness checks"); + return Ok(nodes); + } + + match wait_local_http_readiness::(&nodes, execution.readiness_requirement).await { + Ok(()) => { + info!(attempt, "local nodes are ready"); + Ok(nodes) + } + Err(source) => { + let error: DynError = source.into(); + debug!(attempt, error = ?error, "local readiness failed"); + drop(nodes); + Err(RetryAttemptError::Readiness { source: error }) + } + } +} + +fn retry_decision( + attempts: Arc, + max_attempts: usize, +) -> impl FnMut(&RetryAttemptError) -> bool { + move |error: &RetryAttemptError| { + let attempt = attempts.load(Ordering::Relaxed); + if attempt < max_attempts { + warn!( + attempt, + max_attempts, + error = %error, + "local spawn/readiness failed; retrying with backoff" + ); + true + } else { + false + } + } +} + +impl Default for ProcessDeployer { + fn default() -> Self { + Self { + membership_check: true, + _env: PhantomData, + } + } +} + +const fn default_local_retry_policy() -> RetryPolicy { + RetryPolicy::new( + READINESS_ATTEMPTS, + Duration::from_millis(READINESS_BACKOFF_BASE_MS), + Duration::from_secs(READINESS_BACKOFF_MAX_SECS), + ) +} + +const fn keep_tempdir(policy: DeploymentPolicy) -> bool { + policy.cleanup_policy.preserve_artifacts +} + +async fn spawn_feed_with( + node_clients: &NodeClients, +) -> Result<(::Feed, FeedHandle), ProcessDeployerError> { + debug!( + nodes = node_clients.len(), + "selecting node client for local feed" + ); + + let Some(block_source_client) = node_clients.random_client() else { + return Err(ProcessDeployerError::WorkloadFailed { + source: "feed requires at least one node".into(), + }); + }; + + info!("starting feed"); + + spawn_feed::(block_source_client) + .await + .map_err(workload_error) +} + +fn workload_error(source: impl Into) -> ProcessDeployerError { + ProcessDeployerError::WorkloadFailed { + source: source.into(), + } +} + +fn log_local_deploy_start(node_count: usize, policy: DeploymentPolicy, has_node_control: bool) { + info!( + nodes = node_count, + node_control = has_node_control, + readiness_enabled = policy.readiness_enabled, + readiness_requirement = ?policy.readiness_requirement, + "starting local deployment" + ); +} + +struct RuntimeContext { + context: RunContext, + feed_task: FeedHandle, +} + +async fn run_context_for( + descriptors: E::Deployment, + node_clients: NodeClients, + duration: Duration, + expectation_cooldown: Duration, + node_control: Option>>, +) -> Result, ProcessDeployerError> { + let (feed, feed_task) = spawn_feed_with::(&node_clients).await?; + let context = RunContext::new( + descriptors, + node_clients, + duration, + expectation_cooldown, + Metrics::empty(), + feed, + node_control, + ); + + Ok(RuntimeContext { context, feed_task }) +} diff --git a/testing-framework/deployers/local/src/env.rs b/testing-framework/deployers/local/src/env.rs new file mode 100644 index 0000000..063c342 --- /dev/null +++ b/testing-framework/deployers/local/src/env.rs @@ -0,0 +1,207 @@ +use std::{collections::HashMap, path::Path}; + +use testing_framework_core::scenario::{ + Application, DynError, HttpReadinessRequirement, ReadinessError, StartNodeOptions, + wait_for_http_ports_with_requirement, +}; + +use crate::process::{LaunchSpec, NodeEndpoints, ProcessNode, ProcessSpawnError}; + +pub type Node = ProcessNode<::NodeConfig, ::NodeClient>; + +pub struct BuiltNodeConfig { + pub config: Config, + pub network_port: u16, +} + +pub struct NodeConfigEntry { + pub name: String, + pub config: NodeConfigValue, +} + +#[async_trait::async_trait] +pub trait LocalDeployerEnv: Application + Sized +where + ::NodeConfig: Clone + Send + Sync + 'static, +{ + fn build_node_config( + topology: &Self::Deployment, + index: usize, + peer_ports_by_name: &HashMap, + options: &StartNodeOptions, + peer_ports: &[u16], + ) -> Result::NodeConfig>, DynError>; + + fn build_initial_node_configs( + topology: &Self::Deployment, + ) -> Result::NodeConfig>>, ProcessSpawnError>; + + fn build_launch_spec( + config: &::NodeConfig, + dir: &Path, + label: &str, + ) -> Result; + + fn node_endpoints(config: &::NodeConfig) -> NodeEndpoints; + + fn node_peer_port(node: &Node) -> u16 { + node.endpoints().api.port() + } + + fn node_client(endpoints: &NodeEndpoints) -> Self::NodeClient; + + fn readiness_endpoint_path() -> &'static str { + "/" + } + + async fn wait_readiness_stable(_nodes: &[Node]) -> Result<(), DynError> { + Ok(()) + } +} + +pub async fn wait_local_http_readiness( + nodes: &[Node], + requirement: HttpReadinessRequirement, +) -> Result<(), ReadinessError> { + let ports: Vec<_> = nodes + .iter() + .map(|node| node.endpoints().api.port()) + .collect(); + wait_for_http_ports_with_requirement(&ports, E::readiness_endpoint_path(), requirement).await?; + + E::wait_readiness_stable(nodes) + .await + .map_err(|source| ReadinessError::ClusterStable { source }) +} + +pub async fn spawn_node_from_config( + label: String, + config: ::NodeConfig, + keep_tempdir: bool, + persist_dir: Option<&std::path::Path>, +) -> Result, ProcessSpawnError> { + ProcessNode::spawn( + &label, + config, + E::build_launch_spec, + E::node_endpoints, + keep_tempdir, + persist_dir, + E::node_client, + ) + .await +} + +#[cfg(test)] +mod tests { + use std::{ + path::Path, + sync::atomic::{AtomicUsize, Ordering}, + }; + + use async_trait::async_trait; + use testing_framework_core::{ + scenario::{Application, DynError, Feed, FeedRuntime}, + topology::DeploymentDescriptor, + }; + + use super::*; + + static STABLE_CALLS: AtomicUsize = AtomicUsize::new(0); + + #[derive(Clone)] + struct DummyFeed; + + impl Feed for DummyFeed { + type Subscription = (); + + fn subscribe(&self) -> Self::Subscription {} + } + + struct DummyFeedRuntime; + + #[async_trait] + impl FeedRuntime for DummyFeedRuntime { + type Feed = DummyFeed; + + async fn run(self: Box) {} + } + + #[derive(Clone)] + struct DummyConfig; + + #[derive(Clone)] + struct DummyTopology; + + impl DeploymentDescriptor for DummyTopology { + fn node_count(&self) -> usize { + 0 + } + } + + struct DummyEnv; + + #[async_trait] + impl Application for DummyEnv { + type Deployment = DummyTopology; + type NodeClient = (); + type NodeConfig = DummyConfig; + type FeedRuntime = DummyFeedRuntime; + + async fn prepare_feed( + _client: Self::NodeClient, + ) -> Result<(::Feed, Self::FeedRuntime), DynError> + { + Ok((DummyFeed, DummyFeedRuntime)) + } + } + + #[async_trait] + impl LocalDeployerEnv for DummyEnv { + fn build_node_config( + _topology: &Self::Deployment, + _index: usize, + _peer_ports_by_name: &HashMap, + _options: &StartNodeOptions, + _peer_ports: &[u16], + ) -> Result::NodeConfig>, DynError> { + unreachable!("not used in this test") + } + + fn build_initial_node_configs( + _topology: &Self::Deployment, + ) -> Result::NodeConfig>>, ProcessSpawnError> + { + unreachable!("not used in this test") + } + + fn build_launch_spec( + _config: &::NodeConfig, + _dir: &Path, + _label: &str, + ) -> Result { + Ok(LaunchSpec::default()) + } + + fn node_endpoints(_config: &::NodeConfig) -> NodeEndpoints { + NodeEndpoints::default() + } + + fn node_client(_endpoints: &NodeEndpoints) -> Self::NodeClient {} + + async fn wait_readiness_stable(_nodes: &[Node]) -> Result<(), DynError> { + STABLE_CALLS.fetch_add(1, Ordering::SeqCst); + Ok(()) + } + } + + #[tokio::test] + async fn empty_cluster_still_runs_stability_hook() { + STABLE_CALLS.store(0, Ordering::SeqCst); + let nodes: Vec> = Vec::new(); + wait_local_http_readiness::(&nodes, HttpReadinessRequirement::AllNodesReady) + .await + .expect("empty cluster should be considered ready"); + assert_eq!(STABLE_CALLS.load(Ordering::SeqCst), 1); + } +} diff --git a/testing-framework/deployers/local/src/lib.rs b/testing-framework/deployers/local/src/lib.rs index a9379bf..03be32e 100644 --- a/testing-framework/deployers/local/src/lib.rs +++ b/testing-framework/deployers/local/src/lib.rs @@ -1,7 +1,16 @@ +pub mod binary; +mod deployer; +pub mod env; mod manual; mod node_control; -mod runner; +pub mod process; -pub use manual::{LocalManualCluster, ManualClusterError}; -pub use node_control::{LocalNodeManager, LocalNodeManagerError, LocalNodeManagerSeed}; -pub use runner::{LocalDeployer, LocalDeployerError}; +pub use binary::{BinaryConfig, BinaryResolver}; +pub use deployer::{ProcessDeployer, ProcessDeployerError}; +pub use env::{BuiltNodeConfig, LocalDeployerEnv, NodeConfigEntry}; +pub use manual::{ManualCluster, ManualClusterError}; +pub use node_control::{NodeManager, NodeManagerError, NodeManagerSeed}; +pub use process::{ + LaunchEnvVar, LaunchFile, LaunchSpec, NodeEndpointPort, NodeEndpoints, ProcessNode, + ProcessSpawnError, +}; diff --git a/testing-framework/deployers/local/src/manual/mod.rs b/testing-framework/deployers/local/src/manual/mod.rs index fa90a61..7dbccaf 100644 --- a/testing-framework/deployers/local/src/manual/mod.rs +++ b/testing-framework/deployers/local/src/manual/mod.rs @@ -1,57 +1,37 @@ use testing_framework_core::{ manual::ManualClusterHandle, - nodes::ApiClient, - scenario::{DynError, NodeControlHandle, StartNodeOptions, StartedNode}, - topology::{ - config::{TopologyBuildError, TopologyBuilder, TopologyConfig}, - readiness::{ReadinessCheck, ReadinessError}, - }, + scenario::{DynError, NodeControlHandle, ReadinessError, StartNodeOptions, StartedNode}, }; use thiserror::Error; -use crate::node_control::{LocalNodeManager, LocalNodeManagerError, ReadinessNode}; - -mod readiness; - -use readiness::ManualNetworkReadiness; +use crate::{ + env::LocalDeployerEnv, + node_control::{NodeManager, NodeManagerError}, +}; #[derive(Debug, Error)] pub enum ManualClusterError { - #[error("failed to build topology: {source}")] - Build { - #[source] - source: TopologyBuildError, - }, #[error(transparent)] - Dynamic(#[from] LocalNodeManagerError), + Dynamic(#[from] NodeManagerError), } /// Imperative, in-process cluster that can start nodes on demand. -pub struct LocalManualCluster { - nodes: LocalNodeManager, +pub struct ManualCluster { + nodes: NodeManager, } -impl LocalManualCluster { - pub(crate) fn from_builder(builder: TopologyBuilder) -> Result { - let descriptors = builder - .build() - .map_err(|source| ManualClusterError::Build { source })?; - - let nodes = LocalNodeManager::new( +impl ManualCluster { + pub fn from_topology(descriptors: E::Deployment) -> Self { + let nodes = NodeManager::new( descriptors, testing_framework_core::scenario::NodeClients::default(), ); - Ok(Self { nodes }) - } - - pub(crate) fn from_config(config: TopologyConfig) -> Result { - let builder = TopologyBuilder::new(config); - Self::from_builder(builder) + Self { nodes } } #[must_use] - pub fn node_client(&self, name: &str) -> Option { + pub fn node_client(&self, name: &str) -> Option { self.nodes.node_client(name) } @@ -60,18 +40,18 @@ impl LocalManualCluster { self.nodes.node_pid(name) } - pub async fn start_node(&self, name: &str) -> Result { + pub async fn start_node(&self, name: &str) -> Result, ManualClusterError> { Ok(self .nodes - .start_node_with(name, StartNodeOptions::default()) + .start_node_with(name, StartNodeOptions::::default()) .await?) } pub async fn start_node_with( &self, name: &str, - options: StartNodeOptions, - ) -> Result { + options: StartNodeOptions, + ) -> Result, ManualClusterError> { Ok(self.nodes.start_node_with(name, options).await?) } @@ -88,31 +68,18 @@ impl LocalManualCluster { } pub async fn wait_network_ready(&self) -> Result<(), ReadinessError> { - let nodes = self.nodes.readiness_nodes(); - if self.is_singleton(&nodes) { - return Ok(()); - } - - self.wait_nodes_ready(nodes).await - } - - fn is_singleton(&self, nodes: &[ReadinessNode]) -> bool { - nodes.len() <= 1 - } - - async fn wait_nodes_ready(&self, nodes: Vec) -> Result<(), ReadinessError> { - ManualNetworkReadiness::new(nodes).wait().await + self.nodes.wait_network_ready().await } } -impl Drop for LocalManualCluster { +impl Drop for ManualCluster { fn drop(&mut self) { self.stop_all(); } } #[async_trait::async_trait] -impl NodeControlHandle for LocalManualCluster { +impl NodeControlHandle for ManualCluster { async fn restart_node(&self, name: &str) -> Result<(), DynError> { self.nodes .restart_node(name) @@ -124,8 +91,9 @@ impl NodeControlHandle for LocalManualCluster { self.nodes.stop_node(name).await.map_err(|err| err.into()) } - async fn start_node(&self, name: &str) -> Result { - self.start_node_with(name, StartNodeOptions::default()) + async fn start_node(&self, name: &str) -> Result, DynError> { + self.nodes + .start_node_with(name, StartNodeOptions::::default()) .await .map_err(|err| err.into()) } @@ -133,14 +101,15 @@ impl NodeControlHandle for LocalManualCluster { async fn start_node_with( &self, name: &str, - options: StartNodeOptions, - ) -> Result { - self.start_node_with(name, options) + options: StartNodeOptions, + ) -> Result, DynError> { + self.nodes + .start_node_with(name, options) .await .map_err(|err| err.into()) } - fn node_client(&self, name: &str) -> Option { + fn node_client(&self, name: &str) -> Option { self.node_client(name) } @@ -150,13 +119,14 @@ impl NodeControlHandle for LocalManualCluster { } #[async_trait::async_trait] -impl ManualClusterHandle for LocalManualCluster { +impl ManualClusterHandle for ManualCluster { async fn start_node_with( &self, name: &str, - options: StartNodeOptions, - ) -> Result { - self.start_node_with(name, options) + options: StartNodeOptions, + ) -> Result, DynError> { + self.nodes + .start_node_with(name, options) .await .map_err(|err| err.into()) } diff --git a/testing-framework/deployers/local/src/manual/readiness.rs b/testing-framework/deployers/local/src/manual/readiness.rs deleted file mode 100644 index 2e9ac08..0000000 --- a/testing-framework/deployers/local/src/manual/readiness.rs +++ /dev/null @@ -1,74 +0,0 @@ -use std::time::Duration; - -use lb_network_service::backends::libp2p::Libp2pInfo; -use testing_framework_core::topology::readiness::ReadinessCheck; -use tokio::time::timeout; - -use crate::node_control::ReadinessNode; - -const NETWORK_REQUEST_TIMEOUT: Duration = Duration::from_secs(10); - -pub(super) struct ManualNetworkReadiness { - nodes: Vec, -} - -impl ManualNetworkReadiness { - pub(super) fn new(nodes: Vec) -> Self { - Self { nodes } - } -} - -#[async_trait::async_trait] -impl<'a> ReadinessCheck<'a> for ManualNetworkReadiness { - type Data = Vec; - - async fn collect(&'a self) -> Self::Data { - let mut statuses = Vec::with_capacity(self.nodes.len()); - for node in &self.nodes { - let result = timeout(NETWORK_REQUEST_TIMEOUT, node.api.network_info()) - .await - .map_err(|_| "network_info request timed out".to_owned()) - .and_then(|res| res.map_err(|err| err.to_string())); - - statuses.push(ManualNetworkStatus { - label: node.label.clone(), - expected_peers: node.expected_peers, - result, - }); - } - statuses - } - - fn is_ready(&self, data: &Self::Data) -> bool { - data.iter().all( - |status| match (status.expected_peers, status.result.as_ref()) { - (Some(expected), Ok(info)) => info.n_peers >= expected, - _ => false, - }, - ) - } - - fn timeout_message(&self, data: Self::Data) -> String { - let summary = data - .into_iter() - .map(|entry| match entry.result { - Ok(info) => format!( - "{} (peers {}/{})", - entry.label, - info.n_peers, - entry.expected_peers.unwrap_or(0) - ), - Err(err) => format!("{} (error: {err})", entry.label), - }) - .collect::>() - .join(", "); - - format!("timed out waiting for network readiness: {summary}") - } -} - -pub(super) struct ManualNetworkStatus { - label: String, - expected_peers: Option, - result: Result, -} diff --git a/testing-framework/deployers/local/src/node_control/config.rs b/testing-framework/deployers/local/src/node_control/config.rs deleted file mode 100644 index bcab4b9..0000000 --- a/testing-framework/deployers/local/src/node_control/config.rs +++ /dev/null @@ -1,119 +0,0 @@ -use std::collections::HashMap; - -use lb_libp2p::Multiaddr; -use lb_utils::net::get_available_udp_port; -use rand::Rng as _; -use testing_framework_config::topology::configs::{ - consensus, - runtime::{build_general_config_for_node, build_initial_peers}, - time::GeneralTimeConfig, -}; -pub(crate) use testing_framework_core::{ - scenario::{PeerSelection, StartNodeOptions}, - topology::{ - config::{NodeConfigPatch, TopologyConfig}, - configs::GeneralConfig, - generation::{GeneratedNodeConfig, GeneratedTopology}, - }, -}; - -use super::LocalNodeManagerError; - -pub(super) fn build_general_config_for( - descriptors: &GeneratedTopology, - base_consensus: &consensus::GeneralConsensusConfig, - base_time: &GeneralTimeConfig, - index: usize, - peer_ports_by_name: &HashMap, - options: &StartNodeOptions, - peer_ports: &[u16], -) -> Result<(GeneralConfig, u16, Option), LocalNodeManagerError> { - if let Some(node) = descriptor_for(descriptors, index) { - let mut config = node.general.clone(); - let initial_peers = resolve_initial_peers( - peer_ports_by_name, - options, - &config.network_config.backend.initial_peers, - descriptors, - peer_ports, - )?; - - config.network_config.backend.initial_peers = initial_peers; - - return Ok((config, node.network_port(), node.config_patch.clone())); - } - - let id = random_node_id(); - let network_port = allocate_udp_port("network port")?; - let blend_port = allocate_udp_port("Blend port")?; - let topology = descriptors.config(); - let initial_peers = - resolve_initial_peers(peer_ports_by_name, options, &[], descriptors, peer_ports)?; - let general_config = build_general_config_for_node( - id, - network_port, - initial_peers, - blend_port, - &topology.consensus_params, - &topology.wallet_config, - base_consensus, - base_time, - ) - .map_err(|source| LocalNodeManagerError::Config { source })?; - - Ok((general_config, network_port, None)) -} - -fn descriptor_for(descriptors: &GeneratedTopology, index: usize) -> Option<&GeneratedNodeConfig> { - descriptors.nodes().get(index) -} - -fn resolve_peer_names( - peer_ports_by_name: &HashMap, - peer_names: &[String], -) -> Result, LocalNodeManagerError> { - let mut peers = Vec::with_capacity(peer_names.len()); - for name in peer_names { - let port = - peer_ports_by_name - .get(name) - .ok_or_else(|| LocalNodeManagerError::InvalidArgument { - message: format!("unknown peer name '{name}'"), - })?; - peers.push(testing_framework_config::node_address_from_port(*port)); - } - Ok(peers) -} - -fn resolve_initial_peers( - peer_ports_by_name: &HashMap, - options: &StartNodeOptions, - default_peers: &[Multiaddr], - descriptors: &GeneratedTopology, - peer_ports: &[u16], -) -> Result, LocalNodeManagerError> { - match &options.peers { - PeerSelection::Named(names) => resolve_peer_names(peer_ports_by_name, names), - PeerSelection::DefaultLayout => { - if !default_peers.is_empty() { - Ok(default_peers.to_vec()) - } else { - let topology: &TopologyConfig = descriptors.config(); - Ok(build_initial_peers(&topology.network_params, peer_ports)) - } - } - PeerSelection::None => Ok(Vec::new()), - } -} - -fn random_node_id() -> [u8; 32] { - let mut id = [0u8; 32]; - rand::thread_rng().fill(&mut id); - id -} - -fn allocate_udp_port(label: &'static str) -> Result { - get_available_udp_port().ok_or_else(|| LocalNodeManagerError::PortAllocation { - message: format!("failed to allocate free UDP port for {label}"), - }) -} diff --git a/testing-framework/deployers/local/src/node_control/mod.rs b/testing-framework/deployers/local/src/node_control/mod.rs index d39965a..b0323c7 100644 --- a/testing-framework/deployers/local/src/node_control/mod.rs +++ b/testing-framework/deployers/local/src/node_control/mod.rs @@ -1,42 +1,42 @@ use std::{ - collections::{HashMap, HashSet}, - sync::Mutex, + collections::HashMap, + sync::{Mutex, MutexGuard}, }; -use lb_node::config::RunConfig; -use testing_framework_config::topology::configs::{consensus, time}; -use testing_framework_core::{ - nodes::{ - ApiClient, - node::{Node, apply_node_config_patch, create_node_config}, - }, - scenario::{DynError, NodeControlHandle, StartNodeOptions, StartedNode}, - topology::{ - deployment::Topology, - generation::{GeneratedTopology, find_expected_peer_counts}, - utils::multiaddr_port, - }, +use testing_framework_core::scenario::{ + Application, DynError, NodeClients, NodeControlHandle, ReadinessError, StartNodeOptions, + StartedNode, wait_for_http_ports, }; use thiserror::Error; -mod config; +use crate::{ + env::{LocalDeployerEnv, Node, spawn_node_from_config}, + process::ProcessSpawnError, +}; + mod state; -use config::build_general_config_for; use state::LocalNodeManagerState; -use testing_framework_core::scenario::NodeClients; + +#[derive(Clone)] +struct NodeStartSnapshot { + peer_ports: Vec, + peer_ports_by_name: HashMap, + node_name: String, + index: usize, +} #[derive(Debug, Error)] -pub enum LocalNodeManagerError { +pub enum NodeManagerError { #[error("failed to generate node config: {source}")] Config { #[source] - source: testing_framework_config::topology::configs::GeneralConfigError, + source: DynError, }, #[error("failed to spawn node: {source}")] Spawn { #[source] - source: testing_framework_core::nodes::common::node::SpawnNodeError, + source: DynError, }, #[error("{message}")] InvalidArgument { message: String }, @@ -49,97 +49,56 @@ pub enum LocalNodeManagerError { #[error("failed to restart node: {source}")] Restart { #[source] - source: testing_framework_core::nodes::common::node::SpawnNodeError, + source: DynError, }, } -pub struct LocalNodeManager { - descriptors: GeneratedTopology, - base_consensus: consensus::GeneralConsensusConfig, - base_time: time::GeneralTimeConfig, - node_clients: NodeClients, - seed: LocalNodeManagerSeed, - state: Mutex, +pub struct NodeManager { + descriptors: E::Deployment, + node_clients: NodeClients, + keep_tempdir: bool, + seed: NodeManagerSeed, + state: Mutex>, } #[derive(Clone, Default)] -pub struct LocalNodeManagerSeed { +pub struct NodeManagerSeed { pub node_count: usize, pub peer_ports: Vec, pub peer_ports_by_name: HashMap, } -impl LocalNodeManagerSeed { - #[must_use] - pub fn from_topology(descriptors: &GeneratedTopology) -> Self { - let peer_ports = descriptors - .nodes() - .iter() - .map(|node| node.network_port()) - .collect::>(); - - let peer_ports_by_name = descriptors - .nodes() - .iter() - .map(|node| (format!("node-{}", node.index()), node.network_port())) - .collect(); - - Self { - node_count: descriptors.nodes().len(), - peer_ports, - peer_ports_by_name, - } - } -} - -pub(crate) struct ReadinessNode { - pub(crate) label: String, - pub(crate) expected_peers: Option, - pub(crate) api: ApiClient, -} - -impl LocalNodeManager { - fn default_label(index: usize) -> String { - format!("node-{index}") - } - +impl NodeManager { pub async fn spawn_initial_nodes( - descriptors: &GeneratedTopology, - ) -> Result, testing_framework_core::nodes::common::node::SpawnNodeError> { - let mut nodes = Vec::with_capacity(descriptors.nodes().len()); - for node in descriptors.nodes() { - let label = Self::default_label(node.index()); - let config = create_node_config(node.general.clone()); - let spawned = Node::spawn(config, &label, node.persist_dir.clone()).await?; - nodes.push(spawned); + descriptors: &E::Deployment, + keep_tempdir: bool, + ) -> Result>, ProcessSpawnError> { + let configs = E::build_initial_node_configs(descriptors)?; + let mut spawned = Vec::with_capacity(configs.len()); + for config_entry in configs { + spawned.push( + spawn_node_from_config::( + config_entry.name, + config_entry.config, + keep_tempdir, + None, + ) + .await?, + ); } - Ok(nodes) + Ok(spawned) } - - pub async fn spawn_initial_topology( - descriptors: &GeneratedTopology, - ) -> Result { - let nodes = Self::spawn_initial_nodes(descriptors).await?; - Ok(Topology::from_nodes(nodes)) - } - pub fn new(descriptors: GeneratedTopology, node_clients: NodeClients) -> Self { - Self::new_with_seed(descriptors, node_clients, LocalNodeManagerSeed::default()) + pub fn new(descriptors: E::Deployment, node_clients: NodeClients) -> Self { + Self::new_with_seed(descriptors, node_clients, false, NodeManagerSeed::default()) } pub fn new_with_seed( - descriptors: GeneratedTopology, - node_clients: NodeClients, - seed: LocalNodeManagerSeed, + descriptors: E::Deployment, + node_clients: NodeClients, + keep_tempdir: bool, + seed: NodeManagerSeed, ) -> Self { - let base_node = descriptors - .nodes() - .first() - .expect("generated topology must include at least one node"); - - let base_consensus = base_node.general.consensus_config.clone(); - let base_time = base_node.general.time_config.clone(); - let state = LocalNodeManagerState { node_count: seed.node_count, peer_ports: seed.peer_ports.clone(), @@ -151,30 +110,23 @@ impl LocalNodeManager { Self { descriptors, - base_consensus, - base_time, node_clients, + keep_tempdir, seed, state: Mutex::new(state), } } #[must_use] - pub fn node_client(&self, name: &str) -> Option { - let state = self - .state - .lock() - .unwrap_or_else(|poisoned| poisoned.into_inner()); + pub fn node_client(&self, name: &str) -> Option { + let state = self.lock_state(); state.clients_by_name.get(name).cloned() } #[must_use] pub fn node_pid(&self, name: &str) -> Option { - let mut state = self - .state - .lock() - .unwrap_or_else(|poisoned| poisoned.into_inner()); + let mut state = self.lock_state(); let index = *state.indices_by_name.get(name)?; let node = state.nodes.get_mut(index)?; @@ -186,10 +138,7 @@ impl LocalNodeManager { } pub fn stop_all(&self) { - let mut state = self - .state - .lock() - .unwrap_or_else(|poisoned| poisoned.into_inner()); + let mut state = self.lock_state(); state.nodes.clear(); state.peer_ports.clone_from(&self.seed.peer_ports); @@ -202,25 +151,16 @@ impl LocalNodeManager { self.node_clients.clear(); } - pub fn initialize_with_nodes(&self, nodes: Vec) { + pub fn initialize_with_nodes(&self, nodes: Vec>) { self.node_clients.clear(); - let mut state = self - .state - .lock() - .unwrap_or_else(|poisoned| poisoned.into_inner()); - - state.nodes.clear(); - state.peer_ports.clear(); - state.peer_ports_by_name.clear(); - state.clients_by_name.clear(); - state.indices_by_name.clear(); - state.node_count = 0; + let mut state = self.lock_state(); + clear_registered_nodes(&mut state); for (idx, node) in nodes.into_iter().enumerate() { - let name = Self::default_label(idx); - let port = node.config().user.network.backend.swarm.port; - let client = node.api().clone(); + let name = default_node_label(idx); + let port = E::node_peer_port(&node); + let client = node.client(); self.node_clients.add_node(client.clone()); state.register_node(&name, port, client, node); @@ -228,253 +168,222 @@ impl LocalNodeManager { } #[must_use] - pub fn node_clients(&self) -> NodeClients { + pub fn node_clients(&self) -> NodeClients { self.node_clients.clone() } + pub async fn wait_network_ready(&self) -> Result<(), ReadinessError> { + let ports: Vec<_> = { + let state = self.lock_state(); + state + .nodes + .iter() + .map(|node| node.endpoints().api.port()) + .collect() + }; + + if ports.len() <= 1 { + return Ok(()); + } + + wait_for_http_ports(&ports, E::readiness_endpoint_path()).await + } + pub async fn start_node_with( &self, name: &str, - options: StartNodeOptions, - ) -> Result { - self.start_node(name, options).await - } + options: StartNodeOptions, + ) -> Result, NodeManagerError> { + let snapshot = self.start_snapshot(name)?; - pub(crate) fn readiness_nodes(&self) -> Vec { - let state = self - .state - .lock() - .unwrap_or_else(|poisoned| poisoned.into_inner()); - - let listen_ports = state - .nodes - .iter() - .map(|node| node.config().user.network.backend.swarm.port) - .collect::>(); - - let initial_peer_ports = state - .nodes - .iter() - .map(|node| { - node.config() - .user - .network - .backend - .initial_peers - .iter() - .filter_map(multiaddr_port) - .collect::>() - }) - .collect::>(); - - let expected_peer_counts = find_expected_peer_counts(&listen_ports, &initial_peer_ports); - - state - .nodes - .iter() - .enumerate() - .map(|(idx, node)| ReadinessNode { - label: format!( - "node#{idx}@{}", - node.config().user.network.backend.swarm.port - ), - expected_peers: expected_peer_counts.get(idx).copied(), - api: node.api().clone(), - }) - .collect::>() - } - - async fn start_node( - &self, - name: &str, - options: StartNodeOptions, - ) -> Result { - let (peer_ports, peer_ports_by_name, node_name, index) = { - let state = self - .state - .lock() - .unwrap_or_else(|poisoned| poisoned.into_inner()); - - let index = state.node_count; - let label = if name.trim().is_empty() { - Self::default_label(index) - } else if name.starts_with("node-") { - name.to_string() - } else { - format!("node-{name}") - }; - - if state.peer_ports_by_name.contains_key(&label) { - return Err(LocalNodeManagerError::InvalidArgument { - message: format!("node name '{label}' already exists"), - }); - } - - ( - state.peer_ports.clone(), - state.peer_ports_by_name.clone(), - label, - index, - ) - }; - - let (general_config, network_port, descriptor_patch) = build_general_config_for( + let mut built = E::build_node_config( &self.descriptors, - &self.base_consensus, - &self.base_time, - index, - &peer_ports_by_name, + snapshot.index, + &snapshot.peer_ports_by_name, &options, - &peer_ports, - )?; + &snapshot.peer_ports, + ) + .map_err(|source| NodeManagerError::Config { source })?; - let config = build_node_config( - general_config, - descriptor_patch.as_ref(), - options.config_patch.as_ref(), - )?; + if let Some(config_patch) = &options.config_patch { + built.config = + config_patch(built.config).map_err(|source| NodeManagerError::ConfigPatch { + message: source.to_string(), + })?; + } - let api_client = self - .spawn_and_register_node(&node_name, network_port, config, options.persist_dir) + let client = self + .spawn_and_register_node( + &snapshot.node_name, + built.network_port, + built.config, + options.persist_dir.as_deref(), + ) .await?; Ok(StartedNode { - name: node_name, - api: api_client, + name: snapshot.node_name, + client, }) } - pub async fn restart_node(&self, name: &str) -> Result<(), LocalNodeManagerError> { - let (index, mut node) = { - let mut state = self - .state - .lock() - .unwrap_or_else(|poisoned| poisoned.into_inner()); + pub async fn restart_node(&self, name: &str) -> Result<(), NodeManagerError> { + let (index, mut node) = self.take_node(name)?; - let Some(index) = state.indices_by_name.get(name).copied() else { - return Err(LocalNodeManagerError::NodeName { - name: name.to_string(), - }); - }; + if let Err(source) = node.restart().await { + self.put_node_back(index, node); - if index >= state.nodes.len() { - return Err(LocalNodeManagerError::NodeName { - name: name.to_string(), - }); - } - - let node = state.nodes.remove(index); - (index, node) - }; - - node.restart() - .await - .map_err(|source| LocalNodeManagerError::Restart { source })?; - - let mut state = self - .state - .lock() - .unwrap_or_else(|poisoned| poisoned.into_inner()); - - if index <= state.nodes.len() { - state.nodes.insert(index, node); - } else { - state.nodes.push(node); + return Err(NodeManagerError::Restart { + source: source.into(), + }); } + self.put_node_back(index, node); + Ok(()) } - pub async fn stop_node(&self, name: &str) -> Result<(), LocalNodeManagerError> { - let (index, mut node) = { - let mut state = self - .state - .lock() - .unwrap_or_else(|poisoned| poisoned.into_inner()); - - let Some(index) = state.indices_by_name.get(name).copied() else { - return Err(LocalNodeManagerError::NodeName { - name: name.to_string(), - }); - }; - - if index >= state.nodes.len() { - return Err(LocalNodeManagerError::NodeName { - name: name.to_string(), - }); - } - - let node = state.nodes.remove(index); - (index, node) - }; + pub async fn stop_node(&self, name: &str) -> Result<(), NodeManagerError> { + let (index, mut node) = self.take_node(name)?; node.stop().await; - let mut state = self - .state - .lock() - .unwrap_or_else(|poisoned| poisoned.into_inner()); + self.put_node_back(index, node); - if index <= state.nodes.len() { - state.nodes.insert(index, node); - } else { - state.nodes.push(node); - } Ok(()) } - async fn spawn_and_register_node( &self, node_name: &str, network_port: u16, - config: RunConfig, - persist_dir: Option, - ) -> Result { - let node = Node::spawn(config, node_name, persist_dir) - .await - .map_err(|source| LocalNodeManagerError::Spawn { source })?; - let client = node.api().clone(); + config: ::NodeConfig, + persist_dir: Option<&std::path::Path>, + ) -> Result { + let node = spawn_node_from_config::( + node_name.to_string(), + config, + self.keep_tempdir, + persist_dir, + ) + .await + .map_err(|source| NodeManagerError::Spawn { + source: source.into(), + })?; + let client = node.client(); self.node_clients.add_node(client.clone()); - let mut state = self - .state - .lock() - .unwrap_or_else(|poisoned| poisoned.into_inner()); + let mut state = self.lock_state(); state.register_node(node_name, network_port, client.clone(), node); Ok(client) } + + fn take_node(&self, name: &str) -> Result<(usize, Node), NodeManagerError> { + let mut state = self.lock_state(); + remove_node_from_state(&mut state, name) + } + + fn put_node_back(&self, index: usize, node: Node) { + let mut state = self.lock_state(); + reinsert_node_at(&mut state, index, node); + } + + fn start_snapshot(&self, requested_name: &str) -> Result { + let state = self.lock_state(); + let index = state.node_count; + let node_name = validate_new_node_name::(state.node_count, &state, requested_name)?; + + Ok(NodeStartSnapshot { + peer_ports: state.peer_ports.clone(), + peer_ports_by_name: state.peer_ports_by_name.clone(), + node_name, + index, + }) + } + + fn lock_state(&self) -> MutexGuard<'_, LocalNodeManagerState> { + self.state + .lock() + .unwrap_or_else(|poisoned| poisoned.into_inner()) + } } -fn build_node_config( - general_config: testing_framework_config::topology::configs::GeneralConfig, - descriptor_patch: Option<&config::NodeConfigPatch>, - options_patch: Option<&config::NodeConfigPatch>, -) -> Result { - let mut config = create_node_config(general_config); - config = apply_patch_if_needed(config, descriptor_patch)?; - config = apply_patch_if_needed(config, options_patch)?; - - Ok(config) +fn clear_registered_nodes(state: &mut LocalNodeManagerState) { + state.nodes.clear(); + state.peer_ports.clear(); + state.peer_ports_by_name.clear(); + state.clients_by_name.clear(); + state.indices_by_name.clear(); + state.node_count = 0; } -fn apply_patch_if_needed( - config: RunConfig, - patch: Option<&config::NodeConfigPatch>, -) -> Result { - let Some(patch) = patch else { - return Ok(config); +fn validate_new_node_name( + node_count: usize, + state: &LocalNodeManagerState, + requested_name: &str, +) -> Result { + let label = normalize_node_name(node_count, requested_name); + + if state.peer_ports_by_name.contains_key(&label) { + return Err(NodeManagerError::InvalidArgument { + message: format!("node name '{label}' already exists"), + }); + } + + Ok(label) +} + +fn normalize_node_name(index: usize, requested_name: &str) -> String { + if requested_name.trim().is_empty() { + return default_node_label(index); + } + + if requested_name.starts_with("node-") { + return requested_name.to_string(); + } + + format!("node-{requested_name}") +} + +fn default_node_label(index: usize) -> String { + format!("node-{index}") +} + +fn remove_node_from_state( + state: &mut LocalNodeManagerState, + name: &str, +) -> Result<(usize, Node), NodeManagerError> { + let Some(index) = state.indices_by_name.get(name).copied() else { + return Err(NodeManagerError::NodeName { + name: name.to_string(), + }); }; - apply_node_config_patch(config, patch).map_err(|err| LocalNodeManagerError::ConfigPatch { - message: err.to_string(), - }) + if index >= state.nodes.len() { + return Err(NodeManagerError::NodeName { + name: name.to_string(), + }); + } + + Ok((index, state.nodes.remove(index))) +} + +fn reinsert_node_at( + state: &mut LocalNodeManagerState, + index: usize, + node: Node, +) { + if index <= state.nodes.len() { + state.nodes.insert(index, node); + } else { + state.nodes.push(node); + } } #[async_trait::async_trait] -impl NodeControlHandle for LocalNodeManager { +impl NodeControlHandle for NodeManager { async fn restart_node(&self, name: &str) -> Result<(), DynError> { self.restart_node(name).await.map_err(|err| err.into()) } @@ -483,8 +392,8 @@ impl NodeControlHandle for LocalNodeManager { self.stop_node(name).await.map_err(|err| err.into()) } - async fn start_node(&self, name: &str) -> Result { - self.start_node_with(name, StartNodeOptions::default()) + async fn start_node(&self, name: &str) -> Result, DynError> { + self.start_node_with(name, StartNodeOptions::::default()) .await .map_err(|err| err.into()) } @@ -492,14 +401,14 @@ impl NodeControlHandle for LocalNodeManager { async fn start_node_with( &self, name: &str, - options: StartNodeOptions, - ) -> Result { + options: StartNodeOptions, + ) -> Result, DynError> { self.start_node_with(name, options) .await .map_err(|err| err.into()) } - fn node_client(&self, name: &str) -> Option { + fn node_client(&self, name: &str) -> Option { self.node_client(name) } diff --git a/testing-framework/deployers/local/src/node_control/state.rs b/testing-framework/deployers/local/src/node_control/state.rs index 81fa525..d3dd1c3 100644 --- a/testing-framework/deployers/local/src/node_control/state.rs +++ b/testing-framework/deployers/local/src/node_control/state.rs @@ -1,18 +1,18 @@ use std::collections::HashMap; -use testing_framework_core::nodes::{ApiClient, node::Node}; +use crate::env::{LocalDeployerEnv, Node}; -pub(crate) struct LocalNodeManagerState { +pub(crate) struct LocalNodeManagerState { pub(crate) node_count: usize, pub(crate) peer_ports: Vec, pub(crate) peer_ports_by_name: HashMap, - pub(crate) clients_by_name: HashMap, + pub(crate) clients_by_name: HashMap, pub(crate) indices_by_name: HashMap, - pub(crate) nodes: Vec, + pub(crate) nodes: Vec>, } -impl LocalNodeManagerState { - fn register_common(&mut self, node_name: &str, network_port: u16, client: ApiClient) { +impl LocalNodeManagerState { + fn register_common(&mut self, node_name: &str, network_port: u16, client: E::NodeClient) { self.peer_ports.push(network_port); self.peer_ports_by_name .insert(node_name.to_string(), network_port); @@ -23,8 +23,8 @@ impl LocalNodeManagerState { &mut self, node_name: &str, network_port: u16, - client: ApiClient, - node: Node, + client: E::NodeClient, + node: Node, ) { self.register_common(node_name, network_port, client); let index = self.nodes.len(); diff --git a/testing-framework/deployers/local/src/process.rs b/testing-framework/deployers/local/src/process.rs new file mode 100644 index 0000000..612a338 --- /dev/null +++ b/testing-framework/deployers/local/src/process.rs @@ -0,0 +1,361 @@ +use std::{ + collections::HashMap, + fs, io, mem, + net::{Ipv4Addr, SocketAddr}, + path::{Path, PathBuf}, + process::Stdio, + thread, + time::Duration, +}; + +use tempfile::TempDir; +use testing_framework_core::{env::Application, process::RuntimeNode, scenario::DynError}; +use tokio::{ + process::{Child, Command}, + time::timeout, +}; + +#[derive(Clone, Debug, Eq, PartialEq, Hash)] +pub enum NodeEndpointPort { + TestingApi, + Network, + Custom(String), +} + +#[derive(Clone)] +pub struct NodeEndpoints { + pub api: SocketAddr, + pub extra_ports: HashMap, +} + +impl Default for NodeEndpoints { + fn default() -> Self { + Self { + api: default_api_socket(), + extra_ports: HashMap::new(), + } + } +} + +impl NodeEndpoints { + pub fn insert_port(&mut self, key: NodeEndpointPort, port: u16) { + self.extra_ports.insert(key, port); + } + + pub fn port(&self, key: &NodeEndpointPort) -> Option { + self.extra_ports.get(key).copied() + } +} + +/// File materialized in the node working directory before spawn. +#[derive(Clone)] +pub struct LaunchFile { + /// Path relative to the node working directory. + pub relative_path: PathBuf, + /// Raw file contents to write. + pub contents: Vec, +} + +/// Environment variable passed to the spawned process. +#[derive(Clone)] +pub struct LaunchEnvVar { + /// Environment variable name. + pub key: String, + /// Environment variable value. + pub value: String, +} + +impl LaunchEnvVar { + #[must_use] + pub fn new(key: impl Into, value: impl Into) -> Self { + Self { + key: key.into(), + value: value.into(), + } + } +} + +/// Local process launch plan. +#[derive(Clone, Default)] +pub struct LaunchSpec { + /// Executable path. + pub binary: PathBuf, + /// Files to write before spawn. + pub files: Vec, + /// Command-line arguments. + pub args: Vec, + /// Process environment variables. + pub env: Vec, +} + +#[derive(Debug, thiserror::Error)] +pub enum ProcessSpawnError { + #[error("failed to create tempdir: {source}")] + TempDir { + #[source] + source: io::Error, + }, + #[error("failed to write config: {source}")] + Config { + #[source] + source: DynError, + }, + #[error("failed to spawn process: {source}")] + Spawn { + #[source] + source: io::Error, + }, + #[error("failed to materialize launch files: {source}")] + Materialize { + #[source] + source: io::Error, + }, + #[error("process wait failed: {source}")] + Wait { + #[source] + source: io::Error, + }, + #[error("process readiness failed: {source}")] + Readiness { + #[source] + source: tokio::time::error::Elapsed, + }, +} + +pub struct ProcessNode +{ + child: Child, + tempdir: TempDir, + keep_tempdir: bool, + launch: LaunchSpec, + config: Config, + endpoints: NodeEndpoints, + client: Client, +} + +impl + ProcessNode +{ + pub const fn config(&self) -> &Config { + &self.config + } + + pub fn client(&self) -> Client { + self.client.clone() + } + + pub fn client_ref(&self) -> &Client { + &self.client + } + + pub fn endpoints(&self) -> &NodeEndpoints { + &self.endpoints + } + + pub fn pid(&self) -> u32 { + self.child.id().unwrap_or_default() + } + + pub fn is_running(&mut self) -> bool { + matches!(self.child.try_wait(), Ok(None)) + } + + pub async fn wait_for_exit(&mut self, wait_timeout: Duration) -> bool { + timeout(wait_timeout, async { + loop { + if !self.is_running() { + return; + } + tokio::time::sleep(Duration::from_millis(100)).await; + } + }) + .await + .is_ok() + } + + pub fn start_kill(&mut self) { + let _ = self.child.start_kill(); + } + + pub fn keep_tempdir(&mut self) -> io::Result<()> { + let dir = mem::replace(&mut self.tempdir, tempfile::tempdir()?); + let _ = dir.keep(); + Ok(()) + } + + pub async fn spawn( + label: &str, + config: Config, + build_launch_spec: impl FnOnce(&Config, &Path, &str) -> Result, + endpoints_from_config: impl FnOnce(&Config) -> NodeEndpoints, + keep_tempdir: bool, + persist_dir: Option<&Path>, + client_from_endpoints: impl FnOnce(&NodeEndpoints) -> Client, + ) -> Result { + let tempdir = match persist_dir { + Some(path) => { + std::fs::create_dir_all(path).map_err(|source| ProcessSpawnError::TempDir { + source: io::Error::new( + source.kind(), + format!("failed to create persist dir {}: {source}", path.display()), + ), + })?; + TempDir::new_in(path).map_err(|source| ProcessSpawnError::TempDir { source })? + } + None => TempDir::new().map_err(|source| ProcessSpawnError::TempDir { source })?, + }; + let launch = build_launch_spec(&config, tempdir.path(), label) + .map_err(|source| ProcessSpawnError::Config { source })?; + let endpoints = endpoints_from_config(&config); + let client = client_from_endpoints(&endpoints); + + let child = spawn_child_for_launch(tempdir.path(), &launch).await?; + + Ok(Self { + child, + tempdir, + keep_tempdir, + launch, + config, + endpoints, + client, + }) + } + + async fn spawn_child(&self) -> Result { + spawn_child_for_launch(self.tempdir.path(), &self.launch).await + } + + async fn stop_child(&mut self) -> Result<(), ProcessSpawnError> { + let _ = self.child.kill().await; + let _ = self + .child + .wait() + .await + .map_err(|source| ProcessSpawnError::Wait { source })?; + Ok(()) + } + + pub async fn restart(&mut self) -> Result<(), ProcessSpawnError> { + self.stop_child().await?; + self.child = self.spawn_child().await?; + Ok(()) + } + + pub async fn stop(&mut self) { + let _ = self.stop_child().await; + } +} + +async fn spawn_child_for_launch( + tempdir: &Path, + launch: &LaunchSpec, +) -> Result { + materialize_launch_files(tempdir, launch) + .map_err(|source| ProcessSpawnError::Materialize { source })?; + + build_process_command(tempdir, launch) + .spawn() + .map_err(|source| ProcessSpawnError::Spawn { source }) +} + +fn build_process_command(tempdir: &Path, launch: &LaunchSpec) -> Command { + let mut command = Command::new(&launch.binary); + command + .args(&launch.args) + .envs(launch_env_pairs(&launch.env)) + .current_dir(tempdir) + .stdin(Stdio::null()) + .stdout(Stdio::inherit()) + .stderr(Stdio::inherit()); + command +} + +fn launch_env_pairs(env: &[LaunchEnvVar]) -> impl Iterator { + env.iter() + .map(|entry| (entry.key.as_str(), entry.value.as_str())) +} + +impl Drop + for ProcessNode +{ + fn drop(&mut self) { + if should_preserve_tempdir(self.keep_tempdir) { + let _ = self.keep_tempdir(); + } + self.start_kill(); + } +} + +fn should_preserve_tempdir(keep_tempdir: bool) -> bool { + thread::panicking() || keep_tempdir +} + +#[async_trait::async_trait] +impl RuntimeNode for ProcessNode +where + E: Application, + Config: Clone + Send + Sync + 'static, +{ + type SpawnError = ProcessSpawnError; + + fn client(&self) -> E::NodeClient { + self.client() + } + + fn is_running(&mut self) -> bool { + self.is_running() + } + + fn pid(&self) -> u32 { + self.pid() + } + + async fn stop(&mut self) { + self.stop().await; + } + + async fn restart(&mut self) -> Result<(), Self::SpawnError> { + self.restart().await + } +} + +fn materialize_launch_files(base: &Path, launch: &LaunchSpec) -> io::Result<()> { + for file in &launch.files { + write_launch_file(base, file)?; + } + + Ok(()) +} + +fn write_launch_file(base: &Path, file: &LaunchFile) -> io::Result<()> { + let path = base.join(&file.relative_path); + if let Some(parent) = path.parent() { + fs::create_dir_all(parent)?; + } + fs::write(path, &file.contents) +} + +fn default_api_socket() -> SocketAddr { + SocketAddr::from((Ipv4Addr::LOCALHOST, 0)) +} + +#[cfg(test)] +mod tests { + use super::{NodeEndpointPort, NodeEndpoints}; + + #[test] + fn typed_ports_roundtrip() { + let mut endpoints = NodeEndpoints::default(); + endpoints.insert_port(NodeEndpointPort::TestingApi, 18081); + endpoints.insert_port(NodeEndpointPort::Network, 3000); + endpoints.insert_port(NodeEndpointPort::Custom("metrics".to_string()), 9000); + + assert_eq!(endpoints.port(&NodeEndpointPort::TestingApi), Some(18081)); + assert_eq!(endpoints.port(&NodeEndpointPort::Network), Some(3000)); + assert_eq!( + endpoints.port(&NodeEndpointPort::Custom("metrics".to_string())), + Some(9000) + ); + } +} diff --git a/testing-framework/deployers/local/src/runner.rs b/testing-framework/deployers/local/src/runner.rs deleted file mode 100644 index 45b8888..0000000 --- a/testing-framework/deployers/local/src/runner.rs +++ /dev/null @@ -1,249 +0,0 @@ -use std::sync::Arc; - -use async_trait::async_trait; -use testing_framework_core::{ - nodes::common::node::SpawnNodeError, - scenario::{ - BlockFeed, BlockFeedTask, Deployer, DynError, Metrics, NodeClients, NodeControlCapability, - RunContext, Runner, Scenario, ScenarioError, spawn_block_feed, - }, - topology::{ - config::{TopologyBuilder, TopologyConfig}, - deployment::Topology, - readiness::ReadinessError, - }, -}; -use thiserror::Error; -use tracing::{debug, info}; - -use crate::{ - manual::{LocalManualCluster, ManualClusterError}, - node_control::{LocalNodeManager, LocalNodeManagerSeed}, -}; -/// Spawns nodes as local processes, reusing the existing -/// integration harness. -#[derive(Clone)] -pub struct LocalDeployer { - membership_check: bool, -} - -/// Errors surfaced by the local deployer while driving a scenario. -#[derive(Debug, Error)] -pub enum LocalDeployerError { - #[error("failed to spawn local topology: {source}")] - Spawn { - #[source] - source: SpawnNodeError, - }, - #[error("readiness probe failed: {source}")] - ReadinessFailed { - #[source] - source: ReadinessError, - }, - #[error("workload failed: {source}")] - WorkloadFailed { - #[source] - source: DynError, - }, - #[error("expectations failed: {source}")] - ExpectationsFailed { - #[source] - source: DynError, - }, -} - -impl From for LocalDeployerError { - fn from(value: ScenarioError) -> Self { - match value { - ScenarioError::Workload(source) => Self::WorkloadFailed { source }, - ScenarioError::ExpectationCapture(source) | ScenarioError::Expectations(source) => { - Self::ExpectationsFailed { source } - } - } - } -} - -#[async_trait] -impl Deployer<()> for LocalDeployer { - type Error = LocalDeployerError; - - async fn deploy(&self, scenario: &Scenario<()>) -> Result { - info!( - nodes = scenario.topology().nodes().len(), - "starting local deployment" - ); - let topology = Self::prepare_topology(scenario, self.membership_check).await?; - let node_clients = NodeClients::from_topology(scenario.topology(), &topology); - - let (block_feed, block_feed_guard) = spawn_block_feed_with(&node_clients).await?; - - let context = RunContext::new( - scenario.topology().clone(), - Some(topology), - node_clients, - scenario.duration(), - Metrics::empty(), - block_feed, - None, - ); - - Ok(Runner::new(context, Some(Box::new(block_feed_guard)))) - } -} - -#[async_trait] -impl Deployer for LocalDeployer { - type Error = LocalDeployerError; - - async fn deploy( - &self, - scenario: &Scenario, - ) -> Result { - info!( - nodes = scenario.topology().nodes().len(), - "starting local deployment with node control" - ); - - let mut nodes = LocalNodeManager::spawn_initial_nodes(scenario.topology()) - .await - .map_err(|source| LocalDeployerError::Spawn { source })?; - - if self.membership_check { - let topology = Topology::from_nodes(nodes); - - wait_for_readiness(&topology).await.map_err(|source| { - debug!(error = ?source, "local readiness failed"); - LocalDeployerError::ReadinessFailed { source } - })?; - - nodes = topology.into_nodes(); - - info!("local nodes are ready"); - } else { - info!("skipping local membership readiness checks"); - } - - let node_control = Arc::new(LocalNodeManager::new_with_seed( - scenario.topology().clone(), - NodeClients::default(), - LocalNodeManagerSeed::from_topology(scenario.topology()), - )); - - node_control.initialize_with_nodes(nodes); - let node_clients = node_control.node_clients(); - - let (block_feed, block_feed_guard) = spawn_block_feed_with(&node_clients).await?; - - let context = RunContext::new( - scenario.topology().clone(), - None, - node_clients, - scenario.duration(), - Metrics::empty(), - block_feed, - Some(node_control), - ); - - Ok(Runner::new(context, Some(Box::new(block_feed_guard)))) - } -} - -impl LocalDeployer { - #[must_use] - /// Construct a local deployer. - pub fn new() -> Self { - Self::default() - } - - #[must_use] - /// Configure whether the deployer should enforce membership readiness - /// checks. - pub fn with_membership_check(mut self, enabled: bool) -> Self { - self.membership_check = enabled; - self - } - - /// Build a manual cluster using this deployer's local implementation. - pub fn manual_cluster( - &self, - config: TopologyConfig, - ) -> Result { - LocalManualCluster::from_config(config) - } - - /// Build a manual cluster from a pre-configured topology builder. - pub fn manual_cluster_with_builder( - &self, - builder: TopologyBuilder, - ) -> Result { - LocalManualCluster::from_builder(builder) - } - - async fn prepare_topology( - scenario: &Scenario, - membership_check: bool, - ) -> Result { - let descriptors = scenario.topology(); - - info!(nodes = descriptors.nodes().len(), "spawning local nodes"); - - let topology = LocalNodeManager::spawn_initial_topology(descriptors) - .await - .map_err(|source| LocalDeployerError::Spawn { source })?; - - if membership_check { - wait_for_readiness(&topology).await.map_err(|source| { - debug!(error = ?source, "local readiness failed"); - LocalDeployerError::ReadinessFailed { source } - })?; - - info!("local nodes are ready"); - } else { - info!("skipping local membership readiness checks"); - } - - Ok(topology) - } -} - -impl Default for LocalDeployer { - fn default() -> Self { - Self { - membership_check: true, - } - } -} - -async fn wait_for_readiness(topology: &Topology) -> Result<(), ReadinessError> { - info!("waiting for local network readiness"); - - topology.wait_network_ready().await?; - Ok(()) -} - -async fn spawn_block_feed_with( - node_clients: &NodeClients, -) -> Result<(BlockFeed, BlockFeedTask), LocalDeployerError> { - debug!( - nodes = node_clients.node_clients().len(), - "selecting node client for local block feed" - ); - - let Some(block_source_client) = node_clients.random_node() else { - return Err(LocalDeployerError::WorkloadFailed { - source: "block feed requires at least one node".into(), - }); - }; - - info!("starting block feed"); - - spawn_block_feed(block_source_client) - .await - .map_err(workload_error) -} - -fn workload_error(source: impl Into) -> LocalDeployerError { - LocalDeployerError::WorkloadFailed { - source: source.into(), - } -} diff --git a/testing-framework/deployers/local/tests/restart.rs b/testing-framework/deployers/local/tests/restart.rs deleted file mode 100644 index a16ee50..0000000 --- a/testing-framework/deployers/local/tests/restart.rs +++ /dev/null @@ -1,67 +0,0 @@ -use std::time::Duration; - -use testing_framework_core::{ - scenario::{Deployer, ScenarioBuilder}, - topology::config::TopologyConfig, -}; -use testing_framework_runner_local::LocalDeployer; -use tracing_subscriber::fmt::try_init; - -#[tokio::test] -#[ignore = "requires local node binary and open ports"] -async fn local_restart_node() -> Result<(), Box> { - let _ = try_init(); - let mut scenario = ScenarioBuilder::topology_with(|t| t.nodes(1)) - .enable_node_control() - .with_run_duration(Duration::from_secs(1)) - .build()?; - - let deployer = LocalDeployer::default(); - let runner = deployer.deploy(&scenario).await?; - let context = runner.context(); - - let control = context.node_control().ok_or("node control not available")?; - - let node_name = "node-0"; - let old_pid = control.node_pid(node_name).ok_or("missing node pid")?; - - control.restart_node(node_name).await?; - - let new_pid = control.node_pid(node_name).ok_or("missing node pid")?; - assert_ne!(old_pid, new_pid, "expected a new process after restart"); - - control.stop_node(node_name).await?; - assert!( - control.node_pid(node_name).is_none(), - "expected node pid to be absent after stop" - ); - - let _handle = runner.run(&mut scenario).await?; - - Ok(()) -} - -#[tokio::test] -#[ignore = "requires local node binary and open ports"] -async fn manual_cluster_restart_node() -> Result<(), Box> { - let _ = try_init(); - let deployer = LocalDeployer::default(); - let cluster = deployer.manual_cluster(TopologyConfig::with_node_numbers(1))?; - - let node_name = cluster.start_node("a").await?.name; - - let old_pid = cluster.node_pid(&node_name).ok_or("missing node pid")?; - - cluster.restart_node(&node_name).await?; - - let new_pid = cluster.node_pid(&node_name).ok_or("missing node pid")?; - assert_ne!(old_pid, new_pid, "expected a new process after restart"); - - cluster.stop_node(&node_name).await?; - assert!( - cluster.node_pid(&node_name).is_none(), - "expected node pid to be absent after stop" - ); - - Ok(()) -} diff --git a/testing-framework/tools/cfgsync-core/Cargo.toml b/testing-framework/tools/cfgsync-core/Cargo.toml new file mode 100644 index 0000000..63f2d1f --- /dev/null +++ b/testing-framework/tools/cfgsync-core/Cargo.toml @@ -0,0 +1,21 @@ +[package] +categories = { workspace = true } +description = { workspace = true } +edition = { workspace = true } +keywords = { workspace = true } +license = { workspace = true } +name = "cfgsync-core" +readme = { workspace = true } +repository = { workspace = true } +version = { workspace = true } + +[lints] +workspace = true + +[dependencies] +axum = { default-features = false, features = ["http1", "http2", "json", "tokio"], version = "0.7.5" } +reqwest = { features = ["json"], workspace = true } +serde = { default-features = false, features = ["derive"], version = "1" } +serde_json = { workspace = true } +thiserror = { workspace = true } +tokio = { default-features = false, features = ["macros", "net", "rt-multi-thread"], version = "1" } diff --git a/testing-framework/tools/cfgsync-core/src/client.rs b/testing-framework/tools/cfgsync-core/src/client.rs new file mode 100644 index 0000000..2df652c --- /dev/null +++ b/testing-framework/tools/cfgsync-core/src/client.rs @@ -0,0 +1,94 @@ +use serde::Serialize; +use thiserror::Error; + +use crate::{ + repo::{CfgSyncErrorResponse, CfgSyncPayload}, + server::ClientIp, +}; + +#[derive(Debug, Error)] +pub enum ClientError { + #[error("request failed: {0}")] + Request(#[from] reqwest::Error), + #[error("cfgsync server error {status}: {message}")] + Status { + status: reqwest::StatusCode, + message: String, + error: Option, + }, + #[error("failed to parse cfgsync response: {0}")] + Decode(serde_json::Error), +} + +#[derive(Clone, Debug)] +pub struct CfgSyncClient { + base_url: String, + http: reqwest::Client, +} + +impl CfgSyncClient { + #[must_use] + pub fn new(base_url: impl Into) -> Self { + let mut base_url = base_url.into(); + while base_url.ends_with('/') { + base_url.pop(); + } + Self { + base_url, + http: reqwest::Client::new(), + } + } + + #[must_use] + pub fn base_url(&self) -> &str { + &self.base_url + } + + pub async fn fetch_node_config( + &self, + payload: &ClientIp, + ) -> Result { + self.post_json("/node", payload).await + } + + pub async fn fetch_init_with_node_config( + &self, + payload: &ClientIp, + ) -> Result { + self.post_json("/init-with-node", payload).await + } + + pub async fn post_json( + &self, + path: &str, + payload: &P, + ) -> Result { + let url = self.endpoint_url(path); + let response = self.http.post(url).json(payload).send().await?; + + let status = response.status(); + let body = response.text().await?; + if !status.is_success() { + let error = serde_json::from_str::(&body).ok(); + let message = error + .as_ref() + .map(|err| err.message.clone()) + .unwrap_or_else(|| body.clone()); + return Err(ClientError::Status { + status, + message, + error, + }); + } + + serde_json::from_str(&body).map_err(ClientError::Decode) + } + + fn endpoint_url(&self, path: &str) -> String { + if path.starts_with('/') { + format!("{}{}", self.base_url, path) + } else { + format!("{}/{}", self.base_url, path) + } + } +} diff --git a/testing-framework/tools/cfgsync-core/src/lib.rs b/testing-framework/tools/cfgsync-core/src/lib.rs new file mode 100644 index 0000000..c437b8f --- /dev/null +++ b/testing-framework/tools/cfgsync-core/src/lib.rs @@ -0,0 +1,10 @@ +pub mod client; +pub mod repo; +pub mod server; + +pub use client::{CfgSyncClient, ClientError}; +pub use repo::{ + CFGSYNC_SCHEMA_VERSION, CfgSyncErrorCode, CfgSyncErrorResponse, CfgSyncPayload, ConfigRepo, + RepoResponse, +}; +pub use server::{CfgSyncState, ClientIp, RunCfgsyncError, cfgsync_app, run_cfgsync}; diff --git a/testing-framework/tools/cfgsync-core/src/repo.rs b/testing-framework/tools/cfgsync-core/src/repo.rs new file mode 100644 index 0000000..0a22b4e --- /dev/null +++ b/testing-framework/tools/cfgsync-core/src/repo.rs @@ -0,0 +1,80 @@ +use std::{collections::HashMap, sync::Arc}; + +use serde::{Deserialize, Serialize}; +use thiserror::Error; +use tokio::sync::oneshot::Sender; + +pub const CFGSYNC_SCHEMA_VERSION: u16 = 1; + +#[derive(Debug, Clone, Serialize, Deserialize)] +pub struct CfgSyncPayload { + pub schema_version: u16, + pub config_yaml: String, +} + +impl CfgSyncPayload { + #[must_use] + pub fn new(config_yaml: String) -> Self { + Self { + schema_version: CFGSYNC_SCHEMA_VERSION, + config_yaml, + } + } +} + +#[derive(Debug, Clone, Serialize, Deserialize)] +#[serde(rename_all = "snake_case")] +pub enum CfgSyncErrorCode { + MissingConfig, + Internal, +} + +#[derive(Debug, Clone, Serialize, Deserialize, Error)] +#[error("{code:?}: {message}")] +pub struct CfgSyncErrorResponse { + pub code: CfgSyncErrorCode, + pub message: String, +} + +impl CfgSyncErrorResponse { + #[must_use] + pub fn missing_config(identifier: &str) -> Self { + Self { + code: CfgSyncErrorCode::MissingConfig, + message: format!("missing config for host {identifier}"), + } + } + + #[must_use] + pub fn internal(message: impl Into) -> Self { + Self { + code: CfgSyncErrorCode::Internal, + message: message.into(), + } + } +} + +pub enum RepoResponse { + Config(CfgSyncPayload), + Error(CfgSyncErrorResponse), +} + +pub struct ConfigRepo { + configs: HashMap, +} + +impl ConfigRepo { + #[must_use] + pub fn from_bundle(configs: HashMap) -> Arc { + Arc::new(Self { configs }) + } + + pub async fn register(&self, identifier: String, reply_tx: Sender) { + let response = self.configs.get(&identifier).cloned().map_or_else( + || RepoResponse::Error(CfgSyncErrorResponse::missing_config(&identifier)), + RepoResponse::Config, + ); + + let _ = reply_tx.send(response); + } +} diff --git a/testing-framework/tools/cfgsync-core/src/server.rs b/testing-framework/tools/cfgsync-core/src/server.rs new file mode 100644 index 0000000..f519d53 --- /dev/null +++ b/testing-framework/tools/cfgsync-core/src/server.rs @@ -0,0 +1,95 @@ +use std::{io, net::Ipv4Addr, sync::Arc}; + +use axum::{Json, Router, extract::State, http::StatusCode, response::IntoResponse, routing::post}; +use serde::{Deserialize, Serialize}; +use thiserror::Error; +use tokio::sync::oneshot::channel; + +use crate::repo::{CfgSyncErrorResponse, ConfigRepo, RepoResponse}; + +#[derive(Serialize, Deserialize)] +pub struct ClientIp { + /// Node IP that can be used by clients for observability/logging. + pub ip: Ipv4Addr, + /// Stable node identifier used as key in cfgsync bundle lookup. + pub identifier: String, +} + +pub struct CfgSyncState { + repo: Arc, +} + +impl CfgSyncState { + #[must_use] + pub fn new(repo: Arc) -> Self { + Self { repo } + } +} + +#[derive(Debug, Error)] +pub enum RunCfgsyncError { + #[error("failed to bind cfgsync server on {bind_addr}: {source}")] + Bind { + bind_addr: String, + #[source] + source: io::Error, + }, + #[error("cfgsync server terminated unexpectedly: {source}")] + Serve { + #[source] + source: io::Error, + }, +} + +async fn node_config( + State(state): State>, + Json(payload): Json, +) -> impl IntoResponse { + let identifier = payload.identifier.clone(); + let (reply_tx, reply_rx) = channel(); + state.repo.register(identifier, reply_tx).await; + + match reply_rx.await { + Err(_) => ( + StatusCode::INTERNAL_SERVER_ERROR, + Json(CfgSyncErrorResponse::internal( + "error receiving config from repo", + )), + ) + .into_response(), + Ok(RepoResponse::Config(payload_data)) => { + (StatusCode::OK, Json(payload_data)).into_response() + } + + Ok(RepoResponse::Error(error)) => { + let status = match error.code { + crate::repo::CfgSyncErrorCode::MissingConfig => StatusCode::NOT_FOUND, + crate::repo::CfgSyncErrorCode::Internal => StatusCode::INTERNAL_SERVER_ERROR, + }; + (status, Json(error)).into_response() + } + } +} + +pub fn cfgsync_app(state: CfgSyncState) -> Router { + Router::new() + .route("/node", post(node_config)) + .route("/init-with-node", post(node_config)) + .with_state(Arc::new(state)) +} + +pub async fn run_cfgsync(port: u16, state: CfgSyncState) -> Result<(), RunCfgsyncError> { + let app = cfgsync_app(state); + println!("Server running on http://0.0.0.0:{port}"); + + let bind_addr = format!("0.0.0.0:{port}"); + let listener = tokio::net::TcpListener::bind(&bind_addr) + .await + .map_err(|source| RunCfgsyncError::Bind { bind_addr, source })?; + + axum::serve(listener, app) + .await + .map_err(|source| RunCfgsyncError::Serve { source })?; + + Ok(()) +} diff --git a/testing-framework/tools/cfgsync-runtime/Cargo.toml b/testing-framework/tools/cfgsync-runtime/Cargo.toml new file mode 100644 index 0000000..f51d081 --- /dev/null +++ b/testing-framework/tools/cfgsync-runtime/Cargo.toml @@ -0,0 +1,22 @@ +[package] +categories = { workspace = true } +description = { workspace = true } +edition = { workspace = true } +keywords = { workspace = true } +license = { workspace = true } +name = "cfgsync-runtime" +readme = { workspace = true } +repository = { workspace = true } +version = { workspace = true } + +[lints] +workspace = true + +[dependencies] +anyhow = "1" +cfgsync-core = { workspace = true } +clap = { version = "4", features = ["derive"] } +serde = { workspace = true } +serde_yaml = { workspace = true } +testing-framework-core = { workspace = true } +tokio = { default-features = false, features = ["macros", "net", "rt-multi-thread"], version = "1" } diff --git a/testing-framework/tools/cfgsync-runtime/src/bin/cfgsync-client.rs b/testing-framework/tools/cfgsync-runtime/src/bin/cfgsync-client.rs new file mode 100644 index 0000000..98c3914 --- /dev/null +++ b/testing-framework/tools/cfgsync-runtime/src/bin/cfgsync-client.rs @@ -0,0 +1,21 @@ +use std::{env, process}; + +use cfgsync_runtime::run_cfgsync_client_from_env; + +const CFGSYNC_PORT_ENV: &str = "LOGOS_BLOCKCHAIN_CFGSYNC_PORT"; +const DEFAULT_CFGSYNC_PORT: u16 = 4400; + +fn cfgsync_port() -> u16 { + env::var(CFGSYNC_PORT_ENV) + .ok() + .and_then(|value| value.parse::().ok()) + .unwrap_or(DEFAULT_CFGSYNC_PORT) +} + +#[tokio::main] +async fn main() { + if let Err(err) = run_cfgsync_client_from_env(cfgsync_port()).await { + eprintln!("Error: {err}"); + process::exit(1); + } +} diff --git a/testing-framework/tools/cfgsync-runtime/src/bin/cfgsync-server.rs b/testing-framework/tools/cfgsync-runtime/src/bin/cfgsync-server.rs new file mode 100644 index 0000000..28134ea --- /dev/null +++ b/testing-framework/tools/cfgsync-runtime/src/bin/cfgsync-server.rs @@ -0,0 +1,16 @@ +use std::path::PathBuf; + +use cfgsync_runtime::run_cfgsync_server; +use clap::Parser; + +#[derive(Parser, Debug)] +#[command(about = "CfgSync")] +struct Args { + config: PathBuf, +} + +#[tokio::main] +async fn main() -> anyhow::Result<()> { + let args = Args::parse(); + run_cfgsync_server(&args.config).await +} diff --git a/testing-framework/tools/cfgsync-runtime/src/bundle.rs b/testing-framework/tools/cfgsync-runtime/src/bundle.rs new file mode 100644 index 0000000..1453b55 --- /dev/null +++ b/testing-framework/tools/cfgsync-runtime/src/bundle.rs @@ -0,0 +1,31 @@ +use anyhow::Result; +use serde::{Deserialize, Serialize}; +use testing_framework_core::cfgsync::{CfgsyncEnv, build_cfgsync_node_configs}; + +#[derive(Debug, Clone, Serialize, Deserialize)] +pub struct CfgSyncBundle { + pub nodes: Vec, +} + +#[derive(Debug, Clone, Serialize, Deserialize)] +pub struct CfgSyncBundleNode { + pub identifier: String, + pub config_yaml: String, +} + +pub fn build_cfgsync_bundle_with_hostnames( + deployment: &E::Deployment, + hostnames: &[String], +) -> Result { + let nodes = build_cfgsync_node_configs::(deployment, hostnames)?; + + Ok(CfgSyncBundle { + nodes: nodes + .into_iter() + .map(|node| CfgSyncBundleNode { + identifier: node.identifier, + config_yaml: node.config_yaml, + }) + .collect(), + }) +} diff --git a/testing-framework/tools/cfgsync-runtime/src/client.rs b/testing-framework/tools/cfgsync-runtime/src/client.rs new file mode 100644 index 0000000..75f0836 --- /dev/null +++ b/testing-framework/tools/cfgsync-runtime/src/client.rs @@ -0,0 +1,71 @@ +use std::{env, fs, net::Ipv4Addr}; + +use anyhow::{Context as _, Result}; +use cfgsync_core::{CFGSYNC_SCHEMA_VERSION, CfgSyncClient, ClientIp}; +use tokio::time::{Duration, sleep}; + +const FETCH_ATTEMPTS: usize = 5; +const FETCH_RETRY_DELAY: Duration = Duration::from_millis(250); + +fn parse_ip(ip_str: &str) -> Ipv4Addr { + ip_str.parse().unwrap_or(Ipv4Addr::LOCALHOST) +} + +async fn fetch_with_retry( + payload: &ClientIp, + server_addr: &str, +) -> Result { + let client = CfgSyncClient::new(server_addr); + let mut last_error: Option = None; + + for attempt in 1..=FETCH_ATTEMPTS { + match client.fetch_node_config(payload).await { + Ok(config) => return Ok(config), + Err(error) => { + last_error = Some(error.into()); + + if attempt < FETCH_ATTEMPTS { + sleep(FETCH_RETRY_DELAY).await; + } + } + } + } + + match last_error { + Some(error) => Err(error), + None => Err(anyhow::anyhow!( + "cfgsync client fetch failed without an error" + )), + } +} + +async fn pull_to_file(payload: ClientIp, server_addr: &str, config_file: &str) -> Result<()> { + let config = fetch_with_retry(&payload, server_addr) + .await + .context("fetching cfgsync node config")?; + + if config.schema_version != CFGSYNC_SCHEMA_VERSION { + anyhow::bail!( + "unsupported cfgsync payload schema version {}, expected {}", + config.schema_version, + CFGSYNC_SCHEMA_VERSION + ); + } + + fs::write(config_file, &config.config_yaml) + .with_context(|| format!("writing config to {}", config_file))?; + + println!("Config saved to {config_file}"); + Ok(()) +} + +pub async fn run_cfgsync_client_from_env(default_port: u16) -> Result<()> { + let config_file_path = env::var("CFG_FILE_PATH").unwrap_or_else(|_| "config.yaml".to_owned()); + let server_addr = + env::var("CFG_SERVER_ADDR").unwrap_or_else(|_| format!("http://127.0.0.1:{default_port}")); + let ip = parse_ip(&env::var("CFG_HOST_IP").unwrap_or_else(|_| "127.0.0.1".to_owned())); + let identifier = + env::var("CFG_HOST_IDENTIFIER").unwrap_or_else(|_| "unidentified-node".to_owned()); + + pull_to_file(ClientIp { ip, identifier }, &server_addr, &config_file_path).await +} diff --git a/testing-framework/tools/cfgsync-runtime/src/lib.rs b/testing-framework/tools/cfgsync-runtime/src/lib.rs new file mode 100644 index 0000000..bc28a08 --- /dev/null +++ b/testing-framework/tools/cfgsync-runtime/src/lib.rs @@ -0,0 +1,10 @@ +pub mod bundle; +pub mod render; + +pub use cfgsync_core as core; + +mod client; +mod server; + +pub use client::run_cfgsync_client_from_env; +pub use server::{CfgSyncServerConfig, run_cfgsync_server}; diff --git a/testing-framework/tools/cfgsync-runtime/src/render.rs b/testing-framework/tools/cfgsync-runtime/src/render.rs new file mode 100644 index 0000000..105e6ca --- /dev/null +++ b/testing-framework/tools/cfgsync-runtime/src/render.rs @@ -0,0 +1,44 @@ +use std::{fs, path::Path}; + +use anyhow::Result; + +#[derive(Debug, Clone)] +pub struct RenderedCfgsync { + pub config_yaml: String, + pub bundle_yaml: String, +} + +#[derive(Debug, Clone, Copy)] +pub struct CfgsyncOutputPaths<'a> { + pub config_path: &'a Path, + pub bundle_path: &'a Path, +} + +pub fn ensure_bundle_path(bundle_path: &mut Option, output_bundle_path: &Path) { + if bundle_path.is_some() { + return; + } + + *bundle_path = Some( + output_bundle_path + .file_name() + .and_then(|name| name.to_str()) + .unwrap_or("cfgsync.bundle.yaml") + .to_string(), + ); +} + +pub fn apply_timeout_floor(timeout: &mut u64, min_timeout_secs: Option) { + if let Some(min_timeout_secs) = min_timeout_secs { + *timeout = (*timeout).max(min_timeout_secs); + } +} + +pub fn write_rendered_cfgsync( + rendered: &RenderedCfgsync, + output: CfgsyncOutputPaths<'_>, +) -> Result<()> { + fs::write(output.config_path, &rendered.config_yaml)?; + fs::write(output.bundle_path, &rendered.bundle_yaml)?; + Ok(()) +} diff --git a/testing-framework/tools/cfgsync-runtime/src/server.rs b/testing-framework/tools/cfgsync-runtime/src/server.rs new file mode 100644 index 0000000..a497515 --- /dev/null +++ b/testing-framework/tools/cfgsync-runtime/src/server.rs @@ -0,0 +1,72 @@ +use std::{collections::HashMap, fs, path::Path, sync::Arc}; + +use anyhow::Context as _; +use cfgsync_core::{CfgSyncPayload, CfgSyncState, ConfigRepo, run_cfgsync}; +use serde::Deserialize; + +#[derive(Debug, Deserialize, Clone)] +pub struct CfgSyncServerConfig { + pub port: u16, + pub bundle_path: String, +} + +impl CfgSyncServerConfig { + pub fn load_from_file(path: &Path) -> anyhow::Result { + let config_content = fs::read_to_string(path) + .with_context(|| format!("failed to read cfgsync config file {}", path.display()))?; + serde_yaml::from_str(&config_content) + .with_context(|| format!("failed to parse cfgsync config file {}", path.display())) + } +} + +#[derive(Debug, Deserialize)] +struct CfgSyncBundle { + nodes: Vec, +} + +#[derive(Debug, Deserialize)] +struct CfgSyncBundleNode { + identifier: String, + config_yaml: String, +} + +fn load_bundle(bundle_path: &Path) -> anyhow::Result> { + let bundle_content = fs::read_to_string(bundle_path).with_context(|| { + format!( + "failed to read cfgsync bundle file {}", + bundle_path.display() + ) + })?; + let bundle: CfgSyncBundle = serde_yaml::from_str(&bundle_content) + .with_context(|| format!("failed to parse cfgsync bundle {}", bundle_path.display()))?; + + let configs = bundle + .nodes + .into_iter() + .map(|node| (node.identifier, CfgSyncPayload::new(node.config_yaml))) + .collect::>(); + + Ok(ConfigRepo::from_bundle(configs)) +} + +fn resolve_bundle_path(config_path: &Path, bundle_path: &str) -> std::path::PathBuf { + let path = Path::new(bundle_path); + if path.is_absolute() { + return path.to_path_buf(); + } + + config_path + .parent() + .unwrap_or_else(|| Path::new(".")) + .join(path) +} + +pub async fn run_cfgsync_server(config_path: &Path) -> anyhow::Result<()> { + let config = CfgSyncServerConfig::load_from_file(config_path)?; + let bundle_path = resolve_bundle_path(config_path, &config.bundle_path); + + let repo = load_bundle(&bundle_path)?; + let state = CfgSyncState::new(repo); + run_cfgsync(config.port, state).await?; + Ok(()) +} diff --git a/testing-framework/tools/cfgsync_tf/Cargo.toml b/testing-framework/tools/cfgsync_tf/Cargo.toml deleted file mode 100644 index d17c5b0..0000000 --- a/testing-framework/tools/cfgsync_tf/Cargo.toml +++ /dev/null @@ -1,38 +0,0 @@ -[package] -categories = { workspace = true } -description = { workspace = true } -edition = { workspace = true } -keywords = { workspace = true } -license = { workspace = true } -name = "cfgsync_tf" -readme = { workspace = true } -repository = { workspace = true } -version = { workspace = true } - -[lints] -workspace = true - -[dependencies] -anyhow = "1" -axum = { default-features = false, features = ["http1", "http2", "json", "tokio"], version = "0.7.5" } -clap = { default-features = false, version = "4" } -hex = { workspace = true } -lb-core = { workspace = true } -lb-groth16 = { workspace = true } -lb-key-management-system-service = { workspace = true } -lb-libp2p = { workspace = true } -lb-node = { workspace = true } -lb-tracing-service = { workspace = true } -lb-utils = { workspace = true } -rand = { workspace = true } -reqwest = { workspace = true } -serde = { default-features = false, version = "1" } -serde_json = { default-features = false, version = "1.0" } -serde_path_to_error = "0.1" -serde_with = { workspace = true } -serde_yaml = "0.9" -testing-framework-config = { workspace = true } -testing-framework-core = { path = "../../core" } -thiserror = { workspace = true } -tokio = { default-features = false, features = ["macros", "net", "rt-multi-thread"], version = "1" } -tracing = { workspace = true } diff --git a/testing-framework/tools/cfgsync_tf/src/bin/cfgsync-client.rs b/testing-framework/tools/cfgsync_tf/src/bin/cfgsync-client.rs deleted file mode 100644 index cb95f6a..0000000 --- a/testing-framework/tools/cfgsync_tf/src/bin/cfgsync-client.rs +++ /dev/null @@ -1,80 +0,0 @@ -use std::{env, fs, net::Ipv4Addr, process}; - -use cfgsync_tf::{ - client::{FetchedConfig, get_config}, - server::ClientIp, -}; -use lb_node::UserConfig; -use serde::{Serialize, de::DeserializeOwned}; -use testing_framework_config::constants::cfgsync_port as default_cfgsync_port; -use testing_framework_core::nodes::common::config::injection::{ - inject_ibd_into_cryptarchia, normalize_ed25519_sigs, -}; - -fn parse_ip(ip_str: &str) -> Ipv4Addr { - ip_str.parse().unwrap_or_else(|_| { - eprintln!("Invalid IP format, defaulting to 127.0.0.1"); - Ipv4Addr::LOCALHOST - }) -} - -async fn pull_to_file(payload: ClientIp, url: &str, config_file: &str) -> Result<(), String> -where - Config: Serialize + DeserializeOwned, -{ - let FetchedConfig { - config, - raw: _unused, - } = get_config::(payload, url).await?; - - let mut yaml_value = serde_yaml::to_value(&config) - .map_err(|err| format!("Failed to serialize config to YAML value: {err}"))?; - inject_ibd_into_cryptarchia(&mut yaml_value); - normalize_ed25519_sigs(&mut yaml_value); - let yaml = serde_yaml::to_string(&yaml_value) - .map_err(|err| format!("Failed to serialize config to YAML: {err}"))?; - - fs::write(config_file, yaml).map_err(|err| format!("Failed to write config to file: {err}"))?; - - println!("Config saved to {config_file}"); - Ok(()) -} - -#[tokio::main] -async fn main() { - let config_file_path = env::var("CFG_FILE_PATH").unwrap_or_else(|_| "config.yaml".to_owned()); - let server_addr = env::var("CFG_SERVER_ADDR") - .unwrap_or_else(|_| format!("http://127.0.0.1:{}", default_cfgsync_port())); - let ip = parse_ip(&env::var("CFG_HOST_IP").unwrap_or_else(|_| "127.0.0.1".to_owned())); - let identifier = - env::var("CFG_HOST_IDENTIFIER").unwrap_or_else(|_| "unidentified-node".to_owned()); - - let network_port = env::var("CFG_NETWORK_PORT") - .ok() - .and_then(|v| v.parse().ok()); - let blend_port = env::var("CFG_BLEND_PORT").ok().and_then(|v| v.parse().ok()); - let api_port = env::var("CFG_API_PORT").ok().and_then(|v| v.parse().ok()); - let testing_http_port = env::var("CFG_TESTING_HTTP_PORT") - .ok() - .and_then(|v| v.parse().ok()); - - let payload = ClientIp { - ip, - identifier, - network_port, - blend_port, - api_port, - testing_http_port, - }; - - let node_config_endpoint = format!("{server_addr}/node"); - - let config_result = - pull_to_file::(payload, &node_config_endpoint, &config_file_path).await; - - // Handle error if the config request fails - if let Err(err) = config_result { - eprintln!("Error: {err}"); - process::exit(1); - } -} diff --git a/testing-framework/tools/cfgsync_tf/src/bin/cfgsync-server.rs b/testing-framework/tools/cfgsync_tf/src/bin/cfgsync-server.rs deleted file mode 100644 index 6d2cdae..0000000 --- a/testing-framework/tools/cfgsync_tf/src/bin/cfgsync-server.rs +++ /dev/null @@ -1,40 +0,0 @@ -use std::path::PathBuf; - -use anyhow::Context as _; -use cfgsync_tf::server::{CfgSyncConfig, cfgsync_app}; -use clap::Parser; -use tokio::net::TcpListener; - -#[derive(Parser, Debug)] -#[command(about = "CfgSync")] -struct Args { - config: PathBuf, -} - -#[tokio::main] -async fn main() -> anyhow::Result<()> { - let cli = Args::parse(); - - let config = CfgSyncConfig::load_from_file(&cli.config) - .map_err(anyhow::Error::msg) - .with_context(|| { - format!( - "failed to load cfgsync config from {}", - cli.config.display() - ) - })?; - - let port = config.port; - let app = cfgsync_app(config.into()); - - println!("Server running on http://0.0.0.0:{port}"); - let listener = TcpListener::bind(&format!("0.0.0.0:{port}")) - .await - .with_context(|| format!("failed to bind cfgsync server on 0.0.0.0:{port}"))?; - - axum::serve(listener, app) - .await - .context("cfgsync server terminated unexpectedly")?; - - Ok(()) -} diff --git a/testing-framework/tools/cfgsync_tf/src/client.rs b/testing-framework/tools/cfgsync_tf/src/client.rs deleted file mode 100644 index 6e26c89..0000000 --- a/testing-framework/tools/cfgsync_tf/src/client.rs +++ /dev/null @@ -1,46 +0,0 @@ -use reqwest::{Client, Response}; -use serde::de::DeserializeOwned; - -use crate::server::ClientIp; - -#[derive(Debug)] -pub struct FetchedConfig { - pub config: Config, - pub raw: serde_json::Value, -} - -async fn deserialize_response( - response: Response, -) -> Result, String> { - let body = response - .text() - .await - .map_err(|error| format!("Failed to read response body: {error}"))?; - let raw: serde_json::Value = - serde_json::from_str(&body).map_err(|error| format!("Failed to parse body: {error}"))?; - let mut json_deserializer = serde_json::Deserializer::from_str(&body); - let config = serde_path_to_error::deserialize(&mut json_deserializer) - .map_err(|error| format!("Failed to deserialize body: {error}, raw body: {body}"))?; - - Ok(FetchedConfig { config, raw }) -} - -pub async fn get_config( - payload: ClientIp, - url: &str, -) -> Result, String> { - let client = Client::new(); - - let response = client - .post(url) - .json(&payload) - .send() - .await - .map_err(|err| format!("Failed to send IP announcement: {err}"))?; - - if !response.status().is_success() { - return Err(format!("Server error: {:?}", response.status())); - } - - deserialize_response(response).await -} diff --git a/testing-framework/tools/cfgsync_tf/src/config/builder.rs b/testing-framework/tools/cfgsync_tf/src/config/builder.rs deleted file mode 100644 index bbc1dae..0000000 --- a/testing-framework/tools/cfgsync_tf/src/config/builder.rs +++ /dev/null @@ -1,255 +0,0 @@ -use std::{collections::HashMap, net::Ipv4Addr, str::FromStr as _}; - -use lb_core::mantle::GenesisTx as _; -use lb_libp2p::{Multiaddr, PeerId, ed25519}; -use lb_tracing_service::TracingSettings; -use rand::{Rng as _, thread_rng}; -use testing_framework_config::topology::configs::{ - GeneralConfig, - api::GeneralApiConfig, - base::{BaseConfigError, BaseConfigs, build_base_configs}, - consensus::{ - ConsensusConfigError, ConsensusParams, create_genesis_tx_with_declarations, - sync_utxos_with_genesis, - }, - network::NetworkParams, - time::default_time_config, - wallet::WalletConfig, -}; -use thiserror::Error; - -use crate::{ - config::{ - kms::create_kms_configs, - providers::{ProviderBuildError, try_create_providers}, - tracing::update_tracing_identifier, - validation::{ValidationError, validate_inputs}, - }, - host::{Host, sort_hosts}, - network::rewrite_initial_peers, -}; - -pub fn create_node_configs( - consensus_params: &ConsensusParams, - tracing_settings: &TracingSettings, - wallet_config: &WalletConfig, - ids: Option>, - blend_ports: Option>, - hosts: Vec, -) -> Result, NodeConfigBuildError> { - try_create_node_configs( - consensus_params, - tracing_settings, - wallet_config, - ids, - blend_ports, - hosts, - ) -} - -#[derive(Debug, Error)] -pub enum NodeConfigBuildError { - #[error(transparent)] - Validation(#[from] ValidationError), - #[error(transparent)] - Providers(#[from] ProviderBuildError), - #[error(transparent)] - Base(#[from] BaseConfigError), - #[error(transparent)] - Genesis(#[from] ConsensusConfigError), - #[error("failed to allocate an available UDP port")] - PortAllocFailed, - #[error("failed to parse multiaddr '{value}': {message}")] - InvalidMultiaddr { value: String, message: String }, - #[error("failed to parse socket addr '{value}': {source}")] - InvalidSocketAddr { - value: String, - source: std::net::AddrParseError, - }, - #[error("failed to decode ed25519 secret key bytes")] - InvalidEd25519SecretKey, - #[error("config generation requires at least one consensus config")] - MissingConsensusConfig, - #[error("host/config length mismatch")] - HostConfigLenMismatch, - #[error(transparent)] - PeerRewrite(#[from] crate::network::peers::PeerRewriteError), -} - -pub fn try_create_node_configs( - consensus_params: &ConsensusParams, - tracing_settings: &TracingSettings, - wallet_config: &WalletConfig, - ids: Option>, - blend_ports: Option>, - hosts: Vec, -) -> Result, NodeConfigBuildError> { - let hosts = sort_hosts(hosts); - - validate_inputs(&hosts, consensus_params, ids.as_ref(), blend_ports.as_ref())?; - - let ids = generate_ids(consensus_params.n_participants, ids); - let blend_ports = resolve_blend_ports(&hosts, blend_ports); - - let BaseConfigs { - mut consensus_configs, - bootstrap_configs, - network_configs, - blend_configs, - } = build_base_configs( - &ids, - consensus_params, - &NetworkParams::default(), - wallet_config, - &blend_ports, - )?; - - let api_configs = build_api_configs(&hosts)?; - let mut configured_hosts = HashMap::new(); - - let initial_peer_templates: Vec> = network_configs - .iter() - .map(|cfg| cfg.backend.initial_peers.clone()) - .collect(); - let original_network_ports: Vec = network_configs - .iter() - .map(|cfg| cfg.backend.swarm.port) - .collect(); - let peer_ids = build_peer_ids(&ids)?; - - let host_network_init_peers = rewrite_initial_peers( - &initial_peer_templates, - &original_network_ports, - &hosts, - &peer_ids, - )?; - - let providers = try_create_providers(&hosts, &consensus_configs, &blend_configs)?; - - let first_consensus = consensus_configs - .get(0) - .ok_or(NodeConfigBuildError::MissingConsensusConfig)?; - let ledger_tx = first_consensus.genesis_tx.mantle_tx().ledger_tx.clone(); - let genesis_tx = create_genesis_tx_with_declarations(ledger_tx, providers)?; - - for c in &mut consensus_configs { - c.genesis_tx = genesis_tx.clone(); - sync_utxos_with_genesis(&mut c.utxos, &genesis_tx)?; - } - - let kms_configs = create_kms_configs(&blend_configs); - - for (i, host) in hosts.into_iter().enumerate() { - if i >= consensus_configs.len() - || i >= api_configs.len() - || i >= network_configs.len() - || i >= blend_configs.len() - || i >= host_network_init_peers.len() - || i >= kms_configs.len() - || i >= bootstrap_configs.len() - { - return Err(NodeConfigBuildError::HostConfigLenMismatch); - } - - let consensus_config = consensus_configs[i].clone(); - let api_config = api_configs[i].clone(); - - let mut network_config = network_configs[i].clone(); - network_config.backend.swarm.host = Ipv4Addr::UNSPECIFIED; - network_config.backend.swarm.port = host.network_port; - network_config.backend.initial_peers = host_network_init_peers[i].clone(); - let nat_value = format!("/ip4/{}/udp/{}/quic-v1", host.ip, host.network_port); - let nat_addr = Multiaddr::from_str(&nat_value).map_err(|source| { - NodeConfigBuildError::InvalidMultiaddr { - value: nat_value, - message: source.to_string(), - } - })?; - network_config.backend.swarm.nat_config = lb_libp2p::NatSettings::Static { - external_address: nat_addr, - }; - - let mut blend_config = blend_configs[i].clone(); - let blend_value = format!("/ip4/0.0.0.0/udp/{}/quic-v1", host.blend_port); - blend_config.backend_core.listening_address = - Multiaddr::from_str(&blend_value).map_err(|source| { - NodeConfigBuildError::InvalidMultiaddr { - value: blend_value, - message: source.to_string(), - } - })?; - - let tracing_config = - update_tracing_identifier(tracing_settings.clone(), host.identifier.clone()); - let time_config = default_time_config(); - - configured_hosts.insert( - host.clone(), - GeneralConfig { - consensus_config, - bootstrapping_config: bootstrap_configs[i].clone(), - network_config, - blend_config, - api_config, - tracing_config, - time_config, - kms_config: kms_configs[i].clone(), - }, - ); - } - - Ok(configured_hosts) -} - -fn generate_ids(count: usize, ids: Option>) -> Vec<[u8; 32]> { - ids.unwrap_or_else(|| { - let mut generated = vec![[0; 32]; count]; - - for id in &mut generated { - thread_rng().fill(id); - } - - generated - }) -} - -fn resolve_blend_ports(hosts: &[Host], blend_ports: Option>) -> Vec { - blend_ports.unwrap_or_else(|| hosts.iter().map(|h| h.blend_port).collect()) -} - -fn build_api_configs(hosts: &[Host]) -> Result, NodeConfigBuildError> { - hosts - .iter() - .map(|host| { - let address_value = format!("0.0.0.0:{}", host.api_port); - let testing_value = format!("0.0.0.0:{}", host.testing_http_port); - Ok(GeneralApiConfig { - address: address_value.parse().map_err(|source| { - NodeConfigBuildError::InvalidSocketAddr { - value: address_value, - source, - } - })?, - testing_http_address: testing_value.parse().map_err(|source| { - NodeConfigBuildError::InvalidSocketAddr { - value: testing_value, - source, - } - })?, - }) - }) - .collect() -} - -fn build_peer_ids(ids: &[[u8; 32]]) -> Result, NodeConfigBuildError> { - ids.iter() - .map(|bytes| { - let mut key_bytes = *bytes; - let secret = ed25519::SecretKey::try_from_bytes(&mut key_bytes) - .map_err(|_| NodeConfigBuildError::InvalidEd25519SecretKey)?; - Ok(PeerId::from_public_key( - &ed25519::Keypair::from(secret).public().into(), - )) - }) - .collect() -} diff --git a/testing-framework/tools/cfgsync_tf/src/config/kms.rs b/testing-framework/tools/cfgsync_tf/src/config/kms.rs deleted file mode 100644 index f9699bb..0000000 --- a/testing-framework/tools/cfgsync_tf/src/config/kms.rs +++ /dev/null @@ -1,24 +0,0 @@ -use lb_groth16::fr_to_bytes; -use lb_key_management_system_service::{backend::preload::PreloadKMSBackendSettings, keys::Key}; -use testing_framework_config::topology::configs::blend::GeneralBlendConfig; - -pub fn create_kms_configs(blend_configs: &[GeneralBlendConfig]) -> Vec { - blend_configs - .iter() - .map(|blend_conf| PreloadKMSBackendSettings { - keys: [ - ( - hex::encode(blend_conf.signer.public_key().to_bytes()), - Key::Ed25519(blend_conf.signer.clone()), - ), - ( - hex::encode(fr_to_bytes( - blend_conf.secret_zk_key.to_public_key().as_fr(), - )), - Key::Zk(blend_conf.secret_zk_key.clone()), - ), - ] - .into(), - }) - .collect() -} diff --git a/testing-framework/tools/cfgsync_tf/src/config/providers.rs b/testing-framework/tools/cfgsync_tf/src/config/providers.rs deleted file mode 100644 index c8fad65..0000000 --- a/testing-framework/tools/cfgsync_tf/src/config/providers.rs +++ /dev/null @@ -1,103 +0,0 @@ -use std::str::FromStr; - -use lb_core::sdp::{Locator, ServiceType}; -use lb_libp2p::Multiaddr; -use testing_framework_config::topology::configs::{ - blend::GeneralBlendConfig, - consensus::{GeneralConsensusConfig, ProviderInfo}, -}; -use thiserror::Error; - -use crate::host::Host; - -#[derive(Debug, Error)] -pub enum ProviderBuildError { - #[error("consensus configs are empty")] - MissingConsensusConfigs, - #[error("config length mismatch (hosts={hosts}, blend_configs={blend_configs})")] - HostConfigLenMismatch { hosts: usize, blend_configs: usize }, - #[error("consensus notes length mismatch, blend_notes={blend_notes})")] - NoteLenMismatch { blend_notes: usize }, - #[error("failed to parse multiaddr '{value}': {message}")] - InvalidMultiaddr { value: String, message: String }, -} - -pub fn try_create_providers( - hosts: &[Host], - consensus_configs: &[GeneralConsensusConfig], - blend_configs: &[GeneralBlendConfig], -) -> Result, ProviderBuildError> { - let first = consensus_configs - .first() - .ok_or(ProviderBuildError::MissingConsensusConfigs)?; - - validate_provider_inputs(hosts, first, blend_configs)?; - - let mut providers = Vec::with_capacity(blend_configs.len()); - providers.extend(build_blend_providers(hosts, first, blend_configs)?); - Ok(providers) -} - -pub fn create_providers( - hosts: &[Host], - consensus_configs: &[GeneralConsensusConfig], - blend_configs: &[GeneralBlendConfig], -) -> Result, ProviderBuildError> { - try_create_providers(hosts, consensus_configs, blend_configs) -} - -fn validate_provider_inputs( - hosts: &[Host], - first: &GeneralConsensusConfig, - blend_configs: &[GeneralBlendConfig], -) -> Result<(), ProviderBuildError> { - if hosts.len() != blend_configs.len() { - return Err(ProviderBuildError::HostConfigLenMismatch { - hosts: hosts.len(), - blend_configs: blend_configs.len(), - }); - } - - if first.blend_notes.len() < blend_configs.len() { - return Err(ProviderBuildError::NoteLenMismatch { - blend_notes: first.blend_notes.len(), - }); - } - - Ok(()) -} - -fn build_blend_providers( - hosts: &[Host], - first: &GeneralConsensusConfig, - blend_configs: &[GeneralBlendConfig], -) -> Result, ProviderBuildError> { - blend_configs - .iter() - .enumerate() - .map(|(i, blend_conf)| { - let locator = locator_for_host(hosts, i, hosts[i].blend_port)?; - Ok(ProviderInfo { - service_type: ServiceType::BlendNetwork, - provider_sk: blend_conf.signer.clone(), - zk_sk: blend_conf.secret_zk_key.clone(), - locator, - note: first.blend_notes[i].clone(), - }) - }) - .collect() -} - -fn locator_for_host( - hosts: &[Host], - index: usize, - port: u16, -) -> Result { - let value = format!("/ip4/{}/udp/{port}/quic-v1", hosts[index].ip); - let locator = - Multiaddr::from_str(&value).map_err(|source| ProviderBuildError::InvalidMultiaddr { - value, - message: source.to_string(), - })?; - Ok(Locator(locator)) -} diff --git a/testing-framework/tools/cfgsync_tf/src/config/tracing.rs b/testing-framework/tools/cfgsync_tf/src/config/tracing.rs deleted file mode 100644 index 04123c2..0000000 --- a/testing-framework/tools/cfgsync_tf/src/config/tracing.rs +++ /dev/null @@ -1,36 +0,0 @@ -use lb_tracing_service::{LoggerLayer, MetricsLayer, TracingLayer, TracingSettings}; -use testing_framework_config::topology::configs::tracing::GeneralTracingConfig; - -pub fn update_tracing_identifier( - settings: TracingSettings, - identifier: String, -) -> GeneralTracingConfig { - GeneralTracingConfig { - tracing_settings: TracingSettings { - logger: match settings.logger { - LoggerLayer::Loki(mut config) => { - config.host_identifier.clone_from(&identifier); - LoggerLayer::Loki(config) - } - other => other, - }, - tracing: match settings.tracing { - TracingLayer::Otlp(mut config) => { - config.service_name.clone_from(&identifier); - TracingLayer::Otlp(config) - } - other @ TracingLayer::None => other, - }, - filter: settings.filter, - metrics: match settings.metrics { - MetricsLayer::Otlp(mut config) => { - config.host_identifier = identifier; - MetricsLayer::Otlp(config) - } - other @ MetricsLayer::None => other, - }, - console: settings.console, - level: settings.level, - }, - } -} diff --git a/testing-framework/tools/cfgsync_tf/src/config/validation.rs b/testing-framework/tools/cfgsync_tf/src/config/validation.rs deleted file mode 100644 index 041b26a..0000000 --- a/testing-framework/tools/cfgsync_tf/src/config/validation.rs +++ /dev/null @@ -1,35 +0,0 @@ -use testing_framework_config::topology::{ - configs::consensus::ConsensusParams, - invariants::{TopologyInvariantError, validate_node_vectors}, -}; -use thiserror::Error; - -use crate::host::Host; - -#[derive(Debug, Error, PartialEq, Eq)] -pub enum ValidationError { - #[error("host count {actual} does not match participants {expected}")] - HostCountMismatch { actual: usize, expected: usize }, - #[error(transparent)] - TopologyInvariant(#[from] TopologyInvariantError), -} - -pub fn validate_inputs( - hosts: &[Host], - consensus_params: &ConsensusParams, - ids: Option<&Vec<[u8; 32]>>, - blend_ports: Option<&Vec>, -) -> Result<(), ValidationError> { - let expected = consensus_params.n_participants; - - if hosts.len() != expected { - return Err(ValidationError::HostCountMismatch { - actual: hosts.len(), - expected, - }); - } - - validate_node_vectors(expected, ids, blend_ports)?; - - Ok(()) -} diff --git a/testing-framework/tools/cfgsync_tf/src/host.rs b/testing-framework/tools/cfgsync_tf/src/host.rs deleted file mode 100644 index 49ac2b1..0000000 --- a/testing-framework/tools/cfgsync_tf/src/host.rs +++ /dev/null @@ -1,65 +0,0 @@ -use std::net::Ipv4Addr; - -use testing_framework_config::constants::{ - DEFAULT_API_PORT, DEFAULT_BLEND_NETWORK_PORT, DEFAULT_LIBP2P_NETWORK_PORT, -}; - -#[derive(Copy, Clone, Eq, PartialEq, Hash)] -pub enum HostKind { - Node, -} - -#[derive(Eq, PartialEq, Hash, Clone)] -pub struct Host { - pub kind: HostKind, - pub ip: Ipv4Addr, - pub identifier: String, - pub network_port: u16, - pub blend_port: u16, - pub api_port: u16, - pub testing_http_port: u16, -} - -#[derive(Clone, Copy)] -pub struct PortOverrides { - pub network_port: Option, - pub blend_port: Option, - pub api_port: Option, - pub testing_http_port: Option, -} - -impl Host { - fn from_parts(kind: HostKind, ip: Ipv4Addr, identifier: String, ports: PortOverrides) -> Self { - Self { - kind, - ip, - identifier, - network_port: ports.network_port.unwrap_or(DEFAULT_LIBP2P_NETWORK_PORT), - blend_port: ports.blend_port.unwrap_or(DEFAULT_BLEND_NETWORK_PORT), - api_port: ports.api_port.unwrap_or(DEFAULT_API_PORT), - testing_http_port: ports.testing_http_port.unwrap_or(DEFAULT_API_PORT + 1), - } - } - - #[must_use] - pub fn node_from_ip(ip: Ipv4Addr, identifier: String, ports: PortOverrides) -> Self { - Self::from_parts(HostKind::Node, ip, identifier, ports) - } -} - -#[must_use] -pub fn sort_hosts(mut hosts: Vec) -> Vec { - hosts.sort_by_key(|host| { - let index = host - .identifier - .rsplit('-') - .next() - .and_then(|raw| raw.parse::().ok()) - .unwrap_or(0); - let kind = match host.kind { - HostKind::Node => 0, - }; - (kind, index) - }); - hosts -} diff --git a/testing-framework/tools/cfgsync_tf/src/lib.rs b/testing-framework/tools/cfgsync_tf/src/lib.rs deleted file mode 100644 index 2628d64..0000000 --- a/testing-framework/tools/cfgsync_tf/src/lib.rs +++ /dev/null @@ -1,12 +0,0 @@ -pub mod client; -pub mod host; -pub mod config { - pub mod builder; - pub mod kms; - pub mod providers; - pub mod tracing; - pub mod validation; -} -pub mod network; -pub mod repo; -pub mod server; diff --git a/testing-framework/tools/cfgsync_tf/src/network/address.rs b/testing-framework/tools/cfgsync_tf/src/network/address.rs deleted file mode 100644 index 462a163..0000000 --- a/testing-framework/tools/cfgsync_tf/src/network/address.rs +++ /dev/null @@ -1,19 +0,0 @@ -use lb_libp2p::{Multiaddr, Protocol}; - -pub fn extract_udp_port(addr: &Multiaddr) -> Option { - addr.iter().find_map(|protocol| { - if let Protocol::Udp(port) = protocol { - Some(port) - } else { - None - } - }) -} - -pub fn find_matching_host(addr: &Multiaddr, original_ports: &[u16]) -> Option { - extract_udp_port(addr).and_then(|port| { - original_ports - .iter() - .position(|candidate| *candidate == port) - }) -} diff --git a/testing-framework/tools/cfgsync_tf/src/network/mod.rs b/testing-framework/tools/cfgsync_tf/src/network/mod.rs deleted file mode 100644 index ec42960..0000000 --- a/testing-framework/tools/cfgsync_tf/src/network/mod.rs +++ /dev/null @@ -1,4 +0,0 @@ -pub mod address; -pub mod peers; - -pub use peers::rewrite_initial_peers; diff --git a/testing-framework/tools/cfgsync_tf/src/network/peers.rs b/testing-framework/tools/cfgsync_tf/src/network/peers.rs deleted file mode 100644 index 138a96f..0000000 --- a/testing-framework/tools/cfgsync_tf/src/network/peers.rs +++ /dev/null @@ -1,69 +0,0 @@ -use lb_libp2p::{Multiaddr, PeerId, Protocol}; -use thiserror::Error; - -use super::address::find_matching_host; -use crate::host::Host; - -#[derive(Debug, Error)] -pub enum PeerRewriteError { - #[error("hosts and peer ids length mismatch (hosts={hosts}, peer_ids={peer_ids})")] - HostPeerLenMismatch { hosts: usize, peer_ids: usize }, - #[error("peer index {peer_idx} out of bounds for hosts (len={hosts_len})")] - HostIndexOutOfBounds { peer_idx: usize, hosts_len: usize }, - #[error("peer index {peer_idx} out of bounds for peer ids (len={peer_ids_len})")] - PeerIdIndexOutOfBounds { - peer_idx: usize, - peer_ids_len: usize, - }, -} - -pub fn rewrite_initial_peers( - templates: &[Vec], - original_ports: &[u16], - hosts: &[Host], - peer_ids: &[PeerId], -) -> Result>, PeerRewriteError> { - if hosts.len() != peer_ids.len() { - return Err(PeerRewriteError::HostPeerLenMismatch { - hosts: hosts.len(), - peer_ids: peer_ids.len(), - }); - } - - let mut rewritten = Vec::with_capacity(templates.len()); - for (node_idx, peers) in templates.iter().enumerate() { - let mut node_peers = Vec::new(); - for addr in peers { - let Some(peer_idx) = find_matching_host(addr, original_ports) else { - continue; - }; - if peer_idx == node_idx { - continue; - } - - let host = hosts - .get(peer_idx) - .ok_or(PeerRewriteError::HostIndexOutOfBounds { - peer_idx, - hosts_len: hosts.len(), - })?; - let peer_id = - peer_ids - .get(peer_idx) - .ok_or(PeerRewriteError::PeerIdIndexOutOfBounds { - peer_idx, - peer_ids_len: peer_ids.len(), - })?; - - let mut rewritten_addr = Multiaddr::empty(); - rewritten_addr.push(Protocol::Ip4(host.ip)); - rewritten_addr.push(Protocol::Udp(host.network_port)); - rewritten_addr.push(Protocol::QuicV1); - rewritten_addr.push(Protocol::P2p((*peer_id).into())); - node_peers.push(rewritten_addr); - } - rewritten.push(node_peers); - } - - Ok(rewritten) -} diff --git a/testing-framework/tools/cfgsync_tf/src/repo.rs b/testing-framework/tools/cfgsync_tf/src/repo.rs deleted file mode 100644 index 6ccf57e..0000000 --- a/testing-framework/tools/cfgsync_tf/src/repo.rs +++ /dev/null @@ -1,182 +0,0 @@ -use std::{collections::HashMap, sync::Arc, time::Duration}; - -use lb_tracing_service::TracingSettings; -use testing_framework_config::topology::configs::{ - GeneralConfig, consensus::ConsensusParams, wallet::WalletConfig, -}; -use tokio::{ - sync::{Mutex, oneshot::Sender}, - time::timeout, -}; -use tracing::{error, info, warn}; - -use crate::{config::builder::try_create_node_configs, host::Host, server::CfgSyncConfig}; - -const HOST_POLLING_INTERVAL: Duration = Duration::from_secs(1); - -pub enum RepoResponse { - Config(Box), - Timeout, - Error(String), -} - -pub struct ConfigRepo { - waiting_hosts: Mutex>>, - n_hosts: usize, - consensus_params: ConsensusParams, - tracing_settings: TracingSettings, - wallet_config: WalletConfig, - timeout_duration: Duration, - ids: Option>, - blend_ports: Option>, -} - -impl From for Arc { - fn from(config: CfgSyncConfig) -> Self { - let consensus_params = config.to_consensus_params(); - let tracing_settings = config.to_tracing_settings(); - let wallet_config = config.wallet_config(); - let ids = config.ids; - let blend_ports = config.blend_ports; - - ConfigRepo::new( - config.n_hosts, - consensus_params, - tracing_settings, - wallet_config, - ids, - blend_ports, - Duration::from_secs(config.timeout), - ) - } -} - -impl ConfigRepo { - #[must_use] - pub fn new( - n_hosts: usize, - consensus_params: ConsensusParams, - tracing_settings: TracingSettings, - wallet_config: WalletConfig, - ids: Option>, - blend_ports: Option>, - timeout_duration: Duration, - ) -> Arc { - let repo = Arc::new(Self { - waiting_hosts: Mutex::new(HashMap::new()), - n_hosts, - consensus_params, - tracing_settings, - wallet_config, - ids, - blend_ports, - timeout_duration, - }); - - let repo_clone = Arc::clone(&repo); - tokio::spawn(async move { - repo_clone.run().await; - }); - - repo - } - - pub async fn register(&self, host: Host, reply_tx: Sender) { - let mut waiting_hosts = self.waiting_hosts.lock().await; - waiting_hosts.insert(host, reply_tx); - } - - async fn run(&self) { - let timeout_duration = self.timeout_duration; - - if wait_for_hosts_with_timeout(self, timeout_duration).await { - info!("all hosts have announced their IPs"); - - let mut waiting_hosts = take_waiting_hosts(self).await; - let hosts = waiting_hosts.keys().cloned().collect(); - - let configs = match generate_node_configs(self, hosts) { - Ok(configs) => configs, - Err(message) => { - send_error_to_all(&mut waiting_hosts, &message); - return; - } - }; - - send_configs_to_all_hosts(&mut waiting_hosts, &configs); - return; - } - - warn!("timeout: not all hosts announced within the time limit"); - let mut waiting_hosts = take_waiting_hosts(self).await; - send_timeout_to_all(&mut waiting_hosts); - } - - async fn wait_for_hosts(&self) { - loop { - let len = { self.waiting_hosts.lock().await.len() }; - if len >= self.n_hosts { - break; - } - tokio::time::sleep(HOST_POLLING_INTERVAL).await; - } - } -} - -async fn wait_for_hosts_with_timeout(repo: &ConfigRepo, timeout_duration: Duration) -> bool { - timeout(timeout_duration, repo.wait_for_hosts()) - .await - .is_ok() -} - -async fn take_waiting_hosts(repo: &ConfigRepo) -> HashMap> { - let mut guard = repo.waiting_hosts.lock().await; - std::mem::take(&mut *guard) -} - -fn generate_node_configs( - repo: &ConfigRepo, - hosts: Vec, -) -> Result, String> { - try_create_node_configs( - &repo.consensus_params, - &repo.tracing_settings, - &repo.wallet_config, - repo.ids.clone(), - repo.blend_ports.clone(), - hosts, - ) - .map_err(|err| { - error!(error = %err, "failed to generate node configs"); - err.to_string() - }) -} - -fn send_error_to_all(waiting_hosts: &mut HashMap>, message: &str) { - for (_, sender) in waiting_hosts.drain() { - let _ = sender.send(RepoResponse::Error(message.to_string())); - } -} - -fn send_timeout_to_all(waiting_hosts: &mut HashMap>) { - for (_, sender) in waiting_hosts.drain() { - let _ = sender.send(RepoResponse::Timeout); - } -} - -fn send_configs_to_all_hosts( - waiting_hosts: &mut HashMap>, - configs: &HashMap, -) { - for (host, sender) in waiting_hosts.drain() { - match configs.get(&host) { - Some(config) => { - let _ = sender.send(RepoResponse::Config(Box::new(config.to_owned()))); - } - None => { - warn!(identifier = %host.identifier, "missing config for host"); - let _ = sender.send(RepoResponse::Error("missing config for host".to_string())); - } - } - } -} diff --git a/testing-framework/tools/cfgsync_tf/src/server.rs b/testing-framework/tools/cfgsync_tf/src/server.rs deleted file mode 100644 index 7a79126..0000000 --- a/testing-framework/tools/cfgsync_tf/src/server.rs +++ /dev/null @@ -1,206 +0,0 @@ -use std::{fs, net::Ipv4Addr, num::NonZero, path::PathBuf, sync::Arc, time::Duration}; - -// Bootstrap Constants -const DEFAULT_DELAY_BEFORE_NEW_DOWNLOAD_SECS: u64 = 10; -const DEFAULT_MAX_ORPHAN_CACHE_SIZE: usize = 5; - -use axum::{Json, Router, extract::State, http::StatusCode, response::IntoResponse, routing::post}; -use lb_tracing_service::TracingSettings; -use lb_utils::bounded_duration::{MinimalBoundedDuration, SECOND}; -use serde::{Deserialize, Serialize}; -use serde_json::{Value, json, to_value}; -use serde_with::serde_as; -use testing_framework_config::{ - nodes::node::create_node_config, - topology::configs::{consensus::ConsensusParams, wallet::WalletConfig}, -}; -use tokio::sync::oneshot::channel; - -use crate::{ - host::{Host, PortOverrides}, - repo::{ConfigRepo, RepoResponse}, -}; - -#[serde_as] -#[derive(Debug, Deserialize)] -pub struct CfgSyncConfig { - pub port: u16, - pub n_hosts: usize, - pub timeout: u64, - - // ConsensusConfig related parameters - pub security_param: NonZero, - pub active_slot_coeff: f64, - pub wallet: WalletConfig, - #[serde(default)] - pub ids: Option>, - #[serde(default)] - pub blend_ports: Option>, - - // DaConfig related parameters - pub subnetwork_size: usize, - pub dispersal_factor: usize, - pub num_samples: u16, - pub num_subnets: u16, - #[serde_as(as = "MinimalBoundedDuration<0, SECOND>")] - pub old_blobs_check_interval: Duration, - #[serde_as(as = "MinimalBoundedDuration<0, SECOND>")] - pub blobs_validity_duration: Duration, - pub min_dispersal_peers: usize, - pub min_replication_peers: usize, - #[serde_as(as = "MinimalBoundedDuration<0, SECOND>")] - pub monitor_failure_time_window: Duration, - #[serde_as(as = "MinimalBoundedDuration<0, SECOND>")] - pub balancer_interval: Duration, - pub retry_shares_limit: usize, - pub retry_commitments_limit: usize, - - // Tracing params - pub tracing_settings: TracingSettings, -} - -impl CfgSyncConfig { - pub fn load_from_file(file_path: &PathBuf) -> Result { - let config_content = fs::read_to_string(file_path) - .map_err(|err| format!("Failed to read config file: {err}"))?; - serde_yaml::from_str(&config_content) - .map_err(|err| format!("Failed to parse config file: {err}")) - } - - #[must_use] - pub const fn to_consensus_params(&self) -> ConsensusParams { - ConsensusParams { - n_participants: self.n_hosts, - security_param: self.security_param, - active_slot_coeff: self.active_slot_coeff, - } - } - - #[must_use] - pub fn to_tracing_settings(&self) -> TracingSettings { - self.tracing_settings.clone() - } - - #[must_use] - pub fn wallet_config(&self) -> WalletConfig { - self.wallet.clone() - } -} - -#[derive(Serialize, Deserialize)] -pub struct ClientIp { - pub ip: Ipv4Addr, - pub identifier: String, - #[serde(default)] - pub network_port: Option, - #[serde(default)] - pub blend_port: Option, - #[serde(default)] - pub api_port: Option, - #[serde(default)] - pub testing_http_port: Option, -} - -async fn node_config( - State(config_repo): State>, - Json(payload): Json, -) -> impl IntoResponse { - let ClientIp { - ip, - identifier, - network_port, - blend_port, - api_port, - testing_http_port, - } = payload; - let ports = PortOverrides { - network_port, - blend_port, - api_port, - testing_http_port, - }; - - let (reply_tx, reply_rx) = channel(); - config_repo - .register(Host::node_from_ip(ip, identifier, ports), reply_tx) - .await; - - (reply_rx.await).map_or_else( - |_| (StatusCode::INTERNAL_SERVER_ERROR, "Error receiving config").into_response(), - |config_response| match config_response { - RepoResponse::Config(config) => { - let config = create_node_config(*config); - let mut value = match to_value(&config) { - Ok(value) => value, - Err(err) => { - return ( - StatusCode::INTERNAL_SERVER_ERROR, - format!("failed to serialize node config: {err}"), - ) - .into_response(); - } - }; - - inject_defaults(&mut value); - override_api_ports(&mut value, &ports); - override_min_session_members(&mut value); - - (StatusCode::OK, Json(value)).into_response() - } - RepoResponse::Timeout => (StatusCode::REQUEST_TIMEOUT).into_response(), - RepoResponse::Error(message) => { - (StatusCode::INTERNAL_SERVER_ERROR, message).into_response() - } - }, - ) -} - -pub fn cfgsync_app(config_repo: Arc) -> Router { - Router::new() - .route("/node", post(node_config)) - .with_state(config_repo) -} - -fn override_api_ports(config: &mut Value, ports: &PortOverrides) { - if let Some(api_port) = ports.api_port { - if let Some(address) = config.pointer_mut("/http/backend_settings/address") { - *address = json!(format!("0.0.0.0:{api_port}")); - } - } - - if let Some(testing_port) = ports.testing_http_port { - if let Some(address) = config.pointer_mut("/testing_http/backend_settings/address") { - *address = json!(format!("0.0.0.0:{testing_port}")); - } - } -} - -fn override_min_session_members(config: &mut Value) { - if let Some(value) = config.pointer_mut("/da_network/min_session_members") { - *value = json!(1); - } -} - -fn inject_defaults(config: &mut Value) { - if let Some(cryptarchia) = config - .get_mut("cryptarchia") - .and_then(|v| v.as_object_mut()) - { - let bootstrap = cryptarchia.entry("bootstrap").or_insert_with(|| json!({})); - if let Some(bootstrap_map) = bootstrap.as_object_mut() { - bootstrap_map.entry("ibd").or_insert_with( - || json!({ "peers": [], "delay_before_new_download": { "secs": DEFAULT_DELAY_BEFORE_NEW_DOWNLOAD_SECS, "nanos": 0 } }), - ); - } - - cryptarchia - .entry("network_adapter_settings") - .or_insert_with(|| json!({ "topic": "/cryptarchia/proto" })); - - cryptarchia.entry("sync").or_insert_with(|| { - json!({ - "orphan": { "max_orphan_cache_size": DEFAULT_MAX_ORPHAN_CACHE_SIZE } - }) - }); - } -} diff --git a/testing-framework/workflows/Cargo.toml b/testing-framework/workflows/Cargo.toml deleted file mode 100644 index 9b575dd..0000000 --- a/testing-framework/workflows/Cargo.toml +++ /dev/null @@ -1,28 +0,0 @@ -[package] -categories.workspace = true -description.workspace = true -edition.workspace = true -keywords.workspace = true -license.workspace = true -name = "testing-framework-workflows" -readme.workspace = true -repository.workspace = true -version = "0.1.0" - -[lints] -workspace = true - -[dependencies] -async-trait = "0.1" -lb-core = { workspace = true } -lb-key-management-system-service = { workspace = true } -rand = { workspace = true } -reqwest = { workspace = true } -testing-framework-config = { workspace = true } -testing-framework-core = { workspace = true } -thiserror = { workspace = true } -tokio = { features = ["macros", "net", "rt-multi-thread", "time"], workspace = true } -tracing = { workspace = true } - -[package.metadata.cargo-machete] -ignored = ["chain-service"] diff --git a/testing-framework/workflows/src/builder/mod.rs b/testing-framework/workflows/src/builder/mod.rs deleted file mode 100644 index a144253..0000000 --- a/testing-framework/workflows/src/builder/mod.rs +++ /dev/null @@ -1,622 +0,0 @@ -use std::{ - num::{NonZeroU64, NonZeroUsize}, - time::Duration, -}; - -use testing_framework_core::{ - scenario::{Builder as CoreScenarioBuilder, NodeControlCapability, ObservabilityCapability}, - topology::configs::wallet::WalletConfig, -}; - -use crate::{ - expectations::ConsensusLiveness, - workloads::{chaos::RandomRestartWorkload, transaction}, -}; - -#[derive(Debug, thiserror::Error)] -pub enum BuilderInputError { - #[error("{field} must be non-zero")] - ZeroValue { field: &'static str }, - #[error("invalid url for {field}: '{value}': {message}")] - InvalidUrl { - field: &'static str, - value: String, - message: String, - }, -} - -/// Extension methods for building test scenarios with common patterns. -pub trait ScenarioBuilderExt: Sized { - /// Configure a transaction flow workload. - fn transactions(self) -> TransactionFlowBuilder; - - /// Configure a transaction flow workload via closure. - fn transactions_with( - self, - f: impl FnOnce(TransactionFlowBuilder) -> TransactionFlowBuilder, - ) -> CoreScenarioBuilder; - #[must_use] - /// Attach a consensus liveness expectation. - fn expect_consensus_liveness(self) -> Self; - - #[must_use] - /// Seed deterministic wallets with total funds split across `users`. - fn initialize_wallet(self, total_funds: u64, users: usize) -> Self; -} - -impl ScenarioBuilderExt for CoreScenarioBuilder { - fn transactions(self) -> TransactionFlowBuilder { - TransactionFlowBuilder::new(self) - } - - fn transactions_with( - self, - f: impl FnOnce(TransactionFlowBuilder) -> TransactionFlowBuilder, - ) -> CoreScenarioBuilder { - f(self.transactions()).apply() - } - - fn expect_consensus_liveness(self) -> Self { - self.with_expectation(ConsensusLiveness::default()) - } - - fn initialize_wallet(self, total_funds: u64, users: usize) -> Self { - let Some(user_count) = NonZeroUsize::new(users) else { - tracing::warn!( - users, - "wallet user count must be non-zero; ignoring initialize_wallet" - ); - return self; - }; - self.with_wallet_config(WalletConfig::uniform(total_funds, user_count)) - } -} - -/// Observability helpers for scenarios that want to reuse external telemetry. -pub trait ObservabilityBuilderExt: Sized { - /// Reuse an existing Prometheus endpoint instead of provisioning one (k8s - /// runner). - fn with_metrics_query_url( - self, - url: reqwest::Url, - ) -> CoreScenarioBuilder; - - /// Convenience wrapper that parses a URL string (panics if invalid). - fn with_metrics_query_url_str(self, url: &str) -> CoreScenarioBuilder; - - /// Like `with_metrics_query_url_str`, but returns an error instead of - /// panicking. - fn try_with_metrics_query_url_str( - self, - url: &str, - ) -> Result, BuilderInputError>; - - /// Configure the OTLP HTTP metrics ingest endpoint to which nodes should - /// export metrics (must be a full URL, including any required path). - fn with_metrics_otlp_ingest_url( - self, - url: reqwest::Url, - ) -> CoreScenarioBuilder; - - /// Convenience wrapper that parses a URL string (panics if invalid). - fn with_metrics_otlp_ingest_url_str( - self, - url: &str, - ) -> CoreScenarioBuilder; - - /// Like `with_metrics_otlp_ingest_url_str`, but returns an error instead of - /// panicking. - fn try_with_metrics_otlp_ingest_url_str( - self, - url: &str, - ) -> Result, BuilderInputError>; - - /// Optional Grafana base URL for printing/logging (human access). - fn with_grafana_url(self, url: reqwest::Url) -> CoreScenarioBuilder; - - /// Convenience wrapper that parses a URL string (panics if invalid). - fn with_grafana_url_str(self, url: &str) -> CoreScenarioBuilder; - - /// Like `with_grafana_url_str`, but returns an error instead of panicking. - fn try_with_grafana_url_str( - self, - url: &str, - ) -> Result, BuilderInputError>; - - #[deprecated(note = "use with_metrics_query_url")] - fn with_external_prometheus( - self, - url: reqwest::Url, - ) -> CoreScenarioBuilder { - self.with_metrics_query_url(url) - } - - #[deprecated(note = "use with_metrics_query_url_str")] - fn with_external_prometheus_str( - self, - url: &str, - ) -> CoreScenarioBuilder { - self.with_metrics_query_url_str(url) - } - - #[deprecated(note = "use with_metrics_otlp_ingest_url")] - fn with_external_otlp_metrics_endpoint( - self, - url: reqwest::Url, - ) -> CoreScenarioBuilder { - self.with_metrics_otlp_ingest_url(url) - } - - #[deprecated(note = "use with_metrics_otlp_ingest_url_str")] - fn with_external_otlp_metrics_endpoint_str( - self, - url: &str, - ) -> CoreScenarioBuilder { - self.with_metrics_otlp_ingest_url_str(url) - } -} - -impl ObservabilityBuilderExt for CoreScenarioBuilder<()> { - fn with_metrics_query_url( - self, - url: reqwest::Url, - ) -> CoreScenarioBuilder { - self.with_capabilities(ObservabilityCapability { - metrics_query_url: Some(url), - metrics_otlp_ingest_url: None, - grafana_url: None, - }) - } - - fn with_metrics_query_url_str(self, url: &str) -> CoreScenarioBuilder { - match reqwest::Url::parse(url) { - Ok(parsed) => self.with_metrics_query_url(parsed), - Err(err) => { - tracing::warn!( - url, - error = %err, - "metrics query url must be valid; leaving metrics_query_url unset" - ); - self.with_capabilities(ObservabilityCapability { - metrics_query_url: None, - metrics_otlp_ingest_url: None, - grafana_url: None, - }) - } - } - } - - fn try_with_metrics_query_url_str( - self, - url: &str, - ) -> Result, BuilderInputError> { - let parsed = reqwest::Url::parse(url).map_err(|err| BuilderInputError::InvalidUrl { - field: "metrics_query_url", - value: url.to_string(), - message: err.to_string(), - })?; - Ok(self.with_metrics_query_url(parsed)) - } - - fn with_metrics_otlp_ingest_url( - self, - url: reqwest::Url, - ) -> CoreScenarioBuilder { - self.with_capabilities(ObservabilityCapability { - metrics_query_url: None, - metrics_otlp_ingest_url: Some(url), - grafana_url: None, - }) - } - - fn with_metrics_otlp_ingest_url_str( - self, - url: &str, - ) -> CoreScenarioBuilder { - match reqwest::Url::parse(url) { - Ok(parsed) => self.with_metrics_otlp_ingest_url(parsed), - Err(err) => { - tracing::warn!( - url, - error = %err, - "metrics OTLP ingest url must be valid; leaving metrics_otlp_ingest_url unset" - ); - self.with_capabilities(ObservabilityCapability { - metrics_query_url: None, - metrics_otlp_ingest_url: None, - grafana_url: None, - }) - } - } - } - - fn try_with_metrics_otlp_ingest_url_str( - self, - url: &str, - ) -> Result, BuilderInputError> { - let parsed = reqwest::Url::parse(url).map_err(|err| BuilderInputError::InvalidUrl { - field: "metrics_otlp_ingest_url", - value: url.to_string(), - message: err.to_string(), - })?; - Ok(self.with_metrics_otlp_ingest_url(parsed)) - } - - fn with_grafana_url(self, url: reqwest::Url) -> CoreScenarioBuilder { - self.with_capabilities(ObservabilityCapability { - metrics_query_url: None, - metrics_otlp_ingest_url: None, - grafana_url: Some(url), - }) - } - - fn with_grafana_url_str(self, url: &str) -> CoreScenarioBuilder { - match reqwest::Url::parse(url) { - Ok(parsed) => self.with_grafana_url(parsed), - Err(err) => { - tracing::warn!( - url, - error = %err, - "grafana url must be valid; leaving grafana_url unset" - ); - self.with_capabilities(ObservabilityCapability { - metrics_query_url: None, - metrics_otlp_ingest_url: None, - grafana_url: None, - }) - } - } - } - - fn try_with_grafana_url_str( - self, - url: &str, - ) -> Result, BuilderInputError> { - let parsed = reqwest::Url::parse(url).map_err(|err| BuilderInputError::InvalidUrl { - field: "grafana_url", - value: url.to_string(), - message: err.to_string(), - })?; - Ok(self.with_grafana_url(parsed)) - } -} - -impl ObservabilityBuilderExt for CoreScenarioBuilder { - fn with_metrics_query_url( - mut self, - url: reqwest::Url, - ) -> CoreScenarioBuilder { - self.capabilities_mut().metrics_query_url = Some(url); - self - } - - fn with_metrics_query_url_str(self, url: &str) -> CoreScenarioBuilder { - match reqwest::Url::parse(url) { - Ok(parsed) => self.with_metrics_query_url(parsed), - Err(err) => { - tracing::warn!( - url, - error = %err, - "metrics query url must be valid; leaving metrics_query_url unchanged" - ); - self - } - } - } - - fn try_with_metrics_query_url_str( - self, - url: &str, - ) -> Result, BuilderInputError> { - let parsed = reqwest::Url::parse(url).map_err(|err| BuilderInputError::InvalidUrl { - field: "metrics_query_url", - value: url.to_string(), - message: err.to_string(), - })?; - Ok(self.with_metrics_query_url(parsed)) - } - - fn with_metrics_otlp_ingest_url( - mut self, - url: reqwest::Url, - ) -> CoreScenarioBuilder { - self.capabilities_mut().metrics_otlp_ingest_url = Some(url); - self - } - - fn with_metrics_otlp_ingest_url_str( - self, - url: &str, - ) -> CoreScenarioBuilder { - match reqwest::Url::parse(url) { - Ok(parsed) => self.with_metrics_otlp_ingest_url(parsed), - Err(err) => { - tracing::warn!( - url, - error = %err, - "metrics OTLP ingest url must be valid; leaving metrics_otlp_ingest_url unchanged" - ); - self - } - } - } - - fn try_with_metrics_otlp_ingest_url_str( - self, - url: &str, - ) -> Result, BuilderInputError> { - let parsed = reqwest::Url::parse(url).map_err(|err| BuilderInputError::InvalidUrl { - field: "metrics_otlp_ingest_url", - value: url.to_string(), - message: err.to_string(), - })?; - Ok(self.with_metrics_otlp_ingest_url(parsed)) - } - - fn with_grafana_url( - mut self, - url: reqwest::Url, - ) -> CoreScenarioBuilder { - self.capabilities_mut().grafana_url = Some(url); - self - } - - fn with_grafana_url_str(self, url: &str) -> CoreScenarioBuilder { - match reqwest::Url::parse(url) { - Ok(parsed) => self.with_grafana_url(parsed), - Err(err) => { - tracing::warn!( - url, - error = %err, - "grafana url must be valid; leaving grafana_url unchanged" - ); - self - } - } - } - - fn try_with_grafana_url_str( - self, - url: &str, - ) -> Result, BuilderInputError> { - let parsed = reqwest::Url::parse(url).map_err(|err| BuilderInputError::InvalidUrl { - field: "grafana_url", - value: url.to_string(), - message: err.to_string(), - })?; - Ok(self.with_grafana_url(parsed)) - } -} - -/// Builder for transaction workloads. -pub struct TransactionFlowBuilder { - builder: CoreScenarioBuilder, - rate: NonZeroU64, - users: Option, -} - -impl TransactionFlowBuilder { - const fn default_rate() -> NonZeroU64 { - NonZeroU64::MIN - } - - const fn new(builder: CoreScenarioBuilder) -> Self { - Self { - builder, - rate: Self::default_rate(), - users: None, - } - } - - #[must_use] - /// Set transaction submission rate per block (ignores zero). - pub fn rate(mut self, rate: u64) -> Self { - match NonZeroU64::new(rate) { - Some(rate) => self.rate = rate, - None => tracing::warn!( - rate, - "transaction rate must be non-zero; keeping previous rate" - ), - } - self - } - - /// Like `rate`, but returns an error instead of panicking. - pub fn try_rate(self, rate: u64) -> Result { - let Some(rate) = NonZeroU64::new(rate) else { - return Err(BuilderInputError::ZeroValue { - field: "transaction_rate", - }); - }; - Ok(self.rate_per_block(rate)) - } - - #[must_use] - /// Set transaction submission rate per block. - pub const fn rate_per_block(mut self, rate: NonZeroU64) -> Self { - self.rate = rate; - self - } - - #[must_use] - /// Limit how many users will submit transactions. - pub fn users(mut self, users: usize) -> Self { - match NonZeroUsize::new(users) { - Some(value) => self.users = Some(value), - None => tracing::warn!( - users, - "transaction user count must be non-zero; keeping previous setting" - ), - }; - self - } - - /// Like `users`, but returns an error instead of panicking. - pub fn try_users(mut self, users: usize) -> Result { - let Some(value) = NonZeroUsize::new(users) else { - return Err(BuilderInputError::ZeroValue { - field: "transaction_users", - }); - }; - self.users = Some(value); - Ok(self) - } - - #[must_use] - /// Attach the transaction workload to the scenario. - pub fn apply(mut self) -> CoreScenarioBuilder { - let workload = transaction::Workload::new(self.rate).with_user_limit(self.users); - - tracing::info!( - rate = self.rate.get(), - users = self.users.map(|u| u.get()), - "attaching transaction workload" - ); - - self.builder = self.builder.with_workload(workload); - self.builder - } -} - -/// Chaos helpers for scenarios that can control nodes. -pub trait ChaosBuilderExt: Sized { - /// Entry point into chaos workloads. - fn chaos(self) -> ChaosBuilder; - - /// Configure chaos via closure. - fn chaos_with( - self, - f: impl FnOnce(ChaosBuilder) -> CoreScenarioBuilder, - ) -> CoreScenarioBuilder; -} - -impl ChaosBuilderExt for CoreScenarioBuilder { - fn chaos(self) -> ChaosBuilder { - ChaosBuilder { builder: self } - } - - fn chaos_with( - self, - f: impl FnOnce(ChaosBuilder) -> CoreScenarioBuilder, - ) -> CoreScenarioBuilder { - f(self.chaos()) - } -} - -/// Chaos workload builder root. -/// -/// Start with `chaos()` on a scenario builder, then select a workload variant -/// such as `restart()`. -pub struct ChaosBuilder { - builder: CoreScenarioBuilder, -} - -impl ChaosBuilder { - /// Finish without adding a chaos workload. - #[must_use] - pub fn apply(self) -> CoreScenarioBuilder { - self.builder - } - - /// Configure a random restarts chaos workload. - #[must_use] - pub fn restart(self) -> ChaosRestartBuilder { - const DEFAULT_CHAOS_MIN_DELAY: Duration = Duration::from_secs(10); - const DEFAULT_CHAOS_MAX_DELAY: Duration = Duration::from_secs(30); - const DEFAULT_CHAOS_TARGET_COOLDOWN: Duration = Duration::from_secs(60); - - ChaosRestartBuilder { - builder: self.builder, - min_delay: DEFAULT_CHAOS_MIN_DELAY, - max_delay: DEFAULT_CHAOS_MAX_DELAY, - target_cooldown: DEFAULT_CHAOS_TARGET_COOLDOWN, - include_nodes: true, - } - } -} - -pub struct ChaosRestartBuilder { - builder: CoreScenarioBuilder, - min_delay: Duration, - max_delay: Duration, - target_cooldown: Duration, - include_nodes: bool, -} - -impl ChaosRestartBuilder { - #[must_use] - /// Set the minimum delay between restart operations. - pub fn min_delay(mut self, delay: Duration) -> Self { - if delay.is_zero() { - tracing::warn!("chaos restart min delay must be non-zero; keeping previous value"); - } else { - self.min_delay = delay; - } - self - } - - #[must_use] - /// Set the maximum delay between restart operations. - pub fn max_delay(mut self, delay: Duration) -> Self { - if delay.is_zero() { - tracing::warn!("chaos restart max delay must be non-zero; keeping previous value"); - } else { - self.max_delay = delay; - } - self - } - - #[must_use] - /// Cooldown to allow between restarts for a target node. - pub fn target_cooldown(mut self, cooldown: Duration) -> Self { - if cooldown.is_zero() { - tracing::warn!( - "chaos restart target cooldown must be non-zero; keeping previous value" - ); - } else { - self.target_cooldown = cooldown; - } - self - } - - #[must_use] - /// Include nodes in the restart target set. - pub const fn include_nodes(mut self, enabled: bool) -> Self { - self.include_nodes = enabled; - self - } - - #[must_use] - /// Finalize the chaos restart workload and attach it to the scenario. - pub fn apply(mut self) -> CoreScenarioBuilder { - if self.min_delay > self.max_delay { - tracing::warn!( - min_delay_secs = self.min_delay.as_secs(), - max_delay_secs = self.max_delay.as_secs(), - "chaos restart min delay exceeds max delay; swapping" - ); - std::mem::swap(&mut self.min_delay, &mut self.max_delay); - } - if self.target_cooldown < self.min_delay { - tracing::warn!( - target_cooldown_secs = self.target_cooldown.as_secs(), - min_delay_secs = self.min_delay.as_secs(), - "chaos restart target cooldown must be >= min delay; bumping cooldown" - ); - self.target_cooldown = self.min_delay; - } - if !self.include_nodes { - tracing::warn!("chaos restart requires at least one node group; enabling all targets"); - self.include_nodes = true; - } - - let workload = RandomRestartWorkload::new( - self.min_delay, - self.max_delay, - self.target_cooldown, - self.include_nodes, - ); - self.builder = self.builder.with_workload(workload); - self.builder - } -} diff --git a/testing-framework/workflows/src/expectations/consensus_liveness.rs b/testing-framework/workflows/src/expectations/consensus_liveness.rs deleted file mode 100644 index 1cdabfc..0000000 --- a/testing-framework/workflows/src/expectations/consensus_liveness.rs +++ /dev/null @@ -1,298 +0,0 @@ -use std::time::Duration; - -use async_trait::async_trait; -use lb_core::header::HeaderId; -use testing_framework_core::{ - nodes::ApiClient, - scenario::{DynError, Expectation, RunContext}, -}; -use thiserror::Error; -use tokio::time::sleep; - -#[derive(Clone, Copy, Debug)] -/// Checks that every node reaches near the highest observed height within an -/// allowance. -pub struct ConsensusLiveness { - lag_allowance: u64, -} - -impl Default for ConsensusLiveness { - fn default() -> Self { - Self { - lag_allowance: LAG_ALLOWANCE, - } - } -} - -const LAG_ALLOWANCE: u64 = 2; -const MIN_PROGRESS_BLOCKS: u64 = 3; -const REQUEST_RETRIES: usize = 15; -const REQUEST_RETRY_DELAY: Duration = Duration::from_secs(2); -const MAX_LAG_ALLOWANCE: u64 = 5; - -#[async_trait] -impl Expectation for ConsensusLiveness { - fn name(&self) -> &'static str { - "consensus_liveness" - } - - async fn evaluate(&mut self, ctx: &RunContext) -> Result<(), DynError> { - Self::ensure_participants(ctx)?; - let target_hint = Self::target_blocks(ctx); - tracing::info!(target_hint, "consensus liveness: collecting samples"); - let check = Self::collect_results(ctx).await; - (*self).report(target_hint, check) - } -} - -const fn consensus_target_blocks(ctx: &RunContext) -> u64 { - ctx.expected_blocks() -} - -#[derive(Debug, Error)] -enum ConsensusLivenessIssue { - #[error("{node} height {height} below target {target}")] - HeightBelowTarget { - node: String, - height: u64, - target: u64, - }, - #[error("{node} consensus_info failed: {source}")] - RequestFailed { - node: String, - #[source] - source: DynError, - }, -} - -#[derive(Debug, Error)] -enum ConsensusLivenessError { - #[error("consensus liveness requires at least one validator")] - MissingParticipants, - #[error("consensus liveness violated (target={target}):\n{details}")] - Violations { - target: u64, - #[source] - details: ViolationIssues, - }, -} - -#[derive(Debug, Error)] -#[error("{message}")] -struct ViolationIssues { - issues: Vec, - message: String, -} - -impl ConsensusLiveness { - const fn target_blocks(ctx: &RunContext) -> u64 { - consensus_target_blocks(ctx) - } - - fn ensure_participants(ctx: &RunContext) -> Result<(), DynError> { - if ctx.node_clients().all_clients().is_empty() { - Err(Box::new(ConsensusLivenessError::MissingParticipants)) - } else { - Ok(()) - } - } - - async fn collect_results(ctx: &RunContext) -> LivenessCheck { - let clients = ctx.node_clients().all_clients(); - let mut samples = Vec::with_capacity(clients.len()); - let mut issues = Vec::new(); - - for (idx, client) in clients.iter().enumerate() { - let node = format!("node-{idx}"); - - for attempt in 0..REQUEST_RETRIES { - match Self::fetch_cluster_info(client).await { - Ok(sample) => { - tracing::debug!( - node = %node, - height = sample.height, - tip = ?sample.tip, - attempt, - "consensus_info collected" - ); - samples.push(NodeSample { - label: node.clone(), - height: sample.height, - tip: sample.tip, - }); - - break; - } - - Err(err) if attempt + 1 == REQUEST_RETRIES => { - tracing::warn!(node = %node, %err, "consensus_info failed after retries"); - - issues.push(ConsensusLivenessIssue::RequestFailed { - node: node.clone(), - source: err, - }); - } - - Err(_) => sleep(REQUEST_RETRY_DELAY).await, - } - } - } - - LivenessCheck { samples, issues } - } - - async fn fetch_cluster_info(client: &ApiClient) -> Result { - client - .consensus_info() - .await - .map(|info| ConsensusInfoSample { - height: info.height, - tip: info.tip, - }) - .map_err(|err| -> DynError { err.into() }) - } - - #[must_use] - /// Adjusts how many blocks behind the leader a node may be before failing. - pub const fn with_lag_allowance(mut self, lag_allowance: u64) -> Self { - self.lag_allowance = lag_allowance; - self - } - - fn effective_lag_allowance(&self, target: u64) -> u64 { - (target / 10).clamp(self.lag_allowance, MAX_LAG_ALLOWANCE) - } - - fn report(self, target_hint: u64, mut check: LivenessCheck) -> Result<(), DynError> { - if check.samples.is_empty() { - return Err(Box::new(ConsensusLivenessError::MissingParticipants)); - } - - let max_height = check - .samples - .iter() - .map(|sample| sample.height) - .max() - .unwrap_or(0); - let max_height_nodes = format_max_height_nodes(&check.samples, max_height); - - let mut target = target_hint; - if target == 0 || target > max_height { - target = max_height; - } - let lag_allowance = self.effective_lag_allowance(target); - - if max_height < MIN_PROGRESS_BLOCKS { - check - .issues - .push(ConsensusLivenessIssue::HeightBelowTarget { - node: "network".to_owned(), - height: max_height, - target: MIN_PROGRESS_BLOCKS, - }); - } - - for sample in &check.samples { - if sample.height + lag_allowance < target { - check - .issues - .push(ConsensusLivenessIssue::HeightBelowTarget { - node: sample.label.clone(), - height: sample.height, - target, - }); - } - } - - if check.issues.is_empty() { - let observed_heights: Vec<_> = check.samples.iter().map(|s| s.height).collect(); - let observed_tips: Vec<_> = check.samples.iter().map(|s| s.tip).collect(); - - tracing::info!( - target, - samples = check.samples.len(), - heights = ?observed_heights, - tips = ?observed_tips, - "consensus liveness expectation satisfied" - ); - Ok(()) - } else { - if let Some(nodes) = max_height_nodes.as_deref() { - tracing::warn!( - max_height, - nodes, - "consensus liveness: highest observed node(s)" - ); - } - for issue in &check.issues { - tracing::warn!(?issue, "consensus liveness issue"); - } - - Err(Box::new(ConsensusLivenessError::Violations { - target, - details: ViolationIssues::new(check.issues, max_height_nodes), - })) - } - } -} - -struct ConsensusInfoSample { - height: u64, - tip: HeaderId, -} - -struct NodeSample { - label: String, - height: u64, - tip: HeaderId, -} - -struct LivenessCheck { - samples: Vec, - issues: Vec, -} - -impl From> for ViolationIssues { - fn from(issues: Vec) -> Self { - Self::new(issues, None) - } -} - -impl ViolationIssues { - fn new(issues: Vec, max_height_nodes: Option) -> Self { - let mut message = String::new(); - if let Some(nodes) = &max_height_nodes { - message.push_str("max_height node(s): "); - message.push_str(nodes); - message.push('\n'); - } - - for issue in &issues { - message.push_str("- "); - message.push_str(&issue.to_string()); - message.push('\n'); - } - if message.ends_with('\n') { - message.pop(); - } - Self { issues, message } - } -} - -fn format_max_height_nodes(samples: &[NodeSample], max_height: u64) -> Option { - let mut leaders = samples.iter().filter(|sample| sample.height == max_height); - let first = leaders.next()?; - - let mut rendered = vec![format!( - "{} (height={}, tip={:?})", - first.label, first.height, first.tip - )]; - for sample in leaders { - rendered.push(format!( - "{} (height={}, tip={:?})", - sample.label, sample.height, sample.tip - )); - } - - Some(rendered.join(", ")) -} diff --git a/testing-framework/workflows/src/expectations/mod.rs b/testing-framework/workflows/src/expectations/mod.rs deleted file mode 100644 index e17d8ca..0000000 --- a/testing-framework/workflows/src/expectations/mod.rs +++ /dev/null @@ -1,3 +0,0 @@ -mod consensus_liveness; - -pub use consensus_liveness::ConsensusLiveness; diff --git a/testing-framework/workflows/src/lib.rs b/testing-framework/workflows/src/lib.rs deleted file mode 100644 index 0e97399..0000000 --- a/testing-framework/workflows/src/lib.rs +++ /dev/null @@ -1,10 +0,0 @@ -pub mod builder; -pub mod expectations; -pub mod manual; -pub mod util; -pub mod workloads; - -pub use builder::{ChaosBuilderExt, ObservabilityBuilderExt, ScenarioBuilderExt}; -pub use expectations::ConsensusLiveness; -pub use manual::{start_node_with_timeout, wait_for_min_height}; -pub use workloads::transaction::TxInclusionExpectation; diff --git a/testing-framework/workflows/src/util/mod.rs b/testing-framework/workflows/src/util/mod.rs deleted file mode 100644 index d7c3294..0000000 --- a/testing-framework/workflows/src/util/mod.rs +++ /dev/null @@ -1 +0,0 @@ -pub mod tx; diff --git a/testing-framework/workflows/src/util/tx.rs b/testing-framework/workflows/src/util/tx.rs deleted file mode 100644 index 181e9a1..0000000 --- a/testing-framework/workflows/src/util/tx.rs +++ /dev/null @@ -1,43 +0,0 @@ -use lb_core::mantle::{ - MantleTx, Op, OpProof, SignedMantleTx, Transaction as _, - ledger::Tx as LedgerTx, - ops::channel::{ChannelId, MsgId, inscribe::InscriptionOp}, -}; -use lb_key_management_system_service::keys::{Ed25519Key, ZkKey}; -use testing_framework_core::scenario::DynError; - -/// Builds a signed inscription transaction with deterministic payload for -/// testing. -pub fn create_inscription_transaction_with_id(id: ChannelId) -> Result { - let signing_key = Ed25519Key::from_bytes(&[0u8; 32]); - let signer = signing_key.public_key(); - - let inscription_op = InscriptionOp { - channel_id: id, - inscription: format!("Test channel inscription {id:?}").into_bytes(), - parent: MsgId::root(), - signer, - }; - - let mantle_tx = MantleTx { - ops: vec![Op::ChannelInscribe(inscription_op)], - ledger_tx: LedgerTx::new(vec![], vec![]), - storage_gas_price: 0, - execution_gas_price: 0, - }; - - let tx_hash = mantle_tx.hash(); - let signature = signing_key.sign_payload(tx_hash.as_signing_bytes().as_ref()); - let zk_key = ZkKey::zero(); - tracing::debug!(channel = ?id, tx_hash = ?tx_hash, "building inscription transaction"); - - let zk_signature = ZkKey::multi_sign(&[zk_key], tx_hash.as_ref()) - .map_err(|err| format!("zk signature generation failed: {err}"))?; - - SignedMantleTx::new( - mantle_tx, - vec![OpProof::Ed25519Sig(signature)], - zk_signature, - ) - .map_err(|err| format!("failed to build signed mantle transaction: {err}").into()) -} diff --git a/testing-framework/workflows/src/workloads/chaos.rs b/testing-framework/workflows/src/workloads/chaos.rs deleted file mode 100644 index 20c7afe..0000000 --- a/testing-framework/workflows/src/workloads/chaos.rs +++ /dev/null @@ -1,178 +0,0 @@ -use std::{collections::HashMap, time::Duration}; - -use async_trait::async_trait; -use rand::{Rng as _, seq::SliceRandom as _, thread_rng}; -use testing_framework_core::scenario::{DynError, RunContext, Workload}; -use tokio::time::{Instant, sleep}; -use tracing::info; - -const MIN_DELAY_SPREAD_FALLBACK: Duration = Duration::from_millis(1); - -/// Randomly restarts nodes during a run to introduce chaos. -#[derive(Debug)] -pub struct RandomRestartWorkload { - min_delay: Duration, - max_delay: Duration, - target_cooldown: Duration, - include_nodes: bool, -} - -impl RandomRestartWorkload { - /// Creates a restart workload with delay bounds and per-target cooldown. - /// - /// `min_delay`/`max_delay` bound the sleep between restart attempts, while - /// `target_cooldown` prevents repeatedly restarting the same node too - /// quickly. Nodes can be selectively included. - #[must_use] - pub const fn new( - min_delay: Duration, - max_delay: Duration, - target_cooldown: Duration, - include_nodes: bool, - ) -> Self { - Self { - min_delay, - max_delay, - target_cooldown, - include_nodes, - } - } - - fn targets(&self, ctx: &RunContext) -> Vec { - let mut targets = Vec::new(); - let node_count = ctx.descriptors().nodes().len(); - if self.include_nodes { - if node_count > 1 { - for index in 0..node_count { - targets.push(Target::Node(format!("node-{index}"))); - } - } else if node_count == 1 { - info!("chaos restart skipping nodes: only one node configured"); - } - } - targets - } - - fn random_delay(&self) -> Duration { - if self.max_delay <= self.min_delay { - return self.min_delay; - } - let spread = self - .max_delay - .checked_sub(self.min_delay) - .unwrap_or(MIN_DELAY_SPREAD_FALLBACK) - .as_secs_f64(); - let offset = thread_rng().gen_range(0.0..=spread); - let delay = self - .min_delay - .checked_add(Duration::from_secs_f64(offset)) - .unwrap_or(self.max_delay); - tracing::debug!(delay_ms = delay.as_millis(), "chaos restart selected delay"); - delay - } - - fn initialize_cooldowns(&self, targets: &[Target]) -> HashMap { - let now = Instant::now(); - let ready = now.checked_sub(self.target_cooldown).unwrap_or(now); - targets - .iter() - .cloned() - .map(|target| (target, ready)) - .collect() - } - - async fn pick_target( - &self, - targets: &[Target], - cooldowns: &HashMap, - ) -> Result { - if targets.is_empty() { - return Err("chaos restart workload has no eligible targets".into()); - } - - loop { - let now = Instant::now(); - if let Some(next_ready) = cooldowns - .values() - .copied() - .filter(|ready| *ready > now) - .min() - { - let wait = next_ready.saturating_duration_since(now); - if !wait.is_zero() { - tracing::debug!( - wait_ms = wait.as_millis(), - "chaos restart waiting for cooldown" - ); - sleep(wait).await; - continue; - } - } - - let available: Vec = targets - .iter() - .cloned() - .filter(|target| cooldowns.get(target).is_none_or(|ready| *ready <= now)) - .collect(); - - if let Some(choice) = available.choose(&mut thread_rng()).cloned() { - tracing::debug!(?choice, "chaos restart picked target"); - return Ok(choice); - } - - if let Some(choice) = targets.choose(&mut thread_rng()).cloned() { - return Ok(choice); - } - return Err("chaos restart workload has no eligible targets".into()); - } - } -} - -#[async_trait] -impl Workload for RandomRestartWorkload { - fn name(&self) -> &'static str { - "chaos_restart" - } - - async fn start(&self, ctx: &RunContext) -> Result<(), DynError> { - let handle = ctx - .node_control() - .ok_or_else(|| "chaos restart workload requires node control".to_owned())?; - - let targets = self.targets(ctx); - if targets.is_empty() { - return Err("chaos restart workload has no eligible targets".into()); - } - - tracing::info!( - config = ?self, - nodes = ctx.descriptors().nodes().len(), - target_count = targets.len(), - "starting chaos restart workload" - ); - - let mut cooldowns = self.initialize_cooldowns(&targets); - - loop { - sleep(self.random_delay()).await; - let target = self.pick_target(&targets, &cooldowns).await?; - - match target { - Target::Node(ref name) => { - tracing::info!(name, "chaos restarting node"); - handle - .restart_node(name) - .await - .map_err(|err| format!("node restart failed: {err}"))? - } - } - - cooldowns.insert(target, Instant::now() + self.target_cooldown); - } - } -} - -#[derive(Clone, PartialEq, Eq, Hash, Debug)] -enum Target { - Node(String), -} diff --git a/testing-framework/workflows/src/workloads/mod.rs b/testing-framework/workflows/src/workloads/mod.rs deleted file mode 100644 index 2945c13..0000000 --- a/testing-framework/workflows/src/workloads/mod.rs +++ /dev/null @@ -1,5 +0,0 @@ -pub mod chaos; -pub mod transaction; -pub mod util; - -pub use transaction::TxInclusionExpectation; diff --git a/testing-framework/workflows/src/workloads/transaction/expectation.rs b/testing-framework/workflows/src/workloads/transaction/expectation.rs deleted file mode 100644 index 4aa62af..0000000 --- a/testing-framework/workflows/src/workloads/transaction/expectation.rs +++ /dev/null @@ -1,217 +0,0 @@ -use std::{ - collections::HashSet, - num::{NonZeroU64, NonZeroUsize}, - sync::{ - Arc, - atomic::{AtomicU64, Ordering}, - }, - time::Duration, -}; - -use async_trait::async_trait; -use lb_core::{header::HeaderId, mantle::AuthenticatedMantleTx as _}; -use lb_key_management_system_service::keys::ZkPublicKey; -use testing_framework_core::scenario::{DynError, Expectation, RunContext}; -use thiserror::Error; -use tokio::{sync::broadcast, time::sleep}; - -use super::workload::{SubmissionPlan, limited_user_count, submission_plan}; - -const MIN_INCLUSION_RATIO: f64 = 0.5; -const CATCHUP_POLL_INTERVAL: Duration = Duration::from_secs(1); -const MAX_CATCHUP_WAIT: Duration = Duration::from_secs(60); - -#[derive(Clone)] -pub struct TxInclusionExpectation { - txs_per_block: NonZeroU64, - user_limit: Option, - capture_state: Option, -} - -#[derive(Clone)] -struct CaptureState { - observed: Arc, - expected: u64, -} - -#[derive(Debug, Error)] -enum TxExpectationError { - #[error("transaction workload requires seeded accounts")] - MissingAccounts, - #[error("transaction workload planned zero transactions")] - NoPlannedTransactions, - #[error("transaction inclusion expectation not captured")] - NotCaptured, - #[error("transaction inclusion observed {observed} below required {required}")] - InsufficientInclusions { observed: u64, required: u64 }, -} - -impl TxInclusionExpectation { - /// Expectation that checks a minimum fraction of planned transactions were - /// included. - pub const NAME: &'static str = "tx_inclusion_expectation"; - - /// Constructs an inclusion expectation using the same parameters as the - /// workload. - #[must_use] - pub const fn new(txs_per_block: NonZeroU64, user_limit: Option) -> Self { - Self { - txs_per_block, - user_limit, - capture_state: None, - } - } -} - -#[async_trait] -impl Expectation for TxInclusionExpectation { - fn name(&self) -> &'static str { - Self::NAME - } - - async fn start_capture(&mut self, ctx: &RunContext) -> Result<(), DynError> { - if self.capture_state.is_some() { - return Ok(()); - } - - let (plan, tracked_accounts) = build_capture_plan(self, ctx)?; - if plan.transaction_count == 0 { - return Err(TxExpectationError::NoPlannedTransactions.into()); - } - - tracing::info!( - planned_txs = plan.transaction_count, - txs_per_block = self.txs_per_block.get(), - user_limit = self.user_limit.map(|u| u.get()), - "tx inclusion expectation starting capture" - ); - - let observed = Arc::new(AtomicU64::new(0)); - spawn_tx_inclusion_capture( - ctx.block_feed().subscribe(), - Arc::new(tracked_accounts), - Arc::clone(&observed), - ); - - self.capture_state = Some(CaptureState { - observed, - expected: plan.transaction_count as u64, - }); - - Ok(()) - } - - async fn evaluate(&mut self, ctx: &RunContext) -> Result<(), DynError> { - let state = self - .capture_state - .as_ref() - .ok_or(TxExpectationError::NotCaptured)?; - - let required = ((state.expected as f64) * MIN_INCLUSION_RATIO).ceil() as u64; - - let mut observed = state.observed.load(Ordering::Relaxed); - if observed < required { - let security_param = ctx.descriptors().config().consensus_params.security_param; - let hinted_wait = ctx - .run_metrics() - .block_interval_hint() - .map(|interval| interval.mul_f64(security_param.get() as f64)); - - let mut remaining = hinted_wait - .unwrap_or(MAX_CATCHUP_WAIT) - .min(MAX_CATCHUP_WAIT); - while observed < required && remaining > Duration::ZERO { - sleep(CATCHUP_POLL_INTERVAL).await; - remaining = remaining.saturating_sub(CATCHUP_POLL_INTERVAL); - observed = state.observed.load(Ordering::Relaxed); - } - } - - if observed >= required { - tracing::info!( - observed, - required, - expected = state.expected, - "tx inclusion expectation satisfied" - ); - Ok(()) - } else { - tracing::warn!( - observed, - required, - expected = state.expected, - "tx inclusion expectation failed" - ); - Err(TxExpectationError::InsufficientInclusions { observed, required }.into()) - } - } -} - -fn build_capture_plan( - expectation: &TxInclusionExpectation, - ctx: &RunContext, -) -> Result<(SubmissionPlan, HashSet), DynError> { - let wallet_accounts = ctx.descriptors().config().wallet().accounts.clone(); - if wallet_accounts.is_empty() { - return Err(TxExpectationError::MissingAccounts.into()); - } - - let available = limited_user_count(expectation.user_limit, wallet_accounts.len()); - let plan = submission_plan(expectation.txs_per_block, ctx, available)?; - - let wallet_pks = wallet_accounts - .into_iter() - .take(plan.transaction_count) - .map(|account| account.secret_key.to_public_key()) - .collect::>(); - - Ok((plan, wallet_pks)) -} - -fn spawn_tx_inclusion_capture( - mut receiver: broadcast::Receiver>, - tracked_accounts: Arc>, - observed: Arc, -) { - tokio::spawn(async move { - let genesis_parent = HeaderId::from([0; 32]); - tracing::debug!("tx inclusion capture task started"); - - loop { - match receiver.recv().await { - Ok(record) => { - if record.block.header().parent_block() == genesis_parent { - continue; - } - - capture_tx_outputs(record.as_ref(), &tracked_accounts, &observed); - } - Err(broadcast::error::RecvError::Lagged(skipped)) => { - tracing::debug!(skipped, "tx inclusion capture lagged"); - } - Err(broadcast::error::RecvError::Closed) => { - tracing::debug!("tx inclusion capture feed closed"); - break; - } - } - } - - tracing::debug!("tx inclusion capture task exiting"); - }); -} - -fn capture_tx_outputs( - record: &testing_framework_core::scenario::BlockRecord, - tracked_accounts: &HashSet, - observed: &AtomicU64, -) { - for tx in record.block.transactions() { - for note in &tx.mantle_tx().ledger_tx.outputs { - if tracked_accounts.contains(¬e.pk) { - observed.fetch_add(1, Ordering::Relaxed); - tracing::debug!(pk = ?note.pk, "tx inclusion observed account output"); - break; - } - } - } -} diff --git a/testing-framework/workflows/src/workloads/transaction/mod.rs b/testing-framework/workflows/src/workloads/transaction/mod.rs deleted file mode 100644 index df5c612..0000000 --- a/testing-framework/workflows/src/workloads/transaction/mod.rs +++ /dev/null @@ -1,5 +0,0 @@ -mod expectation; -mod workload; - -pub use expectation::TxInclusionExpectation; -pub use workload::Workload; diff --git a/testing-framework/workflows/src/workloads/transaction/workload.rs b/testing-framework/workflows/src/workloads/transaction/workload.rs deleted file mode 100644 index 389e771..0000000 --- a/testing-framework/workflows/src/workloads/transaction/workload.rs +++ /dev/null @@ -1,317 +0,0 @@ -use std::{ - collections::{HashMap, VecDeque}, - num::{NonZeroU64, NonZeroUsize}, - sync::Arc, - time::Duration, -}; - -use async_trait::async_trait; -use lb_core::mantle::{ - GenesisTx as _, Note, SignedMantleTx, Transaction as _, Utxo, tx_builder::MantleTxBuilder, -}; -use lb_key_management_system_service::keys::{ZkKey, ZkPublicKey}; -use testing_framework_config::topology::configs::wallet::WalletAccount; -use testing_framework_core::{ - scenario::{DynError, Expectation, RunContext, RunMetrics, Workload as ScenarioWorkload}, - topology::generation::{GeneratedNodeConfig, GeneratedTopology}, -}; - -/// Submission timing plan for transaction workload execution -#[derive(Debug, Clone, Copy)] -pub(super) struct SubmissionPlan { - /// Number of transactions to submit - pub transaction_count: usize, - /// Time interval between submissions - pub submission_interval: Duration, -} -use tokio::time::sleep; - -use super::expectation::TxInclusionExpectation; -use crate::workloads::util::submit_transaction_via_cluster; - -const MAX_SUBMISSION_INTERVAL: Duration = Duration::from_secs(1); - -#[derive(Clone)] -pub struct Workload { - txs_per_block: NonZeroU64, - user_limit: Option, - accounts: Vec, -} - -#[derive(Clone)] -struct WalletInput { - account: WalletAccount, - utxo: Utxo, -} - -#[async_trait] -impl ScenarioWorkload for Workload { - fn name(&self) -> &'static str { - "tx_workload" - } - - fn expectations(&self) -> Vec> { - vec![Box::new(TxInclusionExpectation::new( - self.txs_per_block, - self.user_limit, - ))] - } - - fn init( - &mut self, - descriptors: &GeneratedTopology, - _run_metrics: &RunMetrics, - ) -> Result<(), DynError> { - tracing::info!("initializing transaction workload"); - - let wallet_accounts = descriptors.config().wallet().accounts.clone(); - if wallet_accounts.is_empty() { - return Err( - "Transaction workload initialization failed: no seeded wallet accounts configured" - .into(), - ); - } - - let reference_node = descriptors - .nodes() - .first() - .ok_or("transaction workload requires at least one node in the topology")?; - - let utxo_map = wallet_utxo_map(reference_node); - - fn match_account_to_utxo( - account: WalletAccount, - utxo_map: &HashMap, - ) -> Option { - utxo_map - .get(&account.public_key()) - .copied() - .map(|utxo| WalletInput { account, utxo }) - } - - let mut accounts = wallet_accounts - .into_iter() - .filter_map(|account| match_account_to_utxo(account, &utxo_map)) - .collect::>(); - - apply_user_limit(&mut accounts, self.user_limit); - - if accounts.is_empty() { - return Err( - "Transaction workload initialization failed: could not match any wallet accounts to genesis UTXOs".into(), - ); - } - - tracing::info!( - available_accounts = accounts.len(), - user_limit = self.user_limit.map(|u| u.get()), - "transaction workload accounts prepared" - ); - - self.accounts = accounts; - Ok(()) - } - - async fn start(&self, ctx: &RunContext) -> Result<(), DynError> { - tracing::info!( - txs_per_block = self.txs_per_block.get(), - users = self.user_limit.map(|u| u.get()), - "starting transaction workload submission" - ); - Submission::new(self, ctx)?.execute().await - } -} - -impl Workload { - /// Creates a workload that targets the provided transactions per block - /// rate. - #[must_use] - pub const fn new(txs_per_block: NonZeroU64) -> Self { - Self { - txs_per_block, - user_limit: None, - accounts: Vec::new(), - } - } - - /// Creates a workload from a raw rate, returning `None` when zero is given. - #[must_use] - pub fn with_rate(txs_per_block: u64) -> Option { - NonZeroU64::new(txs_per_block).map(Self::new) - } - - /// Returns the configured transactions per block rate. - #[must_use] - pub const fn txs_per_block(&self) -> NonZeroU64 { - self.txs_per_block - } - - /// Limits the number of distinct users that will submit transactions. - #[must_use] - pub const fn with_user_limit(mut self, user_limit: Option) -> Self { - self.user_limit = user_limit; - self - } -} - -impl Default for Workload { - fn default() -> Self { - Self::new(NonZeroU64::MIN) - } -} - -struct Submission<'a> { - plan: VecDeque, - ctx: &'a RunContext, - interval: Duration, -} - -impl<'a> Submission<'a> { - fn new(workload: &Workload, ctx: &'a RunContext) -> Result { - if workload.accounts.is_empty() { - return Err("Transaction workload submission failed: no available accounts for transaction creation".into()); - } - - let submission_plan = - submission_plan(workload.txs_per_block, ctx, workload.accounts.len())?; - - let plan = workload - .accounts - .iter() - .take(submission_plan.transaction_count) - .cloned() - .collect::>(); - - tracing::info!( - planned = submission_plan.transaction_count, - interval_ms = submission_plan.submission_interval.as_millis(), - accounts_available = workload.accounts.len(), - "transaction workload submission plan" - ); - - Ok(Self { - plan, - ctx, - interval: submission_plan.submission_interval, - }) - } - - async fn execute(mut self) -> Result<(), DynError> { - let total = self.plan.len(); - tracing::info!( - total, - interval_ms = self.interval.as_millis(), - "begin transaction submissions" - ); - - while let Some(input) = self.plan.pop_front() { - submit_wallet_transaction(self.ctx, &input).await?; - - if !self.interval.is_zero() { - sleep(self.interval).await; - } - } - - tracing::info!("transaction submissions finished"); - - Ok(()) - } -} - -async fn submit_wallet_transaction(ctx: &RunContext, input: &WalletInput) -> Result<(), DynError> { - let signed_tx = Arc::new(build_wallet_transaction(input)?); - tracing::debug!( - tx_hash = ?signed_tx.hash(), - user = ?input.account.public_key(), - "submitting wallet transaction" - ); - submit_transaction_via_cluster(ctx, signed_tx).await -} - -fn build_wallet_transaction(input: &WalletInput) -> Result { - let builder = MantleTxBuilder::new() - .add_ledger_input(input.utxo) - .add_ledger_output(Note::new(input.utxo.note.value, input.account.public_key())); - - let mantle_tx = builder.build(); - - let tx_hash = mantle_tx.hash(); - - let signature = ZkKey::multi_sign( - std::slice::from_ref(&input.account.secret_key), - tx_hash.as_ref(), - ) - .map_err(|err| { - format!("Transaction workload signing failed: could not sign transaction: {err}") - })?; - - SignedMantleTx::new(mantle_tx, Vec::new(), signature).map_err(|err| { - format!("Transaction workload construction failed: invalid transaction structure: {err}") - .into() - }) -} - -fn wallet_utxo_map(node: &GeneratedNodeConfig) -> HashMap { - let genesis_tx = node.general.consensus_config.genesis_tx.clone(); - let ledger_tx = genesis_tx.mantle_tx().ledger_tx.clone(); - - let tx_hash = ledger_tx.hash(); - - ledger_tx - .outputs - .iter() - .enumerate() - .map(|(idx, note)| (note.pk, Utxo::new(tx_hash, idx, *note))) - .collect() -} - -fn apply_user_limit(items: &mut Vec, user_limit: Option) { - if let Some(limit) = user_limit { - let allowed = limit.get().min(items.len()); - - items.truncate(allowed); - } -} - -pub(super) fn limited_user_count(user_limit: Option, available: usize) -> usize { - user_limit.map_or(available, |limit| limit.get().min(available)) -} - -pub(super) fn submission_plan( - txs_per_block: NonZeroU64, - ctx: &RunContext, - available_accounts: usize, -) -> Result { - if available_accounts == 0 { - return Err("Transaction workload planning failed: no accounts available for transaction scheduling".into()); - } - - let run_secs = ctx.run_duration().as_secs_f64(); - let block_secs = ctx - .run_metrics() - .block_interval_hint() - .unwrap_or_else(|| ctx.run_duration()) - .as_secs_f64(); - - let estimated_blocks_in_run = run_secs / block_secs; - let target_transaction_count = (estimated_blocks_in_run * txs_per_block.get() as f64) - .floor() - .clamp(0.0, u64::MAX as f64) as u64; - - let actual_transactions_to_submit = - target_transaction_count.min(available_accounts as u64) as usize; - - if actual_transactions_to_submit == 0 { - return Err("Transaction workload planning failed: calculated zero transactions to submit based on run duration and target rate".into()); - } - - let mut submission_interval = - Duration::from_secs_f64(run_secs / actual_transactions_to_submit as f64); - if submission_interval > MAX_SUBMISSION_INTERVAL { - submission_interval = MAX_SUBMISSION_INTERVAL; - } - Ok(SubmissionPlan { - transaction_count: actual_transactions_to_submit, - submission_interval, - }) -} diff --git a/testing-framework/workflows/src/workloads/util.rs b/testing-framework/workflows/src/workloads/util.rs deleted file mode 100644 index 95cc53f..0000000 --- a/testing-framework/workflows/src/workloads/util.rs +++ /dev/null @@ -1,75 +0,0 @@ -use std::{sync::Arc, time::Duration}; - -use lb_core::{ - block::Block, - mantle::{ - AuthenticatedMantleTx as _, SignedMantleTx, Transaction as MantleTx, - ops::{Op, channel::MsgId}, - }, -}; -use rand::{seq::SliceRandom as _, thread_rng}; -use testing_framework_core::scenario::{DynError, RunContext}; -use tracing::debug; - -const SUBMIT_RETRIES: usize = 5; -const SUBMIT_RETRY_DELAY: Duration = Duration::from_millis(500); - -/// Scans a block and invokes the matcher for every operation until it returns -/// `Some(...)`. Returns `None` when no matching operation is found. -pub fn find_channel_op(block: &Block, matcher: &mut F) -> Option -where - F: FnMut(&Op) -> Option, -{ - debug!( - txs = block.transactions().len(), - "scanning block for channel op" - ); - for tx in block.transactions() { - for op in &tx.mantle_tx().ops { - if let Some(msg_id) = matcher(op) { - return Some(msg_id); - } - } - } - - None -} - -/// Submits a transaction to the cluster, fanning out across clients until one -/// succeeds. -pub async fn submit_transaction_via_cluster( - ctx: &RunContext, - tx: Arc, -) -> Result<(), DynError> { - let tx_hash = tx.hash(); - debug!(?tx_hash, "submitting transaction via cluster (nodes first)"); - - let node_clients = ctx.node_clients(); - let mut clients = node_clients.node_clients(); - clients.shuffle(&mut thread_rng()); - - let mut last_err = None; - for attempt in 0..SUBMIT_RETRIES { - clients.shuffle(&mut thread_rng()); - - for client in &clients { - let url = client.base_url().clone(); - debug!(?tx_hash, %url, attempt, "submitting transaction to client"); - match client - .submit_transaction(&tx) - .await - .map_err(|err| -> DynError { err.into() }) - { - Ok(()) => return Ok(()), - Err(err) => { - debug!(?tx_hash, %url, attempt, "transaction submission failed"); - last_err = Some(err); - } - } - } - - tokio::time::sleep(SUBMIT_RETRY_DELAY).await; - } - - Err(last_err.unwrap_or_else(|| "cluster client exhausted all nodes".into())) -} diff --git a/versions.env b/versions.env index 2413c6e..8080783 100644 --- a/versions.env +++ b/versions.env @@ -1,7 +1,7 @@ -VERSION=v0.3.2 +VERSION=v0.4.1 LOGOS_BLOCKCHAIN_BUNDLE_VERSION=v4 # Pinned logos-blockchain-node revision used for CI builds and binary bundles. -LOGOS_BLOCKCHAIN_NODE_REV=feac5ab97ef6dfcebcf6536363a5f330cb79b5e0 +LOGOS_BLOCKCHAIN_NODE_REV=a4275d00eb3041ed6bfb394e0913cd1ad172224c -# Optional: local logos-blockchain-node checkout override (do not commit absolute paths). -# LOGOS_BLOCKCHAIN_NODE_PATH= +# Local logos-blockchain-node checkout override. +# LOGOS_BLOCKCHAIN_NODE_PATH=/path/to/local/nomos-node