From 1acedcf71c05e3e80e3d3634c719acf058357ffa Mon Sep 17 00:00:00 2001 From: Eric <5089238+emizzle@users.noreply.github.com> Date: Fri, 16 Jan 2026 21:47:59 +1100 Subject: [PATCH] fix(ci): introduce a number of integration test fixes (#1342) Signed-off-by: Slava <20563034+veaceslavdoina@users.noreply.github.com> Co-authored-by: Slava <20563034+veaceslavdoina@users.noreply.github.com> Co-authored-by: Arnaud Co-authored-by: gmega --- .../actions/nimbus-build-system/action.yml | 30 +- .github/workflows/ci-reusable.yml | 11 +- .github/workflows/ci.yml | 1 + Makefile | 16 +- build.nims | 39 ++- codex.nimble | 2 +- codex/logutils.nim | 3 + config.nims | 4 +- tests/asynctest.nim | 12 +- tests/codex/slots/sampler/testutils.nim | 6 +- tests/ethertest.nim | 4 +- tests/helpers.nim | 2 + tests/integration/1_minute/testcli.nim | 92 +++++- tests/integration/1_minute/testecbug.nim | 12 +- .../30_minutes/testmarketplace.nim.ignore | 9 +- .../30_minutes/testvalidator.nim.ignore | 28 +- tests/integration/codexconfig.nim | 3 +- tests/integration/codexprocess.nim | 95 ++++-- tests/integration/hardhatprocess.nim | 108 +++++- tests/integration/marketplacesuite.nim | 5 +- tests/integration/multinodes.nim | 309 ++++++++++++------ tests/integration/nodeprocess.nim | 94 +++--- tests/integration/scripts/winkillprocess.sh | 97 ++++++ tests/integration/utils.nim | 92 ++++++ vendor/nim-chronos | 2 +- 25 files changed, 830 insertions(+), 246 deletions(-) create mode 100644 tests/integration/scripts/winkillprocess.sh create mode 100644 tests/integration/utils.nim diff --git a/.github/actions/nimbus-build-system/action.yml b/.github/actions/nimbus-build-system/action.yml index f70af604..4acbff33 100644 --- a/.github/actions/nimbus-build-system/action.yml +++ b/.github/actions/nimbus-build-system/action.yml @@ -29,6 +29,7 @@ runs: shell: ${{ inputs.shell }} {0} run: | curl --proto '=https' --tlsv1.2 -sSf https://sh.rustup.rs/ | sh -s -- --default-toolchain=${{ inputs.rust_version }} -y + echo "$HOME/.cargo/bin" >> "$GITHUB_PATH" - name: APT (Linux amd64/arm64) if: inputs.os == 'linux' && (inputs.cpu == 'amd64' || inputs.cpu == 'arm64') @@ -83,7 +84,7 @@ runs: - name: Install gcc 14 on Linux # We don't want to install gcc 14 for coverage (Ubuntu 20.04) - if : ${{ inputs.os == 'linux' && inputs.coverage != 'true' }} + if: ${{ inputs.os == 'linux' && inputs.coverage != 'true' }} shell: ${{ inputs.shell }} {0} run: | # Skip for older Ubuntu versions @@ -101,15 +102,22 @@ runs: if: inputs.os == 'linux' || inputs.os == 'macos' uses: hendrikmuhs/ccache-action@v1.2 with: - create-symlink: true - key: ${{ inputs.os }}-${{ inputs.builder }}-${{ inputs.cpu }}-${{ inputs.tests }}-${{ inputs.nim_version }} + create-symlink: false + key: ${{ inputs.os }}-${{ inputs.builder }}-${{ inputs.cpu }}-${{ inputs.tests }}-${{ inputs.nim_version }}-${{ github.run_id }}-${{ github.run_number }}-${{ github.run_attempt }} evict-old-files: 7d + - name: Add ccache to path on Linux/Mac + if: inputs.os == 'linux' || inputs.os == 'macos' + shell: ${{ inputs.shell }} {0} + run: | + echo "/usr/lib/ccache:/usr/local/opt/ccache/libexec" >> "$GITHUB_PATH" + echo "/usr/local/opt/ccache/libexec" >> "$GITHUB_PATH" + - name: Install ccache on Windows if: inputs.os == 'windows' uses: hendrikmuhs/ccache-action@v1.2 with: - key: ${{ inputs.os }}-${{ inputs.builder }}-${{ inputs.cpu }}-${{ inputs.tests }}-${{ inputs.nim_version }} + key: ${{ inputs.os }}-${{ inputs.builder }}-${{ inputs.cpu }}-${{ inputs.tests }}-${{ inputs.nim_version }}-${{ github.run_id }}-${{ github.run_number }}-${{ github.run_attempt }} evict-old-files: 7d - name: Enable ccache on Windows @@ -117,11 +125,11 @@ runs: shell: ${{ inputs.shell }} {0} run: | CCACHE_DIR=$(dirname $(which ccache))/ccached - mkdir ${CCACHE_DIR} - ln -s $(which ccache) ${CCACHE_DIR}/gcc.exe - ln -s $(which ccache) ${CCACHE_DIR}/g++.exe - ln -s $(which ccache) ${CCACHE_DIR}/cc.exe - ln -s $(which ccache) ${CCACHE_DIR}/c++.exe + mkdir -p ${CCACHE_DIR} + ln -sf $(which ccache) ${CCACHE_DIR}/gcc.exe + ln -sf $(which ccache) ${CCACHE_DIR}/g++.exe + ln -sf $(which ccache) ${CCACHE_DIR}/cc.exe + ln -sf $(which ccache) ${CCACHE_DIR}/c++.exe echo "export PATH=${CCACHE_DIR}:\$PATH" >> $HOME/.bash_profile # prefix path in MSYS2 - name: Derive environment variables @@ -202,10 +210,10 @@ runs: - name: Restore Nim toolchain binaries from cache id: nim-cache uses: actions/cache@v4 - if : ${{ inputs.coverage != 'true' }} + if: ${{ inputs.coverage != 'true' }} with: path: NimBinaries - key: ${{ inputs.os }}-${{ inputs.cpu }}-nim-${{ inputs.nim_version }}-cache-${{ env.cache_nonce }}-${{ github.run_id }} + key: ${{ inputs.os }}-${{ inputs.cpu }}-nim-${{ inputs.nim_version }}-cache-${{ env.cache_nonce }}-${{ github.run_id }}-${{ github.run_number }}-${{ github.run_attempt }} restore-keys: ${{ inputs.os }}-${{ inputs.cpu }}-nim-${{ inputs.nim_version }}-cache-${{ env.cache_nonce }} - name: Set NIM_COMMIT diff --git a/.github/workflows/ci-reusable.yml b/.github/workflows/ci-reusable.yml index b6131b18..ae3aecbe 100644 --- a/.github/workflows/ci-reusable.yml +++ b/.github/workflows/ci-reusable.yml @@ -54,13 +54,20 @@ jobs: with: node-version: 22 - - name: Start Ethereum node with Logos Storage contracts + - name: Install Ethereum node dependencies if: matrix.tests == 'contract' || matrix.tests == 'integration' || matrix.tests == 'tools' || matrix.tests == 'all' working-directory: vendor/logos-storage-contracts-eth env: MSYS2_PATH_TYPE: inherit run: | npm ci + + - name: Run Ethereum node with Logos Storage contracts + if: matrix.tests == 'contract' || matrix.tests == 'integration' || matrix.tests == 'tools' || matrix.tests == 'all' + working-directory: vendor/logos-storage-contracts-eth + env: + MSYS2_PATH_TYPE: inherit + run: | npm start & # Wait for the contracts to be deployed sleep 5 @@ -75,7 +82,7 @@ jobs: if: matrix.tests == 'integration' || matrix.tests == 'all' env: CODEX_INTEGRATION_TEST_INCLUDES: ${{ matrix.includes }} - run: make -j${ncpu} testIntegration + run: make -j${ncpu} DEBUG=${{ runner.debug }} testIntegration - name: Upload integration tests log files uses: actions/upload-artifact@v4 diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index a09ebce1..0d24507d 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -18,6 +18,7 @@ concurrency: jobs: matrix: + name: Compute matrix runs-on: ubuntu-latest outputs: matrix: ${{ steps.matrix.outputs.matrix }} diff --git a/Makefile b/Makefile index d9b9d70e..8b5bc371 100644 --- a/Makefile +++ b/Makefile @@ -140,10 +140,24 @@ testContracts: | build deps echo -e $(BUILD_MSG) "build/$@" && \ $(ENV_SCRIPT) nim testContracts $(NIM_PARAMS) --define:ws_resubscribe=240 build.nims +TEST_PARAMS := +ifdef DEBUG + TEST_PARAMS := $(TEST_PARAMS) -d:DebugTestHarness=$(DEBUG) + TEST_PARAMS := $(TEST_PARAMS) -d:NoCodexLogFilters=$(DEBUG) + TEST_PARAMS := $(TEST_PARAMS) -d:ShowContinuousStatusUpdates=$(DEBUG) + TEST_PARAMS := $(TEST_PARAMS) -d:DebugHardhat=$(DEBUG) +endif +ifdef TEST_TIMEOUT + TEST_PARAMS := $(TEST_PARAMS) -d:TestTimeout=$(TEST_TIMEOUT) +endif +ifdef PARALLEL + TEST_PARAMS := $(TEST_PARAMS) -d:EnableParallelTests=$(PARALLEL) +endif + # Builds and runs the integration tests testIntegration: | build deps echo -e $(BUILD_MSG) "build/$@" && \ - $(ENV_SCRIPT) nim testIntegration $(NIM_PARAMS) --define:ws_resubscribe=240 build.nims + $(ENV_SCRIPT) nim testIntegration $(TEST_PARAMS) $(NIM_PARAMS) --define:ws_resubscribe=240 build.nims # Builds and runs all tests (except for Taiko L2 tests) testAll: | build deps diff --git a/build.nims b/build.nims index 47e848b3..72f44921 100644 --- a/build.nims +++ b/build.nims @@ -1,9 +1,20 @@ mode = ScriptMode.Verbose import std/os except commandLineParams +import std/strutils ### Helper functions -proc buildBinary(srcName: string, outName = os.lastPathPart(srcName), srcDir = "./", params = "", lang = "c") = +proc truthy(val: string): bool = + const truthySwitches = @["yes", "1", "on", "true"] + return val in truthySwitches + +proc buildBinary( + srcName: string, + outName = os.lastPathPart(srcName), + srcDir = "./", + params = "", + lang = "c", +) = if not dirExists "build": mkDir "build" @@ -43,10 +54,8 @@ proc buildLibrary(name: string, srcDir = "./", params = "", `type` = "dynamic") exec "nim c" & " --out:build/" & name & ".a --threads:on --app:staticlib --opt:size --noMain --mm:refc --header --d:metrics " & "--nimMainPrefix:libstorage -d:noSignalHandler " & - "-d:LeopardExtraCompilerFlags=-fPIC " & - "-d:chronicles_runtime_filtering " & - "-d:chronicles_log_level=TRACE " & - params & " " & srcDir & name & ".nim" + "-d:LeopardExtraCompilerFlags=-fPIC " & "-d:chronicles_runtime_filtering " & + "-d:chronicles_log_level=TRACE " & params & " " & srcDir & name & ".nim" proc test(name: string, outName = name, srcDir = "tests/", params = "", lang = "c") = buildBinary name, outName, srcDir, params @@ -61,7 +70,8 @@ task toolsCirdl, "build tools/cirdl binary": buildBinary "tools/cirdl/cirdl" task testStorage, "Build & run Logos Storage tests": - test "testCodex", outName = "testStorage", params = "-d:storage_enable_proof_failures=true" + test "testCodex", + outName = "testStorage", params = "-d:storage_enable_proof_failures=true" task testContracts, "Build & run Logos Storage Contract tests": test "testContracts" @@ -70,11 +80,18 @@ task testIntegration, "Run integration tests": buildBinary "codex", outName = "storage", params = - "-d:chronicles_runtime_filtering -d:chronicles_log_level=TRACE -d:storage_enable_proof_failures=true" - test "testIntegration" + "-d:chronicles_runtime_filtering -d:chronicles_log_level=TRACE -d:chronicles_disabled_topics=JSONRPC-HTTP-CLIENT,websock,libp2p,discv5 -d:codex_enable_proof_failures=true" + var sinks = @["textlines[nocolors,file]"] + for i in 2 ..< paramCount(): + if "DebugTestHarness" in paramStr(i) and truthy paramStr(i).split('=')[1]: + sinks.add "textlines[stdout]" + break + var testParams = + "-d:chronicles_log_level=TRACE -d:chronicles_sinks=\"" & sinks.join(",") & "\"" + test "testIntegration", params = testParams # use params to enable logging from the integration test executable # test "testIntegration", params = "-d:chronicles_sinks=textlines[notimestamps,stdout],textlines[dynamic] " & - # "-d:chronicles_enabled_topics:integration:TRACE" + # "-d:chronicles_enabled_topics:integration:TRACE" task build, "build Logos Storage binary": storageTask() @@ -139,7 +156,9 @@ task coverage, "generates code coverage report": nimSrcs ) echo " ======== Generating HTML coverage report ======== " - exec("genhtml coverage/coverage.f.info --keep-going --output-directory coverage/report ") + exec( + "genhtml coverage/coverage.f.info --keep-going --output-directory coverage/report " + ) echo " ======== Coverage report Done ======== " task showCoverage, "open coverage html": diff --git a/codex.nimble b/codex.nimble index 43c39219..61e1f470 100644 --- a/codex.nimble +++ b/codex.nimble @@ -4,6 +4,6 @@ description = "p2p data durability engine" license = "MIT" binDir = "build" srcDir = "." -installFiles = @["build.nims"] +installFiles = @["build.nims"] include "build.nims" diff --git a/codex/logutils.nim b/codex/logutils.nim index ae27df7f..f3b98548 100644 --- a/codex/logutils.nim +++ b/codex/logutils.nim @@ -92,6 +92,7 @@ import std/sugar import std/typetraits import pkg/chronicles except toJson, `%` +from pkg/chronos import TransportAddress from pkg/libp2p import Cid, MultiAddress, `$` import pkg/questionable import pkg/questionable/results @@ -255,3 +256,5 @@ formatIt(LogFormat.textLines, array[32, byte]): it.short0xHexLog formatIt(LogFormat.json, array[32, byte]): it.to0xHex +formatIt(TransportAddress): + $it diff --git a/config.nims b/config.nims index b1bc4cbe..e9e3eb0a 100644 --- a/config.nims +++ b/config.nims @@ -65,8 +65,8 @@ else: # https://gcc.gnu.org/bugzilla/show_bug.cgi?id=65782 # ("-fno-asynchronous-unwind-tables" breaks Nim's exception raising, sometimes) switch("passC", "-march=x86-64") - else: switch("passC", "-march=native") - + else: + switch("passC", "-march=native") --tlsEmulation: off diff --git a/tests/asynctest.nim b/tests/asynctest.nim index 4db8277f..6f9d2d42 100644 --- a/tests/asynctest.nim +++ b/tests/asynctest.nim @@ -1,3 +1,13 @@ import pkg/asynctest/chronos/unittest2 -export unittest2 +export unittest2 except eventually + +template eventuallySafe*( + expression: untyped, timeout = 5000, pollInterval = 1000 +): bool = + ## More sane defaults, for use with HTTP connections + eventually(expression, timeout, pollInterval) + +template eventually*(expression: untyped, timeout = 5000, pollInterval = 10): bool = + ## Fast defaults, do not use with HTTP connections! + eventually(expression, timeout, pollInterval) diff --git a/tests/codex/slots/sampler/testutils.nim b/tests/codex/slots/sampler/testutils.nim index f20b5efc..5460fde7 100644 --- a/tests/codex/slots/sampler/testutils.nim +++ b/tests/codex/slots/sampler/testutils.nim @@ -77,15 +77,13 @@ asyncchecksuite "Test proof sampler utils": ) proc getExpectedIndices(n: int): seq[Natural] = - return collect( - newSeq, + return collect(newSeq): (; for i in 1 .. n: cellIndex( proofInput.entropy, proofInput.slotRoot, proofInput.nCellsPerSlot, i ) - ), - ) + ) check: slotCellIndices(3) == getExpectedIndices(3) diff --git a/tests/ethertest.nim b/tests/ethertest.nim index 2cab8bf5..636760a1 100644 --- a/tests/ethertest.nim +++ b/tests/ethertest.nim @@ -5,6 +5,8 @@ import pkg/chronos import ./asynctest import ./checktest +const HardhatPort {.intdefine.}: int = 8545 + ## Unit testing suite that sets up an Ethereum testing environment. ## Injects a `ethProvider` instance, and a list of `accounts`. ## Calls the `evm_snapshot` and `evm_revert` methods to ensure that any @@ -16,7 +18,7 @@ template ethersuite*(name, body) = var snapshot: JsonNode setup: - ethProvider = JsonRpcProvider.new("ws://localhost:8545") + ethProvider = JsonRpcProvider.new("ws://localhost:" & $HardhatPort) snapshot = await send(ethProvider, "evm_snapshot") accounts = await ethProvider.listAccounts() teardown: diff --git a/tests/helpers.nim b/tests/helpers.nim index 742bc10d..e938015f 100644 --- a/tests/helpers.nim +++ b/tests/helpers.nim @@ -4,6 +4,8 @@ import helpers/templeveldb import std/times import std/sequtils, chronos +import ./asynctest + export multisetup, trackers, templeveldb ### taken from libp2p errorhelpers.nim diff --git a/tests/integration/1_minute/testcli.nim b/tests/integration/1_minute/testcli.nim index 778608b8..699b1551 100644 --- a/tests/integration/1_minute/testcli.nim +++ b/tests/integration/1_minute/testcli.nim @@ -1,31 +1,91 @@ import std/tempfiles +import std/times import codex/conf import codex/utils/fileutils import ../../asynctest import ../../checktest import ../codexprocess import ../nodeprocess +import ../utils import ../../examples +const HardhatPort {.intdefine.}: int = 8545 +const CodexApiPort {.intdefine.}: int = 8080 +const CodexDiscPort {.intdefine.}: int = 8090 +const CodexLogToFile {.booldefine.}: bool = false +const CodexLogLevel {.strdefine.}: string = "" +const CodexLogsDir {.strdefine.}: string = "" + asyncchecksuite "Command line interface": + let startTime = now().format("yyyy-MM-dd'_'HH:mm:ss") let key = "4242424242424242424242424242424242424242424242424242424242424242" - proc startCodex(args: seq[string]): Future[CodexProcess] {.async.} = - return await CodexProcess.startNode(args, false, "cli-test-node") + var currentTestName = "" + var testCount = 0 + var nodeCount = 0 + + template test(tname, tbody) = + inc testCount + currentTestName = tname + test tname: + tbody + + proc addLogFile(args: seq[string]): seq[string] = + var args = args + when CodexLogToFile: + args.add( + "--log-file=" & + getLogFile( + CodexLogsDir, + startTime, + "Command line interface", + currentTestName, + "Client", + some nodeCount mod testCount, + ) + ) + when CodexLogLevel != "": + args.add "--log-level=" & CodexLogLevel + + return args + + proc startCodex(arguments: seq[string]): Future[CodexProcess] {.async.} = + inc nodeCount + let args = arguments.addLogFile + return await CodexProcess.startNode( + args.concat( + @[ + "--api-port=" & $(await nextFreePort(CodexApiPort + nodeCount)), + "--disc-port=" & $(await nextFreePort(CodexDiscPort + nodeCount)), + ] + ), + debug = false, + "cli-test-node", + ) test "complains when persistence is enabled without ethereum account": let node = await startCodex(@["persistence"]) await node.waitUntilOutput("Persistence enabled, but no Ethereum account was set") - await node.stop() + # Expect the codex process to return an exit code of 1 indicating the result + # of the operation was unsuccessful. + await node.stop(expectedExitCode = 1) test "complains when ethereum private key file has wrong permissions": let unsafeKeyFile = genTempPath("", "") discard unsafeKeyFile.writeFile(key, 0o666) - let node = await startCodex(@["persistence", "--eth-private-key=" & unsafeKeyFile]) + let node = await startCodex( + @[ + "persistence", + "--eth-provider=" & "ws://localhost:" & $HardhatPort, + "--eth-private-key=" & unsafeKeyFile, + ] + ) await node.waitUntilOutput( "Ethereum private key file does not have safe file permissions" ) - await node.stop() + # Expect the codex process to return an exit code of 1 indicating the result + # of the operation was unsuccessful. + await node.stop(expectedExitCode = 1) discard removeFile(unsafeKeyFile) let @@ -36,25 +96,37 @@ asyncchecksuite "Command line interface": test "suggests downloading of circuit files when persistence is enabled without accessible r1cs file": let node = await startCodex(@["persistence", "prover", marketplaceArg]) await node.waitUntilOutput(expectedDownloadInstruction) - await node.stop() + # Expect the codex process to return an exit code of 1 indicating the result + # of the operation was unsuccessful. + await node.stop(expectedExitCode = 1) test "suggests downloading of circuit files when persistence is enabled without accessible wasm file": let node = await startCodex( @[ - "persistence", "prover", marketplaceArg, + "persistence", + "--eth-provider=" & "ws://localhost:" & $HardhatPort, + "prover", + marketplaceArg, "--circom-r1cs=tests/circuits/fixtures/proof_main.r1cs", ] ) await node.waitUntilOutput(expectedDownloadInstruction) - await node.stop() + # Expect the codex process to return an exit code of 1 indicating the result + # of the operation was unsuccessful. + await node.stop(expectedExitCode = 1) test "suggests downloading of circuit files when persistence is enabled without accessible zkey file": let node = await startCodex( @[ - "persistence", "prover", marketplaceArg, + "persistence", + "--eth-provider=" & "ws://localhost:" & $HardhatPort, + "prover", + marketplaceArg, "--circom-r1cs=tests/circuits/fixtures/proof_main.r1cs", "--circom-wasm=tests/circuits/fixtures/proof_main.wasm", ] ) await node.waitUntilOutput(expectedDownloadInstruction) - await node.stop() + # Expect the codex process to return an exit code of 1 indicating the result + # of the operation was unsuccessful. + await node.stop(expectedExitCode = 1) diff --git a/tests/integration/1_minute/testecbug.nim b/tests/integration/1_minute/testecbug.nim index a5bfa832..74f8aaff 100644 --- a/tests/integration/1_minute/testecbug.nim +++ b/tests/integration/1_minute/testecbug.nim @@ -9,12 +9,12 @@ marketplacesuite( ): test "should be able to create storage request and download dataset", NodeConfigs( - clients: CodexConfigs - .init(nodes = 1) - # .debug() # uncomment to enable console log output.debug() - .withLogFile() - # uncomment to output log file to tests/integration/logs/ //_.log - .withLogTopics("node", "erasure", "marketplace").some, + clients: CodexConfigs.init(nodes = 1) + # .debug() # uncomment to enable console log output.debug() + # .withLogFile() + # uncomment to output log file to tests/integration/logs/ //_.log + # .withLogTopics("node", "erasure", "marketplace") + .some, providers: CodexConfigs.init(nodes = 0).some, ): let diff --git a/tests/integration/30_minutes/testmarketplace.nim.ignore b/tests/integration/30_minutes/testmarketplace.nim.ignore index b04626c4..15059664 100644 --- a/tests/integration/30_minutes/testmarketplace.nim.ignore +++ b/tests/integration/30_minutes/testmarketplace.nim.ignore @@ -6,6 +6,7 @@ import ../../contracts/deployment import ./../marketplacesuite import ../twonodes import ../nodeconfigs +from ../../helpers import eventuallySafe marketplacesuite(name = "Marketplace", stopOnRequestFail = true): let marketplaceConfig = NodeConfigs( @@ -122,11 +123,11 @@ marketplacesuite(name = "Marketplace", stopOnRequestFail = true): # Checking that the hosting node received reward for at least the time between let slotSize = slotSize(blocks, ecNodes, ecTolerance) let pricePerSlotPerSecond = minPricePerBytePerSecond * slotSize - check eventually (await token.balanceOf(hostAccount)) - startBalanceHost >= + check eventuallySafe (await token.balanceOf(hostAccount)) - startBalanceHost >= (duration - 5 * 60).u256 * pricePerSlotPerSecond * ecNodes.u256 # Checking that client node receives some funds back that were not used for the host nodes - check eventually( + check eventuallySafe( (await token.balanceOf(clientAccount)) - clientBalanceBeforeFinished > 0, timeout = 10 * 1000, # give client a bit of time to withdraw its funds ) @@ -296,12 +297,12 @@ marketplacesuite(name = "Marketplace payouts", stopOnRequestFail = true): let slotSize = slotSize(blocks, ecNodes, ecTolerance) let pricePerSlotPerSecond = minPricePerBytePerSecond * slotSize - check eventually ( + check eventuallySafe ( let endBalanceProvider = (await token.balanceOf(provider.ethAccount)) endBalanceProvider > startBalanceProvider and endBalanceProvider < startBalanceProvider + expiry.u256 * pricePerSlotPerSecond ) - check eventually( + check eventuallySafe( ( let endBalanceClient = (await token.balanceOf(client.ethAccount)) let endBalanceProvider = (await token.balanceOf(provider.ethAccount)) diff --git a/tests/integration/30_minutes/testvalidator.nim.ignore b/tests/integration/30_minutes/testvalidator.nim.ignore index ed67b5d0..b6ce1bbe 100644 --- a/tests/integration/30_minutes/testvalidator.nim.ignore +++ b/tests/integration/30_minutes/testvalidator.nim.ignore @@ -28,12 +28,12 @@ marketplacesuite(name = "Validation", stopOnRequestFail = false): NodeConfigs( # Uncomment to start Hardhat automatically, typically so logs can be inspected locally hardhat: HardhatConfig.none, - clients: CodexConfigs - .init(nodes = 1) - # .debug() # uncomment to enable console log output - .withLogFile() - # uncomment to output log file to tests/integration/logs/ //_.log - .withLogTopics("purchases", "onchain").some, + clients: CodexConfigs.init(nodes = 1) + # .debug() # uncomment to enable console log output + # .withLogFile() + # uncomment to output log file to tests/integration/logs/ //_.log + # .withLogTopics("purchases", "onchain") + .some, providers: CodexConfigs .init(nodes = 1) .withSimulateProofFailures(idx = 0, failEveryNProofs = 1) @@ -47,9 +47,9 @@ marketplacesuite(name = "Validation", stopOnRequestFail = false): .withValidationGroupIndex(idx = 0, groupIndex = 0) .withValidationGroupIndex(idx = 1, groupIndex = 1) # .debug() # uncomment to enable console log output - .withLogFile() + # .withLogFile() # uncomment to output log file to tests/integration/logs/ //_.log - .withLogTopics("validator") + # .withLogTopics("validator") # each topic as a separate string argument .some, ): @@ -100,12 +100,12 @@ marketplacesuite(name = "Validation", stopOnRequestFail = false): NodeConfigs( # Uncomment to start Hardhat automatically, typically so logs can be inspected locally hardhat: HardhatConfig.none, - clients: CodexConfigs - .init(nodes = 1) - # .debug() # uncomment to enable console log output - .withLogFile() - # uncomment to output log file to tests/integration/logs/ //_.log - .withLogTopics("purchases", "onchain").some, + clients: CodexConfigs.init(nodes = 1) + # .debug() # uncomment to enable console log output + # .withLogFile() + # uncomment to output log file to tests/integration/logs/ //_.log + # .withLogTopics("purchases", "onchain") + .some, providers: CodexConfigs .init(nodes = 1) .withSimulateProofFailures(idx = 0, failEveryNProofs = 1) diff --git a/tests/integration/codexconfig.nim b/tests/integration/codexconfig.nim index 138ae274..8d0cdb33 100644 --- a/tests/integration/codexconfig.nim +++ b/tests/integration/codexconfig.nim @@ -169,7 +169,8 @@ proc withLogFile*(self: CodexConfigs): CodexConfigs {.raises: [CodexConfigError] proc withLogFile*( self: var CodexConfig, logFile: string -) {.raises: [CodexConfigError].} = #: CodexConfigs = +) {.raises: [CodexConfigError].} = + #: CodexConfigs = ## typically called internally from the test suite, sets a log file path to ## be created during the test run, for a specified node in the group # var config = self diff --git a/tests/integration/codexprocess.nim b/tests/integration/codexprocess.nim index 04c2904f..824d4c43 100644 --- a/tests/integration/codexprocess.nim +++ b/tests/integration/codexprocess.nim @@ -7,6 +7,7 @@ import pkg/ethers import pkg/libp2p import std/os import std/strutils +import std/times import codex/conf import ./codexclient import ./nodeprocess @@ -15,11 +16,28 @@ export codexclient export chronicles export nodeprocess +{.push raises: [].} + logScope: topics = "integration testing codex process" -type CodexProcess* = ref object of NodeProcess - client: ?CodexClient +type + CodexProcess* = ref object of NodeProcess + client: ?CodexClient + + CodexProcessError* = object of NodeProcessError + +proc raiseCodexProcessError( + msg: string, parent: ref CatchableError +) {.raises: [CodexProcessError].} = + raise newException(CodexProcessError, msg & ": " & parent.msg, parent) + +template convertError(msg, body: typed) = + # Don't use this in an async proc, unless body does not raise CancelledError + try: + body + except CatchableError as parent: + raiseCodexProcessError(msg, parent) method workingDir(node: CodexProcess): string = return currentSourcePath() / ".." / ".." / ".." @@ -33,44 +51,83 @@ method startedOutput(node: CodexProcess): string = method processOptions(node: CodexProcess): set[AsyncProcessOption] = return {AsyncProcessOption.StdErrToStdOut} -method outputLineEndings(node: CodexProcess): string {.raises: [].} = +method outputLineEndings(node: CodexProcess): string = return "\n" -method onOutputLineCaptured(node: CodexProcess, line: string) {.raises: [].} = +method onOutputLineCaptured(node: CodexProcess, line: string) = discard -proc dataDir(node: CodexProcess): string = - let config = CodexConf.load(cmdLine = node.arguments, quitOnFailure = false) - return config.dataDir.string +proc config(node: CodexProcess): CodexConf {.raises: [CodexProcessError].} = + # cannot use convertError here as it uses typed parameters which forces type + # resolution, while confutils.load uses untyped parameters and expects type + # resolution not to happen yet. In other words, it won't compile. + try: + return CodexConf.load( + cmdLine = node.arguments, quitOnFailure = false, secondarySources = nil + ) + except ConfigurationError as parent: + raiseCodexProcessError "Failed to load node arguments into CodexConf", parent -proc ethAccount*(node: CodexProcess): Address = - let config = CodexConf.load(cmdLine = node.arguments, quitOnFailure = false) - without ethAccount =? config.ethAccount: +proc dataDir(node: CodexProcess): string {.raises: [CodexProcessError].} = + return node.config.dataDir.string + +proc ethAccount*(node: CodexProcess): Address {.raises: [CodexProcessError].} = + without ethAccount =? node.config.ethAccount: raiseAssert "eth account not set" return Address(ethAccount) -proc apiUrl*(node: CodexProcess): string = - let config = CodexConf.load(cmdLine = node.arguments, quitOnFailure = false) - return - "http://" & config.apiBindAddress.get() & ":" & $config.apiPort & "/api/storage/v1" +proc apiUrl*(node: CodexProcess): string {.raises: [CodexProcessError].} = + let config = node.config + without apiBindAddress =? config.apiBindAddress: + raise + newException(CodexProcessError, "REST API not started: --api-bindaddr not set") + return "http://" & apiBindAddress & ":" & $config.apiPort & "/api/storage/v1" -proc client*(node: CodexProcess): CodexClient = +proc logFile*(node: CodexProcess): ?string {.raises: [CodexProcessError].} = + node.config.logFile + +proc client*(node: CodexProcess): CodexClient {.raises: [CodexProcessError].} = if client =? node.client: return client let client = CodexClient.new(node.apiUrl) node.client = some client return client -method stop*(node: CodexProcess) {.async.} = +proc updateLogFile(node: CodexProcess, newLogFile: string) = + for arg in node.arguments.mitems: + if arg.startsWith("--log-file="): + arg = "--log-file=" & newLogFile + break + +method restart*(node: CodexProcess) {.async.} = + trace "restarting codex" + await node.stop() + if logFile =? node.logFile: + # chronicles truncates the existing log file on start, so changed the log + # file cli param to create a new one + node.updateLogFile( + logFile & "_restartedAt_" & now().format("yyyy-MM-dd'_'HH-mm-ss") & ".log" + ) + await node.start() + await node.waitUntilStarted() + trace "codex process restarted" + +method stop*(node: CodexProcess) {.async: (raises: []).} = logScope: nodeName = node.name + trace "stopping codex client" await procCall NodeProcess(node).stop() - trace "stopping Storage client" + if not node.process.isNil: + trace "closing node process' streams" + await node.process.closeWait() + trace "node process' streams closed" + if client =? node.client: await client.close() node.client = none CodexClient -method removeDataDir*(node: CodexProcess) = - removeDir(node.dataDir) +method removeDataDir*(node: CodexProcess) {.raises: [CodexProcessError].} = + convertError("failed to remove codex node data directory"): + removeDir(node.dataDir) diff --git a/tests/integration/hardhatprocess.nim b/tests/integration/hardhatprocess.nim index 915c8c53..8342f05f 100644 --- a/tests/integration/hardhatprocess.nim +++ b/tests/integration/hardhatprocess.nim @@ -8,28 +8,38 @@ import pkg/stew/io2 import std/os import std/sets import std/sequtils +import std/strformat import std/strutils import pkg/codex/conf import pkg/codex/utils/trackedfutures import ./codexclient import ./nodeprocess +import ./utils export codexclient export chronicles +export nodeprocess + +{.push raises: [].} logScope: topics = "integration testing hardhat process" - nodeName = "hardhat" -type HardhatProcess* = ref object of NodeProcess - logFile: ?IoHandle +type + OnOutputLineCaptured = proc(line: string) {.gcsafe, raises: [].} + HardhatProcess* = ref object of NodeProcess + logFile: ?IoHandle + onOutputLine: OnOutputLineCaptured + + HardhatProcessError* = object of NodeProcessError method workingDir(node: HardhatProcess): string = return currentSourcePath() / ".." / ".." / ".." / "vendor" / "logos-storage-contracts-eth" method executable(node: HardhatProcess): string = - return "node_modules" / ".bin" / "hardhat" + return + "node_modules" / ".bin" / (when defined(windows): "hardhat.cmd" else: "hardhat") method startedOutput(node: HardhatProcess): string = return "Started HTTP and WebSocket JSON-RPC server at" @@ -37,7 +47,7 @@ method startedOutput(node: HardhatProcess): string = method processOptions(node: HardhatProcess): set[AsyncProcessOption] = return {} -method outputLineEndings(node: HardhatProcess): string {.raises: [].} = +method outputLineEndings(node: HardhatProcess): string = return "\n" proc openLogFile(node: HardhatProcess, logFilePath: string): IoHandle = @@ -52,7 +62,21 @@ proc openLogFile(node: HardhatProcess, logFilePath: string): IoHandle = return fileHandle -method start*(node: HardhatProcess) {.async.} = +method start*( + node: HardhatProcess +) {.async: (raises: [CancelledError, NodeProcessError]).} = + logScope: + nodeName = node.name + + var executable = "" + try: + executable = absolutePath(node.workingDir / node.executable) + if not fileExists(executable): + raiseAssert "cannot start hardhat, executable doesn't exist (looking for " & + &"{executable}). Try running `npm install` in {node.workingDir}." + except CatchableError as parent: + raiseAssert "failed build path to hardhat executable: " & parent.msg + let poptions = node.processOptions + {AsyncProcessOption.StdErrToStdOut} trace "starting node", @@ -89,19 +113,37 @@ method start*(node: HardhatProcess) {.async.} = trace "hardhat post start scripts executed" except CancelledError as error: raise error - except CatchableError as e: - error "failed to start hardhat process", error = e.msg + except CatchableError as parent: + raise newException( + HardhatProcessError, "failed to start hardhat process: " & parent.msg, parent + ) + +proc port(node: HardhatProcess): ?int = + var next = false + for arg in node.arguments: + # TODO: move to constructor + if next: + return parseInt(arg).catch.option + if arg.contains "--port": + next = true + + return none int proc startNode*( _: type HardhatProcess, args: seq[string], debug: string | bool = false, name: string, -): Future[HardhatProcess] {.async.} = + onOutputLineCaptured: OnOutputLineCaptured = nil, +): Future[HardhatProcess] {.async: (raises: [CancelledError, NodeProcessError]).} = + logScope: + nodeName = name + var logFilePath = "" var arguments = newSeq[string]() for arg in args: + # TODO: move to constructor if arg.contains "--log-file=": logFilePath = arg.split("=")[1] else: @@ -114,17 +156,25 @@ proc startNode*( arguments: arguments, debug: ($debug != "false"), trackedFutures: TrackedFutures.new(), - name: "hardhat", + name: name, + onOutputLine: onOutputLineCaptured, ) await hardhat.start() + # TODO: move to constructor if logFilePath != "": hardhat.logFile = some hardhat.openLogFile(logFilePath) return hardhat method onOutputLineCaptured(node: HardhatProcess, line: string) = + logScope: + nodeName = node.name + + if not node.onOutputLine.isNil: + node.onOutputLine(line) + without logFile =? node.logFile: return @@ -133,13 +183,49 @@ method onOutputLineCaptured(node: HardhatProcess, line: string) = discard logFile.closeFile() node.logFile = none IoHandle -method stop*(node: HardhatProcess) {.async.} = +proc closeProcessStreams(node: HardhatProcess) {.async: (raises: []).} = + when not defined(windows): + if not node.process.isNil: + trace "closing node process' streams" + await node.process.closeWait() + trace "node process' streams closed" + else: + # Windows hangs when attempting to close hardhat's process streams, so try + # to kill the process externally. + without port =? node.port: + error "Failed to get port from Hardhat args" + return + try: + let cmdResult = await forceKillProcess("node.exe", &"--port {port}") + if cmdResult.status > 0: + error "Failed to forcefully kill windows hardhat process", + port, exitCode = cmdResult.status, stderr = cmdResult.stdError + else: + trace "Successfully killed windows hardhat process by force", + port, exitCode = cmdResult.status, stdout = cmdResult.stdOutput + except ValueError, OSError: + let eMsg = getCurrentExceptionMsg() + error "Failed to forcefully kill windows hardhat process, bad path to command", + error = eMsg + except CancelledError as e: + discard + except AsyncProcessError as e: + error "Failed to forcefully kill windows hardhat process", port, error = e.msg + except AsyncProcessTimeoutError as e: + error "Timeout while forcefully killing windows hardhat process", + port, error = e.msg + +method stop*(node: HardhatProcess) {.async: (raises: []).} = # terminate the process await procCall NodeProcess(node).stop() + await node.closeProcessStreams() + if logFile =? node.logFile: trace "closing hardhat log file" discard logFile.closeFile() + node.process = nil + method removeDataDir*(node: HardhatProcess) = discard diff --git a/tests/integration/marketplacesuite.nim b/tests/integration/marketplacesuite.nim index 5a0a11a6..cc0ee246 100644 --- a/tests/integration/marketplacesuite.nim +++ b/tests/integration/marketplacesuite.nim @@ -86,7 +86,10 @@ template marketplacesuite*(name: string, stopOnRequestFail: bool, body: untyped) duration: uint64, collateralPerByte: UInt256, minPricePerBytePerSecond: UInt256, - ): Future[void] {.async: (raises: [CancelledError, HttpError, ConfigurationError]).} = + ): Future[void] {. + async: + (raises: [CancelledError, HttpError, ConfigurationError, CodexProcessError]) + .} = let totalCollateral = datasetSize.u256 * collateralPerByte # post availability to each provider for i in 0 ..< providers().len: diff --git a/tests/integration/multinodes.nim b/tests/integration/multinodes.nim index 5f149585..05bb6fb0 100644 --- a/tests/integration/multinodes.nim +++ b/tests/integration/multinodes.nim @@ -1,3 +1,4 @@ +import std/httpclient import std/os import std/sequtils import std/strutils @@ -13,6 +14,7 @@ import ./codexprocess import ./hardhatconfig import ./hardhatprocess import ./nodeconfigs +import ./utils import ../asynctest import ../checktest @@ -24,6 +26,8 @@ export hardhatconfig export codexconfig export nodeconfigs +{.push raises: [].} + type RunningNode* = ref object role*: Role @@ -36,31 +40,33 @@ type Hardhat MultiNodeSuiteError = object of CatchableError + SuiteTimeoutError = object of MultiNodeSuiteError -const jsonRpcProviderUrl* = "ws://localhost:8545" +const HardhatPort {.intdefine.}: int = 8545 +const CodexApiPort {.intdefine.}: int = 8080 +const CodexDiscPort {.intdefine.}: int = 8090 +const TestId {.strdefine.}: string = "TestId" +const CodexLogToFile {.booldefine.}: bool = false +const CodexLogLevel {.strdefine.}: string = "" +const CodexLogsDir {.strdefine.}: string = "" -proc raiseMultiNodeSuiteError(msg: string) = - raise newException(MultiNodeSuiteError, msg) +proc raiseMultiNodeSuiteError( + msg: string, parent: ref CatchableError = nil +) {.raises: [MultiNodeSuiteError].} = + raise newException(MultiNodeSuiteError, msg, parent) -proc nextFreePort*(startPort: int): Future[int] {.async.} = - proc client(server: StreamServer, transp: StreamTransport) {.async.} = - await transp.closeWait() +template withLock(lock: AsyncLock, body: untyped) = + if lock.isNil: + lock = newAsyncLock() - var port = startPort - while true: - trace "checking if port is free", port + await lock.acquire() + try: + body + finally: try: - let host = initTAddress("127.0.0.1", port) - # We use ReuseAddr here only to be able to reuse the same IP/Port when - # there's a TIME_WAIT socket. It's useful when running the test multiple - # times or if a test ran previously using the same port. - var server = createStreamServer(host, client, {ReuseAddr}) - trace "port is free", port - await server.closeWait() - return port - except TransportOsError: - trace "port is not free", port - inc port + lock.release() + except AsyncLockError as parent: + raiseMultiNodeSuiteError "lock error", parent proc sanitize(pathSegment: string): string = var sanitized = pathSegment @@ -71,8 +77,8 @@ proc sanitize(pathSegment: string): string = proc getTempDirName*(starttime: string, role: Role, roleIdx: int): string = getTempDir() / "Storage" / sanitize($starttime) / sanitize($role & "_" & $roleIdx) -template multinodesuite*(name: string, body: untyped) = - asyncchecksuite name: +template multinodesuite*(suiteName: string, body: untyped) = + asyncchecksuite suiteName: # Following the problem described here: # https://github.com/NomicFoundation/hardhat/issues/2053 # It may be desirable to use http RPC provider. @@ -85,7 +91,7 @@ template multinodesuite*(name: string, body: untyped) = # If you want to use a different provider url in the nodes, you can # use withEthProvider config modifier in the node config # to set the desired provider url. E.g.: - # NodeConfigs( + # NodeConfigs( # hardhat: # HardhatConfig.none, # clients: @@ -93,6 +99,7 @@ template multinodesuite*(name: string, body: untyped) = # .withEthProvider("ws://localhost:8545") # .some, # ... + var jsonRpcProviderUrl = "ws://localhost:" & $HardhatPort var running {.inject, used.}: seq[RunningNode] var bootstrapNodes: seq[string] let starttime = now().format("yyyy-MM-dd'_'HH:mm:ss") @@ -101,6 +108,10 @@ template multinodesuite*(name: string, body: untyped) = var ethProvider {.inject, used.}: JsonRpcProvider var accounts {.inject, used.}: seq[Address] var snapshot: JsonNode + var lastUsedHardhatPort = HardhatPort + var lastUsedCodexApiPort = CodexApiPort + var lastUsedCodexDiscPort = CodexDiscPort + var codexPortLock: AsyncLock template test(tname, startNodeConfigs, tbody) = currentTestName = tname @@ -108,47 +119,50 @@ template multinodesuite*(name: string, body: untyped) = test tname: tbody - proc sanitize(pathSegment: string): string = - var sanitized = pathSegment - for invalid in invalidFilenameChars.items: - sanitized = sanitized.replace(invalid, '_').replace(' ', '_') - sanitized - - proc getLogFile(role: Role, index: ?int): string = - # create log file path, format: - # tests/integration/logs/ //_.log - - var logDir = - currentSourcePath.parentDir() / "logs" / sanitize($starttime & "__" & name) / - sanitize($currentTestName) - createDir(logDir) - - var fn = $role - if idx =? index: - fn &= "_" & $idx - fn &= ".log" - - let fileName = logDir / fn - return fileName + proc updatePort(url: var string, port: int) = + let parts = url.split(':') + url = @[parts[0], parts[1], $port].join(":") proc newHardhatProcess( config: HardhatConfig, role: Role - ): Future[NodeProcess] {.async.} = + ): Future[NodeProcess] {.async: (raises: [MultiNodeSuiteError, CancelledError]).} = var args: seq[string] = @[] if config.logFile: - let updatedLogFile = getLogFile(role, none int) - args.add "--log-file=" & updatedLogFile + try: + let updatedLogFile = getLogFile( + CodexLogsDir, starttime, suiteName, currentTestName, $role, none int + ) + args.add "--log-file=" & updatedLogFile + except IOError as e: + raiseMultiNodeSuiteError( + "failed to start hardhat because logfile path could not be obtained: " & + e.msg, + e, + ) + except OSError as e: + raiseMultiNodeSuiteError( + "failed to start hardhat because logfile path could not be obtained: " & + e.msg, + e, + ) + + let port = await nextFreePort(lastUsedHardhatPort) + jsonRpcProviderUrl.updatePort(port) + args.add("--port") + args.add($port) + lastUsedHardhatPort = port try: let node = await HardhatProcess.startNode(args, config.debugEnabled, "hardhat") + await node.waitUntilStarted() trace "hardhat node started" return node except NodeProcessError as e: - raiseMultiNodeSuiteError "cannot start hardhat process: " & e.msg + raiseMultiNodeSuiteError "hardhat node not started: " & e.msg proc newCodexProcess( roleIdx: int, conf: CodexConfig, role: Role - ): Future[NodeProcess] {.async.} = + ): Future[NodeProcess] {.async: (raises: [MultiNodeSuiteError, CancelledError]).} = let nodeIdx = running.len var config = conf @@ -156,34 +170,60 @@ template multinodesuite*(name: string, body: untyped) = raiseMultiNodeSuiteError "Cannot start node at nodeIdx " & $nodeIdx & ", not enough eth accounts." - let datadir = getTempDirName(starttime, role, roleIdx) + let datadir = getDataDir(TestId, currentTestName, $starttime, $role, some roleIdx) try: - if config.logFile.isSome: - let updatedLogFile = getLogFile(role, some roleIdx) - config.withLogFile(updatedLogFile) + if config.logFile.isSome or CodexLogToFile: + try: + let updatedLogFile = getLogFile( + CodexLogsDir, starttime, suiteName, currentTestName, $role, some roleIdx + ) + config.withLogFile(updatedLogFile) + except IOError as e: + raiseMultiNodeSuiteError( + "failed to start " & $role & + " because logfile path could not be obtained: " & e.msg, + e, + ) + except OSError as e: + raiseMultiNodeSuiteError( + "failed to start " & $role & + " because logfile path could not be obtained: " & e.msg, + e, + ) + + when CodexLogLevel != "": + config.addCliOption("--log-level", CodexLogLevel) + + var apiPort, discPort: int + withLock(codexPortLock): + apiPort = await nextFreePort(lastUsedCodexApiPort + nodeIdx) + discPort = await nextFreePort(lastUsedCodexDiscPort + nodeIdx) + config.addCliOption("--api-port", $apiPort) + config.addCliOption("--disc-port", $discPort) + lastUsedCodexApiPort = apiPort + lastUsedCodexDiscPort = discPort for bootstrapNode in bootstrapNodes: config.addCliOption("--bootstrap-node", bootstrapNode) - config.addCliOption("--api-port", $await nextFreePort(8080 + nodeIdx)) + config.addCliOption("--data-dir", datadir) config.addCliOption("--nat", "none") config.addCliOption("--listen-addrs", "/ip4/127.0.0.1/tcp/0") - config.addCliOption("--disc-port", $await nextFreePort(8090 + nodeIdx)) except CodexConfigError as e: raiseMultiNodeSuiteError "invalid cli option, error: " & e.msg - let node = await CodexProcess.startNode( - config.cliArgs, config.debugEnabled, $role & $roleIdx - ) - try: + let node = await CodexProcess.startNode( + config.cliArgs, config.debugEnabled, $role & $roleIdx + ) await node.waitUntilStarted() trace "node started", nodeName = $role & $roleIdx + return node + except CodexConfigError as e: + raiseMultiNodeSuiteError "failed to get cli args from config: " & e.msg, e except NodeProcessError as e: - raiseMultiNodeSuiteError "node not started, error: " & e.msg - - return node + raiseMultiNodeSuiteError "node not started, error: " & e.msg, e proc hardhat(): HardhatProcess = for r in running: @@ -209,7 +249,9 @@ template multinodesuite*(name: string, body: untyped) = if r.role == Role.Validator: CodexProcess(r.node) - proc startHardhatNode(config: HardhatConfig): Future[NodeProcess] {.async.} = + proc startHardhatNode( + config: HardhatConfig + ): Future[NodeProcess] {.async: (raises: [MultiNodeSuiteError, CancelledError]).} = return await newHardhatProcess(config, Role.Hardhat) proc startClientNode(conf: CodexConfig): Future[NodeProcess] {.async.} = @@ -221,44 +263,64 @@ template multinodesuite*(name: string, body: untyped) = ) return await newCodexProcess(clientIdx, config, Role.Client) - proc startProviderNode(conf: CodexConfig): Future[NodeProcess] {.async.} = - let providerIdx = providers().len - var config = conf - config.addCliOption(StartUpCmd.persistence, "--eth-provider", jsonRpcProviderUrl) - config.addCliOption( - StartUpCmd.persistence, "--eth-account", $accounts[running.len] - ) - config.addCliOption( - PersistenceCmd.prover, "--circom-r1cs", - "vendor/logos-storage-contracts-eth/verifier/networks/hardhat/proof_main.r1cs", - ) - config.addCliOption( - PersistenceCmd.prover, "--circom-wasm", - "vendor/logos-storage-contracts-eth/verifier/networks/hardhat/proof_main.wasm", - ) - config.addCliOption( - PersistenceCmd.prover, "--circom-zkey", - "vendor/logos-storage-contracts-eth/verifier/networks/hardhat/proof_main.zkey", - ) + proc startProviderNode( + conf: CodexConfig + ): Future[NodeProcess] {.async: (raises: [MultiNodeSuiteError, CancelledError]).} = + try: + let providerIdx = providers().len + var config = conf + config.addCliOption( + StartUpCmd.persistence, "--eth-provider", jsonRpcProviderUrl + ) + config.addCliOption( + StartUpCmd.persistence, "--eth-account", $accounts[running.len] + ) + config.addCliOption( + PersistenceCmd.prover, "--circom-r1cs", + "vendor/logos-storage-contracts-eth/verifier/networks/hardhat/proof_main.r1cs", + ) + config.addCliOption( + PersistenceCmd.prover, "--circom-wasm", + "vendor/logos-storage-contracts-eth/verifier/networks/hardhat/proof_main.wasm", + ) + config.addCliOption( + PersistenceCmd.prover, "--circom-zkey", + "vendor/logos-storage-contracts-eth/verifier/networks/hardhat/proof_main.zkey", + ) - return await newCodexProcess(providerIdx, config, Role.Provider) + return await newCodexProcess(providerIdx, config, Role.Provider) + except CodexConfigError as exc: + raiseMultiNodeSuiteError "Failed to start codex node, error adding cli options: " & + exc.msg, exc - proc startValidatorNode(conf: CodexConfig): Future[NodeProcess] {.async.} = - let validatorIdx = validators().len - var config = conf - config.addCliOption(StartUpCmd.persistence, "--eth-provider", jsonRpcProviderUrl) - config.addCliOption( - StartUpCmd.persistence, "--eth-account", $accounts[running.len] - ) - config.addCliOption(StartUpCmd.persistence, "--validator") + proc startValidatorNode( + conf: CodexConfig + ): Future[NodeProcess] {.async: (raises: [MultiNodeSuiteError, CancelledError]).} = + try: + let validatorIdx = validators().len + var config = conf + config.addCliOption( + StartUpCmd.persistence, "--eth-provider", jsonRpcProviderUrl + ) + config.addCliOption( + StartUpCmd.persistence, "--eth-account", $accounts[running.len] + ) + config.addCliOption(StartUpCmd.persistence, "--validator") - return await newCodexProcess(validatorIdx, config, Role.Validator) + return await newCodexProcess(validatorIdx, config, Role.Validator) + except CodexConfigError as e: + raiseMultiNodeSuiteError "Failed to start validator node, error adding cli options: " & + e.msg, e - proc teardownImpl() {.async.} = + proc teardownImpl() {.async: (raises: []).} = + trace "Tearing down test", suite = suiteName, test = currentTestName for nodes in @[validators(), clients(), providers()]: for node in nodes: await node.stop() # also stops rest client - node.removeDataDir() + try: + node.removeDataDir() + except CodexProcessError as e: + error "Failed to remove data dir during teardown", error = e.msg # if hardhat was started in the test, kill the node # otherwise revert the snapshot taken in the test setup @@ -266,15 +328,28 @@ template multinodesuite*(name: string, body: untyped) = if not hardhat.isNil: await hardhat.stop() else: - discard await send(ethProvider, "evm_revert", @[snapshot]) + try: + discard await noCancel send(ethProvider, "evm_revert", @[snapshot]) + except ProviderError as e: + error "Failed to revert hardhat state during teardown", error = e.msg - await ethProvider.close() + # TODO: JsonRpcProvider.close should NOT raise any exceptions + try: + await ethProvider.close() + except CatchableError: + discard running = @[] template failAndTeardownOnError(message: string, tryBody: untyped) = try: tryBody + except CancelledError as e: + await teardownImpl() + when declared(teardownAllIMPL): + teardownAllIMPL() + fail() + quit(1) except CatchableError as er: fatal message, error = er.msg echo "[FATAL] ", message, ": ", er.msg @@ -286,19 +361,34 @@ template multinodesuite*(name: string, body: untyped) = proc updateBootstrapNodes( node: CodexProcess - ): Future[void] {.async: (raises: [CatchableError]).} = - without ninfo =? await node.client.info(): - # raise CatchableError instead of Defect (with .get or !) so we - # can gracefully shutdown and prevent zombies - raiseMultiNodeSuiteError "Failed to get node info" - bootstrapNodes.add ninfo["spr"].getStr() + ): Future[void] {.async: (raises: [MultiNodeSuiteError]).} = + try: + without ninfo =? await node.client.info(): + # raise CatchableError instead of Defect (with .get or !) so we + # can gracefully shutdown and prevent zombies + raiseMultiNodeSuiteError "Failed to get node info" + bootstrapNodes.add ninfo["spr"].getStr() + except CatchableError as e: + raiseMultiNodeSuiteError "Failed to get node info: " & e.msg, e + + setupAll: + # When this file is run with `-d:chronicles_sinks=textlines[file]`, we + # need to set the log file path at runtime, otherwise chronicles didn't seem to + # create a log file even when using an absolute path + when defaultChroniclesStream.outputs is (FileOutput,) and CodexLogsDir.len > 0: + let logFile = + CodexLogsDir / sanitize(getAppFilename().extractFilename & ".chronicles.log") + let success = defaultChroniclesStream.outputs[0].open(logFile, fmAppend) + doAssert success, "Failed to open log file: " & logFile setup: + trace "Setting up test", suite = suiteName, test = currentTestName, nodeConfigs + if var conf =? nodeConfigs.hardhat: try: - let node = await startHardhatNode(conf) + let node = await noCancel startHardhatNode(conf) running.add RunningNode(role: Role.Hardhat, node: node) - except CatchableError as e: + except CatchableError as e: # CancelledError not raised due to noCancel echo "failed to start hardhat node" fail() quit(1) @@ -307,12 +397,16 @@ template multinodesuite*(name: string, body: untyped) = # Workaround for https://github.com/NomicFoundation/hardhat/issues/2053 # Do not use websockets, but use http and polling to stop subscriptions # from being removed after 5 minutes - ethProvider = JsonRpcProvider.new(jsonRpcProviderUrl) + ethProvider = JsonRpcProvider.new( + jsonRpcProviderUrl, pollingInterval = chronos.milliseconds(1000) + ) # if hardhat was NOT started by the test, take a snapshot so it can be # reverted in the test teardown if nodeConfigs.hardhat.isNone: snapshot = await send(ethProvider, "evm_snapshot") accounts = await ethProvider.listAccounts() + except CancelledError as e: + raise e except CatchableError as e: echo "Hardhat not running. Run hardhat manually " & "before executing tests, or include a " & "HardhatConfig in the test setup." @@ -342,7 +436,10 @@ template multinodesuite*(name: string, body: untyped) = # ensure that we have a recent block with a fresh timestamp discard await send(ethProvider, "evm_mine") + trace "Starting test", suite = suiteName, test = currentTestName + teardown: await teardownImpl() + trace "Test completed", suite = suiteName, test = currentTestName body diff --git a/tests/integration/nodeprocess.nim b/tests/integration/nodeprocess.nim index 9ac0f8c3..a45e7806 100644 --- a/tests/integration/nodeprocess.nim +++ b/tests/integration/nodeprocess.nim @@ -5,6 +5,7 @@ import pkg/chronicles import pkg/chronos/asyncproc import pkg/libp2p import std/os +import std/strformat import std/strutils import codex/conf import codex/utils/exceptions @@ -14,6 +15,8 @@ import ./codexclient export codexclient export chronicles +{.push raises: [].} + logScope: topics = "integration testing node process" @@ -39,24 +42,19 @@ method startedOutput(node: NodeProcess): string {.base, gcsafe.} = method processOptions(node: NodeProcess): set[AsyncProcessOption] {.base, gcsafe.} = raiseAssert "not implemented" -method outputLineEndings(node: NodeProcess): string {.base, gcsafe, raises: [].} = +method outputLineEndings(node: NodeProcess): string {.base, gcsafe.} = raiseAssert "not implemented" -method onOutputLineCaptured( - node: NodeProcess, line: string -) {.base, gcsafe, raises: [].} = +method onOutputLineCaptured(node: NodeProcess, line: string) {.base, gcsafe.} = raiseAssert "not implemented" -method start*(node: NodeProcess) {.base, async.} = +method start*(node: NodeProcess) {.base, async: (raises: [CancelledError]).} = logScope: nodeName = node.name let poptions = node.processOptions + {AsyncProcessOption.StdErrToStdOut} trace "starting node", - args = node.arguments, - executable = node.executable, - workingDir = node.workingDir, - processOptions = poptions + args = node.arguments, executable = node.executable, workingDir = node.workingDir try: if node.debug: @@ -81,11 +79,13 @@ proc captureOutput( trace "waiting for output", output - let stream = node.process.stdoutStream - try: while node.process.running.option == some true: - while (let line = await stream.readLine(0, node.outputLineEndings); line != ""): + while ( + let line = await node.process.stdoutStream.readLine(0, node.outputLineEndings) + line != "" + ) + : if node.debug: # would be nice if chronicles could parse and display with colors echo line @@ -95,8 +95,8 @@ proc captureOutput( node.onOutputLineCaptured(line) - await sleepAsync(1.millis) - await sleepAsync(1.millis) + await sleepAsync(1.nanos) + await sleepAsync(1.nanos) except CancelledError: discard # do not propagate as captureOutput was asyncSpawned except AsyncStreamError as e: @@ -104,7 +104,7 @@ proc captureOutput( proc startNode*[T: NodeProcess]( _: type T, args: seq[string], debug: string | bool = false, name: string -): Future[T] {.async.} = +): Future[T] {.async: (raises: [CancelledError]).} = ## Starts a Logos Storage Node with the specified arguments. ## Set debug to 'true' to see output of the node. let node = T( @@ -116,34 +116,36 @@ proc startNode*[T: NodeProcess]( await node.start() return node -method stop*(node: NodeProcess) {.base, async.} = +method stop*( + node: NodeProcess, expectedExitCode: int = 0 +) {.base, async: (raises: []).} = logScope: nodeName = node.name await node.trackedFutures.cancelTracked() - if node.process != nil: + if not node.process.isNil: + let processId = node.process.processId + trace "terminating node process...", processId try: - trace "terminating node process..." - if errCode =? node.process.terminate().errorOption: - error "failed to terminate process", errCode = $errCode + let exitCode = await noCancel node.process.terminateAndWaitForExit(2.seconds) + if exitCode > 0 and exitCode != 143 and # 143 = SIGTERM (initiated above) + exitCode != expectedExitCode: + warn "process exited with a non-zero exit code", exitCode + trace "node process terminated", exitCode + except CatchableError: + try: + let forcedExitCode = await noCancel node.process.killAndWaitForExit(3.seconds) + trace "node process forcibly killed with exit code: ", exitCode = forcedExitCode + except CatchableError as e: + warn "failed to kill node process in time, it will be killed when the parent process exits", + error = e.msg + writeStackTrace() - trace "waiting for node process to exit" - let exitCode = await node.process.waitForExit(3.seconds) - if exitCode > 0: - error "failed to exit process, check for zombies", exitCode + trace "node stopped" - trace "closing node process' streams" - await node.process.closeWait() - except CancelledError as error: - raise error - except CatchableError as e: - error "error stopping node process", error = e.msg - finally: - node.process = nil - - trace "node stopped" - -proc waitUntilOutput*(node: NodeProcess, output: string) {.async.} = +proc waitUntilOutput*( + node: NodeProcess, output: string +) {.async: (raises: [CancelledError, AsyncTimeoutError]).} = logScope: nodeName = node.name @@ -153,9 +155,21 @@ proc waitUntilOutput*(node: NodeProcess, output: string) {.async.} = let fut = node.captureOutput(output, started) node.trackedFutures.track(fut) asyncSpawn fut - await started.wait(60.seconds) # allow enough time for proof generation + try: + await started.wait(60.seconds) # allow enough time for proof generation + except AsyncTimeoutError as e: + raise e + except CancelledError as e: + raise e + except CatchableError as e: # unsure where this originates from + error "unexpected error occurred waiting for node output", error = e.msg + +proc waitUntilStarted*( + node: NodeProcess +) {.async: (raises: [CancelledError, NodeProcessError]).} = + logScope: + nodeName = node.name -proc waitUntilStarted*(node: NodeProcess) {.async.} = try: await node.waitUntilOutput(node.startedOutput) trace "node started" @@ -168,10 +182,10 @@ proc waitUntilStarted*(node: NodeProcess) {.async.} = raise newException(NodeProcessError, "node did not output '" & node.startedOutput & "'") -proc restart*(node: NodeProcess) {.async.} = +method restart*(node: NodeProcess) {.base, async.} = await node.stop() await node.start() await node.waitUntilStarted() -method removeDataDir*(node: NodeProcess) {.base.} = +method removeDataDir*(node: NodeProcess) {.base, raises: [NodeProcessError].} = raiseAssert "[removeDataDir] not implemented" diff --git a/tests/integration/scripts/winkillprocess.sh b/tests/integration/scripts/winkillprocess.sh new file mode 100644 index 00000000..b5e58ab4 --- /dev/null +++ b/tests/integration/scripts/winkillprocess.sh @@ -0,0 +1,97 @@ +#!/bin/bash + +# List all processes with a specific name +list() { + local name=$1 + echo "Listing all processes named '$name'..." + powershell.exe -Command "Get-CimInstance Win32_Process -Filter \"name = '$name'\" | Select-Object ProcessId, Name, CommandLine | Format-Table -AutoSize" +} + +# Search for processes with a specific name and command line pattern +search() { + local name=$1 + local pattern=$2 + echo "Searching for '$name' processes with command line matching '$pattern'..." + powershell.exe -Command " + \$processes = Get-CimInstance Win32_Process -Filter \"name = '$name'\" | Where-Object { \$_.CommandLine -match '$pattern' }; + if (\$processes) { + \$processes | Select-Object ProcessId, Name, CommandLine | Format-Table -AutoSize; + } else { + Write-Host \"No matching '$name' processes found\"; + } + " +} + +# Kill all processes with a specific name +killall() { + local name=$1 + echo "Finding and killing all '$name' processes..." + powershell.exe -Command " + \$processes = Get-CimInstance Win32_Process -Filter \"name = '$name'\"; + if (\$processes) { + foreach (\$process in \$processes) { + Stop-Process -Id \$process.ProcessId -Force; + Write-Host \"Killed process \$(\$process.ProcessId)\"; + } + } else { + Write-Host \"No '$name' processes found\"; + } + " +} + +# Kill processes with a specific name and command line pattern +kill() { + local name=$1 + local pattern=$2 + echo "Finding and killing '$name' processes with command line matching '$pattern'..." + powershell.exe -Command " + \$processes = Get-CimInstance Win32_Process -Filter \"name = '$name'\" | Where-Object { \$_.CommandLine -match '$pattern' }; + if (\$processes) { + foreach (\$process in \$processes) { + Stop-Process -Id \$process.ProcessId -Force; + Write-Host \"Killed process \$(\$process.ProcessId)\"; + } + } else { + Write-Host \"No matching '$name' processes found\"; + } + " +} + +# Check if being run directly or sourced +if [[ "${BASH_SOURCE[0]}" == "${0}" ]]; then + # If run directly (not sourced), provide command line interface + case "$1" in + list) + if [ -z "$2" ]; then + echo "Usage: $0 list PROCESS_NAME" + exit 1 + fi + list "$2" + ;; + search) + if [ -z "$2" ] || [ -z "$3" ]; then + echo "Usage: $0 search PROCESS_NAME COMMANDLINE_PATTERN" + exit 1 + fi + search "$2" "$3" + ;; + killall) + if [ -z "$2" ]; then + echo "Usage: $0 killall PROCESS_NAME" + exit 1 + fi + killall "$2" + ;; + kill) + if [ -z "$2" ] || [ -z "$3" ]; then + echo "Usage: $0 kill PROCESS_NAME COMMANDLINE_PATTERN" + exit 1 + fi + kill "$2" "$3" + ;; + *) + echo "Usage: $0 {list PROCESS_NAME|search PROCESS_NAME COMMANDLINE_PATTERN|killall PROCESS_NAME|kill PROCESS_NAME COMMANDLINE_PATTERN}" + exit 1 + ;; + esac +fi diff --git a/tests/integration/utils.nim b/tests/integration/utils.nim new file mode 100644 index 00000000..3e522a04 --- /dev/null +++ b/tests/integration/utils.nim @@ -0,0 +1,92 @@ +import std/os +import std/strformat +import pkg/chronos +import pkg/chronos/asyncproc +import pkg/codex/logutils + +{.push raises: [].} + +proc nextFreePort*(startPort: int): Future[int] {.async: (raises: [CancelledError]).} = + proc client(server: StreamServer, transp: StreamTransport) {.async: (raises: []).} = + await transp.closeWait() + + var port = startPort + while true: + trace "checking if port is free", port + try: + let host = initTAddress("127.0.0.1", port) + # We use ReuseAddr here only to be able to reuse the same IP/Port when + # there's a TIME_WAIT socket. It's useful when running the test multiple + # times or if a test ran previously using the same port. + var server = createStreamServer(host, client, {ReuseAddr}) + trace "port is free", port + await server.closeWait() + return port + except TransportOsError: + trace "port is not free", port + inc port + except TransportAddressError: + raiseAssert "bad address" + +proc sanitize*(pathSegment: string): string = + var sanitized = pathSegment + for invalid in invalidFilenameChars.items: + sanitized = sanitized.replace(invalid, '_').replace(' ', '_') + sanitized + +proc getLogFile*( + logDir, startTime, suiteName, testName, role: string, index = int.none +): string {.raises: [IOError, OSError].} = + let logsDir = + if logDir == "": + currentSourcePath.parentDir() / "logs" / sanitize(startTime & "__" & suiteName) / + sanitize(testName) + else: + logDir / sanitize(suiteName) / sanitize(testName) + + createDir(logsDir) + + var fn = $role + if idx =? index: + fn &= "_" & $idx + fn &= ".log" + + let fileName = logsDir / fn + return fileName + +proc appendFile*(filename: string, content: string) {.raises: [IOError].} = + ## Opens a file named `filename` for writing. Then writes the + ## `content` completely to the file and closes the file afterwards. + ## Raises an IO exception in case of an error. + var f: File + try: + f = open(filename, fmAppend) + f.write(content) + except IOError as e: + raise newException(IOError, "cannot open and write " & filename & ": " & e.msg) + finally: + close(f) + +when defined(windows): + proc forceKillProcess*( + processName, matchingCriteria: string + ): Future[CommandExResponse] {. + async: ( + raises: [ + AsyncProcessError, AsyncProcessTimeoutError, CancelledError, ValueError, + OSError, + ] + ) + .} = + let path = splitFile(currentSourcePath()).dir / "scripts" / "winkillprocess.sh" + let cmd = &"{absolutePath(path)} kill {processName} \"{matchingCriteria}\"" + trace "Forcefully killing windows process", processName, matchingCriteria, cmd + return await execCommandEx(cmd, timeout = 5.seconds) + +proc getDataDir*(testId, testName, startTime, role: string, index = int.none): string = + var suffix = role + if idx =? index: + suffix &= "_" & $idx + + getTempDir() / "Codex" / sanitize(testId) / sanitize(testName) / sanitize(startTime) / + sanitize(suffix) diff --git a/vendor/nim-chronos b/vendor/nim-chronos index c04576d8..0646c444 160000 --- a/vendor/nim-chronos +++ b/vendor/nim-chronos @@ -1 +1 @@ -Subproject commit c04576d829b8a0a1b12baaa8bc92037501b3a4a0 +Subproject commit 0646c444fce7c7ed08ef6f2c9a7abfd172ffe655