From 1acedcf71c05e3e80e3d3634c719acf058357ffa Mon Sep 17 00:00:00 2001
From: Eric <5089238+emizzle@users.noreply.github.com>
Date: Fri, 16 Jan 2026 21:47:59 +1100
Subject: [PATCH] fix(ci): introduce a number of integration test fixes (#1342)

Signed-off-by: Slava <20563034+veaceslavdoina@users.noreply.github.com>
Co-authored-by: Slava <20563034+veaceslavdoina@users.noreply.github.com>
Co-authored-by: Arnaud <arnaud@status.im>
Co-authored-by: gmega <giuliano.mega@gmail.com>
---
 .../actions/nimbus-build-system/action.yml    |  30 +-
 .github/workflows/ci-reusable.yml             |  11 +-
 .github/workflows/ci.yml                      |   1 +
 Makefile                                      |  16 +-
 build.nims                                    |  39 ++-
 codex.nimble                                  |   2 +-
 codex/logutils.nim                            |   3 +
 config.nims                                   |   4 +-
 tests/asynctest.nim                           |  12 +-
 tests/codex/slots/sampler/testutils.nim       |   6 +-
 tests/ethertest.nim                           |   4 +-
 tests/helpers.nim                             |   2 +
 tests/integration/1_minute/testcli.nim        |  92 +++++-
 tests/integration/1_minute/testecbug.nim      |  12 +-
 .../30_minutes/testmarketplace.nim.ignore     |   9 +-
 .../30_minutes/testvalidator.nim.ignore       |  28 +-
 tests/integration/codexconfig.nim             |   3 +-
 tests/integration/codexprocess.nim            |  95 ++++--
 tests/integration/hardhatprocess.nim          | 108 +++++-
 tests/integration/marketplacesuite.nim        |   5 +-
 tests/integration/multinodes.nim              | 309 ++++++++++++------
 tests/integration/nodeprocess.nim             |  94 +++---
 tests/integration/scripts/winkillprocess.sh   |  97 ++++++
 tests/integration/utils.nim                   |  92 ++++++
 vendor/nim-chronos                            |   2 +-
 25 files changed, 830 insertions(+), 246 deletions(-)
 create mode 100644 tests/integration/scripts/winkillprocess.sh
 create mode 100644 tests/integration/utils.nim

diff --git a/.github/actions/nimbus-build-system/action.yml b/.github/actions/nimbus-build-system/action.yml
index f70af604..4acbff33 100644
--- a/.github/actions/nimbus-build-system/action.yml
+++ b/.github/actions/nimbus-build-system/action.yml
@@ -29,6 +29,7 @@ runs:
       shell: ${{ inputs.shell }} {0}
       run: |
         curl --proto '=https' --tlsv1.2 -sSf https://sh.rustup.rs/ | sh -s -- --default-toolchain=${{ inputs.rust_version }} -y
+        echo "$HOME/.cargo/bin" >> "$GITHUB_PATH"
 
     - name: APT (Linux amd64/arm64)
       if: inputs.os == 'linux' && (inputs.cpu == 'amd64' || inputs.cpu == 'arm64')
@@ -83,7 +84,7 @@ runs:
 
     - name: Install gcc 14 on Linux
       # We don't want to install gcc 14 for coverage (Ubuntu 20.04)
-      if : ${{ inputs.os == 'linux' && inputs.coverage != 'true' }}
+      if: ${{ inputs.os == 'linux' && inputs.coverage != 'true' }}
       shell: ${{ inputs.shell }} {0}
       run: |
         # Skip for older Ubuntu versions
@@ -101,15 +102,22 @@ runs:
       if: inputs.os == 'linux' || inputs.os == 'macos'
       uses: hendrikmuhs/ccache-action@v1.2
       with:
-        create-symlink: true
-        key: ${{ inputs.os }}-${{ inputs.builder }}-${{ inputs.cpu }}-${{ inputs.tests }}-${{ inputs.nim_version }}
+        create-symlink: false
+        key: ${{ inputs.os }}-${{ inputs.builder }}-${{ inputs.cpu }}-${{ inputs.tests }}-${{ inputs.nim_version }}-${{ github.run_id }}-${{ github.run_number }}-${{ github.run_attempt }}
         evict-old-files: 7d
 
+    - name: Add ccache to path on Linux/Mac
+      if: inputs.os == 'linux' || inputs.os == 'macos'
+      shell: ${{ inputs.shell }} {0}
+      run: |
+        echo "/usr/lib/ccache" >> "$GITHUB_PATH"
+        echo "/usr/local/opt/ccache/libexec" >> "$GITHUB_PATH"
+
     - name: Install ccache on Windows
       if: inputs.os == 'windows'
       uses: hendrikmuhs/ccache-action@v1.2
       with:
-        key: ${{ inputs.os }}-${{ inputs.builder }}-${{ inputs.cpu }}-${{ inputs.tests }}-${{ inputs.nim_version }}
+        key: ${{ inputs.os }}-${{ inputs.builder }}-${{ inputs.cpu }}-${{ inputs.tests }}-${{ inputs.nim_version }}-${{ github.run_id }}-${{ github.run_number }}-${{ github.run_attempt }}
         evict-old-files: 7d
 
     - name: Enable ccache on Windows
@@ -117,11 +125,11 @@ runs:
       shell: ${{ inputs.shell }} {0}
       run: |
         CCACHE_DIR=$(dirname $(which ccache))/ccached
-        mkdir ${CCACHE_DIR}
-        ln -s $(which ccache) ${CCACHE_DIR}/gcc.exe
-        ln -s $(which ccache) ${CCACHE_DIR}/g++.exe
-        ln -s $(which ccache) ${CCACHE_DIR}/cc.exe
-        ln -s $(which ccache) ${CCACHE_DIR}/c++.exe
+        mkdir -p ${CCACHE_DIR}
+        ln -sf $(which ccache) ${CCACHE_DIR}/gcc.exe
+        ln -sf $(which ccache) ${CCACHE_DIR}/g++.exe
+        ln -sf $(which ccache) ${CCACHE_DIR}/cc.exe
+        ln -sf $(which ccache) ${CCACHE_DIR}/c++.exe
         echo "export PATH=${CCACHE_DIR}:\$PATH" >> $HOME/.bash_profile # prefix path in MSYS2
 
     - name: Derive environment variables
@@ -202,10 +210,10 @@ runs:
     - name: Restore Nim toolchain binaries from cache
       id: nim-cache
       uses: actions/cache@v4
-      if : ${{ inputs.coverage != 'true'  }}
+      if: ${{ inputs.coverage != 'true' }}
       with:
         path: NimBinaries
-        key: ${{ inputs.os }}-${{ inputs.cpu }}-nim-${{ inputs.nim_version }}-cache-${{ env.cache_nonce }}-${{ github.run_id }}
+        key: ${{ inputs.os }}-${{ inputs.cpu }}-nim-${{ inputs.nim_version }}-cache-${{ env.cache_nonce }}-${{ github.run_id }}-${{ github.run_number }}-${{ github.run_attempt }}
         restore-keys: ${{ inputs.os }}-${{ inputs.cpu }}-nim-${{ inputs.nim_version }}-cache-${{ env.cache_nonce }}
 
     - name: Set NIM_COMMIT
diff --git a/.github/workflows/ci-reusable.yml b/.github/workflows/ci-reusable.yml
index b6131b18..ae3aecbe 100644
--- a/.github/workflows/ci-reusable.yml
+++ b/.github/workflows/ci-reusable.yml
@@ -54,13 +54,20 @@ jobs:
         with:
           node-version: 22
 
-      - name: Start Ethereum node with Logos Storage contracts
+      - name: Install Ethereum node dependencies
         if: matrix.tests == 'contract' || matrix.tests == 'integration' || matrix.tests == 'tools' || matrix.tests == 'all'
         working-directory: vendor/logos-storage-contracts-eth
         env:
           MSYS2_PATH_TYPE: inherit
         run: |
           npm ci
+
+      - name: Run Ethereum node with Logos Storage contracts
+        if: matrix.tests == 'contract' || matrix.tests == 'integration' || matrix.tests == 'tools' || matrix.tests == 'all'
+        working-directory: vendor/logos-storage-contracts-eth
+        env:
+          MSYS2_PATH_TYPE: inherit
+        run: |
           npm start &
           # Wait for the contracts to be deployed
           sleep 5
@@ -75,7 +82,7 @@ jobs:
         if: matrix.tests == 'integration' || matrix.tests == 'all'
         env:
           CODEX_INTEGRATION_TEST_INCLUDES: ${{ matrix.includes }}
-        run: make -j${ncpu} testIntegration
+        run: make -j${ncpu} DEBUG=${{ runner.debug }} testIntegration
 
       - name: Upload integration tests log files
         uses: actions/upload-artifact@v4
diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
index a09ebce1..0d24507d 100644
--- a/.github/workflows/ci.yml
+++ b/.github/workflows/ci.yml
@@ -18,6 +18,7 @@ concurrency:
 jobs:
 
   matrix:
+    name: Compute matrix
     runs-on: ubuntu-latest
     outputs:
       matrix: ${{ steps.matrix.outputs.matrix }}
diff --git a/Makefile b/Makefile
index d9b9d70e..8b5bc371 100644
--- a/Makefile
+++ b/Makefile
@@ -140,10 +140,24 @@ testContracts: | build deps
 	echo -e $(BUILD_MSG) "build/$@" && \
 		$(ENV_SCRIPT) nim testContracts $(NIM_PARAMS) --define:ws_resubscribe=240 build.nims
 
+TEST_PARAMS :=
+ifdef DEBUG
+  TEST_PARAMS := $(TEST_PARAMS) -d:DebugTestHarness=$(DEBUG)
+  TEST_PARAMS := $(TEST_PARAMS) -d:NoCodexLogFilters=$(DEBUG)
+  TEST_PARAMS := $(TEST_PARAMS) -d:ShowContinuousStatusUpdates=$(DEBUG)
+  TEST_PARAMS := $(TEST_PARAMS) -d:DebugHardhat=$(DEBUG)
+endif
+ifdef TEST_TIMEOUT
+  TEST_PARAMS := $(TEST_PARAMS) -d:TestTimeout=$(TEST_TIMEOUT)
+endif
+ifdef PARALLEL
+  TEST_PARAMS := $(TEST_PARAMS) -d:EnableParallelTests=$(PARALLEL)
+endif
+
 # Builds and runs the integration tests
 testIntegration: | build deps
 	echo -e $(BUILD_MSG) "build/$@" && \
-		$(ENV_SCRIPT) nim testIntegration $(NIM_PARAMS) --define:ws_resubscribe=240 build.nims
+		$(ENV_SCRIPT) nim testIntegration $(TEST_PARAMS) $(NIM_PARAMS) --define:ws_resubscribe=240 build.nims
 
 # Builds and runs all tests (except for Taiko L2 tests)
 testAll: | build deps
diff --git a/build.nims b/build.nims
index 47e848b3..72f44921 100644
--- a/build.nims
+++ b/build.nims
@@ -1,9 +1,20 @@
 mode = ScriptMode.Verbose
 
 import std/os except commandLineParams
+import std/strutils
 
 ### Helper functions
-proc buildBinary(srcName: string, outName = os.lastPathPart(srcName),  srcDir = "./", params = "", lang = "c") =
+proc truthy(val: string): bool =
+  const truthySwitches = @["yes", "1", "on", "true"]
+  return val in truthySwitches
+
+proc buildBinary(
+    srcName: string,
+    outName = os.lastPathPart(srcName),
+    srcDir = "./",
+    params = "",
+    lang = "c",
+) =
   if not dirExists "build":
     mkDir "build"
 
@@ -43,10 +54,8 @@ proc buildLibrary(name: string, srcDir = "./", params = "", `type` = "dynamic")
     exec "nim c" & " --out:build/" & name &
       ".a --threads:on --app:staticlib --opt:size --noMain --mm:refc --header --d:metrics " &
       "--nimMainPrefix:libstorage -d:noSignalHandler " &
-      "-d:LeopardExtraCompilerFlags=-fPIC " &
-      "-d:chronicles_runtime_filtering " &
-      "-d:chronicles_log_level=TRACE " &
-      params & " " & srcDir & name & ".nim"
+      "-d:LeopardExtraCompilerFlags=-fPIC " & "-d:chronicles_runtime_filtering " &
+      "-d:chronicles_log_level=TRACE " & params & " " & srcDir & name & ".nim"
 
 proc test(name: string, outName = name, srcDir = "tests/", params = "", lang = "c") =
   buildBinary name, outName, srcDir, params
@@ -61,7 +70,8 @@ task toolsCirdl, "build tools/cirdl binary":
   buildBinary "tools/cirdl/cirdl"
 
 task testStorage, "Build & run Logos Storage tests":
-  test "testCodex", outName = "testStorage", params = "-d:storage_enable_proof_failures=true"
+  test "testCodex",
+    outName = "testStorage", params = "-d:storage_enable_proof_failures=true"
 
 task testContracts, "Build & run Logos Storage Contract tests":
   test "testContracts"
@@ -70,11 +80,18 @@ task testIntegration, "Run integration tests":
   buildBinary "codex",
     outName = "storage",
     params =
-      "-d:chronicles_runtime_filtering -d:chronicles_log_level=TRACE -d:storage_enable_proof_failures=true"
-  test "testIntegration"
+      "-d:chronicles_runtime_filtering -d:chronicles_log_level=TRACE -d:chronicles_disabled_topics=JSONRPC-HTTP-CLIENT,websock,libp2p,discv5 -d:storage_enable_proof_failures=true"
+  var sinks = @["textlines[nocolors,file]"]
+  for i in 2 ..< paramCount():
+    if "DebugTestHarness=" in paramStr(i) and truthy paramStr(i).split('=')[1]:
+      sinks.add "textlines[stdout]"
+      break
+  let testParams =
+    "-d:chronicles_log_level=TRACE -d:chronicles_sinks=\"" & sinks.join(",") & "\""
+  test "testIntegration", params = testParams
   # use params to enable logging from the integration test executable
   # test "testIntegration", params = "-d:chronicles_sinks=textlines[notimestamps,stdout],textlines[dynamic] " &
-  #   "-d:chronicles_enabled_topics:integration:TRACE"  
+  #   "-d:chronicles_enabled_topics:integration:TRACE"
 
 task build, "build Logos Storage binary":
   storageTask()
@@ -139,7 +156,9 @@ task coverage, "generates code coverage report":
       nimSrcs
   )
   echo " ======== Generating HTML coverage report ======== "
-  exec("genhtml coverage/coverage.f.info --keep-going --output-directory coverage/report ")
+  exec(
+    "genhtml coverage/coverage.f.info --keep-going --output-directory coverage/report "
+  )
   echo " ======== Coverage report Done ======== "
 
 task showCoverage, "open coverage html":
diff --git a/codex.nimble b/codex.nimble
index 43c39219..61e1f470 100644
--- a/codex.nimble
+++ b/codex.nimble
@@ -4,6 +4,6 @@ description = "p2p data durability engine"
 license = "MIT"
 binDir = "build"
 srcDir = "."
-installFiles  = @["build.nims"]
+installFiles = @["build.nims"]
 
 include "build.nims"
diff --git a/codex/logutils.nim b/codex/logutils.nim
index ae27df7f..f3b98548 100644
--- a/codex/logutils.nim
+++ b/codex/logutils.nim
@@ -92,6 +92,7 @@ import std/sugar
 import std/typetraits
 
 import pkg/chronicles except toJson, `%`
+from pkg/chronos import TransportAddress
 from pkg/libp2p import Cid, MultiAddress, `$`
 import pkg/questionable
 import pkg/questionable/results
@@ -255,3 +256,5 @@ formatIt(LogFormat.textLines, array[32, byte]):
   it.short0xHexLog
 formatIt(LogFormat.json, array[32, byte]):
   it.to0xHex
+formatIt(TransportAddress):
+  $it
diff --git a/config.nims b/config.nims
index b1bc4cbe..e9e3eb0a 100644
--- a/config.nims
+++ b/config.nims
@@ -65,8 +65,8 @@ else:
     # https://gcc.gnu.org/bugzilla/show_bug.cgi?id=65782
     # ("-fno-asynchronous-unwind-tables" breaks Nim's exception raising, sometimes)
     switch("passC", "-march=x86-64")
-  else: switch("passC", "-march=native")
-    
+  else:
+    switch("passC", "-march=native")
 
 --tlsEmulation:
   off
diff --git a/tests/asynctest.nim b/tests/asynctest.nim
index 4db8277f..6f9d2d42 100644
--- a/tests/asynctest.nim
+++ b/tests/asynctest.nim
@@ -1,3 +1,13 @@
 import pkg/asynctest/chronos/unittest2
 
-export unittest2
+export unittest2 except eventually
+
+template eventuallySafe*(
+    expression: untyped, timeout = 5000, pollInterval = 1000
+): bool =
+  ## More sane defaults, for use with HTTP connections
+  eventually(expression, timeout, pollInterval)
+
+template eventually*(expression: untyped, timeout = 5000, pollInterval = 10): bool =
+  ## Fast defaults, do not use with HTTP connections!
+  eventually(expression, timeout, pollInterval)
diff --git a/tests/codex/slots/sampler/testutils.nim b/tests/codex/slots/sampler/testutils.nim
index f20b5efc..5460fde7 100644
--- a/tests/codex/slots/sampler/testutils.nim
+++ b/tests/codex/slots/sampler/testutils.nim
@@ -77,15 +77,13 @@ asyncchecksuite "Test proof sampler utils":
       )
 
     proc getExpectedIndices(n: int): seq[Natural] =
-      return collect(
-        newSeq,
+      return collect(newSeq):
         (;
           for i in 1 .. n:
             cellIndex(
               proofInput.entropy, proofInput.slotRoot, proofInput.nCellsPerSlot, i
             )
-        ),
-      )
+        )
 
     check:
       slotCellIndices(3) == getExpectedIndices(3)
diff --git a/tests/ethertest.nim b/tests/ethertest.nim
index 2cab8bf5..636760a1 100644
--- a/tests/ethertest.nim
+++ b/tests/ethertest.nim
@@ -5,6 +5,8 @@ import pkg/chronos
 import ./asynctest
 import ./checktest
 
+const HardhatPort {.intdefine.}: int = 8545
+
 ## Unit testing suite that sets up an Ethereum testing environment.
 ## Injects a `ethProvider` instance, and a list of `accounts`.
 ## Calls the `evm_snapshot` and `evm_revert` methods to ensure that any
@@ -16,7 +18,7 @@ template ethersuite*(name, body) =
     var snapshot: JsonNode
 
     setup:
-      ethProvider = JsonRpcProvider.new("ws://localhost:8545")
+      ethProvider = JsonRpcProvider.new("ws://localhost:" & $HardhatPort)
       snapshot = await send(ethProvider, "evm_snapshot")
       accounts = await ethProvider.listAccounts()
     teardown:
diff --git a/tests/helpers.nim b/tests/helpers.nim
index 742bc10d..e938015f 100644
--- a/tests/helpers.nim
+++ b/tests/helpers.nim
@@ -4,6 +4,8 @@ import helpers/templeveldb
 import std/times
 import std/sequtils, chronos
 
+import ./asynctest
+
 export multisetup, trackers, templeveldb
 
 ### taken from libp2p errorhelpers.nim
diff --git a/tests/integration/1_minute/testcli.nim b/tests/integration/1_minute/testcli.nim
index 778608b8..699b1551 100644
--- a/tests/integration/1_minute/testcli.nim
+++ b/tests/integration/1_minute/testcli.nim
@@ -1,31 +1,91 @@
 import std/tempfiles
+import std/times
 import codex/conf
 import codex/utils/fileutils
 import ../../asynctest
 import ../../checktest
 import ../codexprocess
 import ../nodeprocess
+import ../utils
 import ../../examples
 
+const HardhatPort {.intdefine.}: int = 8545
+const CodexApiPort {.intdefine.}: int = 8080
+const CodexDiscPort {.intdefine.}: int = 8090
+const CodexLogToFile {.booldefine.}: bool = false
+const CodexLogLevel {.strdefine.}: string = ""
+const CodexLogsDir {.strdefine.}: string = ""
+
 asyncchecksuite "Command line interface":
+  let startTime = now().format("yyyy-MM-dd'_'HH:mm:ss")
   let key = "4242424242424242424242424242424242424242424242424242424242424242"
 
-  proc startCodex(args: seq[string]): Future[CodexProcess] {.async.} =
-    return await CodexProcess.startNode(args, false, "cli-test-node")
+  var currentTestName = ""
+  var testCount = 0
+  var nodeCount = 0
+
+  template test(tname, tbody) =
+    inc testCount
+    currentTestName = tname
+    test tname:
+      tbody
+
+  proc addLogFile(args: seq[string]): seq[string] =
+    var args = args
+    when CodexLogToFile:
+      args.add(
+        "--log-file=" &
+          getLogFile(
+            CodexLogsDir,
+            startTime,
+            "Command line interface",
+            currentTestName,
+            "Client",
+            some nodeCount mod testCount,
+          )
+      )
+    when CodexLogLevel != "":
+      args.add "--log-level=" & CodexLogLevel
+
+    return args
+
+  proc startCodex(arguments: seq[string]): Future[CodexProcess] {.async.} =
+    inc nodeCount
+    let args = arguments.addLogFile
+    return await CodexProcess.startNode(
+      args.concat(
+        @[
+          "--api-port=" & $(await nextFreePort(CodexApiPort + nodeCount)),
+          "--disc-port=" & $(await nextFreePort(CodexDiscPort + nodeCount)),
+        ]
+      ),
+      debug = false,
+      "cli-test-node",
+    )
 
   test "complains when persistence is enabled without ethereum account":
     let node = await startCodex(@["persistence"])
     await node.waitUntilOutput("Persistence enabled, but no Ethereum account was set")
-    await node.stop()
+    # Expect the codex process to return an exit code of 1 indicating the result
+    # of the operation was unsuccessful.
+    await node.stop(expectedExitCode = 1)
 
   test "complains when ethereum private key file has wrong permissions":
     let unsafeKeyFile = genTempPath("", "")
     discard unsafeKeyFile.writeFile(key, 0o666)
-    let node = await startCodex(@["persistence", "--eth-private-key=" & unsafeKeyFile])
+    let node = await startCodex(
+      @[
+        "persistence",
+        "--eth-provider=" & "ws://localhost:" & $HardhatPort,
+        "--eth-private-key=" & unsafeKeyFile,
+      ]
+    )
     await node.waitUntilOutput(
       "Ethereum private key file does not have safe file permissions"
     )
-    await node.stop()
+    # Expect the codex process to return an exit code of 1 indicating the result
+    # of the operation was unsuccessful.
+    await node.stop(expectedExitCode = 1)
     discard removeFile(unsafeKeyFile)
 
   let
@@ -36,25 +96,37 @@ asyncchecksuite "Command line interface":
   test "suggests downloading of circuit files when persistence is enabled without accessible r1cs file":
     let node = await startCodex(@["persistence", "prover", marketplaceArg])
     await node.waitUntilOutput(expectedDownloadInstruction)
-    await node.stop()
+    # Expect the codex process to return an exit code of 1 indicating the result
+    # of the operation was unsuccessful.
+    await node.stop(expectedExitCode = 1)
 
   test "suggests downloading of circuit files when persistence is enabled without accessible wasm file":
     let node = await startCodex(
       @[
-        "persistence", "prover", marketplaceArg,
+        "persistence",
+        "--eth-provider=" & "ws://localhost:" & $HardhatPort,
+        "prover",
+        marketplaceArg,
         "--circom-r1cs=tests/circuits/fixtures/proof_main.r1cs",
       ]
     )
     await node.waitUntilOutput(expectedDownloadInstruction)
-    await node.stop()
+    # Expect the codex process to return an exit code of 1 indicating the result
+    # of the operation was unsuccessful.
+    await node.stop(expectedExitCode = 1)
 
   test "suggests downloading of circuit files when persistence is enabled without accessible zkey file":
     let node = await startCodex(
       @[
-        "persistence", "prover", marketplaceArg,
+        "persistence",
+        "--eth-provider=" & "ws://localhost:" & $HardhatPort,
+        "prover",
+        marketplaceArg,
         "--circom-r1cs=tests/circuits/fixtures/proof_main.r1cs",
         "--circom-wasm=tests/circuits/fixtures/proof_main.wasm",
       ]
     )
     await node.waitUntilOutput(expectedDownloadInstruction)
-    await node.stop()
+    # Expect the codex process to return an exit code of 1 indicating the result
+    # of the operation was unsuccessful.
+    await node.stop(expectedExitCode = 1)
diff --git a/tests/integration/1_minute/testecbug.nim b/tests/integration/1_minute/testecbug.nim
index a5bfa832..74f8aaff 100644
--- a/tests/integration/1_minute/testecbug.nim
+++ b/tests/integration/1_minute/testecbug.nim
@@ -9,12 +9,12 @@ marketplacesuite(
 ):
   test "should be able to create storage request and download dataset",
     NodeConfigs(
-      clients: CodexConfigs
-        .init(nodes = 1)
-        # .debug() # uncomment to enable console log output.debug()
-        .withLogFile()
-        # uncomment to output log file to tests/integration/logs/<start_datetime> <suite_name>/<test_name>/<node_role>_<node_idx>.log
-        .withLogTopics("node", "erasure", "marketplace").some,
+      clients: CodexConfigs.init(nodes = 1)
+      # .debug() # uncomment to enable console log output
+      # .withLogFile()
+      # uncomment to output log file to tests/integration/logs/<start_datetime> <suite_name>/<test_name>/<node_role>_<node_idx>.log
+      # .withLogTopics("node", "erasure", "marketplace")
+      .some,
       providers: CodexConfigs.init(nodes = 0).some,
     ):
     let
diff --git a/tests/integration/30_minutes/testmarketplace.nim.ignore b/tests/integration/30_minutes/testmarketplace.nim.ignore
index b04626c4..15059664 100644
--- a/tests/integration/30_minutes/testmarketplace.nim.ignore
+++ b/tests/integration/30_minutes/testmarketplace.nim.ignore
@@ -6,6 +6,7 @@ import ../../contracts/deployment
 import ./../marketplacesuite
 import ../twonodes
 import ../nodeconfigs
+from ../../helpers import eventuallySafe
 
 marketplacesuite(name = "Marketplace", stopOnRequestFail = true):
   let marketplaceConfig = NodeConfigs(
@@ -122,11 +123,11 @@ marketplacesuite(name = "Marketplace", stopOnRequestFail = true):
     # Checking that the hosting node received reward for at least the time between <expiry;end>
     let slotSize = slotSize(blocks, ecNodes, ecTolerance)
     let pricePerSlotPerSecond = minPricePerBytePerSecond * slotSize
-    check eventually (await token.balanceOf(hostAccount)) - startBalanceHost >=
+    check eventuallySafe (await token.balanceOf(hostAccount)) - startBalanceHost >=
       (duration - 5 * 60).u256 * pricePerSlotPerSecond * ecNodes.u256
 
     # Checking that client node receives some funds back that were not used for the host nodes
-    check eventually(
+    check eventuallySafe(
       (await token.balanceOf(clientAccount)) - clientBalanceBeforeFinished > 0,
       timeout = 10 * 1000, # give client a bit of time to withdraw its funds
     )
@@ -296,12 +297,12 @@ marketplacesuite(name = "Marketplace payouts", stopOnRequestFail = true):
     let slotSize = slotSize(blocks, ecNodes, ecTolerance)
     let pricePerSlotPerSecond = minPricePerBytePerSecond * slotSize
 
-    check eventually (
+    check eventuallySafe (
       let endBalanceProvider = (await token.balanceOf(provider.ethAccount))
       endBalanceProvider > startBalanceProvider and
         endBalanceProvider < startBalanceProvider + expiry.u256 * pricePerSlotPerSecond
     )
-    check eventually(
+    check eventuallySafe(
       (
         let endBalanceClient = (await token.balanceOf(client.ethAccount))
         let endBalanceProvider = (await token.balanceOf(provider.ethAccount))
diff --git a/tests/integration/30_minutes/testvalidator.nim.ignore b/tests/integration/30_minutes/testvalidator.nim.ignore
index ed67b5d0..b6ce1bbe 100644
--- a/tests/integration/30_minutes/testvalidator.nim.ignore
+++ b/tests/integration/30_minutes/testvalidator.nim.ignore
@@ -28,12 +28,12 @@ marketplacesuite(name = "Validation", stopOnRequestFail = false):
     NodeConfigs(
       # Uncomment to start Hardhat automatically, typically so logs can be inspected locally
       hardhat: HardhatConfig.none,
-      clients: CodexConfigs
-        .init(nodes = 1)
-        # .debug() # uncomment to enable console log output
-        .withLogFile()
-        # uncomment to output log file to tests/integration/logs/<start_datetime> <suite_name>/<test_name>/<node_role>_<node_idx>.log
-        .withLogTopics("purchases", "onchain").some,
+      clients: CodexConfigs.init(nodes = 1)
+      # .debug() # uncomment to enable console log output
+      # .withLogFile()
+      # uncomment to output log file to tests/integration/logs/<start_datetime> <suite_name>/<test_name>/<node_role>_<node_idx>.log
+      # .withLogTopics("purchases", "onchain")
+      .some,
       providers: CodexConfigs
         .init(nodes = 1)
         .withSimulateProofFailures(idx = 0, failEveryNProofs = 1)
@@ -47,9 +47,9 @@ marketplacesuite(name = "Validation", stopOnRequestFail = false):
         .withValidationGroupIndex(idx = 0, groupIndex = 0)
         .withValidationGroupIndex(idx = 1, groupIndex = 1)
         # .debug() # uncomment to enable console log output
-        .withLogFile()
+        # .withLogFile()
         # uncomment to output log file to tests/integration/logs/<start_datetime> <suite_name>/<test_name>/<node_role>_<node_idx>.log
-        .withLogTopics("validator")
+        # .withLogTopics("validator")
         # each topic as a separate string argument
         .some,
     ):
@@ -100,12 +100,12 @@ marketplacesuite(name = "Validation", stopOnRequestFail = false):
     NodeConfigs(
       # Uncomment to start Hardhat automatically, typically so logs can be inspected locally
       hardhat: HardhatConfig.none,
-      clients: CodexConfigs
-        .init(nodes = 1)
-        # .debug() # uncomment to enable console log output
-        .withLogFile()
-        # uncomment to output log file to tests/integration/logs/<start_datetime> <suite_name>/<test_name>/<node_role>_<node_idx>.log
-        .withLogTopics("purchases", "onchain").some,
+      clients: CodexConfigs.init(nodes = 1)
+      # .debug() # uncomment to enable console log output
+      # .withLogFile()
+      # uncomment to output log file to tests/integration/logs/<start_datetime> <suite_name>/<test_name>/<node_role>_<node_idx>.log
+      # .withLogTopics("purchases", "onchain")
+      .some,
       providers: CodexConfigs
         .init(nodes = 1)
         .withSimulateProofFailures(idx = 0, failEveryNProofs = 1)
diff --git a/tests/integration/codexconfig.nim b/tests/integration/codexconfig.nim
index 138ae274..8d0cdb33 100644
--- a/tests/integration/codexconfig.nim
+++ b/tests/integration/codexconfig.nim
@@ -169,7 +169,8 @@ proc withLogFile*(self: CodexConfigs): CodexConfigs {.raises: [CodexConfigError]
 
 proc withLogFile*(
     self: var CodexConfig, logFile: string
-) {.raises: [CodexConfigError].} = #: CodexConfigs =
+) {.raises: [CodexConfigError].} =
+  #: CodexConfigs =
   ## typically called internally from the test suite, sets a log file path to
   ## be created during the test run, for a specified node in the group
   # var config = self
diff --git a/tests/integration/codexprocess.nim b/tests/integration/codexprocess.nim
index 04c2904f..824d4c43 100644
--- a/tests/integration/codexprocess.nim
+++ b/tests/integration/codexprocess.nim
@@ -7,6 +7,7 @@ import pkg/ethers
 import pkg/libp2p
 import std/os
 import std/strutils
+import std/times
 import codex/conf
 import ./codexclient
 import ./nodeprocess
@@ -15,11 +16,28 @@ export codexclient
 export chronicles
 export nodeprocess
 
+{.push raises: [].}
+
 logScope:
   topics = "integration testing codex process"
 
-type CodexProcess* = ref object of NodeProcess
-  client: ?CodexClient
+type
+  CodexProcess* = ref object of NodeProcess
+    client: ?CodexClient
+
+  CodexProcessError* = object of NodeProcessError
+
+proc raiseCodexProcessError(
+    msg: string, parent: ref CatchableError
+) {.raises: [CodexProcessError].} =
+  raise newException(CodexProcessError, msg & ": " & parent.msg, parent)
+
+template convertError(msg, body: typed) =
+  # Don't use this in an async proc, unless body does not raise CancelledError
+  try:
+    body
+  except CatchableError as parent:
+    raiseCodexProcessError(msg, parent)
 
 method workingDir(node: CodexProcess): string =
   return currentSourcePath() / ".." / ".." / ".."
@@ -33,44 +51,83 @@ method startedOutput(node: CodexProcess): string =
 method processOptions(node: CodexProcess): set[AsyncProcessOption] =
   return {AsyncProcessOption.StdErrToStdOut}
 
-method outputLineEndings(node: CodexProcess): string {.raises: [].} =
+method outputLineEndings(node: CodexProcess): string =
   return "\n"
 
-method onOutputLineCaptured(node: CodexProcess, line: string) {.raises: [].} =
+method onOutputLineCaptured(node: CodexProcess, line: string) =
   discard
 
-proc dataDir(node: CodexProcess): string =
-  let config = CodexConf.load(cmdLine = node.arguments, quitOnFailure = false)
-  return config.dataDir.string
+proc config(node: CodexProcess): CodexConf {.raises: [CodexProcessError].} =
+  # cannot use convertError here as it uses typed parameters which forces type
+  # resolution, while confutils.load uses untyped parameters and expects type
+  # resolution not to happen yet. In other words, it won't compile.
+  try:
+    return CodexConf.load(
+      cmdLine = node.arguments, quitOnFailure = false, secondarySources = nil
+    )
+  except ConfigurationError as parent:
+    raiseCodexProcessError "Failed to load node arguments into CodexConf", parent
 
-proc ethAccount*(node: CodexProcess): Address =
-  let config = CodexConf.load(cmdLine = node.arguments, quitOnFailure = false)
-  without ethAccount =? config.ethAccount:
+proc dataDir(node: CodexProcess): string {.raises: [CodexProcessError].} =
+  return node.config.dataDir.string
+
+proc ethAccount*(node: CodexProcess): Address {.raises: [CodexProcessError].} =
+  without ethAccount =? node.config.ethAccount:
     raiseAssert "eth account not set"
   return Address(ethAccount)
 
-proc apiUrl*(node: CodexProcess): string =
-  let config = CodexConf.load(cmdLine = node.arguments, quitOnFailure = false)
-  return
-    "http://" & config.apiBindAddress.get() & ":" & $config.apiPort & "/api/storage/v1"
+proc apiUrl*(node: CodexProcess): string {.raises: [CodexProcessError].} =
+  let config = node.config
+  without apiBindAddress =? config.apiBindAddress:
+    raise
+      newException(CodexProcessError, "REST API not started: --api-bindaddr not set")
+  return "http://" & apiBindAddress & ":" & $config.apiPort & "/api/storage/v1"
 
-proc client*(node: CodexProcess): CodexClient =
+proc logFile*(node: CodexProcess): ?string {.raises: [CodexProcessError].} =
+  node.config.logFile
+
+proc client*(node: CodexProcess): CodexClient {.raises: [CodexProcessError].} =
   if client =? node.client:
     return client
   let client = CodexClient.new(node.apiUrl)
   node.client = some client
   return client
 
-method stop*(node: CodexProcess) {.async.} =
+proc updateLogFile(node: CodexProcess, newLogFile: string) =
+  for arg in node.arguments.mitems:
+    if arg.startsWith("--log-file="):
+      arg = "--log-file=" & newLogFile
+      break
+
+method restart*(node: CodexProcess) {.async.} =
+  trace "restarting codex"
+  await node.stop()
+  if logFile =? node.logFile:
+    # chronicles truncates the existing log file on start, so change the log
+    # file cli param to create a new one
+    node.updateLogFile(
+      logFile & "_restartedAt_" & now().format("yyyy-MM-dd'_'HH-mm-ss") & ".log"
+    )
+  await node.start()
+  await node.waitUntilStarted()
+  trace "codex process restarted"
+
+method stop*(node: CodexProcess) {.async: (raises: []).} =
   logScope:
     nodeName = node.name
 
+  trace "stopping codex client"
   await procCall NodeProcess(node).stop()
 
-  trace "stopping Storage client"
+  if not node.process.isNil:
+    trace "closing node process' streams"
+    await node.process.closeWait()
+    trace "node process' streams closed"
+
   if client =? node.client:
     await client.close()
     node.client = none CodexClient
 
-method removeDataDir*(node: CodexProcess) =
-  removeDir(node.dataDir)
+method removeDataDir*(node: CodexProcess) {.raises: [CodexProcessError].} =
+  convertError("failed to remove codex node data directory"):
+    removeDir(node.dataDir)
diff --git a/tests/integration/hardhatprocess.nim b/tests/integration/hardhatprocess.nim
index 915c8c53..8342f05f 100644
--- a/tests/integration/hardhatprocess.nim
+++ b/tests/integration/hardhatprocess.nim
@@ -8,28 +8,38 @@ import pkg/stew/io2
 import std/os
 import std/sets
 import std/sequtils
+import std/strformat
 import std/strutils
 import pkg/codex/conf
 import pkg/codex/utils/trackedfutures
 import ./codexclient
 import ./nodeprocess
+import ./utils
 
 export codexclient
 export chronicles
+export nodeprocess
+
+{.push raises: [].}
 
 logScope:
   topics = "integration testing hardhat process"
-  nodeName = "hardhat"
 
-type HardhatProcess* = ref object of NodeProcess
-  logFile: ?IoHandle
+type
+  OnOutputLineCaptured = proc(line: string) {.gcsafe, raises: [].}
+  HardhatProcess* = ref object of NodeProcess
+    logFile: ?IoHandle
+    onOutputLine: OnOutputLineCaptured
+
+  HardhatProcessError* = object of NodeProcessError
 
 method workingDir(node: HardhatProcess): string =
   return
     currentSourcePath() / ".." / ".." / ".." / "vendor" / "logos-storage-contracts-eth"
 
 method executable(node: HardhatProcess): string =
-  return "node_modules" / ".bin" / "hardhat"
+  return
+    "node_modules" / ".bin" / (when defined(windows): "hardhat.cmd" else: "hardhat")
 
 method startedOutput(node: HardhatProcess): string =
   return "Started HTTP and WebSocket JSON-RPC server at"
@@ -37,7 +47,7 @@ method startedOutput(node: HardhatProcess): string =
 method processOptions(node: HardhatProcess): set[AsyncProcessOption] =
   return {}
 
-method outputLineEndings(node: HardhatProcess): string {.raises: [].} =
+method outputLineEndings(node: HardhatProcess): string =
   return "\n"
 
 proc openLogFile(node: HardhatProcess, logFilePath: string): IoHandle =
@@ -52,7 +62,21 @@ proc openLogFile(node: HardhatProcess, logFilePath: string): IoHandle =
 
   return fileHandle
 
-method start*(node: HardhatProcess) {.async.} =
+method start*(
+    node: HardhatProcess
+) {.async: (raises: [CancelledError, NodeProcessError]).} =
+  logScope:
+    nodeName = node.name
+
+  var executable = ""
+  try:
+    executable = absolutePath(node.workingDir / node.executable)
+    if not fileExists(executable):
+      raiseAssert "cannot start hardhat, executable doesn't exist (looking for " &
+        &"{executable}). Try running `npm install` in {node.workingDir}."
+  except CatchableError as parent:
+    raiseAssert "failed build path to hardhat executable: " & parent.msg
+
   let poptions = node.processOptions + {AsyncProcessOption.StdErrToStdOut}
 
   trace "starting node",
@@ -89,19 +113,37 @@ method start*(node: HardhatProcess) {.async.} =
     trace "hardhat post start scripts executed"
   except CancelledError as error:
     raise error
-  except CatchableError as e:
-    error "failed to start hardhat process", error = e.msg
+  except CatchableError as parent:
+    raise newException(
+      HardhatProcessError, "failed to start hardhat process: " & parent.msg, parent
+    )
+
+proc port(node: HardhatProcess): ?int =
+  ## The argument following the first one that contains `--port`, run through
+  ## `parseInt`; `none` when no argument follows such a flag.
+  var portFlagSeen = false # TODO: move to constructor
+  for argument in node.arguments:
+    if portFlagSeen:
+      return parseInt(argument).catch.option
+    portFlagSeen = argument.contains("--port")
+
+  return none int
 
 proc startNode*(
     _: type HardhatProcess,
     args: seq[string],
     debug: string | bool = false,
     name: string,
-): Future[HardhatProcess] {.async.} =
+    onOutputLineCaptured: OnOutputLineCaptured = nil,
+): Future[HardhatProcess] {.async: (raises: [CancelledError, NodeProcessError]).} =
+  logScope:
+    nodeName = name
+
   var logFilePath = ""
 
   var arguments = newSeq[string]()
   for arg in args:
+    # TODO: move to constructor
     if arg.contains "--log-file=":
       logFilePath = arg.split("=")[1]
     else:
@@ -114,17 +156,25 @@ proc startNode*(
     arguments: arguments,
     debug: ($debug != "false"),
     trackedFutures: TrackedFutures.new(),
-    name: "hardhat",
+    name: name,
+    onOutputLine: onOutputLineCaptured,
   )
 
   await hardhat.start()
 
+  # TODO: move to constructor
   if logFilePath != "":
     hardhat.logFile = some hardhat.openLogFile(logFilePath)
 
   return hardhat
 
 method onOutputLineCaptured(node: HardhatProcess, line: string) =
+  logScope:
+    nodeName = node.name
+
+  if not node.onOutputLine.isNil:
+    node.onOutputLine(line)
+
   without logFile =? node.logFile:
     return
 
@@ -133,13 +183,49 @@ method onOutputLineCaptured(node: HardhatProcess, line: string) =
     discard logFile.closeFile()
     node.logFile = none IoHandle
 
-method stop*(node: HardhatProcess) {.async.} =
+proc closeProcessStreams(node: HardhatProcess) {.async: (raises: []).} =
+  ## Closes the process' streams; on Windows asks `forceKillProcess` to kill instead.
+  when not defined(windows):
+    if not node.process.isNil:
+      trace "closing node process' streams"
+      await node.process.closeWait()
+      trace "node process' streams closed"
+  else:
+    # Windows hangs when closing hardhat's process streams, so kill it externally.
+    without port =? node.port:
+      error "Failed to get port from Hardhat args"
+      return
+    try:
+      let cmdResult = await forceKillProcess("node.exe", &"--port {port}")
+      if cmdResult.status > 0:
+        error "Failed to forcefully kill windows hardhat process",
+          port, exitCode = cmdResult.status, stderr = cmdResult.stdError
+      else:
+        trace "Successfully killed windows hardhat process by force",
+          port, exitCode = cmdResult.status, stdout = cmdResult.stdOutput
+    except ValueError, OSError:
+      let eMsg = getCurrentExceptionMsg()
+      error "Failed to forcefully kill windows hardhat process, bad path to command",
+        error = eMsg
+    except CancelledError:
+      discard # swallowed on purpose: this proc is declared `raises: []`
+    except AsyncProcessTimeoutError as e: # before AsyncProcessError, its chronos base
+      error "Timeout while forcefully killing windows hardhat process",
+        port, error = e.msg
+    except AsyncProcessError as e:
+      error "Failed to forcefully kill windows hardhat process", port, error = e.msg
+
+method stop*(node: HardhatProcess) {.async: (raises: []).} =
   # terminate the process
   await procCall NodeProcess(node).stop()
 
+  await node.closeProcessStreams()
+
   if logFile =? node.logFile:
     trace "closing hardhat log file"
     discard logFile.closeFile()
 
+  node.process = nil
+
 method removeDataDir*(node: HardhatProcess) =
   discard
diff --git a/tests/integration/marketplacesuite.nim b/tests/integration/marketplacesuite.nim
index 5a0a11a6..cc0ee246 100644
--- a/tests/integration/marketplacesuite.nim
+++ b/tests/integration/marketplacesuite.nim
@@ -86,7 +86,10 @@ template marketplacesuite*(name: string, stopOnRequestFail: bool, body: untyped)
         duration: uint64,
         collateralPerByte: UInt256,
         minPricePerBytePerSecond: UInt256,
-    ): Future[void] {.async: (raises: [CancelledError, HttpError, ConfigurationError]).} =
+    ): Future[void] {.
+        async:
+          (raises: [CancelledError, HttpError, ConfigurationError, CodexProcessError])
+    .} =
       let totalCollateral = datasetSize.u256 * collateralPerByte
       # post availability to each provider
       for i in 0 ..< providers().len:
diff --git a/tests/integration/multinodes.nim b/tests/integration/multinodes.nim
index 5f149585..05bb6fb0 100644
--- a/tests/integration/multinodes.nim
+++ b/tests/integration/multinodes.nim
@@ -1,3 +1,4 @@
+import std/httpclient
 import std/os
 import std/sequtils
 import std/strutils
@@ -13,6 +14,7 @@ import ./codexprocess
 import ./hardhatconfig
 import ./hardhatprocess
 import ./nodeconfigs
+import ./utils
 import ../asynctest
 import ../checktest
 
@@ -24,6 +26,8 @@ export hardhatconfig
 export codexconfig
 export nodeconfigs
 
+{.push raises: [].}
+
 type
   RunningNode* = ref object
     role*: Role
@@ -36,31 +40,33 @@ type
     Hardhat
 
   MultiNodeSuiteError = object of CatchableError
+  SuiteTimeoutError = object of MultiNodeSuiteError
 
-const jsonRpcProviderUrl* = "ws://localhost:8545"
+const HardhatPort {.intdefine.}: int = 8545
+const CodexApiPort {.intdefine.}: int = 8080
+const CodexDiscPort {.intdefine.}: int = 8090
+const TestId {.strdefine.}: string = "TestId"
+const CodexLogToFile {.booldefine.}: bool = false
+const CodexLogLevel {.strdefine.}: string = ""
+const CodexLogsDir {.strdefine.}: string = ""
 
-proc raiseMultiNodeSuiteError(msg: string) =
-  raise newException(MultiNodeSuiteError, msg)
+proc raiseMultiNodeSuiteError(
+    msg: string, parent: ref CatchableError = nil
+) {.raises: [MultiNodeSuiteError].} =
+  raise newException(MultiNodeSuiteError, msg, parent)
 
-proc nextFreePort*(startPort: int): Future[int] {.async.} =
-  proc client(server: StreamServer, transp: StreamTransport) {.async.} =
-    await transp.closeWait()
+template withLock(lock: AsyncLock, body: untyped) =
+  if lock.isNil:
+    lock = newAsyncLock()
 
-  var port = startPort
-  while true:
-    trace "checking if port is free", port
+  await lock.acquire()
+  try:
+    body
+  finally:
     try:
-      let host = initTAddress("127.0.0.1", port)
-      # We use ReuseAddr here only to be able to reuse the same IP/Port when
-      # there's a TIME_WAIT socket. It's useful when running the test multiple
-      # times or if a test ran previously using the same port.
-      var server = createStreamServer(host, client, {ReuseAddr})
-      trace "port is free", port
-      await server.closeWait()
-      return port
-    except TransportOsError:
-      trace "port is not free", port
-      inc port
+      lock.release()
+    except AsyncLockError as parent:
+      raiseMultiNodeSuiteError "lock error", parent
 
 proc sanitize(pathSegment: string): string =
   var sanitized = pathSegment
@@ -71,8 +77,8 @@ proc sanitize(pathSegment: string): string =
 proc getTempDirName*(starttime: string, role: Role, roleIdx: int): string =
   getTempDir() / "Storage" / sanitize($starttime) / sanitize($role & "_" & $roleIdx)
 
-template multinodesuite*(name: string, body: untyped) =
-  asyncchecksuite name:
+template multinodesuite*(suiteName: string, body: untyped) =
+  asyncchecksuite suiteName:
     # Following the problem described here:
     # https://github.com/NomicFoundation/hardhat/issues/2053
     # It may be desirable to use http RPC provider.
@@ -85,7 +91,7 @@ template multinodesuite*(name: string, body: untyped) =
     # If you want to use a different provider url in the nodes, you can
     # use withEthProvider config modifier in the node config
     # to set the desired provider url. E.g.:
-    #   NodeConfigs(    
+    #   NodeConfigs(
     #     hardhat:
     #       HardhatConfig.none,
     #     clients:
@@ -93,6 +99,7 @@ template multinodesuite*(name: string, body: untyped) =
     #         .withEthProvider("ws://localhost:8545")
     #         .some,
     #     ...
+    var jsonRpcProviderUrl = "ws://localhost:" & $HardhatPort
     var running {.inject, used.}: seq[RunningNode]
     var bootstrapNodes: seq[string]
     let starttime = now().format("yyyy-MM-dd'_'HH:mm:ss")
@@ -101,6 +108,10 @@ template multinodesuite*(name: string, body: untyped) =
     var ethProvider {.inject, used.}: JsonRpcProvider
     var accounts {.inject, used.}: seq[Address]
     var snapshot: JsonNode
+    var lastUsedHardhatPort = HardhatPort
+    var lastUsedCodexApiPort = CodexApiPort
+    var lastUsedCodexDiscPort = CodexDiscPort
+    var codexPortLock: AsyncLock
 
     template test(tname, startNodeConfigs, tbody) =
       currentTestName = tname
@@ -108,47 +119,50 @@ template multinodesuite*(name: string, body: untyped) =
       test tname:
         tbody
 
-    proc sanitize(pathSegment: string): string =
-      var sanitized = pathSegment
-      for invalid in invalidFilenameChars.items:
-        sanitized = sanitized.replace(invalid, '_').replace(' ', '_')
-      sanitized
-
-    proc getLogFile(role: Role, index: ?int): string =
-      # create log file path, format:
-      # tests/integration/logs/<start_datetime> <suite_name>/<test_name>/<node_role>_<node_idx>.log
-
-      var logDir =
-        currentSourcePath.parentDir() / "logs" / sanitize($starttime & "__" & name) /
-        sanitize($currentTestName)
-      createDir(logDir)
-
-      var fn = $role
-      if idx =? index:
-        fn &= "_" & $idx
-      fn &= ".log"
-
-      let fileName = logDir / fn
-      return fileName
+    proc updatePort(url: var string, port: int) =
+      let parts = url.split(':')
+      url = @[parts[0], parts[1], $port].join(":")
 
     proc newHardhatProcess(
         config: HardhatConfig, role: Role
-    ): Future[NodeProcess] {.async.} =
+    ): Future[NodeProcess] {.async: (raises: [MultiNodeSuiteError, CancelledError]).} =
       var args: seq[string] = @[]
       if config.logFile:
-        let updatedLogFile = getLogFile(role, none int)
-        args.add "--log-file=" & updatedLogFile
+        try:
+          let updatedLogFile = getLogFile(
+            CodexLogsDir, starttime, suiteName, currentTestName, $role, none int
+          )
+          args.add "--log-file=" & updatedLogFile
+        except IOError as e:
+          raiseMultiNodeSuiteError(
+            "failed to start hardhat because logfile path could not be obtained: " &
+              e.msg,
+            e,
+          )
+        except OSError as e:
+          raiseMultiNodeSuiteError(
+            "failed to start hardhat because logfile path could not be obtained: " &
+              e.msg,
+            e,
+          )
+
+      let port = await nextFreePort(lastUsedHardhatPort)
+      jsonRpcProviderUrl.updatePort(port)
+      args.add("--port")
+      args.add($port)
+      lastUsedHardhatPort = port
 
       try:
         let node = await HardhatProcess.startNode(args, config.debugEnabled, "hardhat")
+        await node.waitUntilStarted()
         trace "hardhat node started"
         return node
       except NodeProcessError as e:
-        raiseMultiNodeSuiteError "cannot start hardhat process: " & e.msg
+        raiseMultiNodeSuiteError "hardhat node not started: " & e.msg, e
 
     proc newCodexProcess(
         roleIdx: int, conf: CodexConfig, role: Role
-    ): Future[NodeProcess] {.async.} =
+    ): Future[NodeProcess] {.async: (raises: [MultiNodeSuiteError, CancelledError]).} =
       let nodeIdx = running.len
       var config = conf
 
@@ -156,34 +170,60 @@ template multinodesuite*(name: string, body: untyped) =
         raiseMultiNodeSuiteError "Cannot start node at nodeIdx " & $nodeIdx &
           ", not enough eth accounts."
 
-      let datadir = getTempDirName(starttime, role, roleIdx)
+      let datadir = getDataDir(TestId, currentTestName, $starttime, $role, some roleIdx)
 
       try:
-        if config.logFile.isSome:
-          let updatedLogFile = getLogFile(role, some roleIdx)
-          config.withLogFile(updatedLogFile)
+        if config.logFile.isSome or CodexLogToFile:
+          try:
+            let updatedLogFile = getLogFile(
+              CodexLogsDir, starttime, suiteName, currentTestName, $role, some roleIdx
+            )
+            config.withLogFile(updatedLogFile)
+          except IOError as e:
+            raiseMultiNodeSuiteError(
+              "failed to start " & $role &
+                " because logfile path could not be obtained: " & e.msg,
+              e,
+            )
+          except OSError as e:
+            raiseMultiNodeSuiteError(
+              "failed to start " & $role &
+                " because logfile path could not be obtained: " & e.msg,
+              e,
+            )
+
+        when CodexLogLevel != "":
+          config.addCliOption("--log-level", CodexLogLevel)
+
+        var apiPort, discPort: int
+        withLock(codexPortLock):
+          apiPort = await nextFreePort(lastUsedCodexApiPort + nodeIdx)
+          discPort = await nextFreePort(lastUsedCodexDiscPort + nodeIdx)
+          config.addCliOption("--api-port", $apiPort)
+          config.addCliOption("--disc-port", $discPort)
+          lastUsedCodexApiPort = apiPort
+          lastUsedCodexDiscPort = discPort
 
         for bootstrapNode in bootstrapNodes:
           config.addCliOption("--bootstrap-node", bootstrapNode)
-        config.addCliOption("--api-port", $await nextFreePort(8080 + nodeIdx))
+
         config.addCliOption("--data-dir", datadir)
         config.addCliOption("--nat", "none")
         config.addCliOption("--listen-addrs", "/ip4/127.0.0.1/tcp/0")
-        config.addCliOption("--disc-port", $await nextFreePort(8090 + nodeIdx))
       except CodexConfigError as e:
         raiseMultiNodeSuiteError "invalid cli option, error: " & e.msg
 
-      let node = await CodexProcess.startNode(
-        config.cliArgs, config.debugEnabled, $role & $roleIdx
-      )
-
       try:
+        let node = await CodexProcess.startNode(
+          config.cliArgs, config.debugEnabled, $role & $roleIdx
+        )
         await node.waitUntilStarted()
         trace "node started", nodeName = $role & $roleIdx
+        return node
+      except CodexConfigError as e:
+        raiseMultiNodeSuiteError "failed to get cli args from config: " & e.msg, e
       except NodeProcessError as e:
-        raiseMultiNodeSuiteError "node not started, error: " & e.msg
-
-      return node
+        raiseMultiNodeSuiteError "node not started, error: " & e.msg, e
 
     proc hardhat(): HardhatProcess =
       for r in running:
@@ -209,7 +249,9 @@ template multinodesuite*(name: string, body: untyped) =
           if r.role == Role.Validator:
             CodexProcess(r.node)
 
-    proc startHardhatNode(config: HardhatConfig): Future[NodeProcess] {.async.} =
+    proc startHardhatNode(
+        config: HardhatConfig
+    ): Future[NodeProcess] {.async: (raises: [MultiNodeSuiteError, CancelledError]).} =
       return await newHardhatProcess(config, Role.Hardhat)
 
     proc startClientNode(conf: CodexConfig): Future[NodeProcess] {.async.} =
@@ -221,44 +263,64 @@ template multinodesuite*(name: string, body: untyped) =
       )
       return await newCodexProcess(clientIdx, config, Role.Client)
 
-    proc startProviderNode(conf: CodexConfig): Future[NodeProcess] {.async.} =
-      let providerIdx = providers().len
-      var config = conf
-      config.addCliOption(StartUpCmd.persistence, "--eth-provider", jsonRpcProviderUrl)
-      config.addCliOption(
-        StartUpCmd.persistence, "--eth-account", $accounts[running.len]
-      )
-      config.addCliOption(
-        PersistenceCmd.prover, "--circom-r1cs",
-        "vendor/logos-storage-contracts-eth/verifier/networks/hardhat/proof_main.r1cs",
-      )
-      config.addCliOption(
-        PersistenceCmd.prover, "--circom-wasm",
-        "vendor/logos-storage-contracts-eth/verifier/networks/hardhat/proof_main.wasm",
-      )
-      config.addCliOption(
-        PersistenceCmd.prover, "--circom-zkey",
-        "vendor/logos-storage-contracts-eth/verifier/networks/hardhat/proof_main.zkey",
-      )
+    proc startProviderNode(
+        conf: CodexConfig
+    ): Future[NodeProcess] {.async: (raises: [MultiNodeSuiteError, CancelledError]).} =
+      try:
+        let providerIdx = providers().len
+        var config = conf
+        config.addCliOption(
+          StartUpCmd.persistence, "--eth-provider", jsonRpcProviderUrl
+        )
+        config.addCliOption(
+          StartUpCmd.persistence, "--eth-account", $accounts[running.len]
+        )
+        config.addCliOption(
+          PersistenceCmd.prover, "--circom-r1cs",
+          "vendor/logos-storage-contracts-eth/verifier/networks/hardhat/proof_main.r1cs",
+        )
+        config.addCliOption(
+          PersistenceCmd.prover, "--circom-wasm",
+          "vendor/logos-storage-contracts-eth/verifier/networks/hardhat/proof_main.wasm",
+        )
+        config.addCliOption(
+          PersistenceCmd.prover, "--circom-zkey",
+          "vendor/logos-storage-contracts-eth/verifier/networks/hardhat/proof_main.zkey",
+        )
 
-      return await newCodexProcess(providerIdx, config, Role.Provider)
+        return await newCodexProcess(providerIdx, config, Role.Provider)
+      except CodexConfigError as exc:
+        raiseMultiNodeSuiteError "Failed to start codex node, error adding cli options: " &
+          exc.msg, exc
 
-    proc startValidatorNode(conf: CodexConfig): Future[NodeProcess] {.async.} =
-      let validatorIdx = validators().len
-      var config = conf
-      config.addCliOption(StartUpCmd.persistence, "--eth-provider", jsonRpcProviderUrl)
-      config.addCliOption(
-        StartUpCmd.persistence, "--eth-account", $accounts[running.len]
-      )
-      config.addCliOption(StartUpCmd.persistence, "--validator")
+    proc startValidatorNode(
+        conf: CodexConfig
+    ): Future[NodeProcess] {.async: (raises: [MultiNodeSuiteError, CancelledError]).} =
+      try:
+        let validatorIdx = validators().len
+        var config = conf
+        config.addCliOption(
+          StartUpCmd.persistence, "--eth-provider", jsonRpcProviderUrl
+        )
+        config.addCliOption(
+          StartUpCmd.persistence, "--eth-account", $accounts[running.len]
+        )
+        config.addCliOption(StartUpCmd.persistence, "--validator")
 
-      return await newCodexProcess(validatorIdx, config, Role.Validator)
+        return await newCodexProcess(validatorIdx, config, Role.Validator)
+      except CodexConfigError as e:
+        raiseMultiNodeSuiteError "Failed to start validator node, error adding cli options: " &
+          e.msg, e
 
-    proc teardownImpl() {.async.} =
+    proc teardownImpl() {.async: (raises: []).} =
+      trace "Tearing down test", suite = suiteName, test = currentTestName
       for nodes in @[validators(), clients(), providers()]:
         for node in nodes:
           await node.stop() # also stops rest client
-          node.removeDataDir()
+          try:
+            node.removeDataDir()
+          except CodexProcessError as e:
+            error "Failed to remove data dir during teardown", error = e.msg
 
       # if hardhat was started in the test, kill the node
       # otherwise revert the snapshot taken in the test setup
@@ -266,15 +328,28 @@ template multinodesuite*(name: string, body: untyped) =
       if not hardhat.isNil:
         await hardhat.stop()
       else:
-        discard await send(ethProvider, "evm_revert", @[snapshot])
+        try:
+          discard await noCancel send(ethProvider, "evm_revert", @[snapshot])
+        except ProviderError as e:
+          error "Failed to revert hardhat state during teardown", error = e.msg
 
-        await ethProvider.close()
+        # TODO: JsonRpcProvider.close should NOT raise any exceptions
+        try:
+          await ethProvider.close()
+        except CatchableError:
+          discard
 
       running = @[]
 
     template failAndTeardownOnError(message: string, tryBody: untyped) =
       try:
         tryBody
+      except CancelledError as e:
+        await teardownImpl()
+        when declared(teardownAllIMPL):
+          teardownAllIMPL()
+        fail()
+        quit(1)
       except CatchableError as er:
         fatal message, error = er.msg
         echo "[FATAL] ", message, ": ", er.msg
@@ -286,19 +361,34 @@ template multinodesuite*(name: string, body: untyped) =
 
     proc updateBootstrapNodes(
         node: CodexProcess
-    ): Future[void] {.async: (raises: [CatchableError]).} =
-      without ninfo =? await node.client.info():
-        # raise CatchableError instead of Defect (with .get or !) so we
-        # can gracefully shutdown and prevent zombies
-        raiseMultiNodeSuiteError "Failed to get node info"
-      bootstrapNodes.add ninfo["spr"].getStr()
+    ): Future[void] {.async: (raises: [MultiNodeSuiteError]).} =
+      try:
+        without ninfo =? await node.client.info():
+          # raise CatchableError, not Defect (.get or !): graceful shutdown, no zombies
+          raiseMultiNodeSuiteError "Failed to get node info"
+        bootstrapNodes.add ninfo["spr"].getStr()
+      except MultiNodeSuiteError as e: raise e # raised just above; do not wrap twice
+      except CatchableError as e:
+        raiseMultiNodeSuiteError "Failed to get node info: " & e.msg, e
+
+    setupAll:
+      # When this file is run with `-d:chronicles_sinks=textlines[file]`, we
+      # need to set the log file path at runtime, otherwise chronicles didn't seem to
+      # create a log file even when using an absolute path
+      when defaultChroniclesStream.outputs is (FileOutput,) and CodexLogsDir.len > 0:
+        let logFile =
+          CodexLogsDir / sanitize(getAppFilename().extractFilename & ".chronicles.log")
+        let success = defaultChroniclesStream.outputs[0].open(logFile, fmAppend)
+        doAssert success, "Failed to open log file: " & logFile
 
     setup:
+      trace "Setting up test", suite = suiteName, test = currentTestName, nodeConfigs
+
       if var conf =? nodeConfigs.hardhat:
         try:
-          let node = await startHardhatNode(conf)
+          let node = await noCancel startHardhatNode(conf)
           running.add RunningNode(role: Role.Hardhat, node: node)
-        except CatchableError as e:
+        except CatchableError as e: # CancelledError not raised due to noCancel
           echo "failed to start hardhat node"
           fail()
           quit(1)
@@ -307,12 +397,16 @@ template multinodesuite*(name: string, body: untyped) =
         # Workaround for https://github.com/NomicFoundation/hardhat/issues/2053
         # Do not use websockets, but use http and polling to stop subscriptions
         # from being removed after 5 minutes
-        ethProvider = JsonRpcProvider.new(jsonRpcProviderUrl)
+        ethProvider = JsonRpcProvider.new(
+          jsonRpcProviderUrl, pollingInterval = chronos.milliseconds(1000)
+        )
         # if hardhat was NOT started by the test, take a snapshot so it can be
         # reverted in the test teardown
         if nodeConfigs.hardhat.isNone:
           snapshot = await send(ethProvider, "evm_snapshot")
         accounts = await ethProvider.listAccounts()
+      except CancelledError as e:
+        raise e
       except CatchableError as e:
         echo "Hardhat not running. Run hardhat manually " &
           "before executing tests, or include a " & "HardhatConfig in the test setup."
@@ -342,7 +436,10 @@ template multinodesuite*(name: string, body: untyped) =
       # ensure that we have a recent block with a fresh timestamp
       discard await send(ethProvider, "evm_mine")
 
+      trace "Starting test", suite = suiteName, test = currentTestName
+
     teardown:
       await teardownImpl()
+      trace "Test completed", suite = suiteName, test = currentTestName
 
     body
diff --git a/tests/integration/nodeprocess.nim b/tests/integration/nodeprocess.nim
index 9ac0f8c3..a45e7806 100644
--- a/tests/integration/nodeprocess.nim
+++ b/tests/integration/nodeprocess.nim
@@ -5,6 +5,7 @@ import pkg/chronicles
 import pkg/chronos/asyncproc
 import pkg/libp2p
 import std/os
+import std/strformat
 import std/strutils
 import codex/conf
 import codex/utils/exceptions
@@ -14,6 +15,8 @@ import ./codexclient
 export codexclient
 export chronicles
 
+{.push raises: [].}
+
 logScope:
   topics = "integration testing node process"
 
@@ -39,24 +42,19 @@ method startedOutput(node: NodeProcess): string {.base, gcsafe.} =
 method processOptions(node: NodeProcess): set[AsyncProcessOption] {.base, gcsafe.} =
   raiseAssert "not implemented"
 
-method outputLineEndings(node: NodeProcess): string {.base, gcsafe, raises: [].} =
+method outputLineEndings(node: NodeProcess): string {.base, gcsafe.} =
   raiseAssert "not implemented"
 
-method onOutputLineCaptured(
-    node: NodeProcess, line: string
-) {.base, gcsafe, raises: [].} =
+method onOutputLineCaptured(node: NodeProcess, line: string) {.base, gcsafe.} =
   raiseAssert "not implemented"
 
-method start*(node: NodeProcess) {.base, async.} =
+method start*(node: NodeProcess) {.base, async: (raises: [CancelledError]).} =
   logScope:
     nodeName = node.name
 
   let poptions = node.processOptions + {AsyncProcessOption.StdErrToStdOut}
   trace "starting node",
-    args = node.arguments,
-    executable = node.executable,
-    workingDir = node.workingDir,
-    processOptions = poptions
+    args = node.arguments, executable = node.executable, workingDir = node.workingDir
 
   try:
     if node.debug:
@@ -81,11 +79,13 @@ proc captureOutput(
 
   trace "waiting for output", output
 
-  let stream = node.process.stdoutStream
-
   try:
     while node.process.running.option == some true:
-      while (let line = await stream.readLine(0, node.outputLineEndings); line != ""):
+      while (
+        let line = await node.process.stdoutStream.readLine(0, node.outputLineEndings)
+        line != ""
+      )
+      :
         if node.debug:
           # would be nice if chronicles could parse and display with colors
           echo line
@@ -95,8 +95,8 @@ proc captureOutput(
 
         node.onOutputLineCaptured(line)
 
-        await sleepAsync(1.millis)
-      await sleepAsync(1.millis)
+        await sleepAsync(1.nanos)
+      await sleepAsync(1.nanos)
   except CancelledError:
     discard # do not propagate as captureOutput was asyncSpawned
   except AsyncStreamError as e:
@@ -104,7 +104,7 @@ proc captureOutput(
 
 proc startNode*[T: NodeProcess](
     _: type T, args: seq[string], debug: string | bool = false, name: string
-): Future[T] {.async.} =
+): Future[T] {.async: (raises: [CancelledError]).} =
   ## Starts a Logos Storage Node with the specified arguments.
   ## Set debug to 'true' to see output of the node.
   let node = T(
@@ -116,34 +116,36 @@ proc startNode*[T: NodeProcess](
   await node.start()
   return node
 
-method stop*(node: NodeProcess) {.base, async.} =
+method stop*(
+    node: NodeProcess, expectedExitCode: int = 0
+) {.base, async: (raises: []).} =
   logScope:
     nodeName = node.name
 
   await node.trackedFutures.cancelTracked()
-  if node.process != nil:
+  if not node.process.isNil:
+    let processId = node.process.processId
+    trace "terminating node process...", processId
     try:
-      trace "terminating node process..."
-      if errCode =? node.process.terminate().errorOption:
-        error "failed to terminate process", errCode = $errCode
+      let exitCode = await noCancel node.process.terminateAndWaitForExit(2.seconds)
+      if exitCode > 0 and exitCode != 143 and # 143 = SIGTERM (initiated above)
+      exitCode != expectedExitCode:
+        warn "process exited with a non-zero exit code", exitCode
+      trace "node process terminated", exitCode
+    except CatchableError:
+      try:
+        let forcedExitCode = await noCancel node.process.killAndWaitForExit(3.seconds)
+        trace "node process forcibly killed with exit code: ", exitCode = forcedExitCode
+      except CatchableError as e:
+        warn "failed to kill node process in time, it will be killed when the parent process exits",
+          error = e.msg
+        writeStackTrace()
 
-      trace "waiting for node process to exit"
-      let exitCode = await node.process.waitForExit(3.seconds)
-      if exitCode > 0:
-        error "failed to exit process, check for zombies", exitCode
+    trace "node stopped" # logged on both the graceful and the forced-kill path
 
-      trace "closing node process' streams"
-      await node.process.closeWait()
-    except CancelledError as error:
-      raise error
-    except CatchableError as e:
-      error "error stopping node process", error = e.msg
-    finally:
-      node.process = nil
-
-    trace "node stopped"
-
-proc waitUntilOutput*(node: NodeProcess, output: string) {.async.} =
+proc waitUntilOutput*(
+    node: NodeProcess, output: string
+) {.async: (raises: [CancelledError, AsyncTimeoutError]).} =
   logScope:
     nodeName = node.name
 
@@ -153,9 +155,21 @@ proc waitUntilOutput*(node: NodeProcess, output: string) {.async.} =
   let fut = node.captureOutput(output, started)
   node.trackedFutures.track(fut)
   asyncSpawn fut
-  await started.wait(60.seconds) # allow enough time for proof generation
+  try:
+    await started.wait(60.seconds) # allow enough time for proof generation
+  except AsyncTimeoutError as e:
+    raise e
+  except CancelledError as e:
+    raise e
+  except CatchableError as e: # unsure where this originates from
+    error "unexpected error occurred waiting for node output", error = e.msg
+
+proc waitUntilStarted*(
+    node: NodeProcess
+) {.async: (raises: [CancelledError, NodeProcessError]).} =
+  logScope:
+    nodeName = node.name
 
-proc waitUntilStarted*(node: NodeProcess) {.async.} =
   try:
     await node.waitUntilOutput(node.startedOutput)
     trace "node started"
@@ -168,10 +182,10 @@ proc waitUntilStarted*(node: NodeProcess) {.async.} =
     raise
       newException(NodeProcessError, "node did not output '" & node.startedOutput & "'")
 
-proc restart*(node: NodeProcess) {.async.} =
+method restart*(node: NodeProcess) {.base, async.} = # stop, start, await started output
   await node.stop()
   await node.start()
   await node.waitUntilStarted()
 
-method removeDataDir*(node: NodeProcess) {.base.} =
+method removeDataDir*(node: NodeProcess) {.base, raises: [NodeProcessError].} = # stub
   raiseAssert "[removeDataDir] not implemented"
diff --git a/tests/integration/scripts/winkillprocess.sh b/tests/integration/scripts/winkillprocess.sh
new file mode 100644
index 00000000..b5e58ab4
--- /dev/null
+++ b/tests/integration/scripts/winkillprocess.sh
@@ -0,0 +1,97 @@
+#!/bin/bash
+
+# list NAME: print ProcessId, Name and CommandLine of every process whose name is NAME
+list() {
+  local name=$1
+  echo "Listing all processes named '$name'..."
+  powershell.exe -Command "Get-CimInstance Win32_Process -Filter \"name = '$name'\" | Select-Object ProcessId, Name, CommandLine | Format-Table -AutoSize"
+}
+
+# search NAME PATTERN: show NAME processes whose command line -match'es PATTERN (PATTERN sits inside '...', so it must not contain a single quote)
+search() {
+  local name=$1
+  local pattern=$2
+  echo "Searching for '$name' processes with command line matching '$pattern'..."
+  powershell.exe -Command "
+    \$processes = Get-CimInstance Win32_Process -Filter \"name = '$name'\" | Where-Object { \$_.CommandLine -match '$pattern' };
+    if (\$processes) {
+      \$processes | Select-Object ProcessId, Name, CommandLine | Format-Table -AutoSize;
+    } else {
+      Write-Host \"No matching '$name' processes found\";
+    }
+  "
+}
+
+# killall NAME: Stop-Process -Force every process whose name is NAME, printing each killed ProcessId
+killall() {
+  local name=$1
+  echo "Finding and killing all '$name' processes..."
+  powershell.exe -Command "
+    \$processes = Get-CimInstance Win32_Process -Filter \"name = '$name'\";
+    if (\$processes) {
+      foreach (\$process in \$processes) {
+        Stop-Process -Id \$process.ProcessId -Force;
+        Write-Host \"Killed process \$(\$process.ProcessId)\";
+      }
+    } else {
+      Write-Host \"No '$name' processes found\";
+    }
+  "
+}
+
+# kill NAME PATTERN: Stop-Process -Force NAME processes whose command line -match'es PATTERN (no single quotes in PATTERN); NOTE: shadows the shell's builtin kill when this file is sourced
+kill() {
+  local name=$1
+  local pattern=$2
+  echo "Finding and killing '$name' processes with command line matching '$pattern'..."
+  powershell.exe -Command "
+    \$processes = Get-CimInstance Win32_Process -Filter \"name = '$name'\" | Where-Object { \$_.CommandLine -match '$pattern' };
+    if (\$processes) {
+      foreach (\$process in \$processes) {
+        Stop-Process -Id \$process.ProcessId -Force;
+        Write-Host \"Killed process \$(\$process.ProcessId)\";
+      }
+    } else {
+      Write-Host \"No matching '$name' processes found\";
+    }
+  "
+}
+
+# CLI entry point: runs only when the file is executed; when sourced, just the functions above are defined
+if [[ "${BASH_SOURCE[0]}" == "${0}" ]]; then
+  # Dispatch on the sub-command in $1; a missing argument prints the usage line and exits with status 1
+  case "$1" in
+    list)
+      if [ -z "$2" ]; then
+        echo "Usage: $0 list PROCESS_NAME"
+        exit 1
+      fi
+      list "$2"
+      ;;
+    search)
+      if [ -z "$2" ] || [ -z "$3" ]; then
+        echo "Usage: $0 search PROCESS_NAME COMMANDLINE_PATTERN"
+        exit 1
+      fi
+      search "$2" "$3"
+      ;;
+    killall)
+      if [ -z "$2" ]; then
+        echo "Usage: $0 killall PROCESS_NAME"
+        exit 1
+      fi
+      killall "$2"
+      ;;
+    kill)
+      if [ -z "$2" ] || [ -z "$3" ]; then
+        echo "Usage: $0 kill PROCESS_NAME COMMANDLINE_PATTERN"
+        exit 1
+      fi
+      kill "$2" "$3"
+      ;;
+    *)
+      echo "Usage: $0 {list PROCESS_NAME|search PROCESS_NAME COMMANDLINE_PATTERN|killall PROCESS_NAME|kill PROCESS_NAME COMMANDLINE_PATTERN}"
+      exit 1
+      ;;
+  esac
+fi
diff --git a/tests/integration/utils.nim b/tests/integration/utils.nim
new file mode 100644
index 00000000..3e522a04
--- /dev/null
+++ b/tests/integration/utils.nim
@@ -0,0 +1,92 @@
+import std/os
+import std/strformat
+import pkg/chronos
+import pkg/chronos/asyncproc
+import pkg/codex/logutils
+
+{.push raises: [].}
+
+proc nextFreePort*(startPort: int): Future[int] {.async: (raises: [CancelledError]).} =
+  ## Returns the first port >= `startPort` on which a server can bind 127.0.0.1.
+  proc onAccept(srv: StreamServer, conn: StreamTransport) {.async: (raises: []).} =
+    await conn.closeWait()
+
+  var candidate = startPort
+  while true:
+    trace "checking if port is free", port = candidate
+    try:
+      let host = initTAddress("127.0.0.1", candidate)
+      # ReuseAddr only lets the probe bind over a TIME_WAIT socket left behind
+      # by an earlier run, or by a previous test that used the same IP/port.
+      let probe = createStreamServer(host, onAccept, {ReuseAddr})
+      trace "port is free", port = candidate
+      await probe.closeWait()
+      return candidate
+    except TransportOsError:
+      trace "port is not free", port = candidate
+      inc candidate
+    except TransportAddressError:
+      raiseAssert "bad address"
+
+proc sanitize*(pathSegment: string): string =
+  ## Copy of `pathSegment` with spaces and `invalidFilenameChars` replaced by '_'.
+  result = pathSegment.replace(' ', '_') # once, not once per invalid character
+  for invalid in invalidFilenameChars:
+    result = result.replace(invalid, '_')
+
+proc getLogFile*(
+    logDir, startTime, suiteName, testName, role: string, index = int.none
+): string {.raises: [IOError, OSError].} =
+  ## Creates the per-test log directory and returns the path of the log file
+  ## named `<role>[_<index>].log` inside it.
+  let testDir = sanitize(testName)
+  let logsDir =
+    if logDir.len > 0:
+      logDir / sanitize(suiteName) / testDir
+    else:
+      # no explicit directory: log next to this source file, per start time
+      currentSourcePath.parentDir() / "logs" /
+        sanitize(startTime & "__" & suiteName) / testDir
+  createDir(logsDir)
+
+  var fileName = role
+  if idx =? index:
+    fileName.add "_" & $idx
+  logsDir / (fileName & ".log")
+
+proc appendFile*(filename: string, content: string) {.raises: [IOError].} =
+  ## Appends `content` to the file named `filename`: the file is opened with
+  ## `fmAppend`, written to, and closed again before returning or raising.
+  ## An `IOError` from opening or writing is re-raised as a new `IOError`
+  ## whose message names the file.
+  try:
+    let target = open(filename, fmAppend)
+    defer:
+      close(target)
+    target.write(content)
+  except IOError as e:
+    raise newException(IOError, "cannot open and write " & filename & ": " & e.msg)
+
+when defined(windows): # Windows-only helper; relies on scripts/winkillprocess.sh
+  proc forceKillProcess*(
+      processName, matchingCriteria: string
+  ): Future[CommandExResponse] {.
+      async: (
+        raises: [
+          AsyncProcessError, AsyncProcessTimeoutError, CancelledError, ValueError,
+          OSError,
+        ]
+      )
+  .} = # runs `winkillprocess.sh kill <processName> "<matchingCriteria>"`, 5 s timeout
+    let path = splitFile(currentSourcePath()).dir / "scripts" / "winkillprocess.sh"
+    let cmd = &"{absolutePath(path)} kill {processName} \"{matchingCriteria}\""
+    trace "Forcefully killing windows process", processName, matchingCriteria, cmd
+    return await execCommandEx(cmd, timeout = 5.seconds)
+
+proc getDataDir*(testId, testName, startTime, role: string, index = int.none): string =
+  ## Sanitized path (not created): <tmp>/Codex/testId/testName/startTime/role[_index]
+  var leaf = role
+  if idx =? index:
+    leaf.add "_" & $idx
+  getTempDir() / "Codex" / sanitize(testId) / sanitize(testName) / sanitize(startTime) /
+    sanitize(leaf)
diff --git a/vendor/nim-chronos b/vendor/nim-chronos
index c04576d8..0646c444 160000
--- a/vendor/nim-chronos
+++ b/vendor/nim-chronos
@@ -1 +1 @@
-Subproject commit c04576d829b8a0a1b12baaa8bc92037501b3a4a0
+Subproject commit 0646c444fce7c7ed08ef6f2c9a7abfd172ffe655