refactor(ci): run integration tests in parallel by spinning up more runners (#1287)

This commit is contained in:
markspanbroek 2025-06-25 10:56:16 +02:00 committed by GitHub
parent baff902137
commit 01615354af
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
19 changed files with 259 additions and 99 deletions

View File

@ -3,12 +3,14 @@ Tips for shorter build times
### Runner availability ###
Currently, the biggest bottleneck when optimizing workflows is the availability
of Windows and macOS runners. Therefore, anything that reduces the time spent in
Windows or macOS jobs will have a positive impact on the time waiting for
runners to become available. The usage limits for Github Actions are [described
here][limits]. You can see a breakdown of runner usage for your jobs in the
Github Actions tab ([example][usage]).
When running on the Github free, pro or team plan, the bottleneck when
optimizing workflows is the availability of macOS runners. Therefore, anything
that reduces the time spent in macOS jobs will have a positive impact on the
time waiting for runners to become available. On the Github enterprise plan,
this is not the case and you can more freely use parallelization on multiple
runners. The usage limits for Github Actions are [described here][limits]. You
can see a breakdown of runner usage for your jobs in the Github Actions tab
([example][usage]).
### Windows is slow ###
@ -22,11 +24,10 @@ analysis, etc. are therefore better performed on a Linux runner.
Breaking up a long build job into several jobs that you run in parallel can have
a positive impact on the wall clock time that a workflow runs. For instance, you
might consider running unit tests and integration tests in parallel. Keep in
mind however that availability of macOS and Windows runners is the biggest
bottleneck. If you split a Windows job into two jobs, you now need to wait for
two Windows runners to become available! Therefore parallelization often only
makes sense for Linux jobs.
might consider running unit tests and integration tests in parallel. When
running on the Github free, pro or team plan, keep in mind that availability of
macOS runners is a bottleneck. If you split a macOS job into two jobs, you now
need to wait for two macOS runners to become available.
### Refactoring ###
@ -66,7 +67,8 @@ might seem inconvenient, because when you're debugging an issue you often want
to know whether you introduced a failure on all platforms, or only on a single
one. You might be tempted to disable fail-fast, but keep in mind that this keeps
runners busy for longer on a workflow that you know is going to fail anyway.
Consequent runs will therefore take longer to start. Fail fast is most likely better for overall development speed.
Consequent runs will therefore take longer to start. Fail fast is most likely
better for overall development speed.
[usage]: https://github.com/codex-storage/nim-codex/actions/runs/3462031231/usage
[composite]: https://docs.github.com/en/actions/creating-actions/creating-a-composite-action

View File

@ -24,9 +24,9 @@ jobs:
run:
shell: ${{ matrix.shell }} {0}
name: ${{ matrix.os }}-${{ matrix.tests }}-${{ matrix.cpu }}-${{ matrix.nim_version }}
name: ${{ matrix.os }}-${{ matrix.tests }}-${{ matrix.cpu }}-${{ matrix.nim_version }}-${{ matrix.job_number }}
runs-on: ${{ matrix.builder }}
timeout-minutes: 120
timeout-minutes: 60
steps:
- name: Checkout sources
uses: actions/checkout@v4
@ -72,13 +72,15 @@ jobs:
## Part 3 Tests ##
- name: Integration tests
if: matrix.tests == 'integration' || matrix.tests == 'all'
env:
CODEX_INTEGRATION_TEST_INCLUDES: ${{ matrix.includes }}
run: make -j${ncpu} testIntegration
- name: Upload integration tests log files
uses: actions/upload-artifact@v4
if: (matrix.tests == 'integration' || matrix.tests == 'all') && always()
with:
name: ${{ matrix.os }}-${{ matrix.cpu }}-${{ matrix.nim_version }}-integration-tests-logs
name: ${{ matrix.os }}-${{ matrix.cpu }}-${{ matrix.nim_version }}-${{ matrix.job_number }}-integration-tests-logs
path: tests/integration/logs/
retention-days: 1

View File

@ -22,23 +22,14 @@ jobs:
matrix: ${{ steps.matrix.outputs.matrix }}
cache_nonce: ${{ env.cache_nonce }}
steps:
- name: Checkout sources
uses: actions/checkout@v4
- name: Compute matrix
id: matrix
uses: fabiocaccamo/create-matrix-action@v5
with:
matrix: |
os {linux}, cpu {amd64}, builder {ubuntu-latest}, tests {unittest}, nim_version {${{ env.nim_version }}}, shell {bash --noprofile --norc -e -o pipefail}
os {linux}, cpu {amd64}, builder {ubuntu-latest}, tests {contract}, nim_version {${{ env.nim_version }}}, shell {bash --noprofile --norc -e -o pipefail}
os {linux}, cpu {amd64}, builder {ubuntu-latest}, tests {integration}, nim_version {${{ env.nim_version }}}, shell {bash --noprofile --norc -e -o pipefail}
os {linux}, cpu {amd64}, builder {ubuntu-latest}, tests {tools}, nim_version {${{ env.nim_version }}}, shell {bash --noprofile --norc -e -o pipefail}
os {macos}, cpu {arm64}, builder {macos-14}, tests {unittest}, nim_version {${{ env.nim_version }}}, shell {bash --noprofile --norc -e -o pipefail}
os {macos}, cpu {arm64}, builder {macos-14}, tests {contract}, nim_version {${{ env.nim_version }}}, shell {bash --noprofile --norc -e -o pipefail}
os {macos}, cpu {arm64}, builder {macos-14}, tests {integration}, nim_version {${{ env.nim_version }}}, shell {bash --noprofile --norc -e -o pipefail}
os {macos}, cpu {arm64}, builder {macos-14}, tests {tools}, nim_version {${{ env.nim_version }}}, shell {bash --noprofile --norc -e -o pipefail}
os {windows}, cpu {amd64}, builder {windows-latest}, tests {unittest}, nim_version {${{ env.nim_version }}}, shell {msys2}
os {windows}, cpu {amd64}, builder {windows-latest}, tests {contract}, nim_version {${{ env.nim_version }}}, shell {msys2}
os {windows}, cpu {amd64}, builder {windows-latest}, tests {integration}, nim_version {${{ env.nim_version }}}, shell {msys2}
os {windows}, cpu {amd64}, builder {windows-latest}, tests {tools}, nim_version {${{ env.nim_version }}}, shell {msys2}
run: |
echo 'matrix<<EOF' >> $GITHUB_OUTPUT
tools/scripts/ci-job-matrix.sh >> $GITHUB_OUTPUT
echo 'EOF' >> $GITHUB_OUTPUT
build:
needs: matrix

View File

@ -15,15 +15,14 @@ jobs:
matrix: ${{ steps.matrix.outputs.matrix }}
cache_nonce: ${{ env.cache_nonce }}
steps:
- name: Checkout sources
uses: actions/checkout@v4
- name: Compute matrix
id: matrix
uses: fabiocaccamo/create-matrix-action@v5
with:
matrix: |
os {linux}, cpu {amd64}, builder {ubuntu-latest}, tests {unittest}, nim_version {${{ env.nim_version }}}, shell {bash --noprofile --norc -e -o pipefail}
os {linux}, cpu {amd64}, builder {ubuntu-latest}, tests {contract}, nim_version {${{ env.nim_version }}}, shell {bash --noprofile --norc -e -o pipefail}
os {linux}, cpu {amd64}, builder {ubuntu-latest}, tests {integration}, nim_version {${{ env.nim_version }}}, shell {bash --noprofile --norc -e -o pipefail}
os {linux}, cpu {amd64}, builder {ubuntu-latest}, tests {tools}, nim_version {${{ env.nim_version }}}, shell {bash --noprofile --norc -e -o pipefail}
run: |
echo 'matrix<<EOF' >> $GITHUB_OUTPUT
tools/scripts/ci-job-matrix.sh linux >> $GITHUB_OUTPUT
echo 'EOF' >> $GITHUB_OUTPUT
build:
needs: matrix

View File

@ -14,3 +14,13 @@ macro importTests*(dir: static string): untyped =
import `file`
)
imports
macro importAll*(paths: static seq[string]): untyped =
## imports all specified paths
let imports = newStmtList()
for path in paths:
imports.add(
quote do:
import `path`
)
imports

View File

@ -1,5 +1,5 @@
import ../examples
import ./multinodes
import ../../examples
import ../multinodes
multinodesuite "Node block expiration tests":
var content: seq[byte]

View File

@ -1,11 +1,11 @@
import std/tempfiles
import codex/conf
import codex/utils/fileutils
import ../asynctest
import ../checktest
import ./codexprocess
import ./nodeprocess
import ../examples
import ../../asynctest
import ../../checktest
import ../codexprocess
import ../nodeprocess
import ../../examples
asyncchecksuite "Command line interface":
let key = "4242424242424242424242424242424242424242424242424242424242424242"

View File

@ -1,8 +1,8 @@
from pkg/libp2p import Cid, init
import ../examples
import ./marketplacesuite
import ./nodeconfigs
import ./hardhatconfig
import ../../examples
import ../marketplacesuite
import ../nodeconfigs
import ../hardhatconfig
marketplacesuite(
name = "Bug #821 - node crashes during erasure coding", stopOnRequestFail = true

View File

@ -1,9 +1,9 @@
import std/options
import std/httpclient
import pkg/codex/rng
import ./twonodes
import ../contracts/time
import ../examples
import ../twonodes
import ../../contracts/time
import ../../examples
twonodessuite "Purchasing":
test "node handles storage request", twoNodesConfig:

View File

@ -1,11 +1,11 @@
import std/times
import std/httpclient
import ../examples
import ../contracts/time
import ../contracts/deployment
import ./marketplacesuite
import ./twonodes
import ./nodeconfigs
import ../../examples
import ../../contracts/time
import ../../contracts/deployment
import ./../marketplacesuite
import ../twonodes
import ../nodeconfigs
marketplacesuite(name = "Marketplace", stopOnRequestFail = true):
let marketplaceConfig = NodeConfigs(

View File

@ -1,12 +1,12 @@
from std/times import inMilliseconds
import pkg/questionable
import pkg/codex/logutils
import ../contracts/time
import ../contracts/deployment
import ../codex/helpers
import ../examples
import ./marketplacesuite
import ./nodeconfigs
import ../../contracts/time
import ../../contracts/deployment
import ../../codex/helpers
import ../../examples
import ../marketplacesuite
import ../nodeconfigs
export logutils

View File

@ -1,11 +1,11 @@
import pkg/questionable
import pkg/codex/logutils
import ../contracts/time
import ../contracts/deployment
import ../codex/helpers
import ../examples
import ./marketplacesuite
import ./nodeconfigs
import ../../contracts/time
import ../../contracts/deployment
import ../../codex/helpers
import ../../examples
import ../marketplacesuite
import ../nodeconfigs
export logutils

View File

@ -3,12 +3,12 @@ import std/sugar
import pkg/codex/logutils
import pkg/questionable/results
import pkg/ethers/provider
import ../contracts/time
import ../contracts/deployment
import ../codex/helpers
import ../examples
import ./marketplacesuite
import ./nodeconfigs
import ../../contracts/time
import ../../contracts/deployment
import ../../codex/helpers
import ../../examples
import ../marketplacesuite
import ../nodeconfigs
export logutils

View File

@ -5,10 +5,10 @@ import std/strformat
from pkg/libp2p import `==`, `$`, Cid
import pkg/codex/units
import pkg/codex/manifest
import ./twonodes
import ../examples
import ../codex/examples
import ../codex/slots/helpers
import ../twonodes
import ../../examples
import ../../codex/examples
import ../../codex/slots/helpers
import json
twonodessuite "REST API":

View File

@ -2,13 +2,13 @@ import std/times
import pkg/ethers
import pkg/codex/conf
import pkg/codex/contracts
import ../asynctest
import ../checktest
import ../examples
import ../codex/examples
import ./codexconfig
import ./codexclient
import ./multinodes
import ../../asynctest
import ../../checktest
import ../../examples
import ../../codex/examples
import ../codexconfig
import ../codexclient
import ../multinodes
multinodesuite "Rest API validation":
let config = NodeConfigs(clients: CodexConfigs.init(nodes = 1).some)

View File

@ -2,13 +2,13 @@ import std/httpclient
import std/times
import pkg/codex/contracts
from pkg/codex/stores/repostore/types import DefaultQuotaBytes
import ./twonodes
import ../codex/examples
import ../contracts/time
import ./codexconfig
import ./codexclient
import ./nodeconfigs
import ./marketplacesuite
import ../twonodes
import ../../codex/examples
import ../../contracts/time
import ../codexconfig
import ../codexclient
import ../nodeconfigs
import ../marketplacesuite
proc findItem[T](items: seq[T], item: T): ?!T =
for tmp in items:

View File

@ -1,6 +1,6 @@
import pkg/codex/rest/json
import ./twonodes
import ../codex/examples
import ../twonodes
import ../../codex/examples
import json
from pkg/libp2p import Cid, `$`

View File

@ -1,6 +1,17 @@
import std/os
import std/strutils
import ./imports
## Limit which integration tests to run by setting the
## environment variable during compilation. For example:
## CODEX_INTEGRATION_TEST_INCLUDES="testFoo.nim,testBar.nim"
const includes = getEnv("CODEX_INTEGRATION_TEST_INCLUDES")
when includes != "":
# import only the specified tests
importAll(includes.split(","))
else:
# import all tests in the integration/ directory
importTests(currentSourcePath().parentDir() / "integration")
{.warning[UnusedImport]: off.}

145
tools/scripts/ci-job-matrix.sh Executable file
View File

@ -0,0 +1,145 @@
#!/usr/bin/env bash
# This script outputs a JSON configuration file for continuous integration with
# Github actions. See .github/workflows/ci.yml for an example of how it's used.
#
# Usage: ci-job-matrix.sh [os ...]
#   e.g.: ci-job-matrix.sh linux macos
#   Without arguments, jobs are generated for linux, macos and windows.
#
# NOTE(review): the job () function below reads ${nim_version}; it appears to
# be expected from the environment (e.g. the workflow's env) — confirm,
# otherwise the generated JSON contains an empty nim_version.
# remembers how many jobs were generated; also used as the 1-based job_number
jobs_generated=0
# outputs a github actions job
# Prints one JSON object describing a CI job, preceded by a "," separator for
# every job after the first, so that consecutive calls form the body of a
# JSON array. Reads the globals job_os, job_cpu, job_builder, job_tests,
# job_includes and job_shell (set by the platform/test helpers below) and
# ${nim_version} (expected from the environment).
job () {
# output a comma separator between jobs
if (( $jobs_generated >= 1 )); then
echo -n ","
fi
# job numbers are 1-based: the counter is incremented before it is printed
(( jobs_generated++ ))
# output github actions job as JSON
# NOTE(review): values are interpolated without JSON escaping — they must not
# contain '"' or '\' characters
echo "{\
\"os\": \"${job_os}\", \
\"cpu\": \"${job_cpu}\", \
\"builder\": \"${job_builder}\", \
\"tests\": \"${job_tests}\", \
\"includes\": \"${job_includes}\", \
\"nim_version\": \"${nim_version}\", \
\"shell\": \"${job_shell}\", \
\"job_number\": \"${jobs_generated}\" \
}"
}
# sets the platform-related job parameters
# $1 - os name, $2 - cpu architecture, $3 - runner label, $4 - shell
set_platform () {
  job_os=$1
  job_cpu=$2
  job_builder=$3
  job_shell=$4
}

# sets parameters for a linux job
linux () {
  set_platform linux amd64 ubuntu-latest "bash --noprofile --norc -e -o pipefail"
}

# sets parameters for a macos job
macos () {
  set_platform macos arm64 macos-14 "bash --noprofile --norc -e -o pipefail"
}

# sets parameters for a windows job
windows () {
  set_platform windows amd64 windows-latest msys2
}
# sets the test parameters and emits a single job without any test includes
# $1 - value for the "tests" field of the job
simple_test_job () {
  job_tests=$1
  job_includes=""
  job
}

# outputs a unit test job
unit_test () {
  simple_test_job unittest
}

# outputs a contract test job
contract_test () {
  simple_test_job contract
}

# outputs a tools test job
tools_test () {
  simple_test_job tools
}
# finds all files named test*.nim in the specified directory
# $1 - directory to search (searched recursively)
# Outputs: one matching path per line on stdout
find_tests () {
  local dir=$1
  # quote the directory so paths containing spaces or glob characters
  # are passed to find as a single argument (fixes SC2086)
  find "$dir" -name 'test*.nim'
}
# creates batches from stdin elements, joined by a separator
# $1 - batch size (maximum number of elements per output line)
# $2 - separator (only its first character is used for joining, since it is
#      applied through IFS)
# Outputs: one line per batch on stdout
batch () {
  local batch_size=$1
  local separator=$2
  # pass the separator as an argument instead of interpolating it into the
  # bash -c script, so a separator containing quotes or '$' cannot break or
  # inject into the command; also quote the batch size (SC2086)
  xargs -n "$batch_size" bash -c 'IFS=$1; shift; echo "$*"' _ "$separator"
}
# outputs a single integration test job
# $1 - comma-separated list of test files to run in this job
integration_test_job () {
  job_includes=$1
  job_tests="integration"
  job
}
# outputs several integration test jobs
# Batches the integration tests by their expected maximum duration (encoded
# as the directory they live in) and emits one job per batch. Fails the
# script when a test is not filed under a known duration directory.
integration_test () {
  local tests
  # each test that lasts up to 30 minutes gets its own ci job
  # (word-splitting of $(...) is intentional: one batch per line, and batches
  # contain no whitespace because they are comma-joined)
  for tests in $(find_tests tests/integration/30_minutes | batch 1 ","); do
    integration_test_job "$tests"
  done
  # tests that last up to 5 minutes are batched per 6 into a ci job
  for tests in $(find_tests tests/integration/5_minutes | batch 6 ","); do
    integration_test_job "$tests"
  done
  # tests that last up to 1 minute are batched per 30 into a ci job
  for tests in $(find_tests tests/integration/1_minute | batch 30 ","); do
    integration_test_job "$tests"
  done
  # fail when there are integration tests with an unknown duration
  local filter='1_minute\|5_minutes\|30_minutes'
  local unknown
  # declaration and assignment are separate so 'local' cannot mask the
  # command substitution's exit status
  unknown=$(find_tests tests/integration | grep -v "$filter")
  if [ "$unknown" != "" ]; then
    echo "Error: Integration tests need to be in either the 1_minute," >&2
    echo "       5_minutes, or 30_minutes directory, based on the maximum" >&2
    echo "       running time of the test. This is used to group the" >&2
    echo "       integration tests into batches to speed up the" >&2
    echo "       continuous integration." >&2
    echo "       Offending tests: $unknown" >&2
    exit 1
  fi
}
# outputs jobs for all test types
# Emits the jobs for every test category, in a fixed order (the order
# determines the job_number assigned to each job).
all_tests () {
  local category
  for category in unit_test contract_test integration_test tools_test; do
    "$category"
  done
}
# outputs jobs for the specified operating systems and all test types
# $@ - operating system names; each must match one of the platform functions
#      above (linux, macos, windows)
# Outputs: a complete JSON array of job objects on stdout
os_jobs () {
  local os
  echo "["
  # iterate over the arguments directly instead of flattening them into a
  # single string and re-splitting it unquoted (fixes SC2124 / SC2086)
  for os in "$@"; do
    # set the platform parameters, then emit every job for that platform
    "$os"
    all_tests
  done
  echo "]"
}
os_jobs ${@:-linux macos windows}