diff --git a/.claude/settings.json b/.claude/settings.json
index d357ec83..d2135a39 100644
--- a/.claude/settings.json
+++ b/.claude/settings.json
@@ -1,4 +1,13 @@
 {
+  "permissions": {
+    "allow": [
+      "Bash(npm install)",
+      "WebSearch",
+      "Bash(npm test:*)",
+      "WebFetch(domain:github.com)",
+      "WebFetch(domain:api.github.com)"
+    ]
+  },
   "env": {
     "MCP_TIMEOUT": "120000"
   }
diff --git a/.github/actions/nimbus-build-system/action.yml b/.github/actions/nimbus-build-system/action.yml
index 0f55e36c..7c7230e8 100644
--- a/.github/actions/nimbus-build-system/action.yml
+++ b/.github/actions/nimbus-build-system/action.yml
@@ -88,7 +88,7 @@ runs:
 
     - name: Install ccache on Linux/Mac
       if: inputs.os == 'linux' || inputs.os == 'macos'
-      uses: hendrikmuhs/ccache-action@v1.2
+      uses: hendrikmuhs/ccache-action@v1.2.23
       with:
         create-symlink: false
         key: ${{ inputs.os }}-${{ inputs.builder }}-${{ inputs.cpu }}-${{ inputs.tests }}-${{ inputs.nim_version }}-${{ github.run_id }}-${{ github.run_number }}-${{ github.run_attempt }}
@@ -103,7 +103,7 @@ runs:
 
     - name: Install ccache on Windows
       if: inputs.os == 'windows'
-      uses: hendrikmuhs/ccache-action@v1.2
+      uses: hendrikmuhs/ccache-action@v1.2.23
       with:
         key: ${{ inputs.os }}-${{ inputs.builder }}-${{ inputs.cpu }}-${{ inputs.tests }}-${{ inputs.nim_version }}-${{ github.run_id }}-${{ github.run_number }}-${{ github.run_attempt }}
         evict-old-files: 7d
@@ -197,7 +197,7 @@ runs:
 
     - name: Restore Nim toolchain binaries from cache
       id: nim-cache
-      uses: actions/cache@v4
+      uses: actions/cache@v5
       if: ${{ inputs.coverage != 'true' }}
       with:
         path: NimBinaries
diff --git a/.github/release/clusters/logos-storage-rel-tests-gcp-europe-west4/backend.tf b/.github/release/clusters/logos-storage-rel-tests-gcp-europe-west4/backend.tf
new file mode 100644
index 00000000..62549a36
--- /dev/null
+++ b/.github/release/clusters/logos-storage-rel-tests-gcp-europe-west4/backend.tf
@@ -0,0 +1,7 @@
+terraform {
+  backend "gcs" {
+    prefix = "clusters/logos-storage-rel-tests-gcp-europe-west4"
+    # bucket is supplied at init time via:
+    #   terraform init -backend-config="bucket=<state-bucket>"
+  }
+}
diff --git a/.github/release/clusters/logos-storage-rel-tests-gcp-europe-west4/main.tf b/.github/release/clusters/logos-storage-rel-tests-gcp-europe-west4/main.tf
new file mode 100644
index 00000000..7f51416a
--- /dev/null
+++ b/.github/release/clusters/logos-storage-rel-tests-gcp-europe-west4/main.tf
@@ -0,0 +1,27 @@
+# Both node pools are inline in the module so GCP provisions them in parallel.
+module "gke" {
+  source = "../modules/gke"
+
+  name    = "logos-storage-rel-tests"
+  project = var.project
+  region  = var.region
+  zone    = var.zone
+
+  node_pool_name         = "runners-ci-e2-standard-2"
+  node_pool_machine_type = "e2-standard-2"
+  node_pool_count        = 1
+  node_pool_labels = {
+    default-pool  = "true"
+    scaling-type  = "fixed"
+    workload-type = "tests-runners-ci"
+  }
+
+  tests_pool_name         = "tests-e2-medium"
+  tests_pool_machine_type = "e2-medium"
+  tests_pool_count        = 5
+  tests_pool_labels = {
+    default-pool  = "false"
+    scaling-type  = "fixed"
+    workload-type = "tests-pods"
+  }
+}
diff --git a/.github/release/clusters/logos-storage-rel-tests-gcp-europe-west4/providers.tf b/.github/release/clusters/logos-storage-rel-tests-gcp-europe-west4/providers.tf
new file mode 100644
index 00000000..fe1943eb
--- /dev/null
+++ b/.github/release/clusters/logos-storage-rel-tests-gcp-europe-west4/providers.tf
@@ -0,0 +1,15 @@
+# Providers
+provider "google" {
+  project = var.project
+  region  = var.region
+}
+
+# Used to authenticate the kubernetes provider against the cluster created in
+# this same apply (short-lived OAuth access token from the active gcloud creds).
+data "google_client_config" "default" {}
+
+provider "kubernetes" {
+  host                   = "https://${module.gke.endpoint}"
+  cluster_ca_certificate = base64decode(module.gke.ca_certificate)
+  token                  = data.google_client_config.default.access_token
+}
diff --git a/.github/release/clusters/logos-storage-rel-tests-gcp-europe-west4/rbac.tf b/.github/release/clusters/logos-storage-rel-tests-gcp-europe-west4/rbac.tf
new file mode 100644
index 00000000..99b806e7
--- /dev/null
+++ b/.github/release/clusters/logos-storage-rel-tests-gcp-europe-west4/rbac.tf
@@ -0,0 +1,33 @@
+# In-cluster RBAC for the release-tests runner.
+#
+# The dist-tests Job runs inside this cluster and programmatically creates/deletes
+# Kubernetes resources (storage-node pods) for each test, so it needs API
+# credentials. The Job runs under the release-tests-runner ServiceAccount, which
+# Kubernetes automatically mounts as a short-lived projected token — no static
+# kubeconfig or token Secret required.
+
+resource "kubernetes_service_account" "release_tests_runner" {
+  metadata {
+    name      = "release-tests-runner"
+    namespace = "default"
+  }
+}
+
+resource "kubernetes_cluster_role_binding" "release_tests_runner" {
+  metadata {
+    name = "release-tests-runner"
+  }
+
+  role_ref {
+    api_group = "rbac.authorization.k8s.io"
+    kind      = "ClusterRole"
+    name      = "cluster-admin"
+  }
+
+  subject {
+    kind      = "ServiceAccount"
+    name      = kubernetes_service_account.release_tests_runner.metadata[0].name
+    namespace = kubernetes_service_account.release_tests_runner.metadata[0].namespace
+  }
+}
+
diff --git a/.github/release/clusters/logos-storage-rel-tests-gcp-europe-west4/variables.tf b/.github/release/clusters/logos-storage-rel-tests-gcp-europe-west4/variables.tf
new file mode 100644
index 00000000..2413197c
--- /dev/null
+++ b/.github/release/clusters/logos-storage-rel-tests-gcp-europe-west4/variables.tf
@@ -0,0 +1,14 @@
+variable "project" {
+  description = "GCP project ID"
+  type        = string
+}
+
+variable "region" {
+  description = "GCP region (e.g. europe-west4)"
+  type        = string
+}
+
+variable "zone" {
+  description = "GCP zone for the cluster (e.g. europe-west4-b)"
+  type        = string
+}
diff --git a/.github/release/clusters/logos-storage-rel-tests-gcp-europe-west4/versions.tf b/.github/release/clusters/logos-storage-rel-tests-gcp-europe-west4/versions.tf
new file mode 100644
index 00000000..90d1c9b6
--- /dev/null
+++ b/.github/release/clusters/logos-storage-rel-tests-gcp-europe-west4/versions.tf
@@ -0,0 +1,14 @@
+# Terraform settings
+terraform {
+  required_version = "~> 1.0"
+  required_providers {
+    google = {
+      source  = "hashicorp/google"
+      version = "~> 6.0"
+    }
+    kubernetes = {
+      source  = "hashicorp/kubernetes"
+      version = "~> 2.0"
+    }
+  }
+}
diff --git a/.github/release/clusters/modules/gke/locals.tf b/.github/release/clusters/modules/gke/locals.tf
new file mode 100644
index 00000000..197f207d
--- /dev/null
+++ b/.github/release/clusters/modules/gke/locals.tf
@@ -0,0 +1,3 @@
+locals {
+  name = "${var.name}-gcp-${var.region}"
+}
diff --git a/.github/release/clusters/modules/gke/main.tf b/.github/release/clusters/modules/gke/main.tf
new file mode 100644
index 00000000..bc5344dc
--- /dev/null
+++ b/.github/release/clusters/modules/gke/main.tf
@@ -0,0 +1,48 @@
+# Both node pools are inline so GCP provisions them in parallel during
+# cluster creation, avoiding the sequential create penalty of a separate
+# google_container_node_pool resource.
+resource "google_container_cluster" "this" {
+  name     = local.name
+  location = var.zone
+  project  = var.project
+
+  deletion_protection = false
+
+  # Send pod stdout/stderr to Cloud Logging automatically
+  logging_service    = "logging.googleapis.com/kubernetes"
+  monitoring_service = "monitoring.googleapis.com/kubernetes"
+
+  timeouts {
+    create = "20m"
+  }
+
+  node_pool {
+    name       = var.node_pool_name
+    node_count = var.node_pool_count
+
+    node_config {
+      machine_type = var.node_pool_machine_type
+      disk_size_gb = 50
+      labels       = var.node_pool_labels
+
+      oauth_scopes = [
+        "https://www.googleapis.com/auth/cloud-platform",
+      ]
+    }
+  }
+
+  node_pool {
+    name       = var.tests_pool_name
+    node_count = var.tests_pool_count
+
+    node_config {
+      machine_type = var.tests_pool_machine_type
+      disk_size_gb = 20
+      labels       = var.tests_pool_labels
+
+      oauth_scopes = [
+        "https://www.googleapis.com/auth/cloud-platform",
+      ]
+    }
+  }
+}
diff --git a/.github/release/clusters/modules/gke/outputs.tf b/.github/release/clusters/modules/gke/outputs.tf
new file mode 100644
index 00000000..84f8aef0
--- /dev/null
+++ b/.github/release/clusters/modules/gke/outputs.tf
@@ -0,0 +1,21 @@
+# Kubernetes cluster
+output "kubernetes_cluster_id" {
+  value       = google_container_cluster.this.id
+  description = "The fully-qualified ID of the GKE cluster."
+}
+
+output "kubernetes_cluster_name" {
+  value       = google_container_cluster.this.name
+  description = "The name of the GKE cluster."
+}
+
+output "endpoint" {
+  value       = google_container_cluster.this.endpoint
+  description = "The IP address of the cluster's Kubernetes API server."
+}
+
+output "ca_certificate" {
+  value       = google_container_cluster.this.master_auth[0].cluster_ca_certificate
+  description = "Base64-encoded public CA certificate of the cluster's API server."
+  sensitive   = true
+}
diff --git a/.github/release/clusters/modules/gke/variables.tf b/.github/release/clusters/modules/gke/variables.tf
new file mode 100644
index 00000000..54eb33b7
--- /dev/null
+++ b/.github/release/clusters/modules/gke/variables.tf
@@ -0,0 +1,67 @@
+# Main
+variable "name" {
+  type        = string
+  description = "A name for the created resources."
+}
+
+variable "project" {
+  type        = string
+  description = "The GCP project ID."
+}
+
+variable "region" {
+  type        = string
+  description = "The GCP region; appended to the cluster name as <name>-gcp-<region>."
+}
+
+variable "zone" {
+  type        = string
+  description = "The GCP zone for the cluster. Using a single zone avoids the longer provisioning time of a regional (multi-zone) cluster."
+}
+
+# Kubernetes default Node Pool
+variable "node_pool_name" {
+  type        = string
+  description = "A name for the default node pool."
+}
+
+variable "node_pool_machine_type" {
+  type        = string
+  description = "The GCE machine type for nodes in the default pool."
+}
+
+variable "node_pool_count" {
+  type        = number
+  description = "Fixed number of nodes in the default pool."
+}
+
+variable "node_pool_labels" {
+  type        = map(string)
+  description = "A map of key/value pairs to apply as Kubernetes labels to nodes in the default pool."
+  default = {
+    default-pool = "true"
+    scaling-type = "fixed"
+  }
+}
+
+# Tests node pool (fixed size, single zone)
+variable "tests_pool_name" {
+  type        = string
+  description = "Name for the tests node pool."
+}
+
+variable "tests_pool_machine_type" {
+  type        = string
+  description = "The GCE machine type for nodes in the tests pool."
+}
+
+variable "tests_pool_count" {
+  type        = number
+  description = "Fixed number of nodes in the tests pool (no autoscaling; this is a transient cluster)."
+}
+
+variable "tests_pool_labels" {
+  type        = map(string)
+  description = "Kubernetes labels to apply to nodes in the tests pool."
+  default     = {}
+}
diff --git a/.github/release/clusters/modules/gke/versions.tf b/.github/release/clusters/modules/gke/versions.tf
new file mode 100644
index 00000000..1b886dc9
--- /dev/null
+++ b/.github/release/clusters/modules/gke/versions.tf
@@ -0,0 +1,9 @@
+# Terraform settings
+terraform {
+  required_providers {
+    google = {
+      source  = "hashicorp/google"
+      version = "~> 6.0"
+    }
+  }
+}
diff --git a/.github/release/job-release-tests.yaml b/.github/release/job-release-tests.yaml
new file mode 100644
index 00000000..8684f1c4
--- /dev/null
+++ b/.github/release/job-release-tests.yaml
@@ -0,0 +1,55 @@
+apiVersion: batch/v1
+kind: Job
+metadata:
+  name: ${NAMEPREFIX}
+  namespace: default
+  labels:
+    name: ${NAMEPREFIX}
+    runid: ${RUNID}
+spec:
+  ttlSecondsAfterFinished: 86400
+  backoffLimit: 0
+  template:
+    metadata:
+      name: ${NAMEPREFIX}
+      labels:
+        app: ${TEST_TYPE}-runner
+        name: ${NAMEPREFIX}
+        runid: ${RUNID}
+    spec:
+      nodeSelector:
+        workload-type: "tests-runners-ci"
+      containers:
+        - name: runner
+          image: logosstorage/logos-storage-dist-tests:latest
+          imagePullPolicy: Always
+          resources:
+            requests:
+              memory: "4Gi"
+            limits:
+              memory: "6Gi"
+          env:
+            - name: LOGPATH
+              value: "/var/log/storage-${TEST_TYPE}"
+            - name: BRANCH
+              value: "${BRANCH}"
+            - name: SOURCE
+              value: "${SOURCE}"
+            - name: RUNID
+              value: "${RUNID}"
+            - name: STORAGEDOCKERIMAGE
+              value: "${STORAGEDOCKERIMAGE}"
+            - name: TESTID
+              value: "${TESTID}"
+            - name: TESTS_TYPE
+              value: "${TEST_TYPE}"
+          volumeMounts:
+            - name: logs
+              mountPath: /var/log/storage-${TEST_TYPE}
+          args: ["dotnet", "test", "Tests/LogosStorageReleaseTests", "-p:BuildInParallel=false"]
+      serviceAccountName: release-tests-runner
+      restartPolicy: Never
+      volumes:
+        - name: logs
+          hostPath:
+            path: /var/log/storage-${TEST_TYPE}
diff --git a/.github/scripts/generate_test_summary.py b/.github/scripts/generate_test_summary.py
new file mode 100644
index 00000000..1f695206
--- /dev/null
+++ b/.github/scripts/generate_test_summary.py
@@ -0,0 +1,86 @@
+"""
+Reads test-result entries (a JSON list of {"jsonPayload": {"fixture": ..., "status": ...}}
+objects built by the release workflow) and appends a Markdown summary to $GITHUB_STEP_SUMMARY.
+
+Expected env vars (all set by the workflow before calling this script):
+  ENTRIES_FILE        - path to a JSON file containing the list of test-result entries
+  RUNID               - the test run ID (e.g. 20260430-060144)
+  CLUSTER_NAME        - GKE cluster name
+  GCP_PROJECT         - GCP project ID
+  JOB_START_TIME      - ISO timestamp used as the Cloud Logging URL startTime
+  JOB_START           - job startTime from kubectl (for duration calc)
+  JOB_END             - job completionTime from kubectl (for duration calc)
+  GITHUB_STEP_SUMMARY - path to the GHA step summary file
+"""
+
+import json, os, sys, urllib.parse
+from datetime import datetime
+
+with open(os.environ["ENTRIES_FILE"], encoding="utf-8") as f:
+    entries = json.load(f)
+
+runid = os.environ["RUNID"]
+cluster = os.environ["CLUSTER_NAME"]
+project = os.environ["GCP_PROJECT"]
+start = os.environ["JOB_START_TIME"]
+
+if not entries:
+    with open(os.environ["GITHUB_STEP_SUMMARY"], "a", encoding="utf-8") as f:
+        f.write(f"No test results found for run `{runid}`\n")
+    sys.exit(0)
+
+# Aggregate by fixture in run order; mark Failed if any method failed.
+fixtures, order = {}, []
+for entry in entries:
+    p = entry.get("jsonPayload", {})
+    fixture, status = p.get("fixture", ""), p.get("status", "")
+    if not fixture:
+        continue
+    if fixture not in fixtures:
+        order.append(fixture)
+        fixtures[fixture] = status
+    elif status == "Failed":
+        fixtures[fixture] = status
+
+# Job duration
+duration = ""
+try:
+    fmt = "%Y-%m-%dT%H:%M:%SZ"
+    secs = int(
+        (
+            datetime.strptime(os.environ["JOB_END"], fmt)
+            - datetime.strptime(os.environ["JOB_START"], fmt)
+        ).total_seconds()
+    )
+    duration = f" in {secs // 60}m {secs % 60}s"
+except (KeyError, ValueError):  # JOB_START/JOB_END unset, empty or not a timestamp: omit duration
+    pass
+
+
+def log_url(fixture):
+    query = "\n".join([
+        'resource.type="k8s_container"',
+        f'resource.labels.cluster_name="{cluster}"',
+        f'labels."k8s-pod/runid"="{runid}"',
+        f'labels."k8s-pod/fixturename"="{fixture.lower()}"',
+    ])
+    encoded = urllib.parse.quote(query, safe="")
+    return (
+        f"https://console.cloud.google.com/logs/query"
+        f";query={encoded}"
+        f";startTime={start}"
+        f"?project={project}"
+    )
+
+
+passed = sum(1 for s in fixtures.values() if s == "Passed")
+total = len(fixtures)
+
+lines = ["## Test logs", "", "Filtered run logs by fixture", ""]
+for fixture in order:
+    icon = "✅" if fixtures[fixture] == "Passed" else "❌"
+    lines.append(f"- {icon} [{fixture}]({log_url(fixture)})")
+lines += ["", f"**{passed}/{total} tests passed{duration}**"]
+
+with open(os.environ["GITHUB_STEP_SUMMARY"], "a", encoding="utf-8") as f:
+    f.write("\n".join(lines) + "\n")
diff --git a/.github/workflows/ci-reusable.yml b/.github/workflows/ci-reusable.yml
index 83f1dcac..fe0c13af 100644
--- a/.github/workflows/ci-reusable.yml
+++ b/.github/workflows/ci-reusable.yml
@@ -29,7 +29,7 @@ jobs:
     timeout-minutes: 90
     steps:
       - name: Checkout sources
-        uses: actions/checkout@v4
+        uses: actions/checkout@v6
         with:
           submodules: recursive
           ref: ${{ github.event.pull_request.head.sha }}
@@ -56,7 +56,7 @@ jobs:
         run: make -j${ncpu} DEBUG=${{ runner.debug }} testIntegration
 
       - name: Upload integration tests log files
-        uses: actions/upload-artifact@v4
+        uses: actions/upload-artifact@v7
         if: (matrix.tests == 'integration' || matrix.tests == 'all') && always()
         with:
           name: ${{ matrix.os }}-${{ matrix.cpu }}-${{ matrix.nim_version }}-${{ matrix.job_number }}-integration-tests-logs
diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
index d4391d06..ee4f88d7 100644
--- a/.github/workflows/ci.yml
+++ b/.github/workflows/ci.yml
@@ -25,7 +25,7 @@ jobs:
       cache_nonce: ${{ env.cache_nonce }}
     steps:
       - name: Checkout sources
-        uses: actions/checkout@v4
+        uses: actions/checkout@v6
       - name: Compute matrix
         id: matrix
         run: |
@@ -44,7 +44,7 @@ jobs:
     runs-on: ubuntu-latest
     if: github.event_name == 'pull_request'
     steps:
-      - uses: actions/checkout@v4
+      - uses: actions/checkout@v6
       - name: Check `nph` formatting
         uses: arnetheduck/nph-action@v1
         with:
@@ -57,7 +57,7 @@ jobs:
     runs-on: ubuntu-latest
     steps:
       - name: Checkout sources
-        uses: actions/checkout@v4
+        uses: actions/checkout@v6
         with:
           submodules: recursive
           ref: ${{ github.event.pull_request.head.sha }}
diff --git a/.github/workflows/docker-reusable.yml b/.github/workflows/docker-reusable.yml
index b1a0aa06..84b251e0 100644
--- a/.github/workflows/docker-reusable.yml
+++ b/.github/workflows/docker-reusable.yml
@@ -128,26 +128,26 @@ jobs:
       PLATFORM: ${{ format('{0}/{1}', 'linux', matrix.target.arch) }}
     steps:
       - name: Checkout
-        uses: actions/checkout@v4
+        uses: actions/checkout@v6
 
       - name: Docker - Meta
         id: meta
-        uses: docker/metadata-action@v5
+        uses: docker/metadata-action@v6
         with:
           images: ${{ env.DOCKER_REPO }}
 
       - name: Docker - Set up Buildx
-        uses: docker/setup-buildx-action@v3
+        uses: docker/setup-buildx-action@v4
 
       - name: Docker - Login to Docker Hub
-        uses: docker/login-action@v3
+        uses: docker/login-action@v4
         with:
           username: ${{ secrets.DOCKERHUB_USERNAME }}
           password: ${{ secrets.DOCKERHUB_TOKEN }}
 
       - name: Docker - Build and Push by digest
         id: build
-        uses: docker/build-push-action@v5
+        uses: docker/build-push-action@v7
         with:
           context: .
           file: ${{ env.DOCKER_FILE }}
@@ -167,7 +167,7 @@ jobs:
           touch "/tmp/digests/${digest#sha256:}"
 
       - name: Docker - Upload digest
-        uses: actions/upload-artifact@v4
+        uses: actions/upload-artifact@v7
         with:
           name: digests-${{ needs.compute.outputs.build_id }}-${{ matrix.target.arch }}
           path: /tmp/digests/*
@@ -208,18 +208,18 @@ jobs:
           fi
 
       - name: Docker - Download digests
-        uses: actions/download-artifact@v4
+        uses: actions/download-artifact@v8
         with:
           pattern: digests-${{ needs.compute.outputs.build_id }}-*
           merge-multiple: true
           path: /tmp/digests
 
       - name: Docker - Set up Buildx
-        uses: docker/setup-buildx-action@v3
+        uses: docker/setup-buildx-action@v4
 
       - name: Docker - Meta
         id: meta
-        uses: docker/metadata-action@v5
+        uses: docker/metadata-action@v6
         with:
           images: ${{ env.DOCKER_REPO }}
           flavor: |
@@ -232,7 +232,7 @@ jobs:
             type=sha,enable=${{ env.TAG_SHA }}
 
       - name: Docker - Login to Docker Hub
-        uses: docker/login-action@v3
+        uses: docker/login-action@v4
         with:
           username: ${{ secrets.DOCKERHUB_USERNAME }}
           password: ${{ secrets.DOCKERHUB_TOKEN }}
diff --git a/.github/workflows/docs.yml b/.github/workflows/docs.yml
index 18607c70..b1bf750a 100644
--- a/.github/workflows/docs.yml
+++ b/.github/workflows/docs.yml
@@ -26,13 +26,13 @@ jobs:
     runs-on: ubuntu-latest
     steps:
       - name: Checkout
-        uses: actions/checkout@v4
+        uses: actions/checkout@v6
         with:
           fetch-depth: 0
 
-      - uses: actions/setup-node@v4
+      - uses: actions/setup-node@v6
         with:
-          node-version: 18
+          node-version: 24
 
       - name: Lint OpenAPI
         run: npx @redocly/cli lint openapi.yaml
@@ -43,13 +43,13 @@ jobs:
     if: startsWith(github.ref, 'refs/tags/')
     steps:
       - name: Checkout
-        uses: actions/checkout@v4
+        uses: actions/checkout@v6
         with:
           fetch-depth: 0
 
-      - uses: actions/setup-node@v4
+      - uses: actions/setup-node@v6
         with:
-          node-version: 18
+          node-version: 24
 
       - name: Build OpenAPI
         run: npx @redocly/cli build-docs openapi.yaml --output openapi/index.html --title "Logos Storage API"
@@ -63,4 +63,4 @@ jobs:
           path: openapi
 
       - name: Deploy to GitHub Pages
-        uses: actions/deploy-pages@v4
+        uses: actions/deploy-pages@v5
diff --git a/.github/workflows/nim-matrix.yml b/.github/workflows/nim-matrix.yml
index 590805f7..b23a7f5c 100644
--- a/.github/workflows/nim-matrix.yml
+++ b/.github/workflows/nim-matrix.yml
@@ -16,7 +16,7 @@ jobs:
       cache_nonce: ${{ env.cache_nonce }}
     steps:
       - name: Checkout sources
-        uses: actions/checkout@v4
+        uses: actions/checkout@v6
       - name: Compute matrix
         id: matrix
         run: |
diff --git a/.github/workflows/release.yml b/.github/workflows/release.yml
index 0b64e231..3bc0525c 100644
--- a/.github/workflows/release.yml
+++ b/.github/workflows/release.yml
@@ -7,6 +7,12 @@ on:
     branches:
       - master
   workflow_dispatch:
+    inputs:
+      branch:
+        description: 'dist-tests branch to run tests from'
+        required: false
+        default: 'master'
+        type: string
 
 env:
   cache_nonce: 0 # Allows for easily busting actions/cache caches
@@ -61,7 +67,7 @@ jobs:
             echo "TAGGED_RELEASE=false" >> $GITHUB_ENV
           fi
       - name: Checkout sources
-        uses: actions/checkout@v4
+        uses: actions/checkout@v6
         with:
           submodules: recursive
 
@@ -110,7 +116,7 @@ jobs:
           7z a -tzip "${{ env.build_dir }}/${{env.storage_binary}}.zip" ./${{ env.build_dir }}/*
 
       - name: Upload Logos Storage binary to workflow artifacts
-        uses: actions/upload-artifact@v4
+        uses: actions/upload-artifact@v7
         with:
           name: ${{ env.storage_binary }}.zip
           path: ${{ env.build_dir }}/${{ env.storage_binary }}.zip
@@ -127,7 +133,7 @@ jobs:
 
       - name: Upload Windows dlls to workflow artifacts
         if: matrix.os == 'windows'
-        uses: actions/upload-artifact@v4
+        uses: actions/upload-artifact@v7
         with:
           name: ${{ env.storage_binary }}-dlls.zip
           path: ${{ env.build_dir }}/${{ env.storage_binary }}-dlls.zip
@@ -173,17 +179,247 @@ jobs:
           7z a -tzip "${{ env.build_dir }}/${{ env.c_bindings_lib }}.zip" ./library/${{ env.c_bindings_lib_base }}.h
 
       - name: Upload ${{ env.c_bindings_lib_base }} to workflow artifacts
-        uses: actions/upload-artifact@v4
+        uses: actions/upload-artifact@v7
         with:
           name: ${{ env.c_bindings_lib }}.zip
           path: ${{ env.build_dir }}/${{ env.c_bindings_lib }}.zip
           if-no-files-found: error
 
+  # Build Docker logosstorage/logos-storage-nim:latest-dist-tests image for Logos Storage nodes in the cluster
+  build-docker-dist-tests:
+    name: Build Docker dist-tests image
+    if: github.ref_type == 'tag' || github.event_name == 'workflow_dispatch'
+    uses: ./.github/workflows/docker-reusable.yml
+    with:
+      nimflags: '-d:disableMarchNative -d:storage_enable_api_debug_peers=true -d:storage_enable_log_counter=true'
+      nat_ip_auto: true
+      tag_latest: false
+      tag_stable: false
+      tag_suffix: dist-tests
+      tag_sha: false
+    secrets: inherit
+
+  # Release tests
+  release-tests:
+    name: Release Tests
+    runs-on: ubuntu-latest
+    if: github.ref_type == 'tag' || github.event_name == 'workflow_dispatch'
+    needs: build-docker-dist-tests
+    timeout-minutes: 90
+    permissions:
+      id-token: write
+      contents: read
+    env:
+      TF_VAR_project: ${{ vars.RELEASE_TESTS_GCP_PROJECT }}
+      TF_VAR_region: europe-west4
+      TF_VAR_zone: europe-west4-a
+      TF_PLUGIN_CACHE_DIR: /home/runner/.terraform.d/plugin-cache
+      STORAGEDOCKERIMAGE: ${{ github.ref_type == 'tag' && format('logosstorage/logos-storage-nim:{0}-dist-tests', github.ref_name) || 'logosstorage/logos-storage-nim:latest-dist-tests' }}
+      TEST_TYPE: release-tests
+      BRANCH: ${{ inputs.branch || 'master' }}
+      SOURCE: https://github.com/logos-storage/logos-storage-nim-cs-dist-tests
+      TF_DIR: .github/release/clusters/logos-storage-rel-tests-gcp-europe-west4
+      CLUSTER_NAME: logos-storage-rel-tests-gcp-europe-west4 # must match local.name in modules/gke: "<name>-gcp-<region>"
+    steps:
+      - name: Checkout
+        uses: actions/checkout@v6
+
+      - name: Create Terraform plugin cache dir
+        run: mkdir -p /home/runner/.terraform.d/plugin-cache
+
+      - name: Cache Terraform plugins
+        uses: actions/cache@v5
+        with:
+          path: ~/.terraform.d/plugin-cache
+          key: terraform-google-${{ hashFiles(format('{0}/.terraform.lock.hcl', env.TF_DIR)) }}
+          restore-keys: terraform-google-
+
+      - name: Authenticate to GCP
+        uses: google-github-actions/auth@v3
+        with:
+          workload_identity_provider: ${{ secrets.RELEASE_TESTS_GCP_WORKLOAD_IDENTITY_PROVIDER }}
+          service_account: ${{ secrets.RELEASE_TESTS_GCP_SERVICE_ACCOUNT }}
+
+      - name: Setup gcloud
+        uses: google-github-actions/setup-gcloud@v3
+        with:
+          install_components: gke-gcloud-auth-plugin
+
+      - name: Setup Terraform
+        uses: hashicorp/setup-terraform@v4
+
+      - name: Setup kubectl
+        uses: azure/setup-kubectl@v5
+        with:
+          version: v1.36.0
+
+      - name: Terraform init
+        working-directory: ${{ env.TF_DIR }}
+        run: terraform init -backend-config="bucket=${{ vars.RELEASE_TESTS_TF_STATE_BUCKET }}"
+
+      - name: Terraform apply
+        id: tf-apply
+        working-directory: ${{ env.TF_DIR }}
+        run: terraform apply -auto-approve
+
+      - name: Get kubeconfig
+        run: |
+          gcloud container clusters get-credentials $CLUSTER_NAME \
+            --zone ${{ env.TF_VAR_zone }} \
+            --project ${{ vars.RELEASE_TESTS_GCP_PROJECT }}
+
+      - name: Wait for runners-ci node to be Ready
+        run: |
+          kubectl wait \
+            --for=condition=Ready \
+            node \
+            -l workload-type=tests-runners-ci \
+            --timeout=300s
+
+      - name: Set run variables
+        run: |
+          RUNID=$(date +%Y%m%d-%H%M%S)
+          echo "RUNID=${RUNID}" >> $GITHUB_ENV
+          echo "NAMEPREFIX=r-tests-${RUNID}" >> $GITHUB_ENV
+          echo "TESTID=$(git rev-parse --short HEAD)" >> $GITHUB_ENV
+          echo "JOB_START_TIME=$(date -u +"%Y-%m-%dT%H:%M:%SZ")" >> $GITHUB_ENV
+
+      - name: Deploy test job
+        run: |
+          envsubst < .github/release/job-release-tests.yaml | kubectl apply -f -
+          echo "--- Job ---"
+          kubectl get job $NAMEPREFIX -n default
+          echo "--- Pods ---"
+          kubectl get pods -n default
+          echo "--- Job events ---"
+          kubectl describe job $NAMEPREFIX -n default
+
+      - name: Print storage node log link
+        run: |
+          QUERY=$(printf '%s\n%s\n%s' \
+            'resource.type="k8s_container"' \
+            "resource.labels.cluster_name=\"${CLUSTER_NAME}\"" \
+            "labels.\"k8s-pod/runid\"=\"${RUNID}\"")
+          ENCODED=$(python3 -c "import urllib.parse,sys; print(urllib.parse.quote(sys.stdin.read(), safe=''))" <<< "$QUERY")
+          URL="https://console.cloud.google.com/logs/query;query=${ENCODED};startTime=${JOB_START_TIME}?project=${{ vars.RELEASE_TESTS_GCP_PROJECT }}"
+          echo "Storage node logs: $URL"
+          echo "## Summary" >> "$GITHUB_STEP_SUMMARY"
+          echo "" >> "$GITHUB_STEP_SUMMARY"
+          echo "Run ID: \`${RUNID}\`" >> "$GITHUB_STEP_SUMMARY"
+          echo "" >> "$GITHUB_STEP_SUMMARY"
+          echo "[Logs for entire run]($URL)" >> "$GITHUB_STEP_SUMMARY"
+          echo "" >> "$GITHUB_STEP_SUMMARY"
+          echo "> [!TIP]" >> "$GITHUB_STEP_SUMMARY"
+          echo "> To see the runner logs, add filter \`resource.labels.container_name=\"runner\"\` or use the filters on the left-side panel" >> "$GITHUB_STEP_SUMMARY"
+          echo "" >> "$GITHUB_STEP_SUMMARY"
+          LOG_RETENTION_DATE=$(date -u -d "${JOB_START_TIME} + 30 days" +"%Y-%m-%dT%H:%M:%SZ")
+          echo "> [!IMPORTANT]" >> "$GITHUB_STEP_SUMMARY"
+          echo "> Logs are retained until ${LOG_RETENTION_DATE} UTC (30 days)" >> "$GITHUB_STEP_SUMMARY"
+
+      - name: Wait for runner pod to start
+        run: |
+          echo "Waiting for runner pod to reach Running state..."
+          deadline=$((SECONDS + 300))
+          last_describe=0
+          while [[ $SECONDS -lt $deadline ]]; do
+            phase=$(kubectl get pods \
+              -l job-name=$NAMEPREFIX \
+              -n default \
+              -o jsonpath='{range .items[*]}{.status.phase}{end}' 2>/dev/null)
+            echo "Pod phase: ${phase:-not yet created}"
+            if [[ "$phase" =~ ^(Running|Succeeded|Failed)$ ]]; then break; fi  # terminal phases also mean the pod started
+            if [[ $((SECONDS - last_describe)) -ge 60 ]]; then
+              echo "--- kubectl describe job $NAMEPREFIX ---"
+              kubectl describe job $NAMEPREFIX -n default
+              last_describe=$SECONDS
+            fi
+            sleep 10
+          done
+          if [[ ! "$phase" =~ ^(Running|Succeeded|Failed)$ ]]; then
+            echo "Timed out waiting for pod to reach Running state"
+            exit 1
+          fi
+
+      - name: Run tests and stream logs
+        timeout-minutes: 60
+        run: |
+          POD=$(kubectl get pods -l job-name=$NAMEPREFIX -n default \
+            -o jsonpath='{.items[0].metadata.name}')
+          echo "Streaming logs for pod: $POD"
+          # Use pod name (not label selector) so the stream survives long silences
+          # between test completions. || true so the step doesn't fail if the
+          # API server closes the connection before the pod exits.
+          kubectl logs $POD -n default --follow || true
+
+      - name: Generate test summary
+        env:
+          GCP_PROJECT: ${{ vars.RELEASE_TESTS_GCP_PROJECT }}
+        run: |
+          export JOB_START=$(kubectl get job "$NAMEPREFIX" -n default \
+            -o jsonpath='{.status.startTime}' 2>/dev/null || true)
+          export JOB_END=$(kubectl get job "$NAMEPREFIX" -n default \
+            -o jsonpath='{.status.completionTime}' 2>/dev/null || true)
+
+          export ENTRIES_FILE=$(mktemp)
+          # Read test results written by TearDownDistTest directly into ConfigMaps.
+          kubectl get configmaps -n default -l "runid=${RUNID},app=test-result" \
+            -o jsonpath='{range .items[*]}{.data.result}{"\n"}{end}' 2>/dev/null \
+            | jq -s '[.[] | {jsonPayload: .}]' > "$ENTRIES_FILE" || echo "[]" > "$ENTRIES_FILE"
+
+          python3 .github/scripts/generate_test_summary.py
+          rm -f "$ENTRIES_FILE"
+
+      - name: Check job status
+        run: |
+          # kubectl logs may have exited early (API server closed the stream).
+          # Wait for the job to reach a terminal state before checking the result.
+          kubectl wait job/$NAMEPREFIX -n default \
+            --for=condition=Complete \
+            --timeout=300s \
+            || true
+          # Look up the Complete condition by type; conditions[0] depends on ordering.
+          job_status=$(kubectl get jobs $NAMEPREFIX -n default \
+            -o jsonpath='{.status.conditions[?(@.type=="Complete")].status}')
+          echo "Job Complete condition: ${job_status:-absent}"
+          [[ "$job_status" == "True" ]] || exit 1
+
+      - name: Terraform destroy
+        if: always() && steps.tf-apply.conclusion != 'skipped'
+        working-directory: ${{ env.TF_DIR }}
+        run: terraform destroy -auto-approve
+
+      - name: Delete orphaned GCE disks
+        if: always() && steps.tf-apply.conclusion != 'skipped'
+        env:
+          GCP_PROJECT: ${{ vars.RELEASE_TESTS_GCP_PROJECT }}
+        run: |
+          # Safety net: delete any pvc-* disks the CSI driver did not release before
+          # the cluster was destroyed. Runs after terraform destroy so disks are
+          # guaranteed detached (GCE rejects deletes on attached disks). The
+          # releaseTestsDiskCleaner IAM role is granted out-of-band (not via Terraform)
+          # so it persists across cluster lifecycles — see CLAUDE.md for details.
+          gcloud compute disks list \
+            --project="$GCP_PROJECT" \
+            --filter="name~^pvc-" \
+            --format="value(name,zone.basename())" \
+          | while IFS=$'\t' read -r name zone; do
+              [[ -n "$name" && -n "$zone" ]] || continue
+              gcloud compute disks delete "$name" --zone="$zone" \
+                --project="$GCP_PROJECT" --quiet || true
+            done
+
+      - name: Release Terraform state lock
+        if: always() && steps.tf-apply.conclusion != 'skipped'
+        run: |
+          gcloud storage rm \
+            "gs://${{ vars.RELEASE_TESTS_TF_STATE_BUCKET }}/clusters/${CLUSTER_NAME}/default.tflock" \
+            2>/dev/null || true
+
   # Release
   release:
     runs-on: ubuntu-latest
-    needs: build
-    if: success() || failure()
+    needs: [build, release-tests]
+    if: needs.build.result == 'success' && needs.release-tests.result == 'success'
     steps:
       - name: Set conditional env variables
         shell: bash
@@ -196,21 +432,21 @@ jobs:
             echo "TAGGED_RELEASE=false" >> $GITHUB_ENV
           fi
       - name: Download binaries from workflow artifacts into temp folder
-        uses: actions/download-artifact@v4
+        uses: actions/download-artifact@v8
         with:
           pattern: ${{ env.storage_binary_base }}*
           merge-multiple: true
           path: /tmp/release
 
       - name: Download ${{ env.c_bindings_lib_base }} from workflow artifacts into temp folder
-        uses: actions/download-artifact@v5
+        uses: actions/download-artifact@v8
         with:
           pattern: ${{ env.c_bindings_lib_base }}*
           merge-multiple: true
           path: /tmp/release
 
       - name: Create GH release
-        uses: softprops/action-gh-release@v2
+        uses: softprops/action-gh-release@v3
         if: env.TAGGED_RELEASE == 'true'
         with:
           files: |
@@ -218,7 +454,7 @@ jobs:
           make_latest: true
 
       - name: Generate Python SDK
-        uses: peter-evans/repository-dispatch@v3
+        uses: peter-evans/repository-dispatch@v4
         if: env.TAGGED_RELEASE == 'true'
         with:
           token: ${{ secrets.DISPATCH_PAT }}