Add release tests workflow

Adds a workflow for release tests:
- builds a docker image for launching nodes in the tests (basically has additional nimflags set)
- creates a K8s cluster in Digital Ocean
- one pod in the cluster is dedicated as the test runner (uses the logos-storage-nim-cs-dist-tests:latest image)
- the release will fail if the docker image build or the release tests fail
- the K8s cluster is torn down after the tests finish (failure or not)
This commit is contained in:
E M 2026-04-09 19:58:05 +10:00
parent 79d59dc66c
commit ca94abae5f
No known key found for this signature in database
13 changed files with 434 additions and 2 deletions

28
.claude/settings.json Normal file
View File

@ -0,0 +1,28 @@
{
  "permissions": {
    "allow": [
      "Read(//Users/egonat/repos/gmega/logos-storage-docs/**)",
      "Bash(npm run build)",
      "Bash(npx astro build)",
      "Bash(npm install)",
      "Bash(node_modules/.bin/astro build)",
      "WebSearch",
      "Bash(npm test:*)",
      "Bash(node -e \"import\\('koffi'\\).then\\(m => console.log\\(Object.keys\\(m.default\\)\\)\\).catch\\(console.error\\)\")",
      "Bash(timeout 5 node -e \":*)",
      "Bash(node probe.js)",
      "Bash(node two-node-test.js)",
      "WebFetch(domain:github.com)",
      "WebFetch(domain:api.github.com)",
      "Read(//Users/egonat/repos/logos-storage/infra-codex/clusters/codex-dist-tests-do-ams3/**)",
      "Read(//Users/egonat/repos/logos-storage/logos-storage-nim-cs-dist-tests/Tests/CodexReleaseTests/**)",
      "Read(//Users/egonat/repos/logos-storage/logos-storage-nim-cs-dist-tests/**)",
      "Bash(grep -E \"\\\\.cs$\")",
      "Read(//Users/egonat/repos/logos-storage/infra-codex/clusters/modules/doks/**)"
    ],
    "additionalDirectories": [
      "/Users/egonat/repos/gmega/logos-storage-docs/src/content/docs/api",
      "/Users/egonat/repos/gmega/logos-storage-docs/src/content/docs/tutorials"
    ]
  }
}

View File

@ -0,0 +1,16 @@
# Remote state storage.
#
# DigitalOcean Spaces speaks the S3 API, so state lives in the "s3" backend
# pointed at the fra1 Spaces endpoint. The skip_* flags disable AWS-specific
# validation that Spaces does not implement.
terraform {
  backend "s3" {
    endpoints = {
      s3 = "https://fra1.digitaloceanspaces.com"
    }

    bucket = "codex-infra-terraform"
    key    = "clusters/codex-dist-tests-do-ams3/terraform.tfstate"
    region = "fra1"

    # Spaces is not AWS: skip checks that would call real AWS APIs.
    skip_credentials_validation = true
    skip_requesting_account_id  = true
    skip_metadata_api_check     = true
    skip_region_validation      = true
    skip_s3_checksum            = true
  }
}

View File

@ -0,0 +1,57 @@
# Kubernetes cluster, created via the shared doks module.
# The default node pool declared here hosts infra workloads.
module "doks" {
  source = "../modules/doks"

  name   = "codex-dist-tests"
  region = var.region

  kubernetes_version      = "1.34.5-do.2"
  kubernetes_ha           = true
  kubernetes_auto_upgrade = false

  kubernetes_node_pool_name       = "infra-s-4vcpu-16gb-amd"
  kubernetes_node_pool_size       = "s-4vcpu-16gb-amd"
  kubernetes_node_pool_auto_scale = true
  kubernetes_node_pool_min        = 1
  kubernetes_node_pool_max        = 3
  kubernetes_node_pool_tags       = ["default", "autoscale"]

  kubernetes_node_pool_labels = {
    default-pool  = "true"
    scaling-type  = "auto"
    workload-type = "infra"
  }
}
# Additional node pool: CI test-runner pods are pinned here
# (see the workload-type label used as a nodeSelector by the test job).
resource "digitalocean_kubernetes_node_pool" "runners-ci" {
  cluster_id = module.doks.kubernetes_cluster_id

  name       = "runners-ci-s-2vcpu-8gb-amd"
  size       = "s-2vcpu-8gb-amd"
  auto_scale = true
  min_nodes  = 1
  max_nodes  = 5
  tags       = ["runners-ci"]

  labels = {
    allow-tests-pods = "false"
    default-pool     = "false"
    scaling-type     = "auto"
    workload-type    = "tests-runners-ci"
  }
}
# Additional node pool: pods launched by the tests themselves run here.
resource "digitalocean_kubernetes_node_pool" "tests-s-2vcpu-4gb" {
  cluster_id = module.doks.kubernetes_cluster_id

  name       = "tests-s-2vcpu-4gb"
  size       = "s-2vcpu-4gb"
  auto_scale = true
  min_nodes  = 1
  max_nodes  = 10
  tags       = ["tests-pods"]

  labels = {
    allow-tests-pods = "true"
    default-pool     = "false"
    scaling-type     = "auto"
    workload-type    = "tests-pods"
  }
}

View File

@ -0,0 +1,4 @@
# Provider configuration. The API token is supplied via TF_VAR_do_token.
provider "digitalocean" {
  token = var.do_token
}

View File

@ -0,0 +1,10 @@
# Input variables for the codex-dist-tests cluster.

variable "region" {
  description = "DigitalOcean region (e.g. ams3)"
  type        = string
}

variable "do_token" {
  description = "DigitalOcean API token"
  type        = string
  sensitive   = true
}

View File

@ -0,0 +1,10 @@
# Terraform settings: minimum CLI version and required providers.
terraform {
  required_version = "~> 1.0"

  required_providers {
    digitalocean = {
      source  = "digitalocean/digitalocean"
      version = "~> 2.0"
    }
  }
}

View File

@ -0,0 +1,4 @@
locals {
  # Canonical cluster name, e.g. "codex-dist-tests-do-ams3".
  name = "${var.name}-do-${var.region}"

  # Default node-pool name derived from the droplet size.
  # NOTE(review): not referenced by this module's main.tf (the cluster uses
  # var.kubernetes_node_pool_name directly) — confirm before removing.
  node_pool_name = "pool-${var.kubernetes_node_pool_size}"
}

View File

@ -0,0 +1,33 @@
# DigitalOcean managed Kubernetes cluster with a single default node pool.
resource "digitalocean_kubernetes_cluster" "this" {
  name         = local.name
  region       = var.region
  version      = var.kubernetes_version
  ha           = var.kubernetes_ha
  auto_upgrade = var.kubernetes_auto_upgrade

  node_pool {
    name = var.kubernetes_node_pool_name
    size = var.kubernetes_node_pool_size
    # NOTE(review): callers enabling auto-scaling may leave this null —
    # confirm the provider accepts a null node_count in that case.
    node_count = var.kubernetes_node_pool_count
    auto_scale = var.kubernetes_node_pool_auto_scale
    min_nodes  = var.kubernetes_node_pool_min
    max_nodes  = var.kubernetes_node_pool_max
    tags       = var.kubernetes_node_pool_tags
    labels     = var.kubernetes_node_pool_labels

    # Emit a taint block only when a non-empty taint map was supplied.
    dynamic "taint" {
      for_each = length(var.kubernetes_node_pool_taint) == 0 ? {} : { taint = true }
      content {
        key    = lookup(var.kubernetes_node_pool_taint, "key")
        value  = lookup(var.kubernetes_node_pool_taint, "value")
        effect = lookup(var.kubernetes_node_pool_taint, "effect")
      }
    }
  }

  maintenance_policy {
    day        = var.kubernetes_maintenance_day
    start_time = var.kubernetes_maintenance_start_time
  }
}

View File

@ -0,0 +1,5 @@
# Module outputs.
output "kubernetes_cluster_id" {
  description = "A unique ID that can be used to identify and reference a Kubernetes cluster."
  value       = digitalocean_kubernetes_cluster.this.id
}

View File

@ -0,0 +1,97 @@
# Main
variable "name" {
  type        = string
  description = "A name for the created resources."
}

variable "region" {
  type        = string
  description = "The DigitalOcean region slug for the resources location."
}

# Kubernetes control plane
variable "kubernetes_version" {
  type        = string
  description = "The slug identifier for the version of Kubernetes used for the cluster."
}

variable "kubernetes_ha" {
  type        = bool
  description = "Enable/disable the high availability control plane for a cluster."
}

variable "kubernetes_auto_upgrade" {
  type        = bool
  description = "A boolean value indicating whether the cluster will be automatically upgraded to new patch releases during its maintenance window."
}

variable "kubernetes_maintenance_day" {
  type        = string
  description = "The day of the maintenance window policy."
  default     = "sunday"
}

variable "kubernetes_maintenance_start_time" {
  type        = string
  description = "The start time in UTC of the maintenance window policy in 24-hour clock format / HH:MM notation (e.g., 15:00)."
  default     = "04:00"
}
# Kubernetes default node pool
variable "kubernetes_node_pool_name" {
  type        = string
  description = "A name for the node pool."
  default     = null
}

variable "kubernetes_node_pool_size" {
  type        = string
  description = "The slug identifier for the type of Droplet to be used as workers in the node pool."
  default     = null
}

variable "kubernetes_node_pool_count" {
  type        = number
  description = "The number of Droplet instances in the node pool."
  default     = null
}

variable "kubernetes_node_pool_auto_scale" {
  type        = bool
  description = "Enable auto-scaling of the number of nodes in the node pool within the given min/max range."
  default     = null
}

variable "kubernetes_node_pool_min" {
  type        = number
  description = "If auto-scaling is enabled, this represents the minimum number of nodes that the node pool can be scaled down to."
  default     = null
}

variable "kubernetes_node_pool_max" {
  type        = number
  description = "If auto-scaling is enabled, this represents the maximum number of nodes that the node pool can be scaled up to."
  default     = null
}

variable "kubernetes_node_pool_tags" {
  type        = list(any)
  description = "A list of tag names applied to the node pool."
  default     = ["default", "autoscale"]
}

variable "kubernetes_node_pool_labels" {
  type        = map(string)
  description = "A map of key/value pairs to apply to nodes in the pool."
  default = {
    default-pool = "true"
    scaling-type = "auto"
  }
}

variable "kubernetes_node_pool_taint" {
  type        = map(string)
  description = "A block representing a taint applied to all nodes in the pool."
  default     = {}
}

View File

@ -0,0 +1,9 @@
# Terraform settings: providers required by the doks module.
terraform {
  required_providers {
    digitalocean = {
      source  = "digitalocean/digitalocean"
      version = "~> 2.0"
    }
  }
}

63
.github/release/job-release-tests.yaml vendored Normal file
View File

@ -0,0 +1,63 @@
# Kubernetes Job that runs the release test suite inside the cluster.
# All ${VAR} placeholders are substituted by envsubst from the workflow
# before this manifest is applied.
apiVersion: batch/v1
kind: Job
metadata:
  name: ${NAMEPREFIX}
  namespace: ${NAMESPACE}
  labels:
    name: ${NAMEPREFIX}
    runid: ${RUNID}
spec:
  # Keep finished Jobs around for a day so logs can still be inspected.
  ttlSecondsAfterFinished: 86400
  # A failed test run is terminal: never retry the pod.
  backoffLimit: 0
  template:
    metadata:
      name: ${NAMEPREFIX}
      labels:
        app: ${TEST_TYPE}-runner
        name: ${NAMEPREFIX}
        runid: ${RUNID}
    spec:
      priorityClassName: system-node-critical
      # Pin the runner to the dedicated CI-runner node pool.
      nodeSelector:
        workload-type: "tests-runners-ci"
      containers:
        - name: runner
          image: logosstorage/cs-codex-dist-tests:latest
          imagePullPolicy: Always
          resources:
            requests:
              memory: "1Gi"
          env:
            - name: KUBECONFIG
              value: "/opt/kubeconfig.yaml"
            - name: LOGPATH
              value: "/var/log/codex-${TEST_TYPE}"
            - name: NAMESPACE
              value: "${NAMESPACE}"
            - name: BRANCH
              value: "${BRANCH}"
            - name: SOURCE
              value: "${SOURCE}"
            - name: RUNID
              value: "${RUNID}"
            - name: CODEXDOCKERIMAGE
              value: "${CODEXDOCKERIMAGE}"
            - name: TESTID
              value: "${TESTID}"
            # NOTE(review): env var is TESTS_TYPE while the template
            # placeholder is TEST_TYPE — confirm the runner expects this name.
            - name: TESTS_TYPE
              value: "${TEST_TYPE}"
          volumeMounts:
            - name: kubeconfig
              mountPath: /opt/kubeconfig.yaml
              subPath: kubeconfig.yaml
            - name: logs
              mountPath: /var/log/codex-${TEST_TYPE}
          # COMMAND is substituted as a JSON array, e.g. ["dotnet","test",...].
          args: ${COMMAND}
      restartPolicy: Never
      volumes:
        - name: kubeconfig
          secret:
            secretName: codex-dist-tests-app-kubeconfig
        - name: logs
          hostPath:
            path: /var/log/codex-${TEST_TYPE}

View File

@ -179,11 +179,107 @@ jobs:
path: ${{ env.build_dir }}/${{ env.c_bindings_lib }}.zip
if-no-files-found: error
# Build the Docker image used to launch nodes in the release tests.
# Same reusable workflow as the regular image build, with extra nim flags
# enabling the debug endpoints the tests rely on.
build-docker-dist-tests:
  name: Build Docker dist-tests image
  if: github.ref_type == 'tag'
  uses: ./.github/workflows/docker-reusable.yml
  with:
    nimflags: '-d:disableMarchNative -d:storage_enable_api_debug_peers=true -d:storage_enable_log_counter=true'
    nat_ip_auto: true
    tag_latest: false
    tag_stable: false
    tag_suffix: dist-tests
    tag_sha: false
  secrets: inherit
# Release tests: provision a DOKS cluster with Terraform, run the dist-test
# suite in a dedicated runner pod, and always tear the cluster down after.
release-tests:
  name: Release Tests
  runs-on: ubuntu-latest
  if: github.ref_type == 'tag'
  needs: build-docker-dist-tests
  env:
    TF_VAR_region: ams3
    TF_VAR_do_token: ${{ secrets.DO_TOKEN }}
    # Spaces credentials for the Terraform S3-compatible state backend.
    AWS_ACCESS_KEY_ID: ${{ secrets.SPACES_ACCESS_KEY }}
    AWS_SECRET_ACCESS_KEY: ${{ secrets.SPACES_SECRET_KEY }}
    CODEXDOCKERIMAGE: ${{ needs.build-docker-dist-tests.outputs.logos_storage_image }}
    NAMESPACE: default
    TEST_TYPE: release-tests
    BRANCH: master
    SOURCE: https://github.com/logos-storage/logos-storage-nim-cs-dist-tests
    TF_DIR: .github/release/clusters/codex-dist-tests-do-ams3
  steps:
    - name: Checkout
      uses: actions/checkout@v4
    - name: Setup Terraform
      uses: hashicorp/setup-terraform@v3
    - name: Setup doctl
      uses: digitalocean/action-doctl@v2
      with:
        token: ${{ secrets.DO_TOKEN }}
    - name: Setup kubectl
      uses: azure/setup-kubectl@v4
      with:
        version: v1.30.5
    - name: Terraform init
      working-directory: ${{ env.TF_DIR }}
      run: terraform init
    - name: Terraform apply
      working-directory: ${{ env.TF_DIR }}
      run: terraform apply -auto-approve
    - name: Get kubeconfig
      run: doctl kubernetes cluster kubeconfig save codex-dist-tests-do-ams3
    # The runner pod needs cluster access of its own to spawn test pods.
    - name: Create in-cluster app kubeconfig secret
      run: |
        kubectl create secret generic codex-dist-tests-app-kubeconfig \
          --from-file=kubeconfig.yaml=$HOME/.kube/config \
          -n ${{ env.NAMESPACE }}
    - name: Set run variables
      run: |
        RUNID=$(date +%Y%m%d-%H%M%S)
        echo "RUNID=${RUNID}" >> $GITHUB_ENV
        echo "NAMEPREFIX=r-tests-${RUNID}" >> $GITHUB_ENV
        echo "TESTID=$(git rev-parse --short HEAD)" >> $GITHUB_ENV
        # COMMAND becomes a JSON array for the Job manifest's args field.
        echo "COMMAND=$(jq -c 'split(" ")' <<< '"dotnet test Tests/CodexReleaseTests"')" >> $GITHUB_ENV
    - name: Deploy test job
      run: envsubst < .github/release/job-release-tests.yaml | kubectl apply -f -
    - name: Stream test logs
      run: |
        kubectl logs -l job-name=${{ env.NAMEPREFIX }} \
          -n ${{ env.NAMESPACE }} \
          --follow \
          --pod-running-timeout=300s
    - name: Check job status
      run: |
        sleep 5
        # Select the condition whose status is "True" instead of reading
        # .status.conditions[0]: the ordering of conditions is not guaranteed,
        # so index 0 could be a stale/false entry.
        job_status=$(kubectl get jobs ${{ env.NAMEPREFIX }} -n ${{ env.NAMESPACE }} \
          -o jsonpath='{.status.conditions[?(@.status=="True")].type}')
        echo "Job status: $job_status"
        [[ "$job_status" == "Complete" ]] || exit 1
    # Tear the cluster down whether the tests passed or failed.
    - name: Terraform destroy
      if: always()
      working-directory: ${{ env.TF_DIR }}
      run: terraform destroy -auto-approve
# Release
release:
runs-on: ubuntu-latest
needs: build
if: success() || failure()
needs: [build, release-tests]
if: needs.build.result == 'success' && needs.release-tests.result == 'success'
steps:
- name: Set conditional env variables
shell: bash