---
name: Release

# Runs on version tags and pushes to master; manual dispatch lets operators
# pick which dist-tests branch the release tests run from.
on:
  push:
    tags:
      - 'v*.*.*'
    branches:
      - master
  workflow_dispatch:
    inputs:
      branch:
        description: 'dist-tests branch to run tests from'
        required: false
        default: 'master'
        type: string

env:
  cache_nonce: 0 # Allows for easily busting actions/cache caches
  nim_version: pinned
  storage_binary_base: logos-storage
  c_bindings_lib_base: libstorage
  build_dir: build
  nim_flags: ''
  windows_libs: 'libstdc++-6.dll libgomp-1.dll libgcc_s_seh-1.dll libwinpthread-1.dll'
jobs:
  # Matrix
  # Computes the os/cpu/builder/shell build matrix consumed by the (currently
  # disabled) build job via needs.matrix.outputs.matrix.
  matrix:
    runs-on: ubuntu-latest
    outputs:
      matrix: ${{ steps.matrix.outputs.matrix }}
    steps:
      - name: Compute matrix
        id: matrix
        uses: fabiocaccamo/create-matrix-action@v5
        with:
          matrix: |
            os {linux}, cpu {amd64}, builder {ubuntu-22.04}, nim_version {${{ env.nim_version }}}, shell {bash --noprofile --norc -e -o pipefail}
            os {linux}, cpu {arm64}, builder {ubuntu-22.04-arm}, nim_version {${{ env.nim_version }}}, shell {bash --noprofile --norc -e -o pipefail}
            os {macos}, cpu {arm64}, builder {macos-14}, nim_version {${{ env.nim_version }}}, shell {bash --noprofile --norc -e -o pipefail}
            os {windows}, cpu {amd64}, builder {windows-latest}, nim_version {${{ env.nim_version }}}, shell {msys2}
  # Build
  # build:
  #   needs: matrix
  #   strategy:
  #     fail-fast: false
  #     matrix:
  #       include: ${{fromJson(needs.matrix.outputs.matrix)}}
  #   defaults:
  #     run:
  #       shell: ${{ matrix.shell }} {0}
  #   name: ${{ matrix.os }}-${{ matrix.cpu }}-${{ matrix.nim_version }}
  #   runs-on: ${{ matrix.builder }}
  #   timeout-minutes: 80
  #   steps:
  #     - name: Set conditional env variables
  #       shell: bash
  #       run: |
  #         if [[ "${{ github.ref_type }}" == "tag" ]]; then
  #           echo "VERSION=${{ github.ref_name }}" >> $GITHUB_ENV
  #           echo "TAGGED_RELEASE=true" >> $GITHUB_ENV
  #         else
  #           echo "VERSION=${GITHUB_SHA::7}" >> $GITHUB_ENV
  #           echo "TAGGED_RELEASE=false" >> $GITHUB_ENV
  #         fi
  #     - name: Checkout sources
  #       uses: actions/checkout@v4
  #       with:
  #         submodules: recursive
  #     - name: Setup Nimbus Build System
  #       uses: ./.github/actions/nimbus-build-system
  #       with:
  #         os: ${{ matrix.os }}
  #         cpu: ${{ matrix.cpu }}
  #         shell: ${{ matrix.shell }}
  #         nim_version: ${{ matrix.nim_version }}
  #     - name: Compute binary name
  #       run: |
  #         case ${{ matrix.os }} in
  #           linux*) os_name="linux" ;;
  #           macos*) os_name="darwin" ;;
  #           windows*) os_name="windows" ;;
  #         esac
  #         storage_binary="${{ env.storage_binary_base }}-${os_name}-${{ matrix.cpu }}-${{ env.VERSION }}"
  #         c_bindings_lib="${{ env.c_bindings_lib_base }}-${os_name}-${{ matrix.cpu }}-${{ env.VERSION }}"
  #         if [[ ${os_name} == "windows" ]]; then
  #           storage_binary="${storage_binary}.exe"
  #         fi
  #         echo "storage_binary=${storage_binary}" >>$GITHUB_ENV
  #         echo "c_bindings_lib=${c_bindings_lib}" >>$GITHUB_ENV
  #     - name: Build Logos Storage binary
  #       run: |
  #         make NIMFLAGS="--out:${{ env.build_dir }}/${{ env.storage_binary }} ${{ env.nim_flags }}"
  #     - name: Package ${{ env.storage_binary_base }} Linux (compress and preserve perms)
  #       if: matrix.os == 'linux'
  #       run: |
  #         sudo apt-get update && sudo apt-get install -y zip
  #         zip -j "${{ env.build_dir }}/${{env.storage_binary}}.zip" ./${{ env.build_dir }}/*
  #     - name: Package ${{ env.storage_binary_base }} MacOS (compress and preserve perms)
  #       if: matrix.os == 'macos'
  #       run: |
  #         zip -j "${{ env.build_dir }}/${{env.storage_binary}}.zip" ./${{ env.build_dir }}/*
  #     - name: Package ${{ env.storage_binary_base }} Windows (compress and preserve perms)
  #       if: matrix.os == 'windows'
  #       shell: msys2 {0}
  #       run: |
  #         7z a -tzip "${{ env.build_dir }}/${{env.storage_binary}}.zip" ./${{ env.build_dir }}/*
  #     - name: Upload Logos Storage binary to workflow artifacts
  #       uses: actions/upload-artifact@v4
  #       with:
  #         name: ${{ env.storage_binary }}.zip
  #         path: ${{ env.build_dir }}/${{ env.storage_binary }}.zip
  #         retention-days: 30
  #     - name: Copy and zip Windows dlls to build/dlls dir (Windows)
  #       if: matrix.os == 'windows'
  #       run: |
  #         mkdir -p "${{ env.build_dir }}/dlls"
  #         for lib in ${{ env.windows_libs }}; do
  #           cp -v "${MINGW_PREFIX}/bin/${lib}" "${{ env.build_dir }}/dlls"
  #         done
  #         7z a -tzip "${{ env.build_dir }}/${{ env.storage_binary }}-dlls.zip" ./${{ env.build_dir }}/dlls/*.dll
  #     - name: Upload Windows dlls to workflow artifacts
  #       if: matrix.os == 'windows'
  #       uses: actions/upload-artifact@v4
  #       with:
  #         name: ${{ env.storage_binary }}-dlls.zip
  #         path: ${{ env.build_dir }}/${{ env.storage_binary }}-dlls.zip
  #         retention-days: 30
  #     - name: Build ${{ env.c_bindings_lib_base }} (Linux)
  #       if: matrix.os == 'linux'
  #       run: |
  #         make -j${ncpu} update
  #         make -j${ncpu} libstorage
  #     - name: Build ${{ env.c_bindings_lib_base }} (MacOS)
  #       if: matrix.os == 'macos'
  #       run: |
  #         make -j${ncpu} update
  #         STORAGE_LIB_PARAMS="--passL:\"-Wl,-install_name,@rpath/${{ env.c_bindings_lib_base }}.dylib\"" make -j${ncpu} libstorage
  #     - name: Build ${{ env.c_bindings_lib_base }} (Windows)
  #       if: matrix.os == 'windows'
  #       shell: msys2 {0}
  #       run: |
  #         make -j${ncpu} update
  #         make -j${ncpu} libstorage
  #     - name: Package ${{ env.c_bindings_lib_base }} Linux
  #       if: matrix.os == 'linux'
  #       run: |
  #         sudo apt-get update && sudo apt-get install -y zip
  #         zip -j "${{ env.build_dir }}/${{ env.c_bindings_lib }}.zip" ${{ env.build_dir }}/${{ env.c_bindings_lib_base }}.so
  #         zip -j "${{ env.build_dir }}/${{ env.c_bindings_lib }}.zip" library/${{ env.c_bindings_lib_base }}.h
  #     - name: Package ${{ env.c_bindings_lib_base }} MacOS
  #       if: matrix.os == 'macos'
  #       run: |
  #         zip -j "${{ env.build_dir }}/${{ env.c_bindings_lib }}.zip" ${{ env.build_dir }}/${{ env.c_bindings_lib_base }}.dylib
  #         zip -j "${{ env.build_dir }}/${{ env.c_bindings_lib }}.zip" library/${{ env.c_bindings_lib_base }}.h
  #     - name: Package ${{ env.c_bindings_lib_base }} (Windows)
  #       if: matrix.os == 'windows'
  #       shell: msys2 {0}
  #       run: |
  #         7z a -tzip "${{ env.build_dir }}/${{ env.c_bindings_lib }}.zip" ./${{ env.build_dir }}/${{ env.c_bindings_lib_base }}.dll
  #         7z a -tzip "${{ env.build_dir }}/${{ env.c_bindings_lib }}.zip" ./library/${{ env.c_bindings_lib_base }}.h
  #     - name: Upload ${{ env.c_bindings_lib_base }} to workflow artifacts
  #       uses: actions/upload-artifact@v4
  #       with:
  #         name: ${{ env.c_bindings_lib }}.zip
  #         path: ${{ env.build_dir }}/${{ env.c_bindings_lib }}.zip
  #         if-no-files-found: error
  # # Build Docker logosstorage/logos-storage-nim:latest-dist-tests image for Logos Storage nodes in the cluster
  # build-docker-dist-tests:
  #   name: Build Docker dist-tests image
  #   if: github.ref_type == 'tag' || github.event_name == 'workflow_dispatch'
  #   uses: ./.github/workflows/docker-reusable.yml
  #   with:
  #     nimflags: '-d:disableMarchNative -d:storage_enable_api_debug_peers=true -d:storage_enable_log_counter=true'
  #     nat_ip_auto: true
  #     tag_latest: false
  #     tag_stable: false
  #     tag_suffix: dist-tests
  #     tag_sha: false
  #   secrets: inherit
# Release tests
release-tests:
name: Release Tests
runs-on: ubuntu-latest
if: github.ref_type == 'tag' || github.event_name == 'workflow_dispatch'
# needs: build-docker-dist-tests
timeout-minutes: 60
permissions:
id-token: write
contents: read
env:
TF_VAR_project: ${{ vars.RELEASE_TESTS_GCP_PROJECT }}
TF_VAR_region: europe-west4
TF_VAR_zone: europe-west4-a
TF_PLUGIN_CACHE_DIR: /home/runner/.terraform.d/plugin-cache
STORAGEDOCKERIMAGE: ${{ github.ref_type == 'tag' && format('logosstorage/logos-storage-nim:{0}-dist-tests', github.ref_name) || 'logosstorage/logos-storage-nim:latest-dist-tests' }}
TEST_TYPE: release-tests
BRANCH: ${{ inputs.branch || 'master' }}
SOURCE: https://github.com/logos-storage/logos-storage-nim-cs-dist-tests
TF_DIR: .github/release/clusters/logos-storage-rel-tests-gcp-europe-west4
CLUSTER_NAME: logos-storage-rel-tests-gcp-europe-west4 # should always match the cluster_name variable in TF_DIR
steps:
- name: Checkout
uses: actions/checkout@v4
- name: Create Terraform plugin cache dir
run: mkdir -p /home/runner/.terraform.d/plugin-cache
- name: Cache Terraform plugins
uses: actions/cache@v4
with:
path: ~/.terraform.d/plugin-cache
key: terraform-google-${{ hashFiles(format('{0}/.terraform.lock.hcl', env.TF_DIR)) }}
restore-keys: terraform-google-
- name: Authenticate to GCP
uses: google-github-actions/auth@v2
with:
workload_identity_provider: ${{ secrets.RELEASE_TESTS_GCP_WORKLOAD_IDENTITY_PROVIDER }}
service_account: ${{ secrets.RELEASE_TESTS_GCP_SERVICE_ACCOUNT }}
- name: Setup gcloud
uses: google-github-actions/setup-gcloud@v2
with:
install_components: gke-gcloud-auth-plugin
- name: Setup Terraform
uses: hashicorp/setup-terraform@v3
- name: Setup kubectl
uses: azure/setup-kubectl@v4
with:
version: v1.36.0
- name: Terraform init
working-directory: ${{ env.TF_DIR }}
run: terraform init -backend-config="bucket=${{ vars.RELEASE_TESTS_TF_STATE_BUCKET }}"
- name: Terraform apply
id: tf-apply
working-directory: ${{ env.TF_DIR }}
run: terraform apply -auto-approve
- name: Get kubeconfig
run: |
gcloud container clusters get-credentials $CLUSTER_NAME \
--zone ${{ env.TF_VAR_zone }} \
--project ${{ vars.RELEASE_TESTS_GCP_PROJECT }}
- name: Wait for runners-ci node to be Ready
run: |
kubectl wait \
--for=condition=Ready \
node \
-l workload-type=tests-runners-ci \
--timeout=300s
- name: Create in-cluster app kubeconfig secret
run: |
kubectl create serviceaccount release-tests-runner -n default
kubectl create clusterrolebinding release-tests-runner \
--clusterrole=cluster-admin \
--serviceaccount=default:release-tests-runner
export TOKEN=$(kubectl create token release-tests-runner -n default --duration=2h)
export SERVER=$(kubectl config view --minify -o jsonpath='{.clusters[0].cluster.server}')
export CA=$(kubectl config view --minify --raw -o jsonpath='{.clusters[0].cluster.certificate-authority-data}')
kubectl create secret generic storage-dist-tests-app-kubeconfig \
--from-file=kubeconfig.yaml=<(envsubst < .github/release/kubeconfig-template.yaml) \
-n default
- name: Set run variables
run: |
RUNID=$(date +%Y%m%d-%H%M%S)
echo "RUNID=${RUNID}" >> $GITHUB_ENV
echo "NAMEPREFIX=r-tests-${RUNID}" >> $GITHUB_ENV
echo "TESTID=$(git rev-parse --short HEAD)" >> $GITHUB_ENV
echo "JOB_START_TIME=$(date -u +"%Y-%m-%dT%H:%M:%SZ")" >> $GITHUB_ENV
- name: Deploy test job
run: |
envsubst < .github/release/job-release-tests.yaml | kubectl apply -f -
echo "--- Job ---"
kubectl get job $NAMEPREFIX -n default
echo "--- Pods ---"
kubectl get pods -n default
echo "--- Job events ---"
kubectl describe job $NAMEPREFIX -n default
- name: Print storage node log link
run: |
QUERY=$(printf '%s\n%s\n%s' \
'resource.type="k8s_container"' \
"resource.labels.cluster_name=\"${CLUSTER_NAME}\"" \
"labels.\"k8s-pod/runid\"=\"${RUNID}\"")
ENCODED=$(python3 -c "import urllib.parse,sys; print(urllib.parse.quote(sys.stdin.read(), safe=''))" <<< "$QUERY")
URL="https://console.cloud.google.com/logs/query;query=${ENCODED};startTime=${JOB_START_TIME}?project=${{ vars.RELEASE_TESTS_GCP_PROJECT }}"
echo "Storage node logs: $URL"
echo "## Storage Node Logs" >> "$GITHUB_STEP_SUMMARY"
echo "" >> "$GITHUB_STEP_SUMMARY"
echo "Run ID: \`${RUNID}\`" >> "$GITHUB_STEP_SUMMARY"
echo "" >> "$GITHUB_STEP_SUMMARY"
echo "[View in Cloud Logging]($URL)" >> "$GITHUB_STEP_SUMMARY"
echo "" >> "$GITHUB_STEP_SUMMARY"
echo "Filter: \`labels.\"k8s-pod/runid\"=\"${RUNID}\"\`" >> "$GITHUB_STEP_SUMMARY"
- name: Wait for test pod to start
run: |
echo "Waiting for test pod to reach Running state..."
deadline=$((SECONDS + 300))
last_describe=0
while [[ $SECONDS -lt $deadline ]]; do
phase=$(kubectl get pods \
-l job-name=$NAMEPREFIX \
-n default \
-o jsonpath='{range .items[*]}{.status.phase}{end}' 2>/dev/null)
echo "Pod phase: ${phase:-not yet created}"
if [[ "$phase" == "Running" ]]; then break; fi
if [[ $((SECONDS - last_describe)) -ge 60 ]]; then
echo "--- kubectl describe job $NAMEPREFIX ---"
kubectl describe job $NAMEPREFIX -n default
last_describe=$SECONDS
fi
sleep 10
done
if [[ "$phase" != "Running" ]]; then
echo "Timed out waiting for pod to reach Running state"
exit 1
fi
- name: Stream test logs
run: |
POD=$(kubectl get pods -l job-name=$NAMEPREFIX -n default \
-o jsonpath='{.items[0].metadata.name}')
echo "Streaming logs for pod: $POD"
# Use pod name (not label selector) so the stream survives long silences
# between test completions. || true so the step doesn't fail if the
# API server closes the connection before the pod exits.
kubectl logs $POD -n default --follow || true
- name: Check job status
run: |
# kubectl logs may have exited early (API server closed the stream).
# Wait for the job to reach a terminal state before checking the result.
kubectl wait job/$NAMEPREFIX -n default \
--for=condition=Complete \
--timeout=300s \
|| kubectl wait job/$NAMEPREFIX -n default \
--for=condition=Failed \
--timeout=0s
job_status=$(kubectl get jobs $NAMEPREFIX -n default \
-o jsonpath='{.status.conditions[0].type}')
echo "Job status: $job_status"
[[ "$job_status" == "SuccessCriteriaMet" ]] || exit 1
- name: Generate test summary
env:
GCP_PROJECT: ${{ vars.RELEASE_TESTS_GCP_PROJECT }}
run: |
export JOB_START=$(kubectl get job "$NAMEPREFIX" -n default \
-o jsonpath='{.status.startTime}' 2>/dev/null || true)
export JOB_END=$(kubectl get job "$NAMEPREFIX" -n default \
-o jsonpath='{.status.completionTime}' 2>/dev/null || true)
FILTER="resource.type=\"k8s_container\""
FILTER+=" AND resource.labels.namespace_name=\"default\""
FILTER+=" AND resource.labels.container_name=\"runner\""
FILTER+=" AND labels.\"k8s-pod/runid\"=\"${RUNID}\""
FILTER+=" AND jsonPayload.type=\"test-result\""
export ENTRIES_FILE=$(mktemp)
for attempt in $(seq 1 12); do
gcloud logging read "$FILTER" --order=asc --limit=1000 --format=json \
--project="$GCP_PROJECT" > "$ENTRIES_FILE" 2>/dev/null || echo "[]" > "$ENTRIES_FILE"
[[ $(cat "$ENTRIES_FILE") != "[]" ]] && break
echo "Attempt $attempt/12: waiting for test results in Cloud Logging..."
[[ $attempt -lt 12 ]] && sleep 10
done
python3 .github/scripts/generate_test_summary.py
rm -f "$ENTRIES_FILE"
- name: Delete PVCs before cluster teardown
if: always() && steps.tf-apply.conclusion != 'skipped'
run: |
# Best-effort: trigger PVC deletion so the CSI driver can release GCE PDs before
# terraform destroy kills the cluster. --wait=false avoids hanging when pods are
# still running (e.g. runner was OOM-killed and never ran its own cleanup).
# Any disks the CSI driver doesn't finish releasing are caught by the
# "Delete orphaned GCE disks" step that runs after terraform destroy.
kubectl delete pvc --all --all-namespaces --wait=false 2>/dev/null || true
- name: Terraform destroy
if: always() && steps.tf-apply.conclusion != 'skipped'
working-directory: ${{ env.TF_DIR }}
run: terraform destroy -auto-approve
- name: Delete orphaned GCE disks
if: always() && steps.tf-apply.conclusion != 'skipped'
env:
GCP_PROJECT: ${{ vars.RELEASE_TESTS_GCP_PROJECT }}
run: |
# Safety net: delete any pvc-* disks left unattached after cluster teardown.
# These are GCE PDs whose PVC was deleted but the CSI driver didn't finish before
# the cluster was destroyed.
gcloud compute disks list \
--project="$GCP_PROJECT" \
--filter="name~^pvc- AND -users:*" \
--format="value(name,zone.basename())" 2>/dev/null \
| while IFS=$'\t' read -r name zone; do
[[ -n "$name" && -n "$zone" ]] || continue
gcloud compute disks delete "$name" --zone="$zone" \
--project="$GCP_PROJECT" --quiet 2>/dev/null || true
done
- name: Release Terraform state lock
if: always()
run: |
gcloud storage rm \
"gs://${{ vars.RELEASE_TESTS_TF_STATE_BUCKET }}/clusters/${CLUSTER_NAME}/default.tflock" \
2>/dev/null || true
  # Release
  # release:
  #   runs-on: ubuntu-latest
  #   needs: [build, release-tests]
  #   if: needs.build.result == 'success' && needs.release-tests.result == 'success'
  #   steps:
  #     - name: Set conditional env variables
  #       shell: bash
  #       run: |
  #         if [[ "${{ github.ref_type }}" == "tag" ]]; then
  #           echo "VERSION=${{ github.ref_name }}" >> $GITHUB_ENV
  #           echo "TAGGED_RELEASE=true" >> $GITHUB_ENV
  #         else
  #           echo "VERSION=${GITHUB_SHA::7}" >> $GITHUB_ENV
  #           echo "TAGGED_RELEASE=false" >> $GITHUB_ENV
  #         fi
  #     - name: Download binaries from workflow artifacts into temp folder
  #       uses: actions/download-artifact@v4
  #       with:
  #         pattern: ${{ env.storage_binary_base }}*
  #         merge-multiple: true
  #         path: /tmp/release
  #     - name: Download ${{ env.c_bindings_lib_base }} from workflow artifacts into temp folder
  #       uses: actions/download-artifact@v5
  #       with:
  #         pattern: ${{ env.c_bindings_lib_base }}*
  #         merge-multiple: true
  #         path: /tmp/release
  #     - name: Create GH release
  #       uses: softprops/action-gh-release@v2
  #       if: env.TAGGED_RELEASE == 'true'
  #       with:
  #         files: |
  #           /tmp/release/*-*
  #         make_latest: true
  #     - name: Generate Python SDK
  #       uses: peter-evans/repository-dispatch@v3
  #       if: env.TAGGED_RELEASE == 'true'
  #       with:
  #         token: ${{ secrets.DISPATCH_PAT }}
  #         repository: logos-storage/logos-storage-py-api-client
  #         event-type: generate
  #         client-payload: '{"openapi_url": "https://raw.githubusercontent.com/logos-storage/logos-storage-nim/${{ github.ref }}/openapi.yaml"}'