diff --git a/.github/release/clusters/logos-storage-dist-tests-do-ams3/backend.tf b/.github/release/clusters/logos-storage-dist-tests-do-ams3/backend.tf
new file mode 100644
index 00000000..e6b8c6b4
--- /dev/null
+++ b/.github/release/clusters/logos-storage-dist-tests-do-ams3/backend.tf
@@ -0,0 +1,16 @@
+terraform {
+  backend "s3" {
+    endpoints = {
+      s3 = "https://fra1.digitaloceanspaces.com"
+    }
+    bucket = "codex-infra-terraform"
+    key    = "clusters/logos-storage-dist-tests-do-ams3/terraform.tfstate"
+    region = "fra1"
+
+    skip_credentials_validation = true
+    skip_requesting_account_id  = true
+    skip_metadata_api_check     = true
+    skip_region_validation      = true
+    skip_s3_checksum            = true
+  }
+}
diff --git a/.github/release/clusters/logos-storage-dist-tests-do-ams3/main.tf b/.github/release/clusters/logos-storage-dist-tests-do-ams3/main.tf
new file mode 100644
index 00000000..3facf793
--- /dev/null
+++ b/.github/release/clusters/logos-storage-dist-tests-do-ams3/main.tf
@@ -0,0 +1,57 @@
+# Kubernetes cluster
+module "doks" {
+  source = "../modules/doks"
+
+  name                            = "logos-storage-dist-tests"
+  region                          = var.region
+  kubernetes_version              = "1.34.5-do.2"
+  kubernetes_ha                   = true
+  kubernetes_auto_upgrade         = false
+  kubernetes_node_pool_name       = "infra-s-4vcpu-16gb-amd"
+  kubernetes_node_pool_size       = "s-4vcpu-16gb-amd"
+  kubernetes_node_pool_auto_scale = true
+  kubernetes_node_pool_min        = 1
+  kubernetes_node_pool_max        = 3
+  kubernetes_node_pool_tags       = ["default", "autoscale"]
+  kubernetes_node_pool_labels = {
+    default-pool  = "true"
+    scaling-type  = "auto"
+    workload-type = "infra"
+  }
+}
+
+# Node pool - Runners CI
+resource "digitalocean_kubernetes_node_pool" "runners-ci" {
+  cluster_id = module.doks.kubernetes_cluster_id
+  name       = "runners-ci-s-2vcpu-8gb-amd"
+  size       = "s-2vcpu-8gb-amd"
+  auto_scale = true
+  min_nodes  = 1
+  max_nodes  = 5
+  tags       = ["runners-ci"]
+
+  labels = {
+    allow-tests-pods = "false"
+    default-pool     = "false"
+    scaling-type     = "auto"
+    workload-type    = "tests-runners-ci"
+  }
+}
+
+# Node pool - Tests Pods
+resource "digitalocean_kubernetes_node_pool" "tests-s-2vcpu-4gb" {
+  cluster_id = module.doks.kubernetes_cluster_id
+  name       = "tests-s-2vcpu-4gb"
+  size       = "s-2vcpu-4gb"
+  auto_scale = true
+  min_nodes  = 1
+  max_nodes  = 10
+  tags       = ["tests-pods"]
+
+  labels = {
+    allow-tests-pods = "true"
+    default-pool     = "false"
+    scaling-type     = "auto"
+    workload-type    = "tests-pods"
+  }
+}
diff --git a/.github/release/clusters/logos-storage-dist-tests-do-ams3/providers.tf b/.github/release/clusters/logos-storage-dist-tests-do-ams3/providers.tf
new file mode 100644
index 00000000..1eed7365
--- /dev/null
+++ b/.github/release/clusters/logos-storage-dist-tests-do-ams3/providers.tf
@@ -0,0 +1,4 @@
+# Providers
+provider "digitalocean" {
+  token = var.do_token
+}
diff --git a/.github/release/clusters/logos-storage-dist-tests-do-ams3/variables.tf b/.github/release/clusters/logos-storage-dist-tests-do-ams3/variables.tf
new file mode 100644
index 00000000..3eb848b3
--- /dev/null
+++ b/.github/release/clusters/logos-storage-dist-tests-do-ams3/variables.tf
@@ -0,0 +1,10 @@
+variable "region" {
+  description = "DigitalOcean region (e.g. ams3)"
+  type        = string
+}
+
+variable "do_token" {
+  description = "DigitalOcean API token"
+  type        = string
+  sensitive   = true
+}
diff --git a/.github/release/clusters/logos-storage-dist-tests-do-ams3/versions.tf b/.github/release/clusters/logos-storage-dist-tests-do-ams3/versions.tf
new file mode 100644
index 00000000..8841865e
--- /dev/null
+++ b/.github/release/clusters/logos-storage-dist-tests-do-ams3/versions.tf
@@ -0,0 +1,10 @@
+# Terraform settings
+terraform {
+  required_version = "~> 1.0"
+  required_providers {
+    digitalocean = {
+      source  = "digitalocean/digitalocean"
+      version = "~> 2.0"
+    }
+  }
+}
diff --git a/.github/release/clusters/modules/doks/locals.tf b/.github/release/clusters/modules/doks/locals.tf
new file mode 100644
index 00000000..f2dd8a14
--- /dev/null
+++ b/.github/release/clusters/modules/doks/locals.tf
@@ -0,0 +1,4 @@
+locals {
+  name           = "${var.name}-do-${var.region}"
+  node_pool_name = "pool-${var.kubernetes_node_pool_size}"
+}
diff --git a/.github/release/clusters/modules/doks/main.tf b/.github/release/clusters/modules/doks/main.tf
new file mode 100644
index 00000000..d945c911
--- /dev/null
+++ b/.github/release/clusters/modules/doks/main.tf
@@ -0,0 +1,35 @@
+# Kubernetes cluster
+resource "digitalocean_kubernetes_cluster" "this" {
+  name         = local.name
+  region       = var.region
+  version      = var.kubernetes_version
+  ha           = var.kubernetes_ha
+  auto_upgrade = var.kubernetes_auto_upgrade
+
+  node_pool {
+    # Fall back to the size-derived name from locals.tf when no name is given
+    name       = coalesce(var.kubernetes_node_pool_name, local.node_pool_name)
+    size       = var.kubernetes_node_pool_size
+    node_count = var.kubernetes_node_pool_count
+    auto_scale = var.kubernetes_node_pool_auto_scale
+    min_nodes  = var.kubernetes_node_pool_min
+    max_nodes  = var.kubernetes_node_pool_max
+    tags       = var.kubernetes_node_pool_tags
+    labels     = var.kubernetes_node_pool_labels
+
+    # Emit a single taint block only when a taint map is provided
+    dynamic "taint" {
+      for_each = length(var.kubernetes_node_pool_taint) == 0 ? {} : { taint = true }
+      content {
+        key    = var.kubernetes_node_pool_taint["key"]
+        value  = var.kubernetes_node_pool_taint["value"]
+        effect = var.kubernetes_node_pool_taint["effect"]
+      }
+    }
+  }
+
+  maintenance_policy {
+    day        = var.kubernetes_maintenance_day
+    start_time = var.kubernetes_maintenance_start_time
+  }
+}
diff --git a/.github/release/clusters/modules/doks/outputs.tf b/.github/release/clusters/modules/doks/outputs.tf
new file mode 100644
index 00000000..3344ad92
--- /dev/null
+++ b/.github/release/clusters/modules/doks/outputs.tf
@@ -0,0 +1,5 @@
+# Kubernetes cluster
+output "kubernetes_cluster_id" {
+  value       = digitalocean_kubernetes_cluster.this.id
+  description = "A unique ID that can be used to identify and reference a Kubernetes cluster."
+}
diff --git a/.github/release/clusters/modules/doks/variables.tf b/.github/release/clusters/modules/doks/variables.tf
new file mode 100644
index 00000000..e0f3253c
--- /dev/null
+++ b/.github/release/clusters/modules/doks/variables.tf
@@ -0,0 +1,97 @@
+# Main
+variable "name" {
+  type        = string
+  description = "A name for the created resources."
+}
+
+variable "region" {
+  type        = string
+  description = "The DigitalOcean region slug for the resources location."
+}
+
+# Kubernetes Control Plane
+variable "kubernetes_version" {
+  type        = string
+  description = "The slug identifier for the version of Kubernetes used for the cluster."
+}
+
+variable "kubernetes_ha" {
+  type        = bool
+  description = "Enable/disable the high availability control plane for a cluster."
+}
+
+variable "kubernetes_auto_upgrade" {
+  type        = bool
+  description = "A boolean value indicating whether the cluster will be automatically upgraded to new patch releases during its maintenance window."
+}
+
+variable "kubernetes_maintenance_day" {
+  type        = string
+  description = "The day of the maintenance window policy."
+  default     = "sunday"
+}
+
+variable "kubernetes_maintenance_start_time" {
+  type        = string
+  description = "The start time in UTC of the maintenance window policy in 24-hour clock format / HH:MM notation (e.g., 15:00)."
+  default     = "04:00"
+}
+
+# Kubernetes default Node Pool
+variable "kubernetes_node_pool_name" {
+  type        = string
+  description = "A name for the node pool."
+  default     = null
+}
+
+variable "kubernetes_node_pool_size" {
+  type        = string
+  description = "The slug identifier for the type of Droplet to be used as workers in the node pool."
+  default     = null
+}
+
+variable "kubernetes_node_pool_count" {
+  type        = number
+  default     = null
+  description = "The number of Droplet instances in the node pool."
+}
+
+variable "kubernetes_node_pool_auto_scale" {
+  type        = bool
+  description = "Enable auto-scaling of the number of nodes in the node pool within the given min/max range."
+  default     = null
+}
+
+variable "kubernetes_node_pool_min" {
+  type        = number
+  description = "If auto-scaling is enabled, this represents the minimum number of nodes that the node pool can be scaled down to."
+  default     = null
+}
+
+variable "kubernetes_node_pool_max" {
+  type        = number
+  description = "If auto-scaling is enabled, this represents the maximum number of nodes that the node pool can be scaled up to."
+  default     = null
+}
+
+variable "kubernetes_node_pool_tags" {
+  type        = list(string)
+  description = "A list of tag names applied to the node pool."
+  default     = ["default", "autoscale"]
+}
+
+variable "kubernetes_node_pool_labels" {
+  type        = map(string)
+  description = "A map of key/value pairs to apply to nodes in the pool."
+  default = {
+    default-pool = "true"
+    scaling-type = "auto"
+  }
+}
+
+variable "kubernetes_node_pool_taint" {
+  type        = map(string)
+  description = "A block representing a taint applied to all nodes in the pool."
+  default = {
+  }
+}
diff --git a/.github/release/clusters/modules/doks/versions.tf b/.github/release/clusters/modules/doks/versions.tf
new file mode 100644
index 00000000..21efa07e
--- /dev/null
+++ b/.github/release/clusters/modules/doks/versions.tf
@@ -0,0 +1,9 @@
+# Terraform settings
+terraform {
+  required_providers {
+    digitalocean = {
+      source  = "digitalocean/digitalocean"
+      version = "~> 2.0"
+    }
+  }
+}
diff --git a/.github/release/job-release-tests.yaml b/.github/release/job-release-tests.yaml
new file mode 100644
index 00000000..3d56fe01
--- /dev/null
+++ b/.github/release/job-release-tests.yaml
@@ -0,0 +1,61 @@
+apiVersion: batch/v1
+kind: Job
+metadata:
+  name: ${NAMEPREFIX}
+  namespace: default
+  labels:
+    name: ${NAMEPREFIX}
+    runid: ${RUNID}
+spec:
+  ttlSecondsAfterFinished: 86400
+  backoffLimit: 0
+  template:
+    metadata:
+      name: ${NAMEPREFIX}
+      labels:
+        app: ${TEST_TYPE}-runner
+        name: ${NAMEPREFIX}
+        runid: ${RUNID}
+    spec:
+      priorityClassName: system-node-critical
+      nodeSelector:
+        workload-type: "tests-runners-ci"
+      containers:
+      - name: runner
+        image: logosstorage/logos-storage-dist-tests:latest
+        imagePullPolicy: Always
+        resources:
+          requests:
+            memory: "1Gi"
+        env:
+        - name: KUBECONFIG
+          value: "/opt/kubeconfig.yaml"
+        - name: LOGPATH
+          value: "/var/log/storage-${TEST_TYPE}"
+        - name: BRANCH
+          value: "${BRANCH}"
+        - name: SOURCE
+          value: "${SOURCE}"
+        - name: RUNID
+          value: "${RUNID}"
+        - name: STORAGEDOCKERIMAGE
+          value: "${STORAGEDOCKERIMAGE}"
+        - name: TESTID
+          value: "${TESTID}"
+        - name: TESTS_TYPE
+          value: "${TEST_TYPE}"
+        volumeMounts:
+        - name: kubeconfig
+          mountPath: /opt/kubeconfig.yaml
+          subPath: kubeconfig.yaml
+        - name: logs
+          mountPath: /var/log/storage-${TEST_TYPE}
+        args: ["dotnet", "test", "Tests/LogosStorageReleaseTests"]
+      restartPolicy: Never
+      volumes:
+        - name: kubeconfig
+          secret:
+            secretName: storage-dist-tests-app-kubeconfig
+        - name: logs
+          hostPath:
+            path: /var/log/storage-${TEST_TYPE}
diff --git a/.github/release/kubeconfig-template.yaml b/.github/release/kubeconfig-template.yaml
new file mode 100644
index 00000000..5dd36976
--- /dev/null
+++ b/.github/release/kubeconfig-template.yaml
@@ -0,0 +1,17 @@
+apiVersion: v1
+kind: Config
+clusters:
+- cluster:
+    certificate-authority-data: ${CA}
+    server: ${SERVER}
+  name: release-tests
+contexts:
+- context:
+    cluster: release-tests
+    user: release-tests-runner
+  name: release-tests
+current-context: release-tests
+users:
+- name: release-tests-runner
+  user:
+    token: ${TOKEN}
diff --git a/.github/workflows/release.yml b/.github/workflows/release.yml
index 0b64e231..6d1e4ae4 100644
--- a/.github/workflows/release.yml
+++ b/.github/workflows/release.yml
@@ -7,6 +7,12 @@ on:
     branches:
       - master
   workflow_dispatch:
+    inputs:
+      branch:
+        description: 'dist-tests branch to run tests from'
+        required: false
+        default: 'master'
+        type: string
 
 env:
   cache_nonce: 0 # Allows for easily busting actions/cache caches
@@ -179,11 +185,136 @@ jobs:
           path: ${{ env.build_dir }}/${{ env.c_bindings_lib }}.zip
           if-no-files-found: error
 
+  # Build Docker logosstorage/logos-storage-nim:latest-dist-tests image for Logos Storage nodes in the cluster
+  build-docker-dist-tests:
+    name: Build Docker dist-tests image
+    if: github.ref_type == 'tag' || github.event_name == 'workflow_dispatch'
+    uses: ./.github/workflows/docker-reusable.yml
+    with:
+      nimflags: '-d:disableMarchNative -d:storage_enable_api_debug_peers=true -d:storage_enable_log_counter=true'
+      nat_ip_auto: true
+      tag_latest: false
+      tag_stable: false
+      tag_suffix: dist-tests
+      tag_sha: false
+    secrets: inherit
+
+  # Release tests
+  release-tests:
+    name: Release Tests
+    runs-on: ubuntu-latest
+    if: github.ref_type == 'tag' || github.event_name == 'workflow_dispatch'
+    needs: build-docker-dist-tests
+    env:
+      TF_VAR_region: ams3
+      TF_VAR_do_token: ${{ secrets.RELEASE_TESTS_DO_TOKEN }}
+      AWS_ACCESS_KEY_ID: ${{ secrets.RELEASE_TESTS_SPACES_ACCESS_KEY }}
+      AWS_SECRET_ACCESS_KEY: ${{ secrets.RELEASE_TESTS_SPACES_SECRET_KEY }}
+      STORAGEDOCKERIMAGE: ${{ needs.build-docker-dist-tests.outputs.logos_storage_image }}
+      TEST_TYPE: release-tests
+      BRANCH: ${{ inputs.branch || 'master' }}
+      SOURCE: https://github.com/logos-storage/logos-storage-nim-cs-dist-tests
+      TF_DIR: .github/release/clusters/logos-storage-dist-tests-do-ams3
+    steps:
+      - name: Checkout
+        uses: actions/checkout@v4
+
+      - name: Setup Terraform
+        uses: hashicorp/setup-terraform@v3
+
+      - name: Setup doctl
+        uses: digitalocean/action-doctl@v2
+        with:
+          token: ${{ secrets.RELEASE_TESTS_DO_TOKEN }}
+
+      - name: Setup kubectl
+        uses: azure/setup-kubectl@v4
+        with:
+          # Keep kubectl within one minor version of the cluster (1.34.x)
+          version: v1.34.5
+
+      - name: Terraform init
+        working-directory: ${{ env.TF_DIR }}
+        run: terraform init
+
+      - name: Terraform apply
+        id: tf-apply
+        working-directory: ${{ env.TF_DIR }}
+        run: terraform apply -auto-approve
+
+      - name: Get kubeconfig
+        run: doctl kubernetes cluster kubeconfig save logos-storage-dist-tests-do-ams3
+
+      - name: Create in-cluster app kubeconfig secret
+        run: |
+          kubectl create serviceaccount release-tests-runner -n default
+          kubectl create clusterrolebinding release-tests-runner \
+            --clusterrole=cluster-admin \
+            --serviceaccount=default:release-tests-runner
+
+          export TOKEN=$(kubectl create token release-tests-runner -n default --duration=2h)
+          export SERVER=$(kubectl config view --minify -o jsonpath='{.clusters[0].cluster.server}')
+          export CA=$(kubectl config view --minify --raw -o jsonpath='{.clusters[0].cluster.certificate-authority-data}')
+
+          kubectl create secret generic storage-dist-tests-app-kubeconfig \
+            --from-file=kubeconfig.yaml=<(envsubst < .github/release/kubeconfig-template.yaml) \
+            -n default
+
+      - name: Set run variables
+        run: |
+          RUNID=$(date +%Y%m%d-%H%M%S)
+          echo "RUNID=${RUNID}" >> $GITHUB_ENV
+          echo "NAMEPREFIX=r-tests-${RUNID}" >> $GITHUB_ENV
+          echo "TESTID=$(git rev-parse --short HEAD)" >> $GITHUB_ENV
+
+      - name: Deploy test job
+        run: envsubst < .github/release/job-release-tests.yaml | kubectl apply -f -
+
+      - name: Wait for test pod to start
+        run: |
+          # The Job controller creates the pod asynchronously; kubectl wait
+          # errors out immediately when the selector matches nothing yet
+          for _ in $(seq 1 30); do
+            [[ -n "$(kubectl get pods -l job-name=$NAMEPREFIX -n default -o name)" ]] && break
+            sleep 2
+          done
+          kubectl wait pod \
+            -l job-name=$NAMEPREFIX \
+            -n default \
+            --for=condition=Ready \
+            --timeout=300s
+
+      - name: Stream test logs
+        run: |
+          # With a label selector kubectl logs defaults to the last 10 lines only
+          kubectl logs -l job-name=$NAMEPREFIX \
+            -n default \
+            --tail=-1 \
+            --follow
+
+      - name: Check job status
+        run: |
+          # Poll for a terminal condition instead of sleeping and reading
+          # conditions[0], whose position in the list is not guaranteed
+          for _ in $(seq 1 60); do
+            job_status=$(kubectl get jobs $NAMEPREFIX -n default \
+              -o jsonpath='{.status.conditions[*].type}')
+            [[ "$job_status" =~ Complete|Failed ]] && break
+            sleep 5
+          done
+          echo "Job status: $job_status"
+          [[ "$job_status" == *Complete* ]] || exit 1
+
+      - name: Terraform destroy
+        if: always() && steps.tf-apply.conclusion != 'skipped'
+        working-directory: ${{ env.TF_DIR }}
+        run: terraform destroy -auto-approve
+
   # Release
   release:
     runs-on: ubuntu-latest
-    needs: build
-    if: success() || failure()
+    needs: [build, release-tests]
+    if: needs.build.result == 'success' && needs.release-tests.result == 'success'
     steps:
       - name: Set conditional env variables
         shell: bash