diff --git a/.github/release/clusters/logos-storage-dist-tests-do-ams3/backend.tf b/.github/release/clusters/logos-storage-dist-tests-do-ams3/backend.tf deleted file mode 100644 index e6b8c6b4..00000000 --- a/.github/release/clusters/logos-storage-dist-tests-do-ams3/backend.tf +++ /dev/null @@ -1,16 +0,0 @@ -terraform { - backend "s3" { - endpoints = { - s3 = "https://fra1.digitaloceanspaces.com" - } - bucket = "codex-infra-terraform" - key = "clusters/logos-storage-dist-tests-do-ams3/terraform.tfstate" - region = "fra1" - - skip_credentials_validation = true - skip_requesting_account_id = true - skip_metadata_api_check = true - skip_region_validation = true - skip_s3_checksum = true - } -} diff --git a/.github/release/clusters/logos-storage-dist-tests-do-ams3/main.tf b/.github/release/clusters/logos-storage-dist-tests-do-ams3/main.tf deleted file mode 100644 index 3facf793..00000000 --- a/.github/release/clusters/logos-storage-dist-tests-do-ams3/main.tf +++ /dev/null @@ -1,57 +0,0 @@ -# Kubernetes cluster -module "doks" { - source = "../modules/doks" - - name = "logos-storage-dist-tests" - region = var.region - kubernetes_version = "1.34.5-do.2" - kubernetes_ha = true - kubernetes_auto_upgrade = false - kubernetes_node_pool_name = "infra-s-4vcpu-16gb-amd" - kubernetes_node_pool_size = "s-4vcpu-16gb-amd" - kubernetes_node_pool_auto_scale = true - kubernetes_node_pool_min = 1 - kubernetes_node_pool_max = 3 - kubernetes_node_pool_tags = ["default", "autoscale"] - kubernetes_node_pool_labels = { - default-pool = "true" - scaling-type = "auto" - workload-type = "infra" - } -} - -# Node pool - Runners CI -resource "digitalocean_kubernetes_node_pool" "runners-ci" { - cluster_id = module.doks.kubernetes_cluster_id - name = "runners-ci-s-2vcpu-8gb-amd" - size = "s-2vcpu-8gb-amd" - auto_scale = true - min_nodes = 1 - max_nodes = 5 - tags = ["runners-ci"] - - labels = { - allow-tests-pods = "false" - default-pool = "false" - scaling-type = "auto" - workload-type = "tests-runners-ci" - } -} - -# Node pool - Tests Pods -resource "digitalocean_kubernetes_node_pool" "tests-s-2vcpu-4gb" { - cluster_id = module.doks.kubernetes_cluster_id - name = "tests-s-2vcpu-4gb" - size = "s-2vcpu-4gb" - auto_scale = true - min_nodes = 1 - max_nodes = 10 - tags = ["tests-pods"] - - labels = { - allow-tests-pods = "true" - default-pool = "false" - scaling-type = "auto" - workload-type = "tests-pods" - } -} diff --git a/.github/release/clusters/logos-storage-dist-tests-do-ams3/providers.tf b/.github/release/clusters/logos-storage-dist-tests-do-ams3/providers.tf deleted file mode 100644 index 1eed7365..00000000 --- a/.github/release/clusters/logos-storage-dist-tests-do-ams3/providers.tf +++ /dev/null @@ -1,4 +0,0 @@ -# Providers -provider "digitalocean" { - token = var.do_token -} diff --git a/.github/release/clusters/logos-storage-dist-tests-do-ams3/variables.tf b/.github/release/clusters/logos-storage-dist-tests-do-ams3/variables.tf deleted file mode 100644 index 3eb848b3..00000000 --- a/.github/release/clusters/logos-storage-dist-tests-do-ams3/variables.tf +++ /dev/null @@ -1,10 +0,0 @@ -variable "region" { - description = "DigitalOcean region (e.g. ams3)" - type = string -} - -variable "do_token" { - description = "DigitalOcean API token" - type = string - sensitive = true -} diff --git a/.github/release/clusters/logos-storage-dist-tests-gcp-europe-west4/backend.tf b/.github/release/clusters/logos-storage-dist-tests-gcp-europe-west4/backend.tf new file mode 100644 index 00000000..1ec73157 --- /dev/null +++ b/.github/release/clusters/logos-storage-dist-tests-gcp-europe-west4/backend.tf @@ -0,0 +1,7 @@ +terraform { + backend "gcs" { + prefix = "clusters/logos-storage-dist-tests-gcp-europe-west4" + # bucket is supplied at init time via: + # terraform init -backend-config="bucket=" + } +} diff --git a/.github/release/clusters/logos-storage-dist-tests-gcp-europe-west4/main.tf b/.github/release/clusters/logos-storage-dist-tests-gcp-europe-west4/main.tf new file mode 100644 index 00000000..6017d92d --- /dev/null +++ b/.github/release/clusters/logos-storage-dist-tests-gcp-europe-west4/main.tf @@ -0,0 +1,70 @@ +# Kubernetes cluster +module "gke" { + source = "../modules/gke" + + name = "logos-storage-dist-tests" + project = var.project + region = var.region + kubernetes_release_channel = "STABLE" + node_pool_name = "infra-e2-standard-4" + node_pool_machine_type = "e2-standard-4" + node_pool_min = 1 + node_pool_max = 3 + node_pool_labels = { + default-pool = "true" + scaling-type = "auto" + workload-type = "infra" + } +} + +# Node pool - Runners CI +resource "google_container_node_pool" "runners-ci" { + name = "runners-ci-e2-standard-2" + cluster = module.gke.kubernetes_cluster_id + location = var.region + project = var.project + + autoscaling { + min_node_count = 1 + max_node_count = 5 + } + + node_config { + machine_type = "e2-standard-2" + labels = { + allow-tests-pods = "false" + default-pool = "false" + scaling-type = "auto" + workload-type = "tests-runners-ci" + } + oauth_scopes = [ + "https://www.googleapis.com/auth/cloud-platform", + ] + } +} + +# Node pool - Tests Pods +resource "google_container_node_pool" "tests-pods" { + name = "tests-e2-medium" + cluster = module.gke.kubernetes_cluster_id + location = var.region + project = var.project + + autoscaling { + min_node_count = 1 + max_node_count = 10 + } + + node_config { + machine_type = "e2-medium" + labels = { + allow-tests-pods = "true" + default-pool = "false" + scaling-type = "auto" + workload-type = "tests-pods" + } + oauth_scopes = [ + "https://www.googleapis.com/auth/cloud-platform", + ] + } +} diff --git a/.github/release/clusters/logos-storage-dist-tests-gcp-europe-west4/providers.tf b/.github/release/clusters/logos-storage-dist-tests-gcp-europe-west4/providers.tf new file mode 100644 index 00000000..a614cc6e --- /dev/null +++ b/.github/release/clusters/logos-storage-dist-tests-gcp-europe-west4/providers.tf @@ -0,0 +1,5 @@ +# Providers +provider "google" { + project = var.project + region = var.region +} diff --git a/.github/release/clusters/logos-storage-dist-tests-gcp-europe-west4/variables.tf b/.github/release/clusters/logos-storage-dist-tests-gcp-europe-west4/variables.tf new file mode 100644 index 00000000..8dd91afb --- /dev/null +++ b/.github/release/clusters/logos-storage-dist-tests-gcp-europe-west4/variables.tf @@ -0,0 +1,9 @@ +variable "project" { + description = "GCP project ID" + type = string +} + +variable "region" { + description = "GCP region (e.g. europe-west4)" + type = string +} diff --git a/.github/release/clusters/logos-storage-dist-tests-do-ams3/versions.tf b/.github/release/clusters/logos-storage-dist-tests-gcp-europe-west4/versions.tf similarity index 52% rename from .github/release/clusters/logos-storage-dist-tests-do-ams3/versions.tf rename to .github/release/clusters/logos-storage-dist-tests-gcp-europe-west4/versions.tf index 8841865e..93dce430 100644 --- a/.github/release/clusters/logos-storage-dist-tests-do-ams3/versions.tf +++ b/.github/release/clusters/logos-storage-dist-tests-gcp-europe-west4/versions.tf @@ -2,9 +2,9 @@ terraform { required_version = "~> 1.0" required_providers { - digitalocean = { - source = "digitalocean/digitalocean" - version = "~> 2.0" + google = { + source = "hashicorp/google" + version = "~> 6.0" } } } diff --git a/.github/release/clusters/modules/doks/locals.tf b/.github/release/clusters/modules/doks/locals.tf deleted file mode 100644 index f2dd8a14..00000000 --- a/.github/release/clusters/modules/doks/locals.tf +++ /dev/null @@ -1,4 +0,0 @@ -locals { - name = "${var.name}-do-${var.region}" - node_pool_name = "pool-${var.kubernetes_node_pool_size}" -} diff --git a/.github/release/clusters/modules/doks/main.tf b/.github/release/clusters/modules/doks/main.tf deleted file mode 100644 index d945c911..00000000 --- a/.github/release/clusters/modules/doks/main.tf +++ /dev/null @@ -1,33 +0,0 @@ -# Kubernetes cluster -resource "digitalocean_kubernetes_cluster" "this" { - name = local.name - region = var.region - version = var.kubernetes_version - ha = var.kubernetes_ha - auto_upgrade = var.kubernetes_auto_upgrade - - node_pool { - name = var.kubernetes_node_pool_name - size = var.kubernetes_node_pool_size - node_count = var.kubernetes_node_pool_count - auto_scale = var.kubernetes_node_pool_auto_scale - min_nodes = var.kubernetes_node_pool_min - max_nodes = var.kubernetes_node_pool_max - tags = var.kubernetes_node_pool_tags - labels = var.kubernetes_node_pool_labels - - dynamic "taint" { - for_each = length(var.kubernetes_node_pool_taint) == 0 ? {} : { taint = true } - content { - key = lookup(var.kubernetes_node_pool_taint, "key") - value = lookup(var.kubernetes_node_pool_taint, "value") - effect = lookup(var.kubernetes_node_pool_taint, "effect") - } - } - } - - maintenance_policy { - day = var.kubernetes_maintenance_day - start_time = var.kubernetes_maintenance_start_time - } -} diff --git a/.github/release/clusters/modules/doks/outputs.tf b/.github/release/clusters/modules/doks/outputs.tf deleted file mode 100644 index 3344ad92..00000000 --- a/.github/release/clusters/modules/doks/outputs.tf +++ /dev/null @@ -1,5 +0,0 @@ -# Kubernetes cluster -output "kubernetes_cluster_id" { - value = digitalocean_kubernetes_cluster.this.id - description = "A unique ID that can be used to identify and reference a Kubernetes cluster." -} diff --git a/.github/release/clusters/modules/doks/variables.tf b/.github/release/clusters/modules/doks/variables.tf deleted file mode 100644 index e0f3253c..00000000 --- a/.github/release/clusters/modules/doks/variables.tf +++ /dev/null @@ -1,97 +0,0 @@ -# Main -variable "name" { - type = string - description = "A name for the created resources." -} - -variable "region" { - type = string - description = "The DigitalOcean region slug for the resources location." -} - -# Kubernetes Control Plane -variable "kubernetes_version" { - type = string - description = "The slug identifier for the version of Kubernetes used for the cluster." -} - -variable "kubernetes_ha" { - type = bool - description = "Enable/disable the high availability control plane for a cluster." -} - -variable "kubernetes_auto_upgrade" { - type = bool - description = "A boolean value indicating whether the cluster will be automatically upgraded to new patch releases during its maintenance window." -} - -variable "kubernetes_maintenance_day" { - type = string - description = "The day of the maintenance window policy." - default = "sunday" -} - -variable "kubernetes_maintenance_start_time" { - type = string - description = "The start time in UTC of the maintenance window policy in 24-hour clock format / HH:MM notation (e.g., 15:00)." - default = "04:00" -} - -# Kubernetes default Node Pool -variable "kubernetes_node_pool_name" { - type = string - description = "A name for the node pool." - default = null -} - -variable "kubernetes_node_pool_size" { - type = string - description = "The slug identifier for the type of Droplet to be used as workers in the node pool." - default = null -} - -variable "kubernetes_node_pool_count" { - type = number - default = null - description = "The number of Droplet instances in the node pool." -} - -variable "kubernetes_node_pool_auto_scale" { - type = bool - description = "Enable auto-scaling of the number of nodes in the node pool within the given min/max range." - default = null -} - -variable "kubernetes_node_pool_min" { - type = number - description = "If auto-scaling is enabled, this represents the minimum number of nodes that the node pool can be scaled down to." - default = null -} - -variable "kubernetes_node_pool_max" { - type = number - description = "If auto-scaling is enabled, this represents the maximum number of nodes that the node pool can be scaled up to." - default = null -} - -variable "kubernetes_node_pool_tags" { - type = list(any) - description = "A list of tag names applied to the node pool." - default = ["default", "autoscale"] -} - -variable "kubernetes_node_pool_labels" { - type = map(string) - description = "A map of key/value pairs to apply to nodes in the pool." - default = { - default-pool = "true" - scaling-type = "auto" - } -} - -variable "kubernetes_node_pool_taint" { - type = map(string) - description = "A block representing a taint applied to all nodes in the pool." - default = { - } -} diff --git a/.github/release/clusters/modules/doks/versions.tf b/.github/release/clusters/modules/doks/versions.tf deleted file mode 100644 index 21efa07e..00000000 --- a/.github/release/clusters/modules/doks/versions.tf +++ /dev/null @@ -1,9 +0,0 @@ -# Terraform settings -terraform { - required_providers { - digitalocean = { - source = "digitalocean/digitalocean" - version = "~> 2.0" - } - } -} diff --git a/.github/release/clusters/modules/gke/locals.tf b/.github/release/clusters/modules/gke/locals.tf new file mode 100644 index 00000000..197f207d --- /dev/null +++ b/.github/release/clusters/modules/gke/locals.tf @@ -0,0 +1,3 @@ +locals { + name = "${var.name}-gcp-${var.region}" +} diff --git a/.github/release/clusters/modules/gke/main.tf b/.github/release/clusters/modules/gke/main.tf new file mode 100644 index 00000000..daa26709 --- /dev/null +++ b/.github/release/clusters/modules/gke/main.tf @@ -0,0 +1,47 @@ +# Kubernetes cluster +resource "google_container_cluster" "this" { + name = local.name + location = var.region + project = var.project + + # Create an empty cluster — all node pools are managed as separate resources + remove_default_node_pool = true + initial_node_count = 1 + + deletion_protection = false + + release_channel { + channel = var.kubernetes_release_channel + } + + # Enable Workload Identity + workload_identity_config { + workload_pool = "${var.project}.svc.id.goog" + } + + # Send pod stdout/stderr to Cloud Logging automatically + logging_service = "logging.googleapis.com/kubernetes" + monitoring_service = "monitoring.googleapis.com/kubernetes" +} + +# Default (infra) node pool +resource "google_container_node_pool" "default" { + name = var.node_pool_name + cluster = google_container_cluster.this.id + location = var.region + project = var.project + + autoscaling { + min_node_count = var.node_pool_min + max_node_count = var.node_pool_max + } + + node_config { + machine_type = var.node_pool_machine_type + labels = var.node_pool_labels + + oauth_scopes = [ + "https://www.googleapis.com/auth/cloud-platform", + ] + } +} diff --git a/.github/release/clusters/modules/gke/outputs.tf b/.github/release/clusters/modules/gke/outputs.tf new file mode 100644 index 00000000..6adcc20d --- /dev/null +++ b/.github/release/clusters/modules/gke/outputs.tf @@ -0,0 +1,10 @@ +# Kubernetes cluster +output "kubernetes_cluster_id" { + value = google_container_cluster.this.id + description = "The fully-qualified ID of the GKE cluster." +} + +output "kubernetes_cluster_name" { + value = google_container_cluster.this.name + description = "The name of the GKE cluster." +} diff --git a/.github/release/clusters/modules/gke/variables.tf b/.github/release/clusters/modules/gke/variables.tf new file mode 100644 index 00000000..cab525ca --- /dev/null +++ b/.github/release/clusters/modules/gke/variables.tf @@ -0,0 +1,52 @@ +# Main +variable "name" { + type = string + description = "A name for the created resources." +} + +variable "project" { + type = string + description = "The GCP project ID." +} + +variable "region" { + type = string + description = "The GCP region for the cluster (regional cluster spans 3 zones)." +} + +# Kubernetes Control Plane +variable "kubernetes_release_channel" { + type = string + description = "The GKE release channel: RAPID, REGULAR, or STABLE." + default = "STABLE" +} + +# Kubernetes default Node Pool +variable "node_pool_name" { + type = string + description = "A name for the default node pool." +} + +variable "node_pool_machine_type" { + type = string + description = "The GCE machine type for nodes in the default pool." +} + +variable "node_pool_min" { + type = number + description = "Minimum number of nodes per zone in the default pool (autoscaling)." +} + +variable "node_pool_max" { + type = number + description = "Maximum number of nodes per zone in the default pool (autoscaling)." +} + +variable "node_pool_labels" { + type = map(string) + description = "A map of key/value pairs to apply as Kubernetes labels to nodes in the default pool." + default = { + default-pool = "true" + scaling-type = "auto" + } +} diff --git a/.github/release/clusters/modules/gke/versions.tf b/.github/release/clusters/modules/gke/versions.tf new file mode 100644 index 00000000..1b886dc9 --- /dev/null +++ b/.github/release/clusters/modules/gke/versions.tf @@ -0,0 +1,9 @@ +# Terraform settings +terraform { + required_providers { + google = { + source = "hashicorp/google" + version = "~> 6.0" + } + } +} diff --git a/.github/workflows/release.yml b/.github/workflows/release.yml index 61aa7933..04129868 100644 --- a/.github/workflows/release.yml +++ b/.github/workflows/release.yml @@ -206,27 +206,29 @@ jobs: if: github.ref_type == 'tag' || github.event_name == 'workflow_dispatch' needs: build-docker-dist-tests env: - TF_VAR_region: ams3 - TF_VAR_do_token: ${{ secrets.RELEASE_TESTS_DO_TOKEN }} - AWS_ACCESS_KEY_ID: ${{ secrets.RELEASE_TESTS_SPACES_ACCESS_KEY }} - AWS_SECRET_ACCESS_KEY: ${{ secrets.RELEASE_TESTS_SPACES_SECRET_KEY }} + TF_VAR_project: ${{ secrets.RELEASE_TESTS_GCP_PROJECT }} + TF_VAR_region: europe-west4 STORAGEDOCKERIMAGE: ${{ needs.build-docker-dist-tests.outputs.logos_storage_image }} TEST_TYPE: release-tests BRANCH: ${{ inputs.branch || 'master' }} SOURCE: https://github.com/logos-storage/logos-storage-nim-cs-dist-tests - TF_DIR: .github/release/clusters/logos-storage-dist-tests-do-ams3 + TF_DIR: .github/release/clusters/logos-storage-dist-tests-gcp-europe-west4 steps: - name: Checkout uses: actions/checkout@v4 + - name: Authenticate to GCP + uses: google-github-actions/auth@v2 + with: + workload_identity_provider: ${{ secrets.RELEASE_TESTS_GCP_WORKLOAD_IDENTITY_PROVIDER }} + service_account: ${{ secrets.RELEASE_TESTS_GCP_SERVICE_ACCOUNT }} + + - name: Setup gcloud + uses: google-github-actions/setup-gcloud@v2 + - name: Setup Terraform uses: hashicorp/setup-terraform@v3 - - name: Setup doctl - uses: digitalocean/action-doctl@v2 - with: - token: ${{ secrets.RELEASE_TESTS_DO_TOKEN }} - - name: Setup kubectl uses: azure/setup-kubectl@v4 with: @@ -234,14 +236,18 @@ jobs: - name: Terraform init working-directory: ${{ env.TF_DIR }} - run: terraform init + run: terraform init -backend-config="bucket=${{ secrets.RELEASE_TESTS_TF_STATE_BUCKET }}" - name: Terraform apply + id: tf-apply working-directory: ${{ env.TF_DIR }} run: terraform apply -auto-approve - name: Get kubeconfig - run: doctl kubernetes cluster kubeconfig save logos-storage-dist-tests-do-ams3 + run: | + gcloud container clusters get-credentials logos-storage-dist-tests-gcp-europe-west4 \ + --region europe-west4 \ + --project ${{ secrets.RELEASE_TESTS_GCP_PROJECT }} - name: Create in-cluster app kubeconfig secret run: | @@ -291,7 +297,7 @@ jobs: [[ "$job_status" == "SuccessCriteriaMet" ]] || exit 1 - name: Terraform destroy - if: always() + if: always() && steps.tf-apply.conclusion != 'skipped' working-directory: ${{ env.TF_DIR }} run: terraform destroy -auto-approve