mirror of
https://github.com/logos-storage/logos-storage-nim.git
synced 2026-05-12 06:19:33 +00:00
chore: reduce GKE release test cluster provisioning time and cost
- Configure runners-ci node pool inline in the cluster resource instead of using remove_default_node_pool=true, eliminating the provision-then-delete cycle that added ~5 min to terraform apply
- Remove the separate infra pool; runners-ci is now the only pool on the critical path of cluster creation
- Set tests-pods pool min_node_count=0 so no node is provisioned at apply time — nodes scale up only when test pods are scheduled
- Enable spot instances on the tests-pods pool for ~60-91% cost saving
- Add 60 min job timeout to release-tests to bound hung cluster cost
- Add Terraform plugin cache keyed on the lock file to skip provider re-downloads on subsequent runs (~30-60s saved)
- Install gke-gcloud-auth-plugin via setup-gcloud to fix kubectl auth

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
parent
00a6264030
commit
8f13be1dc4
@ -1,4 +1,4 @@
|
||||
# Kubernetes cluster
|
||||
# Kubernetes cluster — runners-ci pool is configured inline in the module
|
||||
module "gke" {
|
||||
source = "../modules/gke"
|
||||
|
||||
@ -7,40 +7,15 @@ module "gke" {
|
||||
region = var.region
|
||||
zone = var.zone
|
||||
kubernetes_release_channel = "STABLE"
|
||||
node_pool_name = "infra-e2-standard-4"
|
||||
node_pool_machine_type = "e2-standard-4"
|
||||
node_pool_name = "runners-ci-e2-standard-2"
|
||||
node_pool_machine_type = "e2-standard-2"
|
||||
node_pool_min = 1
|
||||
node_pool_max = 3
|
||||
node_pool_max = 5
|
||||
node_pool_labels = {
|
||||
default-pool = "true"
|
||||
scaling-type = "auto"
|
||||
workload-type = "infra"
|
||||
}
|
||||
}
|
||||
|
||||
# Node pool - Runners CI
|
||||
resource "google_container_node_pool" "runners-ci" {
|
||||
name = "runners-ci-e2-standard-2"
|
||||
cluster = module.gke.kubernetes_cluster_id
|
||||
location = var.zone
|
||||
project = var.project
|
||||
|
||||
autoscaling {
|
||||
min_node_count = 1
|
||||
max_node_count = 5
|
||||
}
|
||||
|
||||
node_config {
|
||||
machine_type = "e2-standard-2"
|
||||
labels = {
|
||||
allow-tests-pods = "false"
|
||||
default-pool = "false"
|
||||
scaling-type = "auto"
|
||||
workload-type = "tests-runners-ci"
|
||||
}
|
||||
oauth_scopes = [
|
||||
"https://www.googleapis.com/auth/cloud-platform",
|
||||
]
|
||||
allow-tests-pods = "false"
|
||||
default-pool = "true"
|
||||
scaling-type = "auto"
|
||||
workload-type = "tests-runners-ci"
|
||||
}
|
||||
}
|
||||
|
||||
@ -52,12 +27,13 @@ resource "google_container_node_pool" "tests-pods" {
|
||||
project = var.project
|
||||
|
||||
autoscaling {
|
||||
min_node_count = 1
|
||||
min_node_count = 0
|
||||
max_node_count = 10
|
||||
}
|
||||
|
||||
node_config {
|
||||
machine_type = "e2-medium"
|
||||
spot = true
|
||||
labels = {
|
||||
allow-tests-pods = "true"
|
||||
default-pool = "false"
|
||||
|
||||
38
.github/release/clusters/modules/gke/main.tf
vendored
38
.github/release/clusters/modules/gke/main.tf
vendored
@ -1,13 +1,10 @@
|
||||
# Kubernetes cluster
|
||||
# Kubernetes cluster — runners-ci pool configured inline to avoid the
|
||||
# remove_default_node_pool create-then-delete cycle that adds ~5 min.
|
||||
resource "google_container_cluster" "this" {
|
||||
name = local.name
|
||||
location = var.zone
|
||||
project = var.project
|
||||
|
||||
# Create an empty cluster — all node pools are managed as separate resources
|
||||
remove_default_node_pool = true
|
||||
initial_node_count = 1
|
||||
|
||||
deletion_protection = false
|
||||
|
||||
release_channel {
|
||||
@ -22,26 +19,23 @@ resource "google_container_cluster" "this" {
|
||||
# Send pod stdout/stderr to Cloud Logging automatically
|
||||
logging_service = "logging.googleapis.com/kubernetes"
|
||||
monitoring_service = "monitoring.googleapis.com/kubernetes"
|
||||
}
|
||||
|
||||
# Default (infra) node pool
|
||||
resource "google_container_node_pool" "default" {
|
||||
name = var.node_pool_name
|
||||
cluster = google_container_cluster.this.id
|
||||
location = var.zone
|
||||
project = var.project
|
||||
node_pool {
|
||||
name = var.node_pool_name
|
||||
initial_node_count = var.node_pool_min
|
||||
|
||||
autoscaling {
|
||||
min_node_count = var.node_pool_min
|
||||
max_node_count = var.node_pool_max
|
||||
}
|
||||
autoscaling {
|
||||
min_node_count = var.node_pool_min
|
||||
max_node_count = var.node_pool_max
|
||||
}
|
||||
|
||||
node_config {
|
||||
machine_type = var.node_pool_machine_type
|
||||
labels = var.node_pool_labels
|
||||
node_config {
|
||||
machine_type = var.node_pool_machine_type
|
||||
labels = var.node_pool_labels
|
||||
|
||||
oauth_scopes = [
|
||||
"https://www.googleapis.com/auth/cloud-platform",
|
||||
]
|
||||
oauth_scopes = [
|
||||
"https://www.googleapis.com/auth/cloud-platform",
|
||||
]
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
11
.github/workflows/release.yml
vendored
11
.github/workflows/release.yml
vendored
@ -205,6 +205,7 @@ jobs:
|
||||
runs-on: ubuntu-latest
|
||||
if: github.ref_type == 'tag' || github.event_name == 'workflow_dispatch'
|
||||
needs: build-docker-dist-tests
|
||||
timeout-minutes: 60
|
||||
permissions:
|
||||
id-token: write
|
||||
contents: read
|
||||
@ -212,6 +213,7 @@ jobs:
|
||||
TF_VAR_project: ${{ secrets.RELEASE_TESTS_GCP_PROJECT }}
|
||||
TF_VAR_region: europe-west4
|
||||
TF_VAR_zone: europe-west4-b
|
||||
TF_PLUGIN_CACHE_DIR: ~/.terraform.d/plugin-cache
|
||||
STORAGEDOCKERIMAGE: ${{ needs.build-docker-dist-tests.outputs.logos_storage_image }}
|
||||
TEST_TYPE: release-tests
|
||||
BRANCH: ${{ inputs.branch || 'master' }}
|
||||
@ -221,6 +223,13 @@ jobs:
|
||||
- name: Checkout
|
||||
uses: actions/checkout@v4
|
||||
|
||||
- name: Cache Terraform plugins
|
||||
uses: actions/cache@v4
|
||||
with:
|
||||
path: ~/.terraform.d/plugin-cache
|
||||
key: terraform-google-${{ hashFiles(format('{0}/.terraform.lock.hcl', env.TF_DIR)) }}
|
||||
restore-keys: terraform-google-
|
||||
|
||||
- name: Authenticate to GCP
|
||||
uses: google-github-actions/auth@v2
|
||||
with:
|
||||
@ -229,6 +238,8 @@ jobs:
|
||||
|
||||
- name: Setup gcloud
|
||||
uses: google-github-actions/setup-gcloud@v2
|
||||
with:
|
||||
install_components: gke-gcloud-auth-plugin
|
||||
|
||||
- name: Setup Terraform
|
||||
uses: hashicorp/setup-terraform@v3
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user