mirror of
https://github.com/logos-storage/logos-storage-nim.git
synced 2026-06-27 21:09:28 +00:00
feat(testing): Release tests -- force pod spread to one pod per node (#1445)
This commit is contained in:
parent
d65f32f819
commit
cb928aacdd
55
.github/release/clusters/logos-storage-rel-tests-gcp-europe-west4/NETWORK.md
vendored
Normal file
55
.github/release/clusters/logos-storage-rel-tests-gcp-europe-west4/NETWORK.md
vendored
Normal file
@ -0,0 +1,55 @@
|
||||
# VPC Architecture
|
||||
|
||||
## Purpose
|
||||
|
||||
The VPC was originally created so that the number of pods — and therefore the number of nodes, given the anti-affinity constraint that enforces one pod per node — could be scaled beyond 8, which is the default quota for external IPs given by Google. With a VPC, the number of nodes can be scaled to the limits of the VPC rather than to the limits of the external IP quota, since each node no longer needs its own external IP. The nodes can still communicate with the wider internet, outbound only (via Cloud Router + Cloud NAT inside the VPC), for functions like pulling Docker images and dependency management.
|
||||
|
||||
## Architecture design
|
||||
```ascii
|
||||
Internet
|
||||
│
|
||||
│ (public endpoint, no
|
||||
│ master_authorized_networks)
|
||||
▼
|
||||
┌──────────────────────────┐
|
||||
│ GKE Control Plane │
|
||||
│ (Google-managed, peered)│
|
||||
│ 172.16.0.0/28 │
|
||||
└────────────┬─────────────┘
|
||||
│ private peering
|
||||
┌───────────────────────────────────── │ ──────────────────────────────────┐
|
||||
│ VPC: logos-storage-rel-tests-vpc │ │
|
||||
│ (custom, auto_create_subnetworks=false) │
|
||||
│ ▼ │
|
||||
│ ┌───────────────────────────────────────────────────────────────────┐ │
|
||||
│ │ Subnet: logos-storage-rel-tests-subnet (europe-west4) │ │
|
||||
│ │ primary range: 10.10.0.0/20 ← node internal IPs │ │
|
||||
│ │ secondary "pods": 10.20.0.0/14 ← pod IPs (VPC-native) │ │
|
||||
│ │ secondary "services": 10.30.0.0/20 ← ClusterIP services │ │
|
||||
│ │ │ │
|
||||
│ │ ┌─────────────┐ ┌─────────────┐ ┌─────────────┐ │ │
|
||||
│ │ │ GKE node 1 │ │ GKE node 2 │ ... │ GKE node N │ │ │
|
||||
│ │ │ 10.10.0.x │ │ 10.10.0.x │ │ 10.10.0.x │ │ │
|
||||
│ │ │ no ext IP │ │ no ext IP │ │ no ext IP │ │ │
|
||||
│ │ │ pods:10.20.x│ │ pods:10.20.x│ │ pods:10.20.x│ │ │
|
||||
│ │ └─────┬───────┘ └─────┬───────┘ └─────┬───────┘ │ │
|
||||
│ └─────────┼────────────────┼─────────────────────┼──────────────────┘ │
|
||||
│ └────────────────┴───────────┬─────────┘ │
|
||||
│ node-to-node / pod-to-pod traffic, all internal │
|
||||
│ │ │
|
||||
│ ▼ │
|
||||
│ ┌──────────────────────────────┐ │
|
||||
│ │ Cloud Router + Cloud NAT │ │
|
||||
│ │ (logos-storage-rel-tests-*) │ │
|
||||
│ └──────────────┬───────────────┘ │
|
||||
└─────────────────────────────────────────│────────────────────────────────┘
|
||||
▼
|
||||
Internet
|
||||
(image pulls, package mirrors,
|
||||
outbound only — no inbound)
|
||||
```
|
||||
- Custom VPC + subnet replace the project's default network, giving us a dedicated address space with the secondary ranges GKE's VPC-native (alias-IP) mode requires.
|
||||
- Three non-overlapping ranges on one subnet: node IPs (/20), pod IPs (/14), service IPs (/20) — ip_allocation_policy points the cluster at the pods/services secondary ranges.
|
||||
- Nodes have no external IPs (enable_private_nodes = true) — node-to-node and pod-to-pod traffic stays entirely inside the VPC, satisfying the test framework's "real network" requirement without touching the constrained IN_USE_ADDRESSES quota.
|
||||
- Cloud Router + Cloud NAT give the otherwise IP-less nodes outbound-only internet access (pulling container images, etc.), with no inbound exposure.
|
||||
- Control plane keeps its public endpoint (enable_private_endpoint = false) — only the nodes are private, so the GitHub-hosted CI runner can still kubectl/terraform apply against the cluster's API server over the internet.
|
||||
3
.github/release/clusters/logos-storage-rel-tests-gcp-europe-west4/locals.tf
vendored
Normal file
3
.github/release/clusters/logos-storage-rel-tests-gcp-europe-west4/locals.tf
vendored
Normal file
@ -0,0 +1,3 @@
|
||||
locals {
|
||||
name = "logos-storage-rel-tests"
|
||||
}
|
||||
@ -2,11 +2,19 @@
|
||||
module "gke" {
|
||||
source = "../modules/gke"
|
||||
|
||||
name = "logos-storage-rel-tests"
|
||||
name = local.name
|
||||
project = var.project
|
||||
region = var.region
|
||||
zone = var.zone
|
||||
|
||||
network = google_compute_network.this.id
|
||||
subnetwork = google_compute_subnetwork.this.id
|
||||
|
||||
pods_range_name = "pods"
|
||||
services_range_name = "services"
|
||||
|
||||
master_ipv4_cidr_block = "172.16.0.0/28"
|
||||
|
||||
node_pool_name = "runners-ci-e2-standard-2"
|
||||
node_pool_machine_type = "e2-standard-2"
|
||||
node_pool_count = 1
|
||||
@ -18,7 +26,7 @@ module "gke" {
|
||||
|
||||
tests_pool_name = "tests-e2-medium"
|
||||
tests_pool_machine_type = "e2-medium"
|
||||
tests_pool_count = 5
|
||||
tests_pool_count = 11
|
||||
tests_pool_labels = {
|
||||
default-pool = "false"
|
||||
scaling-type = "fixed"
|
||||
|
||||
45
.github/release/clusters/logos-storage-rel-tests-gcp-europe-west4/network.tf
vendored
Normal file
45
.github/release/clusters/logos-storage-rel-tests-gcp-europe-west4/network.tf
vendored
Normal file
@ -0,0 +1,45 @@
|
||||
# Custom VPC + subnet, required for private GKE nodes (enable_private_nodes
|
||||
# in modules/gke/main.tf). Without this, nodes would use the default network with no
|
||||
# secondary ranges available for VPC-native pod/service IPs.
|
||||
resource "google_compute_network" "this" {
|
||||
name = "${local.name}-vpc"
|
||||
project = var.project
|
||||
auto_create_subnetworks = false
|
||||
}
|
||||
|
||||
resource "google_compute_subnetwork" "this" {
|
||||
name = "${local.name}-subnet"
|
||||
project = var.project
|
||||
region = var.region
|
||||
network = google_compute_network.this.id
|
||||
ip_cidr_range = "10.10.0.0/20"
|
||||
|
||||
secondary_ip_range {
|
||||
range_name = "pods"
|
||||
ip_cidr_range = "10.20.0.0/14"
|
||||
}
|
||||
|
||||
secondary_ip_range {
|
||||
range_name = "services"
|
||||
ip_cidr_range = "10.30.0.0/20"
|
||||
}
|
||||
}
|
||||
|
||||
# Cloud Router + NAT: gives private nodes outbound internet access (pulling
|
||||
# container images, apt packages, etc.) since they have no external IPs.
|
||||
resource "google_compute_router" "this" {
|
||||
name = "${local.name}-router"
|
||||
project = var.project
|
||||
region = var.region
|
||||
network = google_compute_network.this.id
|
||||
}
|
||||
|
||||
resource "google_compute_router_nat" "this" {
|
||||
name = "${local.name}-nat"
|
||||
project = var.project
|
||||
router = google_compute_router.this.name
|
||||
region = var.region
|
||||
|
||||
nat_ip_allocate_option = "AUTO_ONLY"
|
||||
source_subnetwork_ip_ranges_to_nat = "ALL_SUBNETWORKS_ALL_IP_RANGES"
|
||||
}
|
||||
19
.github/release/clusters/modules/gke/main.tf
vendored
19
.github/release/clusters/modules/gke/main.tf
vendored
@ -8,6 +8,25 @@ resource "google_container_cluster" "this" {
|
||||
|
||||
deletion_protection = false
|
||||
|
||||
network = var.network
|
||||
subnetwork = var.subnetwork
|
||||
|
||||
# VPC-native cluster, required for private nodes.
|
||||
ip_allocation_policy {
|
||||
cluster_secondary_range_name = var.pods_range_name
|
||||
services_secondary_range_name = var.services_range_name
|
||||
}
|
||||
|
||||
# Nodes get only internal IPs, avoiding the per-region IN_USE_ADDRESSES
|
||||
# quota. The control plane keeps its public endpoint (no
|
||||
# master_authorized_networks_config) so the GitHub-hosted CI runner can
|
||||
# still reach it.
|
||||
private_cluster_config {
|
||||
enable_private_nodes = true
|
||||
enable_private_endpoint = false
|
||||
master_ipv4_cidr_block = var.master_ipv4_cidr_block
|
||||
}
|
||||
|
||||
# Send pod stdout/stderr to Cloud Logging automatically
|
||||
logging_service = "logging.googleapis.com/kubernetes"
|
||||
monitoring_service = "monitoring.googleapis.com/kubernetes"
|
||||
|
||||
@ -19,6 +19,32 @@ variable "zone" {
|
||||
description = "The GCP zone for the cluster. Using a single zone avoids the longer provisioning time of a regional (multi-zone) cluster."
|
||||
}
|
||||
|
||||
# Networking (private nodes)
|
||||
variable "network" {
|
||||
type = string
|
||||
description = "Self link or ID of the VPC network the cluster's nodes run in."
|
||||
}
|
||||
|
||||
variable "subnetwork" {
|
||||
type = string
|
||||
description = "Self link or ID of the subnetwork the cluster's nodes run in."
|
||||
}
|
||||
|
||||
variable "pods_range_name" {
|
||||
type = string
|
||||
description = "Name of the subnetwork secondary IP range to use for Pod IPs."
|
||||
}
|
||||
|
||||
variable "services_range_name" {
|
||||
type = string
|
||||
description = "Name of the subnetwork secondary IP range to use for Service IPs."
|
||||
}
|
||||
|
||||
variable "master_ipv4_cidr_block" {
|
||||
type = string
|
||||
description = "/28 CIDR range for the GKE control plane's private endpoint."
|
||||
}
|
||||
|
||||
# Kubernetes default Node Pool
|
||||
variable "node_pool_name" {
|
||||
type = string
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user