feat(k8s): add DO (DigitalOcean) Kubernetes cluster Terraform

Adds Terraform configuration to provision a DO K8s cluster with
auto-scaling node pools for running Codex benchmarks.

Signed-off-by: Chrysostomos Nanakos <chris@include.gr>
This commit is contained in:
Chrysostomos Nanakos 2025-10-21 18:17:26 +03:00
parent 200c749cb5
commit b59ddaf648
No known key found for this signature in database
11 changed files with 247 additions and 0 deletions

View File

@ -0,0 +1,19 @@
# Remote state configuration.
# State is kept in an S3-compatible bucket reached through a
# *.digitaloceanspaces.com endpoint. Every <...> value is a placeholder that
# has to be filled in before `terraform init`.
terraform {
  backend "s3" {
    # Where the state object lives.
    bucket = "codex-infra-terraform"
    key    = "clusters/codex-benchmarks-do-<DO_REGION>/terraform.tfstate"

    # Endpoint override and the matching region slug.
    region = "<S3_REGION>"
    endpoints = {
      s3 = "https://<S3_REGION>.digitaloceanspaces.com"
    }

    # Access credentials for the bucket.
    # NOTE(review): do not commit real keys in place of these placeholders.
    access_key = "<S3_ACCESS_KEY>"
    secret_key = "<S3_SECRET_KEY>"

    # All backend skip_* switches are enabled, presumably because the
    # endpoint above is not AWS itself.
    skip_credentials_validation = true
    skip_metadata_api_check     = true
    skip_region_validation      = true
    skip_requesting_account_id  = true
    skip_s3_checksum            = true
  }
}

View File

@ -0,0 +1,40 @@
# Kubernetes cluster
# Instantiates the local ../modules/doks module, which creates the VPC and the
# cluster together with its default ("infra") node pool.
module "doks" {
  source = "../modules/doks"

  # Naming / placement
  name         = "codex-benchmarks"
  region       = var.region
  vpc_ip_range = "10.1.0.0/20"

  # Control plane
  kubernetes_version      = "1.33.1-do.5"
  kubernetes_ha           = true
  kubernetes_auto_upgrade = false

  # Default node pool: auto-scaled between 1 and 4 nodes
  kubernetes_node_pool_name       = "infra-s-4vcpu-16gb-amd"
  kubernetes_node_pool_size       = "s-4vcpu-16gb-amd"
  kubernetes_node_pool_auto_scale = true
  kubernetes_node_pool_min        = 1
  kubernetes_node_pool_max        = 4

  # Metadata attached to the default node pool
  kubernetes_node_pool_tags = ["default", "autoscale"]
  kubernetes_node_pool_labels = {
    default-pool  = "true"
    scaling-type  = "auto"
    workload-type = "infra"
  }
}
# Node pool - Codex
# Additional auto-scaled pool (1 to 95 nodes of size "c-4") attached to the
# cluster created by module.doks; labelled for benchmark workloads.
resource "digitalocean_kubernetes_node_pool" "codex-d-4vcpu-8gb" {
  cluster_id = module.doks.kubernetes_cluster_id
  name       = "codex-d-4vcpu-8gb"
  size       = "c-4"

  auto_scale = true
  min_nodes  = 1
  max_nodes  = 95

  # Initial size only. With auto_scale enabled the provider treats an explicit
  # node_count as a request to reset the pool to that value, so later changes
  # to it are ignored below.
  node_count = 4

  tags = ["codex"]
  labels = {
    default-pool  = "false"
    scaling-type  = "auto"
    workload-type = "benchmarks"
  }

  lifecycle {
    # The cluster autoscaler owns the live node count; without this every
    # `terraform apply` would scale the pool back to node_count.
    ignore_changes = [node_count]
  }
}

View File

@ -0,0 +1,4 @@
# Providers
# The DigitalOcean provider authenticates with the API token supplied through
# the `do_token` input variable.
provider "digitalocean" {
  token = var.do_token
}

View File

@ -0,0 +1,2 @@
# Variable values for this configuration. Both entries are placeholders that
# must be replaced before planning.
# NOTE(review): avoid committing a real API token in this file.
region   = "<DO_REGION>"
do_token = "<DOP_TOKEN>"

View File

@ -0,0 +1,10 @@
# Region slug; passed to the doks module as its `region` input.
variable "region" {
  type        = string
  description = "DigitalOcean region (e.g. ams3)"
}
# API token consumed by the digitalocean provider block; marked sensitive so
# Terraform redacts it from plan/apply output.
variable "do_token" {
  type        = string
  sensitive   = true
  description = "DigitalOcean API token"
}

View File

@ -0,0 +1,10 @@
# Terraform settings
terraform {
  # The S3 backend configuration that accompanies this root module uses the
  # `endpoints` map, `skip_requesting_account_id` and `skip_s3_checksum`
  # arguments. Earlier 1.x releases reject those arguments, so the previous
  # "~> 1.0" constraint admitted versions that cannot initialise the backend.
  # NOTE(review): 1.6.3 is the release that introduced `skip_s3_checksum`
  # according to the Terraform changelog; confirm before tightening further.
  required_version = ">= 1.6.3, < 2.0.0"

  required_providers {
    digitalocean = {
      source  = "digitalocean/digitalocean"
      version = "~> 2.0"
    }
  }
}

View File

@ -0,0 +1,4 @@
# Derived names shared inside the module.
locals {
  # "<name>-do-<region>"; used as the name of the VPC and of the cluster.
  name = "${var.name}-do-${var.region}"

  # "pool-<droplet size slug>"; a node pool name derived from the pool size.
  node_pool_name = "pool-${var.kubernetes_node_pool_size}"
}

View File

@ -0,0 +1,41 @@
# VPC
# Network the cluster is placed in; named after local.name.
resource "digitalocean_vpc" "this" {
  name     = local.name
  region   = var.region
  ip_range = var.vpc_ip_range
}
# Kubernetes cluster
# Creates the cluster inside the module's VPC together with its default node
# pool and a weekly maintenance window.
resource "digitalocean_kubernetes_cluster" "this" {
  name         = local.name
  region       = var.region
  version      = var.kubernetes_version
  ha           = var.kubernetes_ha
  auto_upgrade = var.kubernetes_auto_upgrade
  vpc_uuid     = digitalocean_vpc.this.id

  node_pool {
    # The pool name is mandatory, but var.kubernetes_node_pool_name defaults
    # to null; fall back to the size-derived local.node_pool_name so callers
    # that omit the name still get a valid pool.
    name       = coalesce(var.kubernetes_node_pool_name, local.node_pool_name)
    size       = var.kubernetes_node_pool_size
    node_count = var.kubernetes_node_pool_count
    auto_scale = var.kubernetes_node_pool_auto_scale
    min_nodes  = var.kubernetes_node_pool_min
    max_nodes  = var.kubernetes_node_pool_max
    tags       = var.kubernetes_node_pool_tags
    labels     = var.kubernetes_node_pool_labels

    # Emit exactly one taint block when the taint map is non-empty, and none
    # otherwise. The map is expected to carry "key", "value" and "effect".
    dynamic "taint" {
      for_each = length(var.kubernetes_node_pool_taint) == 0 ? {} : { taint = true }
      content {
        # Index syntax replaces the deprecated two-argument lookup(); both
        # fail the same way when an entry is missing.
        key    = var.kubernetes_node_pool_taint["key"]
        value  = var.kubernetes_node_pool_taint["value"]
        effect = var.kubernetes_node_pool_taint["effect"]
      }
    }
  }

  maintenance_policy {
    day        = var.kubernetes_maintenance_day
    start_time = var.kubernetes_maintenance_start_time
  }
}

View File

@ -0,0 +1,5 @@
# Kubernetes cluster
# Exposes the ID of the cluster resource so callers can attach further
# resources (for example extra node pools) to it.
output "kubernetes_cluster_id" {
  description = "A unique ID that can be used to identify and reference a Kubernetes cluster."
  value       = digitalocean_kubernetes_cluster.this.id
}

View File

@ -0,0 +1,103 @@
# Main
# Both values are required and are combined into local.name.
variable "name" {
  description = "A name for the created resources."
  type        = string
}

variable "region" {
  description = "The DigitalOcean region slug for the resources location."
  type        = string
}
# VPC
# Required; forwarded to the `ip_range` argument of the VPC resource.
variable "vpc_ip_range" {
  description = " The range of IP addresses for the VPC in CIDR notation."
  type        = string
}
# Kubernetes Control Plane
# Version, HA and auto-upgrade are required inputs; the maintenance window
# defaults to Sunday 04:00.
variable "kubernetes_version" {
  description = "The slug identifier for the version of Kubernetes used for the cluster."
  type        = string
}

variable "kubernetes_ha" {
  description = "Enable/disable the high availability control plane for a cluster."
  type        = bool
}

variable "kubernetes_auto_upgrade" {
  description = "A boolean value indicating whether the cluster will be automatically upgraded to new patch releases during its maintenance window."
  type        = bool
}

variable "kubernetes_maintenance_day" {
  description = "The day of the maintenance window policy."
  type        = string
  default     = "sunday"
}

variable "kubernetes_maintenance_start_time" {
  description = "The start time in UTC of the maintenance window policy in 24-hour clock format / HH:MM notation (e.g., 15:00)."
  type        = string
  default     = "04:00"
}
# Kubernetes default Node Pool
# Every input in this group defaults to null and is passed straight through to
# the cluster's node_pool block.
variable "kubernetes_node_pool_name" {
  description = "A name for the node pool."
  type        = string
  default     = null
}

variable "kubernetes_node_pool_size" {
  description = "The slug identifier for the type of Droplet to be used as workers in the node pool."
  type        = string
  default     = null
}

variable "kubernetes_node_pool_count" {
  description = "The number of Droplet instances in the node pool."
  type        = number
  default     = null
}

variable "kubernetes_node_pool_auto_scale" {
  description = "Enable auto-scaling of the number of nodes in the node pool within the given min/max range."
  type        = bool
  default     = null
}

variable "kubernetes_node_pool_min" {
  description = "If auto-scaling is enabled, this represents the minimum number of nodes that the node pool can be scaled down to."
  type        = number
  default     = null
}

variable "kubernetes_node_pool_max" {
  description = "If auto-scaling is enabled, this represents the maximum number of nodes that the node pool can be scaled up to."
  type        = number
  default     = null
}
# Tag names for the default node pool. Declared as list(string) rather than
# list(any) so that non-string elements are rejected or converted at the
# module boundary instead of reaching the provider unchecked.
variable "kubernetes_node_pool_tags" {
  description = "A list of tag names applied to the node pool."
  type        = list(string)
  default     = ["default", "autoscale"]
}
# Labels for the default node pool; forwarded to the pool's `labels` argument.
variable "kubernetes_node_pool_labels" {
  description = "A map of key/value pairs to apply to nodes in the pool."
  type        = map(string)
  default = {
    default-pool = "true"
    scaling-type = "auto"
  }
}

# Optional taint for the default node pool. An empty map (the default) means
# no taint block is generated; otherwise the cluster resource reads the
# "key", "value" and "effect" entries.
variable "kubernetes_node_pool_taint" {
  description = "A block representing a taint applied to all nodes in the pool."
  type        = map(string)
  default     = {}
}

View File

@ -0,0 +1,9 @@
# Terraform settings
# Declares the provider the module's resources come from: the
# digitalocean/digitalocean provider, constrained to the 2.x series.
terraform {
  required_providers {
    digitalocean = {
      version = "~> 2.0"
      source  = "digitalocean/digitalocean"
    }
  }
}