diff --git a/Framework/KubernetesWorkflow/K8sController.cs b/Framework/KubernetesWorkflow/K8sController.cs index 68ffcde5..5ca3a609 100644 --- a/Framework/KubernetesWorkflow/K8sController.cs +++ b/Framework/KubernetesWorkflow/K8sController.cs @@ -371,8 +371,8 @@ namespace KubernetesWorkflow Spec = new V1PodSpec { PriorityClassName = GetPriorityClassName(containerRecipes), - Affinity = CreatePodAffinity(containerRecipes), - NodeSelector = CreateNodeSelector(location), + NodeSelector = CreateNodeSelector(location, containerRecipes), + Tolerations = CreateTolerations(containerRecipes), Containers = CreateDeploymentContainers(containerRecipes), Volumes = CreateVolumes(containerRecipes) } @@ -392,51 +392,32 @@ namespace KubernetesWorkflow WaitUntilDeploymentOffline(deployment.Name); } - private IDictionary CreateNodeSelector(ILocation location) + private IDictionary CreateNodeSelector(ILocation location, ContainerRecipe[] recipes) { - var nodeLabel = GetNodeLabelForLocation(location); - if (nodeLabel == null) return new Dictionary(); + var result = new Dictionary(); - return new Dictionary - { - { nodeLabel.Key, nodeLabel.Value } - }; + var nodeLabel = GetNodeLabelForLocation(location); + if (nodeLabel != null) result[nodeLabel.Key] = nodeLabel.Value; + + foreach (var recipe in recipes) + foreach (var kvp in recipe.NodePoolLabels) + result[kvp.Key] = kvp.Value; + + return result; } - private V1Affinity? CreatePodAffinity(ContainerRecipe[] recipes) + private IList? CreateTolerations(ContainerRecipe[] recipes) { - var notIns = recipes - .Select(r => r.SchedulingAffinity.NotIn) - .Where(n => !string.IsNullOrEmpty(n)) - .Distinct() - .ToList(); + var distinct = recipes.SelectMany(r => r.Tolerations).Distinct().ToList(); + if (!distinct.Any()) return null; - if (!notIns.Any()) return null; - - return new V1Affinity + return distinct.Select(t => new V1Toleration { - NodeAffinity = new V1NodeAffinity - { - RequiredDuringSchedulingIgnoredDuringExecution = new V1NodeSelector - { - NodeSelectorTerms = new List - { - new V1NodeSelectorTerm - { - MatchExpressions = new List - { - new V1NodeSelectorRequirement - { - Key = "allow-tests-pods", - OperatorProperty = "NotIn", - Values = notIns - } - } - } - } - } - } - }; + Key = t.Key, + OperatorProperty = "Equal", + Value = t.Value, + Effect = t.Effect + }).ToList(); } private K8sNodeLabel? GetNodeLabelForLocation(ILocation location) @@ -445,13 +426,10 @@ namespace KubernetesWorkflow return l.NodeLabel; } - private string GetPriorityClassName(ContainerRecipe[] containerRecipes) + private string? GetPriorityClassName(ContainerRecipe[] containerRecipes) { - if (containerRecipes.Any(c => c.SetCriticalPriority)) - { - return "system-node-critical"; - } - return null!; + if (containerRecipes.Any(c => c.IsCriticalPriority)) return "system-node-critical"; + return null; } private IDictionary GetSelector(ContainerRecipe[] containerRecipes) @@ -759,7 +737,7 @@ namespace KubernetesWorkflow private RunningService? CreateInternalService(ContainerRecipe[] recipes) { - return CreateService(recipes, r => r.InternalPorts.Concat(r.ExposedPorts).ToArray(), "ClusterIP", "int", false); + return CreateService(recipes, r => r.InternalPorts.Concat(r.ExposedPorts), "ClusterIP", "int", false); } private RunningService? CreateExternalService(ContainerRecipe[] recipes) @@ -767,7 +745,7 @@ namespace KubernetesWorkflow return CreateService(recipes, r => r.ExposedPorts, "NodePort", "ext", true); } - private RunningService? CreateService(ContainerRecipe[] recipes, Func portSelector, string serviceType, string namePostfix, bool isNodePort) + private RunningService? CreateService(ContainerRecipe[] recipes, Func> portSelector, string serviceType, string namePostfix, bool isNodePort) { var ports = CreateServicePorts(recipes, portSelector, isNodePort); if (!ports.Any()) return null; @@ -843,7 +821,7 @@ namespace KubernetesWorkflow }; } - private List CreateServicePorts(ContainerRecipe[] recipes, Func portSelector, bool isNodePort) + private List CreateServicePorts(ContainerRecipe[] recipes, Func> portSelector, bool isNodePort) { var result = new List(); foreach (var recipe in recipes) diff --git a/Framework/KubernetesWorkflow/Recipe/ContainerRecipe.cs b/Framework/KubernetesWorkflow/Recipe/ContainerRecipe.cs index b643f38d..61038421 100644 --- a/Framework/KubernetesWorkflow/Recipe/ContainerRecipe.cs +++ b/Framework/KubernetesWorkflow/Recipe/ContainerRecipe.cs @@ -2,16 +2,17 @@ { public class ContainerRecipe { - public ContainerRecipe(DateTime recipeCreatedUtc, int number, string? nameOverride, string image, ContainerResources resources, SchedulingAffinity schedulingAffinity, CommandOverride commandOverride, bool setCriticalPriority, Port[] exposedPorts, Port[] internalPorts, EnvVar[] envVars, PodLabels podLabels, PodAnnotations podAnnotations, VolumeMount[] volumes, ContainerAdditionals additionals) + public ContainerRecipe(DateTime recipeCreatedUtc, int number, string? nameOverride, string image, ContainerResources resources, IReadOnlyDictionary nodePoolLabels, IReadOnlyList tolerations, CommandOverride commandOverride, bool isCriticalPriority, IReadOnlyList exposedPorts, IReadOnlyList internalPorts, IReadOnlyList envVars, PodLabels podLabels, PodAnnotations podAnnotations, IReadOnlyList volumes, ContainerAdditionals additionals) { RecipeCreatedUtc = recipeCreatedUtc; Number = number; NameOverride = nameOverride; Image = image; Resources = resources; - SchedulingAffinity = schedulingAffinity; + NodePoolLabels = nodePoolLabels; + Tolerations = tolerations; CommandOverride = commandOverride; - SetCriticalPriority = setCriticalPriority; + IsCriticalPriority = isCriticalPriority; ExposedPorts = exposedPorts; InternalPorts = internalPorts; EnvVars = envVars; @@ -37,16 +38,17 @@ public int Number { get; } public string? NameOverride { get; } public ContainerResources Resources { get; } - public SchedulingAffinity SchedulingAffinity { get; } + public IReadOnlyDictionary NodePoolLabels { get; } + public IReadOnlyList Tolerations { get; } public CommandOverride CommandOverride { get; } - public bool SetCriticalPriority { get; } + public bool IsCriticalPriority { get; } public string Image { get; } - public Port[] ExposedPorts { get; } - public Port[] InternalPorts { get; } - public EnvVar[] EnvVars { get; } + public IReadOnlyList ExposedPorts { get; } + public IReadOnlyList InternalPorts { get; } + public IReadOnlyList EnvVars { get; } public PodLabels PodLabels { get; } public PodAnnotations PodAnnotations { get; } - public VolumeMount[] Volumes { get; } + public IReadOnlyList Volumes { get; } public ContainerAdditionals Additionals { get; } public Port? GetPortByTag(string tag) @@ -61,7 +63,8 @@ $"internalPorts: {string.Join(",", InternalPorts.Select(p => p.Number))}, " + $"envVars: {string.Join(",", EnvVars.Select(v => v.ToString()))}, " + $"limits: {Resources}, " + - $"affinity: {SchedulingAffinity}, " + + $"nodePoolLabels: [{string.Join(",", NodePoolLabels.Select(kvp => $"{kvp.Key}={kvp.Value}"))}], " + + $"tolerations: [{string.Join(",", Tolerations.Select(t => $"{t.Key}={t.Value}:{t.Effect}"))}], " + $"volumes: {string.Join(",", Volumes.Select(v => $"'{v.MountPath}'"))}"; } } @@ -107,6 +110,8 @@ UDP } + public record PodToleration(string Key, string Value, string Effect); + public class EnvVar { public EnvVar(string name, string value) diff --git a/Framework/KubernetesWorkflow/Recipe/ContainerRecipeFactory.cs b/Framework/KubernetesWorkflow/Recipe/ContainerRecipeFactory.cs index 2e22dd61..48b0c3ea 100644 --- a/Framework/KubernetesWorkflow/Recipe/ContainerRecipeFactory.cs +++ b/Framework/KubernetesWorkflow/Recipe/ContainerRecipeFactory.cs @@ -14,7 +14,8 @@ namespace KubernetesWorkflow.Recipe private readonly List additionals = new List(); private RecipeComponentFactory factory = null!; private ContainerResources resources = new ContainerResources(); - private SchedulingAffinity schedulingAffinity = new SchedulingAffinity(); + private readonly Dictionary nodePoolLabels = new Dictionary(); + private readonly List tolerations = new List(); private CommandOverride commandOverride = new CommandOverride(); private bool setCriticalPriority; @@ -26,7 +27,10 @@ namespace KubernetesWorkflow.Recipe Initialize(config); - var recipe = new ContainerRecipe(DateTime.UtcNow, containerNumber, config.NameOverride, Image, resources, schedulingAffinity, commandOverride, setCriticalPriority, + var recipe = new ContainerRecipe(DateTime.UtcNow, containerNumber, config.NameOverride, Image, resources, + new Dictionary(nodePoolLabels), + tolerations.ToArray(), + commandOverride, setCriticalPriority, exposedPorts.ToArray(), internalPorts.ToArray(), envVars.ToArray(), @@ -35,18 +39,7 @@ namespace KubernetesWorkflow.Recipe volumeMounts.ToArray(), ContainerAdditionals.CreateFromUserData(additionals)); - exposedPorts.Clear(); - internalPorts.Clear(); - envVars.Clear(); - podLabels.Clear(); - podAnnotations.Clear(); - volumeMounts.Clear(); - additionals.Clear(); - this.factory = null!; - resources = new ContainerResources(); - schedulingAffinity = new SchedulingAffinity(); - commandOverride = new CommandOverride(); - setCriticalPriority = false; + Reset(); return recipe; } @@ -133,9 +126,14 @@ namespace KubernetesWorkflow.Recipe SetResourcesRequest(new ContainerResourceSet(milliCPUs, memory)); } - protected void SetSchedulingAffinity(string notIn) + protected void ScheduleInPoolsWithLabel(string key, string value) { - schedulingAffinity = new SchedulingAffinity(notIn); + nodePoolLabels[key] = value; + } + + protected void AddToleration(string key, string value, string effect) + { + tolerations.Add(new PodToleration(key, value, effect)); } protected void OverrideCommand(params string[] command) @@ -165,6 +163,23 @@ namespace KubernetesWorkflow.Recipe resources.Limits = limits; } + private void Reset() + { + exposedPorts.Clear(); + internalPorts.Clear(); + envVars.Clear(); + podLabels.Clear(); + podAnnotations.Clear(); + volumeMounts.Clear(); + additionals.Clear(); + nodePoolLabels.Clear(); + tolerations.Clear(); + factory = null!; + resources = new ContainerResources(); + commandOverride = new CommandOverride(); + setCriticalPriority = false; + } + private Port AddExposedPort(Port port) { exposedPorts.Add(port); diff --git a/Framework/KubernetesWorkflow/Recipe/SchedulingAffinity.cs b/Framework/KubernetesWorkflow/Recipe/SchedulingAffinity.cs deleted file mode 100644 index 8bc4c840..00000000 --- a/Framework/KubernetesWorkflow/Recipe/SchedulingAffinity.cs +++ /dev/null @@ -1,18 +0,0 @@ -namespace KubernetesWorkflow.Recipe -{ - public class SchedulingAffinity - { - public SchedulingAffinity(string? notIn = null) - { - NotIn = notIn; - } - - public string? NotIn { get; } - - public override string ToString() - { - if (string.IsNullOrEmpty(NotIn)) return "none"; - return "notIn:" + NotIn; - } - } -} diff --git a/ProjectPlugins/LogosStorageDiscordBotPlugin/DiscordBotContainerRecipe.cs b/ProjectPlugins/LogosStorageDiscordBotPlugin/DiscordBotContainerRecipe.cs index f7a67ba5..514c5961 100644 --- a/ProjectPlugins/LogosStorageDiscordBotPlugin/DiscordBotContainerRecipe.cs +++ b/ProjectPlugins/LogosStorageDiscordBotPlugin/DiscordBotContainerRecipe.cs @@ -15,7 +15,8 @@ namespace LogosStorageDiscordBotPlugin { var config = startupConfig.Get(); - SetSchedulingAffinity(notIn: "false"); + ScheduleInPoolsWithLabel("workload-type", "tests-pods"); + AddToleration("cloud.google.com/gke-provisioning", "spot", "NoSchedule"); AddEnvVar("TOKEN", config.Token); AddEnvVar("SERVERNAME", config.ServerName); diff --git a/ProjectPlugins/LogosStorageDiscordBotPlugin/RewarderBotContainerRecipe.cs b/ProjectPlugins/LogosStorageDiscordBotPlugin/RewarderBotContainerRecipe.cs index 94bf760f..3075f357 100644 --- a/ProjectPlugins/LogosStorageDiscordBotPlugin/RewarderBotContainerRecipe.cs +++ b/ProjectPlugins/LogosStorageDiscordBotPlugin/RewarderBotContainerRecipe.cs @@ -13,7 +13,8 @@ namespace LogosStorageDiscordBotPlugin { var config = startupConfig.Get(); - SetSchedulingAffinity(notIn: "false"); + ScheduleInPoolsWithLabel("workload-type", "tests-pods"); + AddToleration("cloud.google.com/gke-provisioning", "spot", "NoSchedule"); AddEnvVar("DISCORDBOTHOST", config.DiscordBotHost); AddEnvVar("DISCORDBOTPORT", config.DiscordBotPort.ToString()); diff --git a/ProjectPlugins/MetricsPlugin/PrometheusContainerRecipe.cs b/ProjectPlugins/MetricsPlugin/PrometheusContainerRecipe.cs index 9a95f8ed..c665a326 100644 --- a/ProjectPlugins/MetricsPlugin/PrometheusContainerRecipe.cs +++ b/ProjectPlugins/MetricsPlugin/PrometheusContainerRecipe.cs @@ -14,7 +14,8 @@ namespace MetricsPlugin { var config = startupConfig.Get(); - SetSchedulingAffinity(notIn: "false"); + ScheduleInPoolsWithLabel("workload-type", "tests-pods"); + AddToleration("cloud.google.com/gke-provisioning", "spot", "NoSchedule"); AddExposedPortAndVar("PROM_PORT", PortTag); AddEnvVar("PROM_CONFIG", config.PrometheusConfigBase64); diff --git a/ProjectPlugins/StoragePlugin/ContainerRecipe.cs b/ProjectPlugins/StoragePlugin/ContainerRecipe.cs index 1bb4b587..13ca3d7b 100644 --- a/ProjectPlugins/StoragePlugin/ContainerRecipe.cs +++ b/ProjectPlugins/StoragePlugin/ContainerRecipe.cs @@ -29,7 +29,9 @@ namespace StoragePlugin SetResourcesRequest(milliCPUs: 100, memory: 100.MB()); //SetResourceLimits(milliCPUs: 4000, memory: 12.GB()); - SetSchedulingAffinity(notIn: "false"); + // Schedule storage nodes on the spot node pool, away from the test runner. + ScheduleInPoolsWithLabel("workload-type", "tests-pods"); + AddToleration("cloud.google.com/gke-provisioning", "spot", "NoSchedule"); var config = startupConfig.Get();