24 lines
734 B
C#
Raw Normal View History

2023-09-13 16:06:05 +02:00
using KubernetesWorkflow;
using KubernetesWorkflow.Recipe;
2023-09-13 11:25:08 +02:00
namespace MetricsPlugin
{
2023-09-13 16:06:05 +02:00
/// <summary>
/// Container recipe for the Prometheus instance used by the metrics plugin.
/// Declares the application name and image, and configures scheduling,
/// the exposed port, and the Prometheus configuration during initialization.
/// </summary>
public class PrometheusContainerRecipe : ContainerRecipeFactory
{
    /// <summary>Tag passed when registering the exposed Prometheus port.</summary>
    public const string PortTag = "prometheus_port_tag";

    /// <summary>Application name reported for this container.</summary>
    public override string AppName => "prometheus";

    /// <summary>Container image used for this recipe.</summary>
    public override string Image => "logosstorage/dist-tests-prometheus:latest";

    /// <summary>
    /// Configures the recipe from the supplied startup configuration.
    /// </summary>
    /// <param name="startupConfig">
    /// Startup configuration from which a <c>PrometheusStartupConfig</c> is read.
    /// </param>
    protected override void Initialize(StartupConfig startupConfig)
    {
        var prometheusConfig = startupConfig.Get<PrometheusStartupConfig>();

        // Explicit node pool selection: per the change that introduced this call,
        // ScheduleInPoolsWithLabel sets a nodeSelector label targeting the intended pool
        // (it replaced the former "allow-tests-pods NotIn [false]" affinity rule).
        // NOTE(review): that change description says recipes also declare the pool's
        // taints via AddToleration; no such call is present here - confirm whether the
        // "tests-pods" pool is tainted and a toleration is required.
        ScheduleInPoolsWithLabel("workload-type", "tests-pods");

        // Registers the Prometheus port under PortTag together with the PROM_PORT variable.
        // Presumably the variable carries the port number into the container - verify
        // against ContainerRecipeFactory.AddExposedPortAndVar.
        AddExposedPortAndVar("PROM_PORT", PortTag);

        // Hands the configuration's PrometheusConfigBase64 value to the container as PROM_CONFIG.
        AddEnvVar("PROM_CONFIG", prometheusConfig.PrometheusConfigBase64);
    }
}
}