2023-11-06 14:33:47 +01:00
|
|
|
using KubernetesWorkflow;
|
2023-11-12 10:07:23 +01:00
|
|
|
using KubernetesWorkflow.Recipe;
|
2023-09-08 09:39:56 +02:00
|
|
|
using Utils;
|
2023-04-12 13:53:55 +02:00
|
|
|
|
2026-04-17 15:03:22 +10:00
|
|
|
namespace StoragePlugin
|
2023-04-12 13:53:55 +02:00
|
|
|
{
|
2026-04-17 15:03:22 +10:00
|
|
|
/// <summary>
/// Container recipe for a storage node: sets resource requests, scheduling,
/// ports, the data volume and the environment variables passed to the container.
/// </summary>
public class LogosStorageContainerRecipe : ContainerRecipeFactory
{
    // Tags identifying the ports this recipe creates.
    public const string ApiPortTag = "storage_api_port";
    public const string ListenPortTag = "storage_listen_port";
    public const string MetricsPortTag = "storage_metrics_port";
    public const string DiscoveryPortTag = "storage_discovery_port";

    // Used by tests for time-constraint assertions.
    public static readonly TimeSpan MaxUploadTimePerMegabyte = TimeSpan.FromSeconds(2.0);
    public static readonly TimeSpan MaxDownloadTimePerMegabyte = TimeSpan.FromSeconds(2.0);

    private readonly LogosStorageDockerImage logosStorageDockerImage;

    /// <summary>Application name of the container.</summary>
    public override string AppName
    {
        get { return "storage"; }
    }

    /// <summary>Docker image name, as supplied by the injected image provider.</summary>
    public override string Image
    {
        get { return logosStorageDockerImage.GetLogosStorageDockerImage(); }
    }

    /// <summary>Creates the recipe.</summary>
    /// <param name="logosStorageDockerImage">Provider of the docker image name returned by <see cref="Image"/>.</param>
    public LogosStorageContainerRecipe(LogosStorageDockerImage logosStorageDockerImage)
        => this.logosStorageDockerImage = logosStorageDockerImage;

    /// <summary>
    /// Builds the recipe from the supplied startup configuration. Ports are
    /// created in a fixed order: API, discovery, listen, then (optionally) metrics.
    /// </summary>
    /// <param name="startupConfig">Must contain a <see cref="LogosStorageStartupConfig"/>.</param>
    protected override void Initialize(StartupConfig startupConfig)
    {
        SetResourcesRequest(milliCPUs: 100, memory: 100.MB());
        // Resource limits are currently not applied:
        //SetResourceLimits(milliCPUs: 4000, memory: 12.GB());

        // Schedule storage nodes on the spot node pool, away from the test runner,
        // and tolerate the spot taint carried by that pool.
        ScheduleInPoolsWithLabel("workload-type", "tests-pods");
        AddToleration("cloud.google.com/gke-spot", "true", "NoSchedule");

        var settings = startupConfig.Get<LogosStorageStartupConfig>();

        AddEnvVar("STORAGE_API_PORT", CreateApiPort(settings, ApiPortTag));
        AddEnvVar("STORAGE_API_BINDADDR", "0.0.0.0");

        // Each container gets its own data directory, backed by a volume.
        var dataDirectory = $"datadir{ContainerNumber}";
        AddEnvVar("STORAGE_DATA_DIR", dataDirectory);
        AddVolume($"codex/{dataDirectory}", GetVolumeCapacity(settings));

        AddEnvVar("STORAGE_DISC_PORT", CreateDiscoveryPort(settings));
        AddEnvVar("STORAGE_LOG_LEVEL", settings.LogLevelWithTopics());

        ConfigureNatAnnouncement(settings);

        var p2pPort = CreateListenPort(settings);
        AddEnvVar("STORAGE_LISTEN_ADDRS", $"/ip4/0.0.0.0/tcp/{p2pPort.Number}");

        ConfigureOptionalSettings(settings);
        ConfigureMetrics(settings);

        if (!string.IsNullOrEmpty(settings.NameOverride))
        {
            // NOTE(review): this variable keeps the CODEX_ prefix while every other
            // one uses STORAGE_ - confirm the container image expects this name.
            AddEnvVar("CODEX_NODENAME", settings.NameOverride);
        }
    }

    // Selects which IP address the node announces itself on.
    private void ConfigureNatAnnouncement(LogosStorageStartupConfig config)
    {
        if (config.PublicTestNet == null)
        {
            // This makes the node announce itself to its local (pod) IP address.
            AddEnvVar("NAT_IP_AUTO", "true");
        }
        else
        {
            // This makes the node announce itself to its public IP address.
            AddEnvVar("NAT_IP_AUTO", "false");
            AddEnvVar("NAT_PUBLIC_IP_AUTO", PublicIpService.Address);
        }
    }

    // Forwards the bootstrap node, storage quota and block settings that are present in the config.
    private void ConfigureOptionalSettings(LogosStorageStartupConfig config)
    {
        var bootstrapSpr = config.BootstrapSpr;
        if (!string.IsNullOrEmpty(bootstrapSpr))
        {
            AddEnvVar("STORAGE_BOOTSTRAP_NODE", bootstrapSpr);
        }

        var quota = config.StorageQuota;
        if (quota != null)
        {
            AddEnvVar("STORAGE_STORAGE_QUOTA", quota.SizeInBytes.ToString()!);
        }

        if (config.BlockTTL != null)
        {
            AddEnvVar("STORAGE_BLOCK_TTL", config.BlockTTL.ToString()!);
        }

        if (config.BlockMaintenanceInterval != null)
        {
            // The interval is passed as a whole number of seconds.
            var intervalSeconds = Convert.ToInt32(config.BlockMaintenanceInterval.Value.TotalSeconds);
            AddEnvVar("STORAGE_BLOCK_MI", intervalSeconds.ToString());
        }

        if (config.BlockMaintenanceNumber != null)
        {
            AddEnvVar("STORAGE_BLOCK_MN", config.BlockMaintenanceNumber.ToString()!);
        }
    }

    // When metrics are enabled: creates the metrics port, switches metrics on
    // and annotates the pod with the prometheus.io scrape settings.
    private void ConfigureMetrics(LogosStorageStartupConfig config)
    {
        if (!config.MetricsEnabled)
        {
            return;
        }

        var scrapePort = CreateApiPort(config, MetricsPortTag);
        AddEnvVar("STORAGE_METRICS", "true");
        AddEnvVar("STORAGE_METRICS_ADDRESS", "0.0.0.0");
        AddEnvVar("STORAGE_METRICS_PORT", scrapePort);

        AddPodAnnotation("prometheus.io/scrape", "true");
        AddPodAnnotation("prometheus.io/port", scrapePort.Number.ToString());
    }

    /// <summary>
    /// Creates a port for the given tag: internal when a public testnet is
    /// configured, exposed otherwise.
    /// </summary>
    private Port CreateApiPort(LogosStorageStartupConfig config, string tag)
    {
        if (config.PublicTestNet != null)
        {
            return AddInternalPort(tag);
        }

        return AddExposedPort(tag);
    }

    /// <summary>
    /// Creates the listen port: exposed on the configured public listen port
    /// when a public testnet is configured, internal otherwise.
    /// </summary>
    private Port CreateListenPort(LogosStorageStartupConfig config)
    {
        if (config.PublicTestNet != null)
        {
            return AddExposedPort(config.PublicTestNet.PublicListenPort, ListenPortTag);
        }

        return AddInternalPort(ListenPortTag);
    }

    /// <summary>
    /// Creates the UDP discovery port: exposed on the configured public
    /// discovery port when a public testnet is configured, internal otherwise.
    /// </summary>
    private Port CreateDiscoveryPort(LogosStorageStartupConfig config)
    {
        if (config.PublicTestNet != null)
        {
            return AddExposedPort(config.PublicTestNet.PublicDiscoveryPort, DiscoveryPortTag, PortProtocol.UDP);
        }

        return AddInternalPort(DiscoveryPortTag, PortProtocol.UDP);
    }

    /// <summary>
    /// Returns the data volume capacity: the configured storage quota, or 8 GB
    /// when none is configured, multiplied by 1.2.
    /// </summary>
    private ByteSize GetVolumeCapacity(LogosStorageStartupConfig config)
    {
        if (config.StorageQuota == null)
        {
            // Default Codex quota: 8 Gb, using +20% to be safe.
            return 8.GB().Multiply(1.2);
        }

        return config.StorageQuota.Multiply(1.2);
    }
}
|
|
|
|
|
}
|