Eric 13d453d5ed
chore: Docker updates to support release tests in logos-storage-nim, and remove Codex references (#124)
* ci(docker): build dist-tests images

* Update to .net 10, kubernetes client 18.0.13

Kubernetes client 18.0.13 is compatible with Kubernetes 1.34.x. The Kubernetes version is selected automatically by kubeadm in docker desktop (v1.34.1). See https://github.com/kubernetes-client/csharp#version-compatibility for a compatibility table.

* Updates to support Kubernetes upgrade

* bump openapi.yaml to match openapi.yaml in the logos-storage-nim docker image

* bump doc to .net 10

* bump docker to .net 10

* Build image with latest tag always

Always build an image with a latest tag (as well as a sha commit hash) when there's a push to master

* docker image tag as "latest" only when pushing to master

* Update docker image to install doctl

* Remove doctl install

kubeconfig is now created and uses a plain bearer token instead of using doctl as a credential mgr

* Rename and remove all instances of Codex

* Further remove CodexNetDeployer as it is no longer needed

---------

Co-authored-by: Adam Uhlíř <adam@uhlir.dev>
2026-04-17 15:03:22 +10:00

394 lines
14 KiB
C#

using LogosStorageClient.Hooks;
using FileUtils;
using Logging;
using Utils;
namespace LogosStorageClient
{
/// <summary>
/// Operations a test can perform against a single running storage node:
/// identity and debug queries, upload/download, peer connection, endpoints and lifecycle.
/// </summary>
public partial interface IStorageNode : IHasMetricsScrapeTarget
{
/// <summary>Returns the name of this node.</summary>
string GetName();
/// <summary>Returns the image name reported for this node.</summary>
string GetImageName();
/// <summary>
/// Returns the peer id of this node. In <c>StorageNode</c> this is empty until the node
/// has been initialized, at which point it is taken from the node's debug info.
/// </summary>
string GetPeerId();
/// <summary>
/// Fetches the node's debug info.
/// </summary>
/// <param name="log">When true, the debug-info id and the peer ids in the node's table are also written to the log.</param>
DebugInfo GetDebugInfo(bool log = false);
/// <summary>Forwards the given log level to the node.</summary>
void SetLogLevel(string logLevel);
/// <summary>Returns the SPR value reported by the node.</summary>
string GetSpr();
/// <summary>Fetches the node's debug information about the peer with the given id.</summary>
DebugPeer GetDebugPeer(string peerId);
/// <summary>
/// Uploads a file. <c>StorageNode</c> sends it as "application/octet-stream" with an
/// attachment content-disposition carrying the file's name.
/// </summary>
ContentId UploadFile(TrackedFile file);
/// <summary>Uploads a file with an explicit content type and content disposition.</summary>
/// <returns>The content id returned by the node.</returns>
ContentId UploadFile(TrackedFile file, string contentType, string contentDisposition);
/// <summary>
/// Downloads the content into a newly created tracked file. <c>StorageNode</c> applies a
/// 10 minute timeout.
/// </summary>
TrackedFile? DownloadContent(ContentId contentId, string fileLabel = "");
/// <summary>
/// Downloads the content into a newly created tracked file, giving up after <paramref name="timeout"/>.
/// </summary>
TrackedFile? DownloadContent(ContentId contentId, TimeSpan timeout, string fileLabel = "");
/// <summary>Requests a streamless download of the content and returns without waiting for it to finish.</summary>
LocalDataset DownloadStreamless(ContentId cid);
/// <summary>
/// TODO: This will monitor the quota-used of the node until 'size' bytes are added. That's a very bad way
/// to track the streamless download progress. Replace it once we have a good API for this.
/// </summary>
LocalDataset DownloadStreamlessWait(ContentId cid, ByteSize size);
/// <summary>Requests a manifest-only download of the content.</summary>
LocalDataset DownloadManifestOnly(ContentId cid);
/// <summary>Returns the datasets the node reports as locally stored.</summary>
LocalDatasetList LocalFiles();
/// <summary>Returns the node's current storage-space report (including quota-used).</summary>
LogosStorageSpace Space();
/// <summary>Connects this node to the given peer node.</summary>
void ConnectToPeer(IStorageNode node);
/// <summary>Version information of the node.</summary>
DebugInfoVersion Version { get; }
/// <summary>Upload/download speed samples collected by this node object.</summary>
ITransferSpeeds TransferSpeeds { get; }
/// <summary>Returns the address of the node's discovery endpoint.</summary>
Address GetDiscoveryEndpoint();
/// <summary>Returns the address of the node's API endpoint.</summary>
Address GetApiEndpoint();
/// <summary>Returns the address of the node's listen endpoint.</summary>
Address GetListenEndpoint();
/// <summary>
/// Warning! The node is not usable after this.
/// TODO: Replace with delete-blocks debug call once available in Logos Storage.
/// </summary>
void DeleteDataDirFolder();
/// <summary>Stops the node.</summary>
/// <param name="waitTillStopped">Passed through to the underlying stop call; presumably blocks until the node has stopped when true.</param>
void Stop(bool waitTillStopped);
/// <summary>Downloads the node's log.</summary>
/// <param name="additionalName">Optional extra name passed along to the log download.</param>
IDownloadedLog DownloadLog(string additionalName = "");
/// <summary>Returns whether the node is reported as crashed.</summary>
bool HasCrashed();
}
public class StorageNode : IStorageNode
{
// Prefix of the upload response body that signals the node could not store the data.
private const string UploadFailedMessage = "Unable to store block";
// Logger; the constructor wraps it so every message is prefixed with the node name.
private readonly ILog log;
// Callbacks notified about node lifecycle and file transfer events.
private readonly IStorageNodeHooks hooks;
// Collects upload/download samples exposed through TransferSpeeds.
private readonly TransferSpeeds transferSpeeds;
// Both ids stay empty until InitializePeerNodeId() has read them from the node's debug info.
private string peerId = string.Empty;
private string nodeId = string.Empty;
// Access layer through which every call to the actual node is made.
private readonly LogosStorageAccess logosStorageAccess;
// Creates the tracked files that downloads are written to.
private readonly IFileManager fileManager;
/// <summary>
/// Creates the node wrapper. No call to the node itself is made here apart from reading its
/// name for the log prefix; <see cref="Version"/> starts out as an empty placeholder.
/// </summary>
public StorageNode(ILog log, LogosStorageAccess logosStorageAccess, IFileManager fileManager, IStorageNodeHooks hooks)
{
    // Placeholder state; Version is replaced once the node has been initialized.
    transferSpeeds = new TransferSpeeds();
    Version = new DebugInfoVersion();

    this.hooks = hooks;
    this.fileManager = fileManager;
    this.logosStorageAccess = logosStorageAccess;

    // GetName() reads from logosStorageAccess, so the prefix can only be built after that field is set.
    var prefix = $"{GetName()} ";
    this.log = new LogPrefixer(log, prefix);
}
/// <summary>
/// Tells the hooks that this node is starting, passing along the start time and image name
/// obtained from the access layer.
/// </summary>
public void Awake()
{
    var startUtc = logosStorageAccess.GetStartUtc();
    var imageName = logosStorageAccess.GetImageName();
    hooks.OnNodeStarting(startUtc, imageName);
}
/// <summary>
/// Reads the node's peer id, node id and version (retrying while the container comes up),
/// registers log replacements for those ids, and then notifies the hooks that the node has started.
/// </summary>
public void Initialize()
{
    // This is the first time we talk to the storage node. Kubernetes sometimes takes a while to
    // spin up the container, so this first call gets its own generous retry.
    var maxWait = TimeSpan.FromMinutes(10.0);
    var pauseBetweenAttempts = TimeSpan.FromSeconds(10.0);
    var kubeSpinupRetry = new Retry(
        "StorageNode_Initialize",
        maxTimeout: maxWait,
        sleepAfterFail: pauseBetweenAttempts,
        onFail: failure => { },
        failFast: false);

    kubeSpinupRetry.Run(InitializePeerNodeId);

    InitializeLogReplacements();
    hooks.OnNodeStarted(this, peerId, nodeId);
}
/// <summary>
/// Version information of the node. Holds an empty <c>DebugInfoVersion</c> after construction and is
/// replaced with the version from the node's debug info during initialization.
/// </summary>
public DebugInfoVersion Version { get; private set; }
/// <summary>Upload and download samples recorded by this node object.</summary>
public ITransferSpeeds TransferSpeeds => transferSpeeds;
/// <summary>Returns the node name as reported by the access layer.</summary>
public string GetName() => logosStorageAccess.GetName();
/// <summary>Returns the image name as reported by the access layer.</summary>
public string GetImageName() => logosStorageAccess.GetImageName();
/// <summary>
/// Returns the cached peer id. It is an empty string until the node has been initialized.
/// </summary>
public string GetPeerId() => peerId;
/// <summary>
/// Fetches the node's debug info.
/// </summary>
/// <param name="log">When true, the debug-info id and the peer ids from the node's table are written to the log.</param>
/// <returns>The debug info exactly as returned by the access layer.</returns>
public DebugInfo GetDebugInfo(bool log = false)
{
    var info = logosStorageAccess.GetDebugInfo();
    if (!log) return info;

    var knownPeerIds = info.Table.Nodes.Select(entry => entry.PeerId);
    var known = string.Join(",", knownPeerIds);
    Log($"Got DebugInfo with id: {info.Id}. This node knows: [{known}]");
    return info;
}
/// <summary>Forwards the requested log level to the node.</summary>
public void SetLogLevel(string logLevel) => logosStorageAccess.SetLogLevel(logLevel);
/// <summary>Returns the SPR value as reported by the access layer.</summary>
public string GetSpr() => logosStorageAccess.GetSpr();
/// <summary>
/// Fetches the node's debug information about another peer.
/// </summary>
/// <param name="peerId">Id of the peer to look up (this parameter shadows the field of the same name).</param>
public DebugPeer GetDebugPeer(string peerId) => logosStorageAccess.GetDebugPeer(peerId);
/// <summary>
/// Uploads a file as "application/octet-stream", with an attachment content-disposition that
/// carries the file's name (without its directory).
/// </summary>
public ContentId UploadFile(TrackedFile file)
{
    var fileName = Path.GetFileName(file.Filename);
    var disposition = $"attachment; filename=\"{fileName}\"";
    return UploadFile(file, "application/octet-stream", disposition);
}
/// <summary>
/// Uploads a file to the node with the given content type and content disposition, records an
/// upload speed sample, and notifies the hooks before and after the upload.
/// </summary>
/// <param name="file">File whose contents are streamed to the node.</param>
/// <param name="contentType">Content type sent with the upload.</param>
/// <param name="contentDisposition">Content disposition sent with the upload.</param>
/// <returns>The content id built from the node's response.</returns>
public ContentId UploadFile(TrackedFile file, string contentType, string contentDisposition)
{
    using var fileStream = File.OpenRead(file.Filename);

    // The unique id lets hook implementations pair the "uploading" and "uploaded" notifications.
    var uniqueId = Guid.NewGuid().ToString();
    var size = file.GetFilesize();
    hooks.OnFileUploading(uniqueId, size);

    var input = new UploadInput(contentType, contentDisposition, fileStream);
    var logMessage = $"Uploading file {file.Describe()} with contentType: '{input.ContentType}' and disposition: '{input.ContentDisposition}'...";
    var measurement = Stopwatch.Measure(log, logMessage, () =>
    {
        return logosStorageAccess.UploadFile(input);
    });

    var response = measurement.Value;
    transferSpeeds.AddUploadSample(size, measurement.Duration);

    if (string.IsNullOrEmpty(response)) FrameworkAssert.Fail("Received empty response.");
    // The failure marker is a machine-generated prefix, so compare it ordinally rather than with
    // the current culture's linguistic rules.
    if (response.StartsWith(UploadFailedMessage, StringComparison.Ordinal)) FrameworkAssert.Fail("Node failed to store block.");

    Log($"Uploaded file {file.Describe()}. Received contentId: '{response}'.");

    var cid = new ContentId(response);
    hooks.OnFileUploaded(uniqueId, size, cid);
    return cid;
}
/// <summary>
/// Downloads the content into a newly created tracked file, using a 10 minute timeout.
/// </summary>
public TrackedFile? DownloadContent(ContentId contentId, string fileLabel = "")
{
    var defaultTimeout = TimeSpan.FromMinutes(10.0);
    return DownloadContent(contentId, defaultTimeout, fileLabel);
}
/// <summary>
/// Downloads the content into a newly created tracked file, records a download speed sample,
/// and notifies the hooks before and after the download.
/// </summary>
/// <param name="contentId">Id of the content to download.</param>
/// <param name="timeout">Maximum time the download may take before it is abandoned.</param>
/// <param name="fileLabel">Label passed to the file manager when creating the target file.</param>
/// <returns>The tracked file the content was written to.</returns>
public TrackedFile? DownloadContent(ContentId contentId, TimeSpan timeout, string fileLabel = "")
{
    var target = fileManager.CreateEmptyFile(fileLabel);

    hooks.OnFileDownloading(contentId);
    Log($"Downloading '{contentId}'...");

    var measured = Stopwatch.Measure(
        log,
        $"Downloaded '{contentId}' to '{target.Filename}'",
        () => DownloadToFile(contentId.Id, target, timeout));

    // The size is read only after the download so it reflects what was actually written.
    var downloadedSize = target.GetFilesize();
    transferSpeeds.AddDownloadSample(downloadedSize, measured);
    hooks.OnFileDownloaded(downloadedSize, contentId);

    return target;
}
/// <summary>
/// Logs and issues a streamless download request for the content, returning the node's
/// response without waiting for the download to complete.
/// </summary>
public LocalDataset DownloadStreamless(ContentId cid)
{
    var message = $"Downloading streamless '{cid}' (no-wait)";
    Log(message);

    var dataset = logosStorageAccess.DownloadStreamless(cid);
    return dataset;
}
/// <summary>
/// Issues a streamless download request and then blocks until the node's quota-used has grown
/// by at least <paramref name="size"/>. The whole operation is timed and logged.
/// </summary>
/// <param name="cid">Id of the content to download.</param>
/// <param name="size">Expected growth of the node's quota-used once the download has finished.</param>
/// <returns>The dataset returned by the download request.</returns>
public LocalDataset DownloadStreamlessWait(ContentId cid, ByteSize size)
{
    Log($"Downloading streamless '{cid}' (wait till finished)");

    return Stopwatch.Measure(log, nameof(DownloadStreamlessWait), () =>
    {
        // Take the quota snapshot before the request so the increase is measured from a clean baseline.
        var spaceBefore = Space();
        var dataset = logosStorageAccess.DownloadStreamless(cid);
        WaitUntilQuotaUsedIncreased(spaceBefore, size);
        return dataset;
    }).Value;
}
/// <summary>Logs and issues a manifest-only download request for the content.</summary>
public LocalDataset DownloadManifestOnly(ContentId cid)
{
    var message = $"Downloading manifest-only '{cid}'";
    Log(message);

    var dataset = logosStorageAccess.DownloadManifestOnly(cid);
    return dataset;
}
/// <summary>Returns the list of local datasets as reported by the access layer.</summary>
public LocalDatasetList LocalFiles() => logosStorageAccess.LocalFiles();
/// <summary>Returns the node's current storage-space report as provided by the access layer.</summary>
public LogosStorageSpace Space() => logosStorageAccess.Space();
/// <summary>
/// Connects this node to another node, using the other node's debug-info id and its
/// advertised addresses (rewritten to point at its discovery host).
/// </summary>
/// <param name="node">Peer to connect to. Must be a <c>StorageNode</c>; any other implementation fails the cast.</param>
public void ConnectToPeer(IStorageNode node)
{
    var target = (StorageNode)node;
    Log($"Connecting to peer {target.GetName()}...");

    var targetInfo = node.GetDebugInfo();
    var targetId = targetInfo.Id;
    var targetAddresses = GetPeerMultiAddresses(target, targetInfo);
    logosStorageAccess.ConnectToPeer(targetId, targetAddresses);

    Log($"Successfully connected to peer {target.GetName()}.");
}
/// <summary>Asks the access layer to delete the node's data-dir folder.</summary>
public void DeleteDataDirFolder() => logosStorageAccess.DeleteDataDirFolder();
/// <summary>
/// Logs the stop, notifies the hooks that the node is stopping, and then stops the node.
/// </summary>
/// <param name="waitTillStopped">Passed through unchanged to the access layer's stop call.</param>
public void Stop(bool waitTillStopped)
{
    const string stoppingMessage = "Stopping...";
    Log(stoppingMessage);

    // Hooks are told first so they can observe the node before it goes away.
    hooks.OnNodeStopping();

    logosStorageAccess.Stop(waitTillStopped);
}
/// <summary>Downloads the node's log through the access layer.</summary>
/// <param name="additionalName">Optional extra name passed along to the log download.</param>
public IDownloadedLog DownloadLog(string additionalName = "") => logosStorageAccess.DownloadLog(additionalName);
/// <summary>Returns the discovery endpoint address as reported by the access layer.</summary>
public Address GetDiscoveryEndpoint() => logosStorageAccess.GetDiscoveryEndpoint();
/// <summary>Returns the API endpoint address as reported by the access layer.</summary>
public Address GetApiEndpoint() => logosStorageAccess.GetApiEndpoint();
/// <summary>Returns the listen endpoint address as reported by the access layer.</summary>
public Address GetListenEndpoint() => logosStorageAccess.GetListenEndpoint();
/// <summary>
/// Returns the address metrics can be scraped from.
/// </summary>
/// <exception cref="Exception">Thrown when the access layer has no metrics endpoint for this node.</exception>
public Address GetMetricsScrapeTarget()
{
    var endpoint = logosStorageAccess.GetMetricsEndpoint();
    if (endpoint != null)
    {
        return endpoint;
    }

    throw new Exception("Metrics ScrapeTarget accessed, but node was not started with EnableMetrics()");
}
/// <summary>Returns the crash status as reported by the access layer.</summary>
public bool HasCrashed() => logosStorageAccess.HasCrashed();
/// <summary>Returns "StorageNode:" followed by the node name.</summary>
public override string ToString() => $"StorageNode:{GetName()}";
/// <summary>
/// Reads the node's debug info and caches its peer id, local node id and version.
/// Throws when the reported version is not valid, which lets a surrounding retry try again.
/// </summary>
/// <exception cref="Exception">Thrown when the version in the debug info fails its validity check.</exception>
private void InitializePeerNodeId()
{
    var info = logosStorageAccess.GetDebugInfo();
    var reportedVersion = info.Version;

    if (!reportedVersion.IsValid())
    {
        var message = $"Invalid version information received from Logos Storage node {GetName()}: {reportedVersion}";
        throw new Exception(message);
    }

    peerId = info.Id;
    nodeId = info.Table.LocalNode.NodeId;
    Version = reportedVersion;
}
/// <summary>
/// Registers log replacements so that the peer id and node id, in both full and short form,
/// are replaced by the node's name in log output.
/// </summary>
private void InitializeLogReplacements()
{
    var replacement = GetName();

    // Peer id first, then node id; for each, the full form is registered before the short form.
    foreach (var id in new[] { peerId, nodeId })
    {
        log.AddStringReplace(id, replacement);
        log.AddStringReplace(LogosStorageUtils.ToShortId(id), replacement);
    }
}
/// <summary>
/// Builds the addresses used to connect to a peer: every address from the peer's debug info,
/// with "0.0.0.0" replaced by the host of the peer's discovery endpoint.
/// </summary>
/// <param name="peer">Peer whose discovery endpoint supplies the host.</param>
/// <param name="peerInfo">Debug info of the peer, supplying the advertised addresses.</param>
private string[] GetPeerMultiAddresses(StorageNode peer, DebugInfo peerInfo)
{
    // Strip any URL scheme so that only the bare host is substituted into the addresses.
    var host = peer.GetDiscoveryEndpoint().Host;
    host = host.Replace("http://", "");
    host = host.Replace("https://", "");

    return peerInfo.Addrs
        .Select(address => address.Replace("0.0.0.0", host))
        .ToArray();
}
/// <summary>
/// Downloads the content with the given id into <paramref name="file"/>, giving up after
/// <paramref name="timeout"/>. Any failure is logged and rethrown.
/// </summary>
/// <param name="contentId">Id of the content to download.</param>
/// <param name="file">Tracked file the downloaded bytes are written to.</param>
/// <param name="timeout">Maximum time to wait for the download to complete.</param>
/// <exception cref="TimeoutException">Thrown when the download does not finish within the timeout.</exception>
/// <exception cref="AggregateException">Thrown when the background download itself fails.</exception>
private void DownloadToFile(string contentId, TrackedFile file, TimeSpan timeout)
{
    using var fileStream = File.OpenWrite(file.Filename);
    using var cts = new CancellationTokenSource();
    try
    {
        // Type of stream generated by openAPI client does not support timeouts.
        // So we run the copy on a task and enforce the timeout from the calling thread.
        // The token is captured up front: it stays usable after the source is disposed.
        var token = cts.Token;
        var downloadTask = Task.Run(() =>
        {
            using var downloadStream = logosStorageAccess.DownloadFile(contentId);
            var buffer = new byte[81920];
            int bytesRead;
            while ((bytesRead = downloadStream.Read(buffer, 0, buffer.Length)) > 0)
            {
                // Checked before every write so an abandoned download stops at the next chunk
                // instead of writing into a file stream the caller has already disposed.
                token.ThrowIfCancellationRequested();
                fileStream.Write(buffer, 0, bytesRead);
            }
        }, token);

        // Task.Wait only accepts waits between zero and int.MaxValue milliseconds.
        var waitFor = timeout;
        if (waitFor < TimeSpan.Zero) waitFor = TimeSpan.Zero;
        if (waitFor.TotalMilliseconds > int.MaxValue) waitFor = TimeSpan.FromMilliseconds(int.MaxValue);

        // Wait returns as soon as the task completes (no polling delay that would distort the
        // measured download time) and throws an AggregateException if the task faulted.
        if (downloadTask.Wait(waitFor)) return;

        cts.Cancel();
        throw new TimeoutException($"Download of '{contentId}' timed out after {Time.FormatDuration(timeout)}");
    }
    catch (Exception ex)
    {
        Log($"Failed to download file '{contentId}': {ex}");
        throw;
    }
}
/// <summary>
/// Waits until the node's quota-used has grown by the expected amount, using a 30 minute timeout.
/// </summary>
public void WaitUntilQuotaUsedIncreased(LogosStorageSpace startSpace, ByteSize expectedIncreaseOfQuotaUsed)
{
    var defaultTimeout = TimeSpan.FromMinutes(30);
    WaitUntilQuotaUsedIncreased(startSpace, expectedIncreaseOfQuotaUsed, defaultTimeout);
}
/// <summary>
/// Repeatedly queries the node's space report until quota-used has grown by at least the
/// expected amount relative to <paramref name="startSpace"/>, checking again 10 seconds after
/// each failed attempt.
/// </summary>
/// <param name="startSpace">Space report taken before the operation that is expected to consume quota.</param>
/// <param name="expectedIncreaseOfQuotaUsed">Minimum growth of quota-used to wait for.</param>
/// <param name="maxTimeout">Maximum total time handed to the retry.</param>
public void WaitUntilQuotaUsedIncreased(
    LogosStorageSpace startSpace,
    ByteSize expectedIncreaseOfQuotaUsed,
    TimeSpan maxTimeout)
{
    var usedAtStart = startSpace.QuotaUsedBytes;
    var requiredIncrease = expectedIncreaseOfQuotaUsed.SizeInBytes;

    Log($"Waiting until quotaUsed (start: {usedAtStart}) increases by {expectedIncreaseOfQuotaUsed} to reach {usedAtStart + requiredIncrease}");

    // A failed check throws; the retry treats that as "not there yet" and tries again later.
    void CheckQuotaReached()
    {
        var current = Space();
        var increase = current.QuotaUsedBytes - usedAtStart;
        if (increase < requiredIncrease)
        {
            throw new Exception(
                "Expected quota-used not reached. " +
                $"Expected increase: {requiredIncrease} " +
                $"Actual increase: {increase} " +
                $"Actual used: {current.QuotaUsedBytes}");
        }
    }

    var quotaRetry = new Retry(
        $"Checking local space for quotaUsed increase of {expectedIncreaseOfQuotaUsed}",
        maxTimeout: maxTimeout,
        sleepAfterFail: TimeSpan.FromSeconds(10),
        onFail: failure => { },
        failFast: false);

    quotaRetry.Run(CheckQuotaReached);
}
/// <summary>Writes a message to this node's (name-prefixed) log.</summary>
private void Log(string msg) => log.Log(msg);
}
}