From 3da36725e36109fb3bdb13a6cbfb922d5f67fd4c Mon Sep 17 00:00:00 2001 From: benbierens Date: Mon, 3 Jun 2024 10:58:04 +0200 Subject: [PATCH 01/27] Updates codex image --- ProjectPlugins/CodexPlugin/CodexContainerRecipe.cs | 2 +- Tests/CodexTests/UtilityTests/ClusterSpeedTests.cs | 6 +++--- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/ProjectPlugins/CodexPlugin/CodexContainerRecipe.cs b/ProjectPlugins/CodexPlugin/CodexContainerRecipe.cs index 7bc308a..d060d75 100644 --- a/ProjectPlugins/CodexPlugin/CodexContainerRecipe.cs +++ b/ProjectPlugins/CodexPlugin/CodexContainerRecipe.cs @@ -7,7 +7,7 @@ namespace CodexPlugin { public class CodexContainerRecipe : ContainerRecipeFactory { - private const string DefaultDockerImage = "codexstorage/nim-codex:sha-267266a-dist-tests"; + private const string DefaultDockerImage = "codexstorage/nim-codex:sha-5e3183a-dist-tests"; public const string ApiPortTag = "codex_api_port"; public const string ListenPortTag = "codex_listen_port"; diff --git a/Tests/CodexTests/UtilityTests/ClusterSpeedTests.cs b/Tests/CodexTests/UtilityTests/ClusterSpeedTests.cs index 9e647ea..7800e13 100644 --- a/Tests/CodexTests/UtilityTests/ClusterSpeedTests.cs +++ b/Tests/CodexTests/UtilityTests/ClusterSpeedTests.cs @@ -3,7 +3,7 @@ using Logging; using NUnit.Framework; using Utils; -namespace CodexTests.ScalabilityTests +namespace CodexTests.UtilityTests { [TestFixture] public class ClusterDiscSpeedTests : DistTest @@ -18,7 +18,7 @@ namespace CodexTests.ScalabilityTests ) { long targetSize = (long)(1024 * 1024 * 1024) * 2; - long bufferSizeBytes = ((long)bufferSizeKb) * 1024; + long bufferSizeBytes = (long)bufferSizeKb * 1024; var filename = nameof(DiscSpeedTest); @@ -28,7 +28,7 @@ namespace CodexTests.ScalabilityTests var writeSpeed = PerformWrite(targetSize, bufferSizeBytes, filename); Thread.Sleep(2000); var readSpeed = PerformRead(targetSize, bufferSizeBytes, filename); - + Log($"Write speed: {writeSpeed} per second."); Log($"Read speed: 
{readSpeed} per second."); } From 383102b9886cebb81ff9311d0aad2c2c7158f351 Mon Sep 17 00:00:00 2001 From: benbierens Date: Mon, 3 Jun 2024 11:36:48 +0200 Subject: [PATCH 02/27] Adds container name to crash message. --- ProjectPlugins/CodexPlugin/CodexAccess.cs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ProjectPlugins/CodexPlugin/CodexAccess.cs b/ProjectPlugins/CodexPlugin/CodexAccess.cs index e35a9f9..121ccf9 100644 --- a/ProjectPlugins/CodexPlugin/CodexAccess.cs +++ b/ProjectPlugins/CodexPlugin/CodexAccess.cs @@ -141,7 +141,7 @@ namespace CodexPlugin private void CheckContainerCrashed(HttpClient client) { - if (hasContainerCrashed) throw new Exception("Container has crashed."); + if (hasContainerCrashed) throw new Exception($"Container {GetName()} has crashed."); } public void Log(Stream crashLog) From 02baa72c1e388bb072707f6839752ccacb60f705 Mon Sep 17 00:00:00 2001 From: benbierens Date: Mon, 3 Jun 2024 11:42:52 +0200 Subject: [PATCH 03/27] DebugInfo log --- ProjectPlugins/CodexPlugin/CodexNode.cs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ProjectPlugins/CodexPlugin/CodexNode.cs b/ProjectPlugins/CodexPlugin/CodexNode.cs index 36fb1dc..034fe63 100644 --- a/ProjectPlugins/CodexPlugin/CodexNode.cs +++ b/ProjectPlugins/CodexPlugin/CodexNode.cs @@ -80,7 +80,7 @@ namespace CodexPlugin { var debugInfo = CodexAccess.GetDebugInfo(); var known = string.Join(",", debugInfo.Table.Nodes.Select(n => n.PeerId)); - Log($"Got DebugInfo with id: '{debugInfo.Id}'. This node knows: {known}"); + Log($"Got DebugInfo with id: {debugInfo.Id}. 
This node knows: [{known}]"); return debugInfo; } From 67fc2183b7e9faee3aa4834e4d3c03a6961263e3 Mon Sep 17 00:00:00 2001 From: benbierens Date: Mon, 3 Jun 2024 12:56:19 +0200 Subject: [PATCH 04/27] Better logging for slot-filled event checking --- Tests/CodexTests/BasicTests/MarketplaceTests.cs | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/Tests/CodexTests/BasicTests/MarketplaceTests.cs b/Tests/CodexTests/BasicTests/MarketplaceTests.cs index 7cff4aa..5d69387 100644 --- a/Tests/CodexTests/BasicTests/MarketplaceTests.cs +++ b/Tests/CodexTests/BasicTests/MarketplaceTests.cs @@ -92,9 +92,9 @@ namespace CodexTests.BasicTests var blockRange = geth.ConvertTimeRangeToBlockRange(GetTestRunTimeRange()); var slotFilledEvents = contracts.GetSlotFilledEvents(blockRange); - Debug($"SlotFilledEvents: {slotFilledEvents.Length} - NumSlots: {purchase.MinRequiredNumberOfNodes}"); - - if (slotFilledEvents.Length != purchase.MinRequiredNumberOfNodes) throw new Exception(); + var msg = $"SlotFilledEvents: {slotFilledEvents.Length} - NumSlots: {purchase.MinRequiredNumberOfNodes}"; + Debug(msg); + if (slotFilledEvents.Length != purchase.MinRequiredNumberOfNodes) throw new Exception(msg); }, purchase.Expiry + TimeSpan.FromSeconds(10), TimeSpan.FromSeconds(5), "Checking SlotFilled events"); } From 11b866986d6b98ecd9d8687b13cd8d90efaaba5d Mon Sep 17 00:00:00 2001 From: Ben Date: Tue, 4 Jun 2024 11:06:19 +0200 Subject: [PATCH 05/27] wip retry upgrade --- Framework/Utils/Retry.cs | 125 +++++++++++++++++++++++++++++++++++++++ 1 file changed, 125 insertions(+) create mode 100644 Framework/Utils/Retry.cs diff --git a/Framework/Utils/Retry.cs b/Framework/Utils/Retry.cs new file mode 100644 index 0000000..45790e8 --- /dev/null +++ b/Framework/Utils/Retry.cs @@ -0,0 +1,125 @@ + +namespace Utils +{ + public class Retry + { + private readonly string description; + private readonly Func task; + private readonly TimeSpan maxTimeout; + private readonly int maxRetries; + private 
readonly TimeSpan sleepAfterFail; + private readonly Action onFail; + + public Retry(string description, Func task, TimeSpan maxTimeout, int maxRetries, TimeSpan sleepAfterFail, Action onFail) + { + this.description = description; + this.task = task; + this.maxTimeout = maxTimeout; + this.maxRetries = maxRetries; + this.sleepAfterFail = sleepAfterFail; + this.onFail = onFail; + } + + public T Run() + { + var run = new RetryRun(description, task, maxTimeout, maxRetries, sleepAfterFail, onFail); + return run.Run(); + } + + private class RetryRun + { + private readonly string description; + private readonly Func task; + private readonly TimeSpan maxTimeout; + private readonly int maxRetries; + private readonly TimeSpan sleepAfterFail; + private readonly Action onFail; + private readonly DateTime start = DateTime.UtcNow; + private readonly List failures = new List(); + private int tryNumber; + private DateTime tryStart; + + public RetryRun(string description, Func task, TimeSpan maxTimeout, int maxRetries, TimeSpan sleepAfterFail, Action onFail) + { + this.description = description; + this.task = task; + this.maxTimeout = maxTimeout; + this.maxRetries = maxRetries; + this.sleepAfterFail = sleepAfterFail; + this.onFail = onFail; + + tryNumber = 0; + tryStart = DateTime.UtcNow; + } + + public T Run() + { + while (true) + { + CheckMaximums(); + + tryNumber++; + tryStart = DateTime.UtcNow; + try + { + return task(); + } + catch (Exception ex) + { + var failure = CaptureFailure(ex); + onFail(failure); + Time.Sleep(sleepAfterFail); + } + } + } + + private Failure CaptureFailure(Exception ex) + { + var f = new Failure(ex, DateTime.UtcNow - tryStart, tryNumber); + failures.Add(f); + return f; + } + + private void CheckMaximums() + { + if (Duration() > maxTimeout) Fail(); + if (tryNumber > maxRetries) Fail(); + } + + private void Fail() + { + throw new TimeoutException($"Retry '{description}' timed out after {tryNumber} tries over {Time.FormatDuration(Duration())}: 
{GetFailureReport}", + new AggregateException(failures.Select(f => f.Exception))); + } + + private string GetFailureReport() + { + return Environment.NewLine + string.Join(Environment.NewLine, failures.Select(f => f.Describe())); + } + + private TimeSpan Duration() + { + return DateTime.UtcNow - start; + } + } + } + + public class Failure + { + public Failure(Exception exception, TimeSpan duration, int tryNumber) + { + Exception = exception; + Duration = duration; + TryNumber = tryNumber; + } + + public Exception Exception { get; } + public TimeSpan Duration { get; } + public int TryNumber { get; } + + public string Describe() + { + return $"Try {TryNumber} failed after {Time.FormatDuration(Duration)} with exception '{Exception}'"; + } + } +} From e9555cf99e465d5b530b4deaa33ee2c90f6652a8 Mon Sep 17 00:00:00 2001 From: Ben Date: Wed, 5 Jun 2024 09:20:00 +0200 Subject: [PATCH 06/27] Upload/download failure now automatically investigates node --- Framework/Core/Http.cs | 13 +- Framework/Core/PluginTools.cs | 10 +- Framework/Utils/Retry.cs | 42 ++++--- Framework/Utils/Time.cs | 78 ++---------- ProjectPlugins/CodexPlugin/CodexAccess.cs | 116 ++++++++++++++++-- ProjectPlugins/CodexPlugin/CodexNode.cs | 6 + ProjectPlugins/CodexPlugin/CodexTypes.cs | 13 ++ ProjectPlugins/CodexPlugin/Mapper.cs | 12 ++ .../BasicTests/LargeFileTests.cs | 13 -- 9 files changed, 188 insertions(+), 115 deletions(-) diff --git a/Framework/Core/Http.cs b/Framework/Core/Http.cs index 49b3b09..44cb3c6 100644 --- a/Framework/Core/Http.cs +++ b/Framework/Core/Http.cs @@ -7,6 +7,7 @@ namespace Core { T OnClient(Func action); T OnClient(Func action, string description); + T OnClient(Func action, Retry retry); IEndpoint CreateEndpoint(Address address, string baseUrl, string? 
logAlias = null); } @@ -35,13 +36,19 @@ namespace Core } public T OnClient(Func action, string description) + { + var retry = new Retry(description, timeSet.HttpRetryTimeout(), timeSet.HttpCallRetryDelay(), f => { }); + return OnClient(action, retry); + } + + public T OnClient(Func action, Retry retry) { var client = GetClient(); return LockRetry(() => { return action(client); - }, description); + }, retry); } public IEndpoint CreateEndpoint(Address address, string baseUrl, string? logAlias = null) @@ -54,11 +61,11 @@ namespace Core return DebugStack.GetCallerName(skipFrames: 2); } - private T LockRetry(Func operation, string description) + private T LockRetry(Func operation, Retry retry) { lock (httpLock) { - return Time.Retry(operation, timeSet.HttpRetryTimeout(), timeSet.HttpCallRetryDelay(), description); + return retry.Run(operation); } } diff --git a/Framework/Core/PluginTools.cs b/Framework/Core/PluginTools.cs index 5e1faee..78f7814 100644 --- a/Framework/Core/PluginTools.cs +++ b/Framework/Core/PluginTools.cs @@ -6,6 +6,7 @@ namespace Core { public interface IPluginTools : IWorkflowTool, ILogTool, IHttpFactoryTool, IFileTool { + ITimeSet TimeSet { get; } void Decommission(bool deleteKubernetesResources, bool deleteTrackedFiles); } @@ -33,7 +34,6 @@ namespace Core internal class PluginTools : IPluginTools { - private readonly ITimeSet timeSet; private readonly WorkflowCreator workflowCreator; private readonly IFileManager fileManager; private readonly LogPrefixer log; @@ -42,10 +42,12 @@ namespace Core { this.log = new LogPrefixer(log); this.workflowCreator = workflowCreator; - this.timeSet = timeSet; + TimeSet = timeSet; fileManager = new FileManager(log, fileManagerRootFolder); } + public ITimeSet TimeSet { get; } + public void ApplyLogPrefix(string prefix) { log.Prefix = prefix; @@ -53,7 +55,7 @@ namespace Core public IHttp CreateHttp(Action onClientCreated) { - return CreateHttp(onClientCreated, timeSet); + return CreateHttp(onClientCreated, TimeSet); } 
public IHttp CreateHttp(Action onClientCreated, ITimeSet ts) @@ -63,7 +65,7 @@ namespace Core public IHttp CreateHttp() { - return new Http(log, timeSet); + return new Http(log, TimeSet); } public IStartupWorkflow CreateWorkflow(string? namespaceOverride = null) diff --git a/Framework/Utils/Retry.cs b/Framework/Utils/Retry.cs index 45790e8..ec05ea6 100644 --- a/Framework/Utils/Retry.cs +++ b/Framework/Utils/Retry.cs @@ -1,37 +1,44 @@ - -namespace Utils +namespace Utils { - public class Retry + public class Retry { private readonly string description; - private readonly Func task; private readonly TimeSpan maxTimeout; - private readonly int maxRetries; private readonly TimeSpan sleepAfterFail; private readonly Action onFail; - public Retry(string description, Func task, TimeSpan maxTimeout, int maxRetries, TimeSpan sleepAfterFail, Action onFail) + public Retry(string description, TimeSpan maxTimeout, TimeSpan sleepAfterFail, Action onFail) { this.description = description; - this.task = task; this.maxTimeout = maxTimeout; - this.maxRetries = maxRetries; this.sleepAfterFail = sleepAfterFail; this.onFail = onFail; } - public T Run() + public void Run(Action task) { - var run = new RetryRun(description, task, maxTimeout, maxRetries, sleepAfterFail, onFail); - return run.Run(); + var run = new RetryRun(description, task, maxTimeout, sleepAfterFail, onFail); + run.Run(); + } + + public T Run(Func task) + { + T? 
result = default; + + var run = new RetryRun(description, () => + { + result = task(); + }, maxTimeout, sleepAfterFail, onFail); + run.Run(); + + return result!; } private class RetryRun { private readonly string description; - private readonly Func task; + private readonly Action task; private readonly TimeSpan maxTimeout; - private readonly int maxRetries; private readonly TimeSpan sleepAfterFail; private readonly Action onFail; private readonly DateTime start = DateTime.UtcNow; @@ -39,12 +46,11 @@ namespace Utils private int tryNumber; private DateTime tryStart; - public RetryRun(string description, Func task, TimeSpan maxTimeout, int maxRetries, TimeSpan sleepAfterFail, Action onFail) + public RetryRun(string description, Action task, TimeSpan maxTimeout, TimeSpan sleepAfterFail, Action onFail) { this.description = description; this.task = task; this.maxTimeout = maxTimeout; - this.maxRetries = maxRetries; this.sleepAfterFail = sleepAfterFail; this.onFail = onFail; @@ -52,7 +58,7 @@ namespace Utils tryStart = DateTime.UtcNow; } - public T Run() + public void Run() { while (true) { @@ -62,7 +68,8 @@ namespace Utils tryStart = DateTime.UtcNow; try { - return task(); + task(); + return; } catch (Exception ex) { @@ -83,7 +90,6 @@ namespace Utils private void CheckMaximums() { if (Duration() > maxTimeout) Fail(); - if (tryNumber > maxRetries) Fail(); } private void Fail() diff --git a/Framework/Utils/Time.cs b/Framework/Utils/Time.cs index 10caaf4..f22242f 100644 --- a/Framework/Utils/Time.cs +++ b/Framework/Utils/Time.cs @@ -111,78 +111,24 @@ namespace Utils public static void Retry(Action action, TimeSpan maxTimeout, TimeSpan retryTime, string description) { - var start = DateTime.UtcNow; - var tries = 1; - var tryInfo = new List<(Exception, TimeSpan)>(); - - while (true) - { - var duration = DateTime.UtcNow - start; - if (duration > maxTimeout) - { - var info = FormatTryInfos(tryInfo); - throw new TimeoutException($"Retry '{description}' timed out after {tries} 
tries over {FormatDuration(duration)}.{Environment.NewLine}{info}"); - } - - var sw = Stopwatch.StartNew(); - try - { - action(); - return; - } - catch (Exception ex) - { - tryInfo.Add((ex, sw.Elapsed)); - tries++; - } - - Sleep(retryTime); - } - } - - private static string FormatTryInfos(List<(Exception, TimeSpan)> tryInfo) - { - return string.Join(Environment.NewLine, tryInfo.Select(FormatTryInfo).ToArray()); - } - - private static string FormatTryInfo((Exception, TimeSpan) info, int index) - { - return $"Attempt {index} took {FormatDuration(info.Item2)} and failed with exception {info.Item1}."; - } - - private static Action failedCallback = i => { }; - public static void SetRetryFailedCallback(Action onRetryFailed) - { - failedCallback = onRetryFailed; + Retry(action, maxTimeout, retryTime, description, f => { }); } public static T Retry(Func action, TimeSpan maxTimeout, TimeSpan retryTime, string description) { - var start = DateTime.UtcNow; - var tries = 1; - var exceptions = new List(); + return Retry(action, maxTimeout, retryTime, description, f => { }); + } - while (true) - { - var duration = DateTime.UtcNow - start; - if (duration > maxTimeout) - { - throw new TimeoutException($"Retry '{description}' timed out after {tries} tries over {FormatDuration(duration)}.", new AggregateException(exceptions)); - } + public static void Retry(Action action, TimeSpan maxTimeout, TimeSpan retryTime, string description, Action onFail) + { + var r = new Retry(description, maxTimeout, retryTime, onFail); + r.Run(action); + } - try - { - return action(); - } - catch (Exception ex) - { - exceptions.Add(ex); - failedCallback(tries); - tries++; - } - - Sleep(retryTime); - } + public static T Retry(Func action, TimeSpan maxTimeout, TimeSpan retryTime, string description, Action onFail) + { + var r = new Retry(description, maxTimeout, retryTime, onFail); + return r.Run(action); } } } diff --git a/ProjectPlugins/CodexPlugin/CodexAccess.cs 
b/ProjectPlugins/CodexPlugin/CodexAccess.cs index 121ccf9..78a3894 100644 --- a/ProjectPlugins/CodexPlugin/CodexAccess.cs +++ b/ProjectPlugins/CodexPlugin/CodexAccess.cs @@ -2,6 +2,7 @@ using Core; using KubernetesWorkflow; using KubernetesWorkflow.Types; +using Logging; using Newtonsoft.Json; using Utils; @@ -61,12 +62,17 @@ namespace CodexPlugin public string UploadFile(FileStream fileStream) { - return OnCodex(api => api.UploadAsync(fileStream)); + return OnCodex( + api => api.UploadAsync(fileStream), + CreateRetryConfig(nameof(UploadFile))); } public Stream DownloadFile(string contentId) { - var fileResponse = OnCodex(api => api.DownloadNetworkAsync(contentId)); + var fileResponse = OnCodex( + api => api.DownloadNetworkAsync(contentId), + CreateRetryConfig(nameof(DownloadFile))); + if (fileResponse.StatusCode != 200) throw new Exception("Download failed with StatusCode: " + fileResponse.StatusCode); return fileResponse.Stream; } @@ -89,6 +95,12 @@ namespace CodexPlugin return OnCodex(api => api.CreateStorageRequestAsync(request.ContentId.Id, body)); } + public CodexSpace Space() + { + var space = OnCodex(api => api.SpaceAsync()); + return mapper.Map(space); + } + public StoragePurchase GetPurchaseStatus(string purchaseId) { var endpoint = GetEndpoint(); @@ -116,17 +128,24 @@ namespace CodexPlugin private T OnCodex(Func> action) { - var address = GetAddress(); - var result = tools.CreateHttp(CheckContainerCrashed) - .OnClient(client => - { - var api = new CodexApi(client); - api.BaseUrl = $"{address.Host}:{address.Port}/api/codex/v1"; - return Time.Wait(action(api)); - }); + var result = tools.CreateHttp(CheckContainerCrashed).OnClient(client => CallCodex(client, action)); return result; } + private T OnCodex(Func> action, Retry retry) + { + var result = tools.CreateHttp(CheckContainerCrashed).OnClient(client => CallCodex(client, action), retry); + return result; + } + + private T CallCodex(HttpClient client, Func> action) + { + var address = GetAddress(); + var 
api = new CodexApi(client); + api.BaseUrl = $"{address.Host}:{address.Port}/api/codex/v1"; + return Time.Wait(action(api)); + } + private IEndpoint GetEndpoint() { return tools @@ -144,7 +163,7 @@ namespace CodexPlugin if (hasContainerCrashed) throw new Exception($"Container {GetName()} has crashed."); } - public void Log(Stream crashLog) + void ILogHandler.Log(Stream crashLog) { var log = tools.GetLog(); var file = log.CreateSubfile(); @@ -162,5 +181,80 @@ namespace CodexPlugin log.Log("Crash log successfully downloaded."); hasContainerCrashed = true; } + + private Retry CreateRetryConfig(string description) + { + var timeSet = tools.TimeSet; + var log = tools.GetLog(); + + return new Retry(description, timeSet.HttpRetryTimeout(), timeSet.HttpCallRetryDelay(), failure => + { + if (failure.TryNumber < 3) return; + if (failure.Duration.TotalSeconds < timeSet.HttpCallTimeout().TotalSeconds) + { + Investigate(log, failure, timeSet); + } + }); + } + + private void Investigate(ILog log, Failure failure, ITimeSet timeSet) + { + log.Log($"Retry {failure.TryNumber} took {Time.FormatDuration(failure.Duration)}. (HTTP timeout = {Time.FormatDuration(timeSet.HttpCallTimeout())}) " + + $"Checking if node responds to debug/info..."); + try + { + var debugInfo = GetDebugInfo(); + if (string.IsNullOrEmpty(debugInfo.Spr)) + { + log.Log("Did not get value debug/info response."); + DownloadLog(); + Throw(failure); + } + else + { + log.Log("Got valid response from debug/info. 
Checking storage statistics..."); + CheckSpaceStatistics(log, failure); + } + } + catch (Exception ex) + { + log.Log("Got exception from debug/info call: " + ex); + DownloadLog(); + Throw(failure); + } + } + + private void CheckSpaceStatistics(ILog log, Failure failure) + { + try + { + var space = Space(); + log.Log($"Got space statistics: {space}"); + var freeSpace = space.QuotaMaxBytes - (space.QuotaUsedBytes + space.QuotaReservedBytes); + log.Log($"Free space: {freeSpace}"); + + if (freeSpace < 1.MB().SizeInBytes) + { + log.Log("There's less than 1MB free. Stopping..."); + Throw(failure); + } + } + catch (Exception e) + { + log.Log("Failed to get space statistics: " + e); + DownloadLog(); + Throw(failure); + } + } + + private void Throw(Failure failure) + { + throw failure.Exception; + } + + private void DownloadLog() + { + tools.CreateWorkflow().DownloadContainerLog(Container.Containers.Single(), this); + } } } diff --git a/ProjectPlugins/CodexPlugin/CodexNode.cs b/ProjectPlugins/CodexPlugin/CodexNode.cs index 034fe63..dc8531c 100644 --- a/ProjectPlugins/CodexPlugin/CodexNode.cs +++ b/ProjectPlugins/CodexPlugin/CodexNode.cs @@ -17,6 +17,7 @@ namespace CodexPlugin ContentId UploadFile(TrackedFile file); TrackedFile? 
DownloadContent(ContentId contentId, string fileLabel = ""); LocalDatasetList LocalFiles(); + CodexSpace Space(); void ConnectToPeer(ICodexNode node); DebugInfoVersion Version { get; } IMarketplaceAccess Marketplace { get; } @@ -126,6 +127,11 @@ namespace CodexPlugin return CodexAccess.LocalFiles(); } + public CodexSpace Space() + { + return CodexAccess.Space(); + } + public void ConnectToPeer(ICodexNode node) { var peer = (CodexNode)node; diff --git a/ProjectPlugins/CodexPlugin/CodexTypes.cs b/ProjectPlugins/CodexPlugin/CodexTypes.cs index 945251e..b82aa78 100644 --- a/ProjectPlugins/CodexPlugin/CodexTypes.cs +++ b/ProjectPlugins/CodexPlugin/CodexTypes.cs @@ -105,4 +105,17 @@ namespace CodexPlugin return HashCode.Combine(Id); } } + + public class CodexSpace + { + public int TotalBlocks { get; set; } + public int QuotaMaxBytes { get; set; } + public int QuotaUsedBytes { get; set; } + public int QuotaReservedBytes { get; set; } + + public override string ToString() + { + return JsonConvert.SerializeObject(this); + } + } } diff --git a/ProjectPlugins/CodexPlugin/Mapper.cs b/ProjectPlugins/CodexPlugin/Mapper.cs index 61c95b4..0c695e0 100644 --- a/ProjectPlugins/CodexPlugin/Mapper.cs +++ b/ProjectPlugins/CodexPlugin/Mapper.cs @@ -1,4 +1,5 @@ using CodexContractsPlugin; +using CodexOpenApi; using Newtonsoft.Json.Linq; using System.Numerics; using Utils; @@ -84,6 +85,17 @@ namespace CodexPlugin }; } + public CodexSpace Map(Space space) + { + return new CodexSpace + { + QuotaMaxBytes = space.QuotaMaxBytes, + QuotaReservedBytes = space.QuotaReservedBytes, + QuotaUsedBytes = space.QuotaUsedBytes, + TotalBlocks = space.TotalBlocks + }; + } + private DebugInfoVersion MapDebugInfoVersion(JObject obj) { return new DebugInfoVersion diff --git a/Tests/CodexLongTests/BasicTests/LargeFileTests.cs b/Tests/CodexLongTests/BasicTests/LargeFileTests.cs index d182ed8..0470e35 100644 --- a/Tests/CodexLongTests/BasicTests/LargeFileTests.cs +++ 
b/Tests/CodexLongTests/BasicTests/LargeFileTests.cs @@ -50,8 +50,6 @@ namespace CodexLongTests.BasicTests var node = StartCodex(s => s.WithStorageQuota((size + 10).MB())); - Time.SetRetryFailedCallback(i => OnFailed(i, node)); - var uploadStart = DateTime.UtcNow; var cid = node.UploadFile(expectedFile); var downloadStart = DateTime.UtcNow; @@ -62,17 +60,6 @@ namespace CodexLongTests.BasicTests AssertTimeConstraint(uploadStart, downloadStart, downloadFinished, size); } - private void OnFailed(int tries, ICodexNode node) - { - if (tries < 5) return; - - if (tries % 10 == 0) - { - Log($"After try {tries}, downloading node log."); - Ci.DownloadLog(node); - } - } - private void AssertTimeConstraint(DateTime uploadStart, DateTime downloadStart, DateTime downloadFinished, long size) { float sizeInMB = size; From b888999b61c5aed30dc4d4be742de9c1d6535fb4 Mon Sep 17 00:00:00 2001 From: Ben Date: Wed, 5 Jun 2024 09:34:43 +0200 Subject: [PATCH 07/27] Adds space check for upload call --- ProjectPlugins/CodexPlugin/CodexAccess.cs | 27 +++++++++++++++++------ ProjectPlugins/CodexPlugin/CodexTypes.cs | 1 + 2 files changed, 21 insertions(+), 7 deletions(-) diff --git a/ProjectPlugins/CodexPlugin/CodexAccess.cs b/ProjectPlugins/CodexPlugin/CodexAccess.cs index 78a3894..e56bc1e 100644 --- a/ProjectPlugins/CodexPlugin/CodexAccess.cs +++ b/ProjectPlugins/CodexPlugin/CodexAccess.cs @@ -62,16 +62,18 @@ namespace CodexPlugin public string UploadFile(FileStream fileStream) { + CheckSpaceIsAvailable(fileStream); + return OnCodex( api => api.UploadAsync(fileStream), - CreateRetryConfig(nameof(UploadFile))); + CreateRetryConfig(nameof(UploadFile), f => CheckSpaceIsAvailable(fileStream))); } public Stream DownloadFile(string contentId) { var fileResponse = OnCodex( api => api.DownloadNetworkAsync(contentId), - CreateRetryConfig(nameof(DownloadFile))); + CreateRetryConfig(nameof(DownloadFile), f => { })); if (fileResponse.StatusCode != 200) throw new Exception("Download failed with StatusCode: 
" + fileResponse.StatusCode); return fileResponse.Stream; @@ -158,6 +160,18 @@ namespace CodexPlugin return Container.Containers.Single().GetAddress(tools.GetLog(), CodexContainerRecipe.ApiPortTag); } + private void CheckSpaceIsAvailable(FileStream fileStream) + { + var space = Space(); + + if (space.FreeBytes < fileStream.Length) + { + var msg = $"Not enough space available. File: {fileStream.Length} Free: {space.FreeBytes}"; + tools.GetLog().Error(msg); + throw new Exception(msg); + } + } + private void CheckContainerCrashed(HttpClient client) { if (hasContainerCrashed) throw new Exception($"Container {GetName()} has crashed."); @@ -182,14 +196,14 @@ namespace CodexPlugin hasContainerCrashed = true; } - private Retry CreateRetryConfig(string description) + private Retry CreateRetryConfig(string description, Action onFailure) { var timeSet = tools.TimeSet; var log = tools.GetLog(); return new Retry(description, timeSet.HttpRetryTimeout(), timeSet.HttpCallRetryDelay(), failure => { - if (failure.TryNumber < 3) return; + onFailure(failure); if (failure.Duration.TotalSeconds < timeSet.HttpCallTimeout().TotalSeconds) { Investigate(log, failure, timeSet); @@ -230,10 +244,9 @@ namespace CodexPlugin { var space = Space(); log.Log($"Got space statistics: {space}"); - var freeSpace = space.QuotaMaxBytes - (space.QuotaUsedBytes + space.QuotaReservedBytes); - log.Log($"Free space: {freeSpace}"); + log.Log($"Free space: {space.FreeBytes}"); - if (freeSpace < 1.MB().SizeInBytes) + if (space.FreeBytes < 1.MB().SizeInBytes) { log.Log("There's less than 1MB free. 
Stopping..."); Throw(failure); diff --git a/ProjectPlugins/CodexPlugin/CodexTypes.cs b/ProjectPlugins/CodexPlugin/CodexTypes.cs index b82aa78..ea7546c 100644 --- a/ProjectPlugins/CodexPlugin/CodexTypes.cs +++ b/ProjectPlugins/CodexPlugin/CodexTypes.cs @@ -112,6 +112,7 @@ namespace CodexPlugin public int QuotaMaxBytes { get; set; } public int QuotaUsedBytes { get; set; } public int QuotaReservedBytes { get; set; } + public int FreeBytes => QuotaMaxBytes - (QuotaUsedBytes + QuotaReservedBytes); public override string ToString() { From bf75c2515230b2a4d6cf5cff704b3b59d7a2a95a Mon Sep 17 00:00:00 2001 From: Ben Date: Wed, 5 Jun 2024 09:59:03 +0200 Subject: [PATCH 08/27] Cannot assume retry uploads will require as much space as file is large --- ProjectPlugins/CodexPlugin/CodexAccess.cs | 42 ++++++++--------------- 1 file changed, 15 insertions(+), 27 deletions(-) diff --git a/ProjectPlugins/CodexPlugin/CodexAccess.cs b/ProjectPlugins/CodexPlugin/CodexAccess.cs index e56bc1e..537dee0 100644 --- a/ProjectPlugins/CodexPlugin/CodexAccess.cs +++ b/ProjectPlugins/CodexPlugin/CodexAccess.cs @@ -62,18 +62,22 @@ namespace CodexPlugin public string UploadFile(FileStream fileStream) { - CheckSpaceIsAvailable(fileStream); + LogSpaceStatistics("Before upload"); - return OnCodex( + var str = OnCodex( api => api.UploadAsync(fileStream), - CreateRetryConfig(nameof(UploadFile), f => CheckSpaceIsAvailable(fileStream))); + CreateRetryConfig(nameof(UploadFile))); + + LogSpaceStatistics("After upload"); + + return str; } public Stream DownloadFile(string contentId) { var fileResponse = OnCodex( api => api.DownloadNetworkAsync(contentId), - CreateRetryConfig(nameof(DownloadFile), f => { })); + CreateRetryConfig(nameof(DownloadFile))); if (fileResponse.StatusCode != 200) throw new Exception("Download failed with StatusCode: " + fileResponse.StatusCode); return fileResponse.Stream; @@ -160,18 +164,6 @@ namespace CodexPlugin return Container.Containers.Single().GetAddress(tools.GetLog(), 
CodexContainerRecipe.ApiPortTag); } - private void CheckSpaceIsAvailable(FileStream fileStream) - { - var space = Space(); - - if (space.FreeBytes < fileStream.Length) - { - var msg = $"Not enough space available. File: {fileStream.Length} Free: {space.FreeBytes}"; - tools.GetLog().Error(msg); - throw new Exception(msg); - } - } - private void CheckContainerCrashed(HttpClient client) { if (hasContainerCrashed) throw new Exception($"Container {GetName()} has crashed."); @@ -196,14 +188,13 @@ namespace CodexPlugin hasContainerCrashed = true; } - private Retry CreateRetryConfig(string description, Action onFailure) + private Retry CreateRetryConfig(string description) { var timeSet = tools.TimeSet; var log = tools.GetLog(); return new Retry(description, timeSet.HttpRetryTimeout(), timeSet.HttpCallRetryDelay(), failure => { - onFailure(failure); if (failure.Duration.TotalSeconds < timeSet.HttpCallTimeout().TotalSeconds) { Investigate(log, failure, timeSet); @@ -242,15 +233,7 @@ namespace CodexPlugin { try { - var space = Space(); - log.Log($"Got space statistics: {space}"); - log.Log($"Free space: {space.FreeBytes}"); - - if (space.FreeBytes < 1.MB().SizeInBytes) - { - log.Log("There's less than 1MB free. 
Stopping..."); - Throw(failure); - } + LogSpaceStatistics(); } catch (Exception e) { @@ -260,6 +243,11 @@ namespace CodexPlugin } } + private void LogSpaceStatistics(string prefix = "") + { + tools.GetLog().Log($"{prefix} Space statistics: {Space()}"); + } + private void Throw(Failure failure) { throw failure.Exception; From f4d1dae478469c02ca2c3fa7b86ed4456ec3bb33 Mon Sep 17 00:00:00 2001 From: Ben Date: Wed, 5 Jun 2024 10:00:50 +0200 Subject: [PATCH 09/27] Disables large network scalability test for now --- Tests/CodexLongTests/ScalabilityTests/ScalabilityTests.cs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Tests/CodexLongTests/ScalabilityTests/ScalabilityTests.cs b/Tests/CodexLongTests/ScalabilityTests/ScalabilityTests.cs index b87be97..b4c1b39 100644 --- a/Tests/CodexLongTests/ScalabilityTests/ScalabilityTests.cs +++ b/Tests/CodexLongTests/ScalabilityTests/ScalabilityTests.cs @@ -18,7 +18,7 @@ public class ScalabilityTests : CodexDistTest [UseLongTimeouts] [DontDownloadLogs] public void ShouldMaintainFileInNetwork( - [Values(10, 40, 80, 100)] int numberOfNodes, + [Values(10, 40)] int numberOfNodes, // TODO: include 80 and 100 [Values(100, 1000, 5000, 10000)] int fileSizeInMb ) { From 62b56e198b4fe023256a6bd6e93c7de33c0c7751 Mon Sep 17 00:00:00 2001 From: Ben Date: Thu, 6 Jun 2024 09:54:50 +0200 Subject: [PATCH 10/27] Adds logging of node status to base codexdistTest --- ProjectPlugins/CodexPlugin/CodexAccess.cs | 23 +++++------- ProjectPlugins/CodexPlugin/CodexNode.cs | 24 ++++++++++--- ProjectPlugins/MetricsPlugin/MetricsQuery.cs | 36 +++++++++++++++++++ .../ScalabilityTests/ScalabilityTests.cs | 11 ++++-- Tests/CodexTests/CodexDistTest.cs | 23 ++++++++++++ 5 files changed, 97 insertions(+), 20 deletions(-) diff --git a/ProjectPlugins/CodexPlugin/CodexAccess.cs b/ProjectPlugins/CodexPlugin/CodexAccess.cs index 537dee0..5d97bec 100644 --- a/ProjectPlugins/CodexPlugin/CodexAccess.cs +++ b/ProjectPlugins/CodexPlugin/CodexAccess.cs @@ -60,24 
+60,18 @@ namespace CodexPlugin }); } - public string UploadFile(FileStream fileStream) + public string UploadFile(FileStream fileStream, Action onFailure) { - LogSpaceStatistics("Before upload"); - - var str = OnCodex( + return OnCodex( api => api.UploadAsync(fileStream), - CreateRetryConfig(nameof(UploadFile))); - - LogSpaceStatistics("After upload"); - - return str; + CreateRetryConfig(nameof(UploadFile), onFailure)); } - public Stream DownloadFile(string contentId) + public Stream DownloadFile(string contentId, Action onFailure) { var fileResponse = OnCodex( api => api.DownloadNetworkAsync(contentId), - CreateRetryConfig(nameof(DownloadFile))); + CreateRetryConfig(nameof(DownloadFile), onFailure)); if (fileResponse.StatusCode != 200) throw new Exception("Download failed with StatusCode: " + fileResponse.StatusCode); return fileResponse.Stream; @@ -188,13 +182,14 @@ namespace CodexPlugin hasContainerCrashed = true; } - private Retry CreateRetryConfig(string description) + private Retry CreateRetryConfig(string description, Action onFailure) { var timeSet = tools.TimeSet; var log = tools.GetLog(); return new Retry(description, timeSet.HttpRetryTimeout(), timeSet.HttpCallRetryDelay(), failure => { + onFailure(failure); if (failure.Duration.TotalSeconds < timeSet.HttpCallTimeout().TotalSeconds) { Investigate(log, failure, timeSet); @@ -243,9 +238,9 @@ namespace CodexPlugin } } - private void LogSpaceStatistics(string prefix = "") + private void LogSpaceStatistics() { - tools.GetLog().Log($"{prefix} Space statistics: {Space()}"); + tools.GetLog().Log($"Space statistics: {Space()}"); } private void Throw(Failure failure) diff --git a/ProjectPlugins/CodexPlugin/CodexNode.cs b/ProjectPlugins/CodexPlugin/CodexNode.cs index dc8531c..5179aae 100644 --- a/ProjectPlugins/CodexPlugin/CodexNode.cs +++ b/ProjectPlugins/CodexPlugin/CodexNode.cs @@ -15,7 +15,9 @@ namespace CodexPlugin DebugInfo GetDebugInfo(); DebugPeer GetDebugPeer(string peerId); ContentId 
UploadFile(TrackedFile file); + ContentId UploadFile(TrackedFile file, Action onFailure); TrackedFile? DownloadContent(ContentId contentId, string fileLabel = ""); + TrackedFile? DownloadContent(ContentId contentId, Action onFailure, string fileLabel = ""); LocalDatasetList LocalFiles(); CodexSpace Space(); void ConnectToPeer(ICodexNode node); @@ -91,6 +93,11 @@ namespace CodexPlugin } public ContentId UploadFile(TrackedFile file) + { + return UploadFile(file, DoNothing); + } + + public ContentId UploadFile(TrackedFile file, Action onFailure) { using var fileStream = File.OpenRead(file.Filename); @@ -98,7 +105,7 @@ namespace CodexPlugin Log(logMessage); var measurement = Stopwatch.Measure(tools.GetLog(), logMessage, () => { - return CodexAccess.UploadFile(fileStream); + return CodexAccess.UploadFile(fileStream, onFailure); }); var response = measurement.Value; @@ -112,11 +119,16 @@ namespace CodexPlugin } public TrackedFile? DownloadContent(ContentId contentId, string fileLabel = "") + { + return DownloadContent(contentId, DoNothing, fileLabel); + } + + public TrackedFile? 
DownloadContent(ContentId contentId, Action onFailure, string fileLabel = "") { var logMessage = $"Downloading for contentId: '{contentId.Id}'..."; Log(logMessage); var file = tools.GetFileManager().CreateEmptyFile(fileLabel); - var measurement = Stopwatch.Measure(tools.GetLog(), logMessage, () => DownloadToFile(contentId.Id, file)); + var measurement = Stopwatch.Measure(tools.GetLog(), logMessage, () => DownloadToFile(contentId.Id, file, onFailure)); transferSpeeds.AddDownloadSample(file.GetFilesize(), measurement); Log($"Downloaded file {file.Describe()} to '{file.Filename}'."); return file; @@ -188,12 +200,12 @@ namespace CodexPlugin .ToArray(); } - private void DownloadToFile(string contentId, TrackedFile file) + private void DownloadToFile(string contentId, TrackedFile file, Action onFailure) { using var fileStream = File.OpenWrite(file.Filename); try { - using var downloadStream = CodexAccess.DownloadFile(contentId); + using var downloadStream = CodexAccess.DownloadFile(contentId, onFailure); downloadStream.CopyTo(fileStream); } catch @@ -207,5 +219,9 @@ namespace CodexPlugin { tools.GetLog().Log($"{GetName()}: {msg}"); } + + private void DoNothing(Failure failure) + { + } } } diff --git a/ProjectPlugins/MetricsPlugin/MetricsQuery.cs b/ProjectPlugins/MetricsPlugin/MetricsQuery.cs index 199015b..cdab697 100644 --- a/ProjectPlugins/MetricsPlugin/MetricsQuery.cs +++ b/ProjectPlugins/MetricsPlugin/MetricsQuery.cs @@ -1,4 +1,5 @@ using Core; +using IdentityModel; using KubernetesWorkflow.Types; using Logging; using System.Globalization; @@ -177,6 +178,41 @@ namespace MetricsPlugin { return "[" + string.Join(',', Sets.Select(s => s.ToString())) + "]"; } + + public string AsCsv() + { + var allTimestamps = Sets.SelectMany(s => s.Values.Select(v => v.Timestamp)).Distinct().OrderDescending().ToArray(); + + var lines = new List(); + MakeLine(lines, e => + { + e.Add("Metrics"); + foreach (var ts in allTimestamps) e.Add(ts.ToEpochTime().ToString()); + }); + + foreach (var 
set in Sets) + { + MakeLine(lines, e => + { + e.Add(set.Name); + foreach (var ts in allTimestamps) + { + var value = set.Values.SingleOrDefault(v => v.Timestamp == ts); + if (value == null) e.Add(" "); + else e.Add(value.Value.ToString()); + } + }); + } + + return string.Join(Environment.NewLine, lines.ToArray()); + } + + private void MakeLine(List lines, Action> values) + { + var list = new List(); + values(list); + lines.Add(string.Join(",", list)); + } } public class MetricsSet diff --git a/Tests/CodexLongTests/ScalabilityTests/ScalabilityTests.cs b/Tests/CodexLongTests/ScalabilityTests/ScalabilityTests.cs index b4c1b39..e75d189 100644 --- a/Tests/CodexLongTests/ScalabilityTests/ScalabilityTests.cs +++ b/Tests/CodexLongTests/ScalabilityTests/ScalabilityTests.cs @@ -1,4 +1,5 @@ using CodexPlugin; +using MetricsPlugin; using DistTestCore; using FileUtils; using NUnit.Framework; @@ -26,6 +27,7 @@ public class ScalabilityTests : CodexDistTest var bootstrap = StartCodex(s => s.WithLogLevel(logLevel)); var nodes = StartCodex(numberOfNodes - 1, s => s + .EnableMetrics() .WithBootstrapNode(bootstrap) .WithLogLevel(logLevel) .WithStorageQuota((fileSizeInMb + 50).MB()) @@ -33,10 +35,15 @@ public class ScalabilityTests : CodexDistTest var uploader = nodes.PickOneRandom(); var downloader = nodes.PickOneRandom(); + var metrics = Ci.GetMetricsFor(uploader, downloader); var testFile = GenerateTestFile(fileSizeInMb.MB()); - var contentId = uploader.UploadFile(testFile); - var downloadedFile = downloader.DownloadContent(contentId); + + LogNodeStatus(uploader, metrics[0]); + var contentId = uploader.UploadFile(testFile, f => LogNodeStatus(uploader, metrics[0])); + + LogNodeStatus(downloader, metrics[1]); + var downloadedFile = downloader.DownloadContent(contentId, f => LogNodeStatus(downloader, metrics[1])); downloadedFile!.AssertIsEqual(testFile); diff --git a/Tests/CodexTests/CodexDistTest.cs b/Tests/CodexTests/CodexDistTest.cs index 7058b8e..10ca6a4 100644 --- 
a/Tests/CodexTests/CodexDistTest.cs +++ b/Tests/CodexTests/CodexDistTest.cs @@ -6,6 +6,8 @@ using Core; using DistTestCore; using DistTestCore.Helpers; using DistTestCore.Logs; +using MetricsPlugin; +using Newtonsoft.Json; using NUnit.Framework.Constraints; namespace CodexTests @@ -99,6 +101,27 @@ namespace CodexTests log.AssertLogDoesNotContain("ERR "); } + public void LogNodeStatus(ICodexNode node, IMetricsAccess? metrics = null) + { + Log("Status for " + node.GetName() + Environment.NewLine + + GetBasicNodeStatus(node) + + GetNodeMetrics(metrics)); + } + + private string GetBasicNodeStatus(ICodexNode node) + { + return JsonConvert.SerializeObject(node.GetDebugInfo(), Formatting.Indented) + Environment.NewLine + + node.Space().ToString() + Environment.NewLine; + } + + private string GetNodeMetrics(IMetricsAccess? metrics) + { + if (metrics == null) return "No metrics enabled"; + var m = metrics.GetAllMetrics(); + if (m == null) return "No metrics received"; + return m.AsCsv(); + } + protected virtual void OnCodexSetup(ICodexSetup setup) { } From e53a64471834b7ce8fadf0bcae3a036eecff1e7e Mon Sep 17 00:00:00 2001 From: Ben Date: Thu, 6 Jun 2024 10:11:51 +0200 Subject: [PATCH 11/27] Updates to new codex image --- ProjectPlugins/CodexPlugin/ApiChecker.cs | 2 +- ProjectPlugins/CodexPlugin/CodexContainerRecipe.cs | 2 +- ProjectPlugins/CodexPlugin/openapi.yaml | 5 +++++ 3 files changed, 7 insertions(+), 2 deletions(-) diff --git a/ProjectPlugins/CodexPlugin/ApiChecker.cs b/ProjectPlugins/CodexPlugin/ApiChecker.cs index f3e4e10..12fe874 100644 --- a/ProjectPlugins/CodexPlugin/ApiChecker.cs +++ b/ProjectPlugins/CodexPlugin/ApiChecker.cs @@ -9,7 +9,7 @@ namespace CodexPlugin public class ApiChecker { // - private const string OpenApiYamlHash = "0F-C8-02-1E-2C-2C-15-F6-91-6A-01-31-11-49-95-06-79-26-25-BF-27-3C-A8-2E-5F-7F-34-FD-C0-57-A0-9A"; + private const string OpenApiYamlHash = 
"27-D0-F6-EB-B9-A6-66-41-AA-EA-19-62-07-AF-47-41-25-5E-75-7E-97-35-CC-E1-C0-75-58-17-2D-87-11-75"; private const string OpenApiFilePath = "/codex/openapi.yaml"; private const string DisableEnvironmentVariable = "CODEXPLUGIN_DISABLE_APICHECK"; diff --git a/ProjectPlugins/CodexPlugin/CodexContainerRecipe.cs b/ProjectPlugins/CodexPlugin/CodexContainerRecipe.cs index d060d75..56ceabc 100644 --- a/ProjectPlugins/CodexPlugin/CodexContainerRecipe.cs +++ b/ProjectPlugins/CodexPlugin/CodexContainerRecipe.cs @@ -7,7 +7,7 @@ namespace CodexPlugin { public class CodexContainerRecipe : ContainerRecipeFactory { - private const string DefaultDockerImage = "codexstorage/nim-codex:sha-5e3183a-dist-tests"; + private const string DefaultDockerImage = "codexstorage/nim-codex:sha-a518ec6-dist-tests"; public const string ApiPortTag = "codex_api_port"; public const string ListenPortTag = "codex_listen_port"; diff --git a/ProjectPlugins/CodexPlugin/openapi.yaml b/ProjectPlugins/CodexPlugin/openapi.yaml index 49c75e6..6887de9 100644 --- a/ProjectPlugins/CodexPlugin/openapi.yaml +++ b/ProjectPlugins/CodexPlugin/openapi.yaml @@ -289,6 +289,7 @@ components: description: "Root hash of the content" originalBytes: type: integer + format: uint64 description: "Length of original content in bytes" blockSize: type: integer @@ -303,14 +304,18 @@ components: totalBlocks: description: "Number of blocks stored by the node" type: integer + format: uint64 quotaMaxBytes: type: integer + format: uint64 description: "Maximum storage space used by the node" quotaUsedBytes: type: integer + format: uint64 description: "Amount of storage space currently in use" quotaReservedBytes: type: integer + format: uint64 description: "Amount of storage space reserved" servers: From 9900db8d2562e062345b868db2193988a0bcaf52 Mon Sep 17 00:00:00 2001 From: Ben Date: Thu, 6 Jun 2024 10:12:11 +0200 Subject: [PATCH 12/27] simply scalability test failure handling --- ProjectPlugins/CodexPlugin/CodexAccess.cs | 27 
++--------------------- 1 file changed, 2 insertions(+), 25 deletions(-) diff --git a/ProjectPlugins/CodexPlugin/CodexAccess.cs b/ProjectPlugins/CodexPlugin/CodexAccess.cs index 5d97bec..469c8ca 100644 --- a/ProjectPlugins/CodexPlugin/CodexAccess.cs +++ b/ProjectPlugins/CodexPlugin/CodexAccess.cs @@ -190,10 +190,7 @@ namespace CodexPlugin return new Retry(description, timeSet.HttpRetryTimeout(), timeSet.HttpCallRetryDelay(), failure => { onFailure(failure); - if (failure.Duration.TotalSeconds < timeSet.HttpCallTimeout().TotalSeconds) - { - Investigate(log, failure, timeSet); - } + Investigate(log, failure, timeSet); }); } @@ -212,8 +209,7 @@ namespace CodexPlugin } else { - log.Log("Got valid response from debug/info. Checking storage statistics..."); - CheckSpaceStatistics(log, failure); + log.Log("Got valid response from debug/info."); } } catch (Exception ex) @@ -224,25 +220,6 @@ namespace CodexPlugin } } - private void CheckSpaceStatistics(ILog log, Failure failure) - { - try - { - LogSpaceStatistics(); - } - catch (Exception e) - { - log.Log("Failed to get space statistics: " + e); - DownloadLog(); - Throw(failure); - } - } - - private void LogSpaceStatistics() - { - tools.GetLog().Log($"Space statistics: {Space()}"); - } - private void Throw(Failure failure) { throw failure.Exception; From 38c2d1749abb05204e3137ee5b0d766804465d20 Mon Sep 17 00:00:00 2001 From: Ben Date: Thu, 6 Jun 2024 10:31:31 +0200 Subject: [PATCH 13/27] Links up status method for checking. 
--- Tests/CodexTests/BasicTests/ExampleTests.cs | 3 +++ Tests/CodexTests/BasicTests/OneClientTests.cs | 2 ++ 2 files changed, 5 insertions(+) diff --git a/Tests/CodexTests/BasicTests/ExampleTests.cs b/Tests/CodexTests/BasicTests/ExampleTests.cs index 0356298..f9d5b89 100644 --- a/Tests/CodexTests/BasicTests/ExampleTests.cs +++ b/Tests/CodexTests/BasicTests/ExampleTests.cs @@ -45,6 +45,9 @@ namespace CodexTests.BasicTests metrics[0].AssertThat("libp2p_peers", Is.EqualTo(1)); metrics[1].AssertThat("libp2p_peers", Is.EqualTo(1)); + + LogNodeStatus(primary, metrics[0]); + LogNodeStatus(primary2, metrics[1]); } [Test] diff --git a/Tests/CodexTests/BasicTests/OneClientTests.cs b/Tests/CodexTests/BasicTests/OneClientTests.cs index 11e8125..451b1cd 100644 --- a/Tests/CodexTests/BasicTests/OneClientTests.cs +++ b/Tests/CodexTests/BasicTests/OneClientTests.cs @@ -13,6 +13,8 @@ namespace CodexTests.BasicTests var primary = StartCodex(); PerformOneClientTest(primary); + + LogNodeStatus(primary); } [Test] From 3a61fc89c6423019706630bf494014fb5d1f97f0 Mon Sep 17 00:00:00 2001 From: benbierens Date: Thu, 6 Jun 2024 15:09:52 +0200 Subject: [PATCH 14/27] Adds WaitForCleanup test attribute to allow tests to wait for resources to be cleaned up --- Framework/Core/EntryPoint.cs | 10 +++++++--- Framework/Core/PluginManager.cs | 4 ++-- Framework/Core/PluginTools.cs | 11 ++++++++--- Framework/KubernetesWorkflow/K8sController.cs | 16 ++++++++++++---- .../KubernetesWorkflow/StartupWorkflow.cs | 12 ++++++------ ProjectPlugins/CodexPlugin/ApiChecker.cs | 2 +- .../CodexPlugin/CodexContainerRecipe.cs | 2 +- ProjectPlugins/CodexPlugin/CodexTypes.cs | 10 +++++----- ProjectPlugins/CodexPlugin/openapi.yaml | 10 +++++----- .../ContinuousTestRunner.cs | 4 ++-- Tests/CodexContinuousTests/NodeRunner.cs | 2 +- Tests/CodexContinuousTests/SingleTestRun.cs | 3 ++- .../ScalabilityTests/ScalabilityTests.cs | 2 ++ Tests/DistTestCore/DistTest.cs | 18 +++++++++++++++--- Tests/DistTestCore/TestLifecycle.cs | 8 
+++++--- Tests/DistTestCore/WaitForCleanupAttribute.cs | 15 +++++++++++++++ 16 files changed, 89 insertions(+), 40 deletions(-) create mode 100644 Tests/DistTestCore/WaitForCleanupAttribute.cs diff --git a/Framework/Core/EntryPoint.cs b/Framework/Core/EntryPoint.cs index 7977eb3..0db40f3 100644 --- a/Framework/Core/EntryPoint.cs +++ b/Framework/Core/EntryPoint.cs @@ -38,10 +38,14 @@ namespace Core return new CoreInterface(this); } - public void Decommission(bool deleteKubernetesResources, bool deleteTrackedFiles) + /// + /// Deletes kubernetes and tracked file resources. + /// when `waitTillDone` is true, this function will block until resources are deleted. + /// + public void Decommission(bool deleteKubernetesResources, bool deleteTrackedFiles, bool waitTillDone) { - manager.DecommissionPlugins(deleteKubernetesResources, deleteTrackedFiles); - Tools.Decommission(deleteKubernetesResources, deleteTrackedFiles); + manager.DecommissionPlugins(deleteKubernetesResources, deleteTrackedFiles, waitTillDone); + Tools.Decommission(deleteKubernetesResources, deleteTrackedFiles, waitTillDone); } internal T GetPlugin() where T : IProjectPlugin diff --git a/Framework/Core/PluginManager.cs b/Framework/Core/PluginManager.cs index e2b2a5c..27b08fe 100644 --- a/Framework/Core/PluginManager.cs +++ b/Framework/Core/PluginManager.cs @@ -34,12 +34,12 @@ return metadata; } - internal void DecommissionPlugins(bool deleteKubernetesResources, bool deleteTrackedFiles) + internal void DecommissionPlugins(bool deleteKubernetesResources, bool deleteTrackedFiles, bool waitTillDone) { foreach (var pair in pairs) { pair.Plugin.Decommission(); - pair.Tools.Decommission(deleteKubernetesResources, deleteTrackedFiles); + pair.Tools.Decommission(deleteKubernetesResources, deleteTrackedFiles, waitTillDone); } } diff --git a/Framework/Core/PluginTools.cs b/Framework/Core/PluginTools.cs index 78f7814..aa8e0a6 100644 --- a/Framework/Core/PluginTools.cs +++ b/Framework/Core/PluginTools.cs @@ -7,7 +7,12 @@ 
namespace Core public interface IPluginTools : IWorkflowTool, ILogTool, IHttpFactoryTool, IFileTool { ITimeSet TimeSet { get; } - void Decommission(bool deleteKubernetesResources, bool deleteTrackedFiles); + + /// + /// Deletes kubernetes and tracked file resources. + /// when `waitTillDone` is true, this function will block until resources are deleted. + /// + void Decommission(bool deleteKubernetesResources, bool deleteTrackedFiles, bool waitTillDone); } public interface IWorkflowTool @@ -73,9 +78,9 @@ namespace Core return workflowCreator.CreateWorkflow(namespaceOverride); } - public void Decommission(bool deleteKubernetesResources, bool deleteTrackedFiles) + public void Decommission(bool deleteKubernetesResources, bool deleteTrackedFiles, bool waitTillDone) { - if (deleteKubernetesResources) CreateWorkflow().DeleteNamespace(); + if (deleteKubernetesResources) CreateWorkflow().DeleteNamespace(waitTillDone); if (deleteTrackedFiles) fileManager.DeleteAllFiles(); } diff --git a/Framework/KubernetesWorkflow/K8sController.cs b/Framework/KubernetesWorkflow/K8sController.cs index 0f50d87..30de0c9 100644 --- a/Framework/KubernetesWorkflow/K8sController.cs +++ b/Framework/KubernetesWorkflow/K8sController.cs @@ -115,7 +115,7 @@ namespace KubernetesWorkflow }); } - public void DeleteAllNamespacesStartingWith(string prefix) + public void DeleteAllNamespacesStartingWith(string prefix, bool wait) { log.Debug(); @@ -124,25 +124,28 @@ namespace KubernetesWorkflow foreach (var ns in namespaces) { - DeleteNamespace(ns); + DeleteNamespace(ns, wait); } } - public void DeleteNamespace() + public void DeleteNamespace(bool wait) { log.Debug(); if (IsNamespaceOnline(K8sNamespace)) { client.Run(c => c.DeleteNamespace(K8sNamespace, null, null, gracePeriodSeconds: 0)); + + if (wait) WaitUntilNamespaceDeleted(K8sNamespace); } } - public void DeleteNamespace(string ns) + public void DeleteNamespace(string ns, bool wait) { log.Debug(); if (IsNamespaceOnline(ns)) { client.Run(c => 
c.DeleteNamespace(ns, null, null, gracePeriodSeconds: 0)); + if (wait) WaitUntilNamespaceDeleted(ns); } } @@ -871,6 +874,11 @@ namespace KubernetesWorkflow WaitUntil(() => IsNamespaceOnline(K8sNamespace), nameof(WaitUntilNamespaceCreated)); } + private void WaitUntilNamespaceDeleted(string @namespace) + { + WaitUntil(() => !IsNamespaceOnline(@namespace), nameof(WaitUntilNamespaceDeleted)); + } + private void WaitUntilDeploymentOnline(string deploymentName) { WaitUntil(() => diff --git a/Framework/KubernetesWorkflow/StartupWorkflow.cs b/Framework/KubernetesWorkflow/StartupWorkflow.cs index cda6148..3a7326d 100644 --- a/Framework/KubernetesWorkflow/StartupWorkflow.cs +++ b/Framework/KubernetesWorkflow/StartupWorkflow.cs @@ -17,8 +17,8 @@ namespace KubernetesWorkflow void Stop(RunningPod pod, bool waitTillStopped); void DownloadContainerLog(RunningContainer container, ILogHandler logHandler, int? tailLines = null); string ExecuteCommand(RunningContainer container, string command, params string[] args); - void DeleteNamespace(); - void DeleteNamespacesStartingWith(string namespacePrefix); + void DeleteNamespace(bool wait); + void DeleteNamespacesStartingWith(string namespacePrefix, bool wait); } public class StartupWorkflow : IStartupWorkflow @@ -122,19 +122,19 @@ namespace KubernetesWorkflow }); } - public void DeleteNamespace() + public void DeleteNamespace(bool wait) { K8s(controller => { - controller.DeleteNamespace(); + controller.DeleteNamespace(wait); }); } - public void DeleteNamespacesStartingWith(string namespacePrefix) + public void DeleteNamespacesStartingWith(string namespacePrefix, bool wait) { K8s(controller => { - controller.DeleteAllNamespacesStartingWith(namespacePrefix); + controller.DeleteAllNamespacesStartingWith(namespacePrefix, wait); }); } diff --git a/ProjectPlugins/CodexPlugin/ApiChecker.cs b/ProjectPlugins/CodexPlugin/ApiChecker.cs index 12fe874..d394405 100644 --- a/ProjectPlugins/CodexPlugin/ApiChecker.cs +++ 
b/ProjectPlugins/CodexPlugin/ApiChecker.cs @@ -9,7 +9,7 @@ namespace CodexPlugin public class ApiChecker { // - private const string OpenApiYamlHash = "27-D0-F6-EB-B9-A6-66-41-AA-EA-19-62-07-AF-47-41-25-5E-75-7E-97-35-CC-E1-C0-75-58-17-2D-87-11-75"; + private const string OpenApiYamlHash = "67-76-AB-FC-54-4F-EB-81-F5-E4-F8-27-DF-82-92-41-63-A5-EA-1B-17-14-0C-BE-20-9C-B3-DF-CE-E4-AA-38"; private const string OpenApiFilePath = "/codex/openapi.yaml"; private const string DisableEnvironmentVariable = "CODEXPLUGIN_DISABLE_APICHECK"; diff --git a/ProjectPlugins/CodexPlugin/CodexContainerRecipe.cs b/ProjectPlugins/CodexPlugin/CodexContainerRecipe.cs index 56ceabc..531f82a 100644 --- a/ProjectPlugins/CodexPlugin/CodexContainerRecipe.cs +++ b/ProjectPlugins/CodexPlugin/CodexContainerRecipe.cs @@ -7,7 +7,7 @@ namespace CodexPlugin { public class CodexContainerRecipe : ContainerRecipeFactory { - private const string DefaultDockerImage = "codexstorage/nim-codex:sha-a518ec6-dist-tests"; + private const string DefaultDockerImage = "codexstorage/nim-codex:sha-b89493e-dist-tests"; public const string ApiPortTag = "codex_api_port"; public const string ListenPortTag = "codex_listen_port"; diff --git a/ProjectPlugins/CodexPlugin/CodexTypes.cs b/ProjectPlugins/CodexPlugin/CodexTypes.cs index ea7546c..d40fb13 100644 --- a/ProjectPlugins/CodexPlugin/CodexTypes.cs +++ b/ProjectPlugins/CodexPlugin/CodexTypes.cs @@ -108,11 +108,11 @@ namespace CodexPlugin public class CodexSpace { - public int TotalBlocks { get; set; } - public int QuotaMaxBytes { get; set; } - public int QuotaUsedBytes { get; set; } - public int QuotaReservedBytes { get; set; } - public int FreeBytes => QuotaMaxBytes - (QuotaUsedBytes + QuotaReservedBytes); + public long TotalBlocks { get; set; } + public long QuotaMaxBytes { get; set; } + public long QuotaUsedBytes { get; set; } + public long QuotaReservedBytes { get; set; } + public long FreeBytes => QuotaMaxBytes - (QuotaUsedBytes + QuotaReservedBytes); public override 
string ToString() { diff --git a/ProjectPlugins/CodexPlugin/openapi.yaml b/ProjectPlugins/CodexPlugin/openapi.yaml index 6887de9..94450bf 100644 --- a/ProjectPlugins/CodexPlugin/openapi.yaml +++ b/ProjectPlugins/CodexPlugin/openapi.yaml @@ -289,7 +289,7 @@ components: description: "Root hash of the content" originalBytes: type: integer - format: uint64 + format: int64 description: "Length of original content in bytes" blockSize: type: integer @@ -304,18 +304,18 @@ components: totalBlocks: description: "Number of blocks stored by the node" type: integer - format: uint64 + format: int64 quotaMaxBytes: type: integer - format: uint64 + format: int64 description: "Maximum storage space used by the node" quotaUsedBytes: type: integer - format: uint64 + format: int64 description: "Amount of storage space currently in use" quotaReservedBytes: type: integer - format: uint64 + format: int64 description: "Amount of storage space reserved" servers: diff --git a/Tests/CodexContinuousTests/ContinuousTestRunner.cs b/Tests/CodexContinuousTests/ContinuousTestRunner.cs index b860c86..fde65aa 100644 --- a/Tests/CodexContinuousTests/ContinuousTestRunner.cs +++ b/Tests/CodexContinuousTests/ContinuousTestRunner.cs @@ -148,7 +148,7 @@ namespace ContinuousTests log.Log($"Clearing namespace '{test.CustomK8sNamespace}'..."); var entryPoint = entryPointFactory.CreateEntryPoint(config.KubeConfigFile, config.DataPath, test.CustomK8sNamespace, log); - entryPoint.Tools.CreateWorkflow().DeleteNamespacesStartingWith(test.CustomK8sNamespace); + entryPoint.Tools.CreateWorkflow().DeleteNamespacesStartingWith(test.CustomK8sNamespace, wait: true); } private void PerformCleanup(ILog log) @@ -157,7 +157,7 @@ namespace ContinuousTests log.Log("Cleaning up test namespace..."); var entryPoint = entryPointFactory.CreateEntryPoint(config.KubeConfigFile, config.DataPath, config.CodexDeployment.Metadata.KubeNamespace, log); - entryPoint.Decommission(deleteKubernetesResources: true, deleteTrackedFiles: true); + 
entryPoint.Decommission(deleteKubernetesResources: true, deleteTrackedFiles: true, waitTillDone: true); log.Log("Cleanup finished."); } } diff --git a/Tests/CodexContinuousTests/NodeRunner.cs b/Tests/CodexContinuousTests/NodeRunner.cs index 31f1f2e..e58facc 100644 --- a/Tests/CodexContinuousTests/NodeRunner.cs +++ b/Tests/CodexContinuousTests/NodeRunner.cs @@ -64,7 +64,7 @@ namespace ContinuousTests } finally { - entryPoint.Tools.CreateWorkflow().DeleteNamespace(); + entryPoint.Tools.CreateWorkflow().DeleteNamespace(wait: false); } } diff --git a/Tests/CodexContinuousTests/SingleTestRun.cs b/Tests/CodexContinuousTests/SingleTestRun.cs index 38bc502..95d0466 100644 --- a/Tests/CodexContinuousTests/SingleTestRun.cs +++ b/Tests/CodexContinuousTests/SingleTestRun.cs @@ -54,7 +54,8 @@ namespace ContinuousTests entryPoint.Decommission( deleteKubernetesResources: false, // This would delete the continuous test net. - deleteTrackedFiles: true + deleteTrackedFiles: true, + waitTillDone: false ); runFinishedHandle.Set(); } diff --git a/Tests/CodexLongTests/ScalabilityTests/ScalabilityTests.cs b/Tests/CodexLongTests/ScalabilityTests/ScalabilityTests.cs index e75d189..1b759ac 100644 --- a/Tests/CodexLongTests/ScalabilityTests/ScalabilityTests.cs +++ b/Tests/CodexLongTests/ScalabilityTests/ScalabilityTests.cs @@ -18,6 +18,7 @@ public class ScalabilityTests : CodexDistTest [Combinatorial] [UseLongTimeouts] [DontDownloadLogs] + [WaitForCleanup] public void ShouldMaintainFileInNetwork( [Values(10, 40)] int numberOfNodes, // TODO: include 80 and 100 [Values(100, 1000, 5000, 10000)] int fileSizeInMb @@ -64,6 +65,7 @@ public class ScalabilityTests : CodexDistTest [Combinatorial] [UseLongTimeouts] [DontDownloadLogs] + [WaitForCleanup] public void EveryoneGetsAFile( [Values(10, 40, 80, 100)] int numberOfNodes, [Values(100, 1000, 5000, 10000)] int fileSizeInMb diff --git a/Tests/DistTestCore/DistTest.cs b/Tests/DistTestCore/DistTest.cs index ed99fe9..a2edfec 100644 --- 
a/Tests/DistTestCore/DistTest.cs +++ b/Tests/DistTestCore/DistTest.cs @@ -52,7 +52,7 @@ namespace DistTestCore { Stopwatch.Measure(fixtureLog, "Global setup", () => { - globalEntryPoint.Tools.CreateWorkflow().DeleteNamespacesStartingWith(TestNamespacePrefix); + globalEntryPoint.Tools.CreateWorkflow().DeleteNamespacesStartingWith(TestNamespacePrefix, wait: true); }); } catch (Exception ex) @@ -72,7 +72,8 @@ namespace DistTestCore globalEntryPoint.Decommission( // There shouldn't be any of either, but clean everything up regardless. deleteKubernetesResources: true, - deleteTrackedFiles: true + deleteTrackedFiles: true, + waitTillDone: true ); } @@ -185,7 +186,13 @@ namespace DistTestCore lock (lifecycleLock) { var testNamespace = TestNamespacePrefix + Guid.NewGuid().ToString(); - var lifecycle = new TestLifecycle(fixtureLog.CreateTestLog(), configuration, GetTimeSet(), testNamespace, deployId); + var lifecycle = new TestLifecycle( + fixtureLog.CreateTestLog(), + configuration, + GetTimeSet(), + testNamespace, + deployId, + ShouldWaitForCleanup()); lifecycles.Add(testName, lifecycle); LifecycleStart(lifecycle); } @@ -235,6 +242,11 @@ namespace DistTestCore return new DefaultTimeSet(); } + private bool ShouldWaitForCleanup() + { + return CurrentTestMethodHasAttribute(); + } + private bool ShouldUseLongTimeouts() { return CurrentTestMethodHasAttribute(); diff --git a/Tests/DistTestCore/TestLifecycle.cs b/Tests/DistTestCore/TestLifecycle.cs index 542ca27..4191cd8 100644 --- a/Tests/DistTestCore/TestLifecycle.cs +++ b/Tests/DistTestCore/TestLifecycle.cs @@ -16,7 +16,7 @@ namespace DistTestCore private readonly List runningContainers = new(); private readonly string deployId; - public TestLifecycle(TestLog log, Configuration configuration, ITimeSet timeSet, string testNamespace, string deployId) + public TestLifecycle(TestLog log, Configuration configuration, ITimeSet timeSet, string testNamespace, string deployId, bool waitForCleanup) { Log = log; Configuration = 
configuration; @@ -27,7 +27,7 @@ namespace DistTestCore metadata = entryPoint.GetPluginMetadata(); CoreInterface = entryPoint.CreateInterface(); this.deployId = deployId; - + WaitForCleanup = waitForCleanup; log.WriteLogTag(); } @@ -35,13 +35,15 @@ namespace DistTestCore public TestLog Log { get; } public Configuration Configuration { get; } public ITimeSet TimeSet { get; } + public bool WaitForCleanup { get; } public CoreInterface CoreInterface { get; } public void DeleteAllResources() { entryPoint.Decommission( deleteKubernetesResources: true, - deleteTrackedFiles: true + deleteTrackedFiles: true, + waitTillDone: WaitForCleanup ); } diff --git a/Tests/DistTestCore/WaitForCleanupAttribute.cs b/Tests/DistTestCore/WaitForCleanupAttribute.cs new file mode 100644 index 0000000..928e3c2 --- /dev/null +++ b/Tests/DistTestCore/WaitForCleanupAttribute.cs @@ -0,0 +1,15 @@ +using NUnit.Framework; + +namespace DistTestCore +{ + /// + /// By default, test system does not wait until all resources are destroyed before starting the + /// next test. This saves a lot of time but it's not always what you want. + /// If you want to be sure the resources of your test are destroyed before the next test starts, + /// add this attribute to your test method. 
+ /// + [AttributeUsage(AttributeTargets.Method, AllowMultiple = false)] + public class WaitForCleanupAttribute : PropertyAttribute + { + } +} From 18a02b1717211b2444fc9b770519883fbdfc50f6 Mon Sep 17 00:00:00 2001 From: benbierens Date: Fri, 7 Jun 2024 16:37:31 +0200 Subject: [PATCH 15/27] Implements mapping of debug/info routing table nodes --- ProjectPlugins/CodexPlugin/Mapper.cs | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) diff --git a/ProjectPlugins/CodexPlugin/Mapper.cs b/ProjectPlugins/CodexPlugin/Mapper.cs index 0c695e0..6e59c68 100644 --- a/ProjectPlugins/CodexPlugin/Mapper.cs +++ b/ProjectPlugins/CodexPlugin/Mapper.cs @@ -110,7 +110,7 @@ namespace CodexPlugin return new DebugInfoTable { LocalNode = MapDebugInfoTableNode(obj.GetValue("localNode")), - Nodes = new DebugInfoTableNode[0] + Nodes = MapDebugInfoTableNodeArray(obj.GetValue("nodes") as JArray) }; } @@ -129,6 +129,16 @@ namespace CodexPlugin }; } + private DebugInfoTableNode[] MapDebugInfoTableNodeArray(JArray? nodes) + { + if (nodes == null || nodes.Count == 0) + { + return new DebugInfoTableNode[0]; + } + + return nodes.Select(MapDebugInfoTableNode).ToArray(); + } + private Manifest MapManifest(CodexOpenApi.ManifestItem manifest) { return new Manifest From 5ffff1ed079df7070efc60b831651bb39b46aae9 Mon Sep 17 00:00:00 2001 From: benbierens Date: Fri, 7 Jun 2024 17:07:35 +0200 Subject: [PATCH 16/27] Disables metrics in scalability test. Downloads container log when retry attempt failed too quickly to be a timeout. 
--- ProjectPlugins/CodexPlugin/CodexAccess.cs | 13 +++++++++++-- .../ScalabilityTests/ScalabilityTests.cs | 13 ++++++------- Tests/CodexTests/CodexDistTest.cs | 18 +++++++++--------- 3 files changed, 26 insertions(+), 18 deletions(-) diff --git a/ProjectPlugins/CodexPlugin/CodexAccess.cs b/ProjectPlugins/CodexPlugin/CodexAccess.cs index 469c8ca..7013900 100644 --- a/ProjectPlugins/CodexPlugin/CodexAccess.cs +++ b/ProjectPlugins/CodexPlugin/CodexAccess.cs @@ -196,8 +196,10 @@ namespace CodexPlugin private void Investigate(ILog log, Failure failure, ITimeSet timeSet) { - log.Log($"Retry {failure.TryNumber} took {Time.FormatDuration(failure.Duration)}. (HTTP timeout = {Time.FormatDuration(timeSet.HttpCallTimeout())}) " + - $"Checking if node responds to debug/info..."); + log.Log($"Retry {failure.TryNumber} took {Time.FormatDuration(failure.Duration)} and failed with '{failure.Exception}'. " + + $"(HTTP timeout = {Time.FormatDuration(timeSet.HttpCallTimeout())}) " + + $"Checking if node responds to debug/info..."); + try { var debugInfo = GetDebugInfo(); @@ -218,6 +220,13 @@ namespace CodexPlugin DownloadLog(); Throw(failure); } + + if (failure.Duration < timeSet.HttpCallTimeout()) + { + log.Log("Retry failed within HTTP timeout duration."); + DownloadLog(); + Throw(failure); + } } private void Throw(Failure failure) diff --git a/Tests/CodexLongTests/ScalabilityTests/ScalabilityTests.cs b/Tests/CodexLongTests/ScalabilityTests/ScalabilityTests.cs index 1b759ac..b63d5bd 100644 --- a/Tests/CodexLongTests/ScalabilityTests/ScalabilityTests.cs +++ b/Tests/CodexLongTests/ScalabilityTests/ScalabilityTests.cs @@ -1,5 +1,4 @@ using CodexPlugin; -using MetricsPlugin; using DistTestCore; using FileUtils; using NUnit.Framework; @@ -28,7 +27,7 @@ public class ScalabilityTests : CodexDistTest var bootstrap = StartCodex(s => s.WithLogLevel(logLevel)); var nodes = StartCodex(numberOfNodes - 1, s => s - .EnableMetrics() + //.EnableMetrics() .WithBootstrapNode(bootstrap) 
.WithLogLevel(logLevel) .WithStorageQuota((fileSizeInMb + 50).MB()) @@ -36,15 +35,15 @@ public class ScalabilityTests : CodexDistTest var uploader = nodes.PickOneRandom(); var downloader = nodes.PickOneRandom(); - var metrics = Ci.GetMetricsFor(uploader, downloader); + //var metrics = Ci.GetMetricsFor(uploader, downloader); var testFile = GenerateTestFile(fileSizeInMb.MB()); - LogNodeStatus(uploader, metrics[0]); - var contentId = uploader.UploadFile(testFile, f => LogNodeStatus(uploader, metrics[0])); + LogNodeStatus(uploader); + var contentId = uploader.UploadFile(testFile, f => LogNodeStatus(uploader)); - LogNodeStatus(downloader, metrics[1]); - var downloadedFile = downloader.DownloadContent(contentId, f => LogNodeStatus(downloader, metrics[1])); + LogNodeStatus(downloader); + var downloadedFile = downloader.DownloadContent(contentId, f => LogNodeStatus(downloader)); downloadedFile!.AssertIsEqual(testFile); diff --git a/Tests/CodexTests/CodexDistTest.cs b/Tests/CodexTests/CodexDistTest.cs index 10ca6a4..1c184bc 100644 --- a/Tests/CodexTests/CodexDistTest.cs +++ b/Tests/CodexTests/CodexDistTest.cs @@ -104,8 +104,7 @@ namespace CodexTests public void LogNodeStatus(ICodexNode node, IMetricsAccess? metrics = null) { Log("Status for " + node.GetName() + Environment.NewLine + - GetBasicNodeStatus(node) + - GetNodeMetrics(metrics)); + GetBasicNodeStatus(node)); } private string GetBasicNodeStatus(ICodexNode node) @@ -114,13 +113,14 @@ namespace CodexTests node.Space().ToString() + Environment.NewLine; } - private string GetNodeMetrics(IMetricsAccess? metrics) - { - if (metrics == null) return "No metrics enabled"; - var m = metrics.GetAllMetrics(); - if (m == null) return "No metrics received"; - return m.AsCsv(); - } + // Disabled for now: Makes huge log files! + //private string GetNodeMetrics(IMetricsAccess? 
metrics) + //{ + // if (metrics == null) return "No metrics enabled"; + // var m = metrics.GetAllMetrics(); + // if (m == null) return "No metrics received"; + // return m.AsCsv(); + //} protected virtual void OnCodexSetup(ICodexSetup setup) { From 52ae67123464fa5d37714212dd2fc188a34327be Mon Sep 17 00:00:00 2001 From: benbierens Date: Fri, 7 Jun 2024 17:08:14 +0200 Subject: [PATCH 17/27] focussing on the interesting numbers for now --- Tests/CodexLongTests/ScalabilityTests/ScalabilityTests.cs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/Tests/CodexLongTests/ScalabilityTests/ScalabilityTests.cs b/Tests/CodexLongTests/ScalabilityTests/ScalabilityTests.cs index b63d5bd..6b6a2dc 100644 --- a/Tests/CodexLongTests/ScalabilityTests/ScalabilityTests.cs +++ b/Tests/CodexLongTests/ScalabilityTests/ScalabilityTests.cs @@ -19,8 +19,8 @@ public class ScalabilityTests : CodexDistTest [DontDownloadLogs] [WaitForCleanup] public void ShouldMaintainFileInNetwork( - [Values(10, 40)] int numberOfNodes, // TODO: include 80 and 100 - [Values(100, 1000, 5000, 10000)] int fileSizeInMb + [Values(10)] int numberOfNodes, // TODO: include 40, 80 and 100 + [Values(5000, 10000)] int fileSizeInMb // TODO: include 100, 1000 ) { var logLevel = CodexLogLevel.Info; From 16bf5ce5ab9c3d52ac6cf880320354742a2480b2 Mon Sep 17 00:00:00 2001 From: benbierens Date: Fri, 7 Jun 2024 18:16:19 +0200 Subject: [PATCH 18/27] I think I may be on to something --- .../ScalabilityTests/ScalabilityTests.cs | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/Tests/CodexLongTests/ScalabilityTests/ScalabilityTests.cs b/Tests/CodexLongTests/ScalabilityTests/ScalabilityTests.cs index 6b6a2dc..4c9ce65 100644 --- a/Tests/CodexLongTests/ScalabilityTests/ScalabilityTests.cs +++ b/Tests/CodexLongTests/ScalabilityTests/ScalabilityTests.cs @@ -9,6 +9,12 @@ namespace CodexTests.ScalabilityTests; [TestFixture] public class ScalabilityTests : CodexDistTest { + private const int 
Below2 = (Int32.MaxValue / (1024 * 1024)) - 20; + private const int Below1 = (Int32.MaxValue / (1024 * 1024)) - 10; + private const int Exact = (Int32.MaxValue / (1024 * 1024)) + 0; + private const int Above1 = (Int32.MaxValue / (1024 * 1024)) + 10; + private const int Above2 = (Int32.MaxValue / (1024 * 1024)) + 20; + /// /// We upload a file to node A, then download it with B. /// Then we stop node A, and download again with node C. @@ -19,8 +25,8 @@ public class ScalabilityTests : CodexDistTest [DontDownloadLogs] [WaitForCleanup] public void ShouldMaintainFileInNetwork( - [Values(10)] int numberOfNodes, // TODO: include 40, 80 and 100 - [Values(5000, 10000)] int fileSizeInMb // TODO: include 100, 1000 + [Values(5)] int numberOfNodes, // TODO: include 10, 40, 80 and 100, not 5 + [Values(Below2, Below1, Exact, Above1, Above2)] int fileSizeInMb // TODO: include 100, 1000, 5000, 10000 ) { var logLevel = CodexLogLevel.Info; From aa416d50b3658c4361fd4095d6db2a6a4dfa4594 Mon Sep 17 00:00:00 2001 From: benbierens Date: Sat, 8 Jun 2024 10:36:23 +0200 Subject: [PATCH 19/27] ensuring enough mounted disk space --- .../KubernetesWorkflow/ByteSizeExtensions.cs | 41 ------------------- Framework/KubernetesWorkflow/CrashWatcher.cs | 5 ++- Framework/KubernetesWorkflow/K8sController.cs | 2 +- .../Recipe/ContainerRecipeFactory.cs | 4 +- ProjectPlugins/CodexPlugin/CodexAccess.cs | 4 +- .../CodexPlugin/CodexContainerRecipe.cs | 2 +- .../ScalabilityTests/ScalabilityTests.cs | 12 ++---- 7 files changed, 13 insertions(+), 57 deletions(-) delete mode 100644 Framework/KubernetesWorkflow/ByteSizeExtensions.cs diff --git a/Framework/KubernetesWorkflow/ByteSizeExtensions.cs b/Framework/KubernetesWorkflow/ByteSizeExtensions.cs deleted file mode 100644 index c3cca24..0000000 --- a/Framework/KubernetesWorkflow/ByteSizeExtensions.cs +++ /dev/null @@ -1,41 +0,0 @@ -using Utils; - -namespace KubernetesWorkflow -{ - public static class ByteSizeExtensions - { - public static string 
ToSuffixNotation(this ByteSize b) - { - long x = 1024; - var map = new Dictionary - { - { Pow(x, 4), "Ti" }, - { Pow(x, 3), "Gi" }, - { Pow(x, 2), "Mi" }, - { (x), "Ki" }, - }; - - var bytes = b.SizeInBytes; - foreach (var pair in map) - { - if (bytes > pair.Key) - { - double bytesD = bytes; - double divD = pair.Key; - double numD = Math.Ceiling(bytesD / divD); - var v = Convert.ToInt64(numD); - return $"{v}{pair.Value}"; - } - } - - return $"{bytes}"; - } - - private static long Pow(long x, int v) - { - long result = 1; - for (var i = 0; i < v; i++) result *= x; - return result; - } - } -} diff --git a/Framework/KubernetesWorkflow/CrashWatcher.cs b/Framework/KubernetesWorkflow/CrashWatcher.cs index 5cb2e03..eb1ced7 100644 --- a/Framework/KubernetesWorkflow/CrashWatcher.cs +++ b/Framework/KubernetesWorkflow/CrashWatcher.cs @@ -83,12 +83,13 @@ namespace KubernetesWorkflow private bool HasContainerBeenRestarted(Kubernetes client) { var podInfo = client.ReadNamespacedPod(podName, k8sNamespace); - return podInfo.Status.ContainerStatuses.Any(c => c.RestartCount > 0); + var result = podInfo.Status.ContainerStatuses.Any(c => c.RestartCount > 0); + if (result) log.Log("Pod crash detected for " + containerName); + return result; } private void DownloadCrashedContainerLogs(Kubernetes client) { - log.Log("Pod crash detected for " + containerName); using var stream = client.ReadNamespacedPodLog(podName, k8sNamespace, recipeName, previous: true); logHandler!.Log(stream); } diff --git a/Framework/KubernetesWorkflow/K8sController.cs b/Framework/KubernetesWorkflow/K8sController.cs index 30de0c9..00c7563 100644 --- a/Framework/KubernetesWorkflow/K8sController.cs +++ b/Framework/KubernetesWorkflow/K8sController.cs @@ -535,7 +535,7 @@ namespace KubernetesWorkflow } if (set.Memory.SizeInBytes != 0) { - result.Add("memory", new ResourceQuantity(set.Memory.ToSuffixNotation())); + result.Add("memory", new ResourceQuantity(set.Memory.SizeInBytes.ToString())); } return result; } diff --git 
a/Framework/KubernetesWorkflow/Recipe/ContainerRecipeFactory.cs b/Framework/KubernetesWorkflow/Recipe/ContainerRecipeFactory.cs index 6b6ae2d..2c42143 100644 --- a/Framework/KubernetesWorkflow/Recipe/ContainerRecipeFactory.cs +++ b/Framework/KubernetesWorkflow/Recipe/ContainerRecipeFactory.cs @@ -105,7 +105,7 @@ namespace KubernetesWorkflow.Recipe protected void AddVolume(string name, string mountPath, string? subPath = null, string? secret = null, string? hostPath = null) { - var size = 10.MB().ToSuffixNotation(); + var size = 10.MB().SizeInBytes.ToString(); volumeMounts.Add(new VolumeMount(name, mountPath, subPath, size, secret, hostPath)); } @@ -114,7 +114,7 @@ namespace KubernetesWorkflow.Recipe volumeMounts.Add(new VolumeMount( $"autovolume-{Guid.NewGuid().ToString().ToLowerInvariant()}", mountPath, - resourceQuantity: volumeSize.ToSuffixNotation())); + resourceQuantity: volumeSize.SizeInBytes.ToString())); } protected void Additional(object userData) diff --git a/ProjectPlugins/CodexPlugin/CodexAccess.cs b/ProjectPlugins/CodexPlugin/CodexAccess.cs index 7013900..b63b9f4 100644 --- a/ProjectPlugins/CodexPlugin/CodexAccess.cs +++ b/ProjectPlugins/CodexPlugin/CodexAccess.cs @@ -167,8 +167,8 @@ namespace CodexPlugin { var log = tools.GetLog(); var file = log.CreateSubfile(); - log.Log($"Container {Container.Name} has crashed. 
Downloading crash log to '{file.FullFilename}'..."); - file.Write($"Container Crash Log for {Container.Name}."); + log.Log($"Downloading log to '{file.FullFilename}'..."); + file.Write($"Container log for {Container.Name}."); using var reader = new StreamReader(crashLog); var line = reader.ReadLine(); diff --git a/ProjectPlugins/CodexPlugin/CodexContainerRecipe.cs b/ProjectPlugins/CodexPlugin/CodexContainerRecipe.cs index 531f82a..ac29ce1 100644 --- a/ProjectPlugins/CodexPlugin/CodexContainerRecipe.cs +++ b/ProjectPlugins/CodexPlugin/CodexContainerRecipe.cs @@ -159,7 +159,7 @@ namespace CodexPlugin private ByteSize GetVolumeCapacity(CodexStartupConfig config) { - if (config.StorageQuota != null) return config.StorageQuota; + if (config.StorageQuota != null) return config.StorageQuota.Multiply(1.2); // Default Codex quota: 8 Gb, using +20% to be safe. return 8.GB().Multiply(1.2); } diff --git a/Tests/CodexLongTests/ScalabilityTests/ScalabilityTests.cs b/Tests/CodexLongTests/ScalabilityTests/ScalabilityTests.cs index 4c9ce65..2e6eb14 100644 --- a/Tests/CodexLongTests/ScalabilityTests/ScalabilityTests.cs +++ b/Tests/CodexLongTests/ScalabilityTests/ScalabilityTests.cs @@ -9,12 +9,6 @@ namespace CodexTests.ScalabilityTests; [TestFixture] public class ScalabilityTests : CodexDistTest { - private const int Below2 = (Int32.MaxValue / (1024 * 1024)) - 20; - private const int Below1 = (Int32.MaxValue / (1024 * 1024)) - 10; - private const int Exact = (Int32.MaxValue / (1024 * 1024)) + 0; - private const int Above1 = (Int32.MaxValue / (1024 * 1024)) + 10; - private const int Above2 = (Int32.MaxValue / (1024 * 1024)) + 20; - /// /// We upload a file to node A, then download it with B. /// Then we stop node A, and download again with node C. 
@@ -25,8 +19,8 @@ public class ScalabilityTests : CodexDistTest [DontDownloadLogs] [WaitForCleanup] public void ShouldMaintainFileInNetwork( - [Values(5)] int numberOfNodes, // TODO: include 10, 40, 80 and 100, not 5 - [Values(Below2, Below1, Exact, Above1, Above2)] int fileSizeInMb // TODO: include 100, 1000, 5000, 10000 + [Values(4, 5, 6)] int numberOfNodes, // TODO: include 10, 40, 80 and 100, not 5 + [Values(2000, 2200, 2500, 2800, 3000, 3200, 3500, 3800, 4200, 4500, 4800, 5000)] int fileSizeInMb // TODO: include 100, 1000, 5000, 10000 ) { var logLevel = CodexLogLevel.Info; @@ -47,9 +41,11 @@ public class ScalabilityTests : CodexDistTest LogNodeStatus(uploader); var contentId = uploader.UploadFile(testFile, f => LogNodeStatus(uploader)); + LogNodeStatus(uploader); LogNodeStatus(downloader); var downloadedFile = downloader.DownloadContent(contentId, f => LogNodeStatus(downloader)); + LogNodeStatus(downloader); downloadedFile!.AssertIsEqual(testFile); From 25663b59a0bf08e9233eecc49cdcfad46e84fac4 Mon Sep 17 00:00:00 2001 From: benbierens Date: Sat, 8 Jun 2024 10:50:30 +0200 Subject: [PATCH 20/27] sets trace level for scalability tests --- Tests/CodexLongTests/ScalabilityTests/ScalabilityTests.cs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Tests/CodexLongTests/ScalabilityTests/ScalabilityTests.cs b/Tests/CodexLongTests/ScalabilityTests/ScalabilityTests.cs index 2e6eb14..14f363b 100644 --- a/Tests/CodexLongTests/ScalabilityTests/ScalabilityTests.cs +++ b/Tests/CodexLongTests/ScalabilityTests/ScalabilityTests.cs @@ -23,7 +23,7 @@ public class ScalabilityTests : CodexDistTest [Values(2000, 2200, 2500, 2800, 3000, 3200, 3500, 3800, 4200, 4500, 4800, 5000)] int fileSizeInMb // TODO: include 100, 1000, 5000, 10000 ) { - var logLevel = CodexLogLevel.Info; + var logLevel = CodexLogLevel.Trace; var bootstrap = StartCodex(s => s.WithLogLevel(logLevel)); var nodes = StartCodex(numberOfNodes - 1, s => s From 22527a5d93606f5c8c5d5bffb2354112f161f9ce Mon 
Sep 17 00:00:00 2001 From: Ben Date: Mon, 10 Jun 2024 10:58:50 +0200 Subject: [PATCH 21/27] Deletes block repostore folder in container when Codex node stops. --- ProjectPlugins/CodexPlugin/CodexAccess.cs | 8 ++++++++ ProjectPlugins/CodexPlugin/CodexContainerRecipe.cs | 2 +- ProjectPlugins/CodexPlugin/CodexNode.cs | 11 ++++++----- Tests/CodexTests/CodexDistTest.cs | 9 +++++++++ 4 files changed, 24 insertions(+), 6 deletions(-) diff --git a/ProjectPlugins/CodexPlugin/CodexAccess.cs b/ProjectPlugins/CodexPlugin/CodexAccess.cs index b63b9f4..ffc9258 100644 --- a/ProjectPlugins/CodexPlugin/CodexAccess.cs +++ b/ProjectPlugins/CodexPlugin/CodexAccess.cs @@ -126,6 +126,14 @@ namespace CodexPlugin return workflow.GetPodInfo(Container); } + public void DeleteRepoFolder() + { + var containerNumber = Container.Containers.First().Recipe.Number; + var dataDir = $"datadir{containerNumber}"; + var workflow = tools.CreateWorkflow(); + workflow.ExecuteCommand(Container.Containers.First(), "rm", "-Rfv", $"/codex/{dataDir}/repo"); + } + private T OnCodex(Func> action) { var result = tools.CreateHttp(CheckContainerCrashed).OnClient(client => CallCodex(client, action)); diff --git a/ProjectPlugins/CodexPlugin/CodexContainerRecipe.cs b/ProjectPlugins/CodexPlugin/CodexContainerRecipe.cs index ac29ce1..f118377 100644 --- a/ProjectPlugins/CodexPlugin/CodexContainerRecipe.cs +++ b/ProjectPlugins/CodexPlugin/CodexContainerRecipe.cs @@ -119,7 +119,7 @@ namespace CodexPlugin } } - if(!string.IsNullOrEmpty(config.NameOverride)) + if (!string.IsNullOrEmpty(config.NameOverride)) { AddEnvVar("CODEX_NODENAME", config.NameOverride); } diff --git a/ProjectPlugins/CodexPlugin/CodexNode.cs b/ProjectPlugins/CodexPlugin/CodexNode.cs index 5179aae..4965dbc 100644 --- a/ProjectPlugins/CodexPlugin/CodexNode.cs +++ b/ProjectPlugins/CodexPlugin/CodexNode.cs @@ -26,6 +26,7 @@ namespace CodexPlugin CrashWatcher CrashWatcher { get; } PodInfo GetPodInfo(); ITransferSpeeds TransferSpeeds { get; } + void 
DeleteRepoFolder(); void Stop(bool waitTillStopped); } @@ -160,15 +161,15 @@ namespace CodexPlugin return CodexAccess.GetPodInfo(); } + public void DeleteRepoFolder() + { + CodexAccess.DeleteRepoFolder(); + } + public void Stop(bool waitTillStopped) { CrashWatcher.Stop(); Group.Stop(this, waitTillStopped); - // if (Group.Count() > 1) throw new InvalidOperationException("Codex-nodes that are part of a group cannot be " + - // "individually shut down. Use 'BringOffline()' on the group object to stop the group. This method is only " + - // "available for codex-nodes in groups of 1."); - // - // Group.BringOffline(waitTillStopped); } public void EnsureOnlineGetVersionResponse() diff --git a/Tests/CodexTests/CodexDistTest.cs b/Tests/CodexTests/CodexDistTest.cs index 1c184bc..d3e9beb 100644 --- a/Tests/CodexTests/CodexDistTest.cs +++ b/Tests/CodexTests/CodexDistTest.cs @@ -38,6 +38,7 @@ namespace CodexTests protected override void LifecycleStop(TestLifecycle lifecycle) { + DeleteBlockRepo(onlineCodexNodes[lifecycle]); onlineCodexNodes.Remove(lifecycle); } @@ -134,5 +135,13 @@ namespace CodexTests if (upload != null) data.Add("avgupload", upload.ToString()); if (download != null) data.Add("avgdownload", download.ToString()); } + + private void DeleteBlockRepo(List codexNodes) + { + foreach (var node in codexNodes) + { + node.DeleteRepoFolder(); + } + } } } From 74ca512f92955ac914996b3194d8ac123fb5678a Mon Sep 17 00:00:00 2001 From: Ben Date: Mon, 10 Jun 2024 15:15:27 +0200 Subject: [PATCH 22/27] Sets up test for downloading content by contract CID --- ProjectPlugins/CodexPlugin/Mapper.cs | 49 ++++++++++++++++--- .../CodexPlugin/MarketplaceAccess.cs | 3 ++ .../CodexPlugin/MarketplaceTypes.cs | 30 ++++++++++++ .../CodexTests/BasicTests/MarketplaceTests.cs | 41 ++++++++++++++++ 4 files changed, 115 insertions(+), 8 deletions(-) diff --git a/ProjectPlugins/CodexPlugin/Mapper.cs b/ProjectPlugins/CodexPlugin/Mapper.cs index 6e59c68..c175ae1 100644 --- 
a/ProjectPlugins/CodexPlugin/Mapper.cs +++ b/ProjectPlugins/CodexPlugin/Mapper.cs @@ -63,14 +63,47 @@ namespace CodexPlugin }; } - public StoragePurchase Map(CodexOpenApi.Purchase purchase) - { - return new StoragePurchase - { - State = purchase.State, - Error = purchase.Error - }; - } + // TODO: Fix openapi spec for this call. + //public StoragePurchase Map(CodexOpenApi.Purchase purchase) + //{ + // return new StoragePurchase(Map(purchase.Request)) + // { + // State = purchase.State, + // Error = purchase.Error + // }; + //} + + //public StorageRequest Map(CodexOpenApi.StorageRequest request) + //{ + // return new StorageRequest(Map(request.Ask), Map(request.Content)) + // { + // Id = request.Id, + // Client = request.Client, + // Expiry = TimeSpan.FromSeconds(Convert.ToInt64(request.Expiry)), + // Nonce = request.Nonce + // }; + //} + + //public StorageAsk Map(CodexOpenApi.StorageAsk ask) + //{ + // return new StorageAsk + // { + // Duration = TimeSpan.FromSeconds(Convert.ToInt64(ask.Duration)), + // MaxSlotLoss = ask.MaxSlotLoss, + // ProofProbability = ask.ProofProbability, + // Reward = Convert.ToDecimal(ask.Reward).TstWei(), + // Slots = ask.Slots, + // SlotSize = new ByteSize(Convert.ToInt64(ask.SlotSize)) + // }; + //} + + //public StorageContent Map(CodexOpenApi.Content content) + //{ + // return new StorageContent + // { + // Cid = content.Cid + // }; + //} public StorageAvailability Map(CodexOpenApi.SalesAvailabilityREAD read) { diff --git a/ProjectPlugins/CodexPlugin/MarketplaceAccess.cs b/ProjectPlugins/CodexPlugin/MarketplaceAccess.cs index 8944d95..6f5e47e 100644 --- a/ProjectPlugins/CodexPlugin/MarketplaceAccess.cs +++ b/ProjectPlugins/CodexPlugin/MarketplaceAccess.cs @@ -97,10 +97,13 @@ namespace CodexPlugin this.codexAccess = codexAccess; PurchaseId = purchaseId; Purchase = purchase; + + ContentId = new ContentId(codexAccess.GetPurchaseStatus(purchaseId).Request.Content.Cid); } public string PurchaseId { get; } public StoragePurchaseRequest 
Purchase { get; } + public ContentId ContentId { get; } public TimeSpan? PendingToSubmitted => contractSubmittedUtc - contractPendingUtc; public TimeSpan? SubmittedToStarted => contractStartedUtc - contractSubmittedUtc; diff --git a/ProjectPlugins/CodexPlugin/MarketplaceTypes.cs b/ProjectPlugins/CodexPlugin/MarketplaceTypes.cs index caeda25..21aa86a 100644 --- a/ProjectPlugins/CodexPlugin/MarketplaceTypes.cs +++ b/ProjectPlugins/CodexPlugin/MarketplaceTypes.cs @@ -1,5 +1,7 @@ using CodexContractsPlugin; +using CodexOpenApi; using Logging; +using System.Data; using Utils; namespace CodexPlugin @@ -37,6 +39,34 @@ namespace CodexPlugin { public string State { get; set; } = string.Empty; public string Error { get; set; } = string.Empty; + public StorageRequest Request { get; set; } = null!; + } + + public class StorageRequest + { + public string Id { get; set; } = string.Empty; + public string Client { get; set; } = string.Empty; + public StorageAsk Ask { get; set; } = null!; + public StorageContent Content { get; set; } = null!; + public string Expiry { get; set; } = string.Empty; + public string Nonce { get; set; } = string.Empty; + } + + public class StorageAsk + { + public int Slots { get; set; } + public string SlotSize { get; set; } = string.Empty; + public string Duration { get; set; } = string.Empty; + public string ProofProbability { get; set; } = string.Empty; + public string Reward { get; set; } = string.Empty; + public int MaxSlotLoss { get; set; } + } + + public class StorageContent + { + public string Cid { get; set; } = string.Empty; + //public ErasureParameters Erasure { get; set; } + //public PoRParameters Por { get; set; } } public class StorageAvailability diff --git a/Tests/CodexTests/BasicTests/MarketplaceTests.cs b/Tests/CodexTests/BasicTests/MarketplaceTests.cs index 5d69387..493d18c 100644 --- a/Tests/CodexTests/BasicTests/MarketplaceTests.cs +++ b/Tests/CodexTests/BasicTests/MarketplaceTests.cs @@ -85,6 +85,47 @@ namespace CodexTests.BasicTests 
Assert.That(contracts.GetRequestState(request), Is.EqualTo(RequestState.Finished)); } + [Test] + public void CanDownloadContentFromContractCid() + { + var fileSize = 10.MB(); + var geth = Ci.StartGethNode(s => s.IsMiner().WithName("disttest-geth")); + var contracts = Ci.StartCodexContracts(geth); + var testFile = GenerateTestFile(fileSize); + + var client = StartCodex(s => s + .WithName("Client") + .EnableMarketplace(geth, contracts, m => m + .WithInitial(10.Eth(), 10.Tst()))); + + var uploadCid = client.UploadFile(testFile); + + var purchase = new StoragePurchaseRequest(uploadCid) + { + PricePerSlotPerSecond = 2.TstWei(), + RequiredCollateral = 10.TstWei(), + MinRequiredNumberOfNodes = 5, + NodeFailureTolerance = 2, + ProofProbability = 5, + Duration = TimeSpan.FromMinutes(5), + Expiry = TimeSpan.FromMinutes(4) + }; + + var purchaseContract = client.Marketplace.RequestStorage(purchase); + + var contractCid = purchaseContract.ContentId; + + Assert.That(uploadCid.Id, Is.Not.EqualTo(contractCid.Id)); + + var downloader = StartCodex(s => s.WithName("Downloader")); + + var uploadedFile = downloader.DownloadContent(uploadCid); + testFile.AssertIsEqual(uploadedFile); + + var contractFile = downloader.DownloadContent(contractCid); + testFile.AssertIsEqual(contractFile); + } + private void WaitForAllSlotFilledEvents(ICodexContracts contracts, StoragePurchaseRequest purchase, IGethNode geth) { Time.Retry(() => From 9a46e20b4aa95171ef0b26da0ad3e1234ab19c62 Mon Sep 17 00:00:00 2001 From: Ben Date: Tue, 11 Jun 2024 11:33:02 +0200 Subject: [PATCH 23/27] Fixes exception in CodexDistTests --- Framework/Utils/Time.cs | 4 +--- Tests/CodexTests/CodexDistTest.cs | 9 ++++++++- 2 files changed, 9 insertions(+), 4 deletions(-) diff --git a/Framework/Utils/Time.cs b/Framework/Utils/Time.cs index f22242f..42051af 100644 --- a/Framework/Utils/Time.cs +++ b/Framework/Utils/Time.cs @@ -1,6 +1,4 @@ -using System.Diagnostics; - -namespace Utils +namespace Utils { public static class Time { 
diff --git a/Tests/CodexTests/CodexDistTest.cs b/Tests/CodexTests/CodexDistTest.cs index d3e9beb..9758060 100644 --- a/Tests/CodexTests/CodexDistTest.cs +++ b/Tests/CodexTests/CodexDistTest.cs @@ -140,7 +140,14 @@ namespace CodexTests { foreach (var node in codexNodes) { - node.DeleteRepoFolder(); + try + { + node.DeleteRepoFolder(); + } + catch (Exception ex) + { + Log($"Failed to delete repo folder for node {node.GetName()} : {ex}"); + } } } } From 4fc9835f430f0b393996e2a75e7678ed4e4dc80b Mon Sep 17 00:00:00 2001 From: Ben Date: Wed, 12 Jun 2024 10:48:52 +0200 Subject: [PATCH 24/27] Attempt to log disk space before and after uploads and downloads --- Framework/KubernetesWorkflow/RunningPod.cs | 5 +- ProjectPlugins/CodexPlugin/CodexAccess.cs | 57 +++++++++++++++++----- ProjectPlugins/CodexPlugin/CodexStarter.cs | 2 +- Tests/CodexTests/CodexDistTest.cs | 4 ++ 4 files changed, 50 insertions(+), 18 deletions(-) diff --git a/Framework/KubernetesWorkflow/RunningPod.cs b/Framework/KubernetesWorkflow/RunningPod.cs index e86b2aa..43a05cc 100644 --- a/Framework/KubernetesWorkflow/RunningPod.cs +++ b/Framework/KubernetesWorkflow/RunningPod.cs @@ -1,8 +1,5 @@ -using k8s; -using k8s.Models; -using KubernetesWorkflow.Recipe; +using KubernetesWorkflow.Recipe; using KubernetesWorkflow.Types; -using Newtonsoft.Json; namespace KubernetesWorkflow { diff --git a/ProjectPlugins/CodexPlugin/CodexAccess.cs b/ProjectPlugins/CodexPlugin/CodexAccess.cs index ffc9258..34459f4 100644 --- a/ProjectPlugins/CodexPlugin/CodexAccess.cs +++ b/ProjectPlugins/CodexPlugin/CodexAccess.cs @@ -10,6 +10,7 @@ namespace CodexPlugin { public class CodexAccess : ILogHandler { + private readonly ILog log; private readonly IPluginTools tools; private readonly Mapper mapper = new Mapper(); private bool hasContainerCrashed; @@ -17,6 +18,7 @@ namespace CodexPlugin public CodexAccess(IPluginTools tools, RunningPod container, CrashWatcher crashWatcher) { this.tools = tools; + log = tools.GetLog(); Container = 
container; CrashWatcher = crashWatcher; hasContainerCrashed = false; @@ -62,17 +64,27 @@ namespace CodexPlugin public string UploadFile(FileStream fileStream, Action onFailure) { - return OnCodex( + LogDiskSpace("Before upload"); + + var response = OnCodex( api => api.UploadAsync(fileStream), CreateRetryConfig(nameof(UploadFile), onFailure)); + + LogDiskSpace("After upload"); + + return response; } public Stream DownloadFile(string contentId, Action onFailure) { + LogDiskSpace("Before download"); + var fileResponse = OnCodex( api => api.DownloadNetworkAsync(contentId), CreateRetryConfig(nameof(DownloadFile), onFailure)); + LogDiskSpace("After download"); + if (fileResponse.StatusCode != 200) throw new Exception("Download failed with StatusCode: " + fileResponse.StatusCode); return fileResponse.Stream; } @@ -132,6 +144,7 @@ namespace CodexPlugin var dataDir = $"datadir{containerNumber}"; var workflow = tools.CreateWorkflow(); workflow.ExecuteCommand(Container.Containers.First(), "rm", "-Rfv", $"/codex/{dataDir}/repo"); + Log("Deleted repo folder."); } private T OnCodex(Func> action) @@ -163,7 +176,7 @@ namespace CodexPlugin private Address GetAddress() { - return Container.Containers.Single().GetAddress(tools.GetLog(), CodexContainerRecipe.ApiPortTag); + return Container.Containers.Single().GetAddress(log, CodexContainerRecipe.ApiPortTag); } private void CheckContainerCrashed(HttpClient client) @@ -173,9 +186,8 @@ namespace CodexPlugin void ILogHandler.Log(Stream crashLog) { - var log = tools.GetLog(); var file = log.CreateSubfile(); - log.Log($"Downloading log to '{file.FullFilename}'..."); + Log($"Downloading log to '{file.FullFilename}'..."); file.Write($"Container log for {Container.Name}."); using var reader = new StreamReader(crashLog); @@ -186,62 +198,81 @@ namespace CodexPlugin line = reader.ReadLine(); } - log.Log("Crash log successfully downloaded."); + Log("Container log successfully downloaded."); hasContainerCrashed = true; } private Retry 
CreateRetryConfig(string description, Action onFailure) { var timeSet = tools.TimeSet; - var log = tools.GetLog(); return new Retry(description, timeSet.HttpRetryTimeout(), timeSet.HttpCallRetryDelay(), failure => { onFailure(failure); - Investigate(log, failure, timeSet); + Investigate(failure, timeSet); }); } - private void Investigate(ILog log, Failure failure, ITimeSet timeSet) + private void Investigate(Failure failure, ITimeSet timeSet) { - log.Log($"Retry {failure.TryNumber} took {Time.FormatDuration(failure.Duration)} and failed with '{failure.Exception}'. " + + Log($"Retry {failure.TryNumber} took {Time.FormatDuration(failure.Duration)} and failed with '{failure.Exception}'. " + $"(HTTP timeout = {Time.FormatDuration(timeSet.HttpCallTimeout())}) " + $"Checking if node responds to debug/info..."); + LogDiskSpace("After retry failure"); + try { var debugInfo = GetDebugInfo(); if (string.IsNullOrEmpty(debugInfo.Spr)) { - log.Log("Did not get value debug/info response."); + Log("Did not get value debug/info response."); DownloadLog(); Throw(failure); } else { - log.Log("Got valid response from debug/info."); + Log("Got valid response from debug/info."); } } catch (Exception ex) { - log.Log("Got exception from debug/info call: " + ex); + Log("Got exception from debug/info call: " + ex); DownloadLog(); Throw(failure); } if (failure.Duration < timeSet.HttpCallTimeout()) { - log.Log("Retry failed within HTTP timeout duration."); + Log("Retry failed within HTTP timeout duration."); DownloadLog(); Throw(failure); } } + private void LogDiskSpace(string msg) + { + try + { + var diskInfo = tools.CreateWorkflow().ExecuteCommand(Container.Containers.Single(), "df", "-h"); + Log($"{GetName()} - {msg} - Disk info: {diskInfo}"); + } + catch (Exception e) + { + Log("Failed to get disk info: " + e); + } + } + private void Throw(Failure failure) { throw failure.Exception; } + private void Log(string msg) + { + log.Log(msg); + } + private void DownloadLog() { 
tools.CreateWorkflow().DownloadContainerLog(Container.Containers.Single(), this); diff --git a/ProjectPlugins/CodexPlugin/CodexStarter.cs b/ProjectPlugins/CodexPlugin/CodexStarter.cs index dec13d3..77fd89a 100644 --- a/ProjectPlugins/CodexPlugin/CodexStarter.cs +++ b/ProjectPlugins/CodexPlugin/CodexStarter.cs @@ -33,7 +33,7 @@ namespace CodexPlugin foreach (var rc in containers) { var podInfo = GetPodInfo(rc); - var podInfos = string.Join(", ", rc.Containers.Select(c => $"Container: '{c.Name}' runs at '{podInfo.K8SNodeName}'={podInfo.Ip}")); + var podInfos = string.Join(", ", rc.Containers.Select(c => $"Container: '{c.Name}' PodLabel: '{c.RunningPod.StartResult.Deployment.PodLabel}' runs at '{podInfo.K8SNodeName}'={podInfo.Ip}")); Log($"Started {codexSetup.NumberOfNodes} nodes of image '{containers.First().Containers.First().Recipe.Image}'. ({podInfos})"); } LogSeparator(); diff --git a/Tests/CodexTests/CodexDistTest.cs b/Tests/CodexTests/CodexDistTest.cs index 9758060..437dbf5 100644 --- a/Tests/CodexTests/CodexDistTest.cs +++ b/Tests/CodexTests/CodexDistTest.cs @@ -142,6 +142,10 @@ namespace CodexTests { try { + if (node.CrashWatcher.HasContainerCrashed()) + { + Log("Crash detected!"); + } node.DeleteRepoFolder(); } catch (Exception ex) From 390b9de54accedd9aaaca9025140545b6fd57722 Mon Sep 17 00:00:00 2001 From: Ben Date: Wed, 12 Jun 2024 15:28:08 +0200 Subject: [PATCH 25/27] Replaces delete with disk info log. 
--- ProjectPlugins/CodexPlugin/CodexAccess.cs | 43 +++++------------ ProjectPlugins/CodexPlugin/CodexNode.cs | 15 +++--- .../LongFullyConnectedDownloadTests.cs | 4 +- Tests/CodexTests/CodexDistTest.cs | 48 +------------------ .../FullyConnectedDownloadTests.cs | 17 +++---- .../LayeredDiscoveryTests.cs | 17 ++++--- .../PeerDiscoveryTests/PeerDiscoveryTests.cs | 19 ++++---- Tests/DistTestCore/DistTest.cs | 2 +- 8 files changed, 54 insertions(+), 111 deletions(-) diff --git a/ProjectPlugins/CodexPlugin/CodexAccess.cs b/ProjectPlugins/CodexPlugin/CodexAccess.cs index 34459f4..a99a2ec 100644 --- a/ProjectPlugins/CodexPlugin/CodexAccess.cs +++ b/ProjectPlugins/CodexPlugin/CodexAccess.cs @@ -64,27 +64,17 @@ namespace CodexPlugin public string UploadFile(FileStream fileStream, Action onFailure) { - LogDiskSpace("Before upload"); - - var response = OnCodex( + return OnCodex( api => api.UploadAsync(fileStream), CreateRetryConfig(nameof(UploadFile), onFailure)); - - LogDiskSpace("After upload"); - - return response; } public Stream DownloadFile(string contentId, Action onFailure) { - LogDiskSpace("Before download"); - var fileResponse = OnCodex( api => api.DownloadNetworkAsync(contentId), CreateRetryConfig(nameof(DownloadFile), onFailure)); - LogDiskSpace("After download"); - if (fileResponse.StatusCode != 200) throw new Exception("Download failed with StatusCode: " + fileResponse.StatusCode); return fileResponse.Stream; } @@ -138,13 +128,17 @@ namespace CodexPlugin return workflow.GetPodInfo(Container); } - public void DeleteRepoFolder() + public void LogDiskSpace(string msg) { - var containerNumber = Container.Containers.First().Recipe.Number; - var dataDir = $"datadir{containerNumber}"; - var workflow = tools.CreateWorkflow(); - workflow.ExecuteCommand(Container.Containers.First(), "rm", "-Rfv", $"/codex/{dataDir}/repo"); - Log("Deleted repo folder."); + try + { + var diskInfo = tools.CreateWorkflow().ExecuteCommand(Container.Containers.Single(), "df", "--sync"); + 
Log($"{msg} - Disk info: {diskInfo}"); + } + catch (Exception e) + { + Log("Failed to get disk info: " + e); + } } private T OnCodex(Func> action) @@ -250,19 +244,6 @@ namespace CodexPlugin } } - private void LogDiskSpace(string msg) - { - try - { - var diskInfo = tools.CreateWorkflow().ExecuteCommand(Container.Containers.Single(), "df", "-h"); - Log($"{GetName()} - {msg} - Disk info: {diskInfo}"); - } - catch (Exception e) - { - Log("Failed to get disk info: " + e); - } - } - private void Throw(Failure failure) { throw failure.Exception; @@ -270,7 +251,7 @@ namespace CodexPlugin private void Log(string msg) { - log.Log(msg); + log.Log($"{GetName()} {msg}"); } private void DownloadLog() diff --git a/ProjectPlugins/CodexPlugin/CodexNode.cs b/ProjectPlugins/CodexPlugin/CodexNode.cs index 4965dbc..f0e064b 100644 --- a/ProjectPlugins/CodexPlugin/CodexNode.cs +++ b/ProjectPlugins/CodexPlugin/CodexNode.cs @@ -26,7 +26,6 @@ namespace CodexPlugin CrashWatcher CrashWatcher { get; } PodInfo GetPodInfo(); ITransferSpeeds TransferSpeeds { get; } - void DeleteRepoFolder(); void Stop(bool waitTillStopped); } @@ -100,6 +99,8 @@ namespace CodexPlugin public ContentId UploadFile(TrackedFile file, Action onFailure) { + CodexAccess.LogDiskSpace("Before upload"); + using var fileStream = File.OpenRead(file.Filename); var logMessage = $"Uploading file {file.Describe()}..."; @@ -116,6 +117,8 @@ namespace CodexPlugin if (response.StartsWith(UploadFailedMessage)) FrameworkAssert.Fail("Node failed to store block."); Log($"Uploaded file. 
Received contentId: '{response}'."); + CodexAccess.LogDiskSpace("After upload"); + return new ContentId(response); } @@ -161,13 +164,9 @@ namespace CodexPlugin return CodexAccess.GetPodInfo(); } - public void DeleteRepoFolder() - { - CodexAccess.DeleteRepoFolder(); - } - public void Stop(bool waitTillStopped) { + Log("Stopping..."); CrashWatcher.Stop(); Group.Stop(this, waitTillStopped); } @@ -203,6 +202,8 @@ namespace CodexPlugin private void DownloadToFile(string contentId, TrackedFile file, Action onFailure) { + CodexAccess.LogDiskSpace("Before download"); + using var fileStream = File.OpenWrite(file.Filename); try { @@ -214,6 +215,8 @@ namespace CodexPlugin Log($"Failed to download file '{contentId}'."); throw; } + + CodexAccess.LogDiskSpace("After download"); } private void Log(string msg) diff --git a/Tests/CodexLongTests/DownloadConnectivityTests/LongFullyConnectedDownloadTests.cs b/Tests/CodexLongTests/DownloadConnectivityTests/LongFullyConnectedDownloadTests.cs index 15fc9f3..eaa999e 100644 --- a/Tests/CodexLongTests/DownloadConnectivityTests/LongFullyConnectedDownloadTests.cs +++ b/Tests/CodexLongTests/DownloadConnectivityTests/LongFullyConnectedDownloadTests.cs @@ -15,9 +15,9 @@ namespace CodexLongTests.DownloadConnectivityTests [Values(10, 15, 20)] int numberOfNodes, [Values(10, 100)] int sizeMBs) { - for (var i = 0; i < numberOfNodes; i++) StartCodex(); + var nodes = StartCodex(numberOfNodes); - CreatePeerDownloadTestHelpers().AssertFullDownloadInterconnectivity(GetAllOnlineCodexNodes(), sizeMBs.MB()); + CreatePeerDownloadTestHelpers().AssertFullDownloadInterconnectivity(nodes, sizeMBs.MB()); } } } diff --git a/Tests/CodexTests/CodexDistTest.cs b/Tests/CodexTests/CodexDistTest.cs index 437dbf5..e62627f 100644 --- a/Tests/CodexTests/CodexDistTest.cs +++ b/Tests/CodexTests/CodexDistTest.cs @@ -14,8 +14,6 @@ namespace CodexTests { public class CodexDistTest : DistTest { - private readonly Dictionary> onlineCodexNodes = new Dictionary>(); - public 
CodexDistTest() { ProjectPlugin.Load(); @@ -31,17 +29,6 @@ namespace CodexTests localBuilder.Build(); } - protected override void LifecycleStart(TestLifecycle lifecycle) - { - onlineCodexNodes.Add(lifecycle, new List()); - } - - protected override void LifecycleStop(TestLifecycle lifecycle) - { - DeleteBlockRepo(onlineCodexNodes[lifecycle]); - onlineCodexNodes.Remove(lifecycle); - } - public ICodexNode StartCodex() { return StartCodex(s => { }); @@ -64,7 +51,7 @@ namespace CodexTests setup(s); OnCodexSetup(s); }); - onlineCodexNodes[Get()].AddRange(group); + return group; } @@ -78,11 +65,6 @@ namespace CodexTests return new PeerDownloadTestHelpers(GetTestLog(), Get().GetFileManager()); } - public IEnumerable GetAllOnlineCodexNodes() - { - return onlineCodexNodes[Get()]; - } - public void AssertBalance(ICodexContracts contracts, ICodexNode codexNode, Constraint constraint, string msg = "") { AssertHelpers.RetryAssert(constraint, () => contracts.GetTestTokenBalance(codexNode), nameof(AssertBalance) + msg); @@ -126,33 +108,5 @@ namespace CodexTests protected virtual void OnCodexSetup(ICodexSetup setup) { } - - protected override void CollectStatusLogData(TestLifecycle lifecycle, Dictionary data) - { - var nodes = onlineCodexNodes[lifecycle]; - var upload = nodes.Select(n => n.TransferSpeeds.GetUploadSpeed()).ToList()!.OptionalAverage(); - var download = nodes.Select(n => n.TransferSpeeds.GetDownloadSpeed()).ToList()!.OptionalAverage(); - if (upload != null) data.Add("avgupload", upload.ToString()); - if (download != null) data.Add("avgdownload", download.ToString()); - } - - private void DeleteBlockRepo(List codexNodes) - { - foreach (var node in codexNodes) - { - try - { - if (node.CrashWatcher.HasContainerCrashed()) - { - Log("Crash detected!"); - } - node.DeleteRepoFolder(); - } - catch (Exception ex) - { - Log($"Failed to delete repo folder for node {node.GetName()} : {ex}"); - } - } - } } } diff --git 
a/Tests/CodexTests/DownloadConnectivityTests/FullyConnectedDownloadTests.cs b/Tests/CodexTests/DownloadConnectivityTests/FullyConnectedDownloadTests.cs index e40d9fb..e6a42c5 100644 --- a/Tests/CodexTests/DownloadConnectivityTests/FullyConnectedDownloadTests.cs +++ b/Tests/CodexTests/DownloadConnectivityTests/FullyConnectedDownloadTests.cs @@ -1,4 +1,5 @@ using CodexContractsPlugin; +using CodexPlugin; using GethPlugin; using NUnit.Framework; using Utils; @@ -11,9 +12,9 @@ namespace CodexTests.DownloadConnectivityTests [Test] public void MetricsDoesNotInterfereWithPeerDownload() { - StartCodex(2, s => s.EnableMetrics()); + var nodes = StartCodex(2, s => s.EnableMetrics()); - AssertAllNodesConnected(); + AssertAllNodesConnected(nodes); } [Test] @@ -21,10 +22,10 @@ namespace CodexTests.DownloadConnectivityTests { var geth = Ci.StartGethNode(s => s.IsMiner()); var contracts = Ci.StartCodexContracts(geth); - StartCodex(2, s => s.EnableMarketplace(geth, contracts, m => m + var nodes = StartCodex(2, s => s.EnableMarketplace(geth, contracts, m => m .WithInitial(10.Eth(), 1000.TstWei()))); - AssertAllNodesConnected(); + AssertAllNodesConnected(nodes); } [Test] @@ -33,14 +34,14 @@ namespace CodexTests.DownloadConnectivityTests [Values(2, 5)] int numberOfNodes, [Values(1, 10)] int sizeMBs) { - StartCodex(numberOfNodes); + var nodes = StartCodex(numberOfNodes); - AssertAllNodesConnected(sizeMBs); + AssertAllNodesConnected(nodes, sizeMBs); } - private void AssertAllNodesConnected(int sizeMBs = 10) + private void AssertAllNodesConnected(IEnumerable nodes, int sizeMBs = 10) { - CreatePeerDownloadTestHelpers().AssertFullDownloadInterconnectivity(GetAllOnlineCodexNodes(), sizeMBs.MB()); + CreatePeerDownloadTestHelpers().AssertFullDownloadInterconnectivity(nodes, sizeMBs.MB()); } } } diff --git a/Tests/CodexTests/PeerDiscoveryTests/LayeredDiscoveryTests.cs b/Tests/CodexTests/PeerDiscoveryTests/LayeredDiscoveryTests.cs index 9c884fe..33225d6 100644 --- 
a/Tests/CodexTests/PeerDiscoveryTests/LayeredDiscoveryTests.cs +++ b/Tests/CodexTests/PeerDiscoveryTests/LayeredDiscoveryTests.cs @@ -1,4 +1,5 @@ -using NUnit.Framework; +using CodexPlugin; +using NUnit.Framework; namespace CodexTests.PeerDiscoveryTests { @@ -13,7 +14,7 @@ namespace CodexTests.PeerDiscoveryTests var l1Node = StartCodex(s => s.WithBootstrapNode(root)); var l2Target = StartCodex(s => s.WithBootstrapNode(l1Node)); - AssertAllNodesConnected(); + AssertAllNodesConnected(root, l1Source, l1Node, l2Target); } [Test] @@ -25,7 +26,7 @@ namespace CodexTests.PeerDiscoveryTests var l2Node = StartCodex(s => s.WithBootstrapNode(l1Node)); var l3Target = StartCodex(s => s.WithBootstrapNode(l2Node)); - AssertAllNodesConnected(); + AssertAllNodesConnected(root, l1Source, l1Node, l2Node, l3Target); } [TestCase(3)] @@ -33,18 +34,22 @@ namespace CodexTests.PeerDiscoveryTests [TestCase(10)] public void NodeChainTest(int chainLength) { + var nodes = new List(); var node = StartCodex(); + nodes.Add(node); + for (var i = 1; i < chainLength; i++) { node = StartCodex(s => s.WithBootstrapNode(node)); + nodes.Add(node); } - AssertAllNodesConnected(); + AssertAllNodesConnected(nodes.ToArray()); } - private void AssertAllNodesConnected() + private void AssertAllNodesConnected(params ICodexNode[] nodes) { - CreatePeerConnectionTestHelpers().AssertFullyConnected(GetAllOnlineCodexNodes()); + CreatePeerConnectionTestHelpers().AssertFullyConnected(nodes); } } } diff --git a/Tests/CodexTests/PeerDiscoveryTests/PeerDiscoveryTests.cs b/Tests/CodexTests/PeerDiscoveryTests/PeerDiscoveryTests.cs index 34da8d4..52619dd 100644 --- a/Tests/CodexTests/PeerDiscoveryTests/PeerDiscoveryTests.cs +++ b/Tests/CodexTests/PeerDiscoveryTests/PeerDiscoveryTests.cs @@ -21,9 +21,9 @@ namespace CodexTests.PeerDiscoveryTests [Test] public void MetricsDoesNotInterfereWithPeerDiscovery() { - StartCodex(2, s => s.EnableMetrics()); + var nodes = StartCodex(2, s => s.EnableMetrics()); - AssertAllNodesConnected(); 
+ AssertAllNodesConnected(nodes); } [Test] @@ -31,10 +31,10 @@ namespace CodexTests.PeerDiscoveryTests { var geth = Ci.StartGethNode(s => s.IsMiner()); var contracts = Ci.StartCodexContracts(geth); - StartCodex(2, s => s.EnableMarketplace(geth, contracts, m => m + var nodes = StartCodex(2, s => s.EnableMarketplace(geth, contracts, m => m .WithInitial(10.Eth(), 1000.TstWei()))); - AssertAllNodesConnected(); + AssertAllNodesConnected(nodes); } [TestCase(2)] @@ -42,16 +42,15 @@ namespace CodexTests.PeerDiscoveryTests [TestCase(10)] public void VariableNodes(int number) { - StartCodex(number); + var nodes = StartCodex(number); - AssertAllNodesConnected(); + AssertAllNodesConnected(nodes); } - private void AssertAllNodesConnected() + private void AssertAllNodesConnected(IEnumerable nodes) { - var allNodes = GetAllOnlineCodexNodes(); - CreatePeerConnectionTestHelpers().AssertFullyConnected(allNodes); - CheckRoutingTable(allNodes); + CreatePeerConnectionTestHelpers().AssertFullyConnected(nodes); + CheckRoutingTable(nodes); } private void CheckRoutingTable(IEnumerable allNodes) diff --git a/Tests/DistTestCore/DistTest.cs b/Tests/DistTestCore/DistTest.cs index a2edfec..7b1d09e 100644 --- a/Tests/DistTestCore/DistTest.cs +++ b/Tests/DistTestCore/DistTest.cs @@ -215,7 +215,7 @@ namespace DistTestCore IncludeLogsOnTestFailure(lifecycle); LifecycleStop(lifecycle); lifecycle.DeleteAllResources(); - lifecycle = null!; + lifecycles.Remove(GetCurrentTestName()); }); } From 7b5e802efba139a533fe60a094d13b9f278dc94b Mon Sep 17 00:00:00 2001 From: Ben Date: Thu, 13 Jun 2024 08:51:52 +0200 Subject: [PATCH 26/27] Applies delete in scalability tests --- ProjectPlugins/CodexPlugin/CodexAccess.cs | 16 ++++++++++++++++ ProjectPlugins/CodexPlugin/CodexNode.cs | 12 ++++++++++++ .../ScalabilityTests/ScalabilityTests.cs | 8 +++++--- 3 files changed, 33 insertions(+), 3 deletions(-) diff --git a/ProjectPlugins/CodexPlugin/CodexAccess.cs b/ProjectPlugins/CodexPlugin/CodexAccess.cs index 
a99a2ec..4d7ca22 100644 --- a/ProjectPlugins/CodexPlugin/CodexAccess.cs +++ b/ProjectPlugins/CodexPlugin/CodexAccess.cs @@ -141,6 +141,22 @@ namespace CodexPlugin } } + public void DeleteRepoFolder() + { + try + { + var containerNumber = Container.Containers.First().Recipe.Number; + var dataDir = $"datadir{containerNumber}"; + var workflow = tools.CreateWorkflow(); + workflow.ExecuteCommand(Container.Containers.First(), "rm", "-Rfv", $"/codex/{dataDir}/repo"); + Log("Deleted repo folder."); + } + catch (Exception e) + { + Log("Unable to delete repo folder: " + e); + } + } + private T OnCodex(Func> action) { var result = tools.CreateHttp(CheckContainerCrashed).OnClient(client => CallCodex(client, action)); diff --git a/ProjectPlugins/CodexPlugin/CodexNode.cs b/ProjectPlugins/CodexPlugin/CodexNode.cs index f0e064b..fd6d1f7 100644 --- a/ProjectPlugins/CodexPlugin/CodexNode.cs +++ b/ProjectPlugins/CodexPlugin/CodexNode.cs @@ -26,6 +26,13 @@ namespace CodexPlugin CrashWatcher CrashWatcher { get; } PodInfo GetPodInfo(); ITransferSpeeds TransferSpeeds { get; } + + /// + /// Warning! The node is not usable after this. + /// TODO: Replace with delete-blocks debug call once available in Codex. 
+ /// + void DeleteRepoFolder(); + void Stop(bool waitTillStopped); } @@ -164,6 +171,11 @@ namespace CodexPlugin return CodexAccess.GetPodInfo(); } + public void DeleteRepoFolder() + { + CodexAccess.DeleteRepoFolder(); + } + public void Stop(bool waitTillStopped) { Log("Stopping..."); diff --git a/Tests/CodexLongTests/ScalabilityTests/ScalabilityTests.cs b/Tests/CodexLongTests/ScalabilityTests/ScalabilityTests.cs index 14f363b..c3b05f7 100644 --- a/Tests/CodexLongTests/ScalabilityTests/ScalabilityTests.cs +++ b/Tests/CodexLongTests/ScalabilityTests/ScalabilityTests.cs @@ -20,14 +20,13 @@ public class ScalabilityTests : CodexDistTest [WaitForCleanup] public void ShouldMaintainFileInNetwork( [Values(4, 5, 6)] int numberOfNodes, // TODO: include 10, 40, 80 and 100, not 5 - [Values(2000, 2200, 2500, 2800, 3000, 3200, 3500, 3800, 4200, 4500, 4800, 5000)] int fileSizeInMb // TODO: include 100, 1000, 5000, 10000 + [Values(4000, 5000, 6000, 7000, 8000, 9000, 10000)] int fileSizeInMb ) { var logLevel = CodexLogLevel.Trace; var bootstrap = StartCodex(s => s.WithLogLevel(logLevel)); var nodes = StartCodex(numberOfNodes - 1, s => s - //.EnableMetrics() .WithBootstrapNode(bootstrap) .WithLogLevel(logLevel) .WithStorageQuota((fileSizeInMb + 50).MB()) @@ -35,7 +34,6 @@ public class ScalabilityTests : CodexDistTest var uploader = nodes.PickOneRandom(); var downloader = nodes.PickOneRandom(); - //var metrics = Ci.GetMetricsFor(uploader, downloader); var testFile = GenerateTestFile(fileSizeInMb.MB()); @@ -49,12 +47,16 @@ public class ScalabilityTests : CodexDistTest downloadedFile!.AssertIsEqual(testFile); + uploader.DeleteRepoFolder(); uploader.Stop(true); var otherDownloader = nodes.PickOneRandom(); downloadedFile = otherDownloader.DownloadContent(contentId); downloadedFile!.AssertIsEqual(testFile); + + downloader.DeleteRepoFolder(); + otherDownloader.DeleteRepoFolder(); } /// From 54f053cfccde4a6e284d9c85cc00078b82bd6600 Mon Sep 17 00:00:00 2001 From: benbierens Date: Fri, 14 Jun 
2024 09:05:56 +0200 Subject: [PATCH 27/27] Improves crash detection --- Framework/KubernetesWorkflow/CrashWatcher.cs | 4 +- Framework/KubernetesWorkflow/K8sCluster.cs | 1 + ProjectPlugins/CodexPlugin/CodexAccess.cs | 64 +++++++++++-------- .../CodexTests/BasicTests/MarketplaceTests.cs | 15 ++--- 4 files changed, 48 insertions(+), 36 deletions(-) diff --git a/Framework/KubernetesWorkflow/CrashWatcher.cs b/Framework/KubernetesWorkflow/CrashWatcher.cs index eb1ced7..57e30eb 100644 --- a/Framework/KubernetesWorkflow/CrashWatcher.cs +++ b/Framework/KubernetesWorkflow/CrashWatcher.cs @@ -50,7 +50,9 @@ namespace KubernetesWorkflow public bool HasContainerCrashed() { using var client = new Kubernetes(config); - return HasContainerBeenRestarted(client); + var result = HasContainerBeenRestarted(client); + if (result) DownloadCrashedContainerLogs(client); + return result; } private void Worker() diff --git a/Framework/KubernetesWorkflow/K8sCluster.cs b/Framework/KubernetesWorkflow/K8sCluster.cs index 366855f..be87e1f 100644 --- a/Framework/KubernetesWorkflow/K8sCluster.cs +++ b/Framework/KubernetesWorkflow/K8sCluster.cs @@ -16,6 +16,7 @@ namespace KubernetesWorkflow { var config = GetConfig(); UpdateHostAddress(config); + config.SkipTlsVerify = true; // Required for operation on Wings cluster. return config; } diff --git a/ProjectPlugins/CodexPlugin/CodexAccess.cs b/ProjectPlugins/CodexPlugin/CodexAccess.cs index 4d7ca22..80f1061 100644 --- a/ProjectPlugins/CodexPlugin/CodexAccess.cs +++ b/ProjectPlugins/CodexPlugin/CodexAccess.cs @@ -37,20 +37,23 @@ namespace CodexPlugin public DebugPeer GetDebugPeer(string peerId) { // Cannot use openAPI: debug/peer endpoint is not specified there. 
- var endpoint = GetEndpoint(); - var str = endpoint.HttpGetString($"debug/peer/{peerId}"); - - if (str.ToLowerInvariant() == "unable to find peer!") + return CrashCheck(() => { - return new DebugPeer - { - IsPeerFound = false - }; - } + var endpoint = GetEndpoint(); + var str = endpoint.HttpGetString($"debug/peer/{peerId}"); - var result = endpoint.Deserialize(str); - result.IsPeerFound = true; - return result; + if (str.ToLowerInvariant() == "unable to find peer!") + { + return new DebugPeer + { + IsPeerFound = false + }; + } + + var result = endpoint.Deserialize(str); + result.IsPeerFound = true; + return result; + }); } public void ConnectToPeer(string peerId, string[] peerMultiAddresses) @@ -105,13 +108,16 @@ namespace CodexPlugin public StoragePurchase GetPurchaseStatus(string purchaseId) { - var endpoint = GetEndpoint(); - return Time.Retry(() => + return CrashCheck(() => { - var str = endpoint.HttpGetString($"storage/purchases/{purchaseId}"); - if (string.IsNullOrEmpty(str)) throw new Exception("Empty response."); - return JsonConvert.DeserializeObject(str)!; - }, nameof(GetPurchaseStatus)); + var endpoint = GetEndpoint(); + return Time.Retry(() => + { + var str = endpoint.HttpGetString($"storage/purchases/{purchaseId}"); + if (string.IsNullOrEmpty(str)) throw new Exception("Empty response."); + return JsonConvert.DeserializeObject(str)!; + }, nameof(GetPurchaseStatus)); + }); // TODO: current getpurchase api does not line up with its openapi spec. 
// return mapper.Map(OnCodex(api => api.GetPurchaseAsync(purchaseId))); @@ -174,7 +180,19 @@ namespace CodexPlugin var address = GetAddress(); var api = new CodexApi(client); api.BaseUrl = $"{address.Host}:{address.Port}/api/codex/v1"; - return Time.Wait(action(api)); + return CrashCheck(() => Time.Wait(action(api))); + } + + private T CrashCheck(Func action) + { + try + { + return action(); + } + finally + { + CrashWatcher.HasContainerCrashed(); + } } private IEndpoint GetEndpoint() @@ -237,7 +255,6 @@ namespace CodexPlugin if (string.IsNullOrEmpty(debugInfo.Spr)) { Log("Did not get value debug/info response."); - DownloadLog(); Throw(failure); } else @@ -248,14 +265,12 @@ namespace CodexPlugin catch (Exception ex) { Log("Got exception from debug/info call: " + ex); - DownloadLog(); Throw(failure); } if (failure.Duration < timeSet.HttpCallTimeout()) { Log("Retry failed within HTTP timeout duration."); - DownloadLog(); Throw(failure); } } @@ -269,10 +284,5 @@ namespace CodexPlugin { log.Log($"{GetName()} {msg}"); } - - private void DownloadLog() - { - tools.CreateWorkflow().DownloadContainerLog(Container.Containers.Single(), this); - } } } diff --git a/Tests/CodexTests/BasicTests/MarketplaceTests.cs b/Tests/CodexTests/BasicTests/MarketplaceTests.cs index 493d18c..756f863 100644 --- a/Tests/CodexTests/BasicTests/MarketplaceTests.cs +++ b/Tests/CodexTests/BasicTests/MarketplaceTests.cs @@ -112,18 +112,17 @@ namespace CodexTests.BasicTests }; var purchaseContract = client.Marketplace.RequestStorage(purchase); - var contractCid = purchaseContract.ContentId; - Assert.That(uploadCid.Id, Is.Not.EqualTo(contractCid.Id)); - var downloader = StartCodex(s => s.WithName("Downloader")); - - var uploadedFile = downloader.DownloadContent(uploadCid); - testFile.AssertIsEqual(uploadedFile); + // Download both from client. 
+ testFile.AssertIsEqual(client.DownloadContent(uploadCid)); + testFile.AssertIsEqual(client.DownloadContent(contractCid)); - var contractFile = downloader.DownloadContent(contractCid); - testFile.AssertIsEqual(contractFile); + // Download both from another node. + var downloader = StartCodex(s => s.WithName("Downloader")); + testFile.AssertIsEqual(downloader.DownloadContent(uploadCid)); + testFile.AssertIsEqual(downloader.DownloadContent(contractCid)); } private void WaitForAllSlotFilledEvents(ICodexContracts contracts, StoragePurchaseRequest purchase, IGethNode geth)