From 54f053cfccde4a6e284d9c85cc00078b82bd6600 Mon Sep 17 00:00:00 2001 From: benbierens Date: Fri, 14 Jun 2024 09:05:56 +0200 Subject: [PATCH] Improves crash detection --- Framework/KubernetesWorkflow/CrashWatcher.cs | 4 +- Framework/KubernetesWorkflow/K8sCluster.cs | 1 + ProjectPlugins/CodexPlugin/CodexAccess.cs | 64 +++++++++++-------- .../CodexTests/BasicTests/MarketplaceTests.cs | 15 ++--- 4 files changed, 48 insertions(+), 36 deletions(-) diff --git a/Framework/KubernetesWorkflow/CrashWatcher.cs b/Framework/KubernetesWorkflow/CrashWatcher.cs index eb1ced7b..57e30eb6 100644 --- a/Framework/KubernetesWorkflow/CrashWatcher.cs +++ b/Framework/KubernetesWorkflow/CrashWatcher.cs @@ -50,7 +50,9 @@ namespace KubernetesWorkflow public bool HasContainerCrashed() { using var client = new Kubernetes(config); - return HasContainerBeenRestarted(client); + var result = HasContainerBeenRestarted(client); + if (result) DownloadCrashedContainerLogs(client); + return result; } private void Worker() diff --git a/Framework/KubernetesWorkflow/K8sCluster.cs b/Framework/KubernetesWorkflow/K8sCluster.cs index 366855fa..be87e1fe 100644 --- a/Framework/KubernetesWorkflow/K8sCluster.cs +++ b/Framework/KubernetesWorkflow/K8sCluster.cs @@ -16,6 +16,7 @@ namespace KubernetesWorkflow { var config = GetConfig(); UpdateHostAddress(config); + config.SkipTlsVerify = true; // Required for operation on Wings cluster. return config; } diff --git a/ProjectPlugins/CodexPlugin/CodexAccess.cs b/ProjectPlugins/CodexPlugin/CodexAccess.cs index 4d7ca224..80f10611 100644 --- a/ProjectPlugins/CodexPlugin/CodexAccess.cs +++ b/ProjectPlugins/CodexPlugin/CodexAccess.cs @@ -37,20 +37,23 @@ namespace CodexPlugin public DebugPeer GetDebugPeer(string peerId) { // Cannot use openAPI: debug/peer endpoint is not specified there. - var endpoint = GetEndpoint(); - var str = endpoint.HttpGetString($"debug/peer/{peerId}"); - - if (str.ToLowerInvariant() == "unable to find peer!") + return CrashCheck(() => { - return new DebugPeer - { - IsPeerFound = false - }; - } + var endpoint = GetEndpoint(); + var str = endpoint.HttpGetString($"debug/peer/{peerId}"); - var result = endpoint.Deserialize(str); - result.IsPeerFound = true; - return result; + if (str.ToLowerInvariant() == "unable to find peer!") + { + return new DebugPeer + { + IsPeerFound = false + }; + } + + var result = endpoint.Deserialize(str); + result.IsPeerFound = true; + return result; + }); } public void ConnectToPeer(string peerId, string[] peerMultiAddresses) @@ -105,13 +108,16 @@ namespace CodexPlugin public StoragePurchase GetPurchaseStatus(string purchaseId) { - var endpoint = GetEndpoint(); - return Time.Retry(() => + return CrashCheck(() => { - var str = endpoint.HttpGetString($"storage/purchases/{purchaseId}"); - if (string.IsNullOrEmpty(str)) throw new Exception("Empty response."); - return JsonConvert.DeserializeObject(str)!; - }, nameof(GetPurchaseStatus)); + var endpoint = GetEndpoint(); + return Time.Retry(() => + { + var str = endpoint.HttpGetString($"storage/purchases/{purchaseId}"); + if (string.IsNullOrEmpty(str)) throw new Exception("Empty response."); + return JsonConvert.DeserializeObject(str)!; + }, nameof(GetPurchaseStatus)); + }); // TODO: current getpurchase api does not line up with its openapi spec. // return mapper.Map(OnCodex(api => api.GetPurchaseAsync(purchaseId))); @@ -174,7 +180,19 @@ namespace CodexPlugin var address = GetAddress(); var api = new CodexApi(client); api.BaseUrl = $"{address.Host}:{address.Port}/api/codex/v1"; - return Time.Wait(action(api)); + return CrashCheck(() => Time.Wait(action(api))); + } + + private T CrashCheck(Func action) + { + try + { + return action(); + } + finally + { + CrashWatcher.HasContainerCrashed(); + } } private IEndpoint GetEndpoint() @@ -237,7 +255,6 @@ namespace CodexPlugin if (string.IsNullOrEmpty(debugInfo.Spr)) { Log("Did not get value debug/info response."); - DownloadLog(); Throw(failure); } else @@ -248,14 +265,12 @@ namespace CodexPlugin catch (Exception ex) { Log("Got exception from debug/info call: " + ex); - DownloadLog(); Throw(failure); } if (failure.Duration < timeSet.HttpCallTimeout()) { Log("Retry failed within HTTP timeout duration."); - DownloadLog(); Throw(failure); } } @@ -269,10 +284,5 @@ namespace CodexPlugin { log.Log($"{GetName()} {msg}"); } - - private void DownloadLog() - { - tools.CreateWorkflow().DownloadContainerLog(Container.Containers.Single(), this); - } } } diff --git a/Tests/CodexTests/BasicTests/MarketplaceTests.cs b/Tests/CodexTests/BasicTests/MarketplaceTests.cs index 493d18c3..756f8637 100644 --- a/Tests/CodexTests/BasicTests/MarketplaceTests.cs +++ b/Tests/CodexTests/BasicTests/MarketplaceTests.cs @@ -112,18 +112,17 @@ namespace CodexTests.BasicTests }; var purchaseContract = client.Marketplace.RequestStorage(purchase); - var contractCid = purchaseContract.ContentId; - Assert.That(uploadCid.Id, Is.Not.EqualTo(contractCid.Id)); - var downloader = StartCodex(s => s.WithName("Downloader")); - - var uploadedFile = downloader.DownloadContent(uploadCid); - testFile.AssertIsEqual(uploadedFile); + // Download both from client. + testFile.AssertIsEqual(client.DownloadContent(uploadCid)); + testFile.AssertIsEqual(client.DownloadContent(contractCid)); - var contractFile = downloader.DownloadContent(contractCid); - testFile.AssertIsEqual(contractFile); + // Download both from another node. + var downloader = StartCodex(s => s.WithName("Downloader")); + testFile.AssertIsEqual(downloader.DownloadContent(uploadCid)); + testFile.AssertIsEqual(downloader.DownloadContent(contractCid)); } private void WaitForAllSlotFilledEvents(ICodexContracts contracts, StoragePurchaseRequest purchase, IGethNode geth)