diff --git a/DistTestCore/Metrics/dashboard.json b/DistTestCore/Metrics/dashboard.json index d53d72b..fdc5a2a 100644 --- a/DistTestCore/Metrics/dashboard.json +++ b/DistTestCore/Metrics/dashboard.json @@ -377,7 +377,8 @@ "value": 80 } ] - } + }, + "unit": "decbytes" }, "overrides": [] }, @@ -471,7 +472,8 @@ "value": "" } ] - } + }, + "unit": "decbytes" }, "overrides": [] }, diff --git a/KubernetesWorkflow/K8sController.cs b/KubernetesWorkflow/K8sController.cs index 5569b07..29147ea 100644 --- a/KubernetesWorkflow/K8sController.cs +++ b/KubernetesWorkflow/K8sController.cs @@ -604,6 +604,30 @@ namespace KubernetesWorkflow #endregion + public Task WatchForCrashLogs(RunningContainer container, CancellationToken token, ILogHandler logHandler) + { + return Task.Run(() => + { + var myOwnClient = new Kubernetes(cluster.GetK8sClientConfig()); + while (!token.IsCancellationRequested) + { + token.WaitHandle.WaitOne(TimeSpan.FromSeconds(3)); + + var pod = container.Pod; + var recipe = container.Recipe; + var podName = pod.PodInfo.Name; + var podInfo = myOwnClient.ReadNamespacedPod(podName, K8sTestNamespace); + + if (podInfo.Status.ContainerStatuses.Any(c => c.RestartCount > 0)) + { + log.Log("Pod crash detected for " + container.Name); + using var stream = client.Run(c => c.ReadNamespacedPodLog(pod.PodInfo.Name, K8sTestNamespace, recipe.Name, previous: true)); + logHandler.Log(stream); + } + } + }); + } + private PodInfo FetchNewPod() { var pods = client.Run(c => c.ListNamespacedPod(K8sTestNamespace)).Items; diff --git a/KubernetesWorkflow/StartupWorkflow.cs b/KubernetesWorkflow/StartupWorkflow.cs index c51b991..6f390a0 100644 --- a/KubernetesWorkflow/StartupWorkflow.cs +++ b/KubernetesWorkflow/StartupWorkflow.cs @@ -37,6 +37,11 @@ namespace KubernetesWorkflow }, pl); } + public Task WatchForCrashLogs(RunningContainer container, CancellationToken token, ILogHandler logHandler) + { + return K8s(controller => controller.WatchForCrashLogs(container, token, logHandler)); + } + public void Stop(RunningContainers runningContainers) { K8s(controller => diff --git a/Tests/BasicTests/ContinuousSubstitute.cs b/Tests/BasicTests/ContinuousSubstitute.cs index 5cc010a..722bb3f 100644 --- a/Tests/BasicTests/ContinuousSubstitute.cs +++ b/Tests/BasicTests/ContinuousSubstitute.cs @@ -1,11 +1,12 @@ using DistTestCore; +using KubernetesWorkflow; using NUnit.Framework; using Utils; namespace Tests.BasicTests { [TestFixture] - public class ContinuousSubstitute : AutoBootstrapDistTest + public class ContinuousSubstitute : AutoBootstrapDistTest, ILogHandler { [Test] [UseLongTimeouts] @@ -57,7 +58,7 @@ namespace Tests.BasicTests testFile.AssertIsEqual(downloadedFile); }); } - + [Test] [UseLongTimeouts] public void HoldMyBeerTest() @@ -70,28 +71,53 @@ namespace Tests.BasicTests var nodes = group.Cast().ToArray(); - foreach (var node in nodes) - { - node.Marketplace.MakeStorageAvailable( - size: 1.GB(), - minPricePerBytePerSecond: 1.TestTokens(), - maxCollateral: 1024.TestTokens(), - maxDuration: TimeSpan.FromMinutes(5)); - } + var flow = Get().WorkflowCreator.CreateWorkflow(); + var cst = new CancellationTokenSource(); + var tasks = nodes.Select(n => flow.WatchForCrashLogs(n.CodexAccess.Container, cst.Token, this)).ToArray(); - var endTime = DateTime.UtcNow + TimeSpan.FromHours(2); - while (DateTime.UtcNow < endTime) + try { foreach (var node in nodes) { - var file = GenerateTestFile(80.MB()); - var cid = node.UploadFile(file); - - var dl = node.DownloadContent(cid); - file.AssertIsEqual(dl); + node.Marketplace.MakeStorageAvailable( + size: 1.GB(), + minPricePerBytePerSecond: 1.TestTokens(), + maxCollateral: 1024.TestTokens(), + maxDuration: TimeSpan.FromMinutes(5)); } - Thread.Sleep(TimeSpan.FromMinutes(2)); + var endTime = DateTime.UtcNow + TimeSpan.FromHours(2); + while (DateTime.UtcNow < endTime) + { + foreach (var node in nodes) + { + var file = GenerateTestFile(80.MB()); + var cid = node.UploadFile(file); + + var dl = node.DownloadContent(cid); + file.AssertIsEqual(dl); + } + + Thread.Sleep(TimeSpan.FromMinutes(2)); + } + } + finally + { + cst.Cancel(); + foreach (var t in tasks) t.Wait(); + } + } + + public void Log(Stream log) + { + Log("Well damn, container crashed. Here's the log:"); + using var reader = new StreamReader(log); + + var line = reader.ReadLine(); + while(line != null) + { + Log(line); + line = reader.ReadLine(); } } }