rigged up a pod crash detector maybe
This commit is contained in:
parent
9f0600d6c1
commit
6e5e30afd4
|
@ -377,7 +377,8 @@
|
|||
"value": 80
|
||||
}
|
||||
]
|
||||
}
|
||||
},
|
||||
"unit": "decbytes"
|
||||
},
|
||||
"overrides": []
|
||||
},
|
||||
|
@ -471,7 +472,8 @@
|
|||
"value": "<CODEX_STORAGEQUOTA>"
|
||||
}
|
||||
]
|
||||
}
|
||||
},
|
||||
"unit": "decbytes"
|
||||
},
|
||||
"overrides": []
|
||||
},
|
||||
|
|
|
@ -604,6 +604,30 @@ namespace KubernetesWorkflow
|
|||
|
||||
#endregion
|
||||
|
||||
public Task WatchForCrashLogs(RunningContainer container, CancellationToken token, ILogHandler logHandler)
|
||||
{
|
||||
return Task.Run(() =>
|
||||
{
|
||||
var myOwnClient = new Kubernetes(cluster.GetK8sClientConfig());
|
||||
while (!token.IsCancellationRequested)
|
||||
{
|
||||
token.WaitHandle.WaitOne(TimeSpan.FromSeconds(3));
|
||||
|
||||
var pod = container.Pod;
|
||||
var recipe = container.Recipe;
|
||||
var podName = pod.PodInfo.Name;
|
||||
var podInfo = myOwnClient.ReadNamespacedPod(podName, K8sTestNamespace);
|
||||
|
||||
if (podInfo.Status.ContainerStatuses.Any(c => c.RestartCount > 0))
|
||||
{
|
||||
log.Log("Pod crash detected for " + container.Name);
|
||||
using var stream = client.Run(c => c.ReadNamespacedPodLog(pod.PodInfo.Name, K8sTestNamespace, recipe.Name, previous: true));
|
||||
logHandler.Log(stream);
|
||||
}
|
||||
}
|
||||
});
|
||||
}
|
||||
|
||||
private PodInfo FetchNewPod()
|
||||
{
|
||||
var pods = client.Run(c => c.ListNamespacedPod(K8sTestNamespace)).Items;
|
||||
|
|
|
@ -37,6 +37,11 @@ namespace KubernetesWorkflow
|
|||
}, pl);
|
||||
}
|
||||
|
||||
public Task WatchForCrashLogs(RunningContainer container, CancellationToken token, ILogHandler logHandler)
|
||||
{
|
||||
return K8s(controller => controller.WatchForCrashLogs(container, token, logHandler));
|
||||
}
|
||||
|
||||
public void Stop(RunningContainers runningContainers)
|
||||
{
|
||||
K8s(controller =>
|
||||
|
|
|
@ -1,11 +1,12 @@
|
|||
using DistTestCore;
|
||||
using KubernetesWorkflow;
|
||||
using NUnit.Framework;
|
||||
using Utils;
|
||||
|
||||
namespace Tests.BasicTests
|
||||
{
|
||||
[TestFixture]
|
||||
public class ContinuousSubstitute : AutoBootstrapDistTest
|
||||
public class ContinuousSubstitute : AutoBootstrapDistTest, ILogHandler
|
||||
{
|
||||
[Test]
|
||||
[UseLongTimeouts]
|
||||
|
@ -57,7 +58,7 @@ namespace Tests.BasicTests
|
|||
testFile.AssertIsEqual(downloadedFile);
|
||||
});
|
||||
}
|
||||
|
||||
|
||||
[Test]
|
||||
[UseLongTimeouts]
|
||||
public void HoldMyBeerTest()
|
||||
|
@ -70,28 +71,53 @@ namespace Tests.BasicTests
|
|||
|
||||
var nodes = group.Cast<OnlineCodexNode>().ToArray();
|
||||
|
||||
foreach (var node in nodes)
|
||||
{
|
||||
node.Marketplace.MakeStorageAvailable(
|
||||
size: 1.GB(),
|
||||
minPricePerBytePerSecond: 1.TestTokens(),
|
||||
maxCollateral: 1024.TestTokens(),
|
||||
maxDuration: TimeSpan.FromMinutes(5));
|
||||
}
|
||||
var flow = Get().WorkflowCreator.CreateWorkflow();
|
||||
var cst = new CancellationTokenSource();
|
||||
var tasks = nodes.Select(n => flow.WatchForCrashLogs(n.CodexAccess.Container, cst.Token, this)).ToArray();
|
||||
|
||||
var endTime = DateTime.UtcNow + TimeSpan.FromHours(2);
|
||||
while (DateTime.UtcNow < endTime)
|
||||
try
|
||||
{
|
||||
foreach (var node in nodes)
|
||||
{
|
||||
var file = GenerateTestFile(80.MB());
|
||||
var cid = node.UploadFile(file);
|
||||
|
||||
var dl = node.DownloadContent(cid);
|
||||
file.AssertIsEqual(dl);
|
||||
node.Marketplace.MakeStorageAvailable(
|
||||
size: 1.GB(),
|
||||
minPricePerBytePerSecond: 1.TestTokens(),
|
||||
maxCollateral: 1024.TestTokens(),
|
||||
maxDuration: TimeSpan.FromMinutes(5));
|
||||
}
|
||||
|
||||
Thread.Sleep(TimeSpan.FromMinutes(2));
|
||||
var endTime = DateTime.UtcNow + TimeSpan.FromHours(2);
|
||||
while (DateTime.UtcNow < endTime)
|
||||
{
|
||||
foreach (var node in nodes)
|
||||
{
|
||||
var file = GenerateTestFile(80.MB());
|
||||
var cid = node.UploadFile(file);
|
||||
|
||||
var dl = node.DownloadContent(cid);
|
||||
file.AssertIsEqual(dl);
|
||||
}
|
||||
|
||||
Thread.Sleep(TimeSpan.FromMinutes(2));
|
||||
}
|
||||
}
|
||||
finally
|
||||
{
|
||||
cst.Cancel();
|
||||
foreach (var t in tasks) t.Wait();
|
||||
}
|
||||
}
|
||||
|
||||
public void Log(Stream log)
|
||||
{
|
||||
Log("Well damn, container crashed. Here's the log:");
|
||||
using var reader = new StreamReader(log);
|
||||
|
||||
var line = reader.ReadLine();
|
||||
while(line != null)
|
||||
{
|
||||
Log(line);
|
||||
line = reader.ReadLine();
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
Loading…
Reference in New Issue