2023-08-15 09:01:18 +00:00
|
|
|
|
using k8s;
|
|
|
|
|
using Logging;
|
|
|
|
|
|
|
|
|
|
namespace KubernetesWorkflow
|
|
|
|
|
{
|
|
|
|
|
/// <summary>
/// Watches a single pod for container restarts and, when one is detected,
/// forwards the previous container instance's log to an <see cref="ILogHandler"/>.
/// Can be used either as a background poller (<see cref="Start"/> / <see cref="Stop"/>)
/// or as a one-shot check (<see cref="HasContainerCrashed"/>).
/// </summary>
public class CrashWatcher
{
    private readonly ILog log;
    private readonly KubernetesClientConfiguration config;
    // Used only in the "crash detected" log message.
    private readonly string containerName;
    private readonly string podName;
    // Passed as the container argument when reading the pod log.
    private readonly string recipeName;
    private readonly string k8sNamespace;
    private ILogHandler? logHandler;
    private CancellationTokenSource cts;
    private Task? worker;
    private Exception? workerException;

    /// <summary>
    /// Creates a watcher for the given pod. No Kubernetes calls are made until
    /// <see cref="Start"/> or <see cref="HasContainerCrashed"/> is invoked.
    /// </summary>
    /// <param name="log">Logger that receives the crash-detected message.</param>
    /// <param name="config">Configuration used to create Kubernetes clients.</param>
    /// <param name="containerName">Name reported in log messages.</param>
    /// <param name="podName">Name of the pod to inspect.</param>
    /// <param name="recipeName">Container name used when downloading the pod log.</param>
    /// <param name="k8sNamespace">Namespace the pod lives in.</param>
    public CrashWatcher(ILog log, KubernetesClientConfiguration config, string containerName, string podName, string recipeName, string k8sNamespace)
    {
        this.log = log;
        this.config = config;
        this.containerName = containerName;
        this.podName = podName;
        this.recipeName = recipeName;
        this.k8sNamespace = k8sNamespace;
        cts = new CancellationTokenSource();
    }

    /// <summary>
    /// Starts the background worker that polls the pod for restarts.
    /// </summary>
    /// <param name="logHandler">Receives the crashed container's log stream.</param>
    /// <exception cref="InvalidOperationException">The watcher is already running.</exception>
    public void Start(ILogHandler logHandler)
    {
        if (worker != null) throw new InvalidOperationException("CrashWatcher is already running.");

        this.logHandler = logHandler;

        // A failure from an earlier run must not be reported by the next Stop().
        workerException = null;

        // Release the previous source (Dispose is idempotent) before replacing it.
        cts.Dispose();
        cts = new CancellationTokenSource();
        worker = Task.Run(Worker);
    }

    /// <summary>
    /// Cancels the background worker, waits for it to finish, and surfaces any
    /// exception the worker ran into.
    /// </summary>
    /// <exception cref="InvalidOperationException">The watcher is not running.</exception>
    /// <exception cref="Exception">The worker failed; the cause is the inner exception.</exception>
    public void Stop()
    {
        if (worker == null) throw new InvalidOperationException("CrashWatcher is not running.");

        cts.Cancel();
        worker.Wait();
        worker = null;

        // The worker has finished, so nothing reads the token any more.
        cts.Dispose();

        // Consume the stored failure so it is reported exactly once.
        var failure = workerException;
        workerException = null;
        if (failure != null) throw new Exception("Exception occurred in CrashWatcher worker thread.", failure);
    }

    /// <summary>
    /// Performs a single, synchronous restart check using a fresh client.
    /// When a restart is detected the crashed container's log is downloaded,
    /// provided a log handler was supplied through <see cref="Start"/>.
    /// </summary>
    /// <returns><c>true</c> if any container in the pod has been restarted.</returns>
    public bool HasContainerCrashed()
    {
        using var client = new Kubernetes(config);
        var result = HasContainerBeenRestarted(client);
        if (result) DownloadCrashedContainerLogs(client);
        return result;
    }

    /// <summary>
    /// Task entry point: runs the monitor loop and stores any exception so that
    /// <see cref="Stop"/> can rethrow it on the caller's thread.
    /// </summary>
    private void Worker()
    {
        try
        {
            MonitorContainer(cts.Token);
        }
        catch (Exception ex)
        {
            workerException = ex;
        }
    }

    /// <summary>
    /// Polls the pod until cancellation is requested or a restart is detected.
    /// </summary>
    /// <param name="token">Token that ends the loop when cancelled.</param>
    private void MonitorContainer(CancellationToken token)
    {
        using var client = new Kubernetes(config);
        while (!token.IsCancellationRequested)
        {
            // Waits up to 10 seconds; cancellation wakes the wait early. The check
            // below still runs once after cancellation before the loop exits.
            token.WaitHandle.WaitOne(TimeSpan.FromSeconds(10));

            if (HasContainerBeenRestarted(client))
            {
                DownloadCrashedContainerLogs(client);
                return;
            }
        }
    }

    /// <summary>
    /// Reads the pod and reports whether any of its containers has a restart
    /// count above zero. A pod without status or container statuses is treated
    /// as not restarted.
    /// </summary>
    /// <param name="client">Kubernetes client used to read the pod.</param>
    /// <returns><c>true</c> if a restart was observed.</returns>
    private bool HasContainerBeenRestarted(Kubernetes client)
    {
        var podInfo = client.ReadNamespacedPod(podName, k8sNamespace);

        // Status / ContainerStatuses may be absent (e.g. pod not yet scheduled).
        var result = podInfo?.Status?.ContainerStatuses?.Any(c => c.RestartCount > 0) ?? false;
        if (result) log.Log("Pod crash detected for " + containerName);
        return result;
    }

    /// <summary>
    /// Streams the log of the previous container instance to the log handler.
    /// Does nothing but log a notice when no handler has been supplied.
    /// </summary>
    /// <param name="client">Kubernetes client used to read the pod log.</param>
    private void DownloadCrashedContainerLogs(Kubernetes client)
    {
        // HasContainerCrashed() can be called without Start(), leaving the handler unset.
        var handler = logHandler;
        if (handler == null)
        {
            log.Log("No log handler available for " + containerName + "; skipping download of crashed container logs.");
            return;
        }

        using var stream = client.ReadNamespacedPodLog(podName, k8sNamespace, recipeName, previous: true);
        handler.Log(stream);
    }
}
|
|
|
|
|
}
|