2
0
mirror of synced 2025-01-11 09:06:56 +00:00

Adds a container-restart check at the end of the deployment.

This commit is contained in:
benbierens 2023-08-28 11:53:59 +02:00
parent 249b13d503
commit 553a368714
No known key found for this signature in database
GPG Key ID: FE44815D96D0A1AA
3 changed files with 39 additions and 6 deletions

View File

@ -60,7 +60,7 @@ namespace CodexNetDeployer
if (string.IsNullOrEmpty(bootstrapSpr)) bootstrapSpr = debugInfo.spr;
validatorsLeft--;
return new CodexNodeStartResult(container, codexAccess);
return new CodexNodeStartResult(workflow, container, codexAccess);
}
}
}
@ -105,12 +105,14 @@ namespace CodexNetDeployer
/// <summary>
/// Immutable holder for the three objects handed to its constructor: the
/// <see cref="StartupWorkflow"/>, the <see cref="RunningContainer"/> and the
/// <see cref="CodexAccess"/>. Presumably one instance describes one started
/// Codex node -- confirm against the code that constructs it.
/// </summary>
public class CodexNodeStartResult
{
// NOTE(review): two constructor signatures appear back to back below. This looks
// like the removed and the added line of a diff shown together; only the
// three-parameter signature matches the body, which assigns Workflow.
public CodexNodeStartResult(RunningContainer container, CodexAccess access)
public CodexNodeStartResult(StartupWorkflow workflow, RunningContainer container, CodexAccess access)
{
Workflow = workflow;
Container = container;
Access = access;
}
/// <summary>The workflow instance passed to the constructor.</summary>
public StartupWorkflow Workflow { get; }
/// <summary>The running container passed to the constructor.</summary>
public RunningContainer Container { get; }
/// <summary>The Codex access object passed to the constructor.</summary>
public CodexAccess Access { get; }
}

View File

@ -55,6 +55,7 @@ namespace CodexNetDeployer
var (prometheusContainer, grafanaStartInfo) = StartMetricsService(lifecycle, setup, startResults.Select(r => r.Container));
CheckPeerConnectivity(startResults);
CheckContainerRestarts(startResults);
return new CodexDeployment(gethResults, startResults.Select(r => r.Container).ToArray(), prometheusContainer, grafanaStartInfo, CreateMetadata());
}
@ -106,6 +107,26 @@ namespace CodexNetDeployer
Log("Check passed.");
}
/// <summary>
/// Asks a crash watcher for every start result whether its container has crashed,
/// logs the outcome, and aborts the deployment when at least one container did.
/// </summary>
/// <param name="startResults">Start results whose containers are checked.</param>
/// <exception cref="Exception">Thrown when one or more containers are reported as crashed.</exception>
private void CheckContainerRestarts(List<CodexNodeStartResult> startResults)
{
    // ToList() forces every container to be checked before anything is logged.
    var crashedContainers = startResults
        .Where(result => result.Workflow.CreateCrashWatcher(result.Container).HasContainerCrashed())
        .Select(result => result.Container)
        .ToList();

    if (crashedContainers.Count > 0)
    {
        var crashedNames = string.Join(",", crashedContainers.Select(c => c.Name));
        Log($"Deployment failed. The following containers have crashed: {crashedNames}");
        throw new Exception("Deployment failed: One or more containers crashed.");
    }

    Log("Container restart check passed.");
}
private DeploymentMetadata CreateMetadata()
{
return new DeploymentMetadata(

View File

@ -43,6 +43,12 @@ namespace KubernetesWorkflow
if (workerException != null) throw new Exception("Exception occurred in CrashWatcher worker thread.", workerException);
}
/// <summary>
/// Performs a single check of the watched container's pod using a client that
/// lives only for this call.
/// </summary>
/// <returns>The result of <c>HasContainerBeenRestarted</c> for the container's pod.</returns>
public bool HasContainerCrashed()
{
    using (var kubeClient = new Kubernetes(config))
    {
        var podName = container.Pod.PodInfo.Name;
        return HasContainerBeenRestarted(kubeClient, podName);
    }
}
private void Worker()
{
try
@ -57,7 +63,7 @@ namespace KubernetesWorkflow
private void MonitorContainer(CancellationToken token)
{
var client = new Kubernetes(config);
using var client = new Kubernetes(config);
while (!token.IsCancellationRequested)
{
token.WaitHandle.WaitOne(TimeSpan.FromSeconds(1));
@ -65,9 +71,7 @@ namespace KubernetesWorkflow
var pod = container.Pod;
var recipe = container.Recipe;
var podName = pod.PodInfo.Name;
var podInfo = client.ReadNamespacedPod(podName, k8sNamespace);
if (podInfo.Status.ContainerStatuses.Any(c => c.RestartCount > 0))
if (HasContainerBeenRestarted(client, podName))
{
DownloadCrashedContainerLogs(client, podName, recipe);
return;
@ -75,6 +79,12 @@ namespace KubernetesWorkflow
}
}
/// <summary>
/// Reads the named pod from <c>k8sNamespace</c> and reports whether any of its
/// container statuses has a restart count above zero.
/// </summary>
/// <param name="client">Kubernetes client used to read the pod.</param>
/// <param name="podName">Name of the pod to read.</param>
/// <returns>
/// True when at least one container status reports a restart; false otherwise,
/// including when the pod reports no status or no container statuses.
/// </returns>
private bool HasContainerBeenRestarted(Kubernetes client, string podName)
{
    var podInfo = client.ReadNamespacedPod(podName, k8sNamespace);

    // Status and ContainerStatuses are optional in the pod API object and can be
    // absent (null), e.g. before any container status has been reported. With
    // nothing to inspect there is no restart to report, and passing null to
    // Any() would throw ArgumentNullException.
    var containerStatuses = podInfo.Status?.ContainerStatuses;
    return containerStatuses != null && containerStatuses.Any(c => c.RestartCount > 0);
}
private void DownloadCrashedContainerLogs(Kubernetes client, string podName, ContainerRecipe recipe)
{
log.Log("Pod crash detected for " + container.Name);