Adds a container-restart check at the end of the deployment.
This commit is contained in:
parent
249b13d503
commit
553a368714
@ -60,7 +60,7 @@ namespace CodexNetDeployer
|
||||
|
||||
if (string.IsNullOrEmpty(bootstrapSpr)) bootstrapSpr = debugInfo.spr;
|
||||
validatorsLeft--;
|
||||
return new CodexNodeStartResult(container, codexAccess);
|
||||
return new CodexNodeStartResult(workflow, container, codexAccess);
|
||||
}
|
||||
}
|
||||
}
|
||||
@ -105,12 +105,14 @@ namespace CodexNetDeployer
|
||||
|
||||
/// <summary>
/// Immutable result of starting a single Codex node: bundles the workflow,
/// the running container, and the access object that the constructor receives.
/// </summary>
public class CodexNodeStartResult
|
||||
{
|
||||
// NOTE(review): this is a diff rendering. The two-argument signature directly below appears
// to be the pre-commit line; the three-argument signature after it is its replacement.
public CodexNodeStartResult(RunningContainer container, CodexAccess access)
|
||||
public CodexNodeStartResult(StartupWorkflow workflow, RunningContainer container, CodexAccess access)
|
||||
{
|
||||
Workflow = workflow;
|
||||
Container = container;
|
||||
Access = access;
|
||||
}
|
||||
|
||||
// Workflow passed to the constructor; CheckContainerRestarts calls CreateCrashWatcher on it.
public StartupWorkflow Workflow { get; }
|
||||
// Container passed to the constructor.
public RunningContainer Container { get; }
|
||||
// Access object passed to the constructor.
public CodexAccess Access { get; }
|
||||
}
|
||||
|
@ -55,6 +55,7 @@ namespace CodexNetDeployer
|
||||
var (prometheusContainer, grafanaStartInfo) = StartMetricsService(lifecycle, setup, startResults.Select(r => r.Container));
|
||||
|
||||
CheckPeerConnectivity(startResults);
|
||||
CheckContainerRestarts(startResults);
|
||||
|
||||
return new CodexDeployment(gethResults, startResults.Select(r => r.Container).ToArray(), prometheusContainer, grafanaStartInfo, CreateMetadata());
|
||||
}
|
||||
@ -106,6 +107,26 @@ namespace CodexNetDeployer
|
||||
Log("Check passed.");
|
||||
}
|
||||
|
||||
/// <summary>
/// Final deployment check: asks a crash watcher for every started node whether its
/// container has crashed, and fails the deployment if any has.
/// </summary>
/// <param name="startResults">Start results of all nodes that were deployed.</param>
/// <exception cref="Exception">Thrown when at least one container reports a crash.</exception>
private void CheckContainerRestarts(List<CodexNodeStartResult> startResults)
{
    // ToList() forces every container to be checked up front, in order,
    // before anything is logged.
    var crashedContainers = startResults
        .Where(result => result.Workflow.CreateCrashWatcher(result.Container).HasContainerCrashed())
        .Select(result => result.Container)
        .ToList();

    if (crashedContainers.Any())
    {
        Log($"Deployment failed. The following containers have crashed: {string.Join(",", crashedContainers.Select(c => c.Name))}");
        throw new Exception("Deployment failed: One or more containers crashed.");
    }

    Log("Container restart check passed.");
}
|
||||
|
||||
private DeploymentMetadata CreateMetadata()
|
||||
{
|
||||
return new DeploymentMetadata(
|
||||
|
@ -43,6 +43,12 @@ namespace KubernetesWorkflow
|
||||
if (workerException != null) throw new Exception("Exception occurred in CrashWatcher worker thread.", workerException);
|
||||
}
|
||||
|
||||
/// <summary>
/// One-shot crash check: opens a Kubernetes client for the duration of the call and
/// reports whether the watched container's pod shows a restart.
/// </summary>
/// <returns>The result of <c>HasContainerBeenRestarted</c> for this container's pod.</returns>
public bool HasContainerCrashed()
{
    using (var client = new Kubernetes(config))
    {
        var podName = container.Pod.PodInfo.Name;
        return HasContainerBeenRestarted(client, podName);
    }
}
|
||||
|
||||
private void Worker()
|
||||
{
|
||||
try
|
||||
@ -57,7 +63,7 @@ namespace KubernetesWorkflow
|
||||
|
||||
private void MonitorContainer(CancellationToken token)
|
||||
{
|
||||
var client = new Kubernetes(config);
|
||||
using var client = new Kubernetes(config);
|
||||
while (!token.IsCancellationRequested)
|
||||
{
|
||||
token.WaitHandle.WaitOne(TimeSpan.FromSeconds(1));
|
||||
@ -65,9 +71,7 @@ namespace KubernetesWorkflow
|
||||
var pod = container.Pod;
|
||||
var recipe = container.Recipe;
|
||||
var podName = pod.PodInfo.Name;
|
||||
var podInfo = client.ReadNamespacedPod(podName, k8sNamespace);
|
||||
|
||||
if (podInfo.Status.ContainerStatuses.Any(c => c.RestartCount > 0))
|
||||
if (HasContainerBeenRestarted(client, podName))
|
||||
{
|
||||
DownloadCrashedContainerLogs(client, podName, recipe);
|
||||
return;
|
||||
@ -75,6 +79,12 @@ namespace KubernetesWorkflow
|
||||
}
|
||||
}
|
||||
|
||||
/// <summary>
/// Reads the named pod from the watcher's namespace and reports whether any of its
/// containers has a restart count above zero.
/// </summary>
/// <param name="client">Kubernetes client used to read the pod.</param>
/// <param name="podName">Name of the pod to inspect.</param>
/// <returns>
/// <c>true</c> if at least one container status reports <c>RestartCount &gt; 0</c>;
/// <c>false</c> otherwise, including when the pod has no container statuses yet.
/// </returns>
private bool HasContainerBeenRestarted(Kubernetes client, string podName)
{
    var podInfo = client.ReadNamespacedPod(podName, k8sNamespace);

    // Status and ContainerStatuses are optional in the pod API object and can be absent
    // before container state has been reported. Enumerable.Any would throw on a null
    // source, so a missing status is treated as "no restart observed".
    var containerStatuses = podInfo.Status?.ContainerStatuses;
    if (containerStatuses == null) return false;

    return containerStatuses.Any(c => c.RestartCount > 0);
}
|
||||
|
||||
private void DownloadCrashedContainerLogs(Kubernetes client, string podName, ContainerRecipe recipe)
|
||||
{
|
||||
log.Log("Pod crash detected for " + container.Name);
|
||||
|
Loading…
x
Reference in New Issue
Block a user