Better logging in case a Codex node fails to respond to a debug/info request.

This commit is contained in:
benbierens 2023-05-04 11:34:43 +02:00
parent ab07ac0389
commit 533bf32577
No known key found for this signature in database
GPG Key ID: FE44815D96D0A1AA
8 changed files with 42 additions and 27 deletions

View File

@ -42,6 +42,24 @@ namespace DistTestCore.Codex
return Http().HttpPostJson($"storage/request/{contentId}", request);
}
/// <summary>
/// Requests debug-info from the codex node and, when it carries a non-empty id,
/// registers a log string-replacement from that id to "___{container name}___".
/// </summary>
/// <exception cref="InvalidOperationException">
/// Thrown when debug-info is missing, has no id, or any step of the check throws.
/// The original exception is logged and attached as the inner exception.
/// </exception>
public void EnsureOnline()
{
    try
    {
        // A null response and a null/empty id are treated the same way: the node is not usable.
        var peerId = GetDebugInfo()?.id;
        if (string.IsNullOrEmpty(peerId))
        {
            throw new InvalidOperationException("Unable to get debug-info from codex node at startup.");
        }

        var replacement = $"___{Container.Name}___";
        log.AddStringReplace(peerId, replacement);
    }
    catch (Exception e)
    {
        // Log the full exception text before wrapping it, then surface a uniform failure to the caller.
        log.Error($"Failed to start codex node: {e}. Test infra failure.");
        throw new InvalidOperationException("Failed to start codex node. Test infra failure.", e);
    }
}
private Http Http()
{
var ip = Container.Pod.Cluster.IP;

View File

@ -62,29 +62,15 @@ namespace DistTestCore
return $"group:[{Containers.Describe()}]";
}
/// <summary>
/// Runs the online check of every node in this group by calling
/// <c>EnsureOnline</c> on each node's <c>CodexAccess</c>, in enumeration order.
/// </summary>
public void EnsureOnline()
{
    foreach (var onlineNode in Nodes)
    {
        onlineNode.CodexAccess.EnsureOnline();
    }
}
/// <summary>
/// Wraps a running container in a <c>CodexAccess</c>, checks that it is online,
/// and asks the factory to build the node object for this group.
/// </summary>
/// <param name="c">The running container to wrap.</param>
/// <param name="factory">Factory that produces the node from the access object and this group.</param>
/// <returns>The node created by <paramref name="factory"/>.</returns>
private OnlineCodexNode CreateOnlineCodexNode(RunningContainer c, ICodexNodeFactory factory)
{
    var codexAccess = new CodexAccess(lifecycle.Log, lifecycle.TimeSet, c);

    // The online check runs before the factory call, so a failing node never yields a node object.
    EnsureOnline(codexAccess);

    return factory.CreateOnlineCodexNode(codexAccess, this);
}
/// <summary>
/// Requests debug-info through <paramref name="access"/> and, when it carries a non-empty id,
/// registers a log string-replacement from that id to "___{container name}___".
/// </summary>
/// <param name="access">Access object of the codex node to check.</param>
/// <exception cref="InvalidOperationException">
/// Thrown when debug-info is missing, has no id, or any step of the check throws.
/// The original exception is logged and attached as the inner exception.
/// </exception>
private void EnsureOnline(CodexAccess access)
{
    try
    {
        // A null response and a null/empty id both mean the node cannot be identified.
        var peerId = access.GetDebugInfo()?.id;
        if (string.IsNullOrEmpty(peerId))
        {
            throw new InvalidOperationException("Unable to get debug-info from codex node at startup.");
        }

        var replacement = $"___{access.Container.Name}___";
        lifecycle.Log.AddStringReplace(peerId, replacement);
    }
    catch (Exception e)
    {
        // Log the full exception text before wrapping it in a uniform failure.
        lifecycle.Log.Error($"Failed to start codex node: {e}. Test infra failure.");
        throw new InvalidOperationException("Failed to start codex node. Test infra failure.", e);
    }
}
}
}

View File

@ -74,6 +74,7 @@ namespace DistTestCore
{
var group = new CodexNodeGroup(lifecycle, codexSetup, runningContainers, codexNodeFactory);
RunningGroups.Add(group);
group.EnsureOnline();
return group;
}

View File

@ -24,6 +24,14 @@
{
return ExposedPorts.Concat(InternalPorts).Single(p => p.Tag == tag);
}
/// <summary>
/// Builds a single-line, parenthesised description of this recipe for log output:
/// name, image, exposed and internal port numbers, and environment variables as "name:value".
/// </summary>
/// <returns>The description, in the form "(container-recipe: ..., envVars: ...)".</returns>
public override string ToString()
{
    // The last segment closes the parenthesis opened by the first one; the previous
    // text ended with a dangling ", " and left the description unbalanced.
    return $"(container-recipe: {Name}, image: {Image}, " +
        $"exposedPorts: {string.Join(",", ExposedPorts.Select(p => p.Number))}, " +
        $"internalPorts: {string.Join(",", InternalPorts.Select(p => p.Number))}, " +
        $"envVars: {string.Join(",", EnvVars.Select(v => v.Name + ":" + v.Value))})";
}
}
public class Port

View File

@ -75,7 +75,13 @@ namespace KubernetesWorkflow
/// <summary>
/// Creates one <c>RunningContainer</c> per recipe for the given pod and writes a debug
/// log line for each recipe together with the service port numbers resolved for it.
/// </summary>
/// <param name="runningPod">Pod the containers belong to; also the source of the service ports.</param>
/// <param name="recipes">Recipes to turn into running containers.</param>
/// <param name="startupConfig">Startup configuration passed to every container.</param>
/// <returns>The created containers, in the same order as <paramref name="recipes"/>.</returns>
private RunningContainer[] CreateContainers(RunningPod runningPod, ContainerRecipe[] recipes, StartupConfig startupConfig)
{
    log.Debug();

    // Single return path: an earlier one-line return made the per-recipe logging below unreachable.
    return recipes.Select(r =>
    {
        var servicePorts = runningPod.GetServicePortsForContainerRecipe(r);
        log.Debug($"{r} -> service ports: {string.Join(",", servicePorts.Select(p => p.Number))}");
        return new RunningContainer(runningPod, r, servicePorts, startupConfig);
    }).ToArray();
}
private ContainerRecipe[] CreateRecipes(int numberOfContainers, ContainerRecipeFactory recipeFactory, StartupConfig startupConfig)
@ -84,7 +90,7 @@ namespace KubernetesWorkflow
var result = new List<ContainerRecipe>();
for (var i = 0; i < numberOfContainers; i++)
{
result.Add(recipeFactory.CreateRecipe(i ,numberSource.GetContainerNumber(), componentFactory, startupConfig));
result.Add(recipeFactory.CreateRecipe(i, numberSource.GetContainerNumber(), componentFactory, startupConfig));
}
return result.ToArray();

View File

@ -6,7 +6,6 @@ namespace Tests.ParallelTests
[TestFixture]
public class DownloadTests : DistTest
{
[Ignore("a")]
[TestCase(3, 500)]
[TestCase(5, 100)]
[TestCase(10, 256)]
@ -37,4 +36,4 @@ namespace Tests.ParallelTests
}
}
}
}
}

View File

@ -11,8 +11,6 @@ namespace Tests.BasicTests
private IOnlineCodexNode? node = null;
// net isolation: only on real cluster?
// parallel upload/download tests?
// operation times.
[Test]
public void SetUpANodeAndWait()

View File

@ -6,7 +6,6 @@ namespace Tests.ParallelTests
[TestFixture]
public class UploadTests : DistTest
{
[Ignore("a")]
[TestCase(3, 50)]
[TestCase(5, 75)]
[TestCase(10, 25)]
@ -43,4 +42,4 @@ namespace Tests.ParallelTests
}
}
}
}
}