From 533bf325774fac3ccf365df40557b57756e26a5d Mon Sep 17 00:00:00 2001 From: benbierens Date: Thu, 4 May 2023 11:34:43 +0200 Subject: [PATCH] Better logging in case codex node fails to respond to debug/info request. --- DistTestCore/Codex/CodexAccess.cs | 18 ++++++++++++++++++ DistTestCore/CodexNodeGroup.cs | 24 +++++------------------- DistTestCore/CodexStarter.cs | 1 + KubernetesWorkflow/ContainerRecipe.cs | 8 ++++++++ KubernetesWorkflow/StartupWorkflow.cs | 10 ++++++++-- Tests/BasicTests/DownloadTests.cs | 3 +-- Tests/BasicTests/NetworkIsolationTest.cs | 2 -- Tests/BasicTests/UploadTests.cs | 3 +-- 8 files changed, 42 insertions(+), 27 deletions(-) diff --git a/DistTestCore/Codex/CodexAccess.cs b/DistTestCore/Codex/CodexAccess.cs index c4fe91ff..8654878d 100644 --- a/DistTestCore/Codex/CodexAccess.cs +++ b/DistTestCore/Codex/CodexAccess.cs @@ -42,6 +42,24 @@ namespace DistTestCore.Codex return Http().HttpPostJson($"storage/request/{contentId}", request); } + public void EnsureOnline() + { + try + { + var debugInfo = GetDebugInfo(); + if (debugInfo == null || string.IsNullOrEmpty(debugInfo.id)) throw new InvalidOperationException("Unable to get debug-info from codex node at startup."); + + var nodePeerId = debugInfo.id; + var nodeName = Container.Name; + log.AddStringReplace(nodePeerId, $"___{nodeName}___"); + } + catch (Exception e) + { + log.Error($"Failed to start codex node: {e}. Test infra failure."); + throw new InvalidOperationException($"Failed to start codex node. Test infra failure.", e); + } + } + private Http Http() { var ip = Container.Pod.Cluster.IP; diff --git a/DistTestCore/CodexNodeGroup.cs b/DistTestCore/CodexNodeGroup.cs index d0867d7d..2005410a 100644 --- a/DistTestCore/CodexNodeGroup.cs +++ b/DistTestCore/CodexNodeGroup.cs @@ -62,29 +62,15 @@ namespace DistTestCore return $"group:[{Containers.Describe()}]"; } + public void EnsureOnline() + { + foreach (var node in Nodes) node.CodexAccess.EnsureOnline(); + } + private OnlineCodexNode CreateOnlineCodexNode(RunningContainer c, ICodexNodeFactory factory) { var access = new CodexAccess(lifecycle.Log, lifecycle.TimeSet, c); - EnsureOnline(access); return factory.CreateOnlineCodexNode(access, this); } - - private void EnsureOnline(CodexAccess access) - { - try - { - var debugInfo = access.GetDebugInfo(); - if (debugInfo == null || string.IsNullOrEmpty(debugInfo.id)) throw new InvalidOperationException("Unable to get debug-info from codex node at startup."); - - var nodePeerId = debugInfo.id; - var nodeName = access.Container.Name; - lifecycle.Log.AddStringReplace(nodePeerId, $"___{nodeName}___"); - } - catch (Exception e) - { - lifecycle.Log.Error($"Failed to start codex node: {e}. Test infra failure."); - throw new InvalidOperationException($"Failed to start codex node. Test infra failure.", e); - } - } } } diff --git a/DistTestCore/CodexStarter.cs b/DistTestCore/CodexStarter.cs index 52c70ca6..e36ebca8 100644 --- a/DistTestCore/CodexStarter.cs +++ b/DistTestCore/CodexStarter.cs @@ -74,6 +74,7 @@ namespace DistTestCore { var group = new CodexNodeGroup(lifecycle, codexSetup, runningContainers, codexNodeFactory); RunningGroups.Add(group); + group.EnsureOnline(); return group; } diff --git a/KubernetesWorkflow/ContainerRecipe.cs b/KubernetesWorkflow/ContainerRecipe.cs index 9fbbb9f2..ce3252f4 100644 --- a/KubernetesWorkflow/ContainerRecipe.cs +++ b/KubernetesWorkflow/ContainerRecipe.cs @@ -24,6 +24,14 @@ { return ExposedPorts.Concat(InternalPorts).Single(p => p.Tag == tag); } + + public override string ToString() + { + return $"(container-recipe: {Name}, image: {Image}, " + + $"exposedPorts: {string.Join(",", ExposedPorts.Select(p => p.Number))}, " + + $"internalPorts: {string.Join(",", InternalPorts.Select(p => p.Number))}, " + + $"envVars: {string.Join(",", EnvVars.Select(v => v.Name + ":" + v.Value))}, "; + } } public class Port diff --git a/KubernetesWorkflow/StartupWorkflow.cs b/KubernetesWorkflow/StartupWorkflow.cs index cd229b07..20cea0f1 100644 --- a/KubernetesWorkflow/StartupWorkflow.cs +++ b/KubernetesWorkflow/StartupWorkflow.cs @@ -75,7 +75,13 @@ namespace KubernetesWorkflow private RunningContainer[] CreateContainers(RunningPod runningPod, ContainerRecipe[] recipes, StartupConfig startupConfig) { log.Debug(); - return recipes.Select(r => new RunningContainer(runningPod, r, runningPod.GetServicePortsForContainerRecipe(r), startupConfig)).ToArray(); + return recipes.Select(r => + { + var servicePorts = runningPod.GetServicePortsForContainerRecipe(r); + log.Debug($"{r} -> service ports: {string.Join(",", servicePorts.Select(p => p.Number))}"); + + return new RunningContainer(runningPod, r, servicePorts, startupConfig); + }).ToArray(); } private ContainerRecipe[] CreateRecipes(int numberOfContainers, ContainerRecipeFactory recipeFactory, StartupConfig startupConfig) @@ -84,7 +90,7 @@ namespace KubernetesWorkflow var result = new List(); for (var i = 0; i < numberOfContainers; i++) { - result.Add(recipeFactory.CreateRecipe(i ,numberSource.GetContainerNumber(), componentFactory, startupConfig)); + result.Add(recipeFactory.CreateRecipe(i, numberSource.GetContainerNumber(), componentFactory, startupConfig)); } return result.ToArray(); diff --git a/Tests/BasicTests/DownloadTests.cs b/Tests/BasicTests/DownloadTests.cs index 44ea4ee2..36a635f9 100644 --- a/Tests/BasicTests/DownloadTests.cs +++ b/Tests/BasicTests/DownloadTests.cs @@ -6,7 +6,6 @@ namespace Tests.ParallelTests [TestFixture] public class DownloadTests : DistTest { - [Ignore("a")] [TestCase(3, 500)] [TestCase(5, 100)] [TestCase(10, 256)] @@ -37,4 +36,4 @@ namespace Tests.ParallelTests } } } -} \ No newline at end of file +} diff --git a/Tests/BasicTests/NetworkIsolationTest.cs b/Tests/BasicTests/NetworkIsolationTest.cs index b26dcbe8..a7b2520f 100644 --- a/Tests/BasicTests/NetworkIsolationTest.cs +++ b/Tests/BasicTests/NetworkIsolationTest.cs @@ -11,8 +11,6 @@ namespace Tests.BasicTests private IOnlineCodexNode? node = null; // net isolation: only on real cluster? - // parallel upload/download tests? - // operation times. [Test] public void SetUpANodeAndWait() diff --git a/Tests/BasicTests/UploadTests.cs b/Tests/BasicTests/UploadTests.cs index fbe146f4..ea3b41c4 100644 --- a/Tests/BasicTests/UploadTests.cs +++ b/Tests/BasicTests/UploadTests.cs @@ -6,7 +6,6 @@ namespace Tests.ParallelTests [TestFixture] public class UploadTests : DistTest { - [Ignore("a")] [TestCase(3, 50)] [TestCase(5, 75)] [TestCase(10, 25)] @@ -43,4 +42,4 @@ namespace Tests.ParallelTests } } } -} \ No newline at end of file +}