2023-09-20 13:33:58 +02:00
|
|
|
|
using Logging;
|
2023-06-21 11:01:48 +02:00
|
|
|
|
using Utils;
|
2023-06-26 11:10:56 +02:00
|
|
|
|
using NUnit.Framework.Internal;
|
2023-06-27 10:16:59 +02:00
|
|
|
|
using System.Reflection;
|
2023-09-20 13:33:58 +02:00
|
|
|
|
using CodexPlugin;
|
|
|
|
|
using DistTestCore.Logs;
|
|
|
|
|
using Core;
|
2023-11-12 10:07:23 +01:00
|
|
|
|
using KubernetesWorkflow.Types;
|
2023-12-20 15:56:03 +01:00
|
|
|
|
using TaskFactory = Utils.TaskFactory;
|
2023-06-21 11:01:48 +02:00
|
|
|
|
|
|
|
|
|
namespace ContinuousTests
|
|
|
|
|
{
|
|
|
|
|
public class SingleTestRun
|
|
|
|
|
{
|
2023-06-25 09:53:10 +02:00
|
|
|
|
// Number of failed runs counted so far. Static: the count is shared by every SingleTestRun instance.
private static int failureCount;

// Exceptions caught while invoking the test's moments during this run.
private readonly List<Exception> exceptions = new();

// Collaborators handed in through the constructor.
private readonly TaskFactory taskFactory;
private readonly Configuration config;
private readonly ILog overviewLog;
private readonly StatusLog statusLog;
private readonly TestHandle handle;
private readonly CancellationToken cancelToken;

// State created by the constructor for this particular run.
private readonly string testName;
private readonly FixtureLog fixtureLog;
private readonly EntryPoint entryPoint;
private readonly ICodexNode[] nodes;
|
2023-06-21 11:01:48 +02:00
|
|
|
|
|
2024-02-22 10:41:07 -03:00
|
|
|
|
/// <summary>
/// Prepares one run of the test held by <paramref name="handle"/>: stores the collaborators, creates the
/// fixture log and the entry point for this run, applies the startup checker's log replacements and
/// selects the nodes the test will run against.
/// </summary>
public SingleTestRun(
    EntryPointFactory entryPointFactory,
    TaskFactory taskFactory,
    Configuration config,
    ILog overviewLog,
    StatusLog statusLog,
    TestHandle handle,
    StartupChecker startupChecker,
    CancellationToken cancelToken,
    string deployId)
{
    this.handle = handle;
    this.config = config;
    this.taskFactory = taskFactory;
    this.overviewLog = overviewLog;
    this.statusLog = statusLog;
    this.cancelToken = cancelToken;

    // The test's type name identifies the run in the log output.
    testName = handle.Test.GetType().Name;

    var logConfig = new LogConfig(config.LogPath);
    fixtureLog = new FixtureLog(logConfig, DateTime.UtcNow, deployId, testName);

    var kubeConfigFile = config.KubeConfigFile;
    var dataPath = config.DataPath;
    var kubeNamespace = config.CodexDeployment.Metadata.KubeNamespace;
    entryPoint = entryPointFactory.CreateEntryPoint(kubeConfigFile, dataPath, kubeNamespace, fixtureLog);

    ApplyLogReplacements(fixtureLog, startupChecker);

    // Must come last: node selection uses the handle, config, fixture log and entry point set above.
    nodes = CreateRandomNodes();
}
|
|
|
|
|
|
2023-09-28 09:39:15 +02:00
|
|
|
|
/// <summary>
/// Hands this test run to the task factory. When the run has completed and the entry point has been
/// decommissioned, <paramref name="runFinishedHandle"/> is set.
/// </summary>
/// <param name="runFinishedHandle">Set once the run and its clean-up have finished without an infra failure.</param>
/// <param name="resultHandler">Forwarded to the test run; receives true for a passed test, false for a failed one.</param>
public void Run(EventWaitHandle runFinishedHandle, Action<bool> resultHandler)
{
    taskFactory.Run(() => ExecuteAndDecommission(runFinishedHandle, resultHandler), nameof(SingleTestRun));
}

// Runs the test, decommissions the entry point and signals completion.
// Any exception that escapes is logged to the overview log and ends the process with exit code -1.
private void ExecuteAndDecommission(EventWaitHandle runFinishedHandle, Action<bool> resultHandler)
{
    try
    {
        RunTest(resultHandler);

        entryPoint.Decommission(
            deleteKubernetesResources: false, // This would delete the continuous test net.
            deleteTrackedFiles: true,
            waitTillDone: false);

        runFinishedHandle.Set();
    }
    catch (Exception ex)
    {
        overviewLog.Error("Test infra failure: SingleTestRun failed with " + ex);
        Environment.Exit(-1);
    }
}
|
|
|
|
|
|
2023-09-28 09:39:15 +02:00
|
|
|
|
/// <summary>
/// Runs all moments of the test and reports the outcome.
/// On success: logs the pass, updates the status log, deletes the fixture log unless passed logs are
/// kept, and calls <paramref name="resultHandler"/> with true.
/// On failure: logs the error, marks the fixture log as failed, updates the status log, downloads the
/// container logs, counts the failure, calls <paramref name="resultHandler"/> with false and cancels
/// further testing once the configured failure limit is reached.
/// </summary>
/// <param name="resultHandler">Receives true when the test passed and false when it failed.</param>
private void RunTest(Action<bool> resultHandler)
{
    var testStart = DateTime.UtcNow;

    try
    {
        RunTestMoments();
        var duration = DateTime.UtcNow - testStart;

        OverviewLog($" > Test passed. ({Time.FormatDuration(duration)})");
        UpdateStatusLogPassed(testStart, duration);

        if (!config.KeepPassedTestLogs)
        {
            fixtureLog.Delete();
        }

        resultHandler(true);
    }
    catch (Exception ex)
    {
        // Measure the duration here: when the test moments throw, the success path never gets to
        // compute it, and the failure would otherwise be reported with a duration of zero.
        var duration = DateTime.UtcNow - testStart;

        fixtureLog.Error("Test run failed with exception: " + ex);
        fixtureLog.MarkAsFailed();
        UpdateStatusLogFailed(testStart, duration, ex.ToString());

        DownloadContainerLogs(testStart);

        // failureCount is static and shared between runs started on different tasks, so increment it
        // atomically and keep working with the value this increment produced.
        var failures = Interlocked.Increment(ref failureCount);
        resultHandler(false);

        if (config.StopOnFailure > 0)
        {
            OverviewLog($"Failures: {failures} / {config.StopOnFailure}");
            if (failures >= config.StopOnFailure)
            {
                OverviewLog($"Configured to stop after {config.StopOnFailure} failures.");
                Cancellation.Cts.Cancel();
            }
        }
    }
}
|
|
|
|
|
|
|
|
|
|
/// <summary>
/// Downloads, for every node used by this run, the container log between the effective start time and
/// now into a new subfile of the fixture log.
/// </summary>
/// <param name="testStart">Start time of the test; used as the log start unless full container logs are configured.</param>
private void DownloadContainerLogs(DateTime testStart)
{
    // The test failed just now. We can't expect the logs to be available in elastic-search immediately:
    Thread.Sleep(TimeSpan.FromMinutes(1));

    // Start 30 seconds before the chosen reference point: the deployment start when full container
    // logs are requested, otherwise the start of this test.
    var margin = TimeSpan.FromSeconds(30);
    var effectiveStart = config.FullContainerLogs
        ? config.CodexDeployment.Metadata.StartUtc.Subtract(margin)
        : testStart.Subtract(margin);
    var effectiveEnd = DateTime.UtcNow;

    var downloader = new ElasticSearchLogDownloader(entryPoint.Tools, fixtureLog);

    foreach (var node in nodes)
    {
        var container = node.Container;
        var startResult = container.RunningPod.StartResult;
        var deploymentName = startResult.Deployment.Name;
        var namespaceName = startResult.Cluster.Configuration.KubernetesNamespace;

        // First line of the downloaded log, identifying where it came from.
        var openingLine = $"{namespaceName} - {deploymentName} = {container.Name} = {node.GetDebugInfo().Id}";

        downloader.Download(fixtureLog.CreateSubfile(), container, effectiveStart, effectiveEnd, openingLine);
    }
}
|
|
|
|
|
|
2023-08-30 10:57:20 +02:00
|
|
|
|
/// <summary>
/// Registers every log replacement of <paramref name="startupChecker"/> as a string replacement on
/// <paramref name="fixtureLog"/>.
/// </summary>
private void ApplyLogReplacements(FixtureLog fixtureLog, StartupChecker startupChecker)
{
    foreach (var entry in startupChecker.LogReplacements)
    {
        fixtureLog.AddStringReplace(entry.From, entry.To);
    }
}
|
|
|
|
|
|
2023-06-27 10:16:59 +02:00
|
|
|
|
/// <summary>
/// Runs the test's moments in order, starting at the earliest one and waiting between moments.
/// Throws (via ThrowFailTest) as soon as an exception was recorded when the test's fail mode is
/// StopAfterFirstFailure, or after the last moment when any exception was recorded.
/// Returns normally after the last moment without exceptions, or when cancellation is requested.
/// </summary>
private void RunTestMoments()
{
    var t = handle.GetEarliestMoment();

    while (!cancelToken.IsCancellationRequested)
    {
        RunMoment(t);

        var stopAtFirstFailure = handle.Test.TestFailMode == TestFailMode.StopAfterFirstFailure;
        if (stopAtFirstFailure && exceptions.Any())
        {
            Log("Exception detected. TestFailMode = StopAfterFirstFailure. Stopping...");
            ThrowFailTest();
        }

        var nextMoment = handle.GetNextMoment(t);
        if (nextMoment == null)
        {
            // That was the last moment: fail if anything went wrong along the way, otherwise we're done.
            if (exceptions.Any())
            {
                ThrowFailTest();
            }

            return;
        }

        // Wait until the next moment is due; the wait ends early when cancellation is signalled.
        var delta = TimeSpan.FromSeconds(nextMoment.Value - t);
        Log($" > Next TestMoment in {Time.FormatDuration(delta)} seconds...");
        cancelToken.WaitHandle.WaitOne(delta);
        t = nextMoment.Value;
    }

    fixtureLog.Log("Test run has been cancelled.");
}
|
|
|
|
|
|
2023-06-27 10:16:59 +02:00
|
|
|
|
/// <summary>
/// Combines all recorded exceptions (with wrappers removed) into one message, writes it to the fixture
/// log and the overview log, and throws an exception carrying that message. Never returns normally.
/// </summary>
private void ThrowFailTest()
{
    var combined = GetCombinedExceptionsMessage(UnpackExceptions(exceptions));

    Log(combined);
    OverviewLog(" > Test failed: " + combined);

    throw new Exception(combined);
}
|
|
|
|
|
|
2023-11-09 11:35:45 +01:00
|
|
|
|
/// <summary>
/// Concludes the test in the status log with status "Failed", using <paramref name="error"/> as the message.
/// </summary>
private void UpdateStatusLogFailed(DateTime testStart, TimeSpan duration, string error)
{
    var data = CreateStatusLogData(testStart, error);
    statusLog.ConcludeTest("Failed", duration, data);
}
|
|
|
|
|
|
|
|
|
|
/// <summary>
/// Concludes the test in the status log with status "Passed" and the message "OK".
/// </summary>
private void UpdateStatusLogPassed(DateTime testStart, TimeSpan duration)
{
    var data = CreateStatusLogData(testStart, "OK");
    statusLog.ConcludeTest("Passed", duration, data);
}
|
|
|
|
|
|
|
|
|
|
/// <summary>
/// Builds the key/value data attached to a status log entry. Starts from the entry point's plugin
/// metadata and adds the test start time, test name, message, the involved nodes, a short error text
/// and, when available, the average upload and download speeds of the nodes.
/// </summary>
/// <param name="testStart">Start time of the test, written in round-trip ("o") format.</param>
/// <param name="message">Full message; its first line is also used to derive the short "error" entry.</param>
private Dictionary<string, string> CreateStatusLogData(DateTime testStart, string message)
{
    var data = entryPoint.GetPluginMetadata();
    data.Add("teststart", testStart.ToString("o"));
    data.Add("testname", testName);
    data.Add("message", message);

    var podLabels = nodes.Select(node => node.GetName());
    data.Add("involvedpods", string.Join(",", podLabels));

    var podNames = nodes.Select(node => node.GetPodInfo().Name);
    data.Add("involvedpodnames", string.Join(",", podNames));

    // Short error: the first line of the message, reduced to whatever follows its last ':' if it has one.
    var firstLine = message.Split(Environment.NewLine).First();
    var error = firstLine.Contains(":")
        ? firstLine.Substring(1 + firstLine.LastIndexOf(":"))
        : firstLine;
    data.Add("error", error);

    var upload = nodes.Select(node => node.TransferSpeeds.GetUploadSpeed()).ToList()!.OptionalAverage();
    var download = nodes.Select(node => node.TransferSpeeds.GetDownloadSpeed()).ToList()!.OptionalAverage();

    // The averages are optional; only report the ones that exist.
    if (upload != null)
    {
        data.Add("avgupload", upload.ToString());
    }

    if (download != null)
    {
        data.Add("avgdownload", download.ToString());
    }

    return data;
}
|
|
|
|
|
|
2023-08-30 10:13:48 +02:00
|
|
|
|
/// <summary>
/// Joins the full text (ToString) of each exception into one string, one exception after another,
/// separated by the environment's newline.
/// </summary>
private string GetCombinedExceptionsMessage(Exception[] exceptions)
{
    var texts = exceptions.Select(e => e.ToString());
    return string.Join(Environment.NewLine, texts);
}
|
|
|
|
|
|
|
|
|
|
/// <summary>
/// Applies UnpackException to every entry of <paramref name="exceptions"/> and returns the results
/// as an array in the same order.
/// </summary>
private Exception[] UnpackExceptions(List<Exception> exceptions)
{
    var unpacked = exceptions.Select(ex => UnpackException(ex));
    return unpacked.ToArray();
}
|
|
|
|
|
|
2023-06-27 10:16:59 +02:00
|
|
|
|
/// <summary>
/// Removes AggregateException and TargetInvocationException wrappers from <paramref name="exception"/>
/// and returns the first exception underneath that is neither of the two. For an AggregateException
/// only its first inner exception is followed.
/// </summary>
/// <param name="exception">The exception to unwrap.</param>
/// <returns>
/// The innermost unwrapped exception. A wrapper that has no inner exception to follow is returned
/// itself, so a non-null input never yields null and an empty AggregateException does not throw.
/// </returns>
private Exception UnpackException(Exception exception)
{
    var current = exception;
    while (true)
    {
        Exception? inner = current switch
        {
            AggregateException aggregate => aggregate.InnerExceptions.FirstOrDefault(),
            TargetInvocationException invocation => invocation.InnerException,
            _ => null
        };

        // Either this is not a wrapper, or it is a wrapper with nothing inside:
        // in both cases it is the most specific exception available.
        if (inner == null)
        {
            return current;
        }

        current = inner;
    }
}
|
|
|
|
|
|
2023-06-25 09:53:10 +02:00
|
|
|
|
/// <summary>
/// Invokes the test moment <paramref name="t"/> inside an isolated test execution context. An exception
/// thrown by the moment is recorded in the exceptions list instead of being propagated. Afterwards the
/// test is decommissioned, whether or not the moment failed.
/// </summary>
private void RunMoment(int t)
{
    using (new TestExecutionContext.IsolatedContext())
    {
        try
        {
            handle.InvokeMoment(t, InitializeTest);
        }
        catch (Exception failure)
        {
            exceptions.Add(failure);
        }
    }

    DecommissionTest();
}
|
|
|
|
|
|
|
|
|
|
/// <summary>
/// Logs that the moment <paramref name="name"/> is starting and initializes the test with this run's
/// nodes, fixture log, file manager, configuration and cancellation token.
/// </summary>
private void InitializeTest(string name)
{
    Log($" > Running TestMoment '{name}'");

    var fileManager = entryPoint.Tools.GetFileManager();
    handle.Test.Initialize(nodes, fixtureLog, fileManager, config, cancelToken);
}
|
|
|
|
|
|
|
|
|
|
/// <summary>
/// Re-initializes the test with null for everything except the cancellation token, so it no longer
/// holds this run's nodes, log, file manager or configuration.
/// </summary>
private void DecommissionTest() =>
    handle.Test.Initialize(null!, null!, null!, null!, cancelToken);
|
|
|
|
|
|
2023-06-25 09:53:10 +02:00
|
|
|
|
/// <summary>
/// Writes <paramref name="msg"/> to this run's fixture log.
/// </summary>
private void Log(string msg) => fixtureLog.Log(msg);
|
|
|
|
|
|
2023-06-25 11:06:47 +02:00
|
|
|
|
/// <summary>
/// Writes <paramref name="msg"/> to the fixture log and, prefixed with the container names and the
/// test name, to the overview log.
/// </summary>
private void OverviewLog(string msg)
{
    Log(msg);
    overviewLog.Log($"{GetContainerNames()} {testName}: {msg}");
}
|
|
|
|
|
|
2023-08-31 09:50:34 +02:00
|
|
|
|
/// <summary>
/// Returns the label used in the overview log for the nodes of this run: "(All Nodes)" when the test
/// requires -1 nodes, otherwise the comma-separated container names in parentheses.
/// </summary>
private string GetContainerNames()
{
    var usesAllNodes = handle.Test.RequiredNumberOfNodes == -1;
    if (usesAllNodes)
    {
        return "(All Nodes)";
    }

    var names = nodes.Select(node => node.Container.Name);
    return $"({string.Join(",", names)})";
}
|
|
|
|
|
|
2023-09-20 13:33:58 +02:00
|
|
|
|
/// <summary>
/// Selects the pods for this run, logs their names to the fixture log and wraps them as Codex nodes
/// through the entry point's interface.
/// </summary>
private ICodexNode[] CreateRandomNodes()
{
    var selectedPods = SelectRandomContainers();

    var podNames = string.Join(",", selectedPods.Select(pod => pod.Name));
    fixtureLog.Log("Selected nodes: " + podNames);

    var codexInterface = entryPoint.CreateInterface();
    return codexInterface.WrapCodexContainers(selectedPods).ToArray();
}
|
|
|
|
|
|
2024-04-13 17:09:17 +03:00
|
|
|
|
/// <summary>
/// Picks the pods the test will run against. When the test requires -1 nodes, every pod of the Codex
/// deployment is returned; otherwise the required number of pods is picked one at a time with
/// PickOneRandom.
/// </summary>
private RunningPod[] SelectRandomContainers()
{
    var required = handle.Test.RequiredNumberOfNodes;
    var available = config.CodexDeployment.CodexInstances.Select(instance => instance.Pod).ToList();

    // -1 means: use all nodes of the deployment.
    if (required == -1)
    {
        return available.ToArray();
    }

    // NOTE(review): presumably PickOneRandom removes the picked pod from the list so no pod is
    // selected twice - confirm against the Utils implementation.
    var selection = new RunningPod[required];
    var index = 0;
    while (index < required)
    {
        selection[index] = available.PickOneRandom();
        index++;
    }

    return selection;
}
|
|
|
|
|
}
|
2024-02-22 10:41:07 -03:00
|
|
|
|
}
|