diff --git a/Framework/KubernetesWorkflow/K8sController.cs b/Framework/KubernetesWorkflow/K8sController.cs index ffcac5df..e07116a5 100644 --- a/Framework/KubernetesWorkflow/K8sController.cs +++ b/Framework/KubernetesWorkflow/K8sController.cs @@ -724,9 +724,11 @@ namespace KubernetesWorkflow private V1Pod GetPodForDeployment(RunningDeployment deployment) { return Time.Retry(() => GetPodForDeplomentInternal(deployment), - // We will wait up to 1 minute, k8s might be moving pods around. - maxTimeout: TimeSpan.FromMinutes(1), - retryTime: TimeSpan.FromSeconds(10), + // K8s might be moving pods around. If it's scaling the cluster + // to handle the increased load, it might take a while before the new + // VMs are up and ready. So we use a generous timeout. + maxTimeout: TimeSpan.FromMinutes(15.0), + retryTime: TimeSpan.FromSeconds(30.0), description: "Find pod by label for deployment."); } diff --git a/Framework/KubernetesWorkflow/StartupWorkflow.cs b/Framework/KubernetesWorkflow/StartupWorkflow.cs index 8b9d5a19..11247fc7 100644 --- a/Framework/KubernetesWorkflow/StartupWorkflow.cs +++ b/Framework/KubernetesWorkflow/StartupWorkflow.cs @@ -61,8 +61,7 @@ namespace KubernetesWorkflow var startResult = controller.BringOnline(recipes, location); var containers = CreateContainers(startResult, recipes, startupConfig); - var info = GetPodInfo(startResult.Deployment); - var rc = new RunningPod(Guid.NewGuid().ToString(), info, startupConfig, startResult, containers); + var rc = new RunningPod(Guid.NewGuid().ToString(), startupConfig, startResult, containers); cluster.Configuration.Hooks.OnContainersStarted(rc); if (startResult.ExternalService != null) @@ -73,7 +72,7 @@ namespace KubernetesWorkflow }); } - public void WaitUntilOnline(RunningPod rc) + public PodInfo WaitUntilOnline(RunningPod rc) { K8s(controller => { @@ -82,6 +81,8 @@ namespace KubernetesWorkflow controller.WaitUntilOnline(c); } }); + + return GetPodInfo(rc.StartResult.Deployment); } public PodInfo GetPodInfo(RunningDeployment deployment) diff --git a/Framework/KubernetesWorkflow/Types/FutureContainers.cs b/Framework/KubernetesWorkflow/Types/FutureContainers.cs index 296be534..805b8e2c 100644 --- a/Framework/KubernetesWorkflow/Types/FutureContainers.cs +++ b/Framework/KubernetesWorkflow/Types/FutureContainers.cs @@ -13,7 +13,8 @@ public RunningPod WaitForOnline() { - workflow.WaitUntilOnline(runningPod); + var podInfo = workflow.WaitUntilOnline(runningPod); + runningPod.Initialize(podInfo); return runningPod; } } diff --git a/Framework/KubernetesWorkflow/Types/RunningPod.cs b/Framework/KubernetesWorkflow/Types/RunningPod.cs index bbe7f080..d1034885 100644 --- a/Framework/KubernetesWorkflow/Types/RunningPod.cs +++ b/Framework/KubernetesWorkflow/Types/RunningPod.cs @@ -4,10 +4,10 @@ namespace KubernetesWorkflow.Types { public class RunningPod { - public RunningPod(string id, PodInfo podInfo, StartupConfig startupConfig, StartResult startResult, RunningContainer[] containers) + public RunningPod(string id, StartupConfig startupConfig, StartResult startResult, RunningContainer[] containers) { Id = id; - PodInfo = podInfo; + PodInfo = null!; StartupConfig = startupConfig; StartResult = startResult; Containers = containers; @@ -16,7 +16,7 @@ namespace KubernetesWorkflow.Types } public string Id { get; } - public PodInfo PodInfo { get; } + public PodInfo PodInfo { get; private set; } public StartupConfig StartupConfig { get; } public StartResult StartResult { get; } public RunningContainer[] Containers { get; } @@ -30,6 +30,11 @@ namespace KubernetesWorkflow.Types [JsonIgnore] public bool IsStopped { get; internal set; } + public void Initialize(PodInfo podInfo) + { + PodInfo = podInfo; + } + public string Describe() { return string.Join(",", Containers.Select(c => c.Name));