Merge branch 'master' into feature/public-testnet-deploying
This commit is contained in:
commit
485e3cf02e
@ -360,6 +360,7 @@ namespace KubernetesWorkflow
|
||||
},
|
||||
Spec = new V1PodSpec
|
||||
{
|
||||
Affinity = CreatePodAffinity(containerRecipes),
|
||||
NodeSelector = CreateNodeSelector(location),
|
||||
Containers = CreateDeploymentContainers(containerRecipes),
|
||||
Volumes = CreateVolumes(containerRecipes)
|
||||
@ -392,6 +393,42 @@ namespace KubernetesWorkflow
|
||||
};
|
||||
}
|
||||
|
||||
/// <summary>
/// Builds a node-affinity rule from the 'NotIn' scheduling affinities of the given recipes.
/// </summary>
/// <param name="recipes">Container recipes whose SchedulingAffinity.NotIn values are collected.</param>
/// <returns>
/// Null when no recipe supplies a non-empty NotIn value. Otherwise a V1Affinity whose
/// RequiredDuringSchedulingIgnoredDuringExecution selector holds a single term with a single
/// "NotIn" requirement on the "workload-type" key, listing the distinct collected values.
/// </returns>
private V1Affinity? CreatePodAffinity(ContainerRecipe[] recipes)
{
    // Gather every distinct, non-empty NotIn value across all recipes.
    var excludedWorkloadTypes = recipes
        .Select(recipe => recipe.SchedulingAffinity.NotIn)
        .Where(value => !string.IsNullOrEmpty(value))
        .Distinct()
        .ToList();

    // Nothing to exclude: no affinity section is emitted at all.
    if (excludedWorkloadTypes.Count == 0)
    {
        return null;
    }

    var requirement = new V1NodeSelectorRequirement
    {
        Key = "workload-type",
        OperatorProperty = "NotIn",
        Values = excludedWorkloadTypes
    };

    var term = new V1NodeSelectorTerm
    {
        MatchExpressions = new List<V1NodeSelectorRequirement> { requirement }
    };

    var nodeSelector = new V1NodeSelector
    {
        NodeSelectorTerms = new List<V1NodeSelectorTerm> { term }
    };

    return new V1Affinity
    {
        NodeAffinity = new V1NodeAffinity
        {
            RequiredDuringSchedulingIgnoredDuringExecution = nodeSelector
        }
    };
}
|
||||
|
||||
private K8sNodeLabel? GetNodeLabelForLocation(ILocation location)
|
||||
{
|
||||
var l = (Location)location;
|
||||
|
@ -2,12 +2,13 @@
|
||||
{
|
||||
public class ContainerRecipe
|
||||
{
|
||||
public ContainerRecipe(int number, string? nameOverride, string image, ContainerResources resources, Port[] exposedPorts, Port[] internalPorts, EnvVar[] envVars, PodLabels podLabels, PodAnnotations podAnnotations, VolumeMount[] volumes, ContainerAdditionals additionals)
|
||||
public ContainerRecipe(int number, string? nameOverride, string image, ContainerResources resources, SchedulingAffinity schedulingAffinity, Port[] exposedPorts, Port[] internalPorts, EnvVar[] envVars, PodLabels podLabels, PodAnnotations podAnnotations, VolumeMount[] volumes, ContainerAdditionals additionals)
|
||||
{
|
||||
Number = number;
|
||||
NameOverride = nameOverride;
|
||||
Image = image;
|
||||
Resources = resources;
|
||||
SchedulingAffinity = schedulingAffinity;
|
||||
ExposedPorts = exposedPorts;
|
||||
InternalPorts = internalPorts;
|
||||
EnvVars = envVars;
|
||||
@ -32,6 +33,7 @@
|
||||
public int Number { get; }
|
||||
public string? NameOverride { get; }
|
||||
public ContainerResources Resources { get; }
|
||||
public SchedulingAffinity SchedulingAffinity { get; }
|
||||
public string Image { get; }
|
||||
public Port[] ExposedPorts { get; }
|
||||
public Port[] InternalPorts { get; }
|
||||
@ -53,6 +55,7 @@
|
||||
$"internalPorts: {string.Join(",", InternalPorts.Select(p => p.Number))}, " +
|
||||
$"envVars: {string.Join(",", EnvVars.Select(v => v.ToString()))}, " +
|
||||
$"limits: {Resources}, " +
|
||||
$"affinity: {SchedulingAffinity}, " +
|
||||
$"volumes: {string.Join(",", Volumes.Select(v => $"'{v.MountPath}'"))}";
|
||||
}
|
||||
}
|
||||
|
@ -13,6 +13,7 @@ namespace KubernetesWorkflow.Recipe
|
||||
private readonly List<object> additionals = new List<object>();
|
||||
private RecipeComponentFactory factory = null!;
|
||||
private ContainerResources resources = new ContainerResources();
|
||||
private SchedulingAffinity schedulingAffinity = new SchedulingAffinity();
|
||||
|
||||
public ContainerRecipe CreateRecipe(int index, int containerNumber, RecipeComponentFactory factory, StartupConfig config)
|
||||
{
|
||||
@ -22,7 +23,7 @@ namespace KubernetesWorkflow.Recipe
|
||||
|
||||
Initialize(config);
|
||||
|
||||
var recipe = new ContainerRecipe(containerNumber, config.NameOverride, Image, resources,
|
||||
var recipe = new ContainerRecipe(containerNumber, config.NameOverride, Image, resources, schedulingAffinity,
|
||||
exposedPorts.ToArray(),
|
||||
internalPorts.ToArray(),
|
||||
envVars.ToArray(),
|
||||
@ -40,6 +41,7 @@ namespace KubernetesWorkflow.Recipe
|
||||
additionals.Clear();
|
||||
this.factory = null!;
|
||||
resources = new ContainerResources();
|
||||
schedulingAffinity = new SchedulingAffinity();
|
||||
|
||||
return recipe;
|
||||
}
|
||||
@ -121,6 +123,11 @@ namespace KubernetesWorkflow.Recipe
|
||||
SetResourcesRequest(new ContainerResourceSet(milliCPUs, memory));
|
||||
}
|
||||
|
||||
/// <summary>
/// Replaces the scheduling affinity held by this factory with a new one built from <paramref name="notIn"/>.
/// </summary>
/// <param name="notIn">Value handed to the SchedulingAffinity constructor.</param>
protected void SetSchedulingAffinity(string notIn) => schedulingAffinity = new SchedulingAffinity(notIn);
|
||||
|
||||
// Disabled following a possible bug in the k8s cluster that will throttle containers much more than is
|
||||
// called for if they have resource limits defined.
|
||||
//protected void SetResourceLimits(int milliCPUs, ByteSize memory)
|
||||
|
18
Framework/KubernetesWorkflow/Recipe/SchedulingAffinity.cs
Normal file
18
Framework/KubernetesWorkflow/Recipe/SchedulingAffinity.cs
Normal file
@ -0,0 +1,18 @@
|
||||
namespace KubernetesWorkflow.Recipe
|
||||
{
|
||||
/// <summary>
/// Scheduling affinity carried by a container recipe. Holds a single optional 'NotIn' value.
/// </summary>
public class SchedulingAffinity
{
    /// <summary>
    /// Creates a scheduling affinity.
    /// </summary>
    /// <param name="notIn">The 'NotIn' value; defaults to null when omitted.</param>
    public SchedulingAffinity(string? notIn = null) => NotIn = notIn;

    /// <summary>
    /// The 'NotIn' value supplied at construction, or null when none was given.
    /// </summary>
    public string? NotIn { get; }

    /// <summary>
    /// Returns "none" when <see cref="NotIn"/> is null or empty; otherwise "notIn:" followed by the value.
    /// </summary>
    public override string ToString() =>
        string.IsNullOrEmpty(NotIn) ? "none" : $"notIn:{NotIn}";
}
|
||||
}
|
@ -21,6 +21,8 @@ namespace CodexContractsPlugin
|
||||
|
||||
var address = config.GethNode.StartResult.Container.GetAddress(new NullLog(), GethContainerRecipe.HttpPortTag);
|
||||
|
||||
SetSchedulingAffinity(notIn: "tests-runners");
|
||||
|
||||
AddEnvVar("DISTTEST_NETWORK_URL", address.ToString());
|
||||
AddEnvVar("HARDHAT_NETWORK", "codexdisttestnetwork");
|
||||
AddEnvVar("KEEP_ALIVE", "1");
|
||||
|
@ -13,6 +13,8 @@ namespace CodexDiscordBotPlugin
|
||||
{
|
||||
var config = startupConfig.Get<DiscordBotStartupConfig>();
|
||||
|
||||
SetSchedulingAffinity(notIn: "tests-runners");
|
||||
|
||||
AddEnvVar("TOKEN", config.Token);
|
||||
AddEnvVar("SERVERNAME", config.ServerName);
|
||||
AddEnvVar("ADMINROLE", config.AdminRoleName);
|
||||
|
@ -29,6 +29,8 @@ namespace CodexPlugin
|
||||
SetResourcesRequest(milliCPUs: 100, memory: 100.MB());
|
||||
//SetResourceLimits(milliCPUs: 4000, memory: 12.GB());
|
||||
|
||||
SetSchedulingAffinity(notIn: "tests-runners");
|
||||
|
||||
var config = startupConfig.Get<CodexStartupConfig>();
|
||||
|
||||
var apiPort = CreateApiPort(config, ApiPortTag);
|
||||
|
@ -24,6 +24,8 @@ namespace GethPlugin
|
||||
|
||||
var args = CreateArgs(config);
|
||||
|
||||
SetSchedulingAffinity(notIn: "tests-runners");
|
||||
|
||||
AddEnvVar("GETH_ARGS", args);
|
||||
}
|
||||
|
||||
|
@ -14,6 +14,8 @@ namespace MetricsPlugin
|
||||
{
|
||||
var config = startupConfig.Get<PrometheusStartupConfig>();
|
||||
|
||||
SetSchedulingAffinity(notIn: "tests-runners");
|
||||
|
||||
AddExposedPortAndVar("PROM_PORT", PortTag);
|
||||
AddEnvVar("PROM_CONFIG", config.PrometheusConfigBase64);
|
||||
}
|
||||
|
@ -0,0 +1,52 @@
|
||||
# Codex Continuous Test-net Report
|
||||
Date: 13-11-2023
|
||||
|
||||
Report for: 10-2023
|
||||
|
||||
|
||||
## Test-net Status
|
||||
- Start of month: Offline - stopped
|
||||
- End of month: Offline - stopped
|
||||
|
||||
(Stopped: The number of tests that can successfully run on the test-net is not high enough to justify the cost of leaving it running.)
|
||||
|
||||
## Deployment Configuration
|
||||
Continuous Test-net is deployed to the kubernetes cluster with the following configuration:
|
||||
|
||||
5x Codex Nodes:
|
||||
- Log-level: Trace
|
||||
- Storage quota: 2048 MB
|
||||
- Storage sell: 1024 MB
|
||||
- Min price: 1024
|
||||
- Max collateral: 1024
|
||||
- Max duration: 3600000 seconds
|
||||
- Block-TTL*: 180 seconds
|
||||
- Block-MI*: 120 seconds
|
||||
- Block-MN*: 10000 blocks
|
||||
|
||||
3 of these 5 nodes have:
|
||||
- Validator: true
|
||||
|
||||
Kubernetes namespace: 'codex-continuous-tests'
|
||||
* Some tests have been performed with alternative (disabled) maintenance parameters:
|
||||
- Block-TTL: 99999999 seconds
|
||||
- Block-MI: 99999999 seconds
|
||||
- Block-MN: 100 blocks
|
||||
|
||||
## Test Overview
|
||||
| Changes | Test | Description | Status | Results |
|
||||
|---------------------|------------------|--------------------------------|------------|---------------------------------------------------------------|
|
||||
| No change | Two-client test | See report for July 2023. | Faulted | Test reliably fails. Both upload and download failures occur. |
|
||||
| No change | Two-client test* | See report for September 2023. | Faulted | Test reliably fails. Both upload and download failures occur. |
|
||||
| Possible regression | HoldMyBeer test | See report for August 2023. | Unreliable | Successful runs of 48h have not been observed in October. |
|
||||
| Possible regression | Peers test | See report for August 2023. | Unreliable | Successful runs of 48h have not been observed in October. |
|
||||
|
||||
## Resulting changes
|
||||
As a result of the testing efforts in 10-2023, these changes were made:
|
||||
1. Consolidation of test logs and metrics using grafana and elastic-search.
|
||||
1. Investment made in profiling instrumentation in Codex codebase.
|
||||
1. Some testing effort has been diverted to preparing the necessary infrastructure for the creation of a public testnet by 1-December-2023.
|
||||
|
||||
## Action Points
|
||||
- Debugging efforts continue
|
||||
- Some effort remains allocated to deploying and supporting the public testnet
|
@ -18,14 +18,14 @@ spec:
|
||||
spec:
|
||||
priorityClassName: system-node-critical
|
||||
nodeSelector:
|
||||
doks.digitalocean.com/node-pool: "fixed-s-4vcpu-16gb-amd"
|
||||
workload-type: "tests-runners"
|
||||
containers:
|
||||
- name: ${NAMEPREFIX}-runner
|
||||
image: codexstorage/cs-codex-dist-tests:latest
|
||||
imagePullPolicy: Always
|
||||
resources:
|
||||
requests:
|
||||
memory: "2Gi"
|
||||
memory: "1Gi"
|
||||
env:
|
||||
- name: KUBECONFIG
|
||||
value: "/opt/kubeconfig.yaml"
|
||||
|
@ -16,10 +16,16 @@ spec:
|
||||
name: ${NAMEPREFIX}-${RUNID}
|
||||
run-id: ${RUNID}
|
||||
spec:
|
||||
priorityClassName: system-node-critical
|
||||
nodeSelector:
|
||||
workload-type: "tests-runners"
|
||||
containers:
|
||||
- name: ${NAMEPREFIX}-runner
|
||||
image: codexstorage/cs-codex-dist-tests:latest
|
||||
imagePullPolicy: Always
|
||||
resources:
|
||||
requests:
|
||||
memory: "1Gi"
|
||||
env:
|
||||
- name: KUBECONFIG
|
||||
value: "/opt/kubeconfig.yaml"
|
||||
|
Loading…
x
Reference in New Issue
Block a user