mirror of
https://github.com/logos-blockchain/logos-blockchain-testing.git
synced 2026-01-04 06:13:09 +00:00
More robustness: k8s namespace override + extra checks
This commit is contained in:
parent
be0c1ba91e
commit
57fe5e8376
@ -427,6 +427,7 @@ cargo run -p runner-examples --bin compose_runner
|
|||||||
- `COMPOSE_RUNNER_HOST_GATEWAY=host.docker.internal:host-gateway` — controls the `extra_hosts` entry injected into compose (set to `disable` to omit)
|
- `COMPOSE_RUNNER_HOST_GATEWAY=host.docker.internal:host-gateway` — controls the `extra_hosts` entry injected into compose (set to `disable` to omit)
|
||||||
- `TESTNET_RUNNER_PRESERVE=1` — alias for `COMPOSE_RUNNER_PRESERVE=1`
|
- `TESTNET_RUNNER_PRESERVE=1` — alias for `COMPOSE_RUNNER_PRESERVE=1`
|
||||||
- `COMPOSE_GRAFANA_PORT=<port>` — pin Grafana to a fixed host port instead of ephemeral assignment
|
- `COMPOSE_GRAFANA_PORT=<port>` — pin Grafana to a fixed host port instead of ephemeral assignment
|
||||||
|
- `COMPOSE_RUNNER_HTTP_TIMEOUT_SECS=<secs>` — override compose node HTTP readiness timeout
|
||||||
|
|
||||||
**Note:** Container names follow pattern `nomos-compose-{uuid}-validator-{index}-1` where `{uuid}` changes per run.
|
**Note:** Container names follow pattern `nomos-compose-{uuid}-validator-{index}-1` where `{uuid}` changes per run.
|
||||||
|
|
||||||
@ -466,7 +467,8 @@ kubectl logs nomos-executor-1 > executor-1.log
|
|||||||
- Debug helpers:
|
- Debug helpers:
|
||||||
- `K8S_RUNNER_DEBUG=1` — logs Helm stdout/stderr for install commands.
|
- `K8S_RUNNER_DEBUG=1` — logs Helm stdout/stderr for install commands.
|
||||||
- `K8S_RUNNER_PRESERVE=1` — keep the namespace/release after the run.
|
- `K8S_RUNNER_PRESERVE=1` — keep the namespace/release after the run.
|
||||||
- `K8S_RUNNER_NODE_HOST=<ip|hostname>` — override NodePort host resolution for non-local clusters.
|
- `K8S_RUNNER_NODE_HOST=<ip|hostname>` — override NodePort host resolution for non-local clusters.
|
||||||
|
- `K8S_RUNNER_NAMESPACE=<name>` / `K8S_RUNNER_RELEASE=<name>` — pin namespace/release instead of random IDs (useful for debugging)
|
||||||
|
|
||||||
**Specify namespace (if not using default):**
|
**Specify namespace (if not using default):**
|
||||||
```bash
|
```bash
|
||||||
|
|||||||
@ -10,6 +10,7 @@
|
|||||||
- **Disk space:** bundle/image builds are storage-heavy. If you see I/O errors or Docker build failures, check free space and prune old artifacts (`.tmp/`, `target/`, and Docker build cache) before retrying.
|
- **Disk space:** bundle/image builds are storage-heavy. If you see I/O errors or Docker build failures, check free space and prune old artifacts (`.tmp/`, `target/`, and Docker build cache) before retrying.
|
||||||
- **K8s runner scope:** the default Helm chart mounts KZG params via `hostPath` and uses a local image tag (`logos-blockchain-testing:local`). This is intended for local clusters (Docker Desktop / minikube / kind), not remote managed clusters without additional setup.
|
- **K8s runner scope:** the default Helm chart mounts KZG params via `hostPath` and uses a local image tag (`logos-blockchain-testing:local`). This is intended for local clusters (Docker Desktop / minikube / kind), not remote managed clusters without additional setup.
|
||||||
- Quick cleanup: `scripts/clean` (and `scripts/clean --docker` if needed).
|
- Quick cleanup: `scripts/clean` (and `scripts/clean --docker` if needed).
|
||||||
|
- Destructive cleanup (last resort): `scripts/clean --docker-system --dangerous` (add `--volumes` if you also want to prune Docker volumes).
|
||||||
|
|
||||||
**Recommended:** Use `scripts/run-examples.sh` which handles all setup automatically.
|
**Recommended:** Use `scripts/run-examples.sh` which handles all setup automatically.
|
||||||
|
|
||||||
|
|||||||
@ -94,6 +94,7 @@ async fn run_compose_case(
|
|||||||
|
|
||||||
let deployer = ComposeDeployer::new();
|
let deployer = ComposeDeployer::new();
|
||||||
info!("deploying compose stack");
|
info!("deploying compose stack");
|
||||||
|
|
||||||
let runner: Runner = match deployer.deploy(&plan).await {
|
let runner: Runner = match deployer.deploy(&plan).await {
|
||||||
Ok(runner) => runner,
|
Ok(runner) => runner,
|
||||||
Err(ComposeRunnerError::DockerUnavailable) => {
|
Err(ComposeRunnerError::DockerUnavailable) => {
|
||||||
|
|||||||
@ -67,6 +67,7 @@ async fn run_k8s_case(
|
|||||||
|
|
||||||
let deployer = K8sDeployer::new();
|
let deployer = K8sDeployer::new();
|
||||||
info!("deploying k8s stack");
|
info!("deploying k8s stack");
|
||||||
|
|
||||||
let runner: Runner = match deployer.deploy(&plan).await {
|
let runner: Runner = match deployer.deploy(&plan).await {
|
||||||
Ok(runner) => runner,
|
Ok(runner) => runner,
|
||||||
Err(K8sRunnerError::ClientInit { source }) => {
|
Err(K8sRunnerError::ClientInit { source }) => {
|
||||||
|
|||||||
@ -18,6 +18,7 @@ async fn main() {
|
|||||||
|
|
||||||
if std::env::var("POL_PROOF_DEV_MODE").is_err() {
|
if std::env::var("POL_PROOF_DEV_MODE").is_err() {
|
||||||
warn!("POL_PROOF_DEV_MODE=true is required for the local runner demo");
|
warn!("POL_PROOF_DEV_MODE=true is required for the local runner demo");
|
||||||
|
|
||||||
std::process::exit(1);
|
std::process::exit(1);
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -41,6 +42,7 @@ async fn main() {
|
|||||||
|
|
||||||
if let Err(err) = run_local_case(validators, executors, Duration::from_secs(run_secs)).await {
|
if let Err(err) = run_local_case(validators, executors, Duration::from_secs(run_secs)).await {
|
||||||
warn!("local runner demo failed: {err}");
|
warn!("local runner demo failed: {err}");
|
||||||
|
|
||||||
std::process::exit(1);
|
std::process::exit(1);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|||||||
@ -162,6 +162,49 @@ else
|
|||||||
warn "helm not found (k8s runner uses helm)"
|
warn "helm not found (k8s runner uses helm)"
|
||||||
fi
|
fi
|
||||||
|
|
||||||
|
section "K8s Image Visibility"
|
||||||
|
image="${NOMOS_TESTNET_IMAGE:-logos-blockchain-testing:local}"
|
||||||
|
if [ -n "${ctx:-}" ]; then
|
||||||
|
case "${ctx}" in
|
||||||
|
docker-desktop)
|
||||||
|
ok "docker-desktop context shares local Docker images"
|
||||||
|
;;
|
||||||
|
kind-*)
|
||||||
|
if [[ "${image}" == *":local" ]]; then
|
||||||
|
warn "kind cluster won't see local Docker images by default"
|
||||||
|
say "Suggested: kind load docker-image ${image}"
|
||||||
|
fi
|
||||||
|
;;
|
||||||
|
minikube)
|
||||||
|
if [[ "${image}" == *":local" ]]; then
|
||||||
|
warn "minikube may not see local Docker images by default"
|
||||||
|
say "Suggested: minikube image load ${image}"
|
||||||
|
fi
|
||||||
|
;;
|
||||||
|
*)
|
||||||
|
if [[ "${image}" == *":local" ]]; then
|
||||||
|
warn "current context is ${ctx}; a :local image tag may not be reachable by cluster nodes"
|
||||||
|
say "Suggested: push to a registry and set NOMOS_TESTNET_IMAGE, or load into the cluster if supported"
|
||||||
|
fi
|
||||||
|
;;
|
||||||
|
esac
|
||||||
|
fi
|
||||||
|
|
||||||
|
section "Docker Desktop Kubernetes Health (best-effort)"
|
||||||
|
if have kubectl && [ "${ctx:-}" = "docker-desktop" ]; then
|
||||||
|
if ! kubectl -n kube-system get pod storage-provisioner >/dev/null 2>&1; then
|
||||||
|
warn "storage-provisioner pod not found"
|
||||||
|
else
|
||||||
|
phase="$(kubectl -n kube-system get pod storage-provisioner -o jsonpath='{.status.phase}' 2>/dev/null || true)"
|
||||||
|
reason="$(kubectl -n kube-system get pod storage-provisioner -o jsonpath='{.status.containerStatuses[0].state.waiting.reason}' 2>/dev/null || true)"
|
||||||
|
if [ "${phase}" = "Running" ] || [ "${phase}" = "Succeeded" ]; then
|
||||||
|
ok "storage-provisioner: ${phase}"
|
||||||
|
else
|
||||||
|
warn "storage-provisioner: ${phase:-<unknown>} ${reason}"
|
||||||
|
fi
|
||||||
|
fi
|
||||||
|
fi
|
||||||
|
|
||||||
section "Runner Debug Flags (optional)"
|
section "Runner Debug Flags (optional)"
|
||||||
say "SLOW_TEST_ENV=${SLOW_TEST_ENV:-<unset>} (if true: doubles readiness timeouts)"
|
say "SLOW_TEST_ENV=${SLOW_TEST_ENV:-<unset>} (if true: doubles readiness timeouts)"
|
||||||
say "NOMOS_SKIP_IMAGE_BUILD=${NOMOS_SKIP_IMAGE_BUILD:-<unset>} (compose/k8s)"
|
say "NOMOS_SKIP_IMAGE_BUILD=${NOMOS_SKIP_IMAGE_BUILD:-<unset>} (compose/k8s)"
|
||||||
@ -170,6 +213,7 @@ say "K8S_RUNNER_PRESERVE=${K8S_RUNNER_PRESERVE:-<unset>} (k8s)"
|
|||||||
say "K8S_RUNNER_DEBUG=${K8S_RUNNER_DEBUG:-<unset>} (k8s helm debug)"
|
say "K8S_RUNNER_DEBUG=${K8S_RUNNER_DEBUG:-<unset>} (k8s helm debug)"
|
||||||
say "COMPOSE_RUNNER_HOST=${COMPOSE_RUNNER_HOST:-<unset>} (compose readiness host override)"
|
say "COMPOSE_RUNNER_HOST=${COMPOSE_RUNNER_HOST:-<unset>} (compose readiness host override)"
|
||||||
say "K8S_RUNNER_NODE_HOST=${K8S_RUNNER_NODE_HOST:-<unset>} (k8s NodePort host override)"
|
say "K8S_RUNNER_NODE_HOST=${K8S_RUNNER_NODE_HOST:-<unset>} (k8s NodePort host override)"
|
||||||
|
say "K8S_RUNNER_NAMESPACE=${K8S_RUNNER_NAMESPACE:-<unset>} (k8s fixed namespace)"
|
||||||
|
|
||||||
section "Done"
|
section "Done"
|
||||||
say "If something looks off, start with: scripts/run-examples.sh <mode> -t 60 -v 1 -e 1"
|
say "If something looks off, start with: scripts/run-examples.sh <mode> -t 60 -v 1 -e 1"
|
||||||
|
|||||||
@ -13,6 +13,9 @@ Options:
|
|||||||
--tmp Remove .tmp (default)
|
--tmp Remove .tmp (default)
|
||||||
--target Remove target (default)
|
--target Remove target (default)
|
||||||
--docker Prune Docker builder cache (docker builder prune -f)
|
--docker Prune Docker builder cache (docker builder prune -f)
|
||||||
|
--docker-system Prune Docker system objects (requires --dangerous)
|
||||||
|
--volumes With --docker-system, also prune volumes
|
||||||
|
--dangerous Required for --docker-system (destructive)
|
||||||
--all Equivalent to --tmp --target --docker
|
--all Equivalent to --tmp --target --docker
|
||||||
-h, --help Show this help
|
-h, --help Show this help
|
||||||
EOF
|
EOF
|
||||||
@ -21,6 +24,9 @@ EOF
|
|||||||
DO_TMP=0
|
DO_TMP=0
|
||||||
DO_TARGET=0
|
DO_TARGET=0
|
||||||
DO_DOCKER=0
|
DO_DOCKER=0
|
||||||
|
DO_DOCKER_SYSTEM=0
|
||||||
|
DO_VOLUMES=0
|
||||||
|
DANGEROUS=0
|
||||||
|
|
||||||
if [ "$#" -eq 0 ]; then
|
if [ "$#" -eq 0 ]; then
|
||||||
DO_TMP=1
|
DO_TMP=1
|
||||||
@ -32,6 +38,9 @@ while [ "$#" -gt 0 ]; do
|
|||||||
--tmp) DO_TMP=1; shift ;;
|
--tmp) DO_TMP=1; shift ;;
|
||||||
--target) DO_TARGET=1; shift ;;
|
--target) DO_TARGET=1; shift ;;
|
||||||
--docker) DO_DOCKER=1; shift ;;
|
--docker) DO_DOCKER=1; shift ;;
|
||||||
|
--docker-system) DO_DOCKER_SYSTEM=1; shift ;;
|
||||||
|
--volumes) DO_VOLUMES=1; shift ;;
|
||||||
|
--dangerous) DANGEROUS=1; shift ;;
|
||||||
--all) DO_TMP=1; DO_TARGET=1; DO_DOCKER=1; shift ;;
|
--all) DO_TMP=1; DO_TARGET=1; DO_DOCKER=1; shift ;;
|
||||||
-h|--help) usage; exit 0 ;;
|
-h|--help) usage; exit 0 ;;
|
||||||
*) echo "Unknown argument: $1" >&2; usage; exit 2 ;;
|
*) echo "Unknown argument: $1" >&2; usage; exit 2 ;;
|
||||||
@ -60,5 +69,22 @@ if [ "${DO_DOCKER}" -eq 1 ]; then
|
|||||||
fi
|
fi
|
||||||
fi
|
fi
|
||||||
|
|
||||||
echo "Done."
|
if [ "${DO_DOCKER_SYSTEM}" -eq 1 ]; then
|
||||||
|
if [ "${DANGEROUS}" -ne 1 ]; then
|
||||||
|
echo "ERROR: --docker-system requires --dangerous" >&2
|
||||||
|
exit 2
|
||||||
|
fi
|
||||||
|
if command -v docker >/dev/null 2>&1; then
|
||||||
|
echo "==> Pruning Docker system objects"
|
||||||
|
if [ "${DO_VOLUMES}" -eq 1 ]; then
|
||||||
|
docker system prune -af --volumes >/dev/null
|
||||||
|
else
|
||||||
|
docker system prune -af >/dev/null
|
||||||
|
fi
|
||||||
|
echo "==> Docker system prune complete"
|
||||||
|
else
|
||||||
|
echo "WARN: docker not found; skipping Docker system prune" >&2
|
||||||
|
fi
|
||||||
|
fi
|
||||||
|
|
||||||
|
echo "Done."
|
||||||
|
|||||||
@ -30,6 +30,7 @@ Options:
|
|||||||
-t, --run-seconds N Duration to run the demo (required)
|
-t, --run-seconds N Duration to run the demo (required)
|
||||||
-v, --validators N Number of validators (required)
|
-v, --validators N Number of validators (required)
|
||||||
-e, --executors N Number of executors (required)
|
-e, --executors N Number of executors (required)
|
||||||
|
--no-image-build Skip rebuilding the compose/k8s image (sets NOMOS_SKIP_IMAGE_BUILD=1)
|
||||||
|
|
||||||
Environment:
|
Environment:
|
||||||
VERSION Circuits version (default v0.3.1)
|
VERSION Circuits version (default v0.3.1)
|
||||||
@ -38,6 +39,13 @@ Environment:
|
|||||||
NOMOS_CIRCUITS_REBUILD_RAPIDSNARK Force rapidsnark rebuild
|
NOMOS_CIRCUITS_REBUILD_RAPIDSNARK Force rapidsnark rebuild
|
||||||
NOMOS_BINARIES_TAR Path to prebuilt binaries/circuits tarball (required)
|
NOMOS_BINARIES_TAR Path to prebuilt binaries/circuits tarball (required)
|
||||||
NOMOS_SKIP_IMAGE_BUILD Set to 1 to skip rebuilding the compose/k8s image
|
NOMOS_SKIP_IMAGE_BUILD Set to 1 to skip rebuilding the compose/k8s image
|
||||||
|
TESTNET_PRINT_ENDPOINTS If set, runners print TESTNET_ENDPOINTS/TESTNET_PPROF (set automatically)
|
||||||
|
COMPOSE_RUNNER_HTTP_TIMEOUT_SECS Compose readiness timeout override
|
||||||
|
K8S_RUNNER_DEPLOYMENT_TIMEOUT_SECS K8s deployment readiness timeout override
|
||||||
|
K8S_RUNNER_HTTP_TIMEOUT_SECS K8s port-forward readiness timeout override
|
||||||
|
K8S_RUNNER_HTTP_PROBE_TIMEOUT_SECS K8s NodePort readiness timeout override
|
||||||
|
K8S_RUNNER_PROMETHEUS_HTTP_TIMEOUT_SECS K8s Prometheus port-forward readiness timeout override
|
||||||
|
K8S_RUNNER_PROMETHEUS_HTTP_PROBE_TIMEOUT_SECS K8s Prometheus NodePort probe timeout override
|
||||||
EOF
|
EOF
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -90,6 +98,10 @@ while [ "$#" -gt 0 ]; do
|
|||||||
DEMO_VALIDATORS="${2:-}"; shift 2 ;;
|
DEMO_VALIDATORS="${2:-}"; shift 2 ;;
|
||||||
-e|--executors)
|
-e|--executors)
|
||||||
DEMO_EXECUTORS="${2:-}"; shift 2 ;;
|
DEMO_EXECUTORS="${2:-}"; shift 2 ;;
|
||||||
|
--no-image-build)
|
||||||
|
NOMOS_SKIP_IMAGE_BUILD=1
|
||||||
|
export NOMOS_SKIP_IMAGE_BUILD
|
||||||
|
shift ;;
|
||||||
compose|host|k8s)
|
compose|host|k8s)
|
||||||
MODE="$1"; shift ;;
|
MODE="$1"; shift ;;
|
||||||
*)
|
*)
|
||||||
|
|||||||
@ -231,6 +231,16 @@ pub async fn ensure_cluster_readiness(
|
|||||||
}
|
}
|
||||||
|
|
||||||
pub fn cluster_identifiers() -> (String, String) {
|
pub fn cluster_identifiers() -> (String, String) {
|
||||||
|
if let Ok(namespace) = env::var("K8S_RUNNER_NAMESPACE")
|
||||||
|
&& !namespace.is_empty()
|
||||||
|
{
|
||||||
|
let release = env::var("K8S_RUNNER_RELEASE")
|
||||||
|
.ok()
|
||||||
|
.filter(|value| !value.is_empty())
|
||||||
|
.unwrap_or_else(|| namespace.clone());
|
||||||
|
return (namespace, release);
|
||||||
|
}
|
||||||
|
|
||||||
let run_id = Uuid::new_v4().simple().to_string();
|
let run_id = Uuid::new_v4().simple().to_string();
|
||||||
let namespace = format!("nomos-k8s-{run_id}");
|
let namespace = format!("nomos-k8s-{run_id}");
|
||||||
(namespace.clone(), namespace)
|
(namespace.clone(), namespace)
|
||||||
|
|||||||
@ -106,6 +106,7 @@ impl LocalDeployer {
|
|||||||
let skip_membership = !membership_check;
|
let skip_membership = !membership_check;
|
||||||
if let Err(source) = wait_for_readiness(&topology, skip_membership).await {
|
if let Err(source) = wait_for_readiness(&topology, skip_membership).await {
|
||||||
debug!(error = ?source, "local readiness failed");
|
debug!(error = ?source, "local readiness failed");
|
||||||
|
|
||||||
return Err(LocalDeployerError::ReadinessFailed { source });
|
return Err(LocalDeployerError::ReadinessFailed { source });
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -134,6 +135,7 @@ async fn wait_for_readiness(
|
|||||||
}
|
}
|
||||||
info!("waiting for membership readiness");
|
info!("waiting for membership readiness");
|
||||||
topology.wait_membership_ready().await?;
|
topology.wait_membership_ready().await?;
|
||||||
|
|
||||||
info!("waiting for DA balancer readiness");
|
info!("waiting for DA balancer readiness");
|
||||||
topology.wait_da_balancer_ready().await
|
topology.wait_da_balancer_ready().await
|
||||||
}
|
}
|
||||||
@ -154,6 +156,7 @@ async fn spawn_block_feed_with(
|
|||||||
})?;
|
})?;
|
||||||
|
|
||||||
info!("starting block feed");
|
info!("starting block feed");
|
||||||
|
|
||||||
spawn_block_feed(block_source_client)
|
spawn_block_feed(block_source_client)
|
||||||
.await
|
.await
|
||||||
.map_err(|source| LocalDeployerError::WorkloadFailed {
|
.map_err(|source| LocalDeployerError::WorkloadFailed {
|
||||||
|
|||||||
@ -149,6 +149,7 @@ impl<Caps> TransactionFlowBuilder<Caps> {
|
|||||||
users = self.users.map(|u| u.get()),
|
users = self.users.map(|u| u.get()),
|
||||||
"attaching transaction workload"
|
"attaching transaction workload"
|
||||||
);
|
);
|
||||||
|
|
||||||
self.builder = self.builder.with_workload(workload);
|
self.builder = self.builder.with_workload(workload);
|
||||||
self.builder
|
self.builder
|
||||||
}
|
}
|
||||||
@ -225,6 +226,7 @@ impl<Caps> DataAvailabilityFlowBuilder<Caps> {
|
|||||||
headroom_percent = self.headroom_percent,
|
headroom_percent = self.headroom_percent,
|
||||||
"attaching data-availability workload"
|
"attaching data-availability workload"
|
||||||
);
|
);
|
||||||
|
|
||||||
self.builder = self.builder.with_workload(workload);
|
self.builder = self.builder.with_workload(workload);
|
||||||
self.builder
|
self.builder
|
||||||
}
|
}
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user