mirror of
https://github.com/logos-blockchain/logos-blockchain-testing.git
synced 2026-01-02 13:23:13 +00:00
More robustness: k8s namespace override + extra checks
This commit is contained in:
parent
be0c1ba91e
commit
57fe5e8376
@ -427,6 +427,7 @@ cargo run -p runner-examples --bin compose_runner
|
||||
- `COMPOSE_RUNNER_HOST_GATEWAY=host.docker.internal:host-gateway` — controls the `extra_hosts` entry injected into compose (set to `disable` to omit)
|
||||
- `TESTNET_RUNNER_PRESERVE=1` — alias for `COMPOSE_RUNNER_PRESERVE=1`
|
||||
- `COMPOSE_GRAFANA_PORT=<port>` — pin Grafana to a fixed host port instead of ephemeral assignment
|
||||
- `COMPOSE_RUNNER_HTTP_TIMEOUT_SECS=<secs>` — override compose node HTTP readiness timeout
|
||||
|
||||
**Note:** Container names follow the pattern `nomos-compose-{uuid}-validator-{index}-1`, where `{uuid}` changes per run.
|
||||
|
||||
@ -466,7 +467,8 @@ kubectl logs nomos-executor-1 > executor-1.log
|
||||
- Debug helpers:
|
||||
- `K8S_RUNNER_DEBUG=1` — logs Helm stdout/stderr for install commands.
|
||||
- `K8S_RUNNER_PRESERVE=1` — keep the namespace/release after the run.
|
||||
- `K8S_RUNNER_NODE_HOST=<ip|hostname>` — override NodePort host resolution for non-local clusters.
|
||||
- `K8S_RUNNER_NODE_HOST=<ip|hostname>` — override NodePort host resolution for non-local clusters.
|
||||
- `K8S_RUNNER_NAMESPACE=<name>` / `K8S_RUNNER_RELEASE=<name>` — pin namespace/release instead of random IDs (useful for debugging)
|
||||
|
||||
**Specify namespace (if not using default):**
|
||||
```bash
|
||||
|
||||
@ -10,6 +10,7 @@
|
||||
- **Disk space:** bundle/image builds are storage-heavy. If you see I/O errors or Docker build failures, check free space and prune old artifacts (`.tmp/`, `target/`, and Docker build cache) before retrying.
|
||||
- **K8s runner scope:** the default Helm chart mounts KZG params via `hostPath` and uses a local image tag (`logos-blockchain-testing:local`). This is intended for local clusters (Docker Desktop / minikube / kind), not remote managed clusters without additional setup.
|
||||
- Quick cleanup: `scripts/clean` (and `scripts/clean --docker` if needed).
|
||||
- Destructive cleanup (last resort): `scripts/clean --docker-system --dangerous` (add `--volumes` if you also want to prune Docker volumes).
|
||||
|
||||
**Recommended:** Use `scripts/run-examples.sh` which handles all setup automatically.
|
||||
|
||||
|
||||
@ -94,6 +94,7 @@ async fn run_compose_case(
|
||||
|
||||
let deployer = ComposeDeployer::new();
|
||||
info!("deploying compose stack");
|
||||
|
||||
let runner: Runner = match deployer.deploy(&plan).await {
|
||||
Ok(runner) => runner,
|
||||
Err(ComposeRunnerError::DockerUnavailable) => {
|
||||
|
||||
@ -67,6 +67,7 @@ async fn run_k8s_case(
|
||||
|
||||
let deployer = K8sDeployer::new();
|
||||
info!("deploying k8s stack");
|
||||
|
||||
let runner: Runner = match deployer.deploy(&plan).await {
|
||||
Ok(runner) => runner,
|
||||
Err(K8sRunnerError::ClientInit { source }) => {
|
||||
|
||||
@ -18,6 +18,7 @@ async fn main() {
|
||||
|
||||
if std::env::var("POL_PROOF_DEV_MODE").is_err() {
|
||||
warn!("POL_PROOF_DEV_MODE=true is required for the local runner demo");
|
||||
|
||||
std::process::exit(1);
|
||||
}
|
||||
|
||||
@ -41,6 +42,7 @@ async fn main() {
|
||||
|
||||
if let Err(err) = run_local_case(validators, executors, Duration::from_secs(run_secs)).await {
|
||||
warn!("local runner demo failed: {err}");
|
||||
|
||||
std::process::exit(1);
|
||||
}
|
||||
}
|
||||
|
||||
@ -162,6 +162,49 @@ else
|
||||
warn "helm not found (k8s runner uses helm)"
|
||||
fi
|
||||
|
||||
section "K8s Image Visibility"
|
||||
image="${NOMOS_TESTNET_IMAGE:-logos-blockchain-testing:local}"
|
||||
if [ -n "${ctx:-}" ]; then
|
||||
case "${ctx}" in
|
||||
docker-desktop)
|
||||
ok "docker-desktop context shares local Docker images"
|
||||
;;
|
||||
kind-*)
|
||||
if [[ "${image}" == *":local" ]]; then
|
||||
warn "kind cluster won't see local Docker images by default"
|
||||
say "Suggested: kind load docker-image ${image}"
|
||||
fi
|
||||
;;
|
||||
minikube)
|
||||
if [[ "${image}" == *":local" ]]; then
|
||||
warn "minikube may not see local Docker images by default"
|
||||
say "Suggested: minikube image load ${image}"
|
||||
fi
|
||||
;;
|
||||
*)
|
||||
if [[ "${image}" == *":local" ]]; then
|
||||
warn "current context is ${ctx}; a :local image tag may not be reachable by cluster nodes"
|
||||
say "Suggested: push to a registry and set NOMOS_TESTNET_IMAGE, or load into the cluster if supported"
|
||||
fi
|
||||
;;
|
||||
esac
|
||||
fi
|
||||
|
||||
section "Docker Desktop Kubernetes Health (best-effort)"
|
||||
if have kubectl && [ "${ctx:-}" = "docker-desktop" ]; then
|
||||
if ! kubectl -n kube-system get pod storage-provisioner >/dev/null 2>&1; then
|
||||
warn "storage-provisioner pod not found"
|
||||
else
|
||||
phase="$(kubectl -n kube-system get pod storage-provisioner -o jsonpath='{.status.phase}' 2>/dev/null || true)"
|
||||
reason="$(kubectl -n kube-system get pod storage-provisioner -o jsonpath='{.status.containerStatuses[0].state.waiting.reason}' 2>/dev/null || true)"
|
||||
if [ "${phase}" = "Running" ] || [ "${phase}" = "Succeeded" ]; then
|
||||
ok "storage-provisioner: ${phase}"
|
||||
else
|
||||
warn "storage-provisioner: ${phase:-<unknown>} ${reason}"
|
||||
fi
|
||||
fi
|
||||
fi
|
||||
|
||||
section "Runner Debug Flags (optional)"
|
||||
say "SLOW_TEST_ENV=${SLOW_TEST_ENV:-<unset>} (if true: doubles readiness timeouts)"
|
||||
say "NOMOS_SKIP_IMAGE_BUILD=${NOMOS_SKIP_IMAGE_BUILD:-<unset>} (compose/k8s)"
|
||||
@ -170,6 +213,7 @@ say "K8S_RUNNER_PRESERVE=${K8S_RUNNER_PRESERVE:-<unset>} (k8s)"
|
||||
say "K8S_RUNNER_DEBUG=${K8S_RUNNER_DEBUG:-<unset>} (k8s helm debug)"
|
||||
say "COMPOSE_RUNNER_HOST=${COMPOSE_RUNNER_HOST:-<unset>} (compose readiness host override)"
|
||||
say "K8S_RUNNER_NODE_HOST=${K8S_RUNNER_NODE_HOST:-<unset>} (k8s NodePort host override)"
|
||||
say "K8S_RUNNER_NAMESPACE=${K8S_RUNNER_NAMESPACE:-<unset>} (k8s fixed namespace)"
|
||||
|
||||
section "Done"
|
||||
say "If something looks off, start with: scripts/run-examples.sh <mode> -t 60 -v 1 -e 1"
|
||||
|
||||
@ -13,6 +13,9 @@ Options:
|
||||
--tmp Remove .tmp (default)
|
||||
--target Remove target (default)
|
||||
--docker Prune Docker builder cache (docker builder prune -f)
|
||||
--docker-system Prune Docker system objects (requires --dangerous)
|
||||
--volumes With --docker-system, also prune volumes
|
||||
--dangerous Required for --docker-system (destructive)
|
||||
--all Equivalent to --tmp --target --docker
|
||||
-h, --help Show this help
|
||||
EOF
|
||||
@ -21,6 +24,9 @@ EOF
|
||||
DO_TMP=0
|
||||
DO_TARGET=0
|
||||
DO_DOCKER=0
|
||||
DO_DOCKER_SYSTEM=0
|
||||
DO_VOLUMES=0
|
||||
DANGEROUS=0
|
||||
|
||||
if [ "$#" -eq 0 ]; then
|
||||
DO_TMP=1
|
||||
@ -32,6 +38,9 @@ while [ "$#" -gt 0 ]; do
|
||||
--tmp) DO_TMP=1; shift ;;
|
||||
--target) DO_TARGET=1; shift ;;
|
||||
--docker) DO_DOCKER=1; shift ;;
|
||||
--docker-system) DO_DOCKER_SYSTEM=1; shift ;;
|
||||
--volumes) DO_VOLUMES=1; shift ;;
|
||||
--dangerous) DANGEROUS=1; shift ;;
|
||||
--all) DO_TMP=1; DO_TARGET=1; DO_DOCKER=1; shift ;;
|
||||
-h|--help) usage; exit 0 ;;
|
||||
*) echo "Unknown argument: $1" >&2; usage; exit 2 ;;
|
||||
@ -60,5 +69,22 @@ if [ "${DO_DOCKER}" -eq 1 ]; then
|
||||
fi
|
||||
fi
|
||||
|
||||
echo "Done."
|
||||
if [ "${DO_DOCKER_SYSTEM}" -eq 1 ]; then
|
||||
if [ "${DANGEROUS}" -ne 1 ]; then
|
||||
echo "ERROR: --docker-system requires --dangerous" >&2
|
||||
exit 2
|
||||
fi
|
||||
if command -v docker >/dev/null 2>&1; then
|
||||
echo "==> Pruning Docker system objects"
|
||||
if [ "${DO_VOLUMES}" -eq 1 ]; then
|
||||
docker system prune -af --volumes >/dev/null
|
||||
else
|
||||
docker system prune -af >/dev/null
|
||||
fi
|
||||
echo "==> Docker system prune complete"
|
||||
else
|
||||
echo "WARN: docker not found; skipping Docker system prune" >&2
|
||||
fi
|
||||
fi
|
||||
|
||||
echo "Done."
|
||||
|
||||
@ -30,6 +30,7 @@ Options:
|
||||
-t, --run-seconds N Duration to run the demo (required)
|
||||
-v, --validators N Number of validators (required)
|
||||
-e, --executors N Number of executors (required)
|
||||
--no-image-build Skip rebuilding the compose/k8s image (sets NOMOS_SKIP_IMAGE_BUILD=1)
|
||||
|
||||
Environment:
|
||||
VERSION Circuits version (default v0.3.1)
|
||||
@ -38,6 +39,13 @@ Environment:
|
||||
NOMOS_CIRCUITS_REBUILD_RAPIDSNARK Force rapidsnark rebuild
|
||||
NOMOS_BINARIES_TAR Path to prebuilt binaries/circuits tarball (required)
|
||||
NOMOS_SKIP_IMAGE_BUILD Set to 1 to skip rebuilding the compose/k8s image
|
||||
TESTNET_PRINT_ENDPOINTS If set, runners print TESTNET_ENDPOINTS/TESTNET_PPROF (set automatically)
|
||||
COMPOSE_RUNNER_HTTP_TIMEOUT_SECS Compose readiness timeout override
|
||||
K8S_RUNNER_DEPLOYMENT_TIMEOUT_SECS K8s deployment readiness timeout override
|
||||
K8S_RUNNER_HTTP_TIMEOUT_SECS K8s port-forward readiness timeout override
|
||||
K8S_RUNNER_HTTP_PROBE_TIMEOUT_SECS K8s NodePort readiness timeout override
|
||||
K8S_RUNNER_PROMETHEUS_HTTP_TIMEOUT_SECS K8s Prometheus port-forward readiness timeout override
|
||||
K8S_RUNNER_PROMETHEUS_HTTP_PROBE_TIMEOUT_SECS K8s Prometheus NodePort probe timeout override
|
||||
EOF
|
||||
}
|
||||
|
||||
@ -90,6 +98,10 @@ while [ "$#" -gt 0 ]; do
|
||||
DEMO_VALIDATORS="${2:-}"; shift 2 ;;
|
||||
-e|--executors)
|
||||
DEMO_EXECUTORS="${2:-}"; shift 2 ;;
|
||||
--no-image-build)
|
||||
NOMOS_SKIP_IMAGE_BUILD=1
|
||||
export NOMOS_SKIP_IMAGE_BUILD
|
||||
shift ;;
|
||||
compose|host|k8s)
|
||||
MODE="$1"; shift ;;
|
||||
*)
|
||||
|
||||
@ -231,6 +231,16 @@ pub async fn ensure_cluster_readiness(
|
||||
}
|
||||
|
||||
pub fn cluster_identifiers() -> (String, String) {
|
||||
if let Ok(namespace) = env::var("K8S_RUNNER_NAMESPACE")
|
||||
&& !namespace.is_empty()
|
||||
{
|
||||
let release = env::var("K8S_RUNNER_RELEASE")
|
||||
.ok()
|
||||
.filter(|value| !value.is_empty())
|
||||
.unwrap_or_else(|| namespace.clone());
|
||||
return (namespace, release);
|
||||
}
|
||||
|
||||
let run_id = Uuid::new_v4().simple().to_string();
|
||||
let namespace = format!("nomos-k8s-{run_id}");
|
||||
(namespace.clone(), namespace)
|
||||
|
||||
@ -106,6 +106,7 @@ impl LocalDeployer {
|
||||
let skip_membership = !membership_check;
|
||||
if let Err(source) = wait_for_readiness(&topology, skip_membership).await {
|
||||
debug!(error = ?source, "local readiness failed");
|
||||
|
||||
return Err(LocalDeployerError::ReadinessFailed { source });
|
||||
}
|
||||
|
||||
@ -134,6 +135,7 @@ async fn wait_for_readiness(
|
||||
}
|
||||
info!("waiting for membership readiness");
|
||||
topology.wait_membership_ready().await?;
|
||||
|
||||
info!("waiting for DA balancer readiness");
|
||||
topology.wait_da_balancer_ready().await
|
||||
}
|
||||
@ -154,6 +156,7 @@ async fn spawn_block_feed_with(
|
||||
})?;
|
||||
|
||||
info!("starting block feed");
|
||||
|
||||
spawn_block_feed(block_source_client)
|
||||
.await
|
||||
.map_err(|source| LocalDeployerError::WorkloadFailed {
|
||||
|
||||
@ -149,6 +149,7 @@ impl<Caps> TransactionFlowBuilder<Caps> {
|
||||
users = self.users.map(|u| u.get()),
|
||||
"attaching transaction workload"
|
||||
);
|
||||
|
||||
self.builder = self.builder.with_workload(workload);
|
||||
self.builder
|
||||
}
|
||||
@ -225,6 +226,7 @@ impl<Caps> DataAvailabilityFlowBuilder<Caps> {
|
||||
headroom_percent = self.headroom_percent,
|
||||
"attaching data-availability workload"
|
||||
);
|
||||
|
||||
self.builder = self.builder.with_workload(workload);
|
||||
self.builder
|
||||
}
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user