Deduplicate node/executor entrypoints into shared run_nomos.sh

This commit is contained in:
andrussal 2025-12-09 16:24:05 +01:00
parent baa94221e8
commit 874a4531c9
9 changed files with 99 additions and 121 deletions

View File

@ -0,0 +1,75 @@
#!/bin/sh
# Shared entrypoint for the nomos containers.
# Usage: run_nomos.sh [role]   (role: "validator" or "executor")
# Abort as soon as any command fails.
set -e
# Role comes from the first argument; "validator" is used when it is unset or empty.
role="${1:-validator}"
# Print the absolute path of the binary that serves the given role.
#
# Arguments:
#   $1  role name: "validator" or "executor"
#
# Writes the path to stdout. For any other role it reports the problem on
# stderr and exits with status 2 (when called inside $(...), that terminates
# the command substitution with status 2).
bin_for_role() {
    if [ "$1" = "validator" ]; then
        echo "/usr/bin/nomos-node"
        return
    fi
    if [ "$1" = "executor" ]; then
        echo "/usr/bin/nomos-executor"
        return
    fi
    echo "Unknown role: $1" >&2
    exit 2
}
# Verify that a binary is a Linux ELF built for this host's CPU architecture.
#
# Arguments:
#   $1  path of the binary to inspect
#   $2  human-readable label used in diagnostics
#
# Returns 0 when the check passes, and also when the `file` utility is not
# installed (a warning is printed and the check is skipped). Exits the shell
# with status 126 when the binary is Mach-O, is not reported as ELF, or its
# `file` description does not mention the architecture given by `uname -m`.
#
# POSIX sh has no function-local variables, so every variable here carries a
# `cba_` prefix to avoid overwriting the caller's globals (notably `bin_path`,
# which the caller later passes to `exec`).
check_binary_arch() {
    cba_path="$1"
    cba_label="$2"

    if ! command -v file >/dev/null 2>&1; then
        echo "Warning: 'file' command not available; skipping ${cba_label} arch check" >&2
        return 0
    fi

    # `|| true` stops `set -e` from aborting on a failing `file`; an empty
    # description is then reported by the catch-all branch below.
    cba_info="$(file -b "${cba_path}" 2>/dev/null || true)"
    cba_host="$(uname -m)"

    case "$cba_info" in
        *"Mach-O"*)
            echo "${cba_label} binary is Mach-O (host bundle) but container requires Linux ELF for ${cba_host}" >&2
            exit 126
            ;;
        *"ELF"*)
            : # expected format; continue with the architecture check
            ;;
        *)
            echo "${cba_label} binary missing or unreadable; info='${cba_info}'" >&2
            exit 126
            ;;
    esac

    # Extended-regex alternatives matched (case-insensitively) against the
    # `file` description. Hosts not listed here skip the architecture check.
    case "$cba_host" in
        x86_64) cba_expected="x86-64|x86_64" ;;
        aarch64|arm64) cba_expected="arm64|aarch64" ;;
        *) cba_expected="" ;;
    esac

    # printf instead of echo: the description is passed through verbatim even
    # if it contains backslashes or starts with a dash.
    if [ -n "$cba_expected" ] && ! printf '%s\n' "$cba_info" | grep -Eqi "$cba_expected"; then
        echo "${cba_label} binary architecture mismatch: host=${cba_host}, file='${cba_info}'" >&2
        exit 126
    fi
}
# Resolve the binary for the requested role. An unknown role makes the
# command substitution fail with status 2, which aborts the script via `set -e`.
bin_path="$(bin_for_role "$role")"

# Label diagnostics with the real binary name (e.g. "nomos-node" for the
# validator role) rather than "nomos-<role>", which names no actual binary.
check_binary_arch "$bin_path" "${bin_path##*/}"

KZG_CONTAINER_PATH="${NOMOS_KZG_CONTAINER_PATH:-/kzgrs_test_params/kzgrs_test_params}"

# Query the address once and always expand it quoted: `hostname -i` may print
# several space-separated addresses, and an unquoted expansion in `export`
# arguments is field-split by shells that treat them as ordinary words.
host_ip="$(hostname -i)"
host_identifier_default="${role}-${host_ip}"

# Environment for cfgsync-client and the node. Every value except
# CFG_FILE_PATH and CFG_HOST_IP can be overridden from the container env.
export CFG_FILE_PATH="/config.yaml" \
    CFG_SERVER_ADDR="${CFG_SERVER_ADDR:-http://cfgsync:4400}" \
    CFG_HOST_IP="${host_ip}" \
    CFG_HOST_KIND="${CFG_HOST_KIND:-$role}" \
    CFG_HOST_IDENTIFIER="${CFG_HOST_IDENTIFIER:-$host_identifier_default}" \
    NOMOS_KZGRS_PARAMS_PATH="${NOMOS_KZGRS_PARAMS_PATH:-${KZG_CONTAINER_PATH}}" \
    NOMOS_TIME_BACKEND="${NOMOS_TIME_BACKEND:-monotonic}" \
    LOG_LEVEL="${LOG_LEVEL:-INFO}" \
    POL_PROOF_DEV_MODE="${POL_PROOF_DEV_MODE:-true}"

# Ensure recovery directory exists to avoid early crashes in services that
# persist state.
mkdir -p /recovery

# cfgsync-server can start a little after the container; retry until it is
# reachable instead of exiting immediately and crash-looping.
attempt=0
max_attempts=30
sleep_seconds=3
until /usr/bin/cfgsync-client; do
    attempt=$((attempt + 1))
    if [ "$attempt" -ge "$max_attempts" ]; then
        # Fatal: report on stderr like the other fatal diagnostics in this script.
        echo "cfgsync-client failed after ${max_attempts} attempts, giving up" >&2
        exit 1
    fi
    echo "cfgsync not ready yet (attempt ${attempt}/${max_attempts}), retrying in ${sleep_seconds}s..."
    sleep "$sleep_seconds"
done

# Replace this shell with the node process so it receives signals directly.
# CFG_FILE_PATH is the same /config.yaml exported above.
exec "${bin_path}" "${CFG_FILE_PATH}"

View File

@ -1,61 +1,2 @@
#!/bin/sh
set -e
check_binary_arch() {
if ! command -v file >/dev/null 2>&1; then
echo "Warning: 'file' command not available; skipping nomos-executor arch check" >&2
return
fi
bin_info="$(file -b /usr/bin/nomos-executor 2>/dev/null || true)"
host_arch="$(uname -m)"
case "$bin_info" in
*"Mach-O"*) echo "nomos-executor binary is Mach-O (host bundle) but container requires Linux ELF for ${host_arch}" >&2; exit 126 ;;
*"ELF"*) : ;;
*) echo "nomos-executor binary missing or unreadable; info='${bin_info}'" >&2; exit 126 ;;
esac
case "$host_arch" in
x86_64) expected="x86-64|x86_64" ;;
aarch64|arm64) expected="arm64|aarch64" ;;
*) expected="" ;;
esac
if [ -n "$expected" ] && ! echo "$bin_info" | grep -Eqi "$expected"; then
echo "nomos-executor binary architecture mismatch: host=${host_arch}, file='${bin_info}'" >&2
exit 126
fi
}
check_binary_arch
KZG_CONTAINER_PATH="${NOMOS_KZG_CONTAINER_PATH:-/kzgrs_test_params/kzgrs_test_params}"
export CFG_FILE_PATH="/config.yaml" \
CFG_SERVER_ADDR="${CFG_SERVER_ADDR:-http://cfgsync:4400}" \
CFG_HOST_IP=$(hostname -i) \
CFG_HOST_KIND="${CFG_HOST_KIND:-executor}" \
CFG_HOST_IDENTIFIER="${CFG_HOST_IDENTIFIER:-executor-$(hostname -i)}" \
NOMOS_KZGRS_PARAMS_PATH="${NOMOS_KZGRS_PARAMS_PATH:-${KZG_CONTAINER_PATH}}" \
NOMOS_TIME_BACKEND="${NOMOS_TIME_BACKEND:-monotonic}" \
LOG_LEVEL="INFO" \
POL_PROOF_DEV_MODE="${POL_PROOF_DEV_MODE:-true}"
# Ensure recovery directory exists to avoid early crashes in services that
# persist state.
mkdir -p /recovery
# cfgsync-server can start a little after the executor container; retry until
# it is reachable instead of exiting immediately and crash-looping.
attempt=0
max_attempts=30
sleep_seconds=3
until /usr/bin/cfgsync-client; do
attempt=$((attempt + 1))
if [ "$attempt" -ge "$max_attempts" ]; then
echo "cfgsync-client failed after ${max_attempts} attempts, giving up"
exit 1
fi
echo "cfgsync not ready yet (attempt ${attempt}/${max_attempts}), retrying in ${sleep_seconds}s..."
sleep "$sleep_seconds"
done
exec /usr/bin/nomos-executor /config.yaml
exec /etc/nomos/scripts/run_nomos.sh executor

View File

@ -1,61 +1,2 @@
#!/bin/sh
set -e
check_binary_arch() {
if ! command -v file >/dev/null 2>&1; then
echo "Warning: 'file' command not available; skipping nomos-node arch check" >&2
return
fi
bin_info="$(file -b /usr/bin/nomos-node 2>/dev/null || true)"
host_arch="$(uname -m)"
case "$bin_info" in
*"Mach-O"*) echo "nomos-node binary is Mach-O (host bundle) but container requires Linux ELF for ${host_arch}" >&2; exit 126 ;;
*"ELF"*) : ;;
*) echo "nomos-node binary missing or unreadable; info='${bin_info}'" >&2; exit 126 ;;
esac
case "$host_arch" in
x86_64) expected="x86-64|x86_64" ;;
aarch64|arm64) expected="arm64|aarch64" ;;
*) expected="" ;;
esac
if [ -n "$expected" ] && ! echo "$bin_info" | grep -Eqi "$expected"; then
echo "nomos-node binary architecture mismatch: host=${host_arch}, file='${bin_info}'" >&2
exit 126
fi
}
check_binary_arch
KZG_CONTAINER_PATH="${NOMOS_KZG_CONTAINER_PATH:-/kzgrs_test_params/kzgrs_test_params}"
export CFG_FILE_PATH="/config.yaml" \
CFG_SERVER_ADDR="${CFG_SERVER_ADDR:-http://cfgsync:4400}" \
CFG_HOST_IP=$(hostname -i) \
CFG_HOST_KIND="${CFG_HOST_KIND:-validator}" \
CFG_HOST_IDENTIFIER="${CFG_HOST_IDENTIFIER:-validator-$(hostname -i)}" \
NOMOS_KZGRS_PARAMS_PATH="${NOMOS_KZGRS_PARAMS_PATH:-${KZG_CONTAINER_PATH}}" \
NOMOS_TIME_BACKEND="${NOMOS_TIME_BACKEND:-monotonic}" \
LOG_LEVEL="INFO" \
POL_PROOF_DEV_MODE="${POL_PROOF_DEV_MODE:-true}"
# Ensure recovery directory exists to avoid early crashes in services that
# persist state.
mkdir -p /recovery
# cfgsync-server can start a little after the node container; retry until it is
# reachable instead of exiting immediately and crash-looping.
attempt=0
max_attempts=30
sleep_seconds=3
until /usr/bin/cfgsync-client; do
attempt=$((attempt + 1))
if [ "$attempt" -ge "$max_attempts" ]; then
echo "cfgsync-client failed after ${max_attempts} attempts, giving up"
exit 1
fi
echo "cfgsync not ready yet (attempt ${attempt}/${max_attempts}), retrying in ${sleep_seconds}s..."
sleep "$sleep_seconds"
done
exec /usr/bin/nomos-node /config.yaml
exec /etc/nomos/scripts/run_nomos.sh validator

View File

@ -16,6 +16,12 @@ data:
{{ .Values.scripts.runCfgsyncSh | indent 4 }}
{{- else }}
{{ "" | indent 4 }}
{{- end }}
run_nomos.sh: |
{{- if .Values.scripts.runNomosSh }}
{{ .Values.scripts.runNomosSh | indent 4 }}
{{- else }}
{{ "" | indent 4 }}
{{- end }}
run_nomos_node.sh: |
{{- if .Values.scripts.runNomosNodeSh }}

View File

@ -52,6 +52,8 @@ spec:
path: cfgsync.yaml
- key: run_cfgsync.sh
path: scripts/run_cfgsync.sh
- key: run_nomos.sh
path: scripts/run_nomos.sh
- key: run_nomos_executor.sh
path: scripts/run_nomos_executor.sh
- key: run_nomos_node.sh

View File

@ -52,6 +52,8 @@ spec:
path: cfgsync.yaml
- key: run_cfgsync.sh
path: scripts/run_cfgsync.sh
- key: run_nomos.sh
path: scripts/run_nomos.sh
- key: run_nomos_node.sh
path: scripts/run_nomos_node.sh
- name: kzg-params

View File

@ -7,6 +7,7 @@ cfgsync:
scripts:
runCfgsyncSh: ""
runNomosSh: ""
runNomosNodeSh: ""
runNomosExecutorSh: ""

View File

@ -20,6 +20,7 @@ pub struct RunnerAssets {
pub chart_path: PathBuf,
pub cfgsync_file: PathBuf,
pub run_cfgsync_script: PathBuf,
pub run_nomos_script: PathBuf,
pub run_nomos_node_script: PathBuf,
pub run_nomos_executor_script: PathBuf,
pub values_file: PathBuf,
@ -90,6 +91,7 @@ pub fn prepare_assets(topology: &GeneratedTopology) -> Result<RunnerAssets, Asse
kzg_path,
chart_path,
cfgsync_file,
run_nomos_script: scripts.run_shared,
run_cfgsync_script: scripts.run_cfgsync,
run_nomos_node_script: scripts.run_node,
run_nomos_executor_script: scripts.run_executor,
@ -111,6 +113,7 @@ fn render_cfgsync_config(root: &Path, topology: &GeneratedTopology) -> Result<St
/// Paths of the shell scripts looked up under the stack scripts directory.
struct ScriptPaths {
/// Path to `run_cfgsync.sh`.
run_cfgsync: PathBuf,
/// Path to the shared `run_nomos.sh`.
run_shared: PathBuf,
/// Path to `run_nomos_node.sh`.
run_node: PathBuf,
/// Path to `run_nomos_executor.sh`.
run_executor: PathBuf,
}
@ -118,10 +121,11 @@ struct ScriptPaths {
fn validate_scripts(root: &Path) -> Result<ScriptPaths, AssetsError> {
let scripts_dir = stack_scripts_root(root);
let run_cfgsync = scripts_dir.join("run_cfgsync.sh");
let run_shared = scripts_dir.join("run_nomos.sh");
let run_node = scripts_dir.join("run_nomos_node.sh");
let run_executor = scripts_dir.join("run_nomos_executor.sh");
for path in [&run_cfgsync, &run_node, &run_executor] {
for path in [&run_cfgsync, &run_shared, &run_node, &run_executor] {
if !path.exists() {
return Err(AssetsError::MissingScript { path: path.clone() });
}
@ -129,6 +133,7 @@ fn validate_scripts(root: &Path) -> Result<ScriptPaths, AssetsError> {
Ok(ScriptPaths {
run_cfgsync,
run_shared,
run_node,
run_executor,
})

View File

@ -74,6 +74,11 @@ pub async fn install_release(
assets.run_nomos_node_script.display()
))
.arg("--set-file")
.arg(format!(
"scripts.runNomosSh={}",
assets.run_nomos_script.display()
))
.arg("--set-file")
.arg(format!(
"scripts.runNomosExecutorSh={}",
assets.run_nomos_executor_script.display()