From 8cc5ee689698e3a6785f427a145f4ca009c9a81a Mon Sep 17 00:00:00 2001 From: gmega Date: Mon, 23 Jun 2025 17:27:21 -0300 Subject: [PATCH] feat: add Codex prometheus targets in experiments --- src/clh | 8 ++------ src/codex.bash | 2 +- src/experiment.bash | 33 ++++++++++++++++++++++++++++++++- src/procmon.bash | 8 +++++--- src/utils.bash | 3 ++- test/test_experiment.bats | 27 +++++++++++++++++---------- test/test_procmon.bats | 9 ++++++--- 7 files changed, 65 insertions(+), 25 deletions(-) diff --git a/src/clh b/src/clh index c67bd7d..2a367d5 100644 --- a/src/clh +++ b/src/clh @@ -2,9 +2,5 @@ LIB_SRC=${LIB_SRC:-$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)} -# shellcheck source=./src/utils.bash -source "${LIB_SRC}/utils.bash" -# shellcheck source=./src/procmon.bash -source "${LIB_SRC}/procmon.bash" -# shellcheck source=./src/codex.bash -source "${LIB_SRC}/codex.bash" +# shellcheck source=./src/experiment.bash +source "${LIB_SRC}/experiment.bash" diff --git a/src/codex.bash b/src/codex.bash index 3e980b9..d7bbc98 100644 --- a/src/codex.bash +++ b/src/codex.bash @@ -136,7 +136,7 @@ cdx_launch_node() { cmd_array=() IFS=' ' read -r -a cmd_array <<<"$codex_cmd" - pm_async "${cmd_array[@]}" -%- "codex" + pm_async "${cmd_array[@]}" -%- "codex" "${node_index}" _cdx_pids[$node_index]=$! cdx_ensure_ready "$node_index" diff --git a/src/experiment.bash b/src/experiment.bash index 47dfa7b..71c42f3 100644 --- a/src/experiment.bash +++ b/src/experiment.bash @@ -7,6 +7,8 @@ LIB_SRC=${LIB_SRC:-$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)} source "${LIB_SRC}/utils.bash" # shellcheck source=./src/codex.bash source "${LIB_SRC}/codex.bash" +# shellcheck source=./src/prometheus.bash +source "${LIB_SRC}/prometheus.bash" _experiment_type="" _experiment_id="" @@ -14,7 +16,7 @@ _experiment_id="" exp_start() { local experiment_id experiment_type="$1" - experiment_id="${experiment_type}-$(date +%s)-${RANDOM}" || return 1 + experiment_id="$(date +%s)-${RANDOM}" || return 1 # FIXME: this is pretty clumsy/confusing. We're "initing" the # harness just so it sets the base output folder, and then @@ -28,4 +30,33 @@ exp_start() { clh_init "${_clh_output}/${_experiment_id}" || return 1 cdx_add_defaultopts "--metrics" + + pm_register_callback "codex" _codex_target_changed +} + +_codex_target_changed() { + local event="$1" + if [ "$event" = "start" ]; then + shift 3 + _add_target "$@" + elif [ "$event" = "exit" ]; then + shift 4 + _remove_target "$@" + fi +} + +_add_target() { + local node_index="$1" metrics_port + metrics_port=$(_cdx_metrics_port "$node_index") || return 1 + + prom_add "${metrics_port}" "${_experiment_type}" "${_experiment_id}"\ + "${node_index}" "codex" +} + +_remove_target() { + local node_index="$1" metrics_port + metrics_port=$(_cdx_metrics_port "$node_index") || return 1 + + prom_remove "${metrics_port}" "${_experiment_type}" "${_experiment_id}"\ + "${node_index}" "codex" } diff --git a/src/procmon.bash b/src/procmon.bash index 505e6cf..4beca39 100644 --- a/src/procmon.bash +++ b/src/procmon.bash @@ -215,8 +215,8 @@ pm_async() { ( set +e _pm_job_started "${BASHPID}" "$proc_type" "$@" - trap '_pm_job_exited "${BASHPID}" "$proc_type" "killed"' TERM - trap '_pm_job_exited "${BASHPID}" "$proc_type" "$?"' EXIT + trap '_pm_job_exited "${BASHPID}" "$proc_type" "killed" "$@"' TERM + trap '_pm_job_exited "${BASHPID}" "$proc_type" "$?" "$@"' EXIT "${command[@]}" ) & result=("$!") @@ -257,6 +257,8 @@ _pm_job_exited() { proc_type=$2\ exit_code=$3 + shift 3 + local pid_file="${_pm_output}/${pid}.pid" # If the process is not tracked, don't write down an exit code. @@ -265,7 +267,7 @@ _pm_job_exited() { else echo "$exit_code" > "$pid_file" fi - _pm_invoke_callback "exit" "$proc_type" "$pid" "$exit_code" + _pm_invoke_callback "exit" "$proc_type" "$pid" "$exit_code" "$@" } pm_register_callback() { diff --git a/src/utils.bash b/src/utils.bash index 3c85390..5778d6e 100644 --- a/src/utils.bash +++ b/src/utils.bash @@ -27,7 +27,8 @@ echoerr() { shift_arr () { local -n arr_ref="$1" - arr_ref=("${arr_ref[@]:1}") + local shifts="${2:-1}" + arr_ref=("${arr_ref[@]:shifts}") } sha1() { diff --git a/test/test_experiment.bats b/test/test_experiment.bats index 51aeb91..09e05cd 100644 --- a/test/test_experiment.bats +++ b/test/test_experiment.bats @@ -19,18 +19,25 @@ setup() { assert [ -d "${_clh_output}" ] } -# @test "should add a prometheus target for each Codex node when requested" { -# pm_start +@test "should launch Codex nodes with metrics enabled when there is an experiment in scope" { + exp_start "experiment-type" -# cdx_enable_prometheus "anexperiment" "84858" + [[ "$(cdx_cmdline 0)" =~ "--metrics-port=8290 --metrics-address=0.0.0.0" ]] +} -# cdx_launch_node 0 -# config_file="${_prom_output}/8290-anexperiment-84858-node-1-codex.json" -# assert [ -f "$config_file" ] +@test "should add a prometheus target for each Codex node when there is an experiment in scope" { + exp_start "k-node" -# cdx_destroy_node 0 -# assert [ ! -f "$config_file" ] + pm_start + cdx_launch_node 0 -# pm_stop -# } + config_file="${_prom_output}/8290-k-node-${_experiment_id}-0-codex.json" + assert [ -f "$config_file" ] + cdx_destroy_node 0 + assert [ ! -f "$config_file" ] +} + +teardown() { + pm_stop +} diff --git a/test/test_procmon.bats b/test/test_procmon.bats index 7a7c1c3..fad3d66 100644 --- a/test/test_procmon.bats +++ b/test/test_procmon.bats @@ -162,14 +162,16 @@ callback() { if [ "$event" = "start" ]; then shift 3 - if [ "$#" -gt 0 ]; then - echo "$*" > "${_pm_output}/${pid}-${proc_type}-start-args" - fi touch "${_pm_output}/${pid}-${proc_type}-start" elif [ "$event" = "exit" ]; then exit_code="$4" + shift 4 touch "${_pm_output}/${pid}-${proc_type}-${exit_code}-exit" fi + + if [ "$#" -gt 0 ]; then + echo "$*" > "${_pm_output}/${pid}-${proc_type}-${event}-args" + fi } @test "should call lifecycle callbacks when processes start and stop" { @@ -231,4 +233,5 @@ callback() { await "$pid" assert_equal "$(cat "${_pm_output}/${pid}-sleepy-start-args")" "arg1 arg2" + assert_equal "$(cat "${_pm_output}/${pid}-sleepy-exit-args")" "arg1 arg2" } \ No newline at end of file