mirror of
https://github.com/logos-storage/logos-storage-local-harness.git
synced 2026-01-02 13:33:11 +00:00
feat: add Codex prometheus targets in experiments
This commit is contained in:
parent
a0af4c4c90
commit
8cc5ee6896
8
src/clh
8
src/clh
@ -2,9 +2,5 @@
|
||||
|
||||
LIB_SRC=${LIB_SRC:-$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)}
|
||||
|
||||
# shellcheck source=./src/utils.bash
|
||||
source "${LIB_SRC}/utils.bash"
|
||||
# shellcheck source=./src/procmon.bash
|
||||
source "${LIB_SRC}/procmon.bash"
|
||||
# shellcheck source=./src/codex.bash
|
||||
source "${LIB_SRC}/codex.bash"
|
||||
# shellcheck source=./src/experiment.bash
|
||||
source "${LIB_SRC}/experiment.bash"
|
||||
|
||||
@ -136,7 +136,7 @@ cdx_launch_node() {
|
||||
cmd_array=()
|
||||
IFS=' ' read -r -a cmd_array <<<"$codex_cmd"
|
||||
|
||||
pm_async "${cmd_array[@]}" -%- "codex"
|
||||
pm_async "${cmd_array[@]}" -%- "codex" "${node_index}"
|
||||
_cdx_pids[$node_index]=$!
|
||||
|
||||
cdx_ensure_ready "$node_index"
|
||||
|
||||
@ -7,6 +7,8 @@ LIB_SRC=${LIB_SRC:-$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)}
|
||||
source "${LIB_SRC}/utils.bash"
|
||||
# shellcheck source=./src/codex.bash
|
||||
source "${LIB_SRC}/codex.bash"
|
||||
# shellcheck source=./src/prometheus.bash
|
||||
source "${LIB_SRC}/prometheus.bash"
|
||||
|
||||
_experiment_type=""
|
||||
_experiment_id=""
|
||||
@ -14,7 +16,7 @@ _experiment_id=""
|
||||
exp_start() {
|
||||
local experiment_id experiment_type="$1"
|
||||
|
||||
experiment_id="${experiment_type}-$(date +%s)-${RANDOM}" || return 1
|
||||
experiment_id="$(date +%s)-${RANDOM}" || return 1
|
||||
|
||||
# FIXME: this is pretty clumsy/confusing. We're "initing" the
|
||||
# harness just so it sets the base output folder, and then
|
||||
@ -28,4 +30,33 @@ exp_start() {
|
||||
|
||||
clh_init "${_clh_output}/${_experiment_id}" || return 1
|
||||
cdx_add_defaultopts "--metrics"
|
||||
|
||||
pm_register_callback "codex" _codex_target_changed
|
||||
}
|
||||
|
||||
_codex_target_changed() {
|
||||
local event="$1"
|
||||
if [ "$event" = "start" ]; then
|
||||
shift 3
|
||||
_add_target "$@"
|
||||
elif [ "$event" = "exit" ]; then
|
||||
shift 4
|
||||
_remove_target "$@"
|
||||
fi
|
||||
}
|
||||
|
||||
_add_target() {
|
||||
local node_index="$1" metrics_port
|
||||
metrics_port=$(_cdx_metrics_port "$node_index") || return 1
|
||||
|
||||
prom_add "${metrics_port}" "${_experiment_type}" "${_experiment_id}"\
|
||||
"${node_index}" "codex"
|
||||
}
|
||||
|
||||
_remove_target() {
|
||||
local node_index="$1" metrics_port
|
||||
metrics_port=$(_cdx_metrics_port "$node_index") || return 1
|
||||
|
||||
prom_remove "${metrics_port}" "${_experiment_type}" "${_experiment_id}"\
|
||||
"${node_index}" "codex"
|
||||
}
|
||||
|
||||
@ -215,8 +215,8 @@ pm_async() {
|
||||
(
|
||||
set +e
|
||||
_pm_job_started "${BASHPID}" "$proc_type" "$@"
|
||||
trap '_pm_job_exited "${BASHPID}" "$proc_type" "killed"' TERM
|
||||
trap '_pm_job_exited "${BASHPID}" "$proc_type" "$?"' EXIT
|
||||
trap '_pm_job_exited "${BASHPID}" "$proc_type" "killed" "$@"' TERM
|
||||
trap '_pm_job_exited "${BASHPID}" "$proc_type" "$?" "$@"' EXIT
|
||||
"${command[@]}"
|
||||
) &
|
||||
result=("$!")
|
||||
@ -257,6 +257,8 @@ _pm_job_exited() {
|
||||
proc_type=$2\
|
||||
exit_code=$3
|
||||
|
||||
shift 3
|
||||
|
||||
local pid_file="${_pm_output}/${pid}.pid"
|
||||
|
||||
# If the process is not tracked, don't write down an exit code.
|
||||
@ -265,7 +267,7 @@ _pm_job_exited() {
|
||||
else
|
||||
echo "$exit_code" > "$pid_file"
|
||||
fi
|
||||
_pm_invoke_callback "exit" "$proc_type" "$pid" "$exit_code"
|
||||
_pm_invoke_callback "exit" "$proc_type" "$pid" "$exit_code" "$@"
|
||||
}
|
||||
|
||||
pm_register_callback() {
|
||||
|
||||
@ -27,7 +27,8 @@ echoerr() {
|
||||
|
||||
shift_arr () {
|
||||
local -n arr_ref="$1"
|
||||
arr_ref=("${arr_ref[@]:1}")
|
||||
local shifts="${2:-1}"
|
||||
arr_ref=("${arr_ref[@]:shifts}")
|
||||
}
|
||||
|
||||
sha1() {
|
||||
|
||||
@ -19,18 +19,25 @@ setup() {
|
||||
assert [ -d "${_clh_output}" ]
|
||||
}
|
||||
|
||||
# @test "should add a prometheus target for each Codex node when requested" {
|
||||
# pm_start
|
||||
@test "should launch Codex nodes with metrics enabled when there is an experiment in scope" {
|
||||
exp_start "experiment-type"
|
||||
|
||||
# cdx_enable_prometheus "anexperiment" "84858"
|
||||
[[ "$(cdx_cmdline 0)" =~ "--metrics-port=8290 --metrics-address=0.0.0.0" ]]
|
||||
}
|
||||
|
||||
# cdx_launch_node 0
|
||||
# config_file="${_prom_output}/8290-anexperiment-84858-node-1-codex.json"
|
||||
# assert [ -f "$config_file" ]
|
||||
@test "should add a prometheus target for each Codex node when there is an experiment in scope" {
|
||||
exp_start "k-node"
|
||||
|
||||
# cdx_destroy_node 0
|
||||
# assert [ ! -f "$config_file" ]
|
||||
pm_start
|
||||
cdx_launch_node 0
|
||||
|
||||
# pm_stop
|
||||
# }
|
||||
config_file="${_prom_output}/8290-k-node-${_experiment_id}-0-codex.json"
|
||||
assert [ -f "$config_file" ]
|
||||
|
||||
cdx_destroy_node 0
|
||||
assert [ ! -f "$config_file" ]
|
||||
}
|
||||
|
||||
teardown() {
|
||||
pm_stop
|
||||
}
|
||||
|
||||
@ -162,14 +162,16 @@ callback() {
|
||||
|
||||
if [ "$event" = "start" ]; then
|
||||
shift 3
|
||||
if [ "$#" -gt 0 ]; then
|
||||
echo "$*" > "${_pm_output}/${pid}-${proc_type}-start-args"
|
||||
fi
|
||||
touch "${_pm_output}/${pid}-${proc_type}-start"
|
||||
elif [ "$event" = "exit" ]; then
|
||||
exit_code="$4"
|
||||
shift 4
|
||||
touch "${_pm_output}/${pid}-${proc_type}-${exit_code}-exit"
|
||||
fi
|
||||
|
||||
if [ "$#" -gt 0 ]; then
|
||||
echo "$*" > "${_pm_output}/${pid}-${proc_type}-${event}-args"
|
||||
fi
|
||||
}
|
||||
|
||||
@test "should call lifecycle callbacks when processes start and stop" {
|
||||
@ -231,4 +233,5 @@ callback() {
|
||||
await "$pid"
|
||||
|
||||
assert_equal "$(cat "${_pm_output}/${pid}-sleepy-start-args")" "arg1 arg2"
|
||||
assert_equal "$(cat "${_pm_output}/${pid}-sleepy-exit-args")" "arg1 arg2"
|
||||
}
|
||||
Loading…
x
Reference in New Issue
Block a user