diff --git a/dashboard/grafana/dashboards/dashboards/dashboards.yml b/dashboard/grafana/dashboards/dashboards/dashboards.yml index 7435f09..8b1f27a 100644 --- a/dashboard/grafana/dashboards/dashboards/dashboards.yml +++ b/dashboard/grafana/dashboards/dashboards/dashboards.yml @@ -3,9 +3,9 @@ apiVersion: 1 providers: - name: 'default' orgId: 1 - folder: '' + folder: 'Codex' type: file - disableDeletion: false + disableDeletion: true updateIntervalSeconds: 10 allowUiUpdates: true options: diff --git a/dashboard/prometheus.yml b/dashboard/prometheus.yml index a2da86b..68ab5a9 100644 --- a/dashboard/prometheus.yml +++ b/dashboard/prometheus.yml @@ -1,8 +1,8 @@ # Prometheus configuration file global: - scrape_interval: 15s - evaluation_interval: 15s + scrape_interval: 1s + evaluation_interval: 1s scrape_configs: - job_name: 'prometheus' diff --git a/experiments/k-node.sh b/experiments/k-node.sh index af499e3..ab40114 100755 --- a/experiments/k-node.sh +++ b/experiments/k-node.sh @@ -11,7 +11,6 @@ set -e -o pipefail SCRIPT_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) - source "${SCRIPT_DIR}/../src/clh" node_count="${1:-2}" @@ -22,8 +21,15 @@ file_sizes=("$@") exp_start "k-node" +echoerr "* Nodes: ${node_count}" +echoerr "* Repetitions: ${repetitions}" +echoerr "* File Sizes: ${file_sizes[*]}" +echoerr "* Timing log: ${output_log}" + # TODO: procmon management should be moved into # experiment lifecycle management. +# TODO: we should register this process with procmon +# so it's also killed if something fails. trap pm_stop EXIT INT TERM pm_start diff --git a/src/codex.bash b/src/codex.bash index 572ea5e..1ce7da4 100644 --- a/src/codex.bash +++ b/src/codex.bash @@ -10,13 +10,14 @@ source "${LIB_SRC}/procmon.bash" # Codex binary if [ -z "${CODEX_BINARY}" ]; then - _cdx_binary="$(command -v codex)" + _cdx_binary="$(command -v codex)" || true else _cdx_binary="${CODEX_BINARY}" fi if [ ! 
-f "${_cdx_binary}" ]; then - echoerr "Error: no valid Codex binary found" + echoerr "Error: no valid Codex binary found."\ + "Set CODEX_BINARY to point to a valid Codex binary." exit 1 fi @@ -188,8 +189,9 @@ cdx_destroy_node() { cdx_ensure_ready() { local node_index="$1" timeout=${2:-$_cdx_node_start_timeout} start="${SECONDS}" + echoerr "Waiting ${timeout} seconds for node ${node_index} to be ready." while true; do - if cdx_get_spr "$node_index"; then + if cdx_get_spr "$node_index" 2> /dev/null; then echoerr "Codex node $node_index is ready." return 0 fi @@ -264,7 +266,7 @@ cdx_download_file() { } cdx_download_file_async() { - pm_async cdx_download_file "$@" + pm_async cdx_download_file "$@" -%- "download" } cdx_upload_sha1() { diff --git a/src/experiment.bash b/src/experiment.bash index 94fdf75..1a959c4 100644 --- a/src/experiment.bash +++ b/src/experiment.bash @@ -44,6 +44,8 @@ exp_start() { cdx_add_defaultopts "--metrics" pm_register_callback "codex" _codex_target_changed + + echoerr "[exp] Experiment ID is ${experiment_id}" } _codex_target_changed() {