diff --git a/.gitignore b/.gitignore index d2fc23dc3..c73c8da16 100644 --- a/.gitignore +++ b/.gitignore @@ -33,6 +33,8 @@ build/ /local_testnet_data*/ +# Prometheus db +/data # Grafana dashboards /docker/*.json diff --git a/Makefile b/Makefile index 85c2b87f0..6eea6ad1b 100644 --- a/Makefile +++ b/Makefile @@ -38,7 +38,7 @@ TOOLS_DIRS := \ ncli \ nbench \ research \ - tests/simulation + tools TOOLS_CSV := $(subst $(SPACE),$(COMMA),$(TOOLS)) .PHONY: \ diff --git a/README.md b/README.md index ad0519803..e023f6c65 100644 --- a/README.md +++ b/README.md @@ -102,16 +102,29 @@ apt install build-essential git libpcre3-dev Nimbus connects to any of the testnets published in the [eth2-clients/eth2-testnets repo](https://github.com/eth2-clients/eth2-testnets/tree/master/nimbus). -Once the [prerequisites](#prerequisites) are installed you can connect to testnet0 with the following commands: +Once the [prerequisites](#prerequisites) are installed you can connect to the [Witti testnet](https://github.com/goerli/witti) with the following commands: ```bash git clone https://github.com/status-im/nim-beacon-chain cd nim-beacon-chain -make testnet0 # This will build Nimbus and all other dependencies - # and connect you to testnet0 +make witti # This will build Nimbus and all other dependencies + # and connect you to Witti ``` -The testnets are restarted once per week, usually on Monday evenings (UTC)) and integrate the changes for the past week. +### Getting metrics from a local testnet client + +```bash +# the primitive HTTP server started to serve the metrics is considered insecure +make NIMFLAGS="-d:insecure" witti +``` + +You can now see the raw metrics on http://127.0.0.1:8008/metrics but they're not very useful like this, so let's feed them to a Prometheus instance: + +```bash +prometheus --config.file=build/data/shared_witti/prometheus.yml +``` + +For some pretty pictures, get [Grafana](https://grafana.com/) up and running, then import the dashboard definition in "grafana/beacon\_nodes\_Grafana\_dashboard.json". ## Interop (for other Eth2 clients) @@ -178,8 +191,8 @@ The [generic instructions from the Nimbus repo](https://github.com/status-im/nim Specific steps: ```bash -# This will generate the Prometheus config and the Grafana dashboard on the fly, -# based on the number of nodes (which you can control by passing something like NODES=6 to `make`). +# This will generate the Prometheus config on the fly, based on the number of +# nodes (which you can control by passing something like NODES=6 to `make`). # The `-d:insecure` flag starts an HTTP server from which the Prometheus daemon will pull the metrics. make VALIDATORS=192 NODES=6 USER_NODES=0 NIMFLAGS="-d:insecure" eth2_network_simulation @@ -188,7 +201,7 @@ cd tests/simulation/prometheus prometheus ``` -The dashboard you need to import in Grafana is "tests/simulation/beacon-chain-sim-all-nodes-Grafana-dashboard.json". +The dashboard you need to import in Grafana is "grafana/beacon\_nodes\_Grafana\_dashboard.json". ![monitoring dashboard](./media/monitoring.png) diff --git a/tests/simulation/beacon-chain-sim-node0-Grafana-dashboard.json b/grafana/beacon_nodes_Grafana_dashboard.json similarity index 91% rename from tests/simulation/beacon-chain-sim-node0-Grafana-dashboard.json rename to grafana/beacon_nodes_Grafana_dashboard.json index da256b59a..65e6fbc14 100644 --- a/tests/simulation/beacon-chain-sim-node0-Grafana-dashboard.json +++ b/grafana/beacon_nodes_Grafana_dashboard.json @@ -101,27 +101,27 @@ "steppedLine": false, "targets": [ { - "expr": "rate(process_cpu_seconds_total{node=\"0\"}[2s]) * 100", + "expr": "rate(process_cpu_seconds_total{node=\"${node}\"}[2s]) * 100", "legendFormat": "CPU usage %", "refId": "A" }, { - "expr": "process_open_fds{node=\"0\"}", + "expr": "process_open_fds{node=\"${node}\"}", "legendFormat": "open file descriptors", "refId": "C" }, { - "expr": "process_resident_memory_bytes{node=\"0\"}", + "expr": "process_resident_memory_bytes{node=\"${node}\"}", "legendFormat": "RSS", "refId": "D" }, { - "expr": "nim_gc_mem_bytes{node=\"0\"}", + "expr": "nim_gc_mem_bytes{node=\"${node}\"}", "legendFormat": "Nim GC mem total", "refId": "F" }, { - "expr": "nim_gc_mem_occupied_bytes{node=\"0\"}", + "expr": "nim_gc_mem_occupied_bytes{node=\"${node}\"}", "legendFormat": "Nim GC mem used", "refId": "G" } @@ -130,7 +130,7 @@ "timeFrom": null, "timeRegions": [], "timeShift": null, - "title": "resources #0", + "title": "resources #${node}", "tooltip": { "shared": true, "sort": 0, @@ -210,12 +210,12 @@ "steppedLine": false, "targets": [ { - "expr": "libp2p_open_bufferstream{node=\"0\"}", + "expr": "libp2p_open_bufferstream{node=\"${node}\"}", "legendFormat": "BufferStream", "refId": "A" }, { - "expr": "libp2p_open_connection{node=\"0\"}", + "expr": "libp2p_open_connection{node=\"${node}\"}", "legendFormat": "Connection", "refId": "B" } @@ -224,7 +224,7 @@ "timeFrom": null, "timeRegions": [], "timeShift": null, - "title": "open streams #0", + "title": "open streams #${node}", "tooltip": { "shared": true, "sort": 0, @@ -304,13 +304,13 @@ "steppedLine": false, "targets": [ { - "expr": "beacon_current_validators{node=\"0\"}", + "expr": "beacon_current_validators{node=\"${node}\"}", "interval": "", "legendFormat": "current validators", "refId": "A" }, { - "expr": "beacon_current_live_validators{node=\"0\"}", + "expr": "beacon_current_live_validators{node=\"${node}\"}", "interval": "", "legendFormat": "current live validators", "refId": "B" @@ -320,7 +320,7 @@ "timeFrom": null, "timeRegions": [], "timeShift": null, - "title": "validators #0", + "title": "validators #${node}", "tooltip": { "shared": true, "sort": 0, @@ -405,7 +405,7 @@ "steppedLine": false, "targets": [ { - "expr": "nim_gc_heap_instance_occupied_bytes{node=\"0\"}", + "expr": "nim_gc_heap_instance_occupied_bytes{node=\"${node}\"}", "interval": "", "legendFormat": "{{type_name}}", "refId": "A" @@ -415,7 +415,7 @@ "timeFrom": null, "timeRegions": [], "timeShift": null, - "title": "GC heap objects #0", + "title": "GC heap objects #${node}", "tooltip": { "shared": true, "sort": 0, @@ -493,7 +493,7 @@ "steppedLine": false, "targets": [ { - "expr": "beacon_state_data_cache_hits_total{node=\"0\"} * 100 / (beacon_state_data_cache_hits_total{node=\"0\"} + beacon_state_data_cache_misses_total{node=\"0\"})", + "expr": "beacon_state_data_cache_hits_total{node=\"${node}\"} * 100 / (beacon_state_data_cache_hits_total{node=\"${node}\"} + beacon_state_data_cache_misses_total{node=\"${node}\"})", "interval": "", "legendFormat": "cache hit rate", "refId": "A" @@ -503,7 +503,7 @@ "timeFrom": null, "timeRegions": [], "timeShift": null, - "title": "pool.cachedStates #0", + "title": "pool.cachedStates #${node}", "tooltip": { "shared": true, "sort": 0, @@ -587,7 +587,7 @@ "steppedLine": false, "targets": [ { - "expr": "sqlite3_memory_used_bytes{node=\"0\"}", + "expr": "sqlite3_memory_used_bytes{node=\"${node}\"}", "interval": "", "legendFormat": "Memory used", "refId": "A" @@ -597,7 +597,7 @@ "timeFrom": null, "timeRegions": [], "timeShift": null, - "title": "SQLite3 #0", + "title": "SQLite3 #${node}", "tooltip": { "shared": true, "sort": 0, @@ -698,14 +698,14 @@ "tableColumn": "", "targets": [ { - "expr": "process_resident_memory_bytes{node=\"0\"}", + "expr": "process_resident_memory_bytes{node=\"${node}\"}", "refId": "A" } ], "thresholds": "", "timeFrom": null, "timeShift": null, - "title": "RSS mem #0", + "title": "RSS mem #${node}", "type": "singlestat", "valueFontSize": "80%", "valueMaps": [ @@ -781,14 +781,14 @@ "tableColumn": "", "targets": [ { - "expr": "rate(process_cpu_seconds_total{node=\"0\"}[2s]) * 100", + "expr": "rate(process_cpu_seconds_total{node=\"${node}\"}[2s]) * 100", "refId": "A" } ], "thresholds": "", "timeFrom": null, "timeShift": null, - "title": "CPU usage #0", + "title": "CPU usage #${node}", "type": "singlestat", "valueFontSize": "80%", "valueMaps": [ @@ -864,7 +864,7 @@ "tableColumn": "", "targets": [ { - "expr": "beacon_slot{node=\"0\"}", + "expr": "beacon_slot{node=\"${node}\"}", "interval": "", "legendFormat": "", "refId": "A" @@ -873,7 +873,7 @@ "thresholds": "", "timeFrom": null, "timeShift": null, - "title": "current slot #0", + "title": "current slot #${node}", "type": "singlestat", "valueFontSize": "80%", "valueMaps": [ @@ -1034,14 +1034,14 @@ "tableColumn": "", "targets": [ { - "expr": "beacon_attestations_received_total{node=\"0\"}", + "expr": "beacon_attestations_received_total{node=\"${node}\"}", "refId": "A" } ], "thresholds": "", "timeFrom": null, "timeShift": null, - "title": "att'ns recv'd #0", + "title": "att'ns recv'd #${node}", "type": "singlestat", "valueFontSize": "80%", "valueMaps": [ @@ -1097,13 +1097,13 @@ "steppedLine": false, "targets": [ { - "expr": "rate(beacon_blocks_received_total{node=\"0\"}[4s]) * 3", + "expr": "rate(beacon_blocks_received_total{node=\"${node}\"}[4s]) * 3", "interval": "", "legendFormat": "received", "refId": "B" }, { - "expr": "rate(beacon_blocks_proposed_total{node=\"0\"}[4s]) * 3", + "expr": "rate(beacon_blocks_proposed_total{node=\"${node}\"}[4s]) * 3", "interval": "", "legendFormat": "proposed", "refId": "A" @@ -1113,7 +1113,7 @@ "timeFrom": null, "timeRegions": [], "timeShift": null, - "title": "blocks #0", + "title": "blocks #${node}", "tooltip": { "shared": true, "sort": 0, @@ -1213,7 +1213,7 @@ "tableColumn": "", "targets": [ { - "expr": "beacon_current_epoch{node=\"0\"}", + "expr": "beacon_current_epoch{node=\"${node}\"}", "interval": "", "legendFormat": "", "refId": "A" @@ -1222,7 +1222,7 @@ "thresholds": "", "timeFrom": null, "timeShift": null, - "title": "current epoch #0", + "title": "current epoch #${node}", "type": "singlestat", "valueFontSize": "80%", "valueMaps": [ @@ -1297,7 +1297,7 @@ "tableColumn": "", "targets": [ { - "expr": "beacon_current_justified_epoch{node=\"0\"}", + "expr": "beacon_current_justified_epoch{node=\"${node}\"}", "interval": "", "legendFormat": "", "refId": "A" @@ -1306,7 +1306,7 @@ "thresholds": "", "timeFrom": null, "timeShift": null, - "title": "current justified epoch #0", + "title": "current justified epoch #${node}", "type": "singlestat", "valueFontSize": "80%", "valueMaps": [ @@ -1382,7 +1382,7 @@ "tableColumn": "", "targets": [ { - "expr": "time() - process_start_time_seconds{node=\"0\"}", + "expr": "time() - process_start_time_seconds{node=\"${node}\"}", "interval": "", "legendFormat": "", "refId": "A" @@ -1391,7 +1391,7 @@ "thresholds": "", "timeFrom": null, "timeShift": null, - "title": "runtime #0", + "title": "runtime #${node}", "type": "singlestat", "valueFontSize": "80%", "valueMaps": [ @@ -1467,14 +1467,14 @@ "tableColumn": "", "targets": [ { - "expr": "libp2p_peers{node=\"0\"}", + "expr": "libp2p_peers{node=\"${node}\"}", "refId": "A" } ], "thresholds": "", "timeFrom": null, "timeShift": null, - "title": "peers #0", + "title": "peers #${node}", "type": "singlestat", "valueFontSize": "80%", "valueMaps": [ @@ -1549,7 +1549,7 @@ "tableColumn": "", "targets": [ { - "expr": "beacon_finalized_epoch{node=\"0\"}", + "expr": "beacon_finalized_epoch{node=\"${node}\"}", "interval": "", "legendFormat": "", "refId": "A" @@ -1558,7 +1558,7 @@ "thresholds": "", "timeFrom": null, "timeShift": null, - "title": "last finalized epoch #0", + "title": "last finalized epoch #${node}", "type": "singlestat", "valueFontSize": "80%", "valueMaps": [ @@ -1611,13 +1611,13 @@ "steppedLine": false, "targets": [ { - "expr": "rate(beacon_attestations_received_total{node=\"0\"}[4s]) * 3", + "expr": "rate(beacon_attestations_received_total{node=\"${node}\"}[4s]) * 3", "interval": "", "legendFormat": "received", "refId": "A" }, { - "expr": "rate(beacon_attestations_sent_total{node=\"0\"}[4s]) * 3", + "expr": "rate(beacon_attestations_sent_total{node=\"${node}\"}[4s]) * 3", "interval": "", "legendFormat": "sent", "refId": "B" @@ -1627,7 +1627,7 @@ "timeFrom": null, "timeRegions": [], "timeShift": null, - "title": "attestations #0", + "title": "attestations #${node}", "tooltip": { "shared": true, "sort": 0, @@ -1697,7 +1697,7 @@ "reverseYBuckets": false, "targets": [ { - "expr": "rate(beacon_attestation_received_seconds_from_slot_start_bucket{node=\"0\"}[4s]) * 3", + "expr": "rate(beacon_attestation_received_seconds_from_slot_start_bucket{node=\"${node}\"}[4s]) * 3", "format": "heatmap", "instant": false, "interval": "", @@ -1708,7 +1708,7 @@ ], "timeFrom": null, "timeShift": null, - "title": "received attestation delay (s) #0", + "title": "received attestation delay (s) #${node}", "tooltip": { "show": true, "showHistogram": false @@ -1738,7 +1738,35 @@ "style": "dark", "tags": [], "templating": { - "list": [] + "list": [ + { + "allValue": null, + "current": { + "tags": [], + "text": "0", + "value": "0" + }, + "datasource": "Prometheus", + "definition": "label_values(process_virtual_memory_bytes,node)", + "hide": 0, + "includeAll": false, + "index": -1, + "label": null, + "multi": false, + "name": "node", + "options": [], + "query": "label_values(process_virtual_memory_bytes,node)", + "refresh": 1, + "regex": "", + "skipUrlSync": false, + "sort": 0, + "tagValuesQuery": "", + "tags": [], + "tagsQuery": "", + "type": "query", + "useTags": false + } + ] }, "time": { "from": "now-15m", @@ -1759,10 +1787,10 @@ ] }, "timezone": "", - "title": "beacon chain sim (node0)", - "uid": "pgeNfj2Wz2", + "title": "NBC local testnet/sim (all nodes)", + "uid": "pgeNfj2Wz2a", "variables": { "list": [] }, "version": 38 -} +} \ No newline at end of file diff --git a/media/monitoring.png b/media/monitoring.png index 6b2869084..96b24f468 100644 Binary files a/media/monitoring.png and b/media/monitoring.png differ diff --git a/scripts/connect_to_testnet.nims b/scripts/connect_to_testnet.nims index f35238a49..359143626 100644 --- a/scripts/connect_to_testnet.nims +++ b/scripts/connect_to_testnet.nims @@ -112,6 +112,9 @@ cli do (skipGoerliKey {. rmDir dataDir cd rootDir + + exec &"""./scripts/make_prometheus_config.sh --nodes 1 --base-metrics-port 8008 --config-file "{dataDir}/prometheus.yml"""" + exec &"""nim c {nimFlags} -d:"const_preset={preset}" -o:"{beaconNodeBinary}" beacon_chain/beacon_node.nim""" proc execIgnoringExitCode(s: string) = diff --git a/scripts/launch_local_testnet.sh b/scripts/launch_local_testnet.sh index d274532df..602925d9e 100755 --- a/scripts/launch_local_testnet.sh +++ b/scripts/launch_local_testnet.sh @@ -24,7 +24,7 @@ if [ ${PIPESTATUS[0]} != 4 ]; then fi OPTS="ht:n:d:" -LONGOPTS="help,testnet:,nodes:,data-dir:,disable-htop,log-level:,grafana,base-port:,base-metrics-port:" +LONGOPTS="help,testnet:,nodes:,data-dir:,disable-htop,log-level:,base-port:,base-metrics-port:" # default values TESTNET="1" @@ -32,7 +32,6 @@ NUM_NODES="10" DATA_DIR="local_testnet_data" USE_HTOP="1" LOG_LEVEL="DEBUG" -ENABLE_GRAFANA="0" BASE_PORT="9000" BASE_METRICS_PORT="8008" @@ -51,7 +50,6 @@ CI run: $(basename $0) --disable-htop -- --verify-finalization --stop-at-epoch=5 --base-metrics-port bootstrap node's metrics server port (default: ${BASE_METRICS_PORT}) --disable-htop don't use "htop" to see the beacon_node processes --log-level set the log level (default: ${LOG_LEVEL}) - --grafana generate Grafana dashboards (and Prometheus config file) EOF } @@ -89,10 +87,6 @@ while true; do LOG_LEVEL="$2" shift 2 ;; - --grafana) - ENABLE_GRAFANA="1" - shift - ;; --base-port) BASE_PORT="$2" shift 2 @@ -137,7 +131,7 @@ else fi NETWORK_NIM_FLAGS=$(scripts/load-testnet-nim-flags.sh ${NETWORK}) -$MAKE -j2 LOG_LEVEL="${LOG_LEVEL}" NIMFLAGS="-d:insecure -d:testnet_servers_image ${NETWORK_NIM_FLAGS}" beacon_node process_dashboard +$MAKE LOG_LEVEL="${LOG_LEVEL}" NIMFLAGS="-d:insecure -d:testnet_servers_image ${NETWORK_NIM_FLAGS}" beacon_node ./build/beacon_node makeDeposits \ --quickstart-deposits=${QUICKSTART_VALIDATORS} \ @@ -157,29 +151,10 @@ BOOTSTRAP_IP="127.0.0.1" --bootstrap-port=${BASE_PORT} \ --genesis-offset=30 # Delay in seconds -if [[ "$ENABLE_GRAFANA" == "1" ]]; then - # Prometheus config - cat > "${DATA_DIR}/prometheus.yml" <> "${DATA_DIR}/prometheus.yml" < /dev/null +if [ ${PIPESTATUS[0]} != 4 ]; then + echo '`getopt --test` failed in this environment.' + exit 1 +fi + +OPTS="h" +LONGOPTS="help,nodes:,base-metrics-port:,config-file:" + +# default values +NUM_NODES="10" +BASE_METRICS_PORT="8008" +CONFIG_FILE="prometheus.yml" + +print_help() { + cat < "${CONFIG_FILE}" <> "${CONFIG_FILE}" </dev/null || { echo $GANACHE is missing; USE_GANACHE="no"; } USE_PROMETHEUS="${LAUNCH_PROMETHEUS:-no}" type "$PROMETHEUS" &>/dev/null || { echo $PROMETHEUS is missing; USE_PROMETHEUS="no"; } -# Prometheus config (continued inside the loop) -mkdir -p "${METRICS_DIR}" -cat > "${METRICS_DIR}/prometheus.yml" <> "${METRICS_DIR}/prometheus.yml" <