simulation: metric visualisation

This commit is contained in:
Ștefan Talpalaru 2019-10-17 15:18:58 +02:00
parent 021661180b
commit 3a2fc249a2
No known key found for this signature in database
GPG Key ID: CBF7934204F1B6F9
10 changed files with 561 additions and 14 deletions

View File

@ -13,8 +13,8 @@ BUILD_SYSTEM_DIR := vendor/nimbus-build-system
# we don't want an error here, so we can handle things later, in the build-system-checks target
-include $(BUILD_SYSTEM_DIR)/makefiles/variables.mk
TOOLS := beacon_node bench_bls_sig_agggregation state_sim ncli_hash_tree_root ncli_pretty ncli_signing_root ncli_transition
TOOLS_DIRS := beacon_chain benchmarks research ncli
TOOLS := beacon_node bench_bls_sig_agggregation state_sim ncli_hash_tree_root ncli_pretty ncli_signing_root ncli_transition process_dashboard
TOOLS_DIRS := beacon_chain benchmarks research ncli tests/simulation
TOOLS_CSV := $(subst $(SPACE),$(COMMA),$(TOOLS))
.PHONY: all build-system-checks deps update p2pd test $(TOOLS) clean_eth2_network_simulation_files eth2_network_simulation clean-testnet0 testnet0 clean-testnet1 testnet1 clean
@ -67,7 +67,7 @@ $(TOOLS): | build deps
clean_eth2_network_simulation_files:
rm -rf tests/simulation/{data,validators}
eth2_network_simulation: | build deps p2pd clean_eth2_network_simulation_files
eth2_network_simulation: | build deps p2pd clean_eth2_network_simulation_files process_dashboard
GIT_ROOT="$$PWD" tests/simulation/start.sh
testnet0 testnet1: | build deps

View File

@ -136,6 +136,26 @@ You can find out more about it in the [development update](https://our.status.im
_Alternatively, fire up our [experimental Vagrant instance with Nim pre-installed](https://our.status.im/setting-up-a-local-vagrant-environment-for-nim-development/) and give us yout feedback about the process!_
### Visualising simulation metrics
Those [generic instructions from the Nimbus repo](https://github.com/status-im/nimbus/#metric-visualisation) apply here as well.
Specific steps:
```bash
# This will generate the Prometheus config and the Grafana dashboard on the fly,
# based on the number of nodes (which you can control by passing something like NODES=6 to `make`).
make eth2_network_simulation
# In another terminal tab, after the sim started:
cd tests/simulation/prometheus
prometheus
```
The dashboard you need to import in Grafana is "tests/simulation/beacon-chain-sim-all-nodes-Grafana-dashboard.json".
[Obligatory screenshot.](https://i.imgur.com/pLvLhID.png)
### Makefile tips and tricks for developers
- build all those tools known to the Makefile:

View File

@ -9,11 +9,11 @@ contract(DepositContract):
proc writeTextFile(filename: string, contents: string) =
writeFile(filename, contents)
echo "Wrote ", filename
# echo "Wrote ", filename
proc writeFile(filename: string, value: auto) =
Json.saveFile(filename, value, pretty = true)
echo "Wrote ", filename
# echo "Wrote ", filename
proc ethToWei(eth: UInt256): UInt256 =
eth * 1000000000000000000.u256

View File

@ -1,3 +1,5 @@
data/
validators/
prometheus/
beacon-chain-sim-all-nodes-Grafana-dashboard.json

View File

@ -0,0 +1,442 @@
{
"annotations": {
"list": [
{
"builtIn": 1,
"datasource": "-- Grafana --",
"enable": true,
"hide": true,
"iconColor": "rgba(0, 211, 255, 1)",
"name": "Annotations & Alerts",
"type": "dashboard"
}
]
},
"editable": true,
"gnetId": null,
"graphTooltip": 0,
"id": 6,
"links": [],
"panels": [
{
"aliasColors": {},
"bars": false,
"dashLength": 10,
"dashes": false,
"datasource": "Prometheus",
"fill": 1,
"fillGradient": 0,
"gridPos": {
"h": 8,
"w": 24,
"x": 0,
"y": 0
},
"id": 2,
"legend": {
"avg": false,
"current": false,
"max": false,
"min": false,
"show": true,
"total": false,
"values": false
},
"lines": true,
"linewidth": 1,
"nullPointMode": "null",
"options": {
"dataLinks": []
},
"percentage": false,
"pointradius": 2,
"points": false,
"renderer": "flot",
"seriesOverrides": [
{
"alias": "process_resident_memory_bytes{instance=\"localhost:9093\",job=\"nimbus\"}",
"yaxis": 2
},
{
"alias": "process_virtual_memory_bytes{instance=\"localhost:9093\",job=\"nimbus\"}",
"yaxis": 2
},
{
"alias": "nim_gc_mem_bytes{instance=\"localhost:9093\",job=\"nimbus\"}",
"yaxis": 2
},
{
"alias": "nim_gc_mem_occupied_bytes{instance=\"localhost:9093\",job=\"nimbus\"}",
"yaxis": 2
},
{
"alias": "RSS",
"yaxis": 2
},
{
"alias": "virtual mem",
"yaxis": 2
},
{
"alias": "Nim GC mem total",
"yaxis": 2
},
{
"alias": "Nim GC mem used",
"yaxis": 2
}
],
"spaceLength": 10,
"stack": false,
"steppedLine": false,
"targets": [
{
"expr": "rate(process_cpu_seconds_total{node=\"0\"}[2s]) * 100",
"legendFormat": "CPU usage %",
"refId": "A"
},
{
"expr": "process_open_fds{node=\"0\"}",
"legendFormat": "open file descriptors",
"refId": "C"
},
{
"expr": "process_resident_memory_bytes{node=\"0\"}",
"legendFormat": "RSS",
"refId": "D"
},
{
"expr": "nim_gc_mem_bytes{node=\"0\"}",
"legendFormat": "Nim GC mem total",
"refId": "F"
},
{
"expr": "nim_gc_mem_occupied_bytes{node=\"0\"}",
"legendFormat": "Nim GC mem used",
"refId": "G"
}
],
"thresholds": [],
"timeFrom": null,
"timeRegions": [],
"timeShift": null,
"title": "beacon_node #0",
"tooltip": {
"shared": true,
"sort": 0,
"value_type": "individual"
},
"type": "graph",
"xaxis": {
"buckets": null,
"mode": "time",
"name": null,
"show": true,
"values": []
},
"yaxes": [
{
"format": "short",
"label": null,
"logBase": 1,
"max": null,
"min": null,
"show": true
},
{
"format": "bytes",
"label": null,
"logBase": 1,
"max": null,
"min": null,
"show": true
}
],
"yaxis": {
"align": false,
"alignLevel": null
}
},
{
"cacheTimeout": null,
"colorBackground": false,
"colorValue": false,
"colors": [
"#299c46",
"rgba(237, 129, 40, 0.89)",
"#d44a3a"
],
"datasource": null,
"format": "bytes",
"gauge": {
"maxValue": 100,
"minValue": 0,
"show": false,
"thresholdLabels": false,
"thresholdMarkers": true
},
"gridPos": {
"h": 3,
"w": 5,
"x": 0,
"y": 8
},
"id": 6,
"interval": null,
"links": [],
"mappingType": 1,
"mappingTypes": [
{
"name": "value to text",
"value": 1
},
{
"name": "range to text",
"value": 2
}
],
"maxDataPoints": 100,
"nullPointMode": "connected",
"nullText": null,
"options": {},
"pluginVersion": "6.3.5",
"postfix": "",
"postfixFontSize": "50%",
"prefix": "",
"prefixFontSize": "50%",
"rangeMaps": [
{
"from": "null",
"text": "N/A",
"to": "null"
}
],
"sparkline": {
"fillColor": "rgba(31, 118, 189, 0.18)",
"full": false,
"lineColor": "rgb(31, 120, 193)",
"show": false,
"ymax": null,
"ymin": null
},
"tableColumn": "",
"targets": [
{
"expr": "process_resident_memory_bytes{node=\"0\"}",
"refId": "A"
}
],
"thresholds": "",
"timeFrom": null,
"timeShift": null,
"title": "RSS mem #0",
"type": "singlestat",
"valueFontSize": "80%",
"valueMaps": [
{
"op": "=",
"text": "N/A",
"value": "null"
}
],
"valueName": "current"
},
{
"cacheTimeout": null,
"colorBackground": false,
"colorValue": false,
"colors": [
"#299c46",
"rgba(237, 129, 40, 0.89)",
"#d44a3a"
],
"datasource": null,
"format": "percent",
"gauge": {
"maxValue": 100,
"minValue": 0,
"show": false,
"thresholdLabels": false,
"thresholdMarkers": true
},
"gridPos": {
"h": 3,
"w": 5,
"x": 5,
"y": 8
},
"id": 8,
"interval": null,
"links": [],
"mappingType": 1,
"mappingTypes": [
{
"name": "value to text",
"value": 1
},
{
"name": "range to text",
"value": 2
}
],
"maxDataPoints": 100,
"nullPointMode": "connected",
"nullText": null,
"options": {},
"pluginVersion": "6.3.5",
"postfix": "",
"postfixFontSize": "50%",
"prefix": "",
"prefixFontSize": "50%",
"rangeMaps": [
{
"from": "null",
"text": "N/A",
"to": "null"
}
],
"sparkline": {
"fillColor": "rgba(31, 118, 189, 0.18)",
"full": false,
"lineColor": "rgb(31, 120, 193)",
"show": false,
"ymax": null,
"ymin": null
},
"tableColumn": "",
"targets": [
{
"expr": "rate(process_cpu_seconds_total{node=\"0\"}[2s]) * 100",
"refId": "A"
}
],
"thresholds": "",
"timeFrom": null,
"timeShift": null,
"title": "CPU usage #0",
"type": "singlestat",
"valueFontSize": "80%",
"valueMaps": [
{
"op": "=",
"text": "N/A",
"value": "null"
}
],
"valueName": "current"
},
{
"cacheTimeout": null,
"colorBackground": false,
"colorPrefix": false,
"colorValue": false,
"colors": [
"#299c46",
"rgba(237, 129, 40, 0.89)",
"#d44a3a"
],
"datasource": null,
"format": "none",
"gauge": {
"maxValue": 100,
"minValue": 0,
"show": false,
"thresholdLabels": false,
"thresholdMarkers": true
},
"gridPos": {
"h": 3,
"w": 4,
"x": 10,
"y": 8
},
"id": 12,
"interval": null,
"links": [],
"mappingType": 1,
"mappingTypes": [
{
"name": "value to text",
"value": 1
},
{
"name": "range to text",
"value": 2
}
],
"maxDataPoints": 100,
"nullPointMode": "connected",
"nullText": null,
"options": {},
"postfix": "",
"postfixFontSize": "50%",
"prefix": "",
"prefixFontSize": "50%",
"rangeMaps": [
{
"from": "null",
"text": "N/A",
"to": "null"
}
],
"sparkline": {
"fillColor": "rgba(31, 118, 189, 0.18)",
"full": false,
"lineColor": "rgb(31, 120, 193)",
"show": false,
"ymax": null,
"ymin": null
},
"tableColumn": "",
"targets": [
{
"expr": "libp2p_peers{node=\"0\"}",
"refId": "A"
}
],
"thresholds": "",
"timeFrom": null,
"timeShift": null,
"title": "peers #0",
"type": "singlestat",
"valueFontSize": "80%",
"valueMaps": [
{
"op": "=",
"text": "N/A",
"value": "null"
}
],
"valueName": "current"
}
],
"refresh": "5s",
"schemaVersion": 20,
"style": "dark",
"tags": [],
"templating": {
"list": []
},
"time": {
"from": "now-15m",
"to": "now"
},
"timepicker": {
"refresh_intervals": [
"5s",
"10s",
"30s",
"1m",
"5m",
"15m",
"30m",
"1h",
"2h",
"1d"
]
},
"timezone": "",
"title": "beacon chain sim",
"uid": "pgeNfj2Wz",
"version": 15
}

View File

@ -0,0 +1,55 @@
import json, parseopt, strutils
# usage: process_dashboard --nodes=2 --in=node0_dashboard.json --out=all_nodes_dashboard.json
var
p = initOptParser()
nodes: int
inputFileName, outputFilename: string
while true:
p.next()
case p.kind:
of cmdEnd:
break
of cmdShortOption, cmdLongOption:
if p.key == "nodes":
nodes = p.val.parseInt()
elif p.key == "in":
inputFileName = p.val
elif p.key == "out":
outputFileName = p.val
else:
echo "unsupported argument: ", p.key
of cmdArgument:
echo "unsupported argument: ", p.key
var
inputData = parseFile(inputFileName)
panels = inputData["panels"].copy()
numPanels = len(panels)
gridHeight = 0
outputData = inputData
for panel in panels:
if panel["gridPos"]["x"].getInt() == 0:
gridHeight += panel["gridPos"]["h"].getInt()
outputData["panels"] = %* []
for nodeNum in 0 .. (nodes - 1):
var
nodePanels = panels.copy()
panelIndex = 0
for panel in nodePanels.mitems:
panel["title"] = %* replace(panel["title"].getStr(), "#0", "#" & $nodeNum)
panel["id"] = %* (panelIndex + (nodeNum * numPanels))
panel["gridPos"]["y"] = %* (panel["gridPos"]["y"].getInt() + (nodeNum * gridHeight))
var targets = panel["targets"]
for target in targets.mitems:
target["expr"] = %* replace(target["expr"].getStr(), "{node=\"0\"}", "{node=\"" & $nodeNum & "\"}")
outputData["panels"].add(panel)
panelIndex.inc()
outputData["uid"] = %* (outputData["uid"].getStr() & "a")
outputData["title"] = %* (outputData["title"].getStr() & " (all nodes)")
writeFile(outputFilename, pretty(outputData))

View File

@ -38,5 +38,8 @@ $BEACON_NODE_BIN \
--stateSnapshot:$SNAPSHOT_FILE \
$DEPOSIT_WEB3_URL_ARG \
--depositContractAddress=$DEPOSIT_CONTRACT_ADDRESS \
$*
--metricsServer=true \
--metricsServerAddress="127.0.0.1" \
--metricsServerPort="$(( $BASE_METRICS_PORT + $NODE_ID ))"
"$@"

View File

@ -19,7 +19,7 @@ mkdir -p "$VALIDATORS_DIR"
cd "$GIT_ROOT"
NIMFLAGS="-d:chronicles_log_level=DEBUG --hints:off --warnings:off --opt:speed --debuginfo"
NIMFLAGS="-d:chronicles_log_level=DEBUG --hints:off --warnings:off --verbosity:0 --opt:speed --debuginfo"
# Run with "SHARD_COUNT=4 ./start.sh" to change these
DEFS=""
@ -75,6 +75,23 @@ MULTITAIL="${MULTITAIL:-multitail}" # to allow overriding the program name
USE_MULTITAIL="${USE_MULTITAIL:-no}" # make it an opt-in
type "$MULTITAIL" &>/dev/null || USE_MULTITAIL="no"
# Prometheus config (continued inside the loop)
mkdir -p "${METRICS_DIR}"
cat > "${METRICS_DIR}/prometheus.yml" <<EOF
global:
scrape_interval: 1s
scrape_configs:
- job_name: "nimbus"
static_configs:
EOF
# use the exported Grafana dashboard for a single node to create one for all nodes
"${SIM_ROOT}/../../build/process_dashboard" \
--nodes=${NUM_NODES} \
--in="${SIM_ROOT}/beacon-chain-sim-node0-Grafana-dashboard.json" \
--out="${SIM_ROOT}/beacon-chain-sim-all-nodes-Grafana-dashboard.json"
# Kill child processes on Ctrl-C by sending SIGTERM to the whole process group,
# passing the negative PID of this shell instance to the "kill" command.
# Trap and ignore SIGTERM, so we don't kill this process along with its children.
@ -87,9 +104,7 @@ COMMANDS=()
LAST_NODE=$(( NUM_NODES - 1 ))
for i in $(seq 0 $LAST_NODE); do
if [[ "$i" == "0" ]]; then
sleep 0
elif [ "$USE_MULTITAIL" = "no" ]; then
if [[ "$i" != "0" && "$USE_MULTITAIL" == "no" ]]; then
# Wait for the master node to write out its address file
while [ ! -f "${MASTER_NODE_ADDRESS_FILE}" ]; do
sleep 0.1
@ -98,8 +113,8 @@ for i in $(seq 0 $LAST_NODE); do
CMD="${SIM_ROOT}/run_node.sh $i --statusbar:off"
if [ "$USE_MULTITAIL" != "no" ]; then
if [ "$i" = "0" ]; then
if [[ "$USE_MULTITAIL" != "no" ]]; then
if [[ "$i" == "0" ]]; then
SLEEP="0"
else
SLEEP="2"
@ -109,9 +124,16 @@ for i in $(seq 0 $LAST_NODE); do
else
eval "${CMD}" &
fi
# Prometheus config
cat >> "${METRICS_DIR}/prometheus.yml" <<EOF
- targets: ['127.0.0.1:$(( $BASE_METRICS_PORT + $i ))']
labels:
node: '$i'
EOF
done
if [ "$USE_MULTITAIL" != "no" ]; then
if [[ "$USE_MULTITAIL" != "no" ]]; then
eval $MULTITAIL -s 3 -M 0 -x \"Nimbus beacon chain\" "${COMMANDS[@]}"
else
wait # Stop when all nodes have gone down

View File

@ -20,9 +20,12 @@ NUM_NODES=${NODES:-6}
NUM_MISSING_NODES=${MISSING_NODES:-1}
SIMULATION_DIR="${SIM_ROOT}/data"
METRICS_DIR="${SIM_ROOT}/prometheus"
VALIDATORS_DIR="${SIM_ROOT}/validators"
SNAPSHOT_FILE="${SIMULATION_DIR}/state_snapshot.json"
NETWORK_METADATA_FILE="${SIMULATION_DIR}/network.json"
BEACON_NODE_BIN="${SIMULATION_DIR}/beacon_node"
DEPLOY_DEPOSIT_CONTRACT_BIN="${SIMULATION_DIR}/deploy_deposit_contract"
MASTER_NODE_ADDRESS_FILE="${SIMULATION_DIR}/node-0/beacon_node.address"
BASE_METRICS_PORT=8008

2
vendor/nim-metrics vendored

@ -1 +1 @@
Subproject commit d4492f0d0ae9d2c46cc3dde2d947bc175ae420de
Subproject commit dc9873ebacb789c971d5496737cca0cbebcc7a9f