generate Grafana dashboards for remote testnet nodes

This commit is contained in:
Ștefan Talpalaru 2020-06-03 02:18:25 +02:00 committed by zah
parent 07393c8de6
commit 3a6a9f8135
5 changed files with 108 additions and 106 deletions

4
.gitignore vendored
View File

@ -32,3 +32,7 @@ build/
*.sqlite3
/local_testnet_data*/
# Grafana dashboards
/docker/*.json

View File

@ -99,8 +99,10 @@ case conf.cmd
of restart_nodes:
for n in nodes():
if n.id mod 2 == 0:
echo &"echo Pulling container image on {n.server} ..."
# This will only print one line: "docker.io/statusteam/nimbus_beacon_node:testnet1".
echo &"ssh {n.server} docker pull -q statusteam/nimbus_beacon_node:{conf.network}"
echo &"echo Starting container {n.container}@{n.server} ..."
# docker-compose will rebuild the container if it detects a newer image.
# Prints: "Recreating beacon-node-testnet1-1 ... done".
echo &"ssh {n.server} 'cd /docker/{n.container} && docker-compose --compatibility up -d'"

View File

@ -64,7 +64,17 @@ if [ "$ETH1_PRIVATE_KEY" != "" ]; then
fi
echo "Building a local beacon_node instance for 'makeDeposits' and 'createTestnet'"
make NIMFLAGS="-d:insecure -d:testnet_servers_image ${NETWORK_NIM_FLAGS}" beacon_node
make -j2 NIMFLAGS="-d:insecure -d:testnet_servers_image ${NETWORK_NIM_FLAGS}" beacon_node process_dashboard
echo "Generating Grafana dashboards for remote testnet servers"
for testnet in 0 1; do
./build/process_dashboard \
--nodes=20 \
--in="tests/simulation/beacon-chain-sim-node0-Grafana-dashboard.json" \
--out="docker/beacon-chain-sim-remote-testnet${testnet}-Grafana-dashboard.json" \
--type="remote" \
--testnet="${testnet}"
done
cd docker
@ -104,7 +114,7 @@ if [[ $PUBLISH_TESTNET_RESETS != "0" ]]; then
# TODO If we try to use direct piping here, bash doesn't execute all of the commands.
# The reasons for this are unclear at the moment.
../env.sh nim --verbosity:0 manage_testnet_hosts.nims reset_network \
../env.sh nim --verbosity:0 --hints:off manage_testnet_hosts.nims reset_network \
--network=$NETWORK \
--deposits-dir="$DEPOSITS_DIR_ABS" \
--network-data-dir="$NETWORK_DIR_ABS" \
@ -130,7 +140,7 @@ if [[ $PUBLISH_TESTNET_RESETS != "0" ]]; then
git push
popd
../env.sh nim --verbosity:0 manage_testnet_hosts.nims restart_nodes \
../env.sh nim --verbosity:0 --hints:off manage_testnet_hosts.nims restart_nodes \
--network=$NETWORK \
> /tmp/restart-nodes.sh

View File

@ -47,6 +47,22 @@
"id": 13,
"links": [],
"panels": [
{
"content": "",
"datasource": null,
"gridPos": {
"h": 1,
"w": 24,
"x": 0,
"y": 0
},
"id": 44,
"mode": "markdown",
"timeFrom": null,
"timeShift": null,
"title": "node #0",
"type": "text"
},
{
"aliasColors": {},
"bars": false,
@ -59,7 +75,7 @@
"h": 6,
"w": 14,
"x": 0,
"y": 0
"y": 1
},
"hiddenSeries": false,
"id": 2,
@ -179,7 +195,7 @@
"h": 5,
"w": 10,
"x": 14,
"y": 0
"y": 1
},
"hiddenSeries": false,
"id": 16,
@ -273,7 +289,7 @@
"h": 5,
"w": 10,
"x": 14,
"y": 5
"y": 6
},
"hiddenSeries": false,
"id": 20,
@ -369,7 +385,7 @@
"h": 6,
"w": 14,
"x": 0,
"y": 6
"y": 7
},
"hiddenSeries": false,
"id": 18,
@ -464,7 +480,7 @@
"h": 5,
"w": 10,
"x": 14,
"y": 10
"y": 11
},
"hiddenSeries": false,
"id": 24,
@ -553,7 +569,7 @@
"h": 6,
"w": 14,
"x": 0,
"y": 12
"y": 13
},
"hiddenSeries": false,
"id": 22,
@ -656,7 +672,7 @@
"h": 2,
"w": 5,
"x": 14,
"y": 15
"y": 16
},
"id": 6,
"interval": null,
@ -739,7 +755,7 @@
"h": 2,
"w": 5,
"x": 19,
"y": 15
"y": 16
},
"id": 8,
"interval": null,
@ -822,7 +838,7 @@
"h": 2,
"w": 3,
"x": 14,
"y": 17
"y": 18
},
"id": 28,
"interval": null,
@ -908,7 +924,7 @@
"h": 2,
"w": 4,
"x": 17,
"y": 17
"y": 18
},
"id": 13,
"interval": null,
@ -993,7 +1009,7 @@
"h": 2,
"w": 3,
"x": 21,
"y": 17
"y": 18
},
"id": 14,
"interval": null,
@ -1068,7 +1084,7 @@
"h": 6,
"w": 14,
"x": 0,
"y": 18
"y": 19
},
"hiddenSeries": false,
"id": 38,
@ -1113,7 +1129,7 @@
"timeFrom": null,
"timeRegions": [],
"timeShift": null,
"title": "blocks",
"title": "blocks #0",
"tooltip": {
"shared": true,
"sort": 0,
@ -1172,7 +1188,7 @@
"h": 2,
"w": 3,
"x": 14,
"y": 19
"y": 20
},
"id": 32,
"interval": null,
@ -1256,7 +1272,7 @@
"h": 2,
"w": 4,
"x": 17,
"y": 19
"y": 20
},
"id": 34,
"interval": null,
@ -1306,7 +1322,7 @@
"thresholds": "",
"timeFrom": null,
"timeShift": null,
"title": "last justified epoch #0",
"title": "current justified epoch #0",
"type": "singlestat",
"valueFontSize": "80%",
"valueMaps": [
@ -1341,7 +1357,7 @@
"h": 2,
"w": 3,
"x": 21,
"y": 19
"y": 20
},
"id": 40,
"interval": null,
@ -1426,7 +1442,7 @@
"h": 2,
"w": 3,
"x": 14,
"y": 21
"y": 22
},
"id": 12,
"interval": null,
@ -1508,7 +1524,7 @@
"h": 2,
"w": 4,
"x": 17,
"y": 21
"y": 22
},
"id": 36,
"interval": null,
@ -1570,82 +1586,6 @@
],
"valueName": "current"
},
{
"cacheTimeout": null,
"colorBackground": false,
"colorValue": false,
"colors": [
"#299c46",
"rgba(237, 129, 40, 0.89)",
"#d44a3a"
],
"datasource": null,
"format": "none",
"gauge": {
"maxValue": 100,
"minValue": 0,
"show": false,
"thresholdLabels": false,
"thresholdMarkers": true
},
"gridPos": {
"h": 12,
"w": 10,
"x": 14,
"y": 23
},
"id": 42,
"interval": null,
"links": [],
"mappingType": 1,
"mappingTypes": [
{
"name": "value to text",
"value": 1
},
{
"name": "range to text",
"value": 2
}
],
"maxDataPoints": 100,
"nullPointMode": "connected",
"nullText": null,
"postfix": "",
"postfixFontSize": "50%",
"prefix": "",
"prefixFontSize": "50%",
"rangeMaps": [
{
"from": "null",
"text": "N/A",
"to": "null"
}
],
"sparkline": {
"fillColor": "rgba(31, 118, 189, 0.18)",
"full": false,
"lineColor": "rgb(31, 120, 193)",
"show": false,
"ymax": null,
"ymin": null
},
"tableColumn": "",
"thresholds": "",
"timeFrom": null,
"timeShift": null,
"title": "spacer #0",
"type": "singlestat",
"valueFontSize": "80%",
"valueMaps": [
{
"op": "=",
"text": "_",
"value": "null"
}
],
"valueName": "current"
},
{
"aliasColors": {},
"bars": false,
@ -1658,7 +1598,7 @@
"h": 5,
"w": 14,
"x": 0,
"y": 24
"y": 25
},
"hiddenSeries": false,
"id": 30,
@ -1760,7 +1700,7 @@
"h": 6,
"w": 14,
"x": 0,
"y": 29
"y": 30
},
"heatmap": {},
"hideZeroBuckets": false,
@ -1817,7 +1757,7 @@
"list": []
},
"time": {
"from": "now-1h",
"from": "now-15m",
"to": "now"
},
"timepicker": {
@ -1840,5 +1780,5 @@
"variables": {
"list": []
},
"version": 31
"version": 35
}

View File

@ -1,10 +1,31 @@
import json, parseopt, strutils
# usage: process_dashboard --nodes=2 --in=node0_dashboard.json --out=all_nodes_dashboard.json
# usage: process_dashboard --nodes=2 --in=node0_dashboard.json --out=all_nodes_dashboard.json --type=local --testnet=0
type
OutputType = enum
local
remote
var
p = initOptParser()
nodes: int
inputFileName, outputFilename: string
outputType = OutputType.local
testnet = 0
let
hosts = [
"master-01",
"node-01",
"node-02",
"node-03",
"node-04",
"node-05",
"node-06",
"node-07",
"node-08",
"node-09",
]
nodesPerHost = 2
while true:
p.next()
@ -18,6 +39,10 @@ while true:
inputFileName = p.val
elif p.key == "out":
outputFileName = p.val
elif p.key == "type":
outputType = parseEnum[OutputType](p.val)
elif p.key == "testnet":
testnet = p.val.parseInt()
else:
echo "unsupported argument: ", p.key
of cmdArgument:
@ -35,6 +60,11 @@ for panel in panels:
gridHeight += panel["gridPos"]["h"].getInt()
outputData["panels"] = %* []
if outputType == OutputType.remote:
var annotations = outputData["annotations"]["list"]
for annotation in annotations.mitems:
annotation["datasource"] = %* "-- Grafana --"
for nodeNum in 0 .. (nodes - 1):
var
nodePanels = panels.copy()
@ -43,14 +73,30 @@ for nodeNum in 0 .. (nodes - 1):
panel["title"] = %* replace(panel["title"].getStr(), "#0", "#" & $nodeNum)
panel["id"] = %* (panelIndex + (nodeNum * numPanels))
panel["gridPos"]["y"] = %* (panel["gridPos"]["y"].getInt() + (nodeNum * gridHeight))
if outputType == OutputType.remote:
panel["datasource"] = newJNull()
if panel.hasKey("targets"):
var targets = panel["targets"]
for target in targets.mitems:
case outputType:
of OutputType.local:
target["expr"] = %* replace(target["expr"].getStr(), "{node=\"0\"}", "{node=\"" & $nodeNum & "\"}")
of OutputType.remote:
# The remote Prometheus instance polls (scrapes) once per minute, and rate()
# needs at least two samples inside its range window, so the minimum usable
# rate() interval is 2 minutes.
target["expr"] = %* multiReplace(target["expr"].getStr(),
("{node=\"0\"}", "{container=\"beacon-node-testnet" & $testnet & "-" & $((nodeNum mod 2) + 1) & "\",instance=\"" & (hosts[nodeNum div nodesPerHost]) & ".aws-eu-central-1a.nimbus.test\"}"),
("[2s]", "[2m]"),
("[4s]) * 3", "[2m]) * 120"))
outputData["panels"].add(panel)
panelIndex.inc()
outputData["uid"] = %* (outputData["uid"].getStr() & "a")
case outputType:
of OutputType.local:
outputData["title"] = %* (outputData["title"].getStr() & " (all nodes)")
outputData["uid"] = %* (outputData["uid"].getStr() & "a")
of OutputType.remote:
outputData["title"] = %* ("Nimbus testnet" & $testnet)
outputData["uid"] = %* (outputData["uid"].getStr() & $testnet)
writeFile(outputFilename, pretty(outputData))