feat: sonda tool (#2893)

This commit is contained in:
gabrielmer 2024-07-12 11:05:13 +03:00 committed by GitHub
parent 9bd8078697
commit 49d6adca3b
17 changed files with 6921 additions and 0 deletions

33
apps/sonda/.env.example Normal file
View File

@ -0,0 +1,33 @@
# RPC URL for accessing testnet via HTTP.
# e.g. https://sepolia.infura.io/v3/123aa110320f4aec179150fba1e1b1b1
RLN_RELAY_ETH_CLIENT_ADDRESS=
# Private key of testnet where you have sepolia ETH that would be staked into RLN contract.
# Note: make sure you don't use the '0x' prefix.
# e.g. 0116196e9a8abed42dd1a22eb63fa2a5a17b0c27d716b87ded2c54f1bf192a0b
ETH_TESTNET_KEY=
# Password you would like to use to protect your RLN membership.
RLN_RELAY_CRED_PASSWORD=
# Advanced. Can be left empty in normal use cases.
NWAKU_IMAGE=
NODEKEY=
DOMAIN=
EXTRA_ARGS=
RLN_RELAY_CONTRACT_ADDRESS=
# -------------------- SONDA CONFIG ------------------
CLUSTER_ID=16
SHARD=32
# Comma separated list of store nodes to poll
STORE_NODES="/dns4/store-01.do-ams3.shards.test.status.im/tcp/30303/p2p/16Uiu2HAmAUdrQ3uwzuE4Gy4D56hX6uLKEeerJAnhKEHZ3DxF1EfT,\
/dns4/store-02.do-ams3.shards.test.status.im/tcp/30303/p2p/16Uiu2HAm9aDJPkhGxc2SFcEACTFdZ91Q5TJjp76qZEhq9iF59x7R,\
/dns4/store-01.gc-us-central1-a.shards.test.status.im/tcp/30303/p2p/16Uiu2HAmMELCo218hncCtTvC2Dwbej3rbyHQcR8erXNnKGei7WPZ,\
/dns4/store-02.gc-us-central1-a.shards.test.status.im/tcp/30303/p2p/16Uiu2HAmJnVR7ZzFaYvciPVafUXuYGLHPzSUigqAmeNw9nJUVGeM,\
/dns4/store-01.ac-cn-hongkong-c.shards.test.status.im/tcp/30303/p2p/16Uiu2HAm2M7xs7cLPc3jamawkEqbr7cUJX11uvY7LxQ6WFUdUKUT,\
/dns4/store-02.ac-cn-hongkong-c.shards.test.status.im/tcp/30303/p2p/16Uiu2HAm9CQhsuwPR54q27kNj9iaQVfyRzTGKrhFmr94oD8ujU6P"
# Wait time in seconds between two consecutive queries
QUERY_DELAY=60
# Consecutive successful store requests to consider a store node healthy
HEALTH_THRESHOLD=5

4
apps/sonda/.gitignore vendored Normal file
View File

@ -0,0 +1,4 @@
.env
keystore
rln_tree
.env

View File

@ -0,0 +1,3 @@
FROM python:3.9.18-alpine3.18
RUN pip install requests argparse prometheus_client

52
apps/sonda/README.md Normal file
View File

@ -0,0 +1,52 @@
# Sonda
Sonda is a tool to monitor store nodes and measure their performance.
It works by running a `nwaku` node, publishing a message from it every fixed interval and performing a store query to all the store nodes we want to monitor to check they respond with the last message we published.
## Instructions
1. Create an `.env` file which will contain the configuration parameters.
You can start by copying `.env.example` and adapting it for your use case
```
cp .env.example .env
${EDITOR} .env
```
The variables that have to be filled for Sonda are
```
CLUSTER_ID=
SHARD=
# Comma separated list of store nodes to poll
STORE_NODES=
# Wait time in seconds between two consecutive queries
QUERY_DELAY=
# Consecutive successful store requests to consider a store node healthy
HEALTH_THRESHOLD=
```
2. If you want to query nodes in `cluster-id` 1, then you have to follow the steps of registering an RLN membership. Otherwise, you can skip this step.
For it, you need:
* Ethereum Sepolia WebSocket endpoint. Get one free from [Infura](https://www.infura.io/).
* Ethereum Sepolia account with some balance <0.01 Eth. Get some [here](https://www.infura.io/faucet/sepolia).
* A password to protect your rln membership.
Fill the `RLN_RELAY_ETH_CLIENT_ADDRESS`, `ETH_TESTNET_KEY` and `RLN_RELAY_CRED_PASSWORD` env variables and run
```
./register_rln.sh
```
3. Start Sonda by running
```
docker-compose up -d
```
4. Browse to http://localhost:3000/dashboards and monitor the performance
There's two Grafana dashboards: `nwaku-monitoring` to track the stats of your node that is publishing messages and performing queries, and `sonda-monitoring` to monitor the responses of the store nodes.

View File

@ -0,0 +1,107 @@
version: "3.7"
x-logging: &logging
logging:
driver: json-file
options:
max-size: 1000m
# Environment variable definitions
x-rln-relay-eth-client-address: &rln_relay_eth_client_address ${RLN_RELAY_ETH_CLIENT_ADDRESS:-} # Add your RLN_RELAY_ETH_CLIENT_ADDRESS after the "-"
x-rln-environment: &rln_env
RLN_RELAY_CONTRACT_ADDRESS: ${RLN_RELAY_CONTRACT_ADDRESS:-0xCB33Aa5B38d79E3D9Fa8B10afF38AA201399a7e3}
RLN_RELAY_CRED_PATH: ${RLN_RELAY_CRED_PATH:-} # Optional: Add your RLN_RELAY_CRED_PATH after the "-"
RLN_RELAY_CRED_PASSWORD: ${RLN_RELAY_CRED_PASSWORD:-} # Optional: Add your RLN_RELAY_CRED_PASSWORD after the "-"
x-sonda-env: &sonda_env
CLUSTER_ID: ${CLUSTER_ID:-1}
SHARD: ${SHARD:-0}
STORE_NODES: ${STORE_NODES:-}
QUERY_DELAY: ${QUERY_DELAY-60}
HEALTH_THRESHOLD: ${HEALTH_THRESHOLD-5}
# Services definitions
services:
nwaku:
image: ${NWAKU_IMAGE:-harbor.status.im/wakuorg/nwaku:v0.30.1}
restart: on-failure
ports:
- 30304:30304/tcp
- 30304:30304/udp
- 9005:9005/udp
- 127.0.0.1:8003:8003
- 80:80 #Let's Encrypt
- 8000:8000/tcp #WSS
- 127.0.0.1:8645:8645
<<:
- *logging
environment:
DOMAIN: ${DOMAIN}
NODEKEY: ${NODEKEY}
RLN_RELAY_CRED_PASSWORD: "${RLN_RELAY_CRED_PASSWORD}"
RLN_RELAY_ETH_CLIENT_ADDRESS: *rln_relay_eth_client_address
EXTRA_ARGS: ${EXTRA_ARGS}
STORAGE_SIZE: ${STORAGE_SIZE}
<<:
- *rln_env
- *sonda_env
volumes:
- ./run_node.sh:/opt/run_node.sh:Z
- ${CERTS_DIR:-./certs}:/etc/letsencrypt/:Z
- ./rln_tree:/etc/rln_tree/:Z
- ./keystore:/keystore:Z
entrypoint: sh
command:
- /opt/run_node.sh
sonda:
build:
context: .
dockerfile: Dockerfile.sonda
ports:
- 127.0.0.1:8004:8004
environment:
<<:
- *sonda_env
command: >
python -u /opt/sonda.py
--delay-seconds=${QUERY_DELAY}
--pubsub-topic=/waku/2/rs/${CLUSTER_ID}/${SHARD}
--store-nodes=${STORE_NODES}
--health-threshold=${HEALTH_THRESHOLD}
volumes:
- ./sonda.py:/opt/sonda.py:Z
depends_on:
- nwaku
prometheus:
image: docker.io/prom/prometheus:latest
volumes:
- ./monitoring/prometheus-config.yml:/etc/prometheus/prometheus.yml:Z
command:
- --config.file=/etc/prometheus/prometheus.yml
# ports:
# - 127.0.0.1:9090:9090
restart: on-failure:5
depends_on:
- nwaku
grafana:
image: docker.io/grafana/grafana:latest
env_file:
- ./monitoring/configuration/grafana-plugins.env
volumes:
- ./monitoring/configuration/grafana.ini:/etc/grafana/grafana.ini:Z
- ./monitoring/configuration/dashboards.yaml:/etc/grafana/provisioning/dashboards/dashboards.yaml:Z
- ./monitoring/configuration/datasources.yaml:/etc/grafana/provisioning/datasources/datasources.yaml:Z
- ./monitoring/configuration/dashboards:/var/lib/grafana/dashboards/:Z
- ./monitoring/configuration/customizations/custom-logo.svg:/usr/share/grafana/public/img/grafana_icon.svg:Z
- ./monitoring/configuration/customizations/custom-logo.svg:/usr/share/grafana/public/img/grafana_typelogo.svg:Z
- ./monitoring/configuration/customizations/custom-logo.png:/usr/share/grafana/public/img/fav32.png:Z
ports:
- 0.0.0.0:3000:3000
restart: on-failure:5
depends_on:
- prometheus

Binary file not shown.

After

Width:  |  Height:  |  Size: 11 KiB

File diff suppressed because one or more lines are too long

After

Width:  |  Height:  |  Size: 13 KiB

View File

@ -0,0 +1,9 @@
apiVersion: 1
providers:
- name: 'Prometheus'
orgId: 1
folder: ''
type: file
options:
path: /var/lib/grafana/dashboards

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,992 @@
{
"annotations": {
"list": [
{
"builtIn": 1,
"datasource": {
"type": "grafana",
"uid": "-- Grafana --"
},
"enable": true,
"hide": true,
"iconColor": "rgba(0, 211, 255, 1)",
"name": "Annotations & Alerts",
"type": "dashboard"
}
]
},
"editable": true,
"fiscalYearStartMonth": 0,
"graphTooltip": 0,
"id": 2,
"links": [],
"liveNow": false,
"panels": [
{
"datasource": {
"type": "prometheus",
"uid": "PBFA97CFB590B2093"
},
"description": "Sonda messages successfully sent to the network",
"fieldConfig": {
"defaults": {
"color": {
"mode": "palette-classic"
},
"custom": {
"axisBorderShow": false,
"axisCenteredZero": false,
"axisColorMode": "text",
"axisLabel": "",
"axisPlacement": "auto",
"barAlignment": 0,
"drawStyle": "line",
"fillOpacity": 0,
"gradientMode": "none",
"hideFrom": {
"legend": false,
"tooltip": false,
"viz": false
},
"insertNulls": false,
"lineInterpolation": "linear",
"lineWidth": 1,
"pointSize": 5,
"scaleDistribution": {
"type": "linear"
},
"showPoints": "auto",
"spanNulls": false,
"stacking": {
"group": "A",
"mode": "none"
},
"thresholdsStyle": {
"mode": "off"
}
},
"mappings": [],
"thresholds": {
"mode": "absolute",
"steps": [
{
"color": "green",
"value": null
},
{
"color": "red",
"value": 80
}
]
}
},
"overrides": []
},
"gridPos": {
"h": 8,
"w": 12,
"x": 0,
"y": 0
},
"id": 1,
"options": {
"legend": {
"calcs": [],
"displayMode": "list",
"placement": "bottom",
"showLegend": true
},
"tooltip": {
"mode": "single",
"sort": "none"
}
},
"targets": [
{
"datasource": {
"type": "prometheus",
"uid": "PBFA97CFB590B2093"
},
"disableTextWrap": false,
"editorMode": "builder",
"expr": "successful_sonda_msgs_total",
"fullMetaSearch": false,
"includeNullMetadata": true,
"instant": false,
"legendFormat": "__auto",
"range": true,
"refId": "A",
"useBackend": false
}
],
"title": "Sent Sonda Messages",
"type": "timeseries"
},
{
"datasource": {
"type": "prometheus",
"uid": "PBFA97CFB590B2093"
},
"description": "Sonda messages that failed to be sent to the network",
"fieldConfig": {
"defaults": {
"color": {
"mode": "palette-classic"
},
"custom": {
"axisBorderShow": false,
"axisCenteredZero": false,
"axisColorMode": "text",
"axisLabel": "",
"axisPlacement": "auto",
"barAlignment": 0,
"drawStyle": "line",
"fillOpacity": 0,
"gradientMode": "none",
"hideFrom": {
"legend": false,
"tooltip": false,
"viz": false
},
"insertNulls": false,
"lineInterpolation": "linear",
"lineWidth": 1,
"pointSize": 5,
"scaleDistribution": {
"type": "linear"
},
"showPoints": "auto",
"spanNulls": false,
"stacking": {
"group": "A",
"mode": "none"
},
"thresholdsStyle": {
"mode": "off"
}
},
"mappings": [],
"thresholds": {
"mode": "absolute",
"steps": [
{
"color": "green",
"value": null
},
{
"color": "red",
"value": 80
}
]
}
},
"overrides": [
{
"__systemRef": "hideSeriesFrom",
"matcher": {
"id": "byNames",
"options": {
"mode": "exclude",
"names": [
"{__name__=\"failed_sonda_msgs_total\", instance=\"sonda:8004\", job=\"nwaku\"}"
],
"prefix": "All except:",
"readOnly": true
}
},
"properties": [
{
"id": "custom.hideFrom",
"value": {
"legend": false,
"tooltip": false,
"viz": true
}
}
]
}
]
},
"gridPos": {
"h": 8,
"w": 12,
"x": 12,
"y": 0
},
"id": 2,
"options": {
"legend": {
"calcs": [],
"displayMode": "list",
"placement": "bottom",
"showLegend": true
},
"tooltip": {
"mode": "single",
"sort": "none"
}
},
"targets": [
{
"datasource": {
"type": "prometheus",
"uid": "PBFA97CFB590B2093"
},
"disableTextWrap": false,
"editorMode": "builder",
"expr": "failed_sonda_msgs_total",
"fullMetaSearch": false,
"includeNullMetadata": true,
"instant": false,
"legendFormat": "__auto",
"range": true,
"refId": "A",
"useBackend": false
}
],
"title": "Failed Sonda Messages",
"type": "timeseries"
},
{
"datasource": {
"type": "prometheus",
"uid": "PBFA97CFB590B2093"
},
"description": "Store responses including the latest Sonda message ",
"fieldConfig": {
"defaults": {
"color": {
"mode": "palette-classic"
},
"custom": {
"axisBorderShow": false,
"axisCenteredZero": false,
"axisColorMode": "text",
"axisLabel": "",
"axisPlacement": "auto",
"barAlignment": 0,
"drawStyle": "line",
"fillOpacity": 0,
"gradientMode": "none",
"hideFrom": {
"legend": false,
"tooltip": false,
"viz": false
},
"insertNulls": false,
"lineInterpolation": "linear",
"lineWidth": 1,
"pointSize": 5,
"scaleDistribution": {
"type": "linear"
},
"showPoints": "auto",
"spanNulls": false,
"stacking": {
"group": "A",
"mode": "none"
},
"thresholdsStyle": {
"mode": "off"
}
},
"mappings": [],
"thresholds": {
"mode": "absolute",
"steps": [
{
"color": "green",
"value": null
},
{
"color": "red",
"value": 80
}
]
}
},
"overrides": [
{
"__systemRef": "hideSeriesFrom",
"matcher": {
"id": "byNames",
"options": {
"mode": "exclude",
"names": [
"{__name__=\"successful_store_queries_total\" , node=\"/dns4/store-02.gc-us-central1-a.shards.test.status.im/tcp/30303/p2p/16Uiu2HAmJnVR7ZzFaYvciPVafUXuYGLHPzSUigqAmeNw9nJUVGeM\"}"
],
"prefix": "All except:",
"readOnly": true
}
},
"properties": [
{
"id": "custom.hideFrom",
"value": {
"legend": false,
"tooltip": false,
"viz": true
}
}
]
}
]
},
"gridPos": {
"h": 8,
"w": 12,
"x": 0,
"y": 8
},
"id": 3,
"options": {
"legend": {
"calcs": [],
"displayMode": "list",
"placement": "bottom",
"showLegend": true
},
"tooltip": {
"mode": "single",
"sort": "none"
}
},
"targets": [
{
"datasource": {
"type": "prometheus",
"uid": "PBFA97CFB590B2093"
},
"disableTextWrap": false,
"editorMode": "builder",
"expr": "successful_store_queries_total",
"fullMetaSearch": false,
"includeNullMetadata": true,
"instant": false,
"legendFormat": "{__name__=\"{{__name__}}\" , node=\"{{node}}\"}",
"range": true,
"refId": "A",
"useBackend": false
}
],
"title": "Successful Store Responses",
"type": "timeseries"
},
{
"datasource": {
"type": "prometheus",
"uid": "PBFA97CFB590B2093"
},
"description": "Store queries with a non-200 response",
"fieldConfig": {
"defaults": {
"color": {
"mode": "palette-classic"
},
"custom": {
"axisBorderShow": false,
"axisCenteredZero": false,
"axisColorMode": "text",
"axisLabel": "",
"axisPlacement": "auto",
"barAlignment": 0,
"drawStyle": "line",
"fillOpacity": 0,
"gradientMode": "none",
"hideFrom": {
"legend": false,
"tooltip": false,
"viz": false
},
"insertNulls": false,
"lineInterpolation": "linear",
"lineWidth": 1,
"pointSize": 5,
"scaleDistribution": {
"type": "linear"
},
"showPoints": "auto",
"spanNulls": false,
"stacking": {
"group": "A",
"mode": "none"
},
"thresholdsStyle": {
"mode": "off"
}
},
"mappings": [],
"thresholds": {
"mode": "absolute",
"steps": [
{
"color": "green",
"value": null
},
{
"color": "red",
"value": 80
}
]
}
},
"overrides": [
{
"__systemRef": "hideSeriesFrom",
"matcher": {
"id": "byNames",
"options": {
"mode": "exclude",
"names": [
"{__name__=\"failed_store_queries_total\" , error=\"300 BAD_RESPONSE: Future operation cancelled!\", node=\"/dns4/store-01.ac-cn-hongkong-c.shards.test.status.im/tcp/30303/p2p/16Uiu2HAm2M7xs7cLPc3jamawkEqbr7cUJX11uvY7LxQ6WFUdUKUT\"}"
],
"prefix": "All except:",
"readOnly": true
}
},
"properties": [
{
"id": "custom.hideFrom",
"value": {
"legend": false,
"tooltip": false,
"viz": true
}
}
]
}
]
},
"gridPos": {
"h": 8,
"w": 12,
"x": 12,
"y": 8
},
"id": 4,
"options": {
"legend": {
"calcs": [],
"displayMode": "list",
"placement": "bottom",
"showLegend": true
},
"tooltip": {
"mode": "single",
"sort": "none"
}
},
"targets": [
{
"datasource": {
"type": "prometheus",
"uid": "PBFA97CFB590B2093"
},
"disableTextWrap": false,
"editorMode": "builder",
"expr": "failed_store_queries_total",
"fullMetaSearch": false,
"includeNullMetadata": true,
"instant": false,
"legendFormat": "{__name__=\"{{__name__}}\" , error=\"{{error}}\", node=\"{{node}}\"}",
"range": true,
"refId": "A",
"useBackend": false
}
],
"title": "Failed Store Queries",
"type": "timeseries"
},
{
"datasource": {
"type": "prometheus",
"uid": "PBFA97CFB590B2093"
},
"description": "Store responses that didn't include our latest Sonda message",
"fieldConfig": {
"defaults": {
"color": {
"mode": "palette-classic"
},
"custom": {
"axisBorderShow": false,
"axisCenteredZero": false,
"axisColorMode": "text",
"axisLabel": "",
"axisPlacement": "auto",
"barAlignment": 0,
"drawStyle": "line",
"fillOpacity": 0,
"gradientMode": "none",
"hideFrom": {
"legend": false,
"tooltip": false,
"viz": false
},
"insertNulls": false,
"lineInterpolation": "linear",
"lineWidth": 1,
"pointSize": 5,
"scaleDistribution": {
"type": "linear"
},
"showPoints": "auto",
"spanNulls": false,
"stacking": {
"group": "A",
"mode": "none"
},
"thresholdsStyle": {
"mode": "off"
}
},
"mappings": [],
"thresholds": {
"mode": "absolute",
"steps": [
{
"color": "green",
"value": null
},
{
"color": "red",
"value": 80
}
]
}
},
"overrides": [
{
"__systemRef": "hideSeriesFrom",
"matcher": {
"id": "byNames",
"options": {
"mode": "exclude",
"names": [
"{__name__=\"empty_store_responses_total\" , node=\"/dns4/store-01.do-ams3.shards.test.status.im/tcp/30303/p2p/16Uiu2HAmAUdrQ3uwzuE4Gy4D56hX6uLKEeerJAnhKEHZ3DxF1EfT\"}"
],
"prefix": "All except:",
"readOnly": true
}
},
"properties": [
{
"id": "custom.hideFrom",
"value": {
"legend": false,
"tooltip": false,
"viz": true
}
}
]
}
]
},
"gridPos": {
"h": 8,
"w": 12,
"x": 0,
"y": 16
},
"id": 5,
"options": {
"legend": {
"calcs": [],
"displayMode": "list",
"placement": "bottom",
"showLegend": true
},
"tooltip": {
"mode": "single",
"sort": "none"
}
},
"targets": [
{
"datasource": {
"type": "prometheus",
"uid": "PBFA97CFB590B2093"
},
"disableTextWrap": false,
"editorMode": "builder",
"expr": "empty_store_responses_total",
"fullMetaSearch": false,
"includeNullMetadata": true,
"instant": false,
"legendFormat": "{__name__=\"{{__name__}}\" , node=\"{{node}}\"}",
"range": true,
"refId": "A",
"useBackend": false
}
],
"title": "Empty Store Responses",
"type": "timeseries"
},
{
"datasource": {
"type": "prometheus",
"uid": "PBFA97CFB590B2093"
},
"description": "Latency of each store query",
"fieldConfig": {
"defaults": {
"color": {
"mode": "palette-classic"
},
"custom": {
"axisBorderShow": false,
"axisCenteredZero": false,
"axisColorMode": "text",
"axisLabel": "",
"axisPlacement": "auto",
"barAlignment": 0,
"drawStyle": "line",
"fillOpacity": 0,
"gradientMode": "none",
"hideFrom": {
"legend": false,
"tooltip": false,
"viz": false
},
"insertNulls": false,
"lineInterpolation": "linear",
"lineWidth": 1,
"pointSize": 5,
"scaleDistribution": {
"type": "linear"
},
"showPoints": "auto",
"spanNulls": false,
"stacking": {
"group": "A",
"mode": "none"
},
"thresholdsStyle": {
"mode": "off"
}
},
"mappings": [],
"thresholds": {
"mode": "absolute",
"steps": [
{
"color": "green",
"value": null
},
{
"color": "red",
"value": 80
}
]
}
},
"overrides": [
{
"__systemRef": "hideSeriesFrom",
"matcher": {
"id": "byNames",
"options": {
"mode": "exclude",
"names": [
"{__name__=\"store_query_latency\" , node=\"/dns4/store-01.ac-cn-hongkong-c.shards.test.status.im/tcp/30303/p2p/16Uiu2HAm2M7xs7cLPc3jamawkEqbr7cUJX11uvY7LxQ6WFUdUKUT\"}"
],
"prefix": "All except:",
"readOnly": true
}
},
"properties": [
{
"id": "custom.hideFrom",
"value": {
"legend": false,
"tooltip": false,
"viz": true
}
}
]
}
]
},
"gridPos": {
"h": 8,
"w": 12,
"x": 12,
"y": 16
},
"id": 6,
"options": {
"legend": {
"calcs": [],
"displayMode": "list",
"placement": "bottom",
"showLegend": true
},
"tooltip": {
"mode": "single",
"sort": "none"
}
},
"targets": [
{
"datasource": {
"type": "prometheus",
"uid": "PBFA97CFB590B2093"
},
"disableTextWrap": false,
"editorMode": "builder",
"expr": "store_query_latency",
"fullMetaSearch": false,
"includeNullMetadata": true,
"instant": false,
"legendFormat": "{__name__=\"{{__name__}}\" , node=\"{{node}}\"}",
"range": true,
"refId": "A",
"useBackend": false
}
],
"title": "Store Query Latency (seconds)",
"type": "timeseries"
},
{
"datasource": {
"type": "prometheus",
"uid": "PBFA97CFB590B2093"
},
"description": "Latency of store queries",
"fieldConfig": {
"defaults": {
"color": {
"mode": "thresholds"
},
"custom": {
"fillOpacity": 80,
"gradientMode": "none",
"hideFrom": {
"legend": false,
"tooltip": false,
"viz": false
},
"lineWidth": 1
},
"mappings": [],
"thresholds": {
"mode": "absolute",
"steps": [
{
"color": "green",
"value": null
},
{
"color": "red",
"value": 80
}
]
}
},
"overrides": [
{
"__systemRef": "hideSeriesFrom",
"matcher": {
"id": "byNames",
"options": {
"mode": "exclude",
"names": [
"{__name__=\"store_query_latency\" , node=\"/dns4/store-01.do-ams3.shards.test.status.im/tcp/30303/p2p/16Uiu2HAmAUdrQ3uwzuE4Gy4D56hX6uLKEeerJAnhKEHZ3DxF1EfT\"}"
],
"prefix": "All except:",
"readOnly": true
}
},
"properties": [
{
"id": "custom.hideFrom",
"value": {
"legend": false,
"tooltip": false,
"viz": true
}
}
]
}
]
},
"gridPos": {
"h": 8,
"w": 12,
"x": 0,
"y": 24
},
"id": 7,
"options": {
"bucketOffset": 0,
"legend": {
"calcs": [],
"displayMode": "list",
"placement": "bottom",
"showLegend": true
}
},
"targets": [
{
"datasource": {
"type": "prometheus",
"uid": "PBFA97CFB590B2093"
},
"disableTextWrap": false,
"editorMode": "builder",
"expr": "store_query_latency",
"fullMetaSearch": false,
"includeNullMetadata": true,
"instant": false,
"legendFormat": "{__name__=\"{{__name__}}\" , node=\"{{node}}\"}",
"range": true,
"refId": "A",
"useBackend": false
}
],
"title": "Store Query Latency (seconds)",
"type": "histogram"
},
{
"datasource": {
"type": "prometheus",
"uid": "PBFA97CFB590B2093"
},
"description": "Node health according to the configured health threshold. 1 means healthy, 0 not.",
"fieldConfig": {
"defaults": {
"color": {
"mode": "palette-classic"
},
"custom": {
"axisBorderShow": false,
"axisCenteredZero": false,
"axisColorMode": "text",
"axisLabel": "",
"axisPlacement": "auto",
"barAlignment": 0,
"drawStyle": "line",
"fillOpacity": 0,
"gradientMode": "none",
"hideFrom": {
"legend": false,
"tooltip": false,
"viz": false
},
"insertNulls": false,
"lineInterpolation": "stepAfter",
"lineWidth": 1,
"pointSize": 5,
"scaleDistribution": {
"type": "linear"
},
"showPoints": "auto",
"spanNulls": false,
"stacking": {
"group": "A",
"mode": "none"
},
"thresholdsStyle": {
"mode": "off"
}
},
"mappings": [],
"thresholds": {
"mode": "absolute",
"steps": [
{
"color": "green",
"value": null
},
{
"color": "red",
"value": 80
}
]
}
},
"overrides": [
{
"__systemRef": "hideSeriesFrom",
"matcher": {
"id": "byNames",
"options": {
"mode": "exclude",
"names": [
"{__name__=\"node_health\" , node=\"/dns4/store-01.gc-us-central1-a.shards.test.status.im/tcp/30303/p2p/16Uiu2HAmMELCo218hncCtTvC2Dwbej3rbyHQcR8erXNnKGei7WPZ\"}"
],
"prefix": "All except:",
"readOnly": true
}
},
"properties": [
{
"id": "custom.hideFrom",
"value": {
"legend": false,
"tooltip": false,
"viz": true
}
}
]
}
]
},
"gridPos": {
"h": 8,
"w": 12,
"x": 12,
"y": 24
},
"id": 8,
"options": {
"legend": {
"calcs": [],
"displayMode": "list",
"placement": "bottom",
"showLegend": true
},
"tooltip": {
"mode": "single",
"sort": "none"
}
},
"targets": [
{
"datasource": {
"type": "prometheus",
"uid": "PBFA97CFB590B2093"
},
"disableTextWrap": false,
"editorMode": "builder",
"expr": "node_health",
"fullMetaSearch": false,
"includeNullMetadata": true,
"instant": false,
"legendFormat": "{__name__=\"{{__name__}}\" , node=\"{{node}}\"}",
"range": true,
"refId": "A",
"useBackend": false
}
],
"title": "Node health",
"type": "timeseries"
}
],
"refresh": "",
"schemaVersion": 39,
"tags": [],
"templating": {
"list": []
},
"time": {
"from": "now-5m",
"to": "now"
},
"timepicker": {},
"timezone": "",
"title": "sonda-monitoring",
"uid": "cbd1b6c8-63d2-41f3-b57b-a776ec8fa23e",
"version": 1,
"weekStart": ""
}

View File

@ -0,0 +1,11 @@
apiVersion: 1
datasources:
- name: Prometheus
type: prometheus
access: proxy
org_id: 1
url: http://prometheus:9090
is_default: true
version: 1
editable: true

View File

@ -0,0 +1,2 @@
#GF_INSTALL_PLUGINS=grafana-worldmap-panel,grafana-piechart-panel,digrich-bubblechart-panel,yesoreyeram-boomtheme-panel,briangann-gauge-panel,jdbranham-diagram-panel,agenty-flowcharting-panel,citilogics-geoloop-panel,savantly-heatmap-panel,mtanda-histogram-panel,pierosavi-imageit-panel,michaeldmoore-multistat-panel,zuburqan-parity-report-panel,natel-plotly-panel,bessler-pictureit-panel,grafana-polystat-panel,corpglory-progresslist-panel,snuids-radar-panel,fzakaria-simple-config.config.annotations-datasource,vonage-status-panel,snuids-trafficlights-panel,pr0ps-trackmap-panel,alexandra-trackmap-panel,btplc-trend-box-panel
GF_INSTALL_PLUGINS=grafana-worldmap-panel,grafana-piechart-panel,yesoreyeram-boomtheme-panel,briangann-gauge-panel,pierosavi-imageit-panel,bessler-pictureit-panel,vonage-status-panel

View File

@ -0,0 +1,51 @@
instance_name = nwaku dashboard
;[dashboards.json]
;enabled = true
;path = /home/git/grafana/grafana-dashboards/dashboards
#################################### Auth ##########################
[auth]
disable_login_form = false
#################################### Anonymous Auth ##########################
[auth.anonymous]
# enable anonymous access
enabled = true
# specify organization name that should be used for unauthenticated users
;org_name = Public
# specify role for unauthenticated users
org_role = Admin
; org_role = Viewer
;[security]
;admin_user = ocr
;admin_password = ocr
;[users]
# disable user signup / registration
;allow_sign_up = false
# Set to true to automatically assign new users to the default organization (id 1)
;auto_assign_org = true
# Default role new users will be automatically assigned (if disabled above is set to true)
;auto_assign_org_role = Viewer
#################################### SMTP / Emailing ##########################
;[smtp]
;enabled = false
;host = localhost:25
;user =
;password =
;cert_file =
;key_file =
;skip_verify = false
;from_address = admin@grafana.localhost
;[emails]
;welcome_email_on_sign_up = false

View File

@ -0,0 +1,10 @@
global:
scrape_interval: 15s
evaluation_interval: 15s
external_labels:
monitor: "Monitoring"
scrape_configs:
- job_name: "nwaku"
static_configs:
- targets: ["nwaku:8003", "sonda:8004"]

31
apps/sonda/register_rln.sh Executable file
View File

@ -0,0 +1,31 @@
#!/bin/sh
if test -f ./keystore/keystore.json; then
echo "keystore/keystore.json already exists. Use it instead of creating a new one."
echo "Exiting"
exit 1
fi
if test -f .env; then
echo "Using .env file"
. $(pwd)/.env
fi
# TODO: Set nwaku release when ready instead of quay
if test -n "${ETH_CLIENT_ADDRESS}"; then
echo "ETH_CLIENT_ADDRESS variable was renamed to RLN_RELAY_ETH_CLIENT_ADDRESS"
echo "Please update your .env file"
exit 1
fi
docker run -v $(pwd)/keystore:/keystore/:Z harbor.status.im/wakuorg/nwaku:v0.30.1 generateRlnKeystore \
--rln-relay-eth-client-address=${RLN_RELAY_ETH_CLIENT_ADDRESS} \
--rln-relay-eth-private-key=${ETH_TESTNET_KEY} \
--rln-relay-eth-contract-address=0xCB33Aa5B38d79E3D9Fa8B10afF38AA201399a7e3 \
--rln-relay-cred-path=/keystore/keystore.json \
--rln-relay-cred-password="${RLN_RELAY_CRED_PASSWORD}" \
--rln-relay-user-message-limit=20 \
--execute

111
apps/sonda/run_node.sh Normal file
View File

@ -0,0 +1,111 @@
#!/bin/sh
echo "I am a nwaku node"
if test -n "${ETH_CLIENT_ADDRESS}" -o ; then
echo "ETH_CLIENT_ADDRESS variable was renamed to RLN_RELAY_ETH_CLIENT_ADDRESS"
echo "Please update your .env file"
exit 1
fi
if [ -z "${RLN_RELAY_ETH_CLIENT_ADDRESS}" ] && [ "${CLUSTER_ID}" -eq 1 ]; then
echo "Missing Eth client address, please refer to README.md for detailed instructions"
exit 1
fi
if [ "${CLUSTER_ID}" -ne 1 ]; then
echo "CLUSTER_ID is not equal to 1, clearing RLN configurations"
RLN_RELAY_CRED_PATH=""
RLN_RELAY_ETH_CLIENT_ADDRESS=""
RLN_RELAY_CRED_PASSWORD=""
fi
MY_EXT_IP=$(wget -qO- https://api4.ipify.org)
DNS_WSS_CMD=
if [ -n "${DOMAIN}" ]; then
LETSENCRYPT_PATH=/etc/letsencrypt/live/${DOMAIN}
if ! [ -d "${LETSENCRYPT_PATH}" ]; then
apk add --no-cache certbot
certbot certonly\
--non-interactive\
--agree-tos\
--no-eff-email\
--no-redirect\
--email admin@${DOMAIN}\
-d ${DOMAIN}\
--standalone
fi
if ! [ -e "${LETSENCRYPT_PATH}/privkey.pem" ]; then
echo "The certificate does not exist"
sleep 60
exit 1
fi
WS_SUPPORT="--websocket-support=true"
WSS_SUPPORT="--websocket-secure-support=true"
WSS_KEY="--websocket-secure-key-path=${LETSENCRYPT_PATH}/privkey.pem"
WSS_CERT="--websocket-secure-cert-path=${LETSENCRYPT_PATH}/cert.pem"
DNS4_DOMAIN="--dns4-domain-name=${DOMAIN}"
DNS_WSS_CMD="${WS_SUPPORT} ${WSS_SUPPORT} ${WSS_CERT} ${WSS_KEY} ${DNS4_DOMAIN}"
fi
if [ -n "${NODEKEY}" ]; then
NODEKEY=--nodekey=${NODEKEY}
fi
if [ "${CLUSTER_ID}" -eq 1 ]; then
RLN_RELAY_CRED_PATH=--rln-relay-cred-path=${RLN_RELAY_CRED_PATH:-/keystore/keystore.json}
RLN_TREE_PATH=--rln-relay-tree-path="/etc/rln_tree"
fi
if [ -n "${RLN_RELAY_CRED_PASSWORD}" ]; then
RLN_RELAY_CRED_PASSWORD=--rln-relay-cred-password="${RLN_RELAY_CRED_PASSWORD}"
fi
if [ -n "${RLN_RELAY_ETH_CLIENT_ADDRESS}" ]; then
RLN_RELAY_ETH_CLIENT_ADDRESS=--rln-relay-eth-client-address="${RLN_RELAY_ETH_CLIENT_ADDRESS}"
fi
# TO DO: configure bootstrap nodes in env
exec /usr/bin/wakunode\
--relay=true\
--filter=false\
--lightpush=false\
--keep-alive=true\
--max-connections=150\
--cluster-id="${CLUSTER_ID}"\
--discv5-discovery=true\
--discv5-udp-port=9005\
--discv5-enr-auto-update=True\
--log-level=DEBUG\
--tcp-port=30304\
--metrics-server=True\
--metrics-server-port=8003\
--metrics-server-address=0.0.0.0\
--rest=true\
--rest-admin=true\
--rest-address=0.0.0.0\
--rest-port=8645\
--rest-allow-origin="waku-org.github.io"\
--rest-allow-origin="localhost:*"\
--nat=extip:"${MY_EXT_IP}"\
--store=false\
--pubsub-topic="/waku/2/rs/${CLUSTER_ID}/${SHARD}"\
--discv5-bootstrap-node="enr:-QEKuECA0zhRJej2eaOoOPddNcYr7-5NdRwuoLCe2EE4wfEYkAZhFotg6Kkr8K15pMAGyUyt0smHkZCjLeld0BUzogNtAYJpZIJ2NIJpcISnYxMvim11bHRpYWRkcnO4WgAqNiVib290LTAxLmRvLWFtczMuc2hhcmRzLnRlc3Quc3RhdHVzLmltBnZfACw2JWJvb3QtMDEuZG8tYW1zMy5zaGFyZHMudGVzdC5zdGF0dXMuaW0GAbveA4Jyc40AEAUAAQAgAEAAgAEAiXNlY3AyNTZrMaEC3rRtFQSgc24uWewzXaxTY8hDAHB8sgnxr9k8Rjb5GeSDdGNwgnZfg3VkcIIjKIV3YWt1Mg0"\
--discv5-bootstrap-node="enr:-QEcuEAgXDqrYd_TrpUWtn3zmxZ9XPm7O3GS6lV7aMJJOTsbOAAeQwSd_eoHcCXqVzTUtwTyB4855qtbd8DARnExyqHPAYJpZIJ2NIJpcIQihw1Xim11bHRpYWRkcnO4bAAzNi5ib290LTAxLmdjLXVzLWNlbnRyYWwxLWEuc2hhcmRzLnRlc3Quc3RhdHVzLmltBnZfADU2LmJvb3QtMDEuZ2MtdXMtY2VudHJhbDEtYS5zaGFyZHMudGVzdC5zdGF0dXMuaW0GAbveA4Jyc40AEAUAAQAgAEAAgAEAiXNlY3AyNTZrMaECxjqgDQ0WyRSOilYU32DA5k_XNlDis3m1VdXkK9xM6kODdGNwgnZfg3VkcIIjKIV3YWt1Mg0"\
--discv5-bootstrap-node="enr:-QEcuEAX6Qk-vVAoJLxR4A_4UVogGhvQrqKW4DFKlf8MA1PmCjgowL-LBtSC9BLjXbb8gf42FdDHGtSjEvvWKD10erxqAYJpZIJ2NIJpcIQI2hdMim11bHRpYWRkcnO4bAAzNi5ib290LTAxLmFjLWNuLWhvbmdrb25nLWMuc2hhcmRzLnRlc3Quc3RhdHVzLmltBnZfADU2LmJvb3QtMDEuYWMtY24taG9uZ2tvbmctYy5zaGFyZHMudGVzdC5zdGF0dXMuaW0GAbveA4Jyc40AEAUAAQAgAEAAgAEAiXNlY3AyNTZrMaEDP7CbRk-YKJwOFFM4Z9ney0GPc7WPJaCwGkpNRyla7mCDdGNwgnZfg3VkcIIjKIV3YWt1Mg0"\
${RLN_RELAY_CRED_PATH}\
${RLN_RELAY_CRED_PASSWORD}\
${RLN_RELAY_TREE_PATH}\
${RLN_RELAY_ETH_CLIENT_ADDRESS}\
${DNS_WSS_CMD}\
${NODEKEY}\
${EXTRA_ARGS}

199
apps/sonda/sonda.py Normal file
View File

@ -0,0 +1,199 @@
import requests
import time
import json
import os
import base64
import sys
import urllib.parse
import requests
import argparse
from prometheus_client import Counter, Gauge, start_http_server
# Content topic where Sona messages are going to be sent
SONDA_CONTENT_TOPIC = '/sonda/2/polls/proto'
# Prometheus metrics
successful_sonda_msgs = Counter('successful_sonda_msgs', 'Number of successful Sonda messages sent')
failed_sonda_msgs = Counter('failed_sonda_msgs', 'Number of failed Sonda messages attempts')
successful_store_queries = Counter('successful_store_queries', 'Number of successful store queries', ['node'])
failed_store_queries = Counter('failed_store_queries', 'Number of failed store queries', ['node', 'error'])
empty_store_responses = Counter('empty_store_responses', "Number of store responses without the latest Sonda message", ['node'])
store_query_latency = Gauge('store_query_latency', 'Latency of the last store query in seconds', ['node'])
consecutive_successful_responses = Gauge('consecutive_successful_responses', 'Consecutive successful store responses', ['node'])
node_health = Gauge('node_health', "Binary indicator of a node's health. 1 is healthy, 0 is not", ['node'])
# Argparser configuration
parser = argparse.ArgumentParser(description='')
parser.add_argument('-p', '--pubsub-topic', type=str, help='pubsub topic', default='/waku/2/rs/1/0')
parser.add_argument('-d', '--delay-seconds', type=int, help='delay in second between messages', default=60)
parser.add_argument('-n', '--store-nodes', type=str, help='comma separated list of store nodes to query', required=True)
parser.add_argument('-t', '--health-threshold', type=int, help='consecutive successful store requests to consider a store node healthy', default=5)
args = parser.parse_args()
# Sends Sonda message. Returns True if successful, False otherwise
def send_sonda_msg(rest_address, pubsub_topic, content_topic, timestamp):
message = "Hi, I'm Sonda"
base64_message = base64.b64encode(message.encode('utf-8')).decode('ascii')
body = {
'payload': base64_message,
'contentTopic': content_topic,
'version': 1,
'timestamp': timestamp
}
encoded_pubsub_topic = urllib.parse.quote(pubsub_topic, safe='')
url = f'{rest_address}/relay/v1/messages/{encoded_pubsub_topic}'
headers = {'content-type': 'application/json'}
print(f'Waku REST API: {url} PubSubTopic: {pubsub_topic}, ContentTopic: {content_topic}')
try:
start_time = time.time()
response = requests.post(url, json=body, headers=headers, timeout=10)
elapsed_seconds = time.time() - start_time
print(f'Response from {rest_address}: status:{response.status_code} content:{response.text} [{elapsed_seconds:.4f} s.]')
if response.status_code == 200:
successful_sonda_msgs.inc()
return True
else:
response.raise_for_status()
except requests.RequestException as e:
print(f'Error sending request: {e}')
failed_sonda_msgs.inc()
return False
# We return true if both our node and the queried Store node returned a 200
# If our message isn't found but we did get a store 200 response, this function still returns true
def check_store_response(json_response, store_node, timestamp):
# Check for the store node status code
if json_response.get('statusCode') != 200:
error = f"{json_response.get('statusCode')} {json_response.get('statusDesc')}"
print(f'Failed performing store query {error}')
failed_store_queries.labels(node=store_node, error=error).inc()
consecutive_successful_responses.labels(node=store_node).set(0)
return False
messages = json_response.get('messages')
# If there's no message in the response, increase counters and return
if not messages:
print("No messages in store response")
empty_store_responses.labels(node=store_node).inc()
consecutive_successful_responses.labels(node=store_node).set(0)
return True
# Search for the Sonda message in the returned messages
for message in messages:
# If message field is missing in current message, continue
if not message.get("message"):
print("Could not retrieve message")
continue
# If a message is found with the same timestamp as sonda message, increase counters and return
if timestamp == message.get('message').get('timestamp'):
print(f'Found Sonda message in store response node={store_node}')
successful_store_queries.labels(node=store_node).inc()
consecutive_successful_responses.labels(node=store_node).inc()
return True
# If our message wasn't found in the returned messages, increase counter and return
empty_store_responses.labels(node=store_node).inc()
consecutive_successful_responses.labels(node=store_node).set(0)
return True
def send_store_query(rest_address, store_node, encoded_pubsub_topic, encoded_content_topic, timestamp):
url = f'{rest_address}/store/v3/messages'
params = {
'peerAddr': urllib.parse.quote(store_node, safe=''),
'pubsubTopic': encoded_pubsub_topic,
'contentTopics': encoded_content_topic,
'includeData': 'true',
'startTime': timestamp
}
s_time = time.time()
try:
print(f'Sending store request to {store_node}')
response = requests.get(url, params=params)
except Exception as e:
print(f'Error sending request: {e}')
failed_store_queries.labels(node=store_node, error=str(e)).inc()
consecutive_successful_responses.labels(node=store_node).set(0)
return False
elapsed_seconds = time.time() - s_time
print(f'Response from {rest_address}: status:{response.status_code} [{elapsed_seconds:.4f} s.]')
if response.status_code != 200:
failed_store_queries.labels(node=store_node, error=f'{response.status_code} {response.content}').inc()
consecutive_successful_responses.labels(node=store_node).set(0)
return False
# Parse REST response into JSON
try:
json_response = response.json()
except Exception as e:
print(f'Error parsing response JSON: {e}')
failed_store_queries.labels(node=store_node, error="JSON parse error").inc()
consecutive_successful_responses.labels(node=store_node).set(0)
return False
# Analyze Store response. Return false if response is incorrect or has an error status
if not check_store_response(json_response, store_node, timestamp):
return False
store_query_latency.labels(node=store_node).set(elapsed_seconds)
return True
def send_store_queries(rest_address, store_nodes, pubsub_topic, content_topic, timestamp):
print(f'Sending store queries. nodes = {store_nodes}')
encoded_pubsub_topic = urllib.parse.quote(pubsub_topic, safe='')
encoded_content_topic = urllib.parse.quote(content_topic, safe='')
for node in store_nodes:
send_store_query(rest_address, node, encoded_pubsub_topic, encoded_content_topic, timestamp)
def main():
print(f'Running Sonda with args={args}')
store_nodes = []
if args.store_nodes is not None:
store_nodes = [s.strip() for s in args.store_nodes.split(",")]
print(f'Store nodes to query: {store_nodes}')
# Start Prometheus HTTP server at port 8004
start_http_server(8004)
node_rest_address = 'http://nwaku:8645'
while True:
timestamp = time.time_ns()
# Send Sonda message
res = send_sonda_msg(node_rest_address, args.pubsub_topic, SONDA_CONTENT_TOPIC, timestamp)
print(f'sleeping: {args.delay_seconds} seconds')
time.sleep(args.delay_seconds)
# Only send store query if message was successfully published
if(res):
send_store_queries(node_rest_address, store_nodes, args.pubsub_topic, SONDA_CONTENT_TOPIC, timestamp)
# Update node health metrics
for store_node in store_nodes:
if consecutive_successful_responses.labels(node=store_node)._value.get() >= args.health_threshold:
node_health.labels(node=store_node).set(1)
else:
node_health.labels(node=store_node).set(0)
main()