feature(tracing): OTLP metrics and preconfigured dashboard (#969)

* Use metrics endpoint from the cfgsync params

* Pin Prometheus version for OTLP functionality

* Add a predefined dashboard for testnet metrics.
This commit is contained in:
gusto 2024-12-30 15:33:29 +07:00 committed by GitHub
parent f83e725b0d
commit 2f89d6c344
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
10 changed files with 260 additions and 11 deletions

View File

@ -4,13 +4,13 @@ services:
prometheus:
container_name: prometheus
image: prom/prometheus:latest
image: prom/prometheus:v3.0.1
volumes:
- ./testnet/monitoring/prometheus.yml:/etc/prometheus/prometheus.yml:z
command:
- --config.file=/etc/prometheus/prometheus.yml
- --storage.tsdb.retention.time=7d
- --enable-feature=otlp-write-receiver
- --web.enable-otlp-receiver
ports:
- 127.0.0.1:9090:9090
restart: on-failure
@ -28,6 +28,8 @@ services:
volumes:
- ./testnet/monitoring/grafana/grafana.ini:/etc/grafana/grafana.ini:z
- ./testnet/monitoring/grafana/datasources.yaml:/etc/grafana/provisioning/datasources/datasources.yaml:z
- ./testnet/monitoring/grafana/dashboards.yml:/etc/grafana/provisioning/dashboards/dashboards.yml:z
- ./testnet/monitoring/grafana/dashboards:/var/lib/grafana/dashboards/:z
ports:
- 9091:3000
restart: on-failure

View File

@ -10,6 +10,8 @@ services:
image: nomos:latest
volumes:
- ./testnet:/etc/nomos
depends_on:
- grafana
entrypoint: /etc/nomos/scripts/run_cfgsync.sh
nomos-node-0:
@ -78,13 +80,13 @@ services:
prometheus:
container_name: prometheus
image: prom/prometheus:latest
image: prom/prometheus:v3.0.1
volumes:
- ./testnet/monitoring/prometheus.yml:/etc/prometheus/prometheus.yml:z
command:
- --config.file=/etc/prometheus/prometheus.yml
- --storage.tsdb.retention.time=7d
- --enable-feature=otlp-write-receiver
- --web.enable-otlp-receiver
ports:
- 127.0.0.1:9090:9090
restart: on-failure
@ -102,6 +104,8 @@ services:
volumes:
- ./testnet/monitoring/grafana/grafana.ini:/etc/grafana/grafana.ini:z
- ./testnet/monitoring/grafana/datasources.yaml:/etc/grafana/provisioning/datasources/datasources.yaml:z
- ./testnet/monitoring/grafana/dashboards.yml:/etc/grafana/provisioning/dashboards/dashboards.yml:z
- ./testnet/monitoring/grafana/dashboards:/var/lib/grafana/dashboards/:z
ports:
- 9091:3000
restart: on-failure

View File

@ -456,6 +456,7 @@ where
&mut self.block_subscription_sender
)
.await;
tracing::info!(counter.consensus_processed_blocks = 1);
}
_ = slot_timer.next() => {

View File

@ -14,7 +14,7 @@ RUN cargo install cargo-binstall
RUN cargo binstall -y cargo-risczero
RUN cargo risczero install
RUN cargo build --release --all --features metrics
RUN cargo build --release
# NODE IMAGE ----------------------------------------------------------

View File

@ -204,9 +204,7 @@ fn tracing_config_for_grafana(params: TracingParams, identifier: String) -> Gene
}),
filter: FilterLayer::None,
metrics: MetricsLayer::Otlp(OtlpMetricsConfig {
endpoint: "http://127.0.0.1:9090/api/v1/otlp/v1/metrics"
.try_into()
.unwrap(),
endpoint: params.metrics_endpoint,
host_identifier: identifier,
}),
level: Level::INFO,

View File

@ -0,0 +1,8 @@
# Grafana dashboard provisioning: registers a file-based provider so that
# Grafana loads the dashboard JSON files found under `options.path`.
apiVersion: 1
providers:
  - name: 'default'
    orgId: 1
    # Empty folder name: dashboards are not placed in a named folder.
    folder: ''
    type: 'file'
    options:
      # Must match the container path that the compose files mount the
      # ./testnet/monitoring/grafana/dashboards directory to.
      path: '/var/lib/grafana/dashboards'

View File

@ -0,0 +1,237 @@
{
"annotations": {
"list": [
{
"builtIn": 1,
"datasource": {
"type": "grafana",
"uid": "-- Grafana --"
},
"enable": true,
"hide": true,
"iconColor": "rgba(0, 211, 255, 1)",
"name": "Annotations & Alerts",
"type": "dashboard"
}
]
},
"editable": true,
"fiscalYearStartMonth": 0,
"graphTooltip": 0,
"id": 1,
"links": [],
"panels": [
{
"datasource": {
"type": "prometheus",
"uid": "PBFA97CFB590B2093"
},
"fieldConfig": {
"defaults": {
"color": {
"mode": "palette-classic"
},
"custom": {
"axisBorderShow": false,
"axisCenteredZero": false,
"axisColorMode": "text",
"axisLabel": "",
"axisPlacement": "auto",
"barAlignment": 0,
"barWidthFactor": 0.6,
"drawStyle": "line",
"fillOpacity": 0,
"gradientMode": "none",
"hideFrom": {
"legend": false,
"tooltip": false,
"viz": false
},
"insertNulls": false,
"lineInterpolation": "linear",
"lineWidth": 1,
"pointSize": 5,
"scaleDistribution": {
"type": "linear"
},
"showPoints": "auto",
"spanNulls": false,
"stacking": {
"group": "A",
"mode": "none"
},
"thresholdsStyle": {
"mode": "off"
}
},
"mappings": [],
"thresholds": {
"mode": "absolute",
"steps": [
{
"color": "green",
"value": null
},
{
"color": "red",
"value": 80
}
]
}
},
"overrides": []
},
"gridPos": {
"h": 8,
"w": 12,
"x": 0,
"y": 0
},
"id": 2,
"options": {
"legend": {
"calcs": [],
"displayMode": "list",
"placement": "bottom",
"showLegend": true
},
"tooltip": {
"mode": "single",
"sort": "none"
}
},
"pluginVersion": "11.4.0",
"targets": [
{
"disableTextWrap": false,
"editorMode": "builder",
"expr": "da_mempool_pending_items",
"fullMetaSearch": false,
"includeNullMetadata": true,
"legendFormat": "__auto",
"range": true,
"refId": "A",
"useBackend": false
}
],
"title": "Mempool: Pending DA blobs",
"type": "timeseries"
},
{
"datasource": {
"type": "prometheus",
"uid": "PBFA97CFB590B2093"
},
"fieldConfig": {
"defaults": {
"color": {
"mode": "palette-classic"
},
"custom": {
"axisBorderShow": false,
"axisCenteredZero": false,
"axisColorMode": "text",
"axisLabel": "",
"axisPlacement": "auto",
"barAlignment": 0,
"barWidthFactor": 0.6,
"drawStyle": "line",
"fillOpacity": 0,
"gradientMode": "none",
"hideFrom": {
"legend": false,
"tooltip": false,
"viz": false
},
"insertNulls": false,
"lineInterpolation": "linear",
"lineWidth": 1,
"pointSize": 5,
"scaleDistribution": {
"type": "linear"
},
"showPoints": "auto",
"spanNulls": false,
"stacking": {
"group": "A",
"mode": "none"
},
"thresholdsStyle": {
"mode": "off"
}
},
"mappings": [],
"thresholds": {
"mode": "absolute",
"steps": [
{
"color": "green",
"value": null
},
{
"color": "red",
"value": 80
}
]
}
},
"overrides": []
},
"gridPos": {
"h": 8,
"w": 12,
"x": 0,
"y": 8
},
"id": 1,
"options": {
"legend": {
"calcs": [],
"displayMode": "list",
"placement": "bottom",
"showLegend": true
},
"tooltip": {
"mode": "single",
"sort": "none"
}
},
"pluginVersion": "11.4.0",
"targets": [
{
"datasource": {
"type": "prometheus",
"uid": "PBFA97CFB590B2093"
},
"disableTextWrap": false,
"editorMode": "builder",
"expr": "consensus_processed_blocks",
"fullMetaSearch": false,
"includeNullMetadata": true,
"legendFormat": "__auto",
"range": true,
"refId": "A",
"useBackend": false
}
],
"title": "Consensus: Processed Blocks",
"type": "timeseries"
}
],
"preload": false,
"schemaVersion": 40,
"tags": [],
"templating": {
"list": []
},
"time": {
"from": "now-6h",
"to": "now"
},
"timepicker": {},
"timezone": "browser",
"title": "Testnet Metrics",
"uid": "ce6ebepwk737kf",
"version": 5,
"weekStart": ""
}

View File

@ -1,5 +1,4 @@
global:
scrape_interval: 15s
evaluation_interval: 15s
external_labels:
monitor: "Monitoring"

View File

@ -11,4 +11,4 @@ export CFG_FILE_PATH="/config.yaml" \
RISC0_DEV_MODE=true
/usr/bin/cfgsync-client && \
exec /usr/bin/nomos-executor /config.yaml --with-metrics
exec /usr/bin/nomos-executor /config.yaml

View File

@ -10,4 +10,4 @@ export CFG_FILE_PATH="/config.yaml" \
RISC0_DEV_MODE=true
/usr/bin/cfgsync-client && \
exec /usr/bin/nomos-node /config.yaml --with-metrics
exec /usr/bin/nomos-node /config.yaml