Having first alert working

This commit is contained in:
Gabriel mermelstein 2024-04-24 17:52:22 +03:00
parent 8551bffaf0
commit 722c58ab8b
No known key found for this signature in database
GPG Key ID: 82B8134785FEAE0D
2 changed files with 4 additions and 39 deletions

View File

@@ -1,49 +1,11 @@
---
groups:
- name: waku
rules:
- alert: HighNimWakuNodeTraffic
expr: >
abs(
netdata_net_net_kilobits_persec_average{
family!~"^(wg|veth|br|docker).*",
fleet=~"(shards|waku|status|wakuv2)\\..*"
}) > 40000
for: 5m
annotations:
summary: "Too high traffic for {{ $labels.instance }}"
description: "Host {{ $labels.instance }} running nim-waku has {{ $labels.dimension }} traffic higher 40Mbps"
- alert: HighNimWakuMemUsage
expr: >
nim_gc_mem_bytes{
fleet=~"(shards|waku|status|wakuv2)\\..*"
} > 1073741824
nim_gc_mem_bytes{} > 1073741824
for: 5m
annotations:
summary: "Too high memory usage for {{ $labels.instance }}"
description: "Host {{ $labels.instance }} running nim-waku has GC memory usage higher 1GB"
current_value: "{{ $value }}"
- alert: HighNimWakuNodeOpenSockets
expr: >
netdata_ipv4_sockstat_tcp_sockets_sockets_average{
fleet=~"(shards|waku|status|wakuv2)\\..*"
} > 300
for: 5m
annotations:
summary: "Too high open sockets for {{ $labels.instance }}"
description: "Host {{ $labels.instance }} running nim-waku has more than 300 open sockets"
current_value: "{{ $value }}"
- record: job:waku_libp2p_peers
expr: libp2p_peers{fleet=~"(shards|waku|status|wakuv2)\\..*"}
- alert: NimWakuPeersDecrease
expr: >
(job:waku_libp2p_peers / avg_over_time(job:waku_libp2p_peers[12h])) < 0.50
for: 15m
annotations:
summary: "Drop of libp2p_peers on {{ $labels.instance }}"
description: "Host {{ $labels.instance }} running nim-waku has more than 50% drop of peers compared to 12h average"
current_value: "{{ $value }}"

View File

@@ -10,6 +10,9 @@ alerting:
static_configs:
- targets: [ 'alertmanager:9093' ]
rule_files:
- "./alert-rules.yml"
scrape_configs:
- job_name: cadvisor
scrape_interval: 5s