diff --git a/.gitignore b/.gitignore
index 047d548..65e2a1a 100644
--- a/.gitignore
+++ b/.gitignore
@@ -1,3 +1,4 @@
 **/.DS_Store
 *.env
 !wakusim.env
+alertmanager-config.yml
diff --git a/docker-compose.yml b/docker-compose.yml
index 8927623..cbd99d8 100644
--- a/docker-compose.yml
+++ b/docker-compose.yml
@@ -108,7 +108,7 @@ services:
     image: prom/prometheus:latest
     volumes:
       - ./monitoring/prometheus-config.yml:/etc/prometheus/prometheus.yml:z
-      - ./monitoring/alert-rules.yml:/etc/prometheus/alert-rules.yml:z
+      - ./monitoring/alert-manager/alert-rules.yml:/etc/prometheus/alert-rules.yml:z
     command:
       - --config.file=/etc/prometheus/prometheus.yml
       - --storage.tsdb.retention.time=7d
@@ -218,14 +218,38 @@ services:
     networks:
       - simulation
 
+  # One-shot job: renders the Alertmanager config from its template by
+  # substituting DISCORD_WEBHOOK from the environment, then exits.
+  env_replacer:
+    image: alpine:3.19.1
+    environment:
+      - DISCORD_WEBHOOK=$DISCORD_WEBHOOK
+    volumes:
+      # Mount the directory, not the output file: the rendered file is
+      # gitignored, and Docker turns a missing bind-mount source into a
+      # directory, which would make the redirect below fail.
+      - ./monitoring/alert-manager:/config:z
+    command:
+      - /bin/sh
+      - -c
+      - >-
+        apk add --no-cache gettext &&
+        envsubst < /config/alertmanager-config.yml.template
+        > /config/alertmanager-config.yml
+
   alertmanager:
     image: prom/alertmanager:latest
     volumes:
-      - ./monitoring/alertmanager-config.yml:/etc/alertmanager/alertmanager.yml:z
+      - ./monitoring/alert-manager/alertmanager-config.yml:/etc/alertmanager/alertmanager.yml:z
     command:
       - --config.file=/etc/alertmanager/alertmanager.yml
     ports:
       - 127.0.0.1:9093:9093
     restart: on-failure
     networks:
-      - simulation
\ No newline at end of file
+      - simulation
+    # Start only after the config has been rendered; the short list form of
+    # depends_on waits for the job to start, not for it to finish.
+    depends_on:
+      env_replacer:
+        condition: service_completed_successfully
diff --git a/monitoring/alert-rules.yml b/monitoring/alert-manager/alert-rules.yml
similarity index 100%
rename from monitoring/alert-rules.yml
rename to monitoring/alert-manager/alert-rules.yml
diff --git a/monitoring/alert-manager/alertmanager-config.yml.template b/monitoring/alert-manager/alertmanager-config.yml.template
new file mode 100644
index 0000000..f08be0d
--- /dev/null
+++ b/monitoring/alert-manager/alertmanager-config.yml.template
@@ -0,0 +1,14 @@
+# Template for the Alertmanager configuration. docker-compose's env_replacer
+# service renders it with envsubst into alertmanager-config.yml (gitignored).
+route:
+  group_by: ['alertname']
+  group_wait: 10s
+  group_interval: 10s
+  repeat_interval: 2m
+  receiver: 'discord'
+
+receivers:
+  - name: 'discord'
+    discord_configs:
+      # Quoted so the substituted URL is always read as a string.
+      - webhook_url: '${DISCORD_WEBHOOK}'
diff --git a/monitoring/alertmanager-config.yml b/monitoring/alertmanager-config.yml
deleted file mode 100644
index 77bf5fa..0000000
--- a/monitoring/alertmanager-config.yml
+++ /dev/null
@@ -1,11 +0,0 @@
-route:
-  group_by: ['alertname']
-  group_wait: 60s
-  group_interval: 5m
-  repeat_interval: 1d
-  receiver: 'discord'
-
-receivers:
-  - name: 'discord'
-    discord_configs:
-      - webhook_url: # TO DO: fill discord webhook
\ No newline at end of file