From 7fe60e59898bc39a9dfa693d0e3ea76e26c8ae47 Mon Sep 17 00:00:00 2001 From: Daniel Nephin Date: Thu, 5 Aug 2021 18:38:06 -0400 Subject: [PATCH] telemetry: prevent stale values from cert monitors Prometheus scrapes metrics from each process, so when leadership transfers to a different node the previous leader would still be reporting the old cached value. By setting the gauge to NaN when the monitor exits, I believe we effectively zero out the value, so that Prometheus only considers the value from the new leader. --- agent/consul/leader_metrics.go | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/agent/consul/leader_metrics.go b/agent/consul/leader_metrics.go index 02a6c6d196..bb65d4dafa 100644 --- a/agent/consul/leader_metrics.go +++ b/agent/consul/leader_metrics.go @@ -5,6 +5,7 @@ import ( "crypto/x509" "errors" "fmt" + "math" "strings" "time" @@ -156,6 +157,9 @@ func (m CertExpirationMonitor) Monitor(ctx context.Context) error { for { select { case <-ctx.Done(): + // "Zero-out" the metric on exit so that when prometheus scrapes this + // metric from a non-leader, it does not get a stale value. + metrics.SetGauge(m.Key, float32(math.NaN())) return nil case <-ticker.C: fn()