diff --git a/agent/consul/leader_metrics.go b/agent/consul/leader_metrics.go index faf8f68eb8..6da2d70a98 100644 --- a/agent/consul/leader_metrics.go +++ b/agent/consul/leader_metrics.go @@ -42,10 +42,7 @@ var AgentCertExpirationGauges = []prometheus.GaugeDefinition{ func rootCAExpiryMonitor(s *Server) CertExpirationMonitor { return CertExpirationMonitor{ - Key: metricsKeyMeshRootCAExpiry, - Labels: []metrics.Label{ - {Name: "datacenter", Value: s.config.Datacenter}, - }, + Key: metricsKeyMeshRootCAExpiry, Logger: s.logger.Named(logging.Connect), Query: func() (time.Duration, error) { return getRootCAExpiry(s) @@ -70,10 +67,7 @@ func signingCAExpiryMonitor(s *Server) CertExpirationMonitor { isPrimary := s.config.Datacenter == s.config.PrimaryDatacenter if isPrimary { return CertExpirationMonitor{ - Key: metricsKeyMeshActiveSigningCAExpiry, - Labels: []metrics.Label{ - {Name: "datacenter", Value: s.config.Datacenter}, - }, + Key: metricsKeyMeshActiveSigningCAExpiry, Logger: s.logger.Named(logging.Connect), Query: func() (time.Duration, error) { provider, _ := s.caManager.getCAProvider() @@ -87,10 +81,7 @@ func signingCAExpiryMonitor(s *Server) CertExpirationMonitor { } return CertExpirationMonitor{ - Key: metricsKeyMeshActiveSigningCAExpiry, - Labels: []metrics.Label{ - {Name: "datacenter", Value: s.config.Datacenter}, - }, + Key: metricsKeyMeshActiveSigningCAExpiry, Logger: s.logger.Named(logging.Connect), Query: func() (time.Duration, error) { return getActiveIntermediateExpiry(s) @@ -121,7 +112,11 @@ func getActiveIntermediateExpiry(s *Server) (time.Duration, error) { } type CertExpirationMonitor struct { - Key []string + Key []string + // Labels to be emitted along with the metric. It is very important that these + // labels be included in the pre-declaration as well. Otherwise, if + // telemetry.prometheus_retention_time is less than certExpirationMonitorInterval + // then the metrics will expire before they are emitted again. Labels []metrics.Label Logger hclog.Logger // Query is called at each interval. It should return the duration until the @@ -137,7 +132,7 @@ func (m CertExpirationMonitor) Monitor(ctx context.Context) error { logger := m.Logger.With("metric", strings.Join(m.Key, ".")) - fn := func() { + emitMetric := func() { d, err := m.Query() if err != nil { logger.Warn("failed to emit certificate expiry metric", "error", err) @@ -155,17 +150,17 @@ func (m CertExpirationMonitor) Monitor(ctx context.Context) error { // emit the metric immediately so that if a cert was just updated the // new metric will be updated to the new expiration time. - fn() + emitMetric() for { select { case <-ctx.Done(): // "Zero-out" the metric on exit so that when prometheus scrapes this // metric from a non-leader, it does not get a stale value. - metrics.SetGauge(m.Key, float32(math.NaN())) + metrics.SetGaugeWithLabels(m.Key, float32(math.NaN()), m.Labels) return nil case <-ticker.C: - fn() + emitMetric() } } } @@ -176,11 +171,7 @@ var metricsKeyAgentTLSCertExpiry = []string{"agent", "tls", "cert", "expiry"} // monitor the expiration of the certificate used for agent TLS. func AgentTLSCertExpirationMonitor(c *tlsutil.Configurator, logger hclog.Logger, dc string) CertExpirationMonitor { return CertExpirationMonitor{ - Key: metricsKeyAgentTLSCertExpiry, - Labels: []metrics.Label{ - {Name: "node", Value: c.Base().NodeName}, - {Name: "datacenter", Value: dc}, - }, + Key: metricsKeyAgentTLSCertExpiry, Logger: logger, Query: func() (time.Duration, error) { raw := c.Cert()