From 0cc58f54de7eeae756bd0130a1e6d3121ef6c451 Mon Sep 17 00:00:00 2001
From: Daniel Nephin
Date: Wed, 4 Aug 2021 16:34:01 -0400
Subject: [PATCH 1/7] telemetry: improve cert expiry metrics

Emit the metric immediately so that after restarting an agent, the new
expiry time will be emitted. This is particularly important when this
metric is being monitored, because we want the alert to resolve itself
immediately.

Also fixed a bug that was exposed in one of these metrics: the CARoot
can be nil, so we have to handle that case.
---
 agent/consul/leader_metrics.go | 41 +++++++++++++++++++++-------------
 1 file changed, 26 insertions(+), 15 deletions(-)

diff --git a/agent/consul/leader_metrics.go b/agent/consul/leader_metrics.go
index 77cde7183b..02a6c6d196 100644
--- a/agent/consul/leader_metrics.go
+++ b/agent/consul/leader_metrics.go
@@ -97,8 +97,11 @@ func signingCAExpiryMonitor(s *Server) CertExpirationMonitor {
 func getActiveIntermediateExpiry(s *Server) (time.Duration, error) {
 	state := s.fsm.State()
 	_, root, err := state.CARootActive(nil)
-	if err != nil {
-		return 0, err
+	switch {
+	case err != nil:
+		return 0, fmt.Errorf("failed to retrieve root CA: %w", err)
+	case root == nil:
+		return 0, fmt.Errorf("no active root CA")
 	}
 
 	// the CA used in a secondary DC is the active intermediate,
@@ -130,24 +133,32 @@ func (m CertExpirationMonitor) Monitor(ctx context.Context) error {
 
 	logger := m.Logger.With("metric", strings.Join(m.Key, "."))
 
+	fn := func() {
+		d, err := m.Query()
+		if err != nil {
+			logger.Warn("failed to emit certificate expiry metric", "error", err)
+			return
+		}
+
+		if d < 24*time.Hour {
+			logger.Warn("certificate will expire soon",
+				"time_to_expiry", d, "expiration", time.Now().Add(d))
+		}
+
+		expiry := d / time.Second
+		metrics.SetGaugeWithLabels(m.Key, float32(expiry), m.Labels)
+	}
+
+	// emit the metric immediately so that if a cert was just updated the
+	// new metric will be updated to the new expiration time.
+	fn()
+
 	for {
 		select {
 		case <-ctx.Done():
 			return nil
 		case <-ticker.C:
-			d, err := m.Query()
-			if err != nil {
-				logger.Warn("failed to emit certificate expiry metric", "error", err)
-				continue
-			}
-
-			if d < 24*time.Hour {
-				logger.Warn("certificate will expire soon",
-					"time_to_expiry", d, "expiration", time.Now().Add(d))
-			}
-
-			expiry := d / time.Second
-			metrics.SetGaugeWithLabels(m.Key, float32(expiry), m.Labels)
+			fn()
 		}
 	}
 }

From 7fe60e59898bc39a9dfa693d0e3ea76e26c8ae47 Mon Sep 17 00:00:00 2001
From: Daniel Nephin
Date: Thu, 5 Aug 2021 18:38:06 -0400
Subject: [PATCH 2/7] telemetry: prevent stale values from cert monitors

Prometheus scrapes metrics from each process, so when leadership
transfers to a different node the previous leader would still be
reporting the old cached value. By setting the value to NaN we
effectively zero it out, so that Prometheus only considers the value
reported by the new leader.
---
 agent/consul/leader_metrics.go | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/agent/consul/leader_metrics.go b/agent/consul/leader_metrics.go
index 02a6c6d196..bb65d4dafa 100644
--- a/agent/consul/leader_metrics.go
+++ b/agent/consul/leader_metrics.go
@@ -5,6 +5,7 @@ import (
 	"crypto/x509"
 	"errors"
 	"fmt"
+	"math"
 	"strings"
 	"time"
 
@@ -156,6 +157,9 @@ func (m CertExpirationMonitor) Monitor(ctx context.Context) error {
 	for {
 		select {
 		case <-ctx.Done():
+			// "Zero-out" the metric on exit so that when prometheus scrapes this
+			// metric from a non-leader, it does not get a stale value.
+			metrics.SetGauge(m.Key, float32(math.NaN()))
 			return nil
 		case <-ticker.C:
 			fn()

From 7948720bbbc9ca8aa6388a48d765566b1a754310 Mon Sep 17 00:00:00 2001
From: Daniel Nephin
Date: Tue, 19 Oct 2021 16:49:23 -0400
Subject: [PATCH 3/7] telemetry: only emit leader cert expiry metrics on the
 servers
---
 agent/consul/leader_metrics.go | 5 ++++-
 agent/setup.go                 | 6 ++++--
 2 files changed, 8 insertions(+), 3 deletions(-)

diff --git a/agent/consul/leader_metrics.go b/agent/consul/leader_metrics.go
index bb65d4dafa..faf8f68eb8 100644
--- a/agent/consul/leader_metrics.go
+++ b/agent/consul/leader_metrics.go
@@ -22,7 +22,7 @@ import (
 var metricsKeyMeshRootCAExpiry = []string{"mesh", "active-root-ca", "expiry"}
 var metricsKeyMeshActiveSigningCAExpiry = []string{"mesh", "active-signing-ca", "expiry"}
 
-var CertExpirationGauges = []prometheus.GaugeDefinition{
+var LeaderCertExpirationGauges = []prometheus.GaugeDefinition{
 	{
 		Name: metricsKeyMeshRootCAExpiry,
 		Help: "Seconds until the service mesh root certificate expires. Updated every hour",
@@ -31,6 +31,9 @@ var CertExpirationGauges = []prometheus.GaugeDefinition{
 		Name: metricsKeyMeshActiveSigningCAExpiry,
 		Help: "Seconds until the service mesh signing certificate expires. Updated every hour",
 	},
+}
+
+var AgentCertExpirationGauges = []prometheus.GaugeDefinition{
 	{
 		Name: metricsKeyAgentTLSCertExpiry,
 		Help: "Seconds until the agent tls certificate expires. Updated every hour",
diff --git a/agent/setup.go b/agent/setup.go
index 2db06049bc..9d351547a3 100644
--- a/agent/setup.go
+++ b/agent/setup.go
@@ -211,14 +211,16 @@ func getPrometheusDefs(cfg lib.TelemetryConfig, isServer bool) ([]prometheus.Gau
 		xds.StatsGauges,
 		usagemetrics.Gauges,
 		consul.ReplicationGauges,
-		consul.CertExpirationGauges,
+		consul.AgentCertExpirationGauges,
 		Gauges,
 		raftGauges,
 	}
 
 	// TODO(ffmmm): conditionally add only leader specific metrics to gauges, counters, summaries, etc
 	if isServer {
-		gauges = append(gauges, consul.AutopilotGauges)
+		gauges = append(gauges,
+			consul.AutopilotGauges,
+			consul.LeaderCertExpirationGauges)
 	}
 
 	// Flatten definitions

From 9264ce89d2647daca86f86fc1a396be620ce8482 Mon Sep 17 00:00:00 2001
From: Daniel Nephin
Date: Wed, 20 Oct 2021 11:54:11 -0400
Subject: [PATCH 4/7] telemetry: fix cert expiry metrics by removing labels

These labels should be set by whatever process scrapes Consul (for
Prometheus), or by the agent that receives them (for Datadog/statsd).
We need to remove them here because the labels are part of the "metric
key", so we'd have to pre-declare the metrics with the labels. We could
do that, but that is extra work for labels that should be added from
elsewhere.

Also renames the closure to be more descriptive.
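To illustrate the failure mode, a rough sketch (made-up value and
label, not code from this change; go-metrics and its prometheus sink
are the same libraries used elsewhere in this series). The gauge is
pre-declared by name only, but emitted under name plus labels; the
labeled series was never pre-declared, so it is subject to
telemetry.prometheus_retention_time and disappears between the hourly
emissions:

    package main

    import (
    	"github.com/armon/go-metrics"
    	"github.com/armon/go-metrics/prometheus"
    )

    // Pre-declared series: keyed by name only.
    var gauges = []prometheus.GaugeDefinition{{
    	Name: []string{"mesh", "active-root-ca", "expiry"},
    	Help: "Seconds until the service mesh root certificate expires.",
    }}

    func main() {
    	// Name plus labels form the metric key, so this emits a different
    	// series than the one pre-declared above.
    	metrics.SetGaugeWithLabels(
    		[]string{"mesh", "active-root-ca", "expiry"},
    		float32(1234),
    		[]metrics.Label{{Name: "datacenter", Value: "dc1"}},
    	)
    }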
---
 agent/consul/leader_metrics.go | 35 +++++++++++++---------------------
 1 file changed, 13 insertions(+), 22 deletions(-)

diff --git a/agent/consul/leader_metrics.go b/agent/consul/leader_metrics.go
index faf8f68eb8..6da2d70a98 100644
--- a/agent/consul/leader_metrics.go
+++ b/agent/consul/leader_metrics.go
@@ -42,10 +42,7 @@ var AgentCertExpirationGauges = []prometheus.GaugeDefinition{
 
 func rootCAExpiryMonitor(s *Server) CertExpirationMonitor {
 	return CertExpirationMonitor{
-		Key: metricsKeyMeshRootCAExpiry,
-		Labels: []metrics.Label{
-			{Name: "datacenter", Value: s.config.Datacenter},
-		},
+		Key:    metricsKeyMeshRootCAExpiry,
 		Logger: s.logger.Named(logging.Connect),
 		Query: func() (time.Duration, error) {
 			return getRootCAExpiry(s)
@@ -70,10 +67,7 @@ func signingCAExpiryMonitor(s *Server) CertExpirationMonitor {
 	isPrimary := s.config.Datacenter == s.config.PrimaryDatacenter
 	if isPrimary {
 		return CertExpirationMonitor{
-			Key: metricsKeyMeshActiveSigningCAExpiry,
-			Labels: []metrics.Label{
-				{Name: "datacenter", Value: s.config.Datacenter},
-			},
+			Key:    metricsKeyMeshActiveSigningCAExpiry,
 			Logger: s.logger.Named(logging.Connect),
 			Query: func() (time.Duration, error) {
 				provider, _ := s.caManager.getCAProvider()
@@ -87,10 +81,7 @@ func signingCAExpiryMonitor(s *Server) CertExpirationMonitor {
 	}
 
 	return CertExpirationMonitor{
-		Key: metricsKeyMeshActiveSigningCAExpiry,
-		Labels: []metrics.Label{
-			{Name: "datacenter", Value: s.config.Datacenter},
-		},
+		Key:    metricsKeyMeshActiveSigningCAExpiry,
 		Logger: s.logger.Named(logging.Connect),
 		Query: func() (time.Duration, error) {
 			return getActiveIntermediateExpiry(s)
@@ -121,7 +112,11 @@ func getActiveIntermediateExpiry(s *Server) (time.Duration, error) {
 }
 
 type CertExpirationMonitor struct {
-	Key    []string
+	Key []string
+	// Labels to be emitted along with the metric. It is very important that these
+	// labels be included in the pre-declaration as well. Otherwise, if
+	// telemetry.prometheus_retention_time is less than certExpirationMonitorInterval
+	// then the metrics will expire before they are emitted again.
 	Labels []metrics.Label
 	Logger hclog.Logger
 	// Query is called at each interval. It should return the duration until the
@@ -137,7 +132,7 @@ func (m CertExpirationMonitor) Monitor(ctx context.Context) error {
 
 	logger := m.Logger.With("metric", strings.Join(m.Key, "."))
 
-	fn := func() {
+	emitMetric := func() {
 		d, err := m.Query()
 		if err != nil {
 			logger.Warn("failed to emit certificate expiry metric", "error", err)
@@ -155,17 +150,17 @@ func (m CertExpirationMonitor) Monitor(ctx context.Context) error {
 
 	// emit the metric immediately so that if a cert was just updated the
 	// new metric will be updated to the new expiration time.
-	fn()
+	emitMetric()
 
 	for {
 		select {
 		case <-ctx.Done():
 			// "Zero-out" the metric on exit so that when prometheus scrapes this
 			// metric from a non-leader, it does not get a stale value.
-			metrics.SetGauge(m.Key, float32(math.NaN()))
+			metrics.SetGaugeWithLabels(m.Key, float32(math.NaN()), m.Labels)
 			return nil
 		case <-ticker.C:
-			fn()
+			emitMetric()
 		}
 	}
 }
@@ -176,11 +171,7 @@ var metricsKeyAgentTLSCertExpiry = []string{"agent", "tls", "cert", "expiry"}
 
 // AgentTLSCertExpirationMonitor returns a CertExpirationMonitor which will
 // monitor the expiration of the certificate used for agent TLS.
 func AgentTLSCertExpirationMonitor(c *tlsutil.Configurator, logger hclog.Logger, dc string) CertExpirationMonitor {
 	return CertExpirationMonitor{
-		Key: metricsKeyAgentTLSCertExpiry,
-		Labels: []metrics.Label{
-			{Name: "node", Value: c.Base().NodeName},
-			{Name: "datacenter", Value: dc},
-		},
+		Key:    metricsKeyAgentTLSCertExpiry,
 		Logger: logger,
 		Query: func() (time.Duration, error) {
 			raw := c.Cert()

From c92513ec167e53390002866ed9671d73667d9222 Mon Sep 17 00:00:00 2001
From: Daniel Nephin
Date: Thu, 21 Oct 2021 18:09:30 -0400
Subject: [PATCH 5/7] telemetry: set cert expiry metrics to NaN on start

So that followers do not report 0, which would make alerting difficult.
---
 agent/agent.go                 |  2 +-
 agent/consul/leader_metrics.go | 11 ++++++++++-
 agent/consul/server.go         |  2 ++
 3 files changed, 13 insertions(+), 2 deletions(-)

diff --git a/agent/agent.go b/agent/agent.go
index bb0b3e832d..0c3a1cc008 100644
--- a/agent/agent.go
+++ b/agent/agent.go
@@ -667,7 +667,7 @@ func (a *Agent) Start(ctx context.Context) error {
 	}
 
 	if a.tlsConfigurator.Cert() != nil {
-		m := consul.AgentTLSCertExpirationMonitor(a.tlsConfigurator, a.logger, a.config.Datacenter)
+		m := consul.AgentTLSCertExpirationMonitor(a.tlsConfigurator, a.logger)
 		go m.Monitor(&lib.StopChannelContext{StopCh: a.shutdownCh})
 	}
 
diff --git a/agent/consul/leader_metrics.go b/agent/consul/leader_metrics.go
index 6da2d70a98..7828bf9f19 100644
--- a/agent/consul/leader_metrics.go
+++ b/agent/consul/leader_metrics.go
@@ -169,7 +169,7 @@ var metricsKeyAgentTLSCertExpiry = []string{"agent", "tls", "cert", "expiry"}
 
 // AgentTLSCertExpirationMonitor returns a CertExpirationMonitor which will
 // monitor the expiration of the certificate used for agent TLS.
-func AgentTLSCertExpirationMonitor(c *tlsutil.Configurator, logger hclog.Logger, dc string) CertExpirationMonitor {
+func AgentTLSCertExpirationMonitor(c *tlsutil.Configurator, logger hclog.Logger) CertExpirationMonitor {
 	return CertExpirationMonitor{
 		Key:    metricsKeyAgentTLSCertExpiry,
 		Logger: logger,
@@ -187,3 +187,12 @@ func AgentTLSCertExpirationMonitor(c *tlsutil.Configurator, logger hclog.Logger,
 		},
 	}
 }
+
+// initLeaderMetrics sets all metrics that are emitted only on leaders to a NaN
+// value so that they don't incorrectly report 0 when a server starts as a
+// follower.
+func initLeaderMetrics() {
+	for _, g := range LeaderCertExpirationGauges {
+		metrics.SetGaugeWithLabels(g.Name, float32(math.NaN()), g.ConstLabels)
+	}
+}
diff --git a/agent/consul/server.go b/agent/consul/server.go
index 969785a23a..524a0f1eae 100644
--- a/agent/consul/server.go
+++ b/agent/consul/server.go
@@ -389,6 +389,8 @@ func NewServer(config *Config, flat Deps) (*Server, error) {
 		return nil, err
 	}
 
+	initLeaderMetrics()
+
 	s.rpcLimiter.Store(rate.NewLimiter(config.RPCRateLimit, config.RPCMaxBurst))
 
 	configReplicatorConfig := ReplicatorConfig{

From a8e2e1c36505c80ca9dcac958bf99bd2dd22a604 Mon Sep 17 00:00:00 2001
From: Daniel Nephin
Date: Wed, 27 Oct 2021 15:23:29 -0400
Subject: [PATCH 6/7] agent: move agent tls metric monitor to a more
 appropriate place

And add a test for it.
---
 agent/agent.go                 |  2 +-
 agent/consul/leader_metrics.go | 32 ----------------
 agent/metrics.go               | 43 ++++++++++++++++++++++
 agent/metrics_test.go          | 67 ++++++++++++++++++++++++++++++++--
 agent/setup.go                 |  2 +-
 5 files changed, 109 insertions(+), 37 deletions(-)
 create mode 100644 agent/metrics.go

diff --git a/agent/agent.go b/agent/agent.go
index 0c3a1cc008..7f603bcdb0 100644
--- a/agent/agent.go
+++ b/agent/agent.go
@@ -667,7 +667,7 @@ func (a *Agent) Start(ctx context.Context) error {
 	}
 
 	if a.tlsConfigurator.Cert() != nil {
-		m := consul.AgentTLSCertExpirationMonitor(a.tlsConfigurator, a.logger)
+		m := tlsCertExpirationMonitor(a.tlsConfigurator, a.logger)
 		go m.Monitor(&lib.StopChannelContext{StopCh: a.shutdownCh})
 	}
 
diff --git a/agent/consul/leader_metrics.go b/agent/consul/leader_metrics.go
index 7828bf9f19..7151adb74d 100644
--- a/agent/consul/leader_metrics.go
+++ b/agent/consul/leader_metrics.go
@@ -2,7 +2,6 @@ package consul
 
 import (
 	"context"
-	"crypto/x509"
 	"errors"
 	"fmt"
 	"math"
@@ -16,7 +15,6 @@ import (
 	"github.com/hashicorp/consul/agent/connect"
 	"github.com/hashicorp/consul/agent/connect/ca"
 	"github.com/hashicorp/consul/logging"
-	"github.com/hashicorp/consul/tlsutil"
 )
 
 var metricsKeyMeshRootCAExpiry = []string{"mesh", "active-root-ca", "expiry"}
@@ -33,13 +31,6 @@ var LeaderCertExpirationGauges = []prometheus.GaugeDefinition{
 	},
 }
 
-var AgentCertExpirationGauges = []prometheus.GaugeDefinition{
-	{
-		Name: metricsKeyAgentTLSCertExpiry,
-		Help: "Seconds until the agent tls certificate expires. Updated every hour",
-	},
-}
-
 func rootCAExpiryMonitor(s *Server) CertExpirationMonitor {
 	return CertExpirationMonitor{
 		Key:    metricsKeyMeshRootCAExpiry,
@@ -165,29 +156,6 @@ func (m CertExpirationMonitor) Monitor(ctx context.Context) error {
 	}
 }
 
-var metricsKeyAgentTLSCertExpiry = []string{"agent", "tls", "cert", "expiry"}
-
-// AgentTLSCertExpirationMonitor returns a CertExpirationMonitor which will
-// monitor the expiration of the certificate used for agent TLS.
-func AgentTLSCertExpirationMonitor(c *tlsutil.Configurator, logger hclog.Logger) CertExpirationMonitor {
-	return CertExpirationMonitor{
-		Key:    metricsKeyAgentTLSCertExpiry,
-		Logger: logger,
-		Query: func() (time.Duration, error) {
-			raw := c.Cert()
-			if raw == nil {
-				return 0, fmt.Errorf("tls not enabled")
-			}
-
-			cert, err := x509.ParseCertificate(raw.Certificate[0])
-			if err != nil {
-				return 0, fmt.Errorf("failed to parse agent tls cert: %w", err)
-			}
-			return time.Until(cert.NotAfter), nil
-		},
-	}
-}
-
 // initLeaderMetrics sets all metrics that are emitted only on leaders to a NaN
 // value so that they don't incorrectly report 0 when a server starts as a
 // follower.
diff --git a/agent/metrics.go b/agent/metrics.go
new file mode 100644
index 0000000000..6406f22bf6
--- /dev/null
+++ b/agent/metrics.go
@@ -0,0 +1,43 @@
+package agent
+
+import (
+	"crypto/x509"
+	"fmt"
+	"time"
+
+	"github.com/armon/go-metrics/prometheus"
+	"github.com/hashicorp/go-hclog"
+
+	"github.com/hashicorp/consul/agent/consul"
+	"github.com/hashicorp/consul/tlsutil"
+)
+
+var CertExpirationGauges = []prometheus.GaugeDefinition{
+	{
+		Name: metricsKeyAgentTLSCertExpiry,
+		Help: "Seconds until the agent tls certificate expires. Updated every hour",
+	},
+}
+
+var metricsKeyAgentTLSCertExpiry = []string{"agent", "tls", "cert", "expiry"}
+
+// tlsCertExpirationMonitor returns a CertExpirationMonitor which will
+// monitor the expiration of the certificate used for agent TLS.
+func tlsCertExpirationMonitor(c *tlsutil.Configurator, logger hclog.Logger) consul.CertExpirationMonitor {
+	return consul.CertExpirationMonitor{
+		Key:    metricsKeyAgentTLSCertExpiry,
+		Logger: logger,
+		Query: func() (time.Duration, error) {
+			raw := c.Cert()
+			if raw == nil {
+				return 0, fmt.Errorf("tls not enabled")
+			}
+
+			cert, err := x509.ParseCertificate(raw.Certificate[0])
+			if err != nil {
+				return 0, fmt.Errorf("failed to parse agent tls cert: %w", err)
+			}
+			return time.Until(cert.NotAfter), nil
+		},
+	}
+}
diff --git a/agent/metrics_test.go b/agent/metrics_test.go
index f946e469af..cbf1960654 100644
--- a/agent/metrics_test.go
+++ b/agent/metrics_test.go
@@ -1,20 +1,29 @@
 package agent
 
 import (
-	"github.com/stretchr/testify/require"
+	"crypto/x509"
+	"fmt"
+	"io/ioutil"
 	"net/http"
 	"net/http/httptest"
+	"path/filepath"
 	"strings"
 	"testing"
+
+	"github.com/hashicorp/consul/sdk/testutil"
+	"github.com/hashicorp/consul/tlsutil"
+
+	"github.com/stretchr/testify/require"
 )
 
-func checkForShortTesting(t *testing.T) {
+func skipIfShortTesting(t *testing.T) {
 	if testing.Short() {
 		t.Skip("too slow for testing.Short")
 	}
 }
 
 func recordPromMetrics(t *testing.T, a *TestAgent, respRec *httptest.ResponseRecorder) {
+	t.Helper()
 	req, err := http.NewRequest("GET", "/v1/agent/metrics?format=prometheus", nil)
 	require.NoError(t, err, "Failed to generate new http request.")
 
@@ -49,7 +58,7 @@ func assertMetricNotExists(t *testing.T, respRec *httptest.ResponseRecorder, met
 // TestHTTPHandlers_AgentMetrics_ConsulAutopilot_Prometheus adds testing around
 // the published autopilot metrics on https://www.consul.io/docs/agent/telemetry#autopilot
 func TestHTTPHandlers_AgentMetrics_ConsulAutopilot_Prometheus(t *testing.T) {
-	checkForShortTesting(t)
+	skipIfShortTesting(t)
 	// This test cannot use t.Parallel() since we modify global state, ie the global metrics instance
 
 	t.Run("Check consul_autopilot_* are not emitted metrics on clients", func(t *testing.T) {
@@ -95,3 +104,55 @@ func TestHTTPHandlers_AgentMetrics_ConsulAutopilot_Prometheus(t *testing.T) {
 		assertMetricExistsWithValue(t, respRec, "agent_2_autopilot_failure_tolerance", "NaN")
 	})
 }
+
+func TestHTTPHandlers_AgentMetrics_TLSCertExpiry_Prometheus(t *testing.T) {
+	skipIfShortTesting(t)
+	// This test cannot use t.Parallel() since we modify global state, ie the global metrics instance
+
+	dir := testutil.TempDir(t, "ca")
+	caPEM, caPK, err := tlsutil.GenerateCA(tlsutil.CAOpts{Days: 20, Domain: "consul"})
+	require.NoError(t, err)
+
+	caPath := filepath.Join(dir, "ca.pem")
+	err = ioutil.WriteFile(caPath, []byte(caPEM), 0600)
+	require.NoError(t, err)
+
+	signer, err := tlsutil.ParseSigner(caPK)
+	require.NoError(t, err)
+
+	pem, key, err := tlsutil.GenerateCert(tlsutil.CertOpts{
+		Signer:      signer,
+		CA:          caPEM,
+		Name:        "server.dc1.consul",
+		Days:        20,
+		ExtKeyUsage: []x509.ExtKeyUsage{x509.ExtKeyUsageClientAuth},
+	})
+	require.NoError(t, err)
+
+	certPath := filepath.Join(dir, "cert.pem")
+	err = ioutil.WriteFile(certPath, []byte(pem), 0600)
+	require.NoError(t, err)
+
+	keyPath := filepath.Join(dir, "cert.key")
+	err = ioutil.WriteFile(keyPath, []byte(key), 0600)
+	require.NoError(t, err)
+
+	hcl := fmt.Sprintf(`
+	telemetry = {
+		prometheus_retention_time = "5s",
+		disable_hostname = true
+		metrics_prefix = "agent_3"
+	}
+	ca_file = "%s"
+	cert_file = "%s"
+	key_file = "%s"
+	`, caPath, certPath, keyPath)
+
+	a := StartTestAgent(t, TestAgent{HCL: hcl})
+	defer a.Shutdown()
+
+	respRec := httptest.NewRecorder()
+	recordPromMetrics(t, a, respRec)
+
+	require.Contains(t, respRec.Body.String(), "agent_3_agent_tls_cert_expiry 1.7")
+}
diff --git a/agent/setup.go b/agent/setup.go
index 9d351547a3..82543e7fab 100644
--- a/agent/setup.go
+++ b/agent/setup.go
@@ -211,7 +211,7 @@ func getPrometheusDefs(cfg lib.TelemetryConfig, isServer bool) ([]prometheus.Gau
 		xds.StatsGauges,
 		usagemetrics.Gauges,
 		consul.ReplicationGauges,
-		consul.AgentCertExpirationGauges,
+		CertExpirationGauges,
 		Gauges,
 		raftGauges,
 	}

From 367b664318532e9eb7ab1e3589713cf418057b55 Mon Sep 17 00:00:00 2001
From: Daniel Nephin
Date: Wed, 27 Oct 2021 15:56:38 -0400
Subject: [PATCH 7/7] Add tests for cert expiry metrics

---
 agent/metrics_test.go | 54 +++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 54 insertions(+)

diff --git a/agent/metrics_test.go b/agent/metrics_test.go
index cbf1960654..bbfe2430bf 100644
--- a/agent/metrics_test.go
+++ b/agent/metrics_test.go
@@ -11,6 +11,7 @@ import (
 	"testing"
 
 	"github.com/hashicorp/consul/sdk/testutil"
+	"github.com/hashicorp/consul/testrpc"
 	"github.com/hashicorp/consul/tlsutil"
 
 	"github.com/stretchr/testify/require"
@@ -156,3 +157,56 @@ func TestHTTPHandlers_AgentMetrics_TLSCertExpiry_Prometheus(t *testing.T) {
 
 	require.Contains(t, respRec.Body.String(), "agent_3_agent_tls_cert_expiry 1.7")
 }
+
+func TestHTTPHandlers_AgentMetrics_CACertExpiry_Prometheus(t *testing.T) {
+	skipIfShortTesting(t)
+	// This test cannot use t.Parallel() since we modify global state, ie the global metrics instance
+
+	t.Run("non-leader emits NaN", func(t *testing.T) {
+		hcl := `
+		telemetry = {
+			prometheus_retention_time = "5s",
+			disable_hostname = true
+			metrics_prefix = "agent_4"
+		}
+		connect {
+			enabled = true
+		}
+		bootstrap = false
+		`
+
+		a := StartTestAgent(t, TestAgent{HCL: hcl})
+		defer a.Shutdown()
+
+		respRec := httptest.NewRecorder()
+		recordPromMetrics(t, a, respRec)
+
+		require.Contains(t, respRec.Body.String(), "agent_4_mesh_active_root_ca_expiry NaN")
+		require.Contains(t, respRec.Body.String(), "agent_4_mesh_active_signing_ca_expiry NaN")
+	})
+
+	t.Run("leader emits a value", func(t *testing.T) {
+		hcl := `
+		telemetry = {
+			prometheus_retention_time = "5s",
+			disable_hostname = true
+			metrics_prefix = "agent_5"
+		}
+		connect {
+			enabled = true
+		}
+		`
+
+		a := StartTestAgent(t, TestAgent{HCL: hcl})
+		defer a.Shutdown()
+		testrpc.WaitForLeader(t, a.RPC, "dc1")
+
+		respRec := httptest.NewRecorder()
+		recordPromMetrics(t, a, respRec)
+
+		out := respRec.Body.String()
+		require.Contains(t, out, "agent_5_mesh_active_root_ca_expiry 3.15")
+		require.Contains(t, out, "agent_5_mesh_active_signing_ca_expiry 3.15")
+	})
+}
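For a quick manual check of these gauges outside the test suite, a
minimal sketch that scrapes the same endpoint the tests above use. The
address assumes a local agent on the default HTTP port with no
metrics_prefix configured; adjust both to match the agent under test:

    package main

    import (
    	"fmt"
    	"io/ioutil"
    	"net/http"
    	"strings"
    )

    func main() {
    	// Same endpoint recordPromMetrics hits in the tests.
    	resp, err := http.Get("http://127.0.0.1:8500/v1/agent/metrics?format=prometheus")
    	if err != nil {
    		panic(err)
    	}
    	defer resp.Body.Close()

    	body, err := ioutil.ReadAll(resp.Body)
    	if err != nil {
    		panic(err)
    	}

    	// On the leader the mesh gauges report seconds until expiry; on
    	// followers (and after shutdown) they are NaN, so a stale value is
    	// never scraped.
    	for _, line := range strings.Split(string(body), "\n") {
    		if strings.Contains(line, "_expiry") {
    			fmt.Println(line)
    		}
    	}
    }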