From a8e2e1c36505c80ca9dcac958bf99bd2dd22a604 Mon Sep 17 00:00:00 2001 From: Daniel Nephin Date: Wed, 27 Oct 2021 15:23:29 -0400 Subject: [PATCH] agent: move agent tls metric monitor to a more appropriate place And add a test for it --- agent/agent.go | 2 +- agent/consul/leader_metrics.go | 32 ---------------- agent/metrics.go | 43 ++++++++++++++++++++++ agent/metrics_test.go | 67 ++++++++++++++++++++++++++++++++-- agent/setup.go | 2 +- 5 files changed, 109 insertions(+), 37 deletions(-) create mode 100644 agent/metrics.go diff --git a/agent/agent.go b/agent/agent.go index 0c3a1cc008..7f603bcdb0 100644 --- a/agent/agent.go +++ b/agent/agent.go @@ -667,7 +667,7 @@ func (a *Agent) Start(ctx context.Context) error { } if a.tlsConfigurator.Cert() != nil { - m := consul.AgentTLSCertExpirationMonitor(a.tlsConfigurator, a.logger) + m := tlsCertExpirationMonitor(a.tlsConfigurator, a.logger) go m.Monitor(&lib.StopChannelContext{StopCh: a.shutdownCh}) } diff --git a/agent/consul/leader_metrics.go b/agent/consul/leader_metrics.go index 7828bf9f19..7151adb74d 100644 --- a/agent/consul/leader_metrics.go +++ b/agent/consul/leader_metrics.go @@ -2,7 +2,6 @@ package consul import ( "context" - "crypto/x509" "errors" "fmt" "math" @@ -16,7 +15,6 @@ import ( "github.com/hashicorp/consul/agent/connect" "github.com/hashicorp/consul/agent/connect/ca" "github.com/hashicorp/consul/logging" - "github.com/hashicorp/consul/tlsutil" ) var metricsKeyMeshRootCAExpiry = []string{"mesh", "active-root-ca", "expiry"} @@ -33,13 +31,6 @@ var LeaderCertExpirationGauges = []prometheus.GaugeDefinition{ }, } -var AgentCertExpirationGauges = []prometheus.GaugeDefinition{ - { - Name: metricsKeyAgentTLSCertExpiry, - Help: "Seconds until the agent tls certificate expires. Updated every hour", - }, -} - func rootCAExpiryMonitor(s *Server) CertExpirationMonitor { return CertExpirationMonitor{ Key: metricsKeyMeshRootCAExpiry, @@ -165,29 +156,6 @@ func (m CertExpirationMonitor) Monitor(ctx context.Context) error { } } -var metricsKeyAgentTLSCertExpiry = []string{"agent", "tls", "cert", "expiry"} - -// AgentTLSCertExpirationMonitor returns a CertExpirationMonitor which will -// monitor the expiration of the certificate used for agent TLS. -func AgentTLSCertExpirationMonitor(c *tlsutil.Configurator, logger hclog.Logger) CertExpirationMonitor { - return CertExpirationMonitor{ - Key: metricsKeyAgentTLSCertExpiry, - Logger: logger, - Query: func() (time.Duration, error) { - raw := c.Cert() - if raw == nil { - return 0, fmt.Errorf("tls not enabled") - } - - cert, err := x509.ParseCertificate(raw.Certificate[0]) - if err != nil { - return 0, fmt.Errorf("failed to parse agent tls cert: %w", err) - } - return time.Until(cert.NotAfter), nil - }, - } -} - // initLeaderMetrics sets all metrics that are emitted only on leaders to a NaN // value so that they don't incorrectly report 0 when a server starts as a // follower. diff --git a/agent/metrics.go b/agent/metrics.go new file mode 100644 index 0000000000..6406f22bf6 --- /dev/null +++ b/agent/metrics.go @@ -0,0 +1,43 @@ +package agent + +import ( + "crypto/x509" + "fmt" + "time" + + "github.com/armon/go-metrics/prometheus" + "github.com/hashicorp/go-hclog" + + "github.com/hashicorp/consul/agent/consul" + "github.com/hashicorp/consul/tlsutil" +) + +var CertExpirationGauges = []prometheus.GaugeDefinition{ + { + Name: metricsKeyAgentTLSCertExpiry, + Help: "Seconds until the agent tls certificate expires. Updated every hour", + }, +} + +var metricsKeyAgentTLSCertExpiry = []string{"agent", "tls", "cert", "expiry"} + +// tlsCertExpirationMonitor returns a CertExpirationMonitor which will +// monitor the expiration of the certificate used for agent TLS. +func tlsCertExpirationMonitor(c *tlsutil.Configurator, logger hclog.Logger) consul.CertExpirationMonitor { + return consul.CertExpirationMonitor{ + Key: metricsKeyAgentTLSCertExpiry, + Logger: logger, + Query: func() (time.Duration, error) { + raw := c.Cert() + if raw == nil { + return 0, fmt.Errorf("tls not enabled") + } + + cert, err := x509.ParseCertificate(raw.Certificate[0]) + if err != nil { + return 0, fmt.Errorf("failed to parse agent tls cert: %w", err) + } + return time.Until(cert.NotAfter), nil + }, + } +} diff --git a/agent/metrics_test.go b/agent/metrics_test.go index f946e469af..cbf1960654 100644 --- a/agent/metrics_test.go +++ b/agent/metrics_test.go @@ -1,20 +1,29 @@ package agent import ( - "github.com/stretchr/testify/require" + "crypto/x509" + "fmt" + "io/ioutil" "net/http" "net/http/httptest" + "path/filepath" "strings" "testing" + + "github.com/hashicorp/consul/sdk/testutil" + "github.com/hashicorp/consul/tlsutil" + + "github.com/stretchr/testify/require" ) -func checkForShortTesting(t *testing.T) { +func skipIfShortTesting(t *testing.T) { if testing.Short() { t.Skip("too slow for testing.Short") } } func recordPromMetrics(t *testing.T, a *TestAgent, respRec *httptest.ResponseRecorder) { + t.Helper() req, err := http.NewRequest("GET", "/v1/agent/metrics?format=prometheus", nil) require.NoError(t, err, "Failed to generate new http request.") @@ -49,7 +58,7 @@ func assertMetricNotExists(t *testing.T, respRec *httptest.ResponseRecorder, met // TestHTTPHandlers_AgentMetrics_ConsulAutopilot_Prometheus adds testing around // the published autopilot metrics on https://www.consul.io/docs/agent/telemetry#autopilot func TestHTTPHandlers_AgentMetrics_ConsulAutopilot_Prometheus(t *testing.T) { - checkForShortTesting(t) + skipIfShortTesting(t) // This test cannot use t.Parallel() since we modify global state, ie the global metrics instance t.Run("Check consul_autopilot_* are not emitted metrics on clients", func(t *testing.T) { @@ -95,3 +104,55 @@ func TestHTTPHandlers_AgentMetrics_ConsulAutopilot_Prometheus(t *testing.T) { assertMetricExistsWithValue(t, respRec, "agent_2_autopilot_failure_tolerance", "NaN") }) } + +func TestHTTPHandlers_AgentMetrics_TLSCertExpiry_Prometheus(t *testing.T) { + skipIfShortTesting(t) + // This test cannot use t.Parallel() since we modify global state, ie the global metrics instance + + dir := testutil.TempDir(t, "ca") + caPEM, caPK, err := tlsutil.GenerateCA(tlsutil.CAOpts{Days: 20, Domain: "consul"}) + require.NoError(t, err) + + caPath := filepath.Join(dir, "ca.pem") + err = ioutil.WriteFile(caPath, []byte(caPEM), 0600) + require.NoError(t, err) + + signer, err := tlsutil.ParseSigner(caPK) + require.NoError(t, err) + + pem, key, err := tlsutil.GenerateCert(tlsutil.CertOpts{ + Signer: signer, + CA: caPEM, + Name: "server.dc1.consul", + Days: 20, + ExtKeyUsage: []x509.ExtKeyUsage{x509.ExtKeyUsageClientAuth}, + }) + require.NoError(t, err) + + certPath := filepath.Join(dir, "cert.pem") + err = ioutil.WriteFile(certPath, []byte(pem), 0600) + require.NoError(t, err) + + keyPath := filepath.Join(dir, "cert.key") + err = ioutil.WriteFile(keyPath, []byte(key), 0600) + require.NoError(t, err) + + hcl := fmt.Sprintf(` + telemetry = { + prometheus_retention_time = "5s", + disable_hostname = true + metrics_prefix = "agent_3" + } + ca_file = "%s" + cert_file = "%s" + key_file = "%s" + `, caPath, certPath, keyPath) + + a := StartTestAgent(t, TestAgent{HCL: hcl}) + defer a.Shutdown() + + respRec := httptest.NewRecorder() + recordPromMetrics(t, a, respRec) + + require.Contains(t, respRec.Body.String(), "agent_3_agent_tls_cert_expiry 1.7") +} diff --git a/agent/setup.go b/agent/setup.go index 9d351547a3..82543e7fab 100644 --- a/agent/setup.go +++ b/agent/setup.go @@ -211,7 +211,7 @@ func getPrometheusDefs(cfg lib.TelemetryConfig, isServer bool) ([]prometheus.Gau xds.StatsGauges, usagemetrics.Gauges, consul.ReplicationGauges, - consul.AgentCertExpirationGauges, + CertExpirationGauges, Gauges, raftGauges, }