agent: move agent tls metric monitor to a more appropriate place

And add a test for it
This commit is contained in:
Daniel Nephin 2021-10-27 15:23:29 -04:00
parent c92513ec16
commit a8e2e1c365
5 changed files with 109 additions and 37 deletions

View File

@ -667,7 +667,7 @@ func (a *Agent) Start(ctx context.Context) error {
}
if a.tlsConfigurator.Cert() != nil {
m := consul.AgentTLSCertExpirationMonitor(a.tlsConfigurator, a.logger)
m := tlsCertExpirationMonitor(a.tlsConfigurator, a.logger)
go m.Monitor(&lib.StopChannelContext{StopCh: a.shutdownCh})
}

View File

@ -2,7 +2,6 @@ package consul
import (
"context"
"crypto/x509"
"errors"
"fmt"
"math"
@ -16,7 +15,6 @@ import (
"github.com/hashicorp/consul/agent/connect"
"github.com/hashicorp/consul/agent/connect/ca"
"github.com/hashicorp/consul/logging"
"github.com/hashicorp/consul/tlsutil"
)
var metricsKeyMeshRootCAExpiry = []string{"mesh", "active-root-ca", "expiry"}
@ -33,13 +31,6 @@ var LeaderCertExpirationGauges = []prometheus.GaugeDefinition{
},
}
var AgentCertExpirationGauges = []prometheus.GaugeDefinition{
{
Name: metricsKeyAgentTLSCertExpiry,
Help: "Seconds until the agent tls certificate expires. Updated every hour",
},
}
func rootCAExpiryMonitor(s *Server) CertExpirationMonitor {
return CertExpirationMonitor{
Key: metricsKeyMeshRootCAExpiry,
@ -165,29 +156,6 @@ func (m CertExpirationMonitor) Monitor(ctx context.Context) error {
}
}
var metricsKeyAgentTLSCertExpiry = []string{"agent", "tls", "cert", "expiry"}
// AgentTLSCertExpirationMonitor returns a CertExpirationMonitor which will
// monitor the expiration of the certificate used for agent TLS.
func AgentTLSCertExpirationMonitor(c *tlsutil.Configurator, logger hclog.Logger) CertExpirationMonitor {
return CertExpirationMonitor{
Key: metricsKeyAgentTLSCertExpiry,
Logger: logger,
Query: func() (time.Duration, error) {
raw := c.Cert()
if raw == nil {
return 0, fmt.Errorf("tls not enabled")
}
cert, err := x509.ParseCertificate(raw.Certificate[0])
if err != nil {
return 0, fmt.Errorf("failed to parse agent tls cert: %w", err)
}
return time.Until(cert.NotAfter), nil
},
}
}
// initLeaderMetrics sets all metrics that are emitted only on leaders to a NaN
// value so that they don't incorrectly report 0 when a server starts as a
// follower.

43
agent/metrics.go Normal file
View File

@ -0,0 +1,43 @@
package agent
import (
"crypto/x509"
"fmt"
"time"
"github.com/armon/go-metrics/prometheus"
"github.com/hashicorp/go-hclog"
"github.com/hashicorp/consul/agent/consul"
"github.com/hashicorp/consul/tlsutil"
)
var CertExpirationGauges = []prometheus.GaugeDefinition{
{
Name: metricsKeyAgentTLSCertExpiry,
Help: "Seconds until the agent tls certificate expires. Updated every hour",
},
}
var metricsKeyAgentTLSCertExpiry = []string{"agent", "tls", "cert", "expiry"}
// tlsCertExpirationMonitor returns a CertExpirationMonitor which will
// monitor the expiration of the certificate used for agent TLS.
func tlsCertExpirationMonitor(c *tlsutil.Configurator, logger hclog.Logger) consul.CertExpirationMonitor {
return consul.CertExpirationMonitor{
Key: metricsKeyAgentTLSCertExpiry,
Logger: logger,
Query: func() (time.Duration, error) {
raw := c.Cert()
if raw == nil {
return 0, fmt.Errorf("tls not enabled")
}
cert, err := x509.ParseCertificate(raw.Certificate[0])
if err != nil {
return 0, fmt.Errorf("failed to parse agent tls cert: %w", err)
}
return time.Until(cert.NotAfter), nil
},
}
}

View File

@ -1,20 +1,29 @@
package agent
import (
"github.com/stretchr/testify/require"
"crypto/x509"
"fmt"
"io/ioutil"
"net/http"
"net/http/httptest"
"path/filepath"
"strings"
"testing"
"github.com/hashicorp/consul/sdk/testutil"
"github.com/hashicorp/consul/tlsutil"
"github.com/stretchr/testify/require"
)
func checkForShortTesting(t *testing.T) {
func skipIfShortTesting(t *testing.T) {
if testing.Short() {
t.Skip("too slow for testing.Short")
}
}
func recordPromMetrics(t *testing.T, a *TestAgent, respRec *httptest.ResponseRecorder) {
t.Helper()
req, err := http.NewRequest("GET", "/v1/agent/metrics?format=prometheus", nil)
require.NoError(t, err, "Failed to generate new http request.")
@ -49,7 +58,7 @@ func assertMetricNotExists(t *testing.T, respRec *httptest.ResponseRecorder, met
// TestHTTPHandlers_AgentMetrics_ConsulAutopilot_Prometheus adds testing around
// the published autopilot metrics on https://www.consul.io/docs/agent/telemetry#autopilot
func TestHTTPHandlers_AgentMetrics_ConsulAutopilot_Prometheus(t *testing.T) {
checkForShortTesting(t)
skipIfShortTesting(t)
// This test cannot use t.Parallel() since we modify global state, ie the global metrics instance
t.Run("Check consul_autopilot_* are not emitted metrics on clients", func(t *testing.T) {
@ -95,3 +104,55 @@ func TestHTTPHandlers_AgentMetrics_ConsulAutopilot_Prometheus(t *testing.T) {
assertMetricExistsWithValue(t, respRec, "agent_2_autopilot_failure_tolerance", "NaN")
})
}
func TestHTTPHandlers_AgentMetrics_TLSCertExpiry_Prometheus(t *testing.T) {
skipIfShortTesting(t)
// This test cannot use t.Parallel() since we modify global state, ie the global metrics instance
dir := testutil.TempDir(t, "ca")
caPEM, caPK, err := tlsutil.GenerateCA(tlsutil.CAOpts{Days: 20, Domain: "consul"})
require.NoError(t, err)
caPath := filepath.Join(dir, "ca.pem")
err = ioutil.WriteFile(caPath, []byte(caPEM), 0600)
require.NoError(t, err)
signer, err := tlsutil.ParseSigner(caPK)
require.NoError(t, err)
pem, key, err := tlsutil.GenerateCert(tlsutil.CertOpts{
Signer: signer,
CA: caPEM,
Name: "server.dc1.consul",
Days: 20,
ExtKeyUsage: []x509.ExtKeyUsage{x509.ExtKeyUsageClientAuth},
})
require.NoError(t, err)
certPath := filepath.Join(dir, "cert.pem")
err = ioutil.WriteFile(certPath, []byte(pem), 0600)
require.NoError(t, err)
keyPath := filepath.Join(dir, "cert.key")
err = ioutil.WriteFile(keyPath, []byte(key), 0600)
require.NoError(t, err)
hcl := fmt.Sprintf(`
telemetry = {
prometheus_retention_time = "5s",
disable_hostname = true
metrics_prefix = "agent_3"
}
ca_file = "%s"
cert_file = "%s"
key_file = "%s"
`, caPath, certPath, keyPath)
a := StartTestAgent(t, TestAgent{HCL: hcl})
defer a.Shutdown()
respRec := httptest.NewRecorder()
recordPromMetrics(t, a, respRec)
require.Contains(t, respRec.Body.String(), "agent_3_agent_tls_cert_expiry 1.7")
}

View File

@ -211,7 +211,7 @@ func getPrometheusDefs(cfg lib.TelemetryConfig, isServer bool) ([]prometheus.Gau
xds.StatsGauges,
usagemetrics.Gauges,
consul.ReplicationGauges,
consul.AgentCertExpirationGauges,
CertExpirationGauges,
Gauges,
raftGauges,
}