mirror of https://github.com/status-im/consul.git
agent: move agent tls metric monitor to a more appropriate place
And add a test for it
This commit is contained in:
parent
c92513ec16
commit
a8e2e1c365
|
@ -667,7 +667,7 @@ func (a *Agent) Start(ctx context.Context) error {
|
|||
}
|
||||
|
||||
if a.tlsConfigurator.Cert() != nil {
|
||||
m := consul.AgentTLSCertExpirationMonitor(a.tlsConfigurator, a.logger)
|
||||
m := tlsCertExpirationMonitor(a.tlsConfigurator, a.logger)
|
||||
go m.Monitor(&lib.StopChannelContext{StopCh: a.shutdownCh})
|
||||
}
|
||||
|
||||
|
|
|
@ -2,7 +2,6 @@ package consul
|
|||
|
||||
import (
|
||||
"context"
|
||||
"crypto/x509"
|
||||
"errors"
|
||||
"fmt"
|
||||
"math"
|
||||
|
@ -16,7 +15,6 @@ import (
|
|||
"github.com/hashicorp/consul/agent/connect"
|
||||
"github.com/hashicorp/consul/agent/connect/ca"
|
||||
"github.com/hashicorp/consul/logging"
|
||||
"github.com/hashicorp/consul/tlsutil"
|
||||
)
|
||||
|
||||
// metricsKeyMeshRootCAExpiry is the metric key assigned to the Key field of
// the CertExpirationMonitor built by rootCAExpiryMonitor.
var metricsKeyMeshRootCAExpiry = []string{"mesh", "active-root-ca", "expiry"}
|
||||
|
@ -33,13 +31,6 @@ var LeaderCertExpirationGauges = []prometheus.GaugeDefinition{
|
|||
},
|
||||
}
|
||||
|
||||
// AgentCertExpirationGauges holds the Prometheus gauge definition for the
// agent TLS certificate expiry metric (metricsKeyAgentTLSCertExpiry).
var AgentCertExpirationGauges = []prometheus.GaugeDefinition{
|
||||
{
|
||||
Name: metricsKeyAgentTLSCertExpiry,
|
||||
Help: "Seconds until the agent tls certificate expires. Updated every hour",
|
||||
},
|
||||
}
|
||||
|
||||
func rootCAExpiryMonitor(s *Server) CertExpirationMonitor {
|
||||
return CertExpirationMonitor{
|
||||
Key: metricsKeyMeshRootCAExpiry,
|
||||
|
@ -165,29 +156,6 @@ func (m CertExpirationMonitor) Monitor(ctx context.Context) error {
|
|||
}
|
||||
}
|
||||
|
||||
// metricsKeyAgentTLSCertExpiry is the metric key used both as the gauge name
// in AgentCertExpirationGauges and as the Key of the monitor returned by
// AgentTLSCertExpirationMonitor.
var metricsKeyAgentTLSCertExpiry = []string{"agent", "tls", "cert", "expiry"}
|
||||
|
||||
// AgentTLSCertExpirationMonitor returns a CertExpirationMonitor which will
|
||||
// monitor the expiration of the certificate used for agent TLS.
|
||||
func AgentTLSCertExpirationMonitor(c *tlsutil.Configurator, logger hclog.Logger) CertExpirationMonitor {
|
||||
return CertExpirationMonitor{
|
||||
Key: metricsKeyAgentTLSCertExpiry,
|
||||
Logger: logger,
|
||||
Query: func() (time.Duration, error) {
|
||||
raw := c.Cert()
|
||||
if raw == nil {
|
||||
return 0, fmt.Errorf("tls not enabled")
|
||||
}
|
||||
|
||||
cert, err := x509.ParseCertificate(raw.Certificate[0])
|
||||
if err != nil {
|
||||
return 0, fmt.Errorf("failed to parse agent tls cert: %w", err)
|
||||
}
|
||||
return time.Until(cert.NotAfter), nil
|
||||
},
|
||||
}
|
||||
}
|
||||
|
||||
// initLeaderMetrics sets all metrics that are emitted only on leaders to a NaN
|
||||
// value so that they don't incorrectly report 0 when a server starts as a
|
||||
// follower.
|
||||
|
|
|
@ -0,0 +1,43 @@
|
|||
package agent
|
||||
|
||||
import (
|
||||
"crypto/x509"
|
||||
"fmt"
|
||||
"time"
|
||||
|
||||
"github.com/armon/go-metrics/prometheus"
|
||||
"github.com/hashicorp/go-hclog"
|
||||
|
||||
"github.com/hashicorp/consul/agent/consul"
|
||||
"github.com/hashicorp/consul/tlsutil"
|
||||
)
|
||||
|
||||
// CertExpirationGauges holds the Prometheus gauge definition for the agent
// TLS certificate expiry metric (metricsKeyAgentTLSCertExpiry).
var CertExpirationGauges = []prometheus.GaugeDefinition{
|
||||
{
|
||||
Name: metricsKeyAgentTLSCertExpiry,
|
||||
Help: "Seconds until the agent tls certificate expires. Updated every hour",
|
||||
},
|
||||
}
|
||||
|
||||
// metricsKeyAgentTLSCertExpiry is the metric key used both as the gauge name
// in CertExpirationGauges and as the Key of the monitor returned by
// tlsCertExpirationMonitor.
var metricsKeyAgentTLSCertExpiry = []string{"agent", "tls", "cert", "expiry"}
|
||||
|
||||
// tlsCertExpirationMonitor returns a CertExpirationMonitor which will
|
||||
// monitor the expiration of the certificate used for agent TLS.
|
||||
func tlsCertExpirationMonitor(c *tlsutil.Configurator, logger hclog.Logger) consul.CertExpirationMonitor {
|
||||
return consul.CertExpirationMonitor{
|
||||
Key: metricsKeyAgentTLSCertExpiry,
|
||||
Logger: logger,
|
||||
Query: func() (time.Duration, error) {
|
||||
raw := c.Cert()
|
||||
if raw == nil {
|
||||
return 0, fmt.Errorf("tls not enabled")
|
||||
}
|
||||
|
||||
cert, err := x509.ParseCertificate(raw.Certificate[0])
|
||||
if err != nil {
|
||||
return 0, fmt.Errorf("failed to parse agent tls cert: %w", err)
|
||||
}
|
||||
return time.Until(cert.NotAfter), nil
|
||||
},
|
||||
}
|
||||
}
|
|
@ -1,20 +1,29 @@
|
|||
package agent
|
||||
|
||||
import (
|
||||
"github.com/stretchr/testify/require"
|
||||
"crypto/x509"
|
||||
"fmt"
|
||||
"io/ioutil"
|
||||
"net/http"
|
||||
"net/http/httptest"
|
||||
"path/filepath"
|
||||
"strings"
|
||||
"testing"
|
||||
|
||||
"github.com/hashicorp/consul/sdk/testutil"
|
||||
"github.com/hashicorp/consul/tlsutil"
|
||||
|
||||
"github.com/stretchr/testify/require"
|
||||
)
|
||||
|
||||
func checkForShortTesting(t *testing.T) {
|
||||
// skipIfShortTesting skips the calling test when the test binary runs in
// short mode (testing.Short reports true). Otherwise it does nothing.
func skipIfShortTesting(t *testing.T) {
	// Mark this function as a helper so the skip message is attributed to the
	// calling test's line rather than to this function.
	t.Helper()
	if testing.Short() {
		t.Skip("too slow for testing.Short")
	}
}
|
||||
|
||||
func recordPromMetrics(t *testing.T, a *TestAgent, respRec *httptest.ResponseRecorder) {
|
||||
t.Helper()
|
||||
req, err := http.NewRequest("GET", "/v1/agent/metrics?format=prometheus", nil)
|
||||
require.NoError(t, err, "Failed to generate new http request.")
|
||||
|
||||
|
@ -49,7 +58,7 @@ func assertMetricNotExists(t *testing.T, respRec *httptest.ResponseRecorder, met
|
|||
// TestHTTPHandlers_AgentMetrics_ConsulAutopilot_Prometheus adds testing around
|
||||
// the published autopilot metrics on https://www.consul.io/docs/agent/telemetry#autopilot
|
||||
func TestHTTPHandlers_AgentMetrics_ConsulAutopilot_Prometheus(t *testing.T) {
|
||||
checkForShortTesting(t)
|
||||
skipIfShortTesting(t)
|
||||
// This test cannot use t.Parallel() since we modify global state, ie the global metrics instance
|
||||
|
||||
t.Run("Check consul_autopilot_* are not emitted metrics on clients", func(t *testing.T) {
|
||||
|
@ -95,3 +104,55 @@ func TestHTTPHandlers_AgentMetrics_ConsulAutopilot_Prometheus(t *testing.T) {
|
|||
assertMetricExistsWithValue(t, respRec, "agent_2_autopilot_failure_tolerance", "NaN")
|
||||
})
|
||||
}
|
||||
|
||||
// TestHTTPHandlers_AgentMetrics_TLSCertExpiry_Prometheus generates a CA and a
// certificate signed by it, writes both (plus the key) to a temp directory,
// starts a test agent configured with those files, and asserts that the
// recorded Prometheus metrics output contains the agent TLS certificate
// expiry gauge under the "agent_3" metrics prefix.
func TestHTTPHandlers_AgentMetrics_TLSCertExpiry_Prometheus(t *testing.T) {
|
||||
skipIfShortTesting(t)
|
||||
// This test cannot use t.Parallel() since we modify global state, ie the global metrics instance
|
||||
|
||||
dir := testutil.TempDir(t, "ca")
|
||||
// Create a CA valid for 20 days and persist its PEM so the agent can load it.
caPEM, caPK, err := tlsutil.GenerateCA(tlsutil.CAOpts{Days: 20, Domain: "consul"})
|
||||
require.NoError(t, err)
|
||||
|
||||
caPath := filepath.Join(dir, "ca.pem")
|
||||
err = ioutil.WriteFile(caPath, []byte(caPEM), 0600)
|
||||
require.NoError(t, err)
|
||||
|
||||
signer, err := tlsutil.ParseSigner(caPK)
|
||||
require.NoError(t, err)
|
||||
|
||||
// Issue a certificate signed by the CA above, also valid for 20 days.
pem, key, err := tlsutil.GenerateCert(tlsutil.CertOpts{
|
||||
Signer: signer,
|
||||
CA: caPEM,
|
||||
Name: "server.dc1.consul",
|
||||
Days: 20,
|
||||
ExtKeyUsage: []x509.ExtKeyUsage{x509.ExtKeyUsageClientAuth},
|
||||
})
|
||||
require.NoError(t, err)
|
||||
|
||||
certPath := filepath.Join(dir, "cert.pem")
|
||||
err = ioutil.WriteFile(certPath, []byte(pem), 0600)
|
||||
require.NoError(t, err)
|
||||
|
||||
keyPath := filepath.Join(dir, "cert.key")
|
||||
err = ioutil.WriteFile(keyPath, []byte(key), 0600)
|
||||
require.NoError(t, err)
|
||||
|
||||
// Agent configuration: telemetry uses the "agent_3" metrics prefix and the
// TLS settings point at the files written above.
hcl := fmt.Sprintf(`
|
||||
telemetry = {
|
||||
prometheus_retention_time = "5s",
|
||||
disable_hostname = true
|
||||
metrics_prefix = "agent_3"
|
||||
}
|
||||
ca_file = "%s"
|
||||
cert_file = "%s"
|
||||
key_file = "%s"
|
||||
`, caPath, certPath, keyPath)
|
||||
|
||||
a := StartTestAgent(t, TestAgent{HCL: hcl})
|
||||
defer a.Shutdown()
|
||||
|
||||
respRec := httptest.NewRecorder()
|
||||
recordPromMetrics(t, a, respRec)
|
||||
|
||||
// Only the leading "1.7" of the gauge value is matched.
// NOTE(review): presumably a 20-day certificate reports roughly 1.728e+06
// seconds, which is why the prefix is "1.7" — confirm against the gauge's units.
require.Contains(t, respRec.Body.String(), "agent_3_agent_tls_cert_expiry 1.7")
|
||||
}
|
||||
|
|
|
@ -211,7 +211,7 @@ func getPrometheusDefs(cfg lib.TelemetryConfig, isServer bool) ([]prometheus.Gau
|
|||
xds.StatsGauges,
|
||||
usagemetrics.Gauges,
|
||||
consul.ReplicationGauges,
|
||||
consul.AgentCertExpirationGauges,
|
||||
CertExpirationGauges,
|
||||
Gauges,
|
||||
raftGauges,
|
||||
}
|
||||
|
|
Loading…
Reference in New Issue