mirror of
https://github.com/status-im/consul.git
synced 2025-02-16 15:47:21 +00:00
agent: move agent tls metric monitor to a more appropriate place
And add a test for it
This commit is contained in:
parent
c92513ec16
commit
a8e2e1c365
@ -667,7 +667,7 @@ func (a *Agent) Start(ctx context.Context) error {
|
|||||||
}
|
}
|
||||||
|
|
||||||
if a.tlsConfigurator.Cert() != nil {
|
if a.tlsConfigurator.Cert() != nil {
|
||||||
m := consul.AgentTLSCertExpirationMonitor(a.tlsConfigurator, a.logger)
|
m := tlsCertExpirationMonitor(a.tlsConfigurator, a.logger)
|
||||||
go m.Monitor(&lib.StopChannelContext{StopCh: a.shutdownCh})
|
go m.Monitor(&lib.StopChannelContext{StopCh: a.shutdownCh})
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -2,7 +2,6 @@ package consul
|
|||||||
|
|
||||||
import (
|
import (
|
||||||
"context"
|
"context"
|
||||||
"crypto/x509"
|
|
||||||
"errors"
|
"errors"
|
||||||
"fmt"
|
"fmt"
|
||||||
"math"
|
"math"
|
||||||
@ -16,7 +15,6 @@ import (
|
|||||||
"github.com/hashicorp/consul/agent/connect"
|
"github.com/hashicorp/consul/agent/connect"
|
||||||
"github.com/hashicorp/consul/agent/connect/ca"
|
"github.com/hashicorp/consul/agent/connect/ca"
|
||||||
"github.com/hashicorp/consul/logging"
|
"github.com/hashicorp/consul/logging"
|
||||||
"github.com/hashicorp/consul/tlsutil"
|
|
||||||
)
|
)
|
||||||
|
|
||||||
var metricsKeyMeshRootCAExpiry = []string{"mesh", "active-root-ca", "expiry"}
|
var metricsKeyMeshRootCAExpiry = []string{"mesh", "active-root-ca", "expiry"}
|
||||||
@ -33,13 +31,6 @@ var LeaderCertExpirationGauges = []prometheus.GaugeDefinition{
|
|||||||
},
|
},
|
||||||
}
|
}
|
||||||
|
|
||||||
var AgentCertExpirationGauges = []prometheus.GaugeDefinition{
|
|
||||||
{
|
|
||||||
Name: metricsKeyAgentTLSCertExpiry,
|
|
||||||
Help: "Seconds until the agent tls certificate expires. Updated every hour",
|
|
||||||
},
|
|
||||||
}
|
|
||||||
|
|
||||||
func rootCAExpiryMonitor(s *Server) CertExpirationMonitor {
|
func rootCAExpiryMonitor(s *Server) CertExpirationMonitor {
|
||||||
return CertExpirationMonitor{
|
return CertExpirationMonitor{
|
||||||
Key: metricsKeyMeshRootCAExpiry,
|
Key: metricsKeyMeshRootCAExpiry,
|
||||||
@ -165,29 +156,6 @@ func (m CertExpirationMonitor) Monitor(ctx context.Context) error {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
var metricsKeyAgentTLSCertExpiry = []string{"agent", "tls", "cert", "expiry"}
|
|
||||||
|
|
||||||
// AgentTLSCertExpirationMonitor returns a CertExpirationMonitor which will
|
|
||||||
// monitor the expiration of the certificate used for agent TLS.
|
|
||||||
func AgentTLSCertExpirationMonitor(c *tlsutil.Configurator, logger hclog.Logger) CertExpirationMonitor {
|
|
||||||
return CertExpirationMonitor{
|
|
||||||
Key: metricsKeyAgentTLSCertExpiry,
|
|
||||||
Logger: logger,
|
|
||||||
Query: func() (time.Duration, error) {
|
|
||||||
raw := c.Cert()
|
|
||||||
if raw == nil {
|
|
||||||
return 0, fmt.Errorf("tls not enabled")
|
|
||||||
}
|
|
||||||
|
|
||||||
cert, err := x509.ParseCertificate(raw.Certificate[0])
|
|
||||||
if err != nil {
|
|
||||||
return 0, fmt.Errorf("failed to parse agent tls cert: %w", err)
|
|
||||||
}
|
|
||||||
return time.Until(cert.NotAfter), nil
|
|
||||||
},
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
// initLeaderMetrics sets all metrics that are emitted only on leaders to a NaN
|
// initLeaderMetrics sets all metrics that are emitted only on leaders to a NaN
|
||||||
// value so that they don't incorrectly report 0 when a server starts as a
|
// value so that they don't incorrectly report 0 when a server starts as a
|
||||||
// follower.
|
// follower.
|
||||||
|
43
agent/metrics.go
Normal file
43
agent/metrics.go
Normal file
@ -0,0 +1,43 @@
|
|||||||
|
package agent
|
||||||
|
|
||||||
|
import (
|
||||||
|
"crypto/x509"
|
||||||
|
"fmt"
|
||||||
|
"time"
|
||||||
|
|
||||||
|
"github.com/armon/go-metrics/prometheus"
|
||||||
|
"github.com/hashicorp/go-hclog"
|
||||||
|
|
||||||
|
"github.com/hashicorp/consul/agent/consul"
|
||||||
|
"github.com/hashicorp/consul/tlsutil"
|
||||||
|
)
|
||||||
|
|
||||||
|
var CertExpirationGauges = []prometheus.GaugeDefinition{
|
||||||
|
{
|
||||||
|
Name: metricsKeyAgentTLSCertExpiry,
|
||||||
|
Help: "Seconds until the agent tls certificate expires. Updated every hour",
|
||||||
|
},
|
||||||
|
}
|
||||||
|
|
||||||
|
var metricsKeyAgentTLSCertExpiry = []string{"agent", "tls", "cert", "expiry"}
|
||||||
|
|
||||||
|
// tlsCertExpirationMonitor returns a CertExpirationMonitor which will
|
||||||
|
// monitor the expiration of the certificate used for agent TLS.
|
||||||
|
func tlsCertExpirationMonitor(c *tlsutil.Configurator, logger hclog.Logger) consul.CertExpirationMonitor {
|
||||||
|
return consul.CertExpirationMonitor{
|
||||||
|
Key: metricsKeyAgentTLSCertExpiry,
|
||||||
|
Logger: logger,
|
||||||
|
Query: func() (time.Duration, error) {
|
||||||
|
raw := c.Cert()
|
||||||
|
if raw == nil {
|
||||||
|
return 0, fmt.Errorf("tls not enabled")
|
||||||
|
}
|
||||||
|
|
||||||
|
cert, err := x509.ParseCertificate(raw.Certificate[0])
|
||||||
|
if err != nil {
|
||||||
|
return 0, fmt.Errorf("failed to parse agent tls cert: %w", err)
|
||||||
|
}
|
||||||
|
return time.Until(cert.NotAfter), nil
|
||||||
|
},
|
||||||
|
}
|
||||||
|
}
|
@ -1,20 +1,29 @@
|
|||||||
package agent
|
package agent
|
||||||
|
|
||||||
import (
|
import (
|
||||||
"github.com/stretchr/testify/require"
|
"crypto/x509"
|
||||||
|
"fmt"
|
||||||
|
"io/ioutil"
|
||||||
"net/http"
|
"net/http"
|
||||||
"net/http/httptest"
|
"net/http/httptest"
|
||||||
|
"path/filepath"
|
||||||
"strings"
|
"strings"
|
||||||
"testing"
|
"testing"
|
||||||
|
|
||||||
|
"github.com/hashicorp/consul/sdk/testutil"
|
||||||
|
"github.com/hashicorp/consul/tlsutil"
|
||||||
|
|
||||||
|
"github.com/stretchr/testify/require"
|
||||||
)
|
)
|
||||||
|
|
||||||
func checkForShortTesting(t *testing.T) {
|
func skipIfShortTesting(t *testing.T) {
|
||||||
if testing.Short() {
|
if testing.Short() {
|
||||||
t.Skip("too slow for testing.Short")
|
t.Skip("too slow for testing.Short")
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
func recordPromMetrics(t *testing.T, a *TestAgent, respRec *httptest.ResponseRecorder) {
|
func recordPromMetrics(t *testing.T, a *TestAgent, respRec *httptest.ResponseRecorder) {
|
||||||
|
t.Helper()
|
||||||
req, err := http.NewRequest("GET", "/v1/agent/metrics?format=prometheus", nil)
|
req, err := http.NewRequest("GET", "/v1/agent/metrics?format=prometheus", nil)
|
||||||
require.NoError(t, err, "Failed to generate new http request.")
|
require.NoError(t, err, "Failed to generate new http request.")
|
||||||
|
|
||||||
@ -49,7 +58,7 @@ func assertMetricNotExists(t *testing.T, respRec *httptest.ResponseRecorder, met
|
|||||||
// TestHTTPHandlers_AgentMetrics_ConsulAutopilot_Prometheus adds testing around
|
// TestHTTPHandlers_AgentMetrics_ConsulAutopilot_Prometheus adds testing around
|
||||||
// the published autopilot metrics on https://www.consul.io/docs/agent/telemetry#autopilot
|
// the published autopilot metrics on https://www.consul.io/docs/agent/telemetry#autopilot
|
||||||
func TestHTTPHandlers_AgentMetrics_ConsulAutopilot_Prometheus(t *testing.T) {
|
func TestHTTPHandlers_AgentMetrics_ConsulAutopilot_Prometheus(t *testing.T) {
|
||||||
checkForShortTesting(t)
|
skipIfShortTesting(t)
|
||||||
// This test cannot use t.Parallel() since we modify global state, ie the global metrics instance
|
// This test cannot use t.Parallel() since we modify global state, ie the global metrics instance
|
||||||
|
|
||||||
t.Run("Check consul_autopilot_* are not emitted metrics on clients", func(t *testing.T) {
|
t.Run("Check consul_autopilot_* are not emitted metrics on clients", func(t *testing.T) {
|
||||||
@ -95,3 +104,55 @@ func TestHTTPHandlers_AgentMetrics_ConsulAutopilot_Prometheus(t *testing.T) {
|
|||||||
assertMetricExistsWithValue(t, respRec, "agent_2_autopilot_failure_tolerance", "NaN")
|
assertMetricExistsWithValue(t, respRec, "agent_2_autopilot_failure_tolerance", "NaN")
|
||||||
})
|
})
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func TestHTTPHandlers_AgentMetrics_TLSCertExpiry_Prometheus(t *testing.T) {
|
||||||
|
skipIfShortTesting(t)
|
||||||
|
// This test cannot use t.Parallel() since we modify global state, ie the global metrics instance
|
||||||
|
|
||||||
|
dir := testutil.TempDir(t, "ca")
|
||||||
|
caPEM, caPK, err := tlsutil.GenerateCA(tlsutil.CAOpts{Days: 20, Domain: "consul"})
|
||||||
|
require.NoError(t, err)
|
||||||
|
|
||||||
|
caPath := filepath.Join(dir, "ca.pem")
|
||||||
|
err = ioutil.WriteFile(caPath, []byte(caPEM), 0600)
|
||||||
|
require.NoError(t, err)
|
||||||
|
|
||||||
|
signer, err := tlsutil.ParseSigner(caPK)
|
||||||
|
require.NoError(t, err)
|
||||||
|
|
||||||
|
pem, key, err := tlsutil.GenerateCert(tlsutil.CertOpts{
|
||||||
|
Signer: signer,
|
||||||
|
CA: caPEM,
|
||||||
|
Name: "server.dc1.consul",
|
||||||
|
Days: 20,
|
||||||
|
ExtKeyUsage: []x509.ExtKeyUsage{x509.ExtKeyUsageClientAuth},
|
||||||
|
})
|
||||||
|
require.NoError(t, err)
|
||||||
|
|
||||||
|
certPath := filepath.Join(dir, "cert.pem")
|
||||||
|
err = ioutil.WriteFile(certPath, []byte(pem), 0600)
|
||||||
|
require.NoError(t, err)
|
||||||
|
|
||||||
|
keyPath := filepath.Join(dir, "cert.key")
|
||||||
|
err = ioutil.WriteFile(keyPath, []byte(key), 0600)
|
||||||
|
require.NoError(t, err)
|
||||||
|
|
||||||
|
hcl := fmt.Sprintf(`
|
||||||
|
telemetry = {
|
||||||
|
prometheus_retention_time = "5s",
|
||||||
|
disable_hostname = true
|
||||||
|
metrics_prefix = "agent_3"
|
||||||
|
}
|
||||||
|
ca_file = "%s"
|
||||||
|
cert_file = "%s"
|
||||||
|
key_file = "%s"
|
||||||
|
`, caPath, certPath, keyPath)
|
||||||
|
|
||||||
|
a := StartTestAgent(t, TestAgent{HCL: hcl})
|
||||||
|
defer a.Shutdown()
|
||||||
|
|
||||||
|
respRec := httptest.NewRecorder()
|
||||||
|
recordPromMetrics(t, a, respRec)
|
||||||
|
|
||||||
|
require.Contains(t, respRec.Body.String(), "agent_3_agent_tls_cert_expiry 1.7")
|
||||||
|
}
|
||||||
|
@ -211,7 +211,7 @@ func getPrometheusDefs(cfg lib.TelemetryConfig, isServer bool) ([]prometheus.Gau
|
|||||||
xds.StatsGauges,
|
xds.StatsGauges,
|
||||||
usagemetrics.Gauges,
|
usagemetrics.Gauges,
|
||||||
consul.ReplicationGauges,
|
consul.ReplicationGauges,
|
||||||
consul.AgentCertExpirationGauges,
|
CertExpirationGauges,
|
||||||
Gauges,
|
Gauges,
|
||||||
raftGauges,
|
raftGauges,
|
||||||
}
|
}
|
||||||
|
Loading…
x
Reference in New Issue
Block a user