mirror of https://github.com/status-im/consul.git
Merge pull request #10768 from hashicorp/dnephin/agent-tls-cert-expiration-metric
telemetry: add Agent TLS Certificate expiration metric
This commit is contained in:
commit
e94016872a
|
@ -0,0 +1,4 @@
|
||||||
|
```release-note:improvement
|
||||||
|
telemetry: add a new `agent.tls.cert.expiry` metric for tracking when the Agent TLS certificate expires.
|
||||||
|
```
|
||||||
|
|
|
@ -639,6 +639,11 @@ func (a *Agent) Start(ctx context.Context) error {
|
||||||
a.logger.Warn("DEPRECATED Backwards compatibility with pre-1.9 metrics enabled. These metrics will be removed in a future version of Consul. Set `telemetry { disable_compat_1.9 = true }` to disable them.")
|
a.logger.Warn("DEPRECATED Backwards compatibility with pre-1.9 metrics enabled. These metrics will be removed in a future version of Consul. Set `telemetry { disable_compat_1.9 = true }` to disable them.")
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if a.tlsConfigurator.Cert() != nil {
|
||||||
|
m := consul.AgentTLSCertExpirationMonitor(a.tlsConfigurator, a.logger, a.config.Datacenter)
|
||||||
|
go m.Monitor(&lib.StopChannelContext{StopCh: a.shutdownCh})
|
||||||
|
}
|
||||||
|
|
||||||
// consul version metric with labels
|
// consul version metric with labels
|
||||||
metrics.SetGaugeWithLabels([]string{"version"}, 1, []metrics.Label{
|
metrics.SetGaugeWithLabels([]string{"version"}, 1, []metrics.Label{
|
||||||
{Name: "version", Value: a.config.Version},
|
{Name: "version", Value: a.config.Version},
|
||||||
|
|
|
@ -34,8 +34,8 @@ func (s *Server) startConnectLeader(ctx context.Context) error {
|
||||||
|
|
||||||
s.caManager.Start(ctx)
|
s.caManager.Start(ctx)
|
||||||
s.leaderRoutineManager.Start(ctx, caRootPruningRoutineName, s.runCARootPruning)
|
s.leaderRoutineManager.Start(ctx, caRootPruningRoutineName, s.runCARootPruning)
|
||||||
s.leaderRoutineManager.Start(ctx, caRootMetricRoutineName, rootCAExpiryMonitor(s).monitor)
|
s.leaderRoutineManager.Start(ctx, caRootMetricRoutineName, rootCAExpiryMonitor(s).Monitor)
|
||||||
s.leaderRoutineManager.Start(ctx, caSigningMetricRoutineName, signingCAExpiryMonitor(s).monitor)
|
s.leaderRoutineManager.Start(ctx, caSigningMetricRoutineName, signingCAExpiryMonitor(s).Monitor)
|
||||||
|
|
||||||
return s.startIntentionConfigEntryMigration(ctx)
|
return s.startIntentionConfigEntryMigration(ctx)
|
||||||
}
|
}
|
||||||
|
|
|
@ -2,18 +2,20 @@ package consul
|
||||||
|
|
||||||
import (
|
import (
|
||||||
"context"
|
"context"
|
||||||
|
"crypto/x509"
|
||||||
"errors"
|
"errors"
|
||||||
"fmt"
|
"fmt"
|
||||||
|
"strings"
|
||||||
"time"
|
"time"
|
||||||
|
|
||||||
"github.com/hashicorp/consul/agent/connect/ca"
|
|
||||||
|
|
||||||
"github.com/hashicorp/consul/agent/connect"
|
|
||||||
|
|
||||||
"github.com/armon/go-metrics"
|
"github.com/armon/go-metrics"
|
||||||
"github.com/armon/go-metrics/prometheus"
|
"github.com/armon/go-metrics/prometheus"
|
||||||
"github.com/hashicorp/consul/logging"
|
|
||||||
"github.com/hashicorp/go-hclog"
|
"github.com/hashicorp/go-hclog"
|
||||||
|
|
||||||
|
"github.com/hashicorp/consul/agent/connect"
|
||||||
|
"github.com/hashicorp/consul/agent/connect/ca"
|
||||||
|
"github.com/hashicorp/consul/logging"
|
||||||
|
"github.com/hashicorp/consul/tlsutil"
|
||||||
)
|
)
|
||||||
|
|
||||||
var metricsKeyMeshRootCAExpiry = []string{"mesh", "active-root-ca", "expiry"}
|
var metricsKeyMeshRootCAExpiry = []string{"mesh", "active-root-ca", "expiry"}
|
||||||
|
@ -28,10 +30,14 @@ var CertExpirationGauges = []prometheus.GaugeDefinition{
|
||||||
Name: metricsKeyMeshActiveSigningCAExpiry,
|
Name: metricsKeyMeshActiveSigningCAExpiry,
|
||||||
Help: "Seconds until the service mesh signing certificate expires. Updated every hour",
|
Help: "Seconds until the service mesh signing certificate expires. Updated every hour",
|
||||||
},
|
},
|
||||||
|
{
|
||||||
|
Name: metricsKeyAgentTLSCertExpiry,
|
||||||
|
Help: "Seconds until the agent tls certificate expires. Updated every hour",
|
||||||
|
},
|
||||||
}
|
}
|
||||||
|
|
||||||
func rootCAExpiryMonitor(s *Server) certExpirationMonitor {
|
func rootCAExpiryMonitor(s *Server) CertExpirationMonitor {
|
||||||
return certExpirationMonitor{
|
return CertExpirationMonitor{
|
||||||
Key: metricsKeyMeshRootCAExpiry,
|
Key: metricsKeyMeshRootCAExpiry,
|
||||||
Labels: []metrics.Label{
|
Labels: []metrics.Label{
|
||||||
{Name: "datacenter", Value: s.config.Datacenter},
|
{Name: "datacenter", Value: s.config.Datacenter},
|
||||||
|
@ -56,10 +62,10 @@ func getRootCAExpiry(s *Server) (time.Duration, error) {
|
||||||
return time.Until(root.NotAfter), nil
|
return time.Until(root.NotAfter), nil
|
||||||
}
|
}
|
||||||
|
|
||||||
func signingCAExpiryMonitor(s *Server) certExpirationMonitor {
|
func signingCAExpiryMonitor(s *Server) CertExpirationMonitor {
|
||||||
isPrimary := s.config.Datacenter == s.config.PrimaryDatacenter
|
isPrimary := s.config.Datacenter == s.config.PrimaryDatacenter
|
||||||
if isPrimary {
|
if isPrimary {
|
||||||
return certExpirationMonitor{
|
return CertExpirationMonitor{
|
||||||
Key: metricsKeyMeshActiveSigningCAExpiry,
|
Key: metricsKeyMeshActiveSigningCAExpiry,
|
||||||
Labels: []metrics.Label{
|
Labels: []metrics.Label{
|
||||||
{Name: "datacenter", Value: s.config.Datacenter},
|
{Name: "datacenter", Value: s.config.Datacenter},
|
||||||
|
@ -68,25 +74,23 @@ func signingCAExpiryMonitor(s *Server) certExpirationMonitor {
|
||||||
Query: func() (time.Duration, error) {
|
Query: func() (time.Duration, error) {
|
||||||
provider, _ := s.caManager.getCAProvider()
|
provider, _ := s.caManager.getCAProvider()
|
||||||
|
|
||||||
if _, ok := provider.(ca.PrimaryUsesIntermediate); !ok {
|
if _, ok := provider.(ca.PrimaryUsesIntermediate); ok {
|
||||||
return getActiveIntermediateExpiry(s)
|
return getActiveIntermediateExpiry(s)
|
||||||
}
|
}
|
||||||
|
|
||||||
return getRootCAExpiry(s)
|
return getRootCAExpiry(s)
|
||||||
|
},
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
},
|
return CertExpirationMonitor{
|
||||||
}
|
Key: metricsKeyMeshActiveSigningCAExpiry,
|
||||||
} else {
|
Labels: []metrics.Label{
|
||||||
return certExpirationMonitor{
|
{Name: "datacenter", Value: s.config.Datacenter},
|
||||||
Key: metricsKeyMeshActiveSigningCAExpiry,
|
},
|
||||||
Labels: []metrics.Label{
|
Logger: s.logger.Named(logging.Connect),
|
||||||
{Name: "datacenter", Value: s.config.Datacenter},
|
Query: func() (time.Duration, error) {
|
||||||
},
|
return getActiveIntermediateExpiry(s)
|
||||||
Logger: s.logger.Named(logging.Connect),
|
},
|
||||||
Query: func() (time.Duration, error) {
|
|
||||||
return getActiveIntermediateExpiry(s)
|
|
||||||
},
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -109,7 +113,7 @@ func getActiveIntermediateExpiry(s *Server) (time.Duration, error) {
|
||||||
return time.Until(cert.NotAfter), nil
|
return time.Until(cert.NotAfter), nil
|
||||||
}
|
}
|
||||||
|
|
||||||
type certExpirationMonitor struct {
|
type CertExpirationMonitor struct {
|
||||||
Key []string
|
Key []string
|
||||||
Labels []metrics.Label
|
Labels []metrics.Label
|
||||||
Logger hclog.Logger
|
Logger hclog.Logger
|
||||||
|
@ -120,10 +124,12 @@ type certExpirationMonitor struct {
|
||||||
|
|
||||||
const certExpirationMonitorInterval = time.Hour
|
const certExpirationMonitorInterval = time.Hour
|
||||||
|
|
||||||
func (m certExpirationMonitor) monitor(ctx context.Context) error {
|
func (m CertExpirationMonitor) Monitor(ctx context.Context) error {
|
||||||
ticker := time.NewTicker(certExpirationMonitorInterval)
|
ticker := time.NewTicker(certExpirationMonitorInterval)
|
||||||
defer ticker.Stop()
|
defer ticker.Stop()
|
||||||
|
|
||||||
|
logger := m.Logger.With("metric", strings.Join(m.Key, "."))
|
||||||
|
|
||||||
for {
|
for {
|
||||||
select {
|
select {
|
||||||
case <-ctx.Done():
|
case <-ctx.Done():
|
||||||
|
@ -131,10 +137,38 @@ func (m certExpirationMonitor) monitor(ctx context.Context) error {
|
||||||
case <-ticker.C:
|
case <-ticker.C:
|
||||||
d, err := m.Query()
|
d, err := m.Query()
|
||||||
if err != nil {
|
if err != nil {
|
||||||
m.Logger.Warn("failed to emit certificate expiry metric", "error", err)
|
logger.Warn("failed to emit certificate expiry metric", "error", err)
|
||||||
|
continue
|
||||||
}
|
}
|
||||||
expiry := d / time.Second
|
expiry := d / time.Second
|
||||||
metrics.SetGaugeWithLabels(m.Key, float32(expiry), m.Labels)
|
metrics.SetGaugeWithLabels(m.Key, float32(expiry), m.Labels)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
var metricsKeyAgentTLSCertExpiry = []string{"agent", "tls", "cert", "expiry"}
|
||||||
|
|
||||||
|
// AgentTLSCertExpirationMonitor returns a CertExpirationMonitor which will
|
||||||
|
// monitor the expiration of the certificate used for agent TLS.
|
||||||
|
func AgentTLSCertExpirationMonitor(c *tlsutil.Configurator, logger hclog.Logger, dc string) CertExpirationMonitor {
|
||||||
|
return CertExpirationMonitor{
|
||||||
|
Key: metricsKeyAgentTLSCertExpiry,
|
||||||
|
Labels: []metrics.Label{
|
||||||
|
{Name: "node", Value: c.Base().NodeName},
|
||||||
|
{Name: "datacenter", Value: dc},
|
||||||
|
},
|
||||||
|
Logger: logger,
|
||||||
|
Query: func() (time.Duration, error) {
|
||||||
|
raw := c.Cert()
|
||||||
|
if raw == nil {
|
||||||
|
return 0, fmt.Errorf("tls not enabled")
|
||||||
|
}
|
||||||
|
|
||||||
|
cert, err := x509.ParseCertificate(raw.Certificate[0])
|
||||||
|
if err != nil {
|
||||||
|
return 0, fmt.Errorf("failed to parse agent tls cert: %w", err)
|
||||||
|
}
|
||||||
|
return time.Until(cert.NotAfter), nil
|
||||||
|
},
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
|
@ -480,6 +480,7 @@ These metrics give insight into the health of the cluster as a whole.
|
||||||
| `consul.catalog.connect.not-found.` | Increments for each connect-based catalog query where the given service could not be found. | queries | counter |
|
| `consul.catalog.connect.not-found.` | Increments for each connect-based catalog query where the given service could not be found. | queries | counter |
|
||||||
| `consul.mesh.active-root-ca.expiry` | The number of seconds until the root CA expires, updated every hour. | seconds | gauge |
|
| `consul.mesh.active-root-ca.expiry` | The number of seconds until the root CA expires, updated every hour. | seconds | gauge |
|
||||||
| `consul.mesh.active-signing-ca.expiry` | The number of seconds until the signing CA expires, updated every hour. | seconds | gauge |
|
| `consul.mesh.active-signing-ca.expiry` | The number of seconds until the signing CA expires, updated every hour. | seconds | gauge |
|
||||||
|
| `consul.agent.tls.cert.expiry` | The number of seconds until the Agent TLS certificate expires, updated every hour. | seconds | gauge |
|
||||||
|
|
||||||
## Connect Built-in Proxy Metrics
|
## Connect Built-in Proxy Metrics
|
||||||
|
|
||||||
|
|
Loading…
Reference in New Issue