mirror of https://github.com/status-im/consul.git
telemetry: fix cert expiry metrics by removing labels
These labels should be set by whatever process scrapes Consul (for prometheus), or by the agent that receives them (for datadog/statsd). We need to remove them here because the labels are part of the "metric key", so we'd have to pre-declare the metrics with the labels. We could do that, but that is extra work for labels that should be added from elsewhere. Also renames the closure to be more descriptive.
This commit is contained in:
parent
7948720bbb
commit
9264ce89d2
|
@ -42,10 +42,7 @@ var AgentCertExpirationGauges = []prometheus.GaugeDefinition{
|
|||
|
||||
func rootCAExpiryMonitor(s *Server) CertExpirationMonitor {
|
||||
return CertExpirationMonitor{
|
||||
Key: metricsKeyMeshRootCAExpiry,
|
||||
Labels: []metrics.Label{
|
||||
{Name: "datacenter", Value: s.config.Datacenter},
|
||||
},
|
||||
Key: metricsKeyMeshRootCAExpiry,
|
||||
Logger: s.logger.Named(logging.Connect),
|
||||
Query: func() (time.Duration, error) {
|
||||
return getRootCAExpiry(s)
|
||||
|
@ -70,10 +67,7 @@ func signingCAExpiryMonitor(s *Server) CertExpirationMonitor {
|
|||
isPrimary := s.config.Datacenter == s.config.PrimaryDatacenter
|
||||
if isPrimary {
|
||||
return CertExpirationMonitor{
|
||||
Key: metricsKeyMeshActiveSigningCAExpiry,
|
||||
Labels: []metrics.Label{
|
||||
{Name: "datacenter", Value: s.config.Datacenter},
|
||||
},
|
||||
Key: metricsKeyMeshActiveSigningCAExpiry,
|
||||
Logger: s.logger.Named(logging.Connect),
|
||||
Query: func() (time.Duration, error) {
|
||||
provider, _ := s.caManager.getCAProvider()
|
||||
|
@ -87,10 +81,7 @@ func signingCAExpiryMonitor(s *Server) CertExpirationMonitor {
|
|||
}
|
||||
|
||||
return CertExpirationMonitor{
|
||||
Key: metricsKeyMeshActiveSigningCAExpiry,
|
||||
Labels: []metrics.Label{
|
||||
{Name: "datacenter", Value: s.config.Datacenter},
|
||||
},
|
||||
Key: metricsKeyMeshActiveSigningCAExpiry,
|
||||
Logger: s.logger.Named(logging.Connect),
|
||||
Query: func() (time.Duration, error) {
|
||||
return getActiveIntermediateExpiry(s)
|
||||
|
@ -121,7 +112,11 @@ func getActiveIntermediateExpiry(s *Server) (time.Duration, error) {
|
|||
}
|
||||
|
||||
type CertExpirationMonitor struct {
|
||||
Key []string
|
||||
Key []string
|
||||
// Labels to be emitted along with the metric. It is very important that these
|
||||
// labels be included in the pre-declaration as well. Otherwise, if
|
||||
// telemetry.prometheus_retention_time is less than certExpirationMonitorInterval
|
||||
// then the metrics will expire before they are emitted again.
|
||||
Labels []metrics.Label
|
||||
Logger hclog.Logger
|
||||
// Query is called at each interval. It should return the duration until the
|
||||
|
@ -137,7 +132,7 @@ func (m CertExpirationMonitor) Monitor(ctx context.Context) error {
|
|||
|
||||
logger := m.Logger.With("metric", strings.Join(m.Key, "."))
|
||||
|
||||
fn := func() {
|
||||
emitMetric := func() {
|
||||
d, err := m.Query()
|
||||
if err != nil {
|
||||
logger.Warn("failed to emit certificate expiry metric", "error", err)
|
||||
|
@ -155,17 +150,17 @@ func (m CertExpirationMonitor) Monitor(ctx context.Context) error {
|
|||
|
||||
// emit the metric immediately so that if a cert was just updated the
|
||||
// new metric will be updated to the new expiration time.
|
||||
fn()
|
||||
emitMetric()
|
||||
|
||||
for {
|
||||
select {
|
||||
case <-ctx.Done():
|
||||
// "Zero-out" the metric on exit so that when prometheus scrapes this
|
||||
// metric from a non-leader, it does not get a stale value.
|
||||
metrics.SetGauge(m.Key, float32(math.NaN()))
|
||||
metrics.SetGaugeWithLabels(m.Key, float32(math.NaN()), m.Labels)
|
||||
return nil
|
||||
case <-ticker.C:
|
||||
fn()
|
||||
emitMetric()
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -176,11 +171,7 @@ var metricsKeyAgentTLSCertExpiry = []string{"agent", "tls", "cert", "expiry"}
|
|||
// monitor the expiration of the certificate used for agent TLS.
|
||||
func AgentTLSCertExpirationMonitor(c *tlsutil.Configurator, logger hclog.Logger, dc string) CertExpirationMonitor {
|
||||
return CertExpirationMonitor{
|
||||
Key: metricsKeyAgentTLSCertExpiry,
|
||||
Labels: []metrics.Label{
|
||||
{Name: "node", Value: c.Base().NodeName},
|
||||
{Name: "datacenter", Value: dc},
|
||||
},
|
||||
Key: metricsKeyAgentTLSCertExpiry,
|
||||
Logger: logger,
|
||||
Query: func() (time.Duration, error) {
|
||||
raw := c.Cert()
|
||||
|
|
Loading…
Reference in New Issue