mirror of
https://github.com/status-im/consul.git
synced 2025-01-15 08:14:54 +00:00
telemetry: fix cert expiry metrics by removing labels
These labels should be set by whatever process scrapes Consul (for Prometheus), or by the agent that receives the metrics (for Datadog/StatsD). We need to remove them here because the labels are part of the "metric key", so we would have to pre-declare the metrics with the labels. We could do that, but it would be extra work for labels that should be added elsewhere. Also renames the closure to be more descriptive.
This commit is contained in:
parent
7948720bbb
commit
9264ce89d2
@ -42,10 +42,7 @@ var AgentCertExpirationGauges = []prometheus.GaugeDefinition{
|
|||||||
|
|
||||||
func rootCAExpiryMonitor(s *Server) CertExpirationMonitor {
|
func rootCAExpiryMonitor(s *Server) CertExpirationMonitor {
|
||||||
return CertExpirationMonitor{
|
return CertExpirationMonitor{
|
||||||
Key: metricsKeyMeshRootCAExpiry,
|
Key: metricsKeyMeshRootCAExpiry,
|
||||||
Labels: []metrics.Label{
|
|
||||||
{Name: "datacenter", Value: s.config.Datacenter},
|
|
||||||
},
|
|
||||||
Logger: s.logger.Named(logging.Connect),
|
Logger: s.logger.Named(logging.Connect),
|
||||||
Query: func() (time.Duration, error) {
|
Query: func() (time.Duration, error) {
|
||||||
return getRootCAExpiry(s)
|
return getRootCAExpiry(s)
|
||||||
@ -70,10 +67,7 @@ func signingCAExpiryMonitor(s *Server) CertExpirationMonitor {
|
|||||||
isPrimary := s.config.Datacenter == s.config.PrimaryDatacenter
|
isPrimary := s.config.Datacenter == s.config.PrimaryDatacenter
|
||||||
if isPrimary {
|
if isPrimary {
|
||||||
return CertExpirationMonitor{
|
return CertExpirationMonitor{
|
||||||
Key: metricsKeyMeshActiveSigningCAExpiry,
|
Key: metricsKeyMeshActiveSigningCAExpiry,
|
||||||
Labels: []metrics.Label{
|
|
||||||
{Name: "datacenter", Value: s.config.Datacenter},
|
|
||||||
},
|
|
||||||
Logger: s.logger.Named(logging.Connect),
|
Logger: s.logger.Named(logging.Connect),
|
||||||
Query: func() (time.Duration, error) {
|
Query: func() (time.Duration, error) {
|
||||||
provider, _ := s.caManager.getCAProvider()
|
provider, _ := s.caManager.getCAProvider()
|
||||||
@ -87,10 +81,7 @@ func signingCAExpiryMonitor(s *Server) CertExpirationMonitor {
|
|||||||
}
|
}
|
||||||
|
|
||||||
return CertExpirationMonitor{
|
return CertExpirationMonitor{
|
||||||
Key: metricsKeyMeshActiveSigningCAExpiry,
|
Key: metricsKeyMeshActiveSigningCAExpiry,
|
||||||
Labels: []metrics.Label{
|
|
||||||
{Name: "datacenter", Value: s.config.Datacenter},
|
|
||||||
},
|
|
||||||
Logger: s.logger.Named(logging.Connect),
|
Logger: s.logger.Named(logging.Connect),
|
||||||
Query: func() (time.Duration, error) {
|
Query: func() (time.Duration, error) {
|
||||||
return getActiveIntermediateExpiry(s)
|
return getActiveIntermediateExpiry(s)
|
||||||
@ -121,7 +112,11 @@ func getActiveIntermediateExpiry(s *Server) (time.Duration, error) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
type CertExpirationMonitor struct {
|
type CertExpirationMonitor struct {
|
||||||
Key []string
|
Key []string
|
||||||
|
// Labels to be emitted along with the metric. It is very important that these
|
||||||
|
// labels be included in the pre-declaration as well. Otherwise, if
|
||||||
|
// telemetry.prometheus_retention_time is less than certExpirationMonitorInterval
|
||||||
|
// then the metrics will expire before they are emitted again.
|
||||||
Labels []metrics.Label
|
Labels []metrics.Label
|
||||||
Logger hclog.Logger
|
Logger hclog.Logger
|
||||||
// Query is called at each interval. It should return the duration until the
|
// Query is called at each interval. It should return the duration until the
|
||||||
@ -137,7 +132,7 @@ func (m CertExpirationMonitor) Monitor(ctx context.Context) error {
|
|||||||
|
|
||||||
logger := m.Logger.With("metric", strings.Join(m.Key, "."))
|
logger := m.Logger.With("metric", strings.Join(m.Key, "."))
|
||||||
|
|
||||||
fn := func() {
|
emitMetric := func() {
|
||||||
d, err := m.Query()
|
d, err := m.Query()
|
||||||
if err != nil {
|
if err != nil {
|
||||||
logger.Warn("failed to emit certificate expiry metric", "error", err)
|
logger.Warn("failed to emit certificate expiry metric", "error", err)
|
||||||
@ -155,17 +150,17 @@ func (m CertExpirationMonitor) Monitor(ctx context.Context) error {
|
|||||||
|
|
||||||
// emit the metric immediately so that if a cert was just updated the
|
// emit the metric immediately so that if a cert was just updated the
|
||||||
// new metric will be updated to the new expiration time.
|
// new metric will be updated to the new expiration time.
|
||||||
fn()
|
emitMetric()
|
||||||
|
|
||||||
for {
|
for {
|
||||||
select {
|
select {
|
||||||
case <-ctx.Done():
|
case <-ctx.Done():
|
||||||
// "Zero-out" the metric on exit so that when prometheus scrapes this
|
// "Zero-out" the metric on exit so that when prometheus scrapes this
|
||||||
// metric from a non-leader, it does not get a stale value.
|
// metric from a non-leader, it does not get a stale value.
|
||||||
metrics.SetGauge(m.Key, float32(math.NaN()))
|
metrics.SetGaugeWithLabels(m.Key, float32(math.NaN()), m.Labels)
|
||||||
return nil
|
return nil
|
||||||
case <-ticker.C:
|
case <-ticker.C:
|
||||||
fn()
|
emitMetric()
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -176,11 +171,7 @@ var metricsKeyAgentTLSCertExpiry = []string{"agent", "tls", "cert", "expiry"}
|
|||||||
// monitor the expiration of the certificate used for agent TLS.
|
// monitor the expiration of the certificate used for agent TLS.
|
||||||
func AgentTLSCertExpirationMonitor(c *tlsutil.Configurator, logger hclog.Logger, dc string) CertExpirationMonitor {
|
func AgentTLSCertExpirationMonitor(c *tlsutil.Configurator, logger hclog.Logger, dc string) CertExpirationMonitor {
|
||||||
return CertExpirationMonitor{
|
return CertExpirationMonitor{
|
||||||
Key: metricsKeyAgentTLSCertExpiry,
|
Key: metricsKeyAgentTLSCertExpiry,
|
||||||
Labels: []metrics.Label{
|
|
||||||
{Name: "node", Value: c.Base().NodeName},
|
|
||||||
{Name: "datacenter", Value: dc},
|
|
||||||
},
|
|
||||||
Logger: logger,
|
Logger: logger,
|
||||||
Query: func() (time.Duration, error) {
|
Query: func() (time.Duration, error) {
|
||||||
raw := c.Cert()
|
raw := c.Cert()
|
||||||
|
Loading…
x
Reference in New Issue
Block a user