telemetry: improve cert expiry metrics

Emit the metric immediately so that after restarting an agent, the new expiry time will be
emitted. This is particularly important when this metric is being monitored, because we want
the alert to resolve itself immediately.

Also fixed a bug that was exposed in one of these metrics. The CARoot can be nil, so we have
to handle that case.
This commit is contained in:
Daniel Nephin 2021-08-04 16:34:01 -04:00
parent fbcf9f3f6c
commit 0cc58f54de
1 changed file with 26 additions and 15 deletions

View File

@ -97,8 +97,11 @@ func signingCAExpiryMonitor(s *Server) CertExpirationMonitor {
func getActiveIntermediateExpiry(s *Server) (time.Duration, error) { func getActiveIntermediateExpiry(s *Server) (time.Duration, error) {
state := s.fsm.State() state := s.fsm.State()
_, root, err := state.CARootActive(nil) _, root, err := state.CARootActive(nil)
if err != nil { switch {
return 0, err case err != nil:
return 0, fmt.Errorf("failed to retrieve root CA: %w", err)
case root == nil:
return 0, fmt.Errorf("no active root CA")
} }
// the CA used in a secondary DC is the active intermediate, // the CA used in a secondary DC is the active intermediate,
@ -130,24 +133,32 @@ func (m CertExpirationMonitor) Monitor(ctx context.Context) error {
logger := m.Logger.With("metric", strings.Join(m.Key, ".")) logger := m.Logger.With("metric", strings.Join(m.Key, "."))
fn := func() {
d, err := m.Query()
if err != nil {
logger.Warn("failed to emit certificate expiry metric", "error", err)
return
}
if d < 24*time.Hour {
logger.Warn("certificate will expire soon",
"time_to_expiry", d, "expiration", time.Now().Add(d))
}
expiry := d / time.Second
metrics.SetGaugeWithLabels(m.Key, float32(expiry), m.Labels)
}
// emit the metric immediately so that if a cert was just updated the
// new metric will be updated to the new expiration time.
fn()
for { for {
select { select {
case <-ctx.Done(): case <-ctx.Done():
return nil return nil
case <-ticker.C: case <-ticker.C:
d, err := m.Query() fn()
if err != nil {
logger.Warn("failed to emit certificate expiry metric", "error", err)
continue
}
if d < 24*time.Hour {
logger.Warn("certificate will expire soon",
"time_to_expiry", d, "expiration", time.Now().Add(d))
}
expiry := d / time.Second
metrics.SetGaugeWithLabels(m.Key, float32(expiry), m.Labels)
} }
} }
} }