mirror of
https://github.com/status-im/consul.git
synced 2025-01-24 12:40:17 +00:00
fix consul_autopilot_healthy metric emission (#11231)
https://github.com/hashicorp/consul/issues/10730
This commit is contained in:
parent
fca0e117e5
commit
a0bba9171d
3
.changelog/11231.txt
Normal file
3
.changelog/11231.txt
Normal file
@ -0,0 +1,3 @@
|
||||
```release-note:bug
|
||||
telemetry: fixes a bug with Prometheus consul_autopilot_healthy metric where 0 is reported instead of NaN on servers.
|
||||
```
|
@ -9,6 +9,7 @@ import (
|
||||
"github.com/hashicorp/raft"
|
||||
autopilot "github.com/hashicorp/raft-autopilot"
|
||||
"github.com/hashicorp/serf/serf"
|
||||
"math"
|
||||
|
||||
"github.com/hashicorp/consul/agent/metadata"
|
||||
"github.com/hashicorp/consul/agent/structs"
|
||||
@ -52,6 +53,12 @@ func (d *AutopilotDelegate) NotifyState(state *autopilot.State) {
|
||||
} else {
|
||||
metrics.SetGauge([]string{"autopilot", "healthy"}, 0)
|
||||
}
|
||||
} else {
|
||||
|
||||
// if we are not a leader, emit NaN per
|
||||
// https://www.consul.io/docs/agent/telemetry#autopilot
|
||||
metrics.SetGauge([]string{"autopilot", "healthy"}, float32(math.NaN()))
|
||||
|
||||
}
|
||||
}
|
||||
|
||||
@ -74,6 +81,8 @@ func (s *Server) initAutopilot(config *Config) {
|
||||
autopilot.WithUpdateInterval(config.ServerHealthInterval),
|
||||
autopilot.WithPromoter(s.autopilotPromoter()),
|
||||
)
|
||||
|
||||
metrics.SetGauge([]string{"autopilot", "healthy"}, float32(math.NaN()))
|
||||
}
|
||||
|
||||
func (s *Server) autopilotServers() map[raft.ServerID]*autopilot.Server {
|
||||
|
53
agent/metrics_test.go
Normal file
53
agent/metrics_test.go
Normal file
@ -0,0 +1,53 @@
|
||||
package agent
|
||||
|
||||
import (
|
||||
"net/http"
|
||||
"net/http/httptest"
|
||||
"strings"
|
||||
"testing"
|
||||
)
|
||||
|
||||
// TestHTTPHandlers_AgentMetrics_ConsulAutopilotHealthy_Prometheus adds testing around
|
||||
// the published autopilot metrics on https://www.consul.io/docs/agent/telemetry#autopilot
|
||||
func TestHTTPHandlers_AgentMetrics_ConsulAutopilotHealthy_Prometheus(t *testing.T) {
|
||||
checkForShortTesting(t)
|
||||
// This test cannot use t.Parallel() since we modify global state, ie the global metrics instance
|
||||
|
||||
// don't bootstrap agent so as not to
|
||||
// become a leader
|
||||
hcl := `
|
||||
telemetry = {
|
||||
prometheus_retention_time = "5s",
|
||||
disable_hostname = true
|
||||
}
|
||||
bootstrap = false
|
||||
`
|
||||
|
||||
a := StartTestAgent(t, TestAgent{HCL: hcl})
|
||||
defer a.Shutdown()
|
||||
|
||||
req, err := http.NewRequest("GET", "/v1/agent/metrics?format=prometheus", nil)
|
||||
if err != nil {
|
||||
t.Fatalf("Failed to generate new http request. err: %v", err)
|
||||
}
|
||||
|
||||
respRec := httptest.NewRecorder()
|
||||
_, err = a.srv.AgentMetrics(respRec, req)
|
||||
if err != nil {
|
||||
t.Fatalf("Failed to serve agent metrics. err: %v", err)
|
||||
}
|
||||
|
||||
t.Run("Check consul_autopilot_healthy metric value on startup", func(t *testing.T) {
|
||||
target := "consul_autopilot_healthy NaN"
|
||||
keyValue := strings.Split(target, " ")
|
||||
if !strings.Contains(respRec.Body.String(), target) {
|
||||
t.Fatalf("Could not find the metric \"%s\" with value \"%s\" in the /v1/agent/metrics response", keyValue[0], keyValue[1])
|
||||
}
|
||||
})
|
||||
}
|
||||
|
||||
func checkForShortTesting(t *testing.T) {
|
||||
if testing.Short() {
|
||||
t.Skip("too slow for testing.Short")
|
||||
}
|
||||
}
|
@ -185,7 +185,12 @@ func (a *TestAgent) Start(t *testing.T) error {
|
||||
}
|
||||
result, err := config.Load(opts)
|
||||
if result.RuntimeConfig != nil {
|
||||
result.RuntimeConfig.Telemetry.Disable = true
|
||||
// If prom metrics need to be enabled, do not disable telemetry
|
||||
if result.RuntimeConfig.Telemetry.PrometheusOpts.Expiration > 0 {
|
||||
result.RuntimeConfig.Telemetry.Disable = false
|
||||
} else {
|
||||
result.RuntimeConfig.Telemetry.Disable = true
|
||||
}
|
||||
}
|
||||
return result, err
|
||||
}
|
||||
@ -195,7 +200,11 @@ func (a *TestAgent) Start(t *testing.T) error {
|
||||
}
|
||||
|
||||
bd.Logger = logger
|
||||
bd.MetricsHandler = metrics.NewInmemSink(1*time.Second, time.Minute)
|
||||
// if we are not testing telemetry things, let's use a "mock" sink for metrics
|
||||
if bd.RuntimeConfig.Telemetry.Disable {
|
||||
bd.MetricsHandler = metrics.NewInmemSink(1*time.Second, time.Minute)
|
||||
}
|
||||
|
||||
a.Config = bd.RuntimeConfig
|
||||
|
||||
agent, err := New(bd)
|
||||
|
Loading…
x
Reference in New Issue
Block a user