Remove redundant usage metrics (#20674)

* Remove redundant usage metrics

* Add the changelog

* Update website/content/docs/upgrading/upgrade-specific.mdx

Co-authored-by: Jeff Boruszak <104028618+boruszak@users.noreply.github.com>

* Update website/content/docs/upgrading/upgrade-specific.mdx

Co-authored-by: Jeff Boruszak <104028618+boruszak@users.noreply.github.com>

* Update website/content/docs/upgrading/upgrade-specific.mdx

Co-authored-by: Jeff Boruszak <104028618+boruszak@users.noreply.github.com>

* Update website/content/docs/upgrading/upgrade-specific.mdx

Co-authored-by: Jeff Boruszak <104028618+boruszak@users.noreply.github.com>

* Update website/content/docs/upgrading/upgrade-specific.mdx

Co-authored-by: Jeff Boruszak <104028618+boruszak@users.noreply.github.com>

---------

Co-authored-by: Jeff Boruszak <104028618+boruszak@users.noreply.github.com>
This commit is contained in:
Matt Keeler 2024-03-05 14:09:47 -05:00 committed by GitHub
parent 4e7982a5b7
commit abe14f11e6
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
6 changed files with 99 additions and 632 deletions

7
.changelog/20674.txt Normal file
View File

@ -0,0 +1,7 @@
```release-note:breaking-change
telemetry: State store usage metrics with a double `consul` element in the metric name have been removed. Please use the same metric without the second `consul` instead. For example, instead of `consul.consul.state.config_entries`, use `consul.state.config_entries`.
```
```release-note:improvement
telemetry: Improved the performance of usage metrics emission by not outputting redundant metrics.
```

View File

@ -20,74 +20,38 @@ import (
) )
var Gauges = []prometheus.GaugeDefinition{ var Gauges = []prometheus.GaugeDefinition{
{
Name: []string{"consul", "state", "nodes"},
Help: "Deprecated - please use state_nodes instead.",
},
{ {
Name: []string{"state", "nodes"}, Name: []string{"state", "nodes"},
Help: "Measures the current number of nodes registered with Consul. It is only emitted by Consul servers. Added in v1.9.0.", Help: "Measures the current number of nodes registered with Consul. It is only emitted by Consul servers. Added in v1.9.0.",
}, },
{
Name: []string{"consul", "state", "peerings"},
Help: "Deprecated - please use state_peerings instead.",
},
{ {
Name: []string{"state", "peerings"}, Name: []string{"state", "peerings"},
Help: "Measures the current number of peerings registered with Consul. It is only emitted by Consul servers. Added in v1.13.0.", Help: "Measures the current number of peerings registered with Consul. It is only emitted by Consul servers. Added in v1.13.0.",
}, },
{
Name: []string{"consul", "state", "services"},
Help: "Deprecated - please use state_services instead.",
},
{ {
Name: []string{"state", "services"}, Name: []string{"state", "services"},
Help: "Measures the current number of unique services registered with Consul, based on service name. It is only emitted by Consul servers. Added in v1.9.0.", Help: "Measures the current number of unique services registered with Consul, based on service name. It is only emitted by Consul servers. Added in v1.9.0.",
}, },
{
Name: []string{"consul", "state", "service_instances"},
Help: "Deprecated - please use state_service_instances instead.",
},
{ {
Name: []string{"state", "service_instances"}, Name: []string{"state", "service_instances"},
Help: "Measures the current number of unique services registered with Consul, based on service name. It is only emitted by Consul servers. Added in v1.9.0.", Help: "Measures the current number of unique services registered with Consul, based on service name. It is only emitted by Consul servers. Added in v1.9.0.",
}, },
{
Name: []string{"consul", "members", "clients"},
Help: "Deprecated - please use members_clients instead.",
},
{ {
Name: []string{"members", "clients"}, Name: []string{"members", "clients"},
Help: "Measures the current number of client agents registered with Consul. It is only emitted by Consul servers. Added in v1.9.6.", Help: "Measures the current number of client agents registered with Consul. It is only emitted by Consul servers. Added in v1.9.6.",
}, },
{
Name: []string{"consul", "members", "servers"},
Help: "Deprecated - please use members_servers instead.",
},
{ {
Name: []string{"members", "servers"}, Name: []string{"members", "servers"},
Help: "Measures the current number of server agents registered with Consul. It is only emitted by Consul servers. Added in v1.9.6.", Help: "Measures the current number of server agents registered with Consul. It is only emitted by Consul servers. Added in v1.9.6.",
}, },
{
Name: []string{"consul", "state", "kv_entries"},
Help: "Deprecated - please use kv_entries instead.",
},
{ {
Name: []string{"state", "kv_entries"}, Name: []string{"state", "kv_entries"},
Help: "Measures the current number of entries in the Consul KV store. It is only emitted by Consul servers. Added in v1.10.3.", Help: "Measures the current number of entries in the Consul KV store. It is only emitted by Consul servers. Added in v1.10.3.",
}, },
{
Name: []string{"consul", "state", "connect_instances"},
Help: "Deprecated - please use state_connect_instances instead.",
},
{ {
Name: []string{"state", "connect_instances"}, Name: []string{"state", "connect_instances"},
Help: "Measures the current number of unique connect service instances registered with Consul, labeled by Kind. It is only emitted by Consul servers. Added in v1.10.4.", Help: "Measures the current number of unique connect service instances registered with Consul, labeled by Kind. It is only emitted by Consul servers. Added in v1.10.4.",
}, },
{
Name: []string{"consul", "state", "config_entries"},
Help: "Deprecated - please use state_config_entries instead.",
},
{ {
Name: []string{"state", "config_entries"}, Name: []string{"state", "config_entries"},
Help: "Measures the current number of unique configuration entries registered with Consul, labeled by Kind. It is only emitted by Consul servers. Added in v1.10.4.", Help: "Measures the current number of unique configuration entries registered with Consul, labeled by Kind. It is only emitted by Consul servers. Added in v1.10.4.",

View File

@ -15,11 +15,6 @@ import (
) )
func (u *UsageMetricsReporter) emitNodeUsage(nodeUsage state.NodeUsage) { func (u *UsageMetricsReporter) emitNodeUsage(nodeUsage state.NodeUsage) {
metrics.SetGaugeWithLabels(
[]string{"consul", "state", "nodes"},
float32(nodeUsage.Nodes),
u.metricLabels,
)
metrics.SetGaugeWithLabels( metrics.SetGaugeWithLabels(
[]string{"state", "nodes"}, []string{"state", "nodes"},
float32(nodeUsage.Nodes), float32(nodeUsage.Nodes),
@ -28,11 +23,6 @@ func (u *UsageMetricsReporter) emitNodeUsage(nodeUsage state.NodeUsage) {
} }
func (u *UsageMetricsReporter) emitPeeringUsage(peeringUsage state.PeeringUsage) { func (u *UsageMetricsReporter) emitPeeringUsage(peeringUsage state.PeeringUsage) {
metrics.SetGaugeWithLabels(
[]string{"consul", "state", "peerings"},
float32(peeringUsage.Peerings),
u.metricLabels,
)
metrics.SetGaugeWithLabels( metrics.SetGaugeWithLabels(
[]string{"state", "peerings"}, []string{"state", "peerings"},
float32(peeringUsage.Peerings), float32(peeringUsage.Peerings),
@ -54,22 +44,12 @@ func (u *UsageMetricsReporter) emitMemberUsage(members []serf.Member) {
} }
} }
metrics.SetGaugeWithLabels(
[]string{"consul", "members", "clients"},
float32(clients),
u.metricLabels,
)
metrics.SetGaugeWithLabels( metrics.SetGaugeWithLabels(
[]string{"members", "clients"}, []string{"members", "clients"},
float32(clients), float32(clients),
u.metricLabels, u.metricLabels,
) )
metrics.SetGaugeWithLabels(
[]string{"consul", "members", "servers"},
float32(servers),
u.metricLabels,
)
metrics.SetGaugeWithLabels( metrics.SetGaugeWithLabels(
[]string{"members", "servers"}, []string{"members", "servers"},
float32(servers), float32(servers),
@ -78,22 +58,12 @@ func (u *UsageMetricsReporter) emitMemberUsage(members []serf.Member) {
} }
func (u *UsageMetricsReporter) emitServiceUsage(serviceUsage structs.ServiceUsage) { func (u *UsageMetricsReporter) emitServiceUsage(serviceUsage structs.ServiceUsage) {
metrics.SetGaugeWithLabels(
[]string{"consul", "state", "services"},
float32(serviceUsage.Services),
u.metricLabels,
)
metrics.SetGaugeWithLabels( metrics.SetGaugeWithLabels(
[]string{"state", "services"}, []string{"state", "services"},
float32(serviceUsage.Services), float32(serviceUsage.Services),
u.metricLabels, u.metricLabels,
) )
metrics.SetGaugeWithLabels(
[]string{"consul", "state", "service_instances"},
float32(serviceUsage.ServiceInstances),
u.metricLabels,
)
metrics.SetGaugeWithLabels( metrics.SetGaugeWithLabels(
[]string{"state", "service_instances"}, []string{"state", "service_instances"},
float32(serviceUsage.ServiceInstances), float32(serviceUsage.ServiceInstances),
@ -106,11 +76,6 @@ func (u *UsageMetricsReporter) emitServiceUsage(serviceUsage structs.ServiceUsag
) )
for k, i := range serviceUsage.ConnectServiceInstances { for k, i := range serviceUsage.ConnectServiceInstances {
metrics.SetGaugeWithLabels(
[]string{"consul", "state", "connect_instances"},
float32(i),
append(u.metricLabels, metrics.Label{Name: "kind", Value: k}),
)
metrics.SetGaugeWithLabels( metrics.SetGaugeWithLabels(
[]string{"state", "connect_instances"}, []string{"state", "connect_instances"},
float32(i), float32(i),
@ -120,11 +85,6 @@ func (u *UsageMetricsReporter) emitServiceUsage(serviceUsage structs.ServiceUsag
} }
func (u *UsageMetricsReporter) emitKVUsage(kvUsage state.KVUsage) { func (u *UsageMetricsReporter) emitKVUsage(kvUsage state.KVUsage) {
metrics.SetGaugeWithLabels(
[]string{"consul", "state", "kv_entries"},
float32(kvUsage.KVCount),
u.metricLabels,
)
metrics.SetGaugeWithLabels( metrics.SetGaugeWithLabels(
[]string{"state", "kv_entries"}, []string{"state", "kv_entries"},
float32(kvUsage.KVCount), float32(kvUsage.KVCount),
@ -134,11 +94,6 @@ func (u *UsageMetricsReporter) emitKVUsage(kvUsage state.KVUsage) {
func (u *UsageMetricsReporter) emitConfigEntryUsage(configUsage state.ConfigEntryUsage) { func (u *UsageMetricsReporter) emitConfigEntryUsage(configUsage state.ConfigEntryUsage) {
for k, i := range configUsage.ConfigByKind { for k, i := range configUsage.ConfigByKind {
metrics.SetGaugeWithLabels(
[]string{"consul", "state", "config_entries"},
float32(i),
append(u.metricLabels, metrics.Label{Name: "kind", Value: k}),
)
metrics.SetGaugeWithLabels( metrics.SetGaugeWithLabels(
[]string{"state", "config_entries"}, []string{"state", "config_entries"},
float32(i), float32(i),

File diff suppressed because it is too large Load Diff

View File

@ -4,13 +4,21 @@
package usagemetrics package usagemetrics
import ( import (
"fmt"
"testing" "testing"
"time"
"github.com/armon/go-metrics" "github.com/armon/go-metrics"
"github.com/armon/go-metrics/prometheus"
"github.com/stretchr/testify/assert" "github.com/stretchr/testify/assert"
"github.com/stretchr/testify/mock" "github.com/stretchr/testify/mock"
"github.com/stretchr/testify/require"
"github.com/hashicorp/consul/agent/consul/state" "github.com/hashicorp/consul/agent/consul/state"
"github.com/hashicorp/consul/agent/structs"
"github.com/hashicorp/consul/lib"
"github.com/hashicorp/go-hclog"
"github.com/hashicorp/serf/serf"
) )
type mockStateProvider struct { type mockStateProvider struct {
@ -39,3 +47,75 @@ func assertEqualGaugeMaps(t *testing.T, expectedMap, foundMap map[string]metrics
assert.Equal(t, expected, foundMap[key], "gauge key mismatch on %q", key) assert.Equal(t, expected, foundMap[key], "gauge key mismatch on %q", key)
} }
} }
func BenchmarkRunOnce(b *testing.B) {
const index = 123
store := state.NewStateStore(nil)
// This loop generates:
//
// 4 (service kind) * 100 (service) * 5 * (node) = 2000 proxy services. And 500 non-proxy services.
for _, kind := range []structs.ServiceKind{
// These will be included in the count.
structs.ServiceKindConnectProxy,
structs.ServiceKindIngressGateway,
structs.ServiceKindTerminatingGateway,
structs.ServiceKindMeshGateway,
// This one will not.
structs.ServiceKindTypical,
} {
for i := 0; i < 100; i++ {
serviceName := fmt.Sprintf("%s-%d", kind, i)
for j := 0; j < 5; j++ {
nodeName := fmt.Sprintf("%s-node-%d", serviceName, j)
require.NoError(b, store.EnsureRegistration(index, &structs.RegisterRequest{
Node: nodeName,
Service: &structs.NodeService{
ID: serviceName,
Service: serviceName,
Kind: kind,
},
}))
}
}
}
benchmarkRunOnce(b, store)
}
// benchmarkRunOnce initializes telemetry with the "consul" prefix and a
// Prometheus sink configuration, builds a UsageMetricsReporter whose state
// provider always returns store, and then calls runOnce b.N times. The timer
// is reset after setup so only the runOnce loop is measured.
func benchmarkRunOnce(b *testing.B, store *state.Store) {
	b.Helper()

	telemetryCfg := lib.TelemetryConfig{
		MetricsPrefix: "consul",
		FilterDefault: true,
		PrometheusOpts: prometheus.PrometheusOpts{
			Expiration: 30 * time.Second,
			Name:       "consul",
		},
	}
	// NOTE(review): anything InitTelemetry returns is discarded here; confirm
	// that ignoring its result is intentional for repeated benchmark runs.
	lib.InitTelemetry(telemetryCfg, hclog.NewNullLogger())

	reporterCfg := &Config{
		stateProvider:  benchStateProvider(func() *state.Store { return store }),
		logger:         hclog.NewNullLogger(),
		getMembersFunc: func() []serf.Member { return nil },
	}
	reporter, err := NewUsageMetricsReporter(reporterCfg)
	require.NoError(b, err)

	b.ResetTimer()
	for n := 0; n < b.N; n++ {
		reporter.runOnce()
	}
}
// benchStateProvider is a function type that exposes a State method, letting
// a plain closure serve as the reporter's state provider in benchmarks.
type benchStateProvider func() *state.Store

// State calls the wrapped function and returns the store it yields.
func (provide benchStateProvider) State() *state.Store {
	store := provide()
	return store
}

View File

@ -14,6 +14,17 @@ provided for their upgrades as a result of new features or changed behavior.
This page is used to document those details separately from the standard This page is used to document those details separately from the standard
upgrade flow. upgrade flow.
## Consul v1.19.x
### Metrics removal
In previous versions, Consul emitted redundant state store usage metrics that contained two instances of `consul` in the metric name. As an example, config entry usage counts were emitted as both:
- `consul.state.config_entries`
- `consul.consul.state.config_entries`
As of Consul v1.19, Consul no longer emits the redundant metrics that contain `consul.consul` in their names. If you have monitoring alerts or dashboards that use these metrics, update them to use the simplified metric names.
## Consul 1.17.x ## Consul 1.17.x
### Known issues ### Known issues