mirror of https://github.com/status-im/consul.git
rate: add prometheus definitions, docs, and clearer names (#15945)
commit e40b731a52
parent 8d4c3aa42c
@@ -10,8 +10,9 @@ import (
     "sync/atomic"

     "github.com/armon/go-metrics"
-    "github.com/hashicorp/consul/agent/consul/multilimiter"
     "github.com/hashicorp/go-hclog"
+
+    "github.com/hashicorp/consul/agent/consul/multilimiter"
 )

 var (
@@ -214,7 +215,7 @@ func (h *Handler) Allow(op Operation) error {
         "limit_enforced", enforced,
     )

-    metrics.IncrCounterWithLabels([]string{"consul", "rate_limit"}, 1, []metrics.Label{
+    metrics.IncrCounterWithLabels([]string{"rpc", "rate_limit", "exceeded"}, 1, []metrics.Label{
         {
             Name:  "limit_type",
             Value: l.desc,
@@ -104,7 +104,7 @@ func TestHandler(t *testing.T) {
             expectErr:         nil,
             expectLog:         true,
             expectMetric:      true,
-            expectMetricName:  "consul.rate_limit;limit_type=global/write;op=Foo.Bar;mode=permissive",
+            expectMetricName:  "rpc.rate_limit.exceeded;limit_type=global/write;op=Foo.Bar;mode=permissive",
             expectMetricCount: 1,
         },
         "global write limit exceeded (enforcing, leader)": {
@@ -121,7 +121,7 @@ func TestHandler(t *testing.T) {
             expectErr:         ErrRetryLater,
             expectLog:         true,
             expectMetric:      true,
-            expectMetricName:  "consul.rate_limit;limit_type=global/write;op=Foo.Bar;mode=enforcing",
+            expectMetricName:  "rpc.rate_limit.exceeded;limit_type=global/write;op=Foo.Bar;mode=enforcing",
             expectMetricCount: 1,
         },
         "global write limit exceeded (enforcing, follower)": {
@@ -138,7 +138,7 @@ func TestHandler(t *testing.T) {
             expectErr:         ErrRetryElsewhere,
             expectLog:         true,
             expectMetric:      true,
-            expectMetricName:  "consul.rate_limit;limit_type=global/write;op=Foo.Bar;mode=enforcing",
+            expectMetricName:  "rpc.rate_limit.exceeded;limit_type=global/write;op=Foo.Bar;mode=enforcing",
             expectMetricCount: 1,
         },
         "global read limit disabled": {
@@ -180,7 +180,7 @@ func TestHandler(t *testing.T) {
             expectErr:         nil,
             expectLog:         true,
             expectMetric:      true,
-            expectMetricName:  "consul.rate_limit;limit_type=global/read;op=Foo.Bar;mode=permissive",
+            expectMetricName:  "rpc.rate_limit.exceeded;limit_type=global/read;op=Foo.Bar;mode=permissive",
             expectMetricCount: 1,
         },
         "global read limit exceeded (enforcing, leader)": {
@@ -197,7 +197,7 @@ func TestHandler(t *testing.T) {
             expectErr:         ErrRetryElsewhere,
             expectLog:         true,
             expectMetric:      true,
-            expectMetricName:  "consul.rate_limit;limit_type=global/read;op=Foo.Bar;mode=enforcing",
+            expectMetricName:  "rpc.rate_limit.exceeded;limit_type=global/read;op=Foo.Bar;mode=enforcing",
             expectMetricCount: 1,
         },
         "global read limit exceeded (enforcing, follower)": {
@@ -214,7 +214,7 @@ func TestHandler(t *testing.T) {
             expectErr:         ErrRetryElsewhere,
             expectLog:         true,
             expectMetric:      true,
-            expectMetricName:  "consul.rate_limit;limit_type=global/read;op=Foo.Bar;mode=enforcing",
+            expectMetricName:  "rpc.rate_limit.exceeded;limit_type=global/read;op=Foo.Bar;mode=enforcing",
             expectMetricCount: 1,
         },
     }
@@ -3,6 +3,10 @@ package rate
 import "github.com/armon/go-metrics/prometheus"

 var Counters = []prometheus.CounterDefinition{
+    {
+        Name: []string{"rpc", "rate_limit", "exceeded"},
+        Help: "Increments whenever an RPC is over a configured rate limit. Note: in permissive mode, the RPC will have still been allowed to proceed.",
+    },
     {
         Name: []string{"rpc", "rate_limit", "log_dropped"},
         Help: "Increments whenever a log that is emitted because an RPC exceeded a rate limit gets dropped because the output buffer is full.",
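For context, the sketch below shows one way CounterDefinitions such as rate.Counters can be wired into the go-metrics Prometheus sink so the counters are exported, with their help text, before the first increment. It is illustrative only and is not Consul's actual agent telemetry setup; the agent also applies the "consul" service prefix, which is how the metric surfaces as consul.rpc.rate_limit.exceeded in the docs further down.

// Illustrative sketch (not Consul's agent setup): pre-registering
// rate.Counters with a go-metrics Prometheus sink.
package main

import (
    "time"

    "github.com/armon/go-metrics"
    "github.com/armon/go-metrics/prometheus"

    "github.com/hashicorp/consul/agent/consul/rate"
)

func main() {
    // CounterDefinitions passed here are initialized up front, so the
    // counters appear in the Prometheus output (at zero, with help text)
    // before the first RPC ever exceeds a limit.
    sink, err := prometheus.NewPrometheusSinkFrom(prometheus.PrometheusOpts{
        Expiration:         time.Minute,
        CounterDefinitions: rate.Counters,
    })
    if err != nil {
        panic(err)
    }

    // "consul" becomes the service prefix, so increments of
    // []string{"rpc", "rate_limit", "exceeded"} surface as
    // consul.rpc.rate_limit.exceeded.
    if _, err := metrics.NewGlobal(metrics.DefaultConfig("consul"), sink); err != nil {
        panic(err)
    }

    // This mirrors the call made in Handler.Allow when a limit is exceeded.
    metrics.IncrCounterWithLabels([]string{"rpc", "rate_limit", "exceeded"}, 1,
        []metrics.Label{{Name: "limit_type", Value: "global/write"}})
}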
@@ -172,25 +172,31 @@ func TestServerRequestRateLimit(t *testing.T) {
 }

 func checkForMetric(t *retry.R, metricsInfo *api.MetricsInfo, operationName string, expectedLimitType string) {
-    for _, counter := range metricsInfo.Counters {
-        if counter.Name == "consul.rate.limit" {
-            operation, ok := counter.Labels["op"]
-            require.True(t, ok)
-
-            limitType, ok := counter.Labels["limit_type"]
-            require.True(t, ok)
-
-            mode, ok := counter.Labels["mode"]
-            require.True(t, ok)
-
-            if operation == operationName {
-                require.Equal(t, 2, counter.Count)
-                require.Equal(t, expectedLimitType, limitType)
-                require.Equal(t, "disabled", mode)
-            }
-        }
-    }
+    const counterName = "rpc.rate_limit.exceeded"
+
+    var counter api.SampledValue
+    for _, c := range metricsInfo.Counters {
+        if c.Name == counterName {
+            counter = c
+            break
+        }
+    }
+    require.NotNilf(t, counter, "counter not found: %s", counterName)
+
+    operation, ok := counter.Labels["op"]
+    require.True(t, ok)
+
+    limitType, ok := counter.Labels["limit_type"]
+    require.True(t, ok)
+
+    mode, ok := counter.Labels["mode"]
+    require.True(t, ok)
+
+    if operation == operationName {
+        require.Equal(t, 2, counter.Count)
+        require.Equal(t, expectedLimitType, limitType)
+        require.Equal(t, "disabled", mode)
+    }
 }

 func checkLogsForMessage(t *retry.R, logs []string, msg string, operationName string, logType string, logShouldExist bool) {
@@ -477,6 +477,7 @@ These metrics are used to monitor the health of the Consul servers.
 | `consul.raft.transition.heartbeat_timeout` | The number of times an agent has transitioned to the Candidate state, after receive no heartbeat messages from the last known leader. | timeouts / interval | counter |
 | `consul.raft.verify_leader` | This metric doesn't have a direct correlation to the leader change. It just counts the number of times an agent checks if it is still the leader or not. For example, during every consistent read, the check is done. Depending on the load in the system, this metric count can be high as it is incremented each time a consistent read is completed. | checks / interval | Counter |
 | `consul.rpc.accept_conn` | Increments when a server accepts an RPC connection. | connections | counter |
+| `consul.rpc.rate_limit.exceeded` | Increments whenever an RPC is over a configured rate limit. In permissive mode, the RPC is still allowed to proceed. | RPCs | counter |
 | `consul.rpc.rate_limit.log_dropped` | Increments whenever a log that is emitted because an RPC exceeded a rate limit gets dropped because the output buffer is full. | log messages dropped | counter |
 | `consul.catalog.register` | Measures the time it takes to complete a catalog register operation. | ms | timer |
 | `consul.catalog.deregister` | Measures the time it takes to complete a catalog deregister operation. | ms | timer |
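As a usage note, here is a hedged sketch of reading these counters back from a running agent through the /v1/agent/metrics endpoint with the Consul Go API client; the agent address (default local agent) and error handling are illustrative, and it assumes telemetry is enabled so the counter has been registered.

// Illustrative sketch: inspecting the rate-limit counter via the agent
// metrics endpoint using the official Go API client.
package main

import (
    "fmt"
    "log"

    "github.com/hashicorp/consul/api"
)

func main() {
    client, err := api.NewClient(api.DefaultConfig()) // assumes a local agent
    if err != nil {
        log.Fatal(err)
    }

    info, err := client.Agent().Metrics()
    if err != nil {
        log.Fatal(err)
    }

    for _, c := range info.Counters {
        if c.Name == "consul.rpc.rate_limit.exceeded" {
            // Labels carry limit_type, op, and mode, as asserted by the
            // rate-limit tests in this commit.
            fmt.Printf("%s count=%d labels=%v\n", c.Name, c.Count, c.Labels)
        }
    }
}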