From d9a23bb2fa5c6981bb904a63d2433e54557d9f43 Mon Sep 17 00:00:00 2001 From: Pierre Souchay Date: Tue, 17 Apr 2018 10:17:16 +0200 Subject: [PATCH 1/6] Track calls blocked by ACLs using metrics --- agent/local/state.go | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/agent/local/state.go b/agent/local/state.go index f19e88a766..cacbd6607b 100644 --- a/agent/local/state.go +++ b/agent/local/state.go @@ -10,6 +10,7 @@ import ( "sync/atomic" "time" + metrics "github.com/armon/go-metrics" "github.com/hashicorp/consul/acl" "github.com/hashicorp/consul/agent/structs" "github.com/hashicorp/consul/agent/token" @@ -842,6 +843,7 @@ func (l *State) deleteService(id string) error { // todo(fs): some backoff strategy might be a better solution l.services[id].InSync = true l.logger.Printf("[WARN] agent: Service %q deregistration blocked by ACLs", id) + metrics.IncrCounter([]string{"consul", "acl", "blocked", "service", "deregistration"}, 1) return nil default: @@ -879,6 +881,7 @@ func (l *State) deleteCheck(id types.CheckID) error { // todo(fs): some backoff strategy might be a better solution l.checks[id].InSync = true l.logger.Printf("[WARN] agent: Check %q deregistration blocked by ACLs", id) + metrics.IncrCounter([]string{"consul", "acl", "blocked", "check", "deregistration"}, 1) return nil default: @@ -949,6 +952,7 @@ func (l *State) syncService(id string) error { l.checks[check.CheckID].InSync = true } l.logger.Printf("[WARN] agent: Service %q registration blocked by ACLs", id) + metrics.IncrCounter([]string{"consul", "acl", "blocked", "service", "registration"}, 1) return nil default: @@ -994,6 +998,7 @@ func (l *State) syncCheck(id types.CheckID) error { // todo(fs): some backoff strategy might be a better solution l.checks[id].InSync = true l.logger.Printf("[WARN] agent: Check %q registration blocked by ACLs", id) + metrics.IncrCounter([]string{"consul", "acl", "blocked", "service", "registration"}, 1) return nil default: @@ -1025,6 +1030,7 @@ func (l *State) syncNodeInfo() error { // todo(fs): some backoff strategy might be a better solution l.nodeInfoInSync = true l.logger.Printf("[WARN] agent: Node info update blocked by ACLs") + metrics.IncrCounter([]string{"consul", "acl", "blocked", "node", "registration"}, 1) return nil default: From f13aa5ba9bbbaa2a3ba8ad539358bbfe3daba40c Mon Sep 17 00:00:00 2001 From: Pierre Souchay Date: Wed, 18 Apr 2018 16:51:22 +0200 Subject: [PATCH 2/6] Added labels to improve new metric --- agent/local/state.go | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/agent/local/state.go b/agent/local/state.go index cacbd6607b..0e9b513e6c 100644 --- a/agent/local/state.go +++ b/agent/local/state.go @@ -843,7 +843,7 @@ func (l *State) deleteService(id string) error { // todo(fs): some backoff strategy might be a better solution l.services[id].InSync = true l.logger.Printf("[WARN] agent: Service %q deregistration blocked by ACLs", id) - metrics.IncrCounter([]string{"consul", "acl", "blocked", "service", "deregistration"}, 1) + metrics.IncrCounterWithLabels([]string{"consul", "acl", "blocked", "service", "deregistration"}, 1, []metrics.Label{{Name: "id", Value: id}}) return nil default: @@ -881,7 +881,7 @@ func (l *State) deleteCheck(id types.CheckID) error { // todo(fs): some backoff strategy might be a better solution l.checks[id].InSync = true l.logger.Printf("[WARN] agent: Check %q deregistration blocked by ACLs", id) - metrics.IncrCounter([]string{"consul", "acl", "blocked", "check", "deregistration"}, 1) + metrics.IncrCounterWithLabels([]string{"consul", "acl", "blocked", "check", "deregistration"}, 1, []metrics.Label{{Name: "id", Value: string(id)}}) return nil default: @@ -952,7 +952,7 @@ func (l *State) syncService(id string) error { l.checks[check.CheckID].InSync = true } l.logger.Printf("[WARN] agent: Service %q registration blocked by ACLs", id) - metrics.IncrCounter([]string{"consul", "acl", "blocked", "service", "registration"}, 1) + metrics.IncrCounterWithLabels([]string{"consul", "acl", "blocked", "service", "registration"}, 1, []metrics.Label{{Name: "id", Value: id}}) return nil default: @@ -998,7 +998,7 @@ func (l *State) syncCheck(id types.CheckID) error { // todo(fs): some backoff strategy might be a better solution l.checks[id].InSync = true l.logger.Printf("[WARN] agent: Check %q registration blocked by ACLs", id) - metrics.IncrCounter([]string{"consul", "acl", "blocked", "service", "registration"}, 1) + metrics.IncrCounterWithLabels([]string{"consul", "acl", "blocked", "check", "registration"}, 1, []metrics.Label{{Name: "check", Value: string(id)}}) return nil default: @@ -1030,7 +1030,7 @@ func (l *State) syncNodeInfo() error { // todo(fs): some backoff strategy might be a better solution l.nodeInfoInSync = true l.logger.Printf("[WARN] agent: Node info update blocked by ACLs") - metrics.IncrCounter([]string{"consul", "acl", "blocked", "node", "registration"}, 1) + metrics.IncrCounterWithLabels([]string{"consul", "acl", "blocked", "node", "registration"}, 1, []metrics.Label{{Name: "id", Value: string(l.config.NodeID)}, {Name: "name", Value: l.config.NodeName}}) return nil default: From 65d3a2b26e1fca8a54d88ec6390b48bd22d04dd7 Mon Sep 17 00:00:00 2001 From: Pierre Souchay Date: Wed, 18 Apr 2018 17:09:25 +0200 Subject: [PATCH 3/6] Fixed import --- agent/local/state.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/agent/local/state.go b/agent/local/state.go index 0e9b513e6c..c8505b2e27 100644 --- a/agent/local/state.go +++ b/agent/local/state.go @@ -10,7 +10,7 @@ import ( "sync/atomic" "time" - metrics "github.com/armon/go-metrics" + "github.com/armon/go-metrics" "github.com/hashicorp/consul/acl" "github.com/hashicorp/consul/agent/structs" "github.com/hashicorp/consul/agent/token" From 064f8ad1704943505a2d1f372555d25c0053c15b Mon Sep 17 00:00:00 2001 From: Pierre Souchay Date: Fri, 8 Jun 2018 11:51:50 +0200 Subject: [PATCH 4/6] Removed consul prefix from metrics as requested by @kyhavlov --- agent/local/state.go | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/agent/local/state.go b/agent/local/state.go index c8505b2e27..da0b354505 100644 --- a/agent/local/state.go +++ b/agent/local/state.go @@ -843,7 +843,7 @@ func (l *State) deleteService(id string) error { // todo(fs): some backoff strategy might be a better solution l.services[id].InSync = true l.logger.Printf("[WARN] agent: Service %q deregistration blocked by ACLs", id) - metrics.IncrCounterWithLabels([]string{"consul", "acl", "blocked", "service", "deregistration"}, 1, []metrics.Label{{Name: "id", Value: id}}) + metrics.IncrCounterWithLabels([]string{"acl", "blocked", "service", "deregistration"}, 1, []metrics.Label{{Name: "id", Value: id}}) return nil default: @@ -881,7 +881,7 @@ func (l *State) deleteCheck(id types.CheckID) error { // todo(fs): some backoff strategy might be a better solution l.checks[id].InSync = true l.logger.Printf("[WARN] agent: Check %q deregistration blocked by ACLs", id) - metrics.IncrCounterWithLabels([]string{"consul", "acl", "blocked", "check", "deregistration"}, 1, []metrics.Label{{Name: "id", Value: string(id)}}) + metrics.IncrCounterWithLabels([]string{"acl", "blocked", "check", "deregistration"}, 1, []metrics.Label{{Name: "id", Value: string(id)}}) return nil default: @@ -952,7 +952,7 @@ func (l *State) syncService(id string) error { l.checks[check.CheckID].InSync = true } l.logger.Printf("[WARN] agent: Service %q registration blocked by ACLs", id) - metrics.IncrCounterWithLabels([]string{"consul", "acl", "blocked", "service", "registration"}, 1, []metrics.Label{{Name: "id", Value: id}}) + metrics.IncrCounterWithLabels([]string{"acl", "blocked", "service", "registration"}, 1, []metrics.Label{{Name: "id", Value: id}}) return nil default: @@ -998,7 +998,7 @@ func (l *State) syncCheck(id types.CheckID) error { // todo(fs): some backoff strategy might be a better solution l.checks[id].InSync = true l.logger.Printf("[WARN] agent: Check %q registration blocked by ACLs", id) - metrics.IncrCounterWithLabels([]string{"consul", "acl", "blocked", "check", "registration"}, 1, []metrics.Label{{Name: "check", Value: string(id)}}) + metrics.IncrCounterWithLabels([]string{"acl", "blocked", "check", "registration"}, 1, []metrics.Label{{Name: "check", Value: string(id)}}) return nil default: @@ -1030,7 +1030,7 @@ func (l *State) syncNodeInfo() error { // todo(fs): some backoff strategy might be a better solution l.nodeInfoInSync = true l.logger.Printf("[WARN] agent: Node info update blocked by ACLs") - metrics.IncrCounterWithLabels([]string{"consul", "acl", "blocked", "node", "registration"}, 1, []metrics.Label{{Name: "id", Value: string(l.config.NodeID)}, {Name: "name", Value: l.config.NodeName}}) + metrics.IncrCounterWithLabels([]string{"acl", "blocked", "node", "registration"}, 1, []metrics.Label{{Name: "id", Value: string(l.config.NodeID)}, {Name: "name", Value: l.config.NodeName}}) return nil default: From c83124a94cbf22664fa467008ed98f04658fd069 Mon Sep 17 00:00:00 2001 From: Pierre Souchay Date: Fri, 8 Jun 2018 11:56:46 +0200 Subject: [PATCH 5/6] Removed labels from new ACL denied metrics --- agent/local/state.go | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/agent/local/state.go b/agent/local/state.go index da0b354505..ba56636cda 100644 --- a/agent/local/state.go +++ b/agent/local/state.go @@ -843,7 +843,7 @@ func (l *State) deleteService(id string) error { // todo(fs): some backoff strategy might be a better solution l.services[id].InSync = true l.logger.Printf("[WARN] agent: Service %q deregistration blocked by ACLs", id) - metrics.IncrCounterWithLabels([]string{"acl", "blocked", "service", "deregistration"}, 1, []metrics.Label{{Name: "id", Value: id}}) + metrics.IncrCounter([]string{"acl", "blocked", "service", "deregistration"}, 1) return nil default: @@ -881,7 +881,7 @@ func (l *State) deleteCheck(id types.CheckID) error { // todo(fs): some backoff strategy might be a better solution l.checks[id].InSync = true l.logger.Printf("[WARN] agent: Check %q deregistration blocked by ACLs", id) - metrics.IncrCounterWithLabels([]string{"acl", "blocked", "check", "deregistration"}, 1, []metrics.Label{{Name: "id", Value: string(id)}}) + metrics.IncrCounter([]string{"acl", "blocked", "check", "deregistration"}, 1) return nil default: @@ -952,7 +952,7 @@ func (l *State) syncService(id string) error { l.checks[check.CheckID].InSync = true } l.logger.Printf("[WARN] agent: Service %q registration blocked by ACLs", id) - metrics.IncrCounterWithLabels([]string{"acl", "blocked", "service", "registration"}, 1, []metrics.Label{{Name: "id", Value: id}}) + metrics.IncrCounter([]string{"acl", "blocked", "service", "registration"}, 1) return nil default: @@ -998,7 +998,7 @@ func (l *State) syncCheck(id types.CheckID) error { // todo(fs): some backoff strategy might be a better solution l.checks[id].InSync = true l.logger.Printf("[WARN] agent: Check %q registration blocked by ACLs", id) - metrics.IncrCounterWithLabels([]string{"acl", "blocked", "check", "registration"}, 1, []metrics.Label{{Name: "check", Value: string(id)}}) + metrics.IncrCounter([]string{"acl", "blocked", "check", "registration"}, 1) return nil default: @@ -1030,7 +1030,7 @@ func (l *State) syncNodeInfo() error { // todo(fs): some backoff strategy might be a better solution l.nodeInfoInSync = true l.logger.Printf("[WARN] agent: Node info update blocked by ACLs") - metrics.IncrCounterWithLabels([]string{"acl", "blocked", "node", "registration"}, 1, []metrics.Label{{Name: "id", Value: string(l.config.NodeID)}, {Name: "name", Value: l.config.NodeName}}) + metrics.IncrCounter([]string{"acl", "blocked", "node", "registration"}, 1) return nil default: From a937c7fa7073954a4220d168fa2292c757460c3a Mon Sep 17 00:00:00 2001 From: Pierre Souchay Date: Mon, 9 Jul 2018 11:36:33 +0200 Subject: [PATCH 6/6] Added new ACL blocked Metrics to telemetry.html --- website/source/docs/agent/telemetry.html.md | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/website/source/docs/agent/telemetry.html.md b/website/source/docs/agent/telemetry.html.md index a0a9685c36..ad268d1588 100644 --- a/website/source/docs/agent/telemetry.html.md +++ b/website/source/docs/agent/telemetry.html.md @@ -138,6 +138,18 @@ This is a full list of metrics emitted by Consul. Unit Type + + `consul.acl.blocked.service.registration` + This increments whenever a deregistration fails for a service (blocked by an ACL) + requests + counter + + + `consul.acl.blocked.<check|node|service>.registration` + This increments whenever a registration fails for an entity (check, node or service) is blocked by an ACL + requests + counter + `consul.client.rpc` This increments whenever a Consul agent in client mode makes an RPC request to a Consul server. This gives a measure of how much a given agent is loading the Consul servers. Currently, this is only generated by agents in client mode, not Consul servers.