From 5da2f1efa89852c3f835ac3d0a66ced5eada5ce9 Mon Sep 17 00:00:00 2001 From: Kit Patella Date: Fri, 13 Nov 2020 16:26:08 -0800 Subject: [PATCH] finish adding static server metrics --- agent/cache/cache.go | 31 ++++++++ agent/catalog_endpoint.go | 47 ++++++------ agent/consul/acl_endpoint_legacy.go | 8 ++ agent/consul/autopilot.go | 2 +- agent/consul/catalog_endpoint.go | 12 +-- agent/consul/fsm/commands_oss.go | 93 ++++++++++++++++++++++- agent/consul/fsm/snapshot.go | 8 ++ agent/consul/leader.go | 16 ++++ agent/consul/prepared_query_endpoint.go | 8 +- agent/consul/rpc.go | 16 ++-- agent/consul/segment_oss.go | 8 ++ agent/consul/session_endpoint.go | 12 +++ agent/consul/session_ttl.go | 7 +- agent/consul/txn_endpoint.go | 2 +- agent/consul/usagemetrics/usagemetrics.go | 12 +-- agent/dns.go | 6 +- agent/grpc/stats.go | 16 ++-- agent/http.go | 2 +- agent/local/state.go | 26 ++++--- agent/setup.go | 20 +++-- 20 files changed, 268 insertions(+), 84 deletions(-) diff --git a/agent/cache/cache.go b/agent/cache/cache.go index 1a5193792b..62dc8619ba 100644 --- a/agent/cache/cache.go +++ b/agent/cache/cache.go @@ -24,6 +24,7 @@ import ( "time" "github.com/armon/go-metrics" + "github.com/armon/go-metrics/prometheus" "golang.org/x/time/rate" "github.com/hashicorp/consul/lib" @@ -32,6 +33,34 @@ import ( //go:generate mockery -all -inpkg +// TODO(kit): remove the namespace from these once the metrics themselves change +var Gauges = []prometheus.GaugeDefinition{ + { + Name: []string{"consul", "cache", "entries_count"}, + Help: "", + }, +} + +// TODO(kit): remove the namespace from these once the metrics themselves change +var Counters = []prometheus.CounterDefinition{ + { + Name: []string{"consul", "cache", "bypass"}, + Help: "", + }, + { + Name: []string{"consul", "cache", "fetch_success"}, + Help: "", + }, + { + Name: []string{"consul", "cache", "fetch_error"}, + Help: "", + }, + { + Name: []string{"consul", "cache", "evict_expired"}, + Help: "", + }, +} + // Constants related 
to refresh backoff. We probably don't ever need to // make these configurable knobs since they primarily exist to lower load. const ( @@ -629,6 +658,7 @@ func (c *Cache) fetch(key string, r getOptions, allowNew bool, attempt uint, ign // Error handling if err == nil { labels := []metrics.Label{{Name: "result_not_modified", Value: strconv.FormatBool(result.NotModified)}} + // TODO(kit): move tEntry.Name to a label on the first write here and deprecate the second write metrics.IncrCounterWithLabels([]string{"consul", "cache", "fetch_success"}, 1, labels) metrics.IncrCounterWithLabels([]string{"consul", "cache", tEntry.Name, "fetch_success"}, 1, labels) @@ -658,6 +688,7 @@ func (c *Cache) fetch(key string, r getOptions, allowNew bool, attempt uint, ign newEntry.RefreshLostContact = time.Time{} } } else { + // TODO(kit): Add tEntry.Name to label on fetch_error and deprecate second write metrics.IncrCounter([]string{"consul", "cache", "fetch_error"}, 1) metrics.IncrCounter([]string{"consul", "cache", tEntry.Name, "fetch_error"}, 1) diff --git a/agent/catalog_endpoint.go b/agent/catalog_endpoint.go index df78384ea0..3cda7c7cd8 100644 --- a/agent/catalog_endpoint.go +++ b/agent/catalog_endpoint.go @@ -11,7 +11,6 @@ import ( "github.com/hashicorp/consul/agent/structs" ) -// TODO(kit): Add help strings for each var CatalogCounters = []prometheus.CounterDefinition{ { Name: []string{"client", "api", "catalog_register"}, @@ -19,71 +18,71 @@ var CatalogCounters = []prometheus.CounterDefinition{ }, { Name: []string{"client", "rpc", "error", "catalog_register"}, - Help: "", + Help: "This increments whenever a Consul agent receives an RPC error for a catalog register request.", }, { Name: []string{"client", "api", "success", "catalog_register"}, - Help: "", + Help: "This increments whenever a Consul agent successfully responds to a catalog register request.", }, { Name: []string{"client", "api", "catalog_deregister"}, - Help: "", + Help: "This increments whenever a Consul agent 
receives a catalog deregister request.", }, { Name: []string{"client", "api", "catalog_datacenters"}, - Help: "", + Help: "This increments whenever a Consul agent receives a request to list datacenters in the catalog.", }, { Name: []string{"client", "rpc", "error", "catalog_deregister"}, - Help: "", + Help: "This increments whenever a Consul agent receives an RPC error for a catalog deregister request.", }, { Name: []string{"client", "api", "success", "catalog_nodes"}, - Help: "", + Help: "This increments whenever a Consul agent successfully responds to a request to list nodes.", }, { Name: []string{"client", "rpc", "error", "catalog_nodes"}, - Help: "", + Help: "This increments whenever a Consul agent receives an RPC error for a request to list nodes.", }, { Name: []string{"client", "api", "success", "catalog_deregister"}, - Help: "", + Help: "This increments whenever a Consul agent successfully responds to a catalog deregister request.", }, { Name: []string{"client", "rpc", "error", "catalog_datacenters"}, - Help: "", + Help: "This increments whenever a Consul agent receives an RPC error for a request to list datacenters.", }, { Name: []string{"client", "api", "success", "catalog_datacenters"}, - Help: "", + Help: "This increments whenever a Consul agent successfully responds to a request to list datacenters.", }, { Name: []string{"client", "api", "catalog_nodes"}, - Help: "", + Help: "This increments whenever a Consul agent receives a request to list nodes from the catalog.", }, { Name: []string{"client", "api", "catalog_services"}, - Help: "", + Help: "This increments whenever a Consul agent receives a request to list services from the catalog.", }, { Name: []string{"client", "rpc", "error", "catalog_services"}, - Help: "", + Help: "This increments whenever a Consul agent receives an RPC error for a request to list services.", }, { Name: []string{"client", "api", "success", "catalog_services"}, - Help: "", + Help: "This increments whenever a Consul agent 
successfully responds to a request to list services.", }, { Name: []string{"client", "api", "catalog_service_nodes"}, - Help: "", + Help: "This increments whenever a Consul agent receives a request to list nodes offering a service.", }, { Name: []string{"client", "rpc", "error", "catalog_service_nodes"}, - Help: "", + Help: "This increments whenever a Consul agent receives an RPC error for a request to list nodes offering a service.", }, { Name: []string{"client", "api", "success", "catalog_service_nodes"}, - Help: "", + Help: "This increments whenever a Consul agent successfully responds to a request to list nodes offering a service.", }, { Name: []string{"client", "api", "error", "catalog_service_nodes"}, @@ -91,15 +90,15 @@ var CatalogCounters = []prometheus.CounterDefinition{ }, { Name: []string{"client", "api", "catalog_node_services"}, - Help: "", + Help: "This increments whenever a Consul agent receives a request to list services in a node.", }, { Name: []string{"client", "api", "success", "catalog_node_services"}, - Help: "", + Help: "This increments whenever a Consul agent successfully responds to a request to list services in a node.", }, { Name: []string{"client", "rpc", "error", "catalog_node_services"}, - Help: "", + Help: "This increments whenever a Consul agent receives an RPC error for a request to list services in a node.", }, { Name: []string{"client", "api", "catalog_node_service_list"}, @@ -115,15 +114,15 @@ var CatalogCounters = []prometheus.CounterDefinition{ }, { Name: []string{"client", "api", "catalog_gateway_services"}, - Help: "", + Help: "This increments whenever a Consul agent receives a request to list services associated with a gateway.", }, { Name: []string{"client", "rpc", "error", "catalog_gateway_services"}, - Help: "", + Help: "This increments whenever a Consul agent receives an RPC error for a request to list services associated with a gateway.", }, { Name: []string{"client", "api", "success", 
"catalog_gateway_services"}, - Help: "", + Help: "This increments whenever a Consul agent successfully responds to a request to list services associated with a gateway.", }, } diff --git a/agent/consul/acl_endpoint_legacy.go b/agent/consul/acl_endpoint_legacy.go index 22838aca0d..16fa917fc6 100644 --- a/agent/consul/acl_endpoint_legacy.go +++ b/agent/consul/acl_endpoint_legacy.go @@ -5,6 +5,7 @@ import ( "time" "github.com/armon/go-metrics" + "github.com/armon/go-metrics/prometheus" "github.com/hashicorp/consul/acl" "github.com/hashicorp/consul/agent/consul/state" "github.com/hashicorp/consul/agent/structs" @@ -12,6 +13,13 @@ import ( "github.com/hashicorp/go-memdb" ) +var ACLEndpointLegacySummaries = []prometheus.SummaryDefinition{ + { + Name: []string{"acl", "apply"}, + Help: "This measures the time it takes to complete an update to the ACL store.", + }, +} + // Bootstrap is used to perform a one-time ACL bootstrap operation on // a cluster to get the first management token. func (a *ACL) Bootstrap(args *structs.DCSpecificRequest, reply *structs.ACL) error { diff --git a/agent/consul/autopilot.go b/agent/consul/autopilot.go index 22f50259c5..e84451ebdf 100644 --- a/agent/consul/autopilot.go +++ b/agent/consul/autopilot.go @@ -16,7 +16,7 @@ import ( var AutopilotGauges = []prometheus.GaugeDefinition{ { Name: []string{"autopilot", "failure_tolerance"}, - Help: "", + Help: "This tracks the number of voting servers that the cluster can lose while continuing to function.", }, { Name: []string{"autopilot", "healthy"}, diff --git a/agent/consul/catalog_endpoint.go b/agent/consul/catalog_endpoint.go index 151367cf4b..95e405dcd0 100644 --- a/agent/consul/catalog_endpoint.go +++ b/agent/consul/catalog_endpoint.go @@ -21,7 +21,7 @@ import ( var CatalogCounters = []prometheus.CounterDefinition{ { Name: []string{"catalog", "service", "query"}, - Help: "", + Help: "This increments for each catalog query for the given service.", }, { Name: []string{"catalog", "connect", 
"query"}, @@ -29,7 +29,7 @@ var CatalogCounters = []prometheus.CounterDefinition{ }, { Name: []string{"catalog", "service", "query-tag"}, - Help: "", + Help: "This increments for each catalog query for the given service with the given tag.", }, { Name: []string{"catalog", "connect", "query-tag"}, @@ -37,7 +37,7 @@ var CatalogCounters = []prometheus.CounterDefinition{ }, { Name: []string{"catalog", "service", "query-tags"}, - Help: "", + Help: "This increments for each catalog query for the given service with the given tags.", }, { Name: []string{"catalog", "connect", "query-tags"}, @@ -45,7 +45,7 @@ var CatalogCounters = []prometheus.CounterDefinition{ }, { Name: []string{"catalog", "service", "not-found"}, - Help: "", + Help: "This increments for each catalog query where the given service could not be found.", }, { Name: []string{"catalog", "connect", "not-found"}, @@ -56,11 +56,11 @@ var CatalogCounters = []prometheus.CounterDefinition{ var CatalogSummaries = []prometheus.SummaryDefinition{ { Name: []string{"catalog", "deregister"}, - Help: "", + Help: "This measures the time it takes to complete a catalog deregister operation.", }, { Name: []string{"catalog", "register"}, - Help: "", + Help: "This measures the time it takes to complete a catalog register operation.", }, } diff --git a/agent/consul/fsm/commands_oss.go b/agent/consul/fsm/commands_oss.go index 5a5a530c8d..4c3bf3c0d1 100644 --- a/agent/consul/fsm/commands_oss.go +++ b/agent/consul/fsm/commands_oss.go @@ -4,11 +4,102 @@ import ( "fmt" "time" - metrics "github.com/armon/go-metrics" + "github.com/armon/go-metrics" + "github.com/armon/go-metrics/prometheus" "github.com/hashicorp/consul/agent/structs" "github.com/hashicorp/consul/api" ) +var CommandsSummaries = []prometheus.SummaryDefinition{ + { + Name: []string{"fsm", "register"}, + Help: "This measures the time it takes to apply a catalog register operation to the FSM.", + }, + { + Name: []string{"fsm", "deregister"}, + Help: "This measures the time 
it takes to apply a catalog deregister operation to the FSM.", + }, + { + Name: []string{"fsm", "kvs"}, + Help: "This measures the time it takes to apply the given KV operation to the FSM.", + }, + { + Name: []string{"fsm", "session"}, + Help: "This measures the time it takes to apply the given session operation to the FSM.", + }, + { + Name: []string{"fsm", "acl"}, + Help: "This measures the time it takes to apply the given ACL operation to the FSM.", + }, + { + Name: []string{"fsm", "tombstone"}, + Help: "This measures the time it takes to apply the given tombstone operation to the FSM.", + }, + { + Name: []string{"fsm", "coordinate", "batch-update"}, + Help: "This measures the time it takes to apply the given batch coordinate update to the FSM.", + }, + { + Name: []string{"fsm", "prepared-query"}, + Help: "This measures the time it takes to apply the given prepared query update operation to the FSM.", + }, + { + Name: []string{"fsm", "txn"}, + Help: "This measures the time it takes to apply the given transaction update to the FSM.", + }, + { + Name: []string{"fsm", "autopilot"}, + Help: "This measures the time it takes to apply the given autopilot update to the FSM.", + }, + { + Name: []string{"consul", "fsm", "intention"}, + Help: "", + }, + { + Name: []string{"fsm", "intention"}, + Help: "", + }, + { + Name: []string{"consul", "fsm", "ca"}, + Help: "", + }, + { + Name: []string{"fsm", "ca"}, + Help: "", + }, + { + Name: []string{"fsm", "acl", "token"}, + Help: "", + }, + { + Name: []string{"fsm", "ca", "leaf"}, + Help: "", + }, + { + Name: []string{"fsm", "acl", "policy"}, + Help: "", + }, + { + Name: []string{"fsm", "acl", "bindingrule"}, + Help: "", + }, + { + Name: []string{"fsm", "acl", "authmethod"}, + Help: "", + }, + { + Name: []string{"fsm", "system_metadata"}, + Help: "", + }, + // TODO(kit): We generate the config-entry fsm summaries by reading off of the request. 
It is + // possible to statically declare these when we know all of the names, but I didn't get to it + // in this patch. Config-entries are known though and we should add these in the future. + // { + // Name: []string{"fsm", "config_entry", req.Entry.GetKind()}, + // Help: "", + // }, +} + func init() { registerCommand(structs.RegisterRequestType, (*FSM).applyRegister) registerCommand(structs.DeregisterRequestType, (*FSM).applyDeregister) diff --git a/agent/consul/fsm/snapshot.go b/agent/consul/fsm/snapshot.go index e4c9c0bb45..55a13f4af7 100644 --- a/agent/consul/fsm/snapshot.go +++ b/agent/consul/fsm/snapshot.go @@ -5,6 +5,7 @@ import ( "time" "github.com/armon/go-metrics" + "github.com/armon/go-metrics/prometheus" "github.com/hashicorp/consul/agent/consul/state" "github.com/hashicorp/consul/agent/structs" "github.com/hashicorp/go-msgpack/codec" @@ -12,6 +13,13 @@ import ( "github.com/hashicorp/raft" ) +var SnapshotSummaries = []prometheus.SummaryDefinition{ + { + Name: []string{"fsm", "persist"}, + Help: "This measures the time it takes to persist the FSM to a raft snapshot.", + }, +} + // snapshot is used to provide a snapshot of the current // state in a way that can be accessed concurrently with operations // that may modify the live state. 
diff --git a/agent/consul/leader.go b/agent/consul/leader.go index 6fba3af672..e7e028e6e8 100644 --- a/agent/consul/leader.go +++ b/agent/consul/leader.go @@ -11,6 +11,7 @@ import ( "time" "github.com/armon/go-metrics" + "github.com/armon/go-metrics/prometheus" "github.com/hashicorp/consul/acl" "github.com/hashicorp/consul/agent/metadata" "github.com/hashicorp/consul/agent/structs" @@ -27,6 +28,21 @@ import ( "golang.org/x/time/rate" ) +var LeaderSummaries = []prometheus.SummaryDefinition{ + { + Name: []string{"leader", "barrier"}, + Help: "This measures the time spent waiting for the raft barrier upon gaining leadership.", + }, + { + Name: []string{"leader", "reconcileMember"}, + Help: "This measures the time spent updating the raft store for a single serf member's information.", + }, + { + Name: []string{"leader", "reapTombstones"}, + Help: "This measures the time spent clearing tombstones.", + }, +} + const ( newLeaderEvent = "consul:new-leader" barrierWriteTimeout = 2 * time.Minute diff --git a/agent/consul/prepared_query_endpoint.go b/agent/consul/prepared_query_endpoint.go index d796c6f6cb..4b515deb36 100644 --- a/agent/consul/prepared_query_endpoint.go +++ b/agent/consul/prepared_query_endpoint.go @@ -19,19 +19,19 @@ import ( var PreparedQuerySummaries = []prometheus.SummaryDefinition{ { Name: []string{"prepared-query", "apply"}, - Help: "", + Help: "This measures the time it takes to apply a prepared query update.", }, { Name: []string{"prepared-query", "explain"}, - Help: "", + Help: "This measures the time it takes to process a prepared query explain request.", }, { Name: []string{"prepared-query", "execute"}, - Help: "", + Help: "This measures the time it takes to process a prepared query execute request.", }, { Name: []string{"prepared-query", "execute_remote"}, - Help: "", + Help: "This measures the time it takes to process a prepared query execute request that was forwarded to another datacenter.", }, } diff --git a/agent/consul/rpc.go 
b/agent/consul/rpc.go index 775d311bda..aab09a3270 100644 --- a/agent/consul/rpc.go +++ b/agent/consul/rpc.go @@ -35,41 +35,41 @@ import ( var RPCCounters = []prometheus.CounterDefinition{ { Name: []string{"rpc", "accept_conn"}, - Help: "", + Help: "This increments when a server accepts an RPC connection.", }, { Name: []string{"rpc", "raft_handoff"}, - Help: "", + Help: "This increments when a server accepts a Raft-related RPC connection.", }, { Name: []string{"rpc", "request_error"}, - Help: "", + Help: "This increments when a server returns an error from an RPC request.", }, { Name: []string{"rpc", "request"}, - Help: "", + Help: "This increments when a server receives a Consul-related RPC request.", }, { Name: []string{"rpc", "cross-dc"}, - Help: "", + Help: "This increments when a server sends a (potentially blocking) cross datacenter RPC query.", }, { Name: []string{"rpc", "query"}, - Help: "", + Help: "This increments when a server receives a new blocking RPC request, indicating the rate of new blocking query calls.", }, } var RPCGauges = []prometheus.GaugeDefinition{ { Name: []string{"rpc", "queries_blocking"}, - Help: "", + Help: "This shows the current number of in-flight blocking queries the server is handling.", }, } var RPCSummaries = []prometheus.SummaryDefinition{ { Name: []string{"rpc", "consistentRead"}, - Help: "", + Help: "This measures the time spent confirming that a consistent read can be performed.", }, } diff --git a/agent/consul/segment_oss.go b/agent/consul/segment_oss.go index 11b06b6959..db910e8c8d 100644 --- a/agent/consul/segment_oss.go +++ b/agent/consul/segment_oss.go @@ -7,10 +7,18 @@ import ( "time" "github.com/armon/go-metrics" + "github.com/armon/go-metrics/prometheus" "github.com/hashicorp/consul/agent/structs" "github.com/hashicorp/serf/serf" ) +var SegmentOSSSummaries = []prometheus.SummaryDefinition{ + { + Name: []string{"leader", "reconcile"}, + Help: "This measures the time spent updating the raft store from the serf member 
information.", + }, +} + // LANMembersAllSegments returns members from all segments. func (s *Server) LANMembersAllSegments() ([]serf.Member, error) { return s.LANMembers(), nil diff --git a/agent/consul/session_endpoint.go b/agent/consul/session_endpoint.go index 3ac8b41dc0..669e638800 100644 --- a/agent/consul/session_endpoint.go +++ b/agent/consul/session_endpoint.go @@ -5,6 +5,7 @@ import ( "time" "github.com/armon/go-metrics" + "github.com/armon/go-metrics/prometheus" "github.com/hashicorp/consul/acl" "github.com/hashicorp/consul/agent/consul/state" "github.com/hashicorp/consul/agent/structs" @@ -13,6 +14,17 @@ import ( "github.com/hashicorp/go-uuid" ) +var SessionEndpointSummaries = []prometheus.SummaryDefinition{ + { + Name: []string{"session", "apply"}, + Help: "This measures the time spent applying a session update.", + }, + { + Name: []string{"session", "renew"}, + Help: "This measures the time spent renewing a session.", + }, +} + // Session endpoint is used to manipulate sessions for KV type Session struct { srv *Server diff --git a/agent/consul/session_ttl.go b/agent/consul/session_ttl.go index 7387e42a9f..193dc18e31 100644 --- a/agent/consul/session_ttl.go +++ b/agent/consul/session_ttl.go @@ -4,16 +4,15 @@ import ( "fmt" "time" - "github.com/armon/go-metrics/prometheus" - "github.com/armon/go-metrics" + "github.com/armon/go-metrics/prometheus" "github.com/hashicorp/consul/agent/structs" ) var SessionGauges = []prometheus.GaugeDefinition{ { Name: []string{"session_ttl", "active"}, - Help: "", + Help: "This tracks the active number of sessions being tracked.", }, { Name: []string{"raft", "applied_index"}, @@ -28,7 +27,7 @@ var SessionGauges = []prometheus.GaugeDefinition{ var SessionSummaries = []prometheus.SummaryDefinition{ { Name: []string{"session_ttl", "invalidate"}, - Help: "", + Help: "This measures the time spent invalidating an expired session.", }, } diff --git a/agent/consul/txn_endpoint.go b/agent/consul/txn_endpoint.go index 
50a57f9e16..c4a9314314 100644 --- a/agent/consul/txn_endpoint.go +++ b/agent/consul/txn_endpoint.go @@ -19,7 +19,7 @@ var TxnSummaries = []prometheus.SummaryDefinition{ }, { Name: []string{"txn", "read"}, - Help: "", + Help: "This measures the time spent returning a read transaction.", }, } diff --git a/agent/consul/usagemetrics/usagemetrics.go b/agent/consul/usagemetrics/usagemetrics.go index 7f2207fa60..ac74eca2e1 100644 --- a/agent/consul/usagemetrics/usagemetrics.go +++ b/agent/consul/usagemetrics/usagemetrics.go @@ -15,16 +15,16 @@ import ( var Gauges = []prometheus.GaugeDefinition{ { - Name: []string{"state", "nodes"}, - Help: "", + Name: []string{"consul", "state", "nodes"}, + Help: "This measures the current number of nodes registered with Consul. It is only emitted by Consul servers. Added in v1.9.0.", }, { - Name: []string{"state", "services"}, - Help: "", + Name: []string{"consul", "state", "services"}, + Help: "This measures the current number of unique services registered with Consul, based on service name. It is only emitted by Consul servers. Added in v1.9.0.", }, { - Name: []string{"state", "service_instances"}, - Help: "", + Name: []string{"consul", "state", "service_instances"}, + Help: "This measures the current number of unique service instances registered with Consul. It is only emitted by Consul servers. 
Added in v1.9.0.", }, } diff --git a/agent/dns.go b/agent/dns.go index 6d541aeaaa..880184c398 100644 --- a/agent/dns.go +++ b/agent/dns.go @@ -31,18 +31,18 @@ import ( var DNSCounters = []prometheus.CounterDefinition{ { Name: []string{"dns", "stale_queries"}, - Help: "", + Help: "This increments when an agent serves a query within the allowed stale threshold.", }, } var DNSSummaries = []prometheus.SummaryDefinition{ { Name: []string{"dns", "ptr_query"}, - Help: "", + Help: "This measures the time spent handling a reverse DNS query for the given node.", }, { Name: []string{"dns", "domain_query"}, - Help: "", + Help: "This measures the time spent handling a domain query for the given node.", }, } diff --git a/agent/grpc/stats.go b/agent/grpc/stats.go index b1a0c5a232..d8bd6298a2 100644 --- a/agent/grpc/stats.go +++ b/agent/grpc/stats.go @@ -14,37 +14,37 @@ var defaultMetrics = metrics.Default() var StatsGauges = []prometheus.GaugeDefinition{ { Name: []string{"grpc", "server", "connections"}, - Help: "", + Help: "This metric measures the number of active gRPC connections open on the server.", }, { Name: []string{"grpc", "client", "connections"}, - Help: "", + Help: "This metric measures the number of active gRPC connections open from the client agent to any Consul servers.", }, { Name: []string{"grpc", "server", "streams"}, - Help: "", + Help: "This metric measures the number of active gRPC streams handled by the server.", }, } var StatsCounters = []prometheus.CounterDefinition{ { Name: []string{"grpc", "client", "request", "count"}, - Help: "", + Help: "This metric counts the number of gRPC requests made by the client agent to a Consul server.", }, { Name: []string{"grpc", "server", "request", "count"}, - Help: "", + Help: "This metric counts the number of gRPC requests received by the server.", }, { Name: []string{"grpc", "client", "connection", "count"}, - Help: "", + Help: "This metric counts the number of new gRPC connections opened by the client agent to a 
Consul server.", }, { Name: []string{"grpc", "server", "connection", "count"}, - Help: "", + Help: "This metric counts the number of new gRPC connections received by the server.", }, { Name: []string{"grpc", "server", "stream", "count"}, - Help: "", + Help: "This metric counts the number of new gRPC streams received by the server.", }, } diff --git a/agent/http.go b/agent/http.go index 9b5fcf5e37..9e24fe1ab9 100644 --- a/agent/http.go +++ b/agent/http.go @@ -35,7 +35,7 @@ import ( var HTTPSummaries = []prometheus.SummaryDefinition{ { Name: []string{"api", "http"}, - Help: "", + Help: "Samples how long it takes to service the given HTTP request for the given verb and path.", }, } diff --git a/agent/local/state.go b/agent/local/state.go index 5ca827607d..8a74189e3e 100644 --- a/agent/local/state.go +++ b/agent/local/state.go @@ -21,25 +21,29 @@ import ( ) var StateCounters = []prometheus.CounterDefinition{ - { - Name: []string{"acl", "blocked", "service", "deregistration"}, - Help: "", - }, - { - Name: []string{"acl", "blocked", "check", "deregistration"}, - Help: "", - }, { Name: []string{"acl", "blocked", "service", "registration"}, - Help: "", + Help: "This increments whenever a registration fails for a service (blocked by an ACL)", + }, + { + Name: []string{"acl", "blocked", "service", "deregistration"}, + Help: "This increments whenever a deregistration fails for a service (blocked by an ACL)", }, { Name: []string{"acl", "blocked", "check", "registration"}, - Help: "", + Help: "This increments whenever a registration fails for a check (blocked by an ACL)", + }, + { + Name: []string{"acl", "blocked", "check", "deregistration"}, + Help: "This increments whenever a deregistration fails for a check (blocked by an ACL)", }, { Name: []string{"acl", "blocked", "node", "registration"}, - Help: "", + Help: "This increments whenever a registration fails for a node (blocked by an ACL)", + }, + { + Name: []string{"acl", "blocked", "node", "deregistration"}, + Help: "This 
increments whenever a deregistration fails for a node (blocked by an ACL)", }, } diff --git a/agent/setup.go b/agent/setup.go index c3f4dbf08f..49a586ebbc 100644 --- a/agent/setup.go +++ b/agent/setup.go @@ -8,6 +8,8 @@ import ( "sync" "time" + "github.com/hashicorp/consul/agent/consul/fsm" + "github.com/armon/go-metrics/prometheus" "github.com/hashicorp/consul/agent/consul/usagemetrics" "github.com/hashicorp/consul/agent/local" @@ -187,6 +189,7 @@ func registerWithGRPC(b grpcresolver.Builder) { func getPrometheusDefs() lib.PrometheusDefs { serviceName := []string{"consul"} var gauges = [][]prometheus.GaugeDefinition{ + cache.Gauges, consul.AutopilotGauges, consul.RPCGauges, consul.SessionGauges, @@ -205,9 +208,8 @@ func getPrometheusDefs() lib.PrometheusDefs { } raftCounters := []prometheus.CounterDefinition{ - // TODO(kit): "consul.raft..." metrics come from the raft lib and we should migrate these to a telemetry - // package within. In the mean time, we're going to define them here because it's important that they're always - // present for Consul users setting up dashboards. + // TODO(kit): "raft..." metrics come from the raft lib and we should migrate these to a telemetry + // package within. In the mean time, we're going to define a few here because they're key to monitoring Consul. { Name: []string{"raft", "apply"}, Help: "This counts the number of Raft transactions occurring over the interval.", @@ -224,6 +226,7 @@ func getPrometheusDefs() lib.PrometheusDefs { var counters = [][]prometheus.CounterDefinition{ CatalogCounters, + cache.Counters, consul.ACLCounters, consul.CatalogCounters, consul.ClientCounters, @@ -244,9 +247,8 @@ func getPrometheusDefs() lib.PrometheusDefs { } raftSummaries := []prometheus.SummaryDefinition{ - // TODO(kit): "consul.raft..." metrics come from the raft lib and we should migrate these to a telemetry - // package within. 
In the mean time, we're going to define them here because it's important that they're always - // present for Consul users setting up dashboards. + // TODO(kit): "raft..." metrics come from the raft lib and we should migrate these to a telemetry + // package within. In the mean time, we're going to define a few here because they're key to monitoring Consul. { Name: []string{"raft", "commitTime"}, Help: "This measures the time it takes to commit a new entry to the Raft log on the leader.", @@ -261,14 +263,20 @@ func getPrometheusDefs() lib.PrometheusDefs { HTTPSummaries, consul.ACLSummaries, consul.ACLEndpointSummaries, + consul.ACLEndpointLegacySummaries, consul.CatalogSummaries, consul.FederationStateSummaries, consul.IntentionSummaries, consul.KVSummaries, + consul.LeaderSummaries, consul.PreparedQuerySummaries, consul.RPCSummaries, + consul.SegmentOSSSummaries, consul.SessionSummaries, + consul.SessionEndpointSummaries, consul.TxnSummaries, + fsm.CommandsSummaries, + fsm.SnapshotSummaries, raftSummaries, } var summaryDefs []prometheus.SummaryDefinition