finish adding static server metrics

This commit is contained in:
Kit Patella 2020-11-13 16:26:08 -08:00
parent 06d59c03b9
commit 5da2f1efa8
20 changed files with 268 additions and 84 deletions

31
agent/cache/cache.go vendored
View File

@ -24,6 +24,7 @@ import (
"time" "time"
"github.com/armon/go-metrics" "github.com/armon/go-metrics"
"github.com/armon/go-metrics/prometheus"
"golang.org/x/time/rate" "golang.org/x/time/rate"
"github.com/hashicorp/consul/lib" "github.com/hashicorp/consul/lib"
@ -32,6 +33,34 @@ import (
//go:generate mockery -all -inpkg //go:generate mockery -all -inpkg
// Gauges holds the static definitions of the gauge metrics declared for the
// cache package. Help strings are still empty placeholders.
//
// TODO(kit): remove the namespace from these once the metrics themselves change
var Gauges = []prometheus.GaugeDefinition{
	{Name: []string{"consul", "cache", "entries_count"}, Help: ""},
}
// Counters holds the static definitions of the counter metrics declared for
// the cache package. Help strings are still empty placeholders.
//
// TODO(kit): remove the namespace from these once the metrics themselves change
var Counters = []prometheus.CounterDefinition{
	{Name: []string{"consul", "cache", "bypass"}, Help: ""},
	{Name: []string{"consul", "cache", "fetch_success"}, Help: ""},
	{Name: []string{"consul", "cache", "fetch_error"}, Help: ""},
	{Name: []string{"consul", "cache", "evict_expired"}, Help: ""},
}
// Constants related to refresh backoff. We probably don't ever need to // Constants related to refresh backoff. We probably don't ever need to
// make these configurable knobs since they primarily exist to lower load. // make these configurable knobs since they primarily exist to lower load.
const ( const (
@ -629,6 +658,7 @@ func (c *Cache) fetch(key string, r getOptions, allowNew bool, attempt uint, ign
// Error handling // Error handling
if err == nil { if err == nil {
labels := []metrics.Label{{Name: "result_not_modified", Value: strconv.FormatBool(result.NotModified)}} labels := []metrics.Label{{Name: "result_not_modified", Value: strconv.FormatBool(result.NotModified)}}
// TODO(kit): move tEntry.Name to a label on the first write here and deprecate the second write
metrics.IncrCounterWithLabels([]string{"consul", "cache", "fetch_success"}, 1, labels) metrics.IncrCounterWithLabels([]string{"consul", "cache", "fetch_success"}, 1, labels)
metrics.IncrCounterWithLabels([]string{"consul", "cache", tEntry.Name, "fetch_success"}, 1, labels) metrics.IncrCounterWithLabels([]string{"consul", "cache", tEntry.Name, "fetch_success"}, 1, labels)
@ -658,6 +688,7 @@ func (c *Cache) fetch(key string, r getOptions, allowNew bool, attempt uint, ign
newEntry.RefreshLostContact = time.Time{} newEntry.RefreshLostContact = time.Time{}
} }
} else { } else {
// TODO(kit): Add tEntry.Name to label on fetch_error and deprecate second write
metrics.IncrCounter([]string{"consul", "cache", "fetch_error"}, 1) metrics.IncrCounter([]string{"consul", "cache", "fetch_error"}, 1)
metrics.IncrCounter([]string{"consul", "cache", tEntry.Name, "fetch_error"}, 1) metrics.IncrCounter([]string{"consul", "cache", tEntry.Name, "fetch_error"}, 1)

View File

@ -11,7 +11,6 @@ import (
"github.com/hashicorp/consul/agent/structs" "github.com/hashicorp/consul/agent/structs"
) )
// TODO(kit): Add help strings for each
var CatalogCounters = []prometheus.CounterDefinition{ var CatalogCounters = []prometheus.CounterDefinition{
{ {
Name: []string{"client", "api", "catalog_register"}, Name: []string{"client", "api", "catalog_register"},
@ -19,71 +18,71 @@ var CatalogCounters = []prometheus.CounterDefinition{
}, },
{ {
Name: []string{"client", "rpc", "error", "catalog_register"}, Name: []string{"client", "rpc", "error", "catalog_register"},
Help: "", Help: "This increments whenever a Consul agent receives an RPC error for a catalog register request.",
}, },
{ {
Name: []string{"client", "api", "success", "catalog_register"}, Name: []string{"client", "api", "success", "catalog_register"},
Help: "", Help: "This increments whenever a Consul agent successfully responds to a catalog register request.",
}, },
{ {
Name: []string{"client", "api", "catalog_deregister"}, Name: []string{"client", "api", "catalog_deregister"},
Help: "", Help: "This increments whenever a Consul agent receives a catalog deregister request.",
}, },
{ {
Name: []string{"client", "api", "catalog_datacenters"}, Name: []string{"client", "api", "catalog_datacenters"},
Help: "", Help: "This increments whenever a Consul agent receives a request to list datacenters in the catalog.",
}, },
{ {
Name: []string{"client", "rpc", "error", "catalog_deregister"}, Name: []string{"client", "rpc", "error", "catalog_deregister"},
Help: "", Help: "This increments whenever a Consul agent receives an RPC error for a catalog deregister request.",
}, },
{ {
Name: []string{"client", "api", "success", "catalog_nodes"}, Name: []string{"client", "api", "success", "catalog_nodes"},
Help: "", Help: "This increments whenever a Consul agent successfully responds to a request to list nodes.",
}, },
{ {
Name: []string{"client", "rpc", "error", "catalog_nodes"}, Name: []string{"client", "rpc", "error", "catalog_nodes"},
Help: "", Help: "This increments whenever a Consul agent receives an RPC error for a request to list nodes.",
}, },
{ {
Name: []string{"client", "api", "success", "catalog_deregister"}, Name: []string{"client", "api", "success", "catalog_deregister"},
Help: "", Help: "This increments whenever a Consul agent successfully responds to a catalog deregister request.",
}, },
{ {
Name: []string{"client", "rpc", "error", "catalog_datacenters"}, Name: []string{"client", "rpc", "error", "catalog_datacenters"},
Help: "", Help: "This increments whenever a Consul agent receives an RPC error for a request to list datacenters.",
}, },
{ {
Name: []string{"client", "api", "success", "catalog_datacenters"}, Name: []string{"client", "api", "success", "catalog_datacenters"},
Help: "", Help: "This increments whenever a Consul agent successfully responds to a request to list datacenters.",
}, },
{ {
Name: []string{"client", "api", "catalog_nodes"}, Name: []string{"client", "api", "catalog_nodes"},
Help: "", Help: "This increments whenever a Consul agent receives a request to list nodes from the catalog.",
}, },
{ {
Name: []string{"client", "api", "catalog_services"}, Name: []string{"client", "api", "catalog_services"},
Help: "", Help: "This increments whenever a Consul agent receives a request to list services from the catalog.",
}, },
{ {
Name: []string{"client", "rpc", "error", "catalog_services"}, Name: []string{"client", "rpc", "error", "catalog_services"},
Help: "", Help: "This increments whenever a Consul agent receives an RPC error for a request to list services.",
}, },
{ {
Name: []string{"client", "api", "success", "catalog_services"}, Name: []string{"client", "api", "success", "catalog_services"},
Help: "", Help: "This increments whenever a Consul agent successfully responds to a request to list services.",
}, },
{ {
Name: []string{"client", "api", "catalog_service_nodes"}, Name: []string{"client", "api", "catalog_service_nodes"},
Help: "", Help: "This increments whenever a Consul agent receives a request to list nodes offering a service.",
}, },
{ {
Name: []string{"client", "rpc", "error", "catalog_service_nodes"}, Name: []string{"client", "rpc", "error", "catalog_service_nodes"},
Help: "", Help: "This increments whenever a Consul agent receives an RPC error for a request to list nodes offering a service.",
}, },
{ {
Name: []string{"client", "api", "success", "catalog_service_nodes"}, Name: []string{"client", "api", "success", "catalog_service_nodes"},
Help: "", Help: "This increments whenever a Consul agent successfully responds to a request to list nodes offering a service.",
}, },
{ {
Name: []string{"client", "api", "error", "catalog_service_nodes"}, Name: []string{"client", "api", "error", "catalog_service_nodes"},
@ -91,15 +90,15 @@ var CatalogCounters = []prometheus.CounterDefinition{
}, },
{ {
Name: []string{"client", "api", "catalog_node_services"}, Name: []string{"client", "api", "catalog_node_services"},
Help: "", Help: "This increments whenever a Consul agent receives a request to list services in a node.",
}, },
{ {
Name: []string{"client", "api", "success", "catalog_node_services"}, Name: []string{"client", "api", "success", "catalog_node_services"},
Help: "", Help: "This increments whenever a Consul agent successfully responds to a request to list services in a node.",
}, },
{ {
Name: []string{"client", "rpc", "error", "catalog_node_services"}, Name: []string{"client", "rpc", "error", "catalog_node_services"},
Help: "", Help: "This increments whenever a Consul agent receives an RPC error for a request to list services in a node.",
}, },
{ {
Name: []string{"client", "api", "catalog_node_service_list"}, Name: []string{"client", "api", "catalog_node_service_list"},
@ -115,15 +114,15 @@ var CatalogCounters = []prometheus.CounterDefinition{
}, },
{ {
Name: []string{"client", "api", "catalog_gateway_services"}, Name: []string{"client", "api", "catalog_gateway_services"},
Help: "", Help: "This increments whenever a Consul agent receives a request to list services associated with a gateway.",
}, },
{ {
Name: []string{"client", "rpc", "error", "catalog_gateway_services"}, Name: []string{"client", "rpc", "error", "catalog_gateway_services"},
Help: "", Help: "This increments whenever a Consul agent receives an RPC error for a request to list services associated with a gateway.",
}, },
{ {
Name: []string{"client", "api", "success", "catalog_gateway_services"}, Name: []string{"client", "api", "success", "catalog_gateway_services"},
Help: "", Help: "This increments whenever a Consul agent successfully responds to a request to list services associated with a gateway.",
}, },
} }

View File

@ -5,6 +5,7 @@ import (
"time" "time"
"github.com/armon/go-metrics" "github.com/armon/go-metrics"
"github.com/armon/go-metrics/prometheus"
"github.com/hashicorp/consul/acl" "github.com/hashicorp/consul/acl"
"github.com/hashicorp/consul/agent/consul/state" "github.com/hashicorp/consul/agent/consul/state"
"github.com/hashicorp/consul/agent/structs" "github.com/hashicorp/consul/agent/structs"
@ -12,6 +13,13 @@ import (
"github.com/hashicorp/go-memdb" "github.com/hashicorp/go-memdb"
) )
// ACLEndpointLegacySummaries holds the static summary definitions for the
// legacy ACL endpoint.
var ACLEndpointLegacySummaries = []prometheus.SummaryDefinition{
	{Name: []string{"acl", "apply"}, Help: "This measures the time it takes to complete an update to the ACL store."},
}
// Bootstrap is used to perform a one-time ACL bootstrap operation on // Bootstrap is used to perform a one-time ACL bootstrap operation on
// a cluster to get the first management token. // a cluster to get the first management token.
func (a *ACL) Bootstrap(args *structs.DCSpecificRequest, reply *structs.ACL) error { func (a *ACL) Bootstrap(args *structs.DCSpecificRequest, reply *structs.ACL) error {

View File

@ -16,7 +16,7 @@ import (
var AutopilotGauges = []prometheus.GaugeDefinition{ var AutopilotGauges = []prometheus.GaugeDefinition{
{ {
Name: []string{"autopilot", "failure_tolerance"}, Name: []string{"autopilot", "failure_tolerance"},
Help: "", Help: "This tracks the number of voting servers that the cluster can lose while continuing to function.",
}, },
{ {
Name: []string{"autopilot", "healthy"}, Name: []string{"autopilot", "healthy"},

View File

@ -21,7 +21,7 @@ import (
var CatalogCounters = []prometheus.CounterDefinition{ var CatalogCounters = []prometheus.CounterDefinition{
{ {
Name: []string{"catalog", "service", "query"}, Name: []string{"catalog", "service", "query"},
Help: "", Help: "This increments for each catalog query for the given service.",
}, },
{ {
Name: []string{"catalog", "connect", "query"}, Name: []string{"catalog", "connect", "query"},
@ -29,7 +29,7 @@ var CatalogCounters = []prometheus.CounterDefinition{
}, },
{ {
Name: []string{"catalog", "service", "query-tag"}, Name: []string{"catalog", "service", "query-tag"},
Help: "", Help: "This increments for each catalog query for the given service with the given tag.",
}, },
{ {
Name: []string{"catalog", "connect", "query-tag"}, Name: []string{"catalog", "connect", "query-tag"},
@ -37,7 +37,7 @@ var CatalogCounters = []prometheus.CounterDefinition{
}, },
{ {
Name: []string{"catalog", "service", "query-tags"}, Name: []string{"catalog", "service", "query-tags"},
Help: "", Help: "This increments for each catalog query for the given service with the given tags.",
}, },
{ {
Name: []string{"catalog", "connect", "query-tags"}, Name: []string{"catalog", "connect", "query-tags"},
@ -45,7 +45,7 @@ var CatalogCounters = []prometheus.CounterDefinition{
}, },
{ {
Name: []string{"catalog", "service", "not-found"}, Name: []string{"catalog", "service", "not-found"},
Help: "", Help: "This increments for each catalog query where the given service could not be found.",
}, },
{ {
Name: []string{"catalog", "connect", "not-found"}, Name: []string{"catalog", "connect", "not-found"},
@ -56,11 +56,11 @@ var CatalogCounters = []prometheus.CounterDefinition{
var CatalogSummaries = []prometheus.SummaryDefinition{ var CatalogSummaries = []prometheus.SummaryDefinition{
{ {
Name: []string{"catalog", "deregister"}, Name: []string{"catalog", "deregister"},
Help: "", Help: "This measures the time it takes to complete a catalog deregister operation.",
}, },
{ {
Name: []string{"catalog", "register"}, Name: []string{"catalog", "register"},
Help: "", Help: "This measures the time it takes to complete a catalog register operation.",
}, },
} }

View File

@ -4,11 +4,102 @@ import (
"fmt" "fmt"
"time" "time"
metrics "github.com/armon/go-metrics" "github.com/armon/go-metrics"
"github.com/armon/go-metrics/prometheus"
"github.com/hashicorp/consul/agent/structs" "github.com/hashicorp/consul/agent/structs"
"github.com/hashicorp/consul/api" "github.com/hashicorp/consul/api"
) )
// CommandsSummaries holds the static summary definitions for the timings
// recorded while applying commands to the FSM. Each Name must be listed at
// most once.
var CommandsSummaries = []prometheus.SummaryDefinition{
	{
		Name: []string{"fsm", "register"},
		Help: "This measures the time it takes to apply a catalog register operation to the FSM.",
	},
	{
		Name: []string{"fsm", "deregister"},
		Help: "This measures the time it takes to apply a catalog deregister operation to the FSM.",
	},
	{
		Name: []string{"fsm", "kvs"},
		Help: "This measures the time it takes to apply the given KV operation to the FSM.",
	},
	{
		Name: []string{"fsm", "session"},
		Help: "This measures the time it takes to apply the given session operation to the FSM.",
	},
	{
		Name: []string{"fsm", "acl"},
		Help: "This measures the time it takes to apply the given ACL operation to the FSM.",
	},
	{
		Name: []string{"fsm", "tombstone"},
		Help: "This measures the time it takes to apply the given tombstone operation to the FSM.",
	},
	{
		Name: []string{"fsm", "coordinate", "batch-update"},
		Help: "This measures the time it takes to apply the given batch coordinate update to the FSM.",
	},
	{
		Name: []string{"fsm", "prepared-query"},
		Help: "This measures the time it takes to apply the given prepared query update operation to the FSM.",
	},
	{
		Name: []string{"fsm", "txn"},
		Help: "This measures the time it takes to apply the given transaction update to the FSM.",
	},
	{
		Name: []string{"fsm", "autopilot"},
		Help: "This measures the time it takes to apply the given autopilot update to the FSM.",
	},
	// NOTE(review): the intention and ca summaries are listed both with and
	// without the "consul" namespace; presumably both names are emitted by the
	// apply functions. Confirm against the emitting call sites.
	{
		Name: []string{"consul", "fsm", "intention"},
		Help: "",
	},
	{
		Name: []string{"fsm", "intention"},
		Help: "",
	},
	{
		Name: []string{"consul", "fsm", "ca"},
		Help: "",
	},
	{
		// Unprefixed counterpart of the entry above, mirroring the intention
		// pair. This slot used to repeat {"fsm", "ca", "leaf"}, which is
		// still declared once below.
		Name: []string{"fsm", "ca"},
		Help: "",
	},
	{
		Name: []string{"fsm", "acl", "token"},
		Help: "",
	},
	{
		Name: []string{"fsm", "ca", "leaf"},
		Help: "",
	},
	{
		Name: []string{"fsm", "acl", "policy"},
		Help: "",
	},
	{
		Name: []string{"fsm", "acl", "bindingrule"},
		Help: "",
	},
	{
		Name: []string{"fsm", "acl", "authmethod"},
		Help: "",
	},
	{
		Name: []string{"fsm", "system_metadata"},
		Help: "",
	},
	// TODO(kit): We generate the config-entry fsm summaries by reading off of the request. It is
	// possible to statically declare these when we know all of the names, but I didn't get to it
	// in this patch. Config-entries are known though and we should add these in the future.
	// {
	// 	Name:        []string{"fsm", "config_entry", req.Entry.GetKind()},
	// 	Help:        "",
	// },
}
func init() { func init() {
registerCommand(structs.RegisterRequestType, (*FSM).applyRegister) registerCommand(structs.RegisterRequestType, (*FSM).applyRegister)
registerCommand(structs.DeregisterRequestType, (*FSM).applyDeregister) registerCommand(structs.DeregisterRequestType, (*FSM).applyDeregister)

View File

@ -5,6 +5,7 @@ import (
"time" "time"
"github.com/armon/go-metrics" "github.com/armon/go-metrics"
"github.com/armon/go-metrics/prometheus"
"github.com/hashicorp/consul/agent/consul/state" "github.com/hashicorp/consul/agent/consul/state"
"github.com/hashicorp/consul/agent/structs" "github.com/hashicorp/consul/agent/structs"
"github.com/hashicorp/go-msgpack/codec" "github.com/hashicorp/go-msgpack/codec"
@ -12,6 +13,13 @@ import (
"github.com/hashicorp/raft" "github.com/hashicorp/raft"
) )
// SnapshotSummaries holds the static summary definitions for FSM snapshot
// persistence.
var SnapshotSummaries = []prometheus.SummaryDefinition{
	{Name: []string{"fsm", "persist"}, Help: "This measures the time it takes to persist the FSM to a raft snapshot."},
}
// snapshot is used to provide a snapshot of the current // snapshot is used to provide a snapshot of the current
// state in a way that can be accessed concurrently with operations // state in a way that can be accessed concurrently with operations
// that may modify the live state. // that may modify the live state.

View File

@ -11,6 +11,7 @@ import (
"time" "time"
"github.com/armon/go-metrics" "github.com/armon/go-metrics"
"github.com/armon/go-metrics/prometheus"
"github.com/hashicorp/consul/acl" "github.com/hashicorp/consul/acl"
"github.com/hashicorp/consul/agent/metadata" "github.com/hashicorp/consul/agent/metadata"
"github.com/hashicorp/consul/agent/structs" "github.com/hashicorp/consul/agent/structs"
@ -27,6 +28,21 @@ import (
"golang.org/x/time/rate" "golang.org/x/time/rate"
) )
// LeaderSummaries holds the static summary definitions for the timings
// recorded by the leader routines.
var LeaderSummaries = []prometheus.SummaryDefinition{
	{Name: []string{"leader", "barrier"}, Help: "This measures the time spent waiting for the raft barrier upon gaining leadership."},
	{Name: []string{"leader", "reconcileMember"}, Help: "This measures the time spent updating the raft store for a single serf member's information."},
	{Name: []string{"leader", "reapTombstones"}, Help: "This measures the time spent clearing tombstones."},
}
const ( const (
newLeaderEvent = "consul:new-leader" newLeaderEvent = "consul:new-leader"
barrierWriteTimeout = 2 * time.Minute barrierWriteTimeout = 2 * time.Minute

View File

@ -19,19 +19,19 @@ import (
var PreparedQuerySummaries = []prometheus.SummaryDefinition{ var PreparedQuerySummaries = []prometheus.SummaryDefinition{
{ {
Name: []string{"prepared-query", "apply"}, Name: []string{"prepared-query", "apply"},
Help: "", Help: "This measures the time it takes to apply a prepared query update.",
}, },
{ {
Name: []string{"prepared-query", "explain"}, Name: []string{"prepared-query", "explain"},
Help: "", Help: "This measures the time it takes to process a prepared query explain request.",
}, },
{ {
Name: []string{"prepared-query", "execute"}, Name: []string{"prepared-query", "execute"},
Help: "", Help: "This measures the time it takes to process a prepared query execute request.",
}, },
{ {
Name: []string{"prepared-query", "execute_remote"}, Name: []string{"prepared-query", "execute_remote"},
Help: "", Help: "This measures the time it takes to process a prepared query execute request that was forwarded to another datacenter.",
}, },
} }

View File

@ -35,41 +35,41 @@ import (
var RPCCounters = []prometheus.CounterDefinition{ var RPCCounters = []prometheus.CounterDefinition{
{ {
Name: []string{"rpc", "accept_conn"}, Name: []string{"rpc", "accept_conn"},
Help: "", Help: "This increments when a server accepts an RPC connection.",
}, },
{ {
Name: []string{"rpc", "raft_handoff"}, Name: []string{"rpc", "raft_handoff"},
Help: "", Help: "This increments when a server accepts a Raft-related RPC connection.",
}, },
{ {
Name: []string{"rpc", "request_error"}, Name: []string{"rpc", "request_error"},
Help: "", Help: "This increments when a server returns an error from an RPC request.",
}, },
{ {
Name: []string{"rpc", "request"}, Name: []string{"rpc", "request"},
Help: "", Help: "This increments when a server receives a Consul-related RPC request.",
}, },
{ {
Name: []string{"rpc", "cross-dc"}, Name: []string{"rpc", "cross-dc"},
Help: "", Help: "This increments when a server sends a (potentially blocking) cross datacenter RPC query.",
}, },
{ {
Name: []string{"rpc", "query"}, Name: []string{"rpc", "query"},
Help: "", Help: "This increments when a server receives a new blocking RPC request, indicating the rate of new blocking query calls.",
}, },
} }
var RPCGauges = []prometheus.GaugeDefinition{ var RPCGauges = []prometheus.GaugeDefinition{
{ {
Name: []string{"rpc", "queries_blocking"}, Name: []string{"rpc", "queries_blocking"},
Help: "", Help: "This shows the current number of in-flight blocking queries the server is handling.",
}, },
} }
var RPCSummaries = []prometheus.SummaryDefinition{ var RPCSummaries = []prometheus.SummaryDefinition{
{ {
Name: []string{"rpc", "consistentRead"}, Name: []string{"rpc", "consistentRead"},
Help: "", Help: "This measures the time spent confirming that a consistent read can be performed.",
}, },
} }

View File

@ -7,10 +7,18 @@ import (
"time" "time"
"github.com/armon/go-metrics" "github.com/armon/go-metrics"
"github.com/armon/go-metrics/prometheus"
"github.com/hashicorp/consul/agent/structs" "github.com/hashicorp/consul/agent/structs"
"github.com/hashicorp/serf/serf" "github.com/hashicorp/serf/serf"
) )
// SegmentOSSSummaries holds the static summary definition for the leader
// reconcile timing declared in this file.
var SegmentOSSSummaries = []prometheus.SummaryDefinition{
	{Name: []string{"leader", "reconcile"}, Help: "This measures the time spent updating the raft store from the serf member information."},
}
// LANMembersAllSegments returns members from all segments. // LANMembersAllSegments returns members from all segments.
func (s *Server) LANMembersAllSegments() ([]serf.Member, error) { func (s *Server) LANMembersAllSegments() ([]serf.Member, error) {
return s.LANMembers(), nil return s.LANMembers(), nil

View File

@ -5,6 +5,7 @@ import (
"time" "time"
"github.com/armon/go-metrics" "github.com/armon/go-metrics"
"github.com/armon/go-metrics/prometheus"
"github.com/hashicorp/consul/acl" "github.com/hashicorp/consul/acl"
"github.com/hashicorp/consul/agent/consul/state" "github.com/hashicorp/consul/agent/consul/state"
"github.com/hashicorp/consul/agent/structs" "github.com/hashicorp/consul/agent/structs"
@ -13,6 +14,17 @@ import (
"github.com/hashicorp/go-uuid" "github.com/hashicorp/go-uuid"
) )
// SessionEndpointSummaries holds the static summary definitions for the
// Session endpoint timings.
var SessionEndpointSummaries = []prometheus.SummaryDefinition{
	{Name: []string{"session", "apply"}, Help: "This measures the time spent applying a session update."},
	{Name: []string{"session", "renew"}, Help: "This measures the time spent renewing a session."},
}
// Session endpoint is used to manipulate sessions for KV // Session endpoint is used to manipulate sessions for KV
type Session struct { type Session struct {
srv *Server srv *Server

View File

@ -4,16 +4,15 @@ import (
"fmt" "fmt"
"time" "time"
"github.com/armon/go-metrics/prometheus"
"github.com/armon/go-metrics" "github.com/armon/go-metrics"
"github.com/armon/go-metrics/prometheus"
"github.com/hashicorp/consul/agent/structs" "github.com/hashicorp/consul/agent/structs"
) )
var SessionGauges = []prometheus.GaugeDefinition{ var SessionGauges = []prometheus.GaugeDefinition{
{ {
Name: []string{"session_ttl", "active"}, Name: []string{"session_ttl", "active"},
Help: "", Help: "This tracks the active number of sessions being tracked.",
}, },
{ {
Name: []string{"raft", "applied_index"}, Name: []string{"raft", "applied_index"},
@ -28,7 +27,7 @@ var SessionGauges = []prometheus.GaugeDefinition{
var SessionSummaries = []prometheus.SummaryDefinition{ var SessionSummaries = []prometheus.SummaryDefinition{
{ {
Name: []string{"session_ttl", "invalidate"}, Name: []string{"session_ttl", "invalidate"},
Help: "", Help: "This measures the time spent invalidating an expired session.",
}, },
} }

View File

@ -19,7 +19,7 @@ var TxnSummaries = []prometheus.SummaryDefinition{
}, },
{ {
Name: []string{"txn", "read"}, Name: []string{"txn", "read"},
Help: "", Help: "This measures the time spent returning a read transaction.",
}, },
} }

View File

@ -15,16 +15,16 @@ import (
var Gauges = []prometheus.GaugeDefinition{ var Gauges = []prometheus.GaugeDefinition{
{ {
Name: []string{"state", "nodes"}, Name: []string{"consul", "state", "nodes"},
Help: "", Help: "This measures the current number of nodes registered with Consul. It is only emitted by Consul servers. Added in v1.9.0.",
}, },
{ {
Name: []string{"state", "services"}, Name: []string{"consul", "state", "services"},
Help: "", Help: "This measures the current number of unique services registered with Consul, based on service name. It is only emitted by Consul servers. Added in v1.9.0.",
}, },
{ {
Name: []string{"state", "service_instances"}, Name: []string{"consul", "state", "service_instances"},
Help: "", Help: "This measures the current number of unique service instances registered with Consul. It is only emitted by Consul servers. Added in v1.9.0.",
}, },
} }

View File

@ -31,18 +31,18 @@ import (
var DNSCounters = []prometheus.CounterDefinition{ var DNSCounters = []prometheus.CounterDefinition{
{ {
Name: []string{"dns", "stale_queries"}, Name: []string{"dns", "stale_queries"},
Help: "", Help: "This increments when an agent serves a query within the allowed stale threshold.",
}, },
} }
var DNSSummaries = []prometheus.SummaryDefinition{ var DNSSummaries = []prometheus.SummaryDefinition{
{ {
Name: []string{"dns", "ptr_query"}, Name: []string{"dns", "ptr_query"},
Help: "", Help: "This measures the time spent handling a reverse DNS query for the given node.",
}, },
{ {
Name: []string{"dns", "domain_query"}, Name: []string{"dns", "domain_query"},
Help: "", Help: "This measures the time spent handling a domain query for the given node.",
}, },
} }

View File

@ -14,37 +14,37 @@ var defaultMetrics = metrics.Default()
var StatsGauges = []prometheus.GaugeDefinition{ var StatsGauges = []prometheus.GaugeDefinition{
{ {
Name: []string{"grpc", "server", "connections"}, Name: []string{"grpc", "server", "connections"},
Help: "", Help: "This metric measures the number of active gRPC connections open on the server.",
}, },
{ {
Name: []string{"grpc", "client", "connections"}, Name: []string{"grpc", "client", "connections"},
Help: "", Help: "This metric measures the number of active gRPC connections open from the client agent to any Consul servers.",
}, },
{ {
Name: []string{"grpc", "server", "streams"}, Name: []string{"grpc", "server", "streams"},
Help: "", Help: "This metric measures the number of active gRPC streams handled by the server.",
}, },
} }
var StatsCounters = []prometheus.CounterDefinition{ var StatsCounters = []prometheus.CounterDefinition{
{ {
Name: []string{"grpc", "client", "request", "count"}, Name: []string{"grpc", "client", "request", "count"},
Help: "", Help: "This metric counts the number of gRPC requests made by the client agent to a Consul server.",
}, },
{ {
Name: []string{"grpc", "server", "request", "count"}, Name: []string{"grpc", "server", "request", "count"},
Help: "", Help: "This metric counts the number of gRPC requests received by the server.",
}, },
{ {
Name: []string{"grpc", "client", "connection", "count"}, Name: []string{"grpc", "client", "connection", "count"},
Help: "", Help: "This metric counts the number of new gRPC connections opened by the client agent to a Consul server.",
}, },
{ {
Name: []string{"grpc", "server", "connection", "count"}, Name: []string{"grpc", "server", "connection", "count"},
Help: "", Help: "This metric counts the number of new gRPC connections received by the server.",
}, },
{ {
Name: []string{"grpc", "server", "stream", "count"}, Name: []string{"grpc", "server", "stream", "count"},
Help: "", Help: "This metric counts the number of new gRPC streams received by the server.",
}, },
} }

View File

@ -35,7 +35,7 @@ import (
var HTTPSummaries = []prometheus.SummaryDefinition{ var HTTPSummaries = []prometheus.SummaryDefinition{
{ {
Name: []string{"api", "http"}, Name: []string{"api", "http"},
Help: "", Help: "Samples how long it takes to service the given HTTP request for the given verb and path.",
}, },
} }

View File

@ -21,25 +21,29 @@ import (
) )
var StateCounters = []prometheus.CounterDefinition{ var StateCounters = []prometheus.CounterDefinition{
{
Name: []string{"acl", "blocked", "service", "deregistration"},
Help: "",
},
{
Name: []string{"acl", "blocked", "check", "deregistration"},
Help: "",
},
{ {
Name: []string{"acl", "blocked", "service", "registration"}, Name: []string{"acl", "blocked", "service", "registration"},
Help: "", Help: "This increments whenever a registration fails for a service (blocked by an ACL)",
},
{
Name: []string{"acl", "blocked", "service", "deregistration"},
Help: "This increments whenever a deregistration fails for a service (blocked by an ACL)",
}, },
{ {
Name: []string{"acl", "blocked", "check", "registration"}, Name: []string{"acl", "blocked", "check", "registration"},
Help: "", Help: "This increments whenever a registration fails for a check (blocked by an ACL)",
},
{
Name: []string{"acl", "blocked", "check", "deregistration"},
Help: "This increments whenever a deregistration fails for a check (blocked by an ACL)",
}, },
{ {
Name: []string{"acl", "blocked", "node", "registration"}, Name: []string{"acl", "blocked", "node", "registration"},
Help: "", Help: "This increments whenever a registration fails for a node (blocked by an ACL)",
},
{
Name: []string{"acl", "blocked", "node", "deregistration"},
Help: "This increments whenever a deregistration fails for a node (blocked by an ACL)",
}, },
} }

View File

@ -8,6 +8,8 @@ import (
"sync" "sync"
"time" "time"
"github.com/hashicorp/consul/agent/consul/fsm"
"github.com/armon/go-metrics/prometheus" "github.com/armon/go-metrics/prometheus"
"github.com/hashicorp/consul/agent/consul/usagemetrics" "github.com/hashicorp/consul/agent/consul/usagemetrics"
"github.com/hashicorp/consul/agent/local" "github.com/hashicorp/consul/agent/local"
@ -187,6 +189,7 @@ func registerWithGRPC(b grpcresolver.Builder) {
func getPrometheusDefs() lib.PrometheusDefs { func getPrometheusDefs() lib.PrometheusDefs {
serviceName := []string{"consul"} serviceName := []string{"consul"}
var gauges = [][]prometheus.GaugeDefinition{ var gauges = [][]prometheus.GaugeDefinition{
cache.Gauges,
consul.AutopilotGauges, consul.AutopilotGauges,
consul.RPCGauges, consul.RPCGauges,
consul.SessionGauges, consul.SessionGauges,
@ -205,9 +208,8 @@ func getPrometheusDefs() lib.PrometheusDefs {
} }
raftCounters := []prometheus.CounterDefinition{ raftCounters := []prometheus.CounterDefinition{
// TODO(kit): "consul.raft..." metrics come from the raft lib and we should migrate these to a telemetry // TODO(kit): "raft..." metrics come from the raft lib and we should migrate these to a telemetry
// package within. In the mean time, we're going to define them here because it's important that they're always // package within. In the mean time, we're going to define a few here because they're key to monitoring Consul.
// present for Consul users setting up dashboards.
{ {
Name: []string{"raft", "apply"}, Name: []string{"raft", "apply"},
Help: "This counts the number of Raft transactions occurring over the interval.", Help: "This counts the number of Raft transactions occurring over the interval.",
@ -224,6 +226,7 @@ func getPrometheusDefs() lib.PrometheusDefs {
var counters = [][]prometheus.CounterDefinition{ var counters = [][]prometheus.CounterDefinition{
CatalogCounters, CatalogCounters,
cache.Counters,
consul.ACLCounters, consul.ACLCounters,
consul.CatalogCounters, consul.CatalogCounters,
consul.ClientCounters, consul.ClientCounters,
@ -244,9 +247,8 @@ func getPrometheusDefs() lib.PrometheusDefs {
} }
raftSummaries := []prometheus.SummaryDefinition{ raftSummaries := []prometheus.SummaryDefinition{
// TODO(kit): "consul.raft..." metrics come from the raft lib and we should migrate these to a telemetry // TODO(kit): "raft..." metrics come from the raft lib and we should migrate these to a telemetry
// package within. In the mean time, we're going to define them here because it's important that they're always // package within. In the mean time, we're going to define a few here because they're key to monitoring Consul.
// present for Consul users setting up dashboards.
{ {
Name: []string{"raft", "commitTime"}, Name: []string{"raft", "commitTime"},
Help: "This measures the time it takes to commit a new entry to the Raft log on the leader.", Help: "This measures the time it takes to commit a new entry to the Raft log on the leader.",
@ -261,14 +263,20 @@ func getPrometheusDefs() lib.PrometheusDefs {
HTTPSummaries, HTTPSummaries,
consul.ACLSummaries, consul.ACLSummaries,
consul.ACLEndpointSummaries, consul.ACLEndpointSummaries,
consul.ACLEndpointLegacySummaries,
consul.CatalogSummaries, consul.CatalogSummaries,
consul.FederationStateSummaries, consul.FederationStateSummaries,
consul.IntentionSummaries, consul.IntentionSummaries,
consul.KVSummaries, consul.KVSummaries,
consul.LeaderSummaries,
consul.PreparedQuerySummaries, consul.PreparedQuerySummaries,
consul.RPCSummaries, consul.RPCSummaries,
consul.SegmentOSSSummaries,
consul.SessionSummaries, consul.SessionSummaries,
consul.SessionEndpointSummaries,
consul.TxnSummaries, consul.TxnSummaries,
fsm.CommandsSummaries,
fsm.SnapshotSummaries,
raftSummaries, raftSummaries,
} }
var summaryDefs []prometheus.SummaryDefinition var summaryDefs []prometheus.SummaryDefinition