first pass on agent-configured prometheusDefs and adding defs for every consul metric

This commit is contained in:
Kit Patella 2020-11-12 18:12:12 -08:00
parent 77451d944e
commit 24a2471029
23 changed files with 687 additions and 78 deletions

View File

@ -5,11 +5,128 @@ import (
"net/http" "net/http"
"strings" "strings"
metrics "github.com/armon/go-metrics" "github.com/armon/go-metrics"
"github.com/armon/go-metrics/prometheus"
cachetype "github.com/hashicorp/consul/agent/cache-types" cachetype "github.com/hashicorp/consul/agent/cache-types"
"github.com/hashicorp/consul/agent/structs" "github.com/hashicorp/consul/agent/structs"
) )
// CatalogCounters holds the Prometheus counter definitions for the catalog_*
// counters under consul.client.api (requests and successes) and
// consul.client.rpc.error (RPC failures).
//
// TODO(kit): Add help strings for each
var CatalogCounters = []prometheus.CounterDefinition{
	{
		Name: []string{"consul", "client", "api", "catalog_register"},
		Help: "Increments whenever a Consul agent receives a catalog register request.",
	},
	{Name: []string{"consul", "client", "rpc", "error", "catalog_register"}, Help: ""},
	{Name: []string{"consul", "client", "api", "success", "catalog_register"}, Help: ""},
	{Name: []string{"consul", "client", "api", "catalog_deregister"}, Help: ""},
	{Name: []string{"consul", "client", "api", "catalog_datacenters"}, Help: ""},
	{Name: []string{"consul", "client", "rpc", "error", "catalog_deregister"}, Help: ""},
	{Name: []string{"consul", "client", "api", "success", "catalog_nodes"}, Help: ""},
	{Name: []string{"consul", "client", "rpc", "error", "catalog_nodes"}, Help: ""},
	{Name: []string{"consul", "client", "api", "success", "catalog_deregister"}, Help: ""},
	{Name: []string{"consul", "client", "rpc", "error", "catalog_datacenters"}, Help: ""},
	{Name: []string{"consul", "client", "api", "success", "catalog_datacenters"}, Help: ""},
	{Name: []string{"consul", "client", "api", "catalog_nodes"}, Help: ""},
	{Name: []string{"consul", "client", "api", "catalog_services"}, Help: ""},
	{Name: []string{"consul", "client", "rpc", "error", "catalog_services"}, Help: ""},
	{Name: []string{"consul", "client", "api", "success", "catalog_services"}, Help: ""},
	{Name: []string{"consul", "client", "api", "catalog_service_nodes"}, Help: ""},
	{Name: []string{"consul", "client", "rpc", "error", "catalog_service_nodes"}, Help: ""},
	{Name: []string{"consul", "client", "api", "success", "catalog_service_nodes"}, Help: ""},
	{Name: []string{"consul", "client", "api", "error", "catalog_service_nodes"}, Help: ""},
	{Name: []string{"consul", "client", "api", "catalog_node_services"}, Help: ""},
	{Name: []string{"consul", "client", "api", "success", "catalog_node_services"}, Help: ""},
	{Name: []string{"consul", "client", "rpc", "error", "catalog_node_services"}, Help: ""},
	{Name: []string{"consul", "client", "api", "catalog_node_service_list"}, Help: ""},
	{Name: []string{"consul", "client", "rpc", "error", "catalog_node_service_list"}, Help: ""},
	{Name: []string{"consul", "client", "api", "success", "catalog_node_service_list"}, Help: ""},
	{Name: []string{"consul", "client", "api", "catalog_gateway_services"}, Help: ""},
	{Name: []string{"consul", "client", "rpc", "error", "catalog_gateway_services"}, Help: ""},
	{Name: []string{"consul", "client", "api", "success", "catalog_gateway_services"}, Help: ""},
}
func (s *HTTPHandlers) CatalogRegister(resp http.ResponseWriter, req *http.Request) (interface{}, error) { func (s *HTTPHandlers) CatalogRegister(resp http.ResponseWriter, req *http.Request) (interface{}, error) {
metrics.IncrCounterWithLabels([]string{"client", "api", "catalog_register"}, 1, metrics.IncrCounterWithLabels([]string{"client", "api", "catalog_register"}, 1,
[]metrics.Label{{Name: "node", Value: s.nodeName()}}) []metrics.Label{{Name: "node", Value: s.nodeName()}})

View File

@ -6,7 +6,8 @@ import (
"sync" "sync"
"time" "time"
metrics "github.com/armon/go-metrics" "github.com/armon/go-metrics"
"github.com/armon/go-metrics/prometheus"
"github.com/hashicorp/consul/acl" "github.com/hashicorp/consul/acl"
"github.com/hashicorp/consul/agent/structs" "github.com/hashicorp/consul/agent/structs"
"github.com/hashicorp/consul/logging" "github.com/hashicorp/consul/logging"
@ -15,6 +16,32 @@ import (
"golang.org/x/time/rate" "golang.org/x/time/rate"
) )
// ACLCounters holds the Prometheus counter definitions for the ACL token
// cache hit and miss counters. Help text has not been written yet.
var ACLCounters = []prometheus.CounterDefinition{
	{Name: []string{"consul", "acl", "token", "cache_hit"}, Help: ""},
	{Name: []string{"consul", "acl", "token", "cache_miss"}, Help: ""},
}
// ACLSummaries holds the Prometheus summary definitions for the ACL token
// resolution metrics. Help text has not been written yet.
var ACLSummaries = []prometheus.SummaryDefinition{
	{Name: []string{"consul", "acl", "resolveTokenLegacy"}, Help: ""},
	{Name: []string{"consul", "acl", "ResolveToken"}, Help: ""},
	{Name: []string{"consul", "acl", "ResolveTokenToIdentity"}, Help: ""},
}
// These must be kept in sync with the constants in command/agent/acl.go. // These must be kept in sync with the constants in command/agent/acl.go.
const ( const (
// anonymousToken is the token ID we re-write to if there is no token ID // anonymousToken is the token ID we re-write to if there is no token ID

View File

@ -11,7 +11,8 @@ import (
"regexp" "regexp"
"time" "time"
metrics "github.com/armon/go-metrics" "github.com/armon/go-metrics"
"github.com/armon/go-metrics/prometheus"
"github.com/hashicorp/consul/acl" "github.com/hashicorp/consul/acl"
"github.com/hashicorp/consul/agent/consul/authmethod" "github.com/hashicorp/consul/agent/consul/authmethod"
"github.com/hashicorp/consul/agent/consul/state" "github.com/hashicorp/consul/agent/consul/state"
@ -30,6 +31,73 @@ const (
aclBootstrapReset = "acl-bootstrap-reset" aclBootstrapReset = "acl-bootstrap-reset"
) )
// ACLEndpointSummaries holds the Prometheus summary definitions for the ACL
// endpoint metrics (tokens, policies, roles, binding rules, auth methods,
// login and logout). Each metric name is listed exactly once; help text has
// not been written yet.
var ACLEndpointSummaries = []prometheus.SummaryDefinition{
	{Name: []string{"consul", "acl", "token", "clone"}, Help: ""},
	{Name: []string{"consul", "acl", "token", "upsert"}, Help: ""},
	{Name: []string{"consul", "acl", "token", "delete"}, Help: ""},
	{Name: []string{"consul", "acl", "policy", "upsert"}, Help: ""},
	{Name: []string{"consul", "acl", "policy", "delete"}, Help: ""},
	{Name: []string{"consul", "acl", "role", "upsert"}, Help: ""},
	{Name: []string{"consul", "acl", "role", "delete"}, Help: ""},
	{Name: []string{"consul", "acl", "bindingrule", "upsert"}, Help: ""},
	{Name: []string{"consul", "acl", "bindingrule", "delete"}, Help: ""},
	{Name: []string{"consul", "acl", "authmethod", "upsert"}, Help: ""},
	{Name: []string{"consul", "acl", "authmethod", "delete"}, Help: ""},
	{Name: []string{"consul", "acl", "login"}, Help: ""},
	{Name: []string{"consul", "acl", "logout"}, Help: ""},
}
// Regex for matching // Regex for matching
var ( var (
validPolicyName = regexp.MustCompile(`^[A-Za-z0-9\-_]{1,128}$`) validPolicyName = regexp.MustCompile(`^[A-Za-z0-9\-_]{1,128}$`)

View File

@ -5,6 +5,7 @@ import (
"fmt" "fmt"
"github.com/armon/go-metrics" "github.com/armon/go-metrics"
"github.com/armon/go-metrics/prometheus"
"github.com/hashicorp/consul/agent/metadata" "github.com/hashicorp/consul/agent/metadata"
"github.com/hashicorp/consul/types" "github.com/hashicorp/consul/types"
"github.com/hashicorp/raft" "github.com/hashicorp/raft"
@ -12,6 +13,17 @@ import (
"github.com/hashicorp/serf/serf" "github.com/hashicorp/serf/serf"
) )
// AutopilotGauges holds the Prometheus gauge definitions for the autopilot
// failure-tolerance and cluster-health gauges.
var AutopilotGauges = []prometheus.GaugeDefinition{
	{Name: []string{"consul", "autopilot", "failure_tolerance"}, Help: ""},
	{
		Name: []string{"consul", "autopilot", "healthy"},
		Help: "This tracks the overall health of the local server cluster. 1 if all servers are healthy, 0 if one or more are unhealthy.",
	},
}
// AutopilotDelegate is a Consul delegate for autopilot operations. // AutopilotDelegate is a Consul delegate for autopilot operations.
type AutopilotDelegate struct { type AutopilotDelegate struct {
server *Server server *Server

View File

@ -6,6 +6,7 @@ import (
"time" "time"
"github.com/armon/go-metrics" "github.com/armon/go-metrics"
"github.com/armon/go-metrics/prometheus"
"github.com/hashicorp/consul/acl" "github.com/hashicorp/consul/acl"
"github.com/hashicorp/consul/agent/consul/state" "github.com/hashicorp/consul/agent/consul/state"
"github.com/hashicorp/consul/agent/structs" "github.com/hashicorp/consul/agent/structs"
@ -17,6 +18,52 @@ import (
"github.com/hashicorp/go-uuid" "github.com/hashicorp/go-uuid"
) )
// CatalogCounters holds the Prometheus counter definitions for the
// consul.catalog.service.* and consul.catalog.connect.* query counters.
// Help text has not been written yet.
var CatalogCounters = []prometheus.CounterDefinition{
	{Name: []string{"consul", "catalog", "service", "query"}, Help: ""},
	{Name: []string{"consul", "catalog", "connect", "query"}, Help: ""},
	{Name: []string{"consul", "catalog", "service", "query-tag"}, Help: ""},
	{Name: []string{"consul", "catalog", "connect", "query-tag"}, Help: ""},
	{Name: []string{"consul", "catalog", "service", "query-tags"}, Help: ""},
	{Name: []string{"consul", "catalog", "connect", "query-tags"}, Help: ""},
	{Name: []string{"consul", "catalog", "service", "not-found"}, Help: ""},
	{Name: []string{"consul", "catalog", "connect", "not-found"}, Help: ""},
}
// CatalogSummaries holds the Prometheus summary definitions for the catalog
// register and deregister metrics. Help text has not been written yet.
var CatalogSummaries = []prometheus.SummaryDefinition{
	{Name: []string{"consul", "catalog", "deregister"}, Help: ""},
	{Name: []string{"consul", "catalog", "register"}, Help: ""},
}
// Catalog endpoint is used to manipulate the service catalog // Catalog endpoint is used to manipulate the service catalog
type Catalog struct { type Catalog struct {
srv *Server srv *Server

View File

@ -9,6 +9,7 @@ import (
"time" "time"
"github.com/armon/go-metrics" "github.com/armon/go-metrics"
"github.com/armon/go-metrics/prometheus"
"github.com/hashicorp/consul/agent/pool" "github.com/hashicorp/consul/agent/pool"
"github.com/hashicorp/consul/agent/router" "github.com/hashicorp/consul/agent/router"
"github.com/hashicorp/consul/agent/structs" "github.com/hashicorp/consul/agent/structs"
@ -21,6 +22,21 @@ import (
"golang.org/x/time/rate" "golang.org/x/time/rate"
) )
// ClientCounters holds the Prometheus counter definitions for the
// consul.client.rpc counters: total requests, rate-limited requests and
// failed requests.
var ClientCounters = []prometheus.CounterDefinition{
	{
		Help: "Increments whenever a Consul agent in client mode makes an RPC request to a Consul server.",
		Name: []string{"consul", "client", "rpc"},
	},
	{
		Help: "Increments whenever a Consul agent in client mode makes an RPC request to a Consul server gets rate limited by that agent's limits configuration.",
		Name: []string{"consul", "client", "rpc", "exceeded"},
	},
	{
		Help: "Increments whenever a Consul agent in client mode makes an RPC request to a Consul server and fails.",
		Name: []string{"consul", "client", "rpc", "failed"},
	},
}
const ( const (
// serfEventBacklog is the maximum number of unprocessed Serf Events // serfEventBacklog is the maximum number of unprocessed Serf Events
// that will be held in queue before new serf events block. A // that will be held in queue before new serf events block. A

View File

@ -4,6 +4,8 @@ import (
"fmt" "fmt"
"time" "time"
"github.com/armon/go-metrics/prometheus"
metrics "github.com/armon/go-metrics" metrics "github.com/armon/go-metrics"
"github.com/hashicorp/consul/acl" "github.com/hashicorp/consul/acl"
"github.com/hashicorp/consul/agent/consul/state" "github.com/hashicorp/consul/agent/consul/state"
@ -12,6 +14,33 @@ import (
"github.com/mitchellh/copystructure" "github.com/mitchellh/copystructure"
) )
// ConfigSummaries holds the Prometheus summary definitions for the
// consul.config_entry.* metrics. Help text has not been written yet.
var ConfigSummaries = []prometheus.SummaryDefinition{
	{Name: []string{"consul", "config_entry", "apply"}, Help: ""},
	{Name: []string{"consul", "config_entry", "get"}, Help: ""},
	{Name: []string{"consul", "config_entry", "list"}, Help: ""},
	{Name: []string{"consul", "config_entry", "listAll"}, Help: ""},
	{Name: []string{"consul", "config_entry", "delete"}, Help: ""},
	{Name: []string{"consul", "config_entry", "resolve_service_config"}, Help: ""},
}
// The ConfigEntry endpoint is used to query centralized config information // The ConfigEntry endpoint is used to query centralized config information
type ConfigEntry struct { type ConfigEntry struct {
srv *Server srv *Server

View File

@ -5,13 +5,33 @@ import (
"fmt" "fmt"
"time" "time"
metrics "github.com/armon/go-metrics" "github.com/armon/go-metrics"
"github.com/armon/go-metrics/prometheus"
"github.com/hashicorp/consul/acl" "github.com/hashicorp/consul/acl"
"github.com/hashicorp/consul/agent/consul/state" "github.com/hashicorp/consul/agent/consul/state"
"github.com/hashicorp/consul/agent/structs" "github.com/hashicorp/consul/agent/structs"
memdb "github.com/hashicorp/go-memdb" memdb "github.com/hashicorp/go-memdb"
) )
// FederationStateSummaries holds the Prometheus summary definitions for the
// consul.federation_state.* metrics. Help text has not been written yet.
var FederationStateSummaries = []prometheus.SummaryDefinition{
	{Name: []string{"consul", "federation_state", "apply"}, Help: ""},
	{Name: []string{"consul", "federation_state", "get"}, Help: ""},
	{Name: []string{"consul", "federation_state", "list"}, Help: ""},
	{Name: []string{"consul", "federation_state", "list_mesh_gateways"}, Help: ""},
}
var ( var (
errFederationStatesNotEnabled = errors.New("Federation states are currently disabled until all servers in the datacenter support the feature") errFederationStatesNotEnabled = errors.New("Federation states are currently disabled until all servers in the datacenter support the feature")
) )

View File

@ -6,6 +6,7 @@ import (
"time" "time"
"github.com/armon/go-metrics" "github.com/armon/go-metrics"
"github.com/armon/go-metrics/prometheus"
"github.com/hashicorp/consul/acl" "github.com/hashicorp/consul/acl"
"github.com/hashicorp/consul/agent/connect" "github.com/hashicorp/consul/agent/connect"
"github.com/hashicorp/consul/agent/consul/state" "github.com/hashicorp/consul/agent/consul/state"
@ -16,6 +17,13 @@ import (
"github.com/hashicorp/go-memdb" "github.com/hashicorp/go-memdb"
) )
// IntentionSummaries holds the Prometheus summary definition for the
// consul.intention.apply metric. Help text has not been written yet.
var IntentionSummaries = []prometheus.SummaryDefinition{
	{Name: []string{"consul", "intention", "apply"}, Help: ""},
}
var ( var (
// ErrIntentionNotFound is returned if the intention lookup failed. // ErrIntentionNotFound is returned if the intention lookup failed.
ErrIntentionNotFound = errors.New("Intention not found") ErrIntentionNotFound = errors.New("Intention not found")
@ -252,6 +260,7 @@ func (s *Intention) Apply(
if done, err := s.srv.ForwardRPC("Intention.Apply", args, args, reply); done { if done, err := s.srv.ForwardRPC("Intention.Apply", args, args, reply); done {
return err return err
} }
// TODO(Kit): Why do we have summaries for intentions both with and without the consul namespace?
defer metrics.MeasureSince([]string{"consul", "intention", "apply"}, time.Now()) defer metrics.MeasureSince([]string{"consul", "intention", "apply"}, time.Now())
defer metrics.MeasureSince([]string{"intention", "apply"}, time.Now()) defer metrics.MeasureSince([]string{"intention", "apply"}, time.Now())

View File

@ -6,6 +6,7 @@ import (
"time" "time"
"github.com/armon/go-metrics" "github.com/armon/go-metrics"
"github.com/armon/go-metrics/prometheus"
"github.com/hashicorp/consul/acl" "github.com/hashicorp/consul/acl"
"github.com/hashicorp/consul/agent/consul/state" "github.com/hashicorp/consul/agent/consul/state"
"github.com/hashicorp/consul/agent/structs" "github.com/hashicorp/consul/agent/structs"
@ -14,6 +15,13 @@ import (
"github.com/hashicorp/go-memdb" "github.com/hashicorp/go-memdb"
) )
// KVSummaries holds the Prometheus summary definition for the
// consul.kvs.apply metric.
var KVSummaries = []prometheus.SummaryDefinition{
	{
		Help: "This measures the time it takes to complete an update to the KV store.",
		Name: []string{"consul", "kvs", "apply"},
	},
}
// KVS endpoint is used to manipulate the Key-Value store // KVS endpoint is used to manipulate the Key-Value store
type KVS struct { type KVS struct {
srv *Server srv *Server

View File

@ -6,6 +6,7 @@ import (
"time" "time"
"github.com/armon/go-metrics" "github.com/armon/go-metrics"
"github.com/armon/go-metrics/prometheus"
"github.com/hashicorp/consul/acl" "github.com/hashicorp/consul/acl"
"github.com/hashicorp/consul/agent/consul/state" "github.com/hashicorp/consul/agent/consul/state"
"github.com/hashicorp/consul/agent/structs" "github.com/hashicorp/consul/agent/structs"
@ -15,6 +16,25 @@ import (
"github.com/hashicorp/go-uuid" "github.com/hashicorp/go-uuid"
) )
// PreparedQuerySummaries holds the Prometheus summary definitions for the
// consul.prepared-query.* metrics. Help text has not been written yet.
var PreparedQuerySummaries = []prometheus.SummaryDefinition{
	{Name: []string{"consul", "prepared-query", "apply"}, Help: ""},
	{Name: []string{"consul", "prepared-query", "explain"}, Help: ""},
	{Name: []string{"consul", "prepared-query", "execute"}, Help: ""},
	{Name: []string{"consul", "prepared-query", "execute_remote"}, Help: ""},
}
// PreparedQuery manages the prepared query endpoint. // PreparedQuery manages the prepared query endpoint.
type PreparedQuery struct { type PreparedQuery struct {
srv *Server srv *Server

View File

@ -13,6 +13,7 @@ import (
"time" "time"
"github.com/armon/go-metrics" "github.com/armon/go-metrics"
"github.com/armon/go-metrics/prometheus"
"github.com/hashicorp/consul/acl" "github.com/hashicorp/consul/acl"
"github.com/hashicorp/consul/agent/consul/state" "github.com/hashicorp/consul/agent/consul/state"
"github.com/hashicorp/consul/agent/consul/wanfed" "github.com/hashicorp/consul/agent/consul/wanfed"
@ -31,6 +32,47 @@ import (
"github.com/hashicorp/yamux" "github.com/hashicorp/yamux"
) )
// RPCCounters holds the Prometheus counter definitions for the consul.rpc.*
// counters. Help text has not been written yet.
var RPCCounters = []prometheus.CounterDefinition{
	{Name: []string{"consul", "rpc", "accept_conn"}, Help: ""},
	{Name: []string{"consul", "rpc", "raft_handoff"}, Help: ""},
	{Name: []string{"consul", "rpc", "request_error"}, Help: ""},
	{Name: []string{"consul", "rpc", "request"}, Help: ""},
	{Name: []string{"consul", "rpc", "cross-dc"}, Help: ""},
	{Name: []string{"consul", "rpc", "query"}, Help: ""},
}
// RPCGauges holds the Prometheus gauge definition for the
// consul.rpc.queries_blocking gauge. Help text has not been written yet.
var RPCGauges = []prometheus.GaugeDefinition{
	{Name: []string{"consul", "rpc", "queries_blocking"}, Help: ""},
}
// RPCSummaries holds the Prometheus summary definition for the
// consul.rpc.consistentRead metric. Help text has not been written yet.
var RPCSummaries = []prometheus.SummaryDefinition{
	{Name: []string{"consul", "rpc", "consistentRead"}, Help: ""},
}
const ( const (
// jitterFraction is a the limit to the amount of jitter we apply // jitterFraction is a the limit to the amount of jitter we apply
// to a user specified MaxQueryTime. We divide the specified time by // to a user specified MaxQueryTime. We divide the specified time by

View File

@ -17,7 +17,7 @@ import (
"sync/atomic" "sync/atomic"
"time" "time"
metrics "github.com/armon/go-metrics" "github.com/armon/go-metrics"
connlimit "github.com/hashicorp/go-connlimit" connlimit "github.com/hashicorp/go-connlimit"
"github.com/hashicorp/go-hclog" "github.com/hashicorp/go-hclog"
"github.com/hashicorp/go-memdb" "github.com/hashicorp/go-memdb"
@ -50,6 +50,8 @@ import (
"github.com/hashicorp/consul/types" "github.com/hashicorp/consul/types"
) )
// NOTE: The "consul.client.rpc" and "consul.client.rpc.exceeded" counters are defined in consul/client.go.
// These are the protocol versions that Consul can _understand_. These are // These are the protocol versions that Consul can _understand_. These are
// Consul-level protocol versions, that are used to configure the Serf // Consul-level protocol versions, that are used to configure the Serf
// protocol versions. // protocol versions.

View File

@ -4,10 +4,34 @@ import (
"fmt" "fmt"
"time" "time"
"github.com/armon/go-metrics/prometheus"
"github.com/armon/go-metrics" "github.com/armon/go-metrics"
"github.com/hashicorp/consul/agent/structs" "github.com/hashicorp/consul/agent/structs"
) )
// SessionGauges holds the Prometheus gauge definitions for the active
// session TTL gauge and the raft applied/last index gauges. Help text has
// not been written yet.
var SessionGauges = []prometheus.GaugeDefinition{
	{Name: []string{"consul", "session_ttl", "active"}, Help: ""},
	{Name: []string{"consul", "raft", "applied_index"}, Help: ""},
	{Name: []string{"consul", "raft", "last_index"}, Help: ""},
}
// SessionSummaries holds the Prometheus summary definition for the
// consul.session_ttl.invalidate metric. Help text has not been written yet.
var SessionSummaries = []prometheus.SummaryDefinition{
	{Name: []string{"consul", "session_ttl", "invalidate"}, Help: ""},
}
const ( const (
// maxInvalidateAttempts limits how many invalidate attempts are made // maxInvalidateAttempts limits how many invalidate attempts are made
maxInvalidateAttempts = 6 maxInvalidateAttempts = 6

View File

@ -5,12 +5,24 @@ import (
"time" "time"
"github.com/armon/go-metrics" "github.com/armon/go-metrics"
"github.com/armon/go-metrics/prometheus"
"github.com/hashicorp/consul/acl" "github.com/hashicorp/consul/acl"
"github.com/hashicorp/consul/agent/structs" "github.com/hashicorp/consul/agent/structs"
"github.com/hashicorp/consul/api" "github.com/hashicorp/consul/api"
"github.com/hashicorp/go-hclog" "github.com/hashicorp/go-hclog"
) )
// TxnSummaries holds the Prometheus summary definitions for the
// consul.txn.apply and consul.txn.read metrics.
var TxnSummaries = []prometheus.SummaryDefinition{
	{
		Name: []string{"consul", "txn", "apply"},
		Help: "This measures the time spent applying a transaction operation.",
	},
	{Name: []string{"consul", "txn", "read"}, Help: ""},
}
// Txn endpoint is used to perform multi-object atomic transactions. // Txn endpoint is used to perform multi-object atomic transactions.
type Txn struct { type Txn struct {
srv *Server srv *Server

View File

@ -5,12 +5,29 @@ import (
"errors" "errors"
"time" "time"
"github.com/armon/go-metrics/prometheus"
"github.com/armon/go-metrics" "github.com/armon/go-metrics"
"github.com/hashicorp/consul/agent/consul/state" "github.com/hashicorp/consul/agent/consul/state"
"github.com/hashicorp/consul/logging" "github.com/hashicorp/consul/logging"
"github.com/hashicorp/go-hclog" "github.com/hashicorp/go-hclog"
) )
// Gauges holds the Prometheus gauge definitions for the consul.state.*
// gauges (nodes, services and service instances). Help text has not been
// written yet.
var Gauges = []prometheus.GaugeDefinition{
	{Name: []string{"consul", "state", "nodes"}, Help: ""},
	{Name: []string{"consul", "state", "services"}, Help: ""},
	{Name: []string{"consul", "state", "service_instances"}, Help: ""},
}
// Config holds the settings for various parameters for the // Config holds the settings for various parameters for the
// UsageMetricsReporter // UsageMetricsReporter
type Config struct { type Config struct {

View File

@ -10,6 +10,8 @@ import (
"sync/atomic" "sync/atomic"
"time" "time"
"github.com/armon/go-metrics/prometheus"
metrics "github.com/armon/go-metrics" metrics "github.com/armon/go-metrics"
radix "github.com/armon/go-radix" radix "github.com/armon/go-radix"
"github.com/coredns/coredns/plugin/pkg/dnsutil" "github.com/coredns/coredns/plugin/pkg/dnsutil"
@ -26,6 +28,24 @@ import (
"github.com/hashicorp/consul/logging" "github.com/hashicorp/consul/logging"
) )
// DNSCounters holds the Prometheus counter definition for the DNS
// stale-queries counter. Help text has not been written yet.
//
// The name starts with "consul" like every other definition list in the
// agent, so the definition uses the same naming scheme as its siblings.
// NOTE(review): presumably this must match the prefixed key the sink
// receives; confirm against the emit sites in this file.
var DNSCounters = []prometheus.CounterDefinition{
	{Name: []string{"consul", "dns", "stale_queries"}, Help: ""},
}
// DNSSummaries holds the Prometheus summary definitions for the DNS
// ptr_query and domain_query metrics. Help text has not been written yet.
//
// The names start with "consul" like every other definition list in the
// agent, so the definitions use the same naming scheme as their siblings.
// NOTE(review): presumably these must match the prefixed keys the sink
// receives; confirm against the emit sites in this file.
var DNSSummaries = []prometheus.SummaryDefinition{
	{Name: []string{"consul", "dns", "ptr_query"}, Help: ""},
	{Name: []string{"consul", "dns", "domain_query"}, Help: ""},
}
const ( const (
// UDP can fit ~25 A records in a 512B response, and ~14 AAAA // UDP can fit ~25 A records in a 512B response, and ~14 AAAA
// records. Limit further to prevent unintentional configuration // records. Limit further to prevent unintentional configuration

View File

@ -5,11 +5,48 @@ import (
"sync/atomic" "sync/atomic"
"github.com/armon/go-metrics" "github.com/armon/go-metrics"
"github.com/armon/go-metrics/prometheus"
"google.golang.org/grpc" "google.golang.org/grpc"
"google.golang.org/grpc/stats" "google.golang.org/grpc/stats"
) )
// defaultMetrics is the package-level metrics handle, set once at package
// initialization from metrics.Default().
var defaultMetrics = metrics.Default()
// StatsGauges holds the Prometheus gauge definitions for the gRPC
// connection and stream gauges. Help text has not been written yet.
var StatsGauges = []prometheus.GaugeDefinition{
	{Name: []string{"consul", "grpc", "server", "connections"}, Help: ""},
	{Name: []string{"consul", "grpc", "client", "connections"}, Help: ""},
	{Name: []string{"consul", "grpc", "server", "streams"}, Help: ""},
}
// StatsCounters holds the Prometheus counter definitions for the gRPC
// request, connection and stream counters. Help text has not been written
// yet.
var StatsCounters = []prometheus.CounterDefinition{
	{Name: []string{"consul", "grpc", "client", "request", "count"}, Help: ""},
	{Name: []string{"consul", "grpc", "server", "request", "count"}, Help: ""},
	{Name: []string{"consul", "grpc", "client", "connection", "count"}, Help: ""},
	{Name: []string{"consul", "grpc", "server", "connection", "count"}, Help: ""},
	{Name: []string{"consul", "grpc", "server", "stream", "count"}, Help: ""},
}
// statsHandler is a grpc/stats.StatsHandler which emits connection and // statsHandler is a grpc/stats.StatsHandler which emits connection and
// request metrics to go-metrics. // request metrics to go-metrics.

View File

@ -17,6 +17,7 @@ import (
"github.com/NYTimes/gziphandler" "github.com/NYTimes/gziphandler"
"github.com/armon/go-metrics" "github.com/armon/go-metrics"
"github.com/armon/go-metrics/prometheus"
"github.com/hashicorp/consul/acl" "github.com/hashicorp/consul/acl"
"github.com/hashicorp/consul/agent/cache" "github.com/hashicorp/consul/agent/cache"
"github.com/hashicorp/consul/agent/config" "github.com/hashicorp/consul/agent/config"
@ -31,6 +32,13 @@ import (
"github.com/pkg/errors" "github.com/pkg/errors"
) )
// HTTPSummaries holds the Prometheus summary definition for the
// consul.api.http metric. Help text has not been written yet.
var HTTPSummaries = []prometheus.SummaryDefinition{
	{Name: []string{"consul", "api", "http"}, Help: ""},
}
// MethodNotAllowedError should be returned by a handler when the HTTP method is not allowed. // MethodNotAllowedError should be returned by a handler when the HTTP method is not allowed.
type MethodNotAllowedError struct { type MethodNotAllowedError struct {
Method string Method string

View File

@ -9,8 +9,8 @@ import (
"sync/atomic" "sync/atomic"
"time" "time"
metrics "github.com/armon/go-metrics" "github.com/armon/go-metrics"
"github.com/armon/go-metrics/prometheus"
"github.com/hashicorp/consul/acl" "github.com/hashicorp/consul/acl"
"github.com/hashicorp/consul/agent/structs" "github.com/hashicorp/consul/agent/structs"
"github.com/hashicorp/consul/agent/token" "github.com/hashicorp/consul/agent/token"
@ -20,6 +20,29 @@ import (
"github.com/hashicorp/go-hclog" "github.com/hashicorp/go-hclog"
) )
// StateCounters holds the Prometheus counter definitions for the
// consul.acl.blocked.* registration and deregistration counters. Help text
// has not been written yet.
var StateCounters = []prometheus.CounterDefinition{
	{Name: []string{"consul", "acl", "blocked", "service", "deregistration"}, Help: ""},
	{Name: []string{"consul", "acl", "blocked", "check", "deregistration"}, Help: ""},
	{Name: []string{"consul", "acl", "blocked", "service", "registration"}, Help: ""},
	{Name: []string{"consul", "acl", "blocked", "check", "registration"}, Help: ""},
	{Name: []string{"consul", "acl", "blocked", "node", "registration"}, Help: ""},
}
const fullSyncReadMaxStale = 2 * time.Second const fullSyncReadMaxStale = 2 * time.Second
// Config is the configuration for the State. // Config is the configuration for the State.

View File

@ -8,6 +8,10 @@ import (
"sync" "sync"
"time" "time"
"github.com/armon/go-metrics/prometheus"
"github.com/hashicorp/consul/agent/consul/usagemetrics"
"github.com/hashicorp/consul/agent/local"
"github.com/hashicorp/go-hclog" "github.com/hashicorp/go-hclog"
"google.golang.org/grpc/grpclog" "google.golang.org/grpc/grpclog"
grpcresolver "google.golang.org/grpc/resolver" grpcresolver "google.golang.org/grpc/resolver"
@ -72,7 +76,7 @@ func NewBaseDeps(configLoader ConfigLoader, logOut io.Writer) (BaseDeps, error)
return d, fmt.Errorf("failed to setup node ID: %w", err) return d, fmt.Errorf("failed to setup node ID: %w", err)
} }
d.MetricsHandler, err = lib.InitTelemetry(cfg.Telemetry) d.MetricsHandler, err = lib.InitTelemetry(cfg.Telemetry, getPrometheusDefs())
if err != nil { if err != nil {
return d, fmt.Errorf("failed to initialize telemetry: %w", err) return d, fmt.Errorf("failed to initialize telemetry: %w", err)
} }
@ -177,3 +181,91 @@ func registerWithGRPC(b grpcresolver.Builder) {
defer registerLock.Unlock() defer registerLock.Unlock()
grpcresolver.Register(b) grpcresolver.Register(b)
} }
// getPrometheusDefs gathers the metric definitions declared across the
// agent's packages and flattens them into one slice per metric type. The
// result is passed to lib.InitTelemetry.
//
// Every exported definition slice must be listed here; a slice that is
// declared but not listed is never included in the returned definitions.
func getPrometheusDefs() lib.PrometheusDefs {
	// TODO(kit): "consul.raft..." metrics come from the raft lib and we should migrate these to a telemetry
	// package within. In the mean time, we're going to define them here because it's important that they're always
	// present for Consul users setting up dashboards.
	raftCounters := []prometheus.CounterDefinition{
		{
			Name: []string{"consul", "raft", "apply"},
			Help: "This counts the number of Raft transactions occurring over the interval.",
		},
		{
			Name: []string{"consul", "raft", "state", "candidate"},
			Help: "This increments whenever a Consul server starts an election.",
		},
		{
			Name: []string{"consul", "raft", "state", "leader"},
			Help: "This increments whenever a Consul server becomes a leader.",
		},
	}

	// See the TODO on raftCounters: these also originate in the raft lib.
	raftSummaries := []prometheus.SummaryDefinition{
		{
			Name: []string{"consul", "raft", "commitTime"},
			Help: "This measures the time it takes to commit a new entry to the Raft log on the leader.",
		},
		{
			Name: []string{"consul", "raft", "leader", "lastContact"},
			Help: "Measures the time since the leader was last able to contact the follower nodes when checking its leader lease.",
		},
	}

	gauges := [][]prometheus.GaugeDefinition{
		consul.AutopilotGauges,
		consul.RPCGauges,
		consul.SessionGauges,
		grpc.StatsGauges,
		usagemetrics.Gauges,
	}
	var gaugeDefs []prometheus.GaugeDefinition
	for _, g := range gauges {
		gaugeDefs = append(gaugeDefs, g...)
	}

	counters := [][]prometheus.CounterDefinition{
		CatalogCounters,
		DNSCounters,
		consul.ACLCounters,
		consul.CatalogCounters,
		consul.ClientCounters,
		consul.RPCCounters,
		grpc.StatsCounters,
		local.StateCounters,
		raftCounters,
	}
	var counterDefs []prometheus.CounterDefinition
	for _, c := range counters {
		counterDefs = append(counterDefs, c...)
	}

	summaries := [][]prometheus.SummaryDefinition{
		DNSSummaries,
		HTTPSummaries,
		consul.ACLSummaries,
		consul.ACLEndpointSummaries,
		consul.CatalogSummaries,
		consul.ConfigSummaries,
		consul.FederationStateSummaries,
		consul.IntentionSummaries,
		consul.KVSummaries,
		consul.PreparedQuerySummaries,
		consul.RPCSummaries,
		consul.SessionSummaries,
		consul.TxnSummaries,
		raftSummaries,
	}
	var summaryDefs []prometheus.SummaryDefinition
	for _, s := range summaries {
		summaryDefs = append(summaryDefs, s...)
	}

	return lib.PrometheusDefs{
		Gauges:    gaugeDefs,
		Counters:  counterDefs,
		Summaries: summaryDefs,
	}
}

View File

@ -54,7 +54,9 @@ func (p *Proxy) Serve() error {
// Initial setup // Initial setup
// Setup telemetry if configured // Setup telemetry if configured
_, err := lib.InitTelemetry(newCfg.Telemetry) // NOTE(kit): As far as I can tell, all of the metrics in the proxy are generated at runtime, so we
// don't have any static metrics we initialize at start.
_, err := lib.InitTelemetry(newCfg.Telemetry, lib.EmptyPrometheusDefs())
if err != nil { if err != nil {
p.logger.Error("proxy telemetry config error", "error", err) p.logger.Error("proxy telemetry config error", "error", err)
} }

View File

@ -276,79 +276,17 @@ func dogstatdSink(cfg TelemetryConfig, hostname string) (metrics.MetricSink, err
return sink, nil return sink, nil
} }
func prometheusSink(cfg TelemetryConfig, hostname string) (metrics.MetricSink, error) { func prometheusSink(cfg TelemetryConfig, hostname string, defs PrometheusDefs) (metrics.MetricSink, error) {
if cfg.PrometheusRetentionTime.Nanoseconds() < 1 { if cfg.PrometheusRetentionTime.Nanoseconds() < 1 {
return nil, nil return nil, nil
} }
// TODO(kit) define these in vars in the package/file they're used
gaugeDefs := []prometheus.GaugeDefinition{
{
Name: []string{"consul", "autopilot", "healthy"},
Help: "This tracks the overall health of the local server cluster. 1 if all servers are healthy, 0 if one or more are unhealthy.",
},
}
// TODO(kit) define these in vars in the package/file they're used
counterDefs := []prometheus.CounterDefinition{
{
Name: []string{"consul", "raft", "apply"},
Help: "This counts the number of Raft transactions occurring over the interval.",
},
{
Name: []string{"consul", "raft", "state", "candidate"},
Help: "This increments whenever a Consul server starts an election.",
},
{
Name: []string{"consul", "raft", "state", "leader"},
Help: "This increments whenever a Consul server becomes a leader.",
},
{
Name: []string{"consul", "client", "api", "catalog_register"},
Help: "Increments whenever a Consul agent receives a catalog register request.",
},
{
Name: []string{"consul", "runtime", "total_gc_pause_ns"},
Help: "Number of nanoseconds consumed by stop-the-world garbage collection (GC) pauses since Consul started.",
},
{
Name: []string{"consul", "client", "rpc"},
Help: "Increments whenever a Consul agent in client mode makes an RPC request to a Consul server.",
},
{
Name: []string{"consul", "client", "rpc", "exceeded"},
Help: "Increments whenever a Consul agent in client mode makes an RPC request to a Consul server gets rate limited by that agent's limits configuration.",
},
{
Name: []string{"consul", "client", "rpc", "failed"},
Help: "Increments whenever a Consul agent in client mode makes an RPC request to a Consul server and fails.",
},
}
// TODO(kit) define these in vars in the package/file they're used
summaryDefs := []prometheus.SummaryDefinition{
{
Name: []string{"consul", "kvs", "apply"},
Help: "This measures the time it takes to complete an update to the KV store.",
},
{
Name: []string{"consul", "txn", "apply"},
Help: "This measures the time spent applying a transaction operation.",
},
{
Name: []string{"consul", "raft", "commitTime"},
Help: "This measures the time it takes to commit a new entry to the Raft log on the leader.",
},
{
Name: []string{"consul", "raft", "leader", "lastContact"},
Help: "Measures the time since the leader was last able to contact the follower nodes when checking its leader lease.",
},
}
prometheusOpts := prometheus.PrometheusOpts{ prometheusOpts := prometheus.PrometheusOpts{
Expiration: cfg.PrometheusRetentionTime, Expiration: cfg.PrometheusRetentionTime,
GaugeDefinitions: gaugeDefs, GaugeDefinitions: defs.Gauges,
CounterDefinitions: counterDefs, CounterDefinitions: defs.Counters,
SummaryDefinitions: summaryDefs, SummaryDefinitions: defs.Summaries,
} }
sink, err := prometheus.NewPrometheusSinkFrom(prometheusOpts) sink, err := prometheus.NewPrometheusSinkFrom(prometheusOpts)
if err != nil { if err != nil {
@ -399,9 +337,25 @@ func circonusSink(cfg TelemetryConfig, hostname string) (metrics.MetricSink, err
return sink, nil return sink, nil
} }
// PrometheusDefs bundles the gauge, counter and summary definitions that are
// handed to the Prometheus sink when telemetry is initialized.
type PrometheusDefs struct {
	// Gauges is forwarded as the sink's GaugeDefinitions.
	Gauges []prometheus.GaugeDefinition
	// Counters is forwarded as the sink's CounterDefinitions.
	Counters []prometheus.CounterDefinition
	// Summaries is forwarded as the sink's SummaryDefinitions.
	Summaries []prometheus.SummaryDefinition
}
// EmptyPrometheusDefs returns a PrometheusDefs whose three slices are
// allocated with length zero, so none of them is nil.
func EmptyPrometheusDefs() PrometheusDefs {
	var defs PrometheusDefs
	defs.Gauges = make([]prometheus.GaugeDefinition, 0)
	defs.Counters = make([]prometheus.CounterDefinition, 0)
	defs.Summaries = make([]prometheus.SummaryDefinition, 0)
	return defs
}
// InitTelemetry configures go-metrics based on map of telemetry config // InitTelemetry configures go-metrics based on map of telemetry config
// values as returned by Runtimecfg.Config(). // values as returned by Runtimecfg.Config().
func InitTelemetry(cfg TelemetryConfig) (*metrics.InmemSink, error) { func InitTelemetry(cfg TelemetryConfig, defs PrometheusDefs) (*metrics.InmemSink, error) {
if cfg.Disable { if cfg.Disable {
return nil, nil return nil, nil
} }
@ -440,9 +394,12 @@ func InitTelemetry(cfg TelemetryConfig) (*metrics.InmemSink, error) {
if err := addSink(circonusSink); err != nil { if err := addSink(circonusSink); err != nil {
return nil, err return nil, err
} }
if err := addSink(prometheusSink); err != nil {
promSink, err := prometheusSink(cfg, metricsConf.HostName, defs)
if err != nil {
return nil, err return nil, err
} }
sinks = append(sinks, promSink)
if len(sinks) > 0 { if len(sinks) > 0 {
sinks = append(sinks, memSink) sinks = append(sinks, memSink)