mirror of https://github.com/status-im/consul.git
Merge pull request #9198 from hashicorp/mkcp/telemetry/add-all-metric-definitions
Add metric definitions for all metrics known at Consul start
This commit is contained in:
parent
dd857bfa37
commit
88b013be99
|
@ -0,0 +1,3 @@
|
|||
```release-note:improvement
|
||||
agent: All metrics should be present and available to Prometheus scrapers when Consul starts. If any non-deprecated metrics are missing, please submit an issue with their names.
|
||||
```
|
|
@ -136,7 +136,7 @@ func (s *HTTPHandlers) AgentMetrics(resp http.ResponseWriter, req *http.Request)
|
|||
return nil, acl.ErrPermissionDenied
|
||||
}
|
||||
if enablePrometheusOutput(req) {
|
||||
if s.agent.config.Telemetry.PrometheusRetentionTime < 1 {
|
||||
if s.agent.config.Telemetry.PrometheusOpts.Expiration < 1 {
|
||||
resp.WriteHeader(http.StatusUnsupportedMediaType)
|
||||
fmt.Fprint(resp, "Prometheus is not enabled since its retention time is not positive")
|
||||
return nil, nil
|
||||
|
|
|
@ -24,6 +24,7 @@ import (
|
|||
"time"
|
||||
|
||||
"github.com/armon/go-metrics"
|
||||
"github.com/armon/go-metrics/prometheus"
|
||||
"golang.org/x/time/rate"
|
||||
|
||||
"github.com/hashicorp/consul/lib"
|
||||
|
@ -32,6 +33,34 @@ import (
|
|||
|
||||
//go:generate mockery -all -inpkg
|
||||
|
||||
// TODO(kit): remove the namespace from these once the metrics themselves change
|
||||
var Gauges = []prometheus.GaugeDefinition{
|
||||
{
|
||||
Name: []string{"consul", "cache", "entries_count"},
|
||||
Help: "",
|
||||
},
|
||||
}
|
||||
|
||||
// TODO(kit): remove the namespace from these once the metrics themselves change
|
||||
var Counters = []prometheus.CounterDefinition{
|
||||
{
|
||||
Name: []string{"consul", "cache", "bypass"},
|
||||
Help: "",
|
||||
},
|
||||
{
|
||||
Name: []string{"consul", "cache", "fetch_success"},
|
||||
Help: "",
|
||||
},
|
||||
{
|
||||
Name: []string{"consul", "cache", "fetch_error"},
|
||||
Help: "",
|
||||
},
|
||||
{
|
||||
Name: []string{"consul", "cache", "evict_expired"},
|
||||
Help: "",
|
||||
},
|
||||
}
|
||||
|
||||
// Constants related to refresh backoff. We probably don't ever need to
|
||||
// make these configurable knobs since they primarily exist to lower load.
|
||||
const (
|
||||
|
@ -629,6 +658,7 @@ func (c *Cache) fetch(key string, r getOptions, allowNew bool, attempt uint, ign
|
|||
// Error handling
|
||||
if err == nil {
|
||||
labels := []metrics.Label{{Name: "result_not_modified", Value: strconv.FormatBool(result.NotModified)}}
|
||||
// TODO(kit): move tEntry.Name to a label on the first write here and deprecate the second write
|
||||
metrics.IncrCounterWithLabels([]string{"consul", "cache", "fetch_success"}, 1, labels)
|
||||
metrics.IncrCounterWithLabels([]string{"consul", "cache", tEntry.Name, "fetch_success"}, 1, labels)
|
||||
|
||||
|
@ -658,6 +688,7 @@ func (c *Cache) fetch(key string, r getOptions, allowNew bool, attempt uint, ign
|
|||
newEntry.RefreshLostContact = time.Time{}
|
||||
}
|
||||
} else {
|
||||
// TODO(kit): Add tEntry.Name to label on fetch_error and deprecate second write
|
||||
metrics.IncrCounter([]string{"consul", "cache", "fetch_error"}, 1)
|
||||
metrics.IncrCounter([]string{"consul", "cache", tEntry.Name, "fetch_error"}, 1)
|
||||
|
||||
|
|
|
@ -5,11 +5,127 @@ import (
|
|||
"net/http"
|
||||
"strings"
|
||||
|
||||
metrics "github.com/armon/go-metrics"
|
||||
"github.com/armon/go-metrics"
|
||||
"github.com/armon/go-metrics/prometheus"
|
||||
cachetype "github.com/hashicorp/consul/agent/cache-types"
|
||||
"github.com/hashicorp/consul/agent/structs"
|
||||
)
|
||||
|
||||
var CatalogCounters = []prometheus.CounterDefinition{
|
||||
{
|
||||
Name: []string{"client", "api", "catalog_register"},
|
||||
Help: "Increments whenever a Consul agent receives a catalog register request.",
|
||||
},
|
||||
{
|
||||
Name: []string{"client", "rpc", "error", "catalog_register"},
|
||||
Help: "Increments whenever a Consul agent receives an RPC error for a catalog register request.",
|
||||
},
|
||||
{
|
||||
Name: []string{"client", "api", "success", "catalog_register"},
|
||||
Help: "Increments whenever a Consul agent successfully responds to a catalog register request.",
|
||||
},
|
||||
{
|
||||
Name: []string{"client", "api", "catalog_deregister"},
|
||||
Help: "Increments whenever a Consul agent receives a catalog deregister request.",
|
||||
},
|
||||
{
|
||||
Name: []string{"client", "api", "catalog_datacenters"},
|
||||
Help: "Increments whenever a Consul agent receives a request to list datacenters in the catalog.",
|
||||
},
|
||||
{
|
||||
Name: []string{"client", "rpc", "error", "catalog_deregister"},
|
||||
Help: "Increments whenever a Consul agent receives an RPC error for a catalog deregister request.",
|
||||
},
|
||||
{
|
||||
Name: []string{"client", "api", "success", "catalog_nodes"},
|
||||
Help: "Increments whenever a Consul agent successfully responds to a request to list nodes.",
|
||||
},
|
||||
{
|
||||
Name: []string{"client", "rpc", "error", "catalog_nodes"},
|
||||
Help: "Increments whenever a Consul agent receives an RPC error for a request to list nodes.",
|
||||
},
|
||||
{
|
||||
Name: []string{"client", "api", "success", "catalog_deregister"},
|
||||
Help: "Increments whenever a Consul agent successfully responds to a catalog deregister request.",
|
||||
},
|
||||
{
|
||||
Name: []string{"client", "rpc", "error", "catalog_datacenters"},
|
||||
Help: "Increments whenever a Consul agent receives an RPC error for a request to list datacenters.",
|
||||
},
|
||||
{
|
||||
Name: []string{"client", "api", "success", "catalog_datacenters"},
|
||||
Help: "Increments whenever a Consul agent successfully responds to a request to list datacenters.",
|
||||
},
|
||||
{
|
||||
Name: []string{"client", "api", "catalog_nodes"},
|
||||
Help: "Increments whenever a Consul agent receives a request to list nodes from the catalog.",
|
||||
},
|
||||
{
|
||||
Name: []string{"client", "api", "catalog_services"},
|
||||
Help: "Increments whenever a Consul agent receives a request to list services from the catalog.",
|
||||
},
|
||||
{
|
||||
Name: []string{"client", "rpc", "error", "catalog_services"},
|
||||
Help: "Increments whenever a Consul agent receives an RPC error for a request to list services.",
|
||||
},
|
||||
{
|
||||
Name: []string{"client", "api", "success", "catalog_services"},
|
||||
Help: "Increments whenever a Consul agent successfully responds to a request to list services.",
|
||||
},
|
||||
{
|
||||
Name: []string{"client", "api", "catalog_service_nodes"},
|
||||
Help: "Increments whenever a Consul agent receives a request to list nodes offering a service.",
|
||||
},
|
||||
{
|
||||
Name: []string{"client", "rpc", "error", "catalog_service_nodes"},
|
||||
Help: "Increments whenever a Consul agent receives an RPC error for a request to list nodes offering a service.",
|
||||
},
|
||||
{
|
||||
Name: []string{"client", "api", "success", "catalog_service_nodes"},
|
||||
Help: "Increments whenever a Consul agent successfully responds to a request to list nodes offering a service.",
|
||||
},
|
||||
{
|
||||
Name: []string{"client", "api", "error", "catalog_service_nodes"},
|
||||
Help: "",
|
||||
},
|
||||
{
|
||||
Name: []string{"client", "api", "catalog_node_services"},
|
||||
Help: "Increments whenever a Consul agent successfully responds to a request to list nodes offering a service.",
|
||||
},
|
||||
{
|
||||
Name: []string{"client", "api", "success", "catalog_node_services"},
|
||||
Help: "Increments whenever a Consul agent successfully responds to a request to list services in a node.",
|
||||
},
|
||||
{
|
||||
Name: []string{"client", "rpc", "error", "catalog_node_services"},
|
||||
Help: "Increments whenever a Consul agent receives an RPC error for a request to list services in a node.",
|
||||
},
|
||||
{
|
||||
Name: []string{"client", "api", "catalog_node_service_list"},
|
||||
Help: "",
|
||||
},
|
||||
{
|
||||
Name: []string{"client", "rpc", "error", "catalog_node_service_list"},
|
||||
Help: "",
|
||||
},
|
||||
{
|
||||
Name: []string{"client", "api", "success", "catalog_node_service_list"},
|
||||
Help: "",
|
||||
},
|
||||
{
|
||||
Name: []string{"client", "api", "catalog_gateway_services"},
|
||||
Help: "Increments whenever a Consul agent receives a request to list services associated with a gateway.",
|
||||
},
|
||||
{
|
||||
Name: []string{"client", "rpc", "error", "catalog_gateway_services"},
|
||||
Help: "Increments whenever a Consul agent receives an RPC error for a request to list services associated with a gateway.",
|
||||
},
|
||||
{
|
||||
Name: []string{"client", "api", "success", "catalog_gateway_services"},
|
||||
Help: "Increments whenever a Consul agent successfully responds to a request to list services associated with a gateway.",
|
||||
},
|
||||
}
|
||||
|
||||
func (s *HTTPHandlers) CatalogRegister(resp http.ResponseWriter, req *http.Request) (interface{}, error) {
|
||||
metrics.IncrCounterWithLabels([]string{"client", "api", "catalog_register"}, 1,
|
||||
[]metrics.Label{{Name: "node", Value: s.nodeName()}})
|
||||
|
|
|
@ -17,6 +17,7 @@ import (
|
|||
"strings"
|
||||
"time"
|
||||
|
||||
"github.com/armon/go-metrics/prometheus"
|
||||
"github.com/hashicorp/go-bexpr"
|
||||
"github.com/hashicorp/go-hclog"
|
||||
"github.com/hashicorp/go-multierror"
|
||||
|
@ -942,13 +943,15 @@ func (b *Builder) Build() (rt RuntimeConfig, err error) {
|
|||
DisableHostname: b.boolVal(c.Telemetry.DisableHostname),
|
||||
DogstatsdAddr: b.stringVal(c.Telemetry.DogstatsdAddr),
|
||||
DogstatsdTags: c.Telemetry.DogstatsdTags,
|
||||
PrometheusRetentionTime: b.durationVal("prometheus_retention_time", c.Telemetry.PrometheusRetentionTime),
|
||||
FilterDefault: b.boolVal(c.Telemetry.FilterDefault),
|
||||
AllowedPrefixes: telemetryAllowedPrefixes,
|
||||
BlockedPrefixes: telemetryBlockedPrefixes,
|
||||
MetricsPrefix: b.stringVal(c.Telemetry.MetricsPrefix),
|
||||
StatsdAddr: b.stringVal(c.Telemetry.StatsdAddr),
|
||||
StatsiteAddr: b.stringVal(c.Telemetry.StatsiteAddr),
|
||||
PrometheusOpts: prometheus.PrometheusOpts{
|
||||
Expiration: b.durationVal("prometheus_retention_time", c.Telemetry.PrometheusRetentionTime),
|
||||
},
|
||||
},
|
||||
|
||||
// Agent
|
||||
|
|
|
@ -18,6 +18,7 @@ import (
|
|||
"testing"
|
||||
"time"
|
||||
|
||||
"github.com/armon/go-metrics/prometheus"
|
||||
"github.com/stretchr/testify/require"
|
||||
|
||||
"github.com/hashicorp/consul/agent/cache"
|
||||
|
@ -7103,9 +7104,11 @@ func TestFullConfig(t *testing.T) {
|
|||
AllowedPrefixes: []string{"oJotS8XJ"},
|
||||
BlockedPrefixes: []string{"cazlEhGn"},
|
||||
MetricsPrefix: "ftO6DySn",
|
||||
PrometheusRetentionTime: 15 * time.Second,
|
||||
StatsdAddr: "drce87cy",
|
||||
StatsiteAddr: "HpFwKB8R",
|
||||
PrometheusOpts: prometheus.PrometheusOpts{
|
||||
Expiration: 15 * time.Second,
|
||||
},
|
||||
},
|
||||
TLSCipherSuites: []uint16{tls.TLS_ECDHE_ECDSA_WITH_AES_128_CBC_SHA, tls.TLS_ECDHE_ECDSA_WITH_AES_128_CBC_SHA256},
|
||||
TLSMinVersion: "pAOWafkR",
|
||||
|
@ -7814,9 +7817,15 @@ func TestSanitize(t *testing.T) {
|
|||
"DogstatsdTags": [],
|
||||
"FilterDefault": false,
|
||||
"MetricsPrefix": "",
|
||||
"PrometheusRetentionTime": "0s",
|
||||
"StatsdAddr": "",
|
||||
"StatsiteAddr": ""
|
||||
"StatsiteAddr": "",
|
||||
"PrometheusOpts": {
|
||||
"Expiration": "0s",
|
||||
"Registerer": null,
|
||||
"GaugeDefinitions": [],
|
||||
"CounterDefinitions": [],
|
||||
"SummaryDefinitions": []
|
||||
}
|
||||
},
|
||||
"TranslateWANAddrs": false,
|
||||
"TxnMaxReqLen": 5678000000000000,
|
||||
|
|
|
@ -6,7 +6,8 @@ import (
|
|||
"sync"
|
||||
"time"
|
||||
|
||||
metrics "github.com/armon/go-metrics"
|
||||
"github.com/armon/go-metrics"
|
||||
"github.com/armon/go-metrics/prometheus"
|
||||
"github.com/hashicorp/consul/acl"
|
||||
"github.com/hashicorp/consul/agent/structs"
|
||||
"github.com/hashicorp/consul/logging"
|
||||
|
@ -15,6 +16,32 @@ import (
|
|||
"golang.org/x/time/rate"
|
||||
)
|
||||
|
||||
var ACLCounters = []prometheus.CounterDefinition{
|
||||
{
|
||||
Name: []string{"acl", "token", "cache_hit"},
|
||||
Help: "",
|
||||
},
|
||||
{
|
||||
Name: []string{"acl", "token", "cache_miss"},
|
||||
Help: "",
|
||||
},
|
||||
}
|
||||
|
||||
var ACLSummaries = []prometheus.SummaryDefinition{
|
||||
{
|
||||
Name: []string{"acl", "resolveTokenLegacy"},
|
||||
Help: "",
|
||||
},
|
||||
{
|
||||
Name: []string{"acl", "ResolveToken"},
|
||||
Help: "",
|
||||
},
|
||||
{
|
||||
Name: []string{"acl", "ResolveTokenToIdentity"},
|
||||
Help: "",
|
||||
},
|
||||
}
|
||||
|
||||
// These must be kept in sync with the constants in command/agent/acl.go.
|
||||
const (
|
||||
// anonymousToken is the token ID we re-write to if there is no token ID
|
||||
|
|
|
@ -11,7 +11,8 @@ import (
|
|||
"regexp"
|
||||
"time"
|
||||
|
||||
metrics "github.com/armon/go-metrics"
|
||||
"github.com/armon/go-metrics"
|
||||
"github.com/armon/go-metrics/prometheus"
|
||||
"github.com/hashicorp/consul/acl"
|
||||
"github.com/hashicorp/consul/agent/consul/authmethod"
|
||||
"github.com/hashicorp/consul/agent/consul/state"
|
||||
|
@ -30,6 +31,73 @@ const (
|
|||
aclBootstrapReset = "acl-bootstrap-reset"
|
||||
)
|
||||
|
||||
var ACLEndpointSummaries = []prometheus.SummaryDefinition{
|
||||
{
|
||||
Name: []string{"acl", "token", "clone"},
|
||||
Help: "",
|
||||
},
|
||||
{
|
||||
Name: []string{"acl", "token", "upsert"},
|
||||
Help: "",
|
||||
},
|
||||
{
|
||||
Name: []string{"acl", "token", "delete"},
|
||||
Help: "",
|
||||
},
|
||||
{
|
||||
Name: []string{"acl", "policy", "upsert"},
|
||||
Help: "",
|
||||
},
|
||||
{
|
||||
Name: []string{"acl", "policy", "delete"},
|
||||
Help: "",
|
||||
},
|
||||
{
|
||||
Name: []string{"acl", "policy", "delete"},
|
||||
Help: "",
|
||||
},
|
||||
{
|
||||
Name: []string{"acl", "role", "upsert"},
|
||||
Help: "",
|
||||
},
|
||||
{
|
||||
Name: []string{"acl", "role", "delete"},
|
||||
Help: "",
|
||||
},
|
||||
{
|
||||
Name: []string{"acl", "bindingrule", "upsert"},
|
||||
Help: "",
|
||||
},
|
||||
{
|
||||
Name: []string{"acl", "bindingrule", "delete"},
|
||||
Help: "",
|
||||
},
|
||||
{
|
||||
Name: []string{"acl", "authmethod", "upsert"},
|
||||
Help: "",
|
||||
},
|
||||
{
|
||||
Name: []string{"acl", "authmethod", "delete"},
|
||||
Help: "",
|
||||
},
|
||||
{
|
||||
Name: []string{"acl", "login"},
|
||||
Help: "",
|
||||
},
|
||||
{
|
||||
Name: []string{"acl", "login"},
|
||||
Help: "",
|
||||
},
|
||||
{
|
||||
Name: []string{"acl", "logout"},
|
||||
Help: "",
|
||||
},
|
||||
{
|
||||
Name: []string{"acl", "logout"},
|
||||
Help: "",
|
||||
},
|
||||
}
|
||||
|
||||
// Regex for matching
|
||||
var (
|
||||
validPolicyName = regexp.MustCompile(`^[A-Za-z0-9\-_]{1,128}$`)
|
||||
|
|
|
@ -5,6 +5,7 @@ import (
|
|||
"time"
|
||||
|
||||
"github.com/armon/go-metrics"
|
||||
"github.com/armon/go-metrics/prometheus"
|
||||
"github.com/hashicorp/consul/acl"
|
||||
"github.com/hashicorp/consul/agent/consul/state"
|
||||
"github.com/hashicorp/consul/agent/structs"
|
||||
|
@ -12,6 +13,13 @@ import (
|
|||
"github.com/hashicorp/go-memdb"
|
||||
)
|
||||
|
||||
var ACLEndpointLegacySummaries = []prometheus.SummaryDefinition{
|
||||
{
|
||||
Name: []string{"acl", "apply"},
|
||||
Help: "Measures the time it takes to complete an update to the ACL store.",
|
||||
},
|
||||
}
|
||||
|
||||
// Bootstrap is used to perform a one-time ACL bootstrap operation on
|
||||
// a cluster to get the first management token.
|
||||
func (a *ACL) Bootstrap(args *structs.DCSpecificRequest, reply *structs.ACL) error {
|
||||
|
|
|
@ -5,6 +5,7 @@ import (
|
|||
"fmt"
|
||||
|
||||
"github.com/armon/go-metrics"
|
||||
"github.com/armon/go-metrics/prometheus"
|
||||
"github.com/hashicorp/consul/agent/metadata"
|
||||
"github.com/hashicorp/consul/types"
|
||||
"github.com/hashicorp/raft"
|
||||
|
@ -12,6 +13,17 @@ import (
|
|||
"github.com/hashicorp/serf/serf"
|
||||
)
|
||||
|
||||
var AutopilotGauges = []prometheus.GaugeDefinition{
|
||||
{
|
||||
Name: []string{"autopilot", "failure_tolerance"},
|
||||
Help: "Tracks the number of voting servers that the cluster can lose while continuing to function.",
|
||||
},
|
||||
{
|
||||
Name: []string{"autopilot", "healthy"},
|
||||
Help: "Tracks the overall health of the local server cluster. 1 if all servers are healthy, 0 if one or more are unhealthy.",
|
||||
},
|
||||
}
|
||||
|
||||
// AutopilotDelegate is a Consul delegate for autopilot operations.
|
||||
type AutopilotDelegate struct {
|
||||
server *Server
|
||||
|
|
|
@ -6,6 +6,7 @@ import (
|
|||
"time"
|
||||
|
||||
"github.com/armon/go-metrics"
|
||||
"github.com/armon/go-metrics/prometheus"
|
||||
"github.com/hashicorp/consul/acl"
|
||||
"github.com/hashicorp/consul/agent/consul/state"
|
||||
"github.com/hashicorp/consul/agent/structs"
|
||||
|
@ -17,6 +18,52 @@ import (
|
|||
"github.com/hashicorp/go-uuid"
|
||||
)
|
||||
|
||||
var CatalogCounters = []prometheus.CounterDefinition{
|
||||
{
|
||||
Name: []string{"catalog", "service", "query"},
|
||||
Help: "Increments for each catalog query for the given service.",
|
||||
},
|
||||
{
|
||||
Name: []string{"catalog", "connect", "query"},
|
||||
Help: "",
|
||||
},
|
||||
{
|
||||
Name: []string{"catalog", "service", "query-tag"},
|
||||
Help: "Increments for each catalog query for the given service with the given tag.",
|
||||
},
|
||||
{
|
||||
Name: []string{"catalog", "connect", "query-tag"},
|
||||
Help: "",
|
||||
},
|
||||
{
|
||||
Name: []string{"catalog", "service", "query-tags"},
|
||||
Help: "Increments for each catalog query for the given service with the given tags.",
|
||||
},
|
||||
{
|
||||
Name: []string{"catalog", "connect", "query-tags"},
|
||||
Help: "",
|
||||
},
|
||||
{
|
||||
Name: []string{"catalog", "service", "not-found"},
|
||||
Help: "Increments for each catalog query where the given service could not be found.",
|
||||
},
|
||||
{
|
||||
Name: []string{"catalog", "connect", "not-found"},
|
||||
Help: "",
|
||||
},
|
||||
}
|
||||
|
||||
var CatalogSummaries = []prometheus.SummaryDefinition{
|
||||
{
|
||||
Name: []string{"catalog", "deregister"},
|
||||
Help: "Measures the time it takes to complete a catalog deregister operation.",
|
||||
},
|
||||
{
|
||||
Name: []string{"catalog", "register"},
|
||||
Help: "Measures the time it takes to complete a catalog register operation.",
|
||||
},
|
||||
}
|
||||
|
||||
// Catalog endpoint is used to manipulate the service catalog
|
||||
type Catalog struct {
|
||||
srv *Server
|
||||
|
|
|
@ -9,6 +9,7 @@ import (
|
|||
"time"
|
||||
|
||||
"github.com/armon/go-metrics"
|
||||
"github.com/armon/go-metrics/prometheus"
|
||||
"github.com/hashicorp/consul/agent/pool"
|
||||
"github.com/hashicorp/consul/agent/router"
|
||||
"github.com/hashicorp/consul/agent/structs"
|
||||
|
@ -21,6 +22,21 @@ import (
|
|||
"golang.org/x/time/rate"
|
||||
)
|
||||
|
||||
var ClientCounters = []prometheus.CounterDefinition{
|
||||
{
|
||||
Name: []string{"client", "rpc"},
|
||||
Help: "Increments whenever a Consul agent in client mode makes an RPC request to a Consul server.",
|
||||
},
|
||||
{
|
||||
Name: []string{"client", "rpc", "exceeded"},
|
||||
Help: "Increments whenever a Consul agent in client mode makes an RPC request to a Consul server gets rate limited by that agent's limits configuration.",
|
||||
},
|
||||
{
|
||||
Name: []string{"client", "rpc", "failed"},
|
||||
Help: "Increments whenever a Consul agent in client mode makes an RPC request to a Consul server and fails.",
|
||||
},
|
||||
}
|
||||
|
||||
const (
|
||||
// serfEventBacklog is the maximum number of unprocessed Serf Events
|
||||
// that will be held in queue before new serf events block. A
|
||||
|
|
|
@ -4,6 +4,8 @@ import (
|
|||
"fmt"
|
||||
"time"
|
||||
|
||||
"github.com/armon/go-metrics/prometheus"
|
||||
|
||||
metrics "github.com/armon/go-metrics"
|
||||
"github.com/hashicorp/consul/acl"
|
||||
"github.com/hashicorp/consul/agent/consul/state"
|
||||
|
@ -12,6 +14,33 @@ import (
|
|||
"github.com/mitchellh/copystructure"
|
||||
)
|
||||
|
||||
var ConfigSummaries = []prometheus.SummaryDefinition{
|
||||
{
|
||||
Name: []string{"config_entry", "apply"},
|
||||
Help: "",
|
||||
},
|
||||
{
|
||||
Name: []string{"config_entry", "get"},
|
||||
Help: "",
|
||||
},
|
||||
{
|
||||
Name: []string{"config_entry", "list"},
|
||||
Help: "",
|
||||
},
|
||||
{
|
||||
Name: []string{"config_entry", "listAll"},
|
||||
Help: "",
|
||||
},
|
||||
{
|
||||
Name: []string{"config_entry", "delete"},
|
||||
Help: "",
|
||||
},
|
||||
{
|
||||
Name: []string{"config_entry", "resolve_service_config"},
|
||||
Help: "",
|
||||
},
|
||||
}
|
||||
|
||||
// The ConfigEntry endpoint is used to query centralized config information
|
||||
type ConfigEntry struct {
|
||||
srv *Server
|
||||
|
|
|
@ -5,13 +5,33 @@ import (
|
|||
"fmt"
|
||||
"time"
|
||||
|
||||
metrics "github.com/armon/go-metrics"
|
||||
"github.com/armon/go-metrics"
|
||||
"github.com/armon/go-metrics/prometheus"
|
||||
"github.com/hashicorp/consul/acl"
|
||||
"github.com/hashicorp/consul/agent/consul/state"
|
||||
"github.com/hashicorp/consul/agent/structs"
|
||||
memdb "github.com/hashicorp/go-memdb"
|
||||
)
|
||||
|
||||
var FederationStateSummaries = []prometheus.SummaryDefinition{
|
||||
{
|
||||
Name: []string{"federation_state", "apply"},
|
||||
Help: "",
|
||||
},
|
||||
{
|
||||
Name: []string{"federation_state", "get"},
|
||||
Help: "",
|
||||
},
|
||||
{
|
||||
Name: []string{"federation_state", "list"},
|
||||
Help: "",
|
||||
},
|
||||
{
|
||||
Name: []string{"federation_state", "list_mesh_gateways"},
|
||||
Help: "",
|
||||
},
|
||||
}
|
||||
|
||||
var (
|
||||
errFederationStatesNotEnabled = errors.New("Federation states are currently disabled until all servers in the datacenter support the feature")
|
||||
)
|
||||
|
|
|
@ -4,11 +4,102 @@ import (
|
|||
"fmt"
|
||||
"time"
|
||||
|
||||
metrics "github.com/armon/go-metrics"
|
||||
"github.com/armon/go-metrics"
|
||||
"github.com/armon/go-metrics/prometheus"
|
||||
"github.com/hashicorp/consul/agent/structs"
|
||||
"github.com/hashicorp/consul/api"
|
||||
)
|
||||
|
||||
var CommandsSummaries = []prometheus.SummaryDefinition{
|
||||
{
|
||||
Name: []string{"fsm", "register"},
|
||||
Help: "Measures the time it takes to apply a catalog register operation to the FSM.",
|
||||
},
|
||||
{
|
||||
Name: []string{"fsm", "deregister"},
|
||||
Help: "Measures the time it takes to apply a catalog deregister operation to the FSM.",
|
||||
},
|
||||
{
|
||||
Name: []string{"fsm", "kvs"},
|
||||
Help: "Measures the time it takes to apply the given KV operation to the FSM.",
|
||||
},
|
||||
{
|
||||
Name: []string{"fsm", "session"},
|
||||
Help: "Measures the time it takes to apply the given session operation to the FSM.",
|
||||
},
|
||||
{
|
||||
Name: []string{"fsm", "acl"},
|
||||
Help: "Measures the time it takes to apply the given ACL operation to the FSM.",
|
||||
},
|
||||
{
|
||||
Name: []string{"fsm", "tombstone"},
|
||||
Help: "Measures the time it takes to apply the given tombstone operation to the FSM.",
|
||||
},
|
||||
{
|
||||
Name: []string{"fsm", "coordinate", "batch-update"},
|
||||
Help: "Measures the time it takes to apply the given batch coordinate update to the FSM.",
|
||||
},
|
||||
{
|
||||
Name: []string{"fsm", "prepared-query"},
|
||||
Help: "Measures the time it takes to apply the given prepared query update operation to the FSM.",
|
||||
},
|
||||
{
|
||||
Name: []string{"fsm", "txn"},
|
||||
Help: "Measures the time it takes to apply the given transaction update to the FSM.",
|
||||
},
|
||||
{
|
||||
Name: []string{"fsm", "autopilot"},
|
||||
Help: "Measures the time it takes to apply the given autopilot update to the FSM.",
|
||||
},
|
||||
{
|
||||
Name: []string{"consul", "fsm", "intention"},
|
||||
Help: "",
|
||||
},
|
||||
{
|
||||
Name: []string{"fsm", "intention"},
|
||||
Help: "",
|
||||
},
|
||||
{
|
||||
Name: []string{"consul", "fsm", "ca"},
|
||||
Help: "",
|
||||
},
|
||||
{
|
||||
Name: []string{"fsm", "ca", "leaf"},
|
||||
Help: "",
|
||||
},
|
||||
{
|
||||
Name: []string{"fsm", "acl", "token"},
|
||||
Help: "",
|
||||
},
|
||||
{
|
||||
Name: []string{"fsm", "ca", "leaf"},
|
||||
Help: "",
|
||||
},
|
||||
{
|
||||
Name: []string{"fsm", "acl", "policy"},
|
||||
Help: "",
|
||||
},
|
||||
{
|
||||
Name: []string{"fsm", "acl", "bindingrule"},
|
||||
Help: "",
|
||||
},
|
||||
{
|
||||
Name: []string{"fsm", "acl", "authmethod"},
|
||||
Help: "",
|
||||
},
|
||||
{
|
||||
Name: []string{"fsm", "system_metadata"},
|
||||
Help: "",
|
||||
},
|
||||
// TODO(kit): We generate the config-entry fsm summaries by reading off of the request. It is
|
||||
// possible to statically declare these when we know all of the names, but I didn't get to it
|
||||
// in this patch. Config-entries are known though and we should add these in the future.
|
||||
// {
|
||||
// Name: []string{"fsm", "config_entry", req.Entry.GetKind()},
|
||||
// Help: "",
|
||||
// },
|
||||
}
|
||||
|
||||
func init() {
|
||||
registerCommand(structs.RegisterRequestType, (*FSM).applyRegister)
|
||||
registerCommand(structs.DeregisterRequestType, (*FSM).applyDeregister)
|
||||
|
|
|
@ -5,6 +5,7 @@ import (
|
|||
"time"
|
||||
|
||||
"github.com/armon/go-metrics"
|
||||
"github.com/armon/go-metrics/prometheus"
|
||||
"github.com/hashicorp/consul/agent/consul/state"
|
||||
"github.com/hashicorp/consul/agent/structs"
|
||||
"github.com/hashicorp/go-msgpack/codec"
|
||||
|
@ -12,6 +13,13 @@ import (
|
|||
"github.com/hashicorp/raft"
|
||||
)
|
||||
|
||||
var SnapshotSummaries = []prometheus.SummaryDefinition{
|
||||
{
|
||||
Name: []string{"fsm", "persist"},
|
||||
Help: "Measures the time it takes to persist the FSM to a raft snapshot.",
|
||||
},
|
||||
}
|
||||
|
||||
// snapshot is used to provide a snapshot of the current
|
||||
// state in a way that can be accessed concurrently with operations
|
||||
// that may modify the live state.
|
||||
|
|
|
@ -6,6 +6,7 @@ import (
|
|||
"time"
|
||||
|
||||
"github.com/armon/go-metrics"
|
||||
"github.com/armon/go-metrics/prometheus"
|
||||
"github.com/hashicorp/consul/acl"
|
||||
"github.com/hashicorp/consul/agent/connect"
|
||||
"github.com/hashicorp/consul/agent/consul/state"
|
||||
|
@ -16,6 +17,17 @@ import (
|
|||
"github.com/hashicorp/go-memdb"
|
||||
)
|
||||
|
||||
var IntentionSummaries = []prometheus.SummaryDefinition{
|
||||
{
|
||||
Name: []string{"consul", "intention", "apply"},
|
||||
Help: "",
|
||||
},
|
||||
{
|
||||
Name: []string{"intention", "apply"},
|
||||
Help: "",
|
||||
},
|
||||
}
|
||||
|
||||
var (
|
||||
// ErrIntentionNotFound is returned if the intention lookup failed.
|
||||
ErrIntentionNotFound = errors.New("Intention not found")
|
||||
|
|
|
@ -6,6 +6,7 @@ import (
|
|||
"time"
|
||||
|
||||
"github.com/armon/go-metrics"
|
||||
"github.com/armon/go-metrics/prometheus"
|
||||
"github.com/hashicorp/consul/acl"
|
||||
"github.com/hashicorp/consul/agent/consul/state"
|
||||
"github.com/hashicorp/consul/agent/structs"
|
||||
|
@ -14,6 +15,13 @@ import (
|
|||
"github.com/hashicorp/go-memdb"
|
||||
)
|
||||
|
||||
var KVSummaries = []prometheus.SummaryDefinition{
|
||||
{
|
||||
Name: []string{"kvs", "apply"},
|
||||
Help: "Measures the time it takes to complete an update to the KV store.",
|
||||
},
|
||||
}
|
||||
|
||||
// KVS endpoint is used to manipulate the Key-Value store
|
||||
type KVS struct {
|
||||
srv *Server
|
||||
|
|
|
@ -11,6 +11,7 @@ import (
|
|||
"time"
|
||||
|
||||
"github.com/armon/go-metrics"
|
||||
"github.com/armon/go-metrics/prometheus"
|
||||
"github.com/hashicorp/consul/acl"
|
||||
"github.com/hashicorp/consul/agent/metadata"
|
||||
"github.com/hashicorp/consul/agent/structs"
|
||||
|
@ -27,6 +28,21 @@ import (
|
|||
"golang.org/x/time/rate"
|
||||
)
|
||||
|
||||
var LeaderSummaries = []prometheus.SummaryDefinition{
|
||||
{
|
||||
Name: []string{"leader", "barrier"},
|
||||
Help: "Measures the time spent waiting for the raft barrier upon gaining leadership.",
|
||||
},
|
||||
{
|
||||
Name: []string{"leader", "reconcileMember"},
|
||||
Help: "Measures the time spent updating the raft store for a single serf member's information.",
|
||||
},
|
||||
{
|
||||
Name: []string{"leader", "reapTombstones"},
|
||||
Help: "Measures the time spent clearing tombstones.",
|
||||
},
|
||||
}
|
||||
|
||||
const (
|
||||
newLeaderEvent = "consul:new-leader"
|
||||
barrierWriteTimeout = 2 * time.Minute
|
||||
|
|
|
@ -6,6 +6,7 @@ import (
|
|||
"time"
|
||||
|
||||
"github.com/armon/go-metrics"
|
||||
"github.com/armon/go-metrics/prometheus"
|
||||
"github.com/hashicorp/consul/acl"
|
||||
"github.com/hashicorp/consul/agent/consul/state"
|
||||
"github.com/hashicorp/consul/agent/structs"
|
||||
|
@ -15,6 +16,25 @@ import (
|
|||
"github.com/hashicorp/go-uuid"
|
||||
)
|
||||
|
||||
var PreparedQuerySummaries = []prometheus.SummaryDefinition{
|
||||
{
|
||||
Name: []string{"prepared-query", "apply"},
|
||||
Help: "Measures the time it takes to apply a prepared query update.",
|
||||
},
|
||||
{
|
||||
Name: []string{"prepared-query", "explain"},
|
||||
Help: "Measures the time it takes to process a prepared query explain request.",
|
||||
},
|
||||
{
|
||||
Name: []string{"prepared-query", "execute"},
|
||||
Help: "Measures the time it takes to process a prepared query execute request.",
|
||||
},
|
||||
{
|
||||
Name: []string{"prepared-query", "execute_remote"},
|
||||
Help: "Measures the time it takes to process a prepared query execute request that was forwarded to another datacenter.",
|
||||
},
|
||||
}
|
||||
|
||||
// PreparedQuery manages the prepared query endpoint.
|
||||
type PreparedQuery struct {
|
||||
srv *Server
|
||||
|
|
|
@ -13,6 +13,7 @@ import (
|
|||
"time"
|
||||
|
||||
"github.com/armon/go-metrics"
|
||||
"github.com/armon/go-metrics/prometheus"
|
||||
"github.com/hashicorp/consul/acl"
|
||||
"github.com/hashicorp/consul/agent/consul/state"
|
||||
"github.com/hashicorp/consul/agent/consul/wanfed"
|
||||
|
@ -31,6 +32,47 @@ import (
|
|||
"github.com/hashicorp/yamux"
|
||||
)
|
||||
|
||||
var RPCCounters = []prometheus.CounterDefinition{
|
||||
{
|
||||
Name: []string{"rpc", "accept_conn"},
|
||||
Help: "Increments when a server accepts an RPC connection.",
|
||||
},
|
||||
{
|
||||
Name: []string{"rpc", "raft_handoff"},
|
||||
Help: "Increments when a server accepts a Raft-related RPC connection.",
|
||||
},
|
||||
{
|
||||
Name: []string{"rpc", "request_error"},
|
||||
Help: "Increments when a server returns an error from an RPC request.",
|
||||
},
|
||||
{
|
||||
Name: []string{"rpc", "request"},
|
||||
Help: "Increments when a server receives a Consul-related RPC request.",
|
||||
},
|
||||
{
|
||||
Name: []string{"rpc", "cross-dc"},
|
||||
Help: "Increments when a server sends a (potentially blocking) cross datacenter RPC query.",
|
||||
},
|
||||
{
|
||||
Name: []string{"rpc", "query"},
|
||||
Help: "Increments when a server receives a new blocking RPC request, indicating the rate of new blocking query calls.",
|
||||
},
|
||||
}
|
||||
|
||||
var RPCGauges = []prometheus.GaugeDefinition{
|
||||
{
|
||||
Name: []string{"rpc", "queries_blocking"},
|
||||
Help: "Shows the current number of in-flight blocking queries the server is handling.",
|
||||
},
|
||||
}
|
||||
|
||||
var RPCSummaries = []prometheus.SummaryDefinition{
|
||||
{
|
||||
Name: []string{"rpc", "consistentRead"},
|
||||
Help: "Measures the time spent confirming that a consistent read can be performed.",
|
||||
},
|
||||
}
|
||||
|
||||
const (
|
||||
// jitterFraction is the limit to the amount of jitter we apply
|
||||
// to a user specified MaxQueryTime. We divide the specified time by
|
||||
|
|
|
@ -7,10 +7,18 @@ import (
|
|||
"time"
|
||||
|
||||
"github.com/armon/go-metrics"
|
||||
"github.com/armon/go-metrics/prometheus"
|
||||
"github.com/hashicorp/consul/agent/structs"
|
||||
"github.com/hashicorp/serf/serf"
|
||||
)
|
||||
|
||||
// SegmentOSSSummaries lists the Prometheus summary definitions declared in this
// file; it currently holds only the "leader.reconcile" timing summary.
var SegmentOSSSummaries = []prometheus.SummaryDefinition{
|
||||
{
|
||||
Name: []string{"leader", "reconcile"},
|
||||
Help: "Measures the time spent updating the raft store from the serf member information.",
|
||||
},
|
||||
}
|
||||
|
||||
// LANMembersAllSegments returns members from all segments.
|
||||
func (s *Server) LANMembersAllSegments() ([]serf.Member, error) {
|
||||
return s.LANMembers(), nil
|
||||
|
|
|
@ -17,7 +17,7 @@ import (
|
|||
"sync/atomic"
|
||||
"time"
|
||||
|
||||
metrics "github.com/armon/go-metrics"
|
||||
"github.com/armon/go-metrics"
|
||||
connlimit "github.com/hashicorp/go-connlimit"
|
||||
"github.com/hashicorp/go-hclog"
|
||||
"github.com/hashicorp/go-memdb"
|
||||
|
@ -50,6 +50,8 @@ import (
|
|||
"github.com/hashicorp/consul/types"
|
||||
)
|
||||
|
||||
// NOTE The "consul.client.rpc" and "consul.client.rpc.exceeded" counters are defined in consul/client.go
|
||||
|
||||
// These are the protocol versions that Consul can _understand_. These are
|
||||
// Consul-level protocol versions, that are used to configure the Serf
|
||||
// protocol versions.
|
||||
|
|
|
@ -5,6 +5,7 @@ import (
|
|||
"time"
|
||||
|
||||
"github.com/armon/go-metrics"
|
||||
"github.com/armon/go-metrics/prometheus"
|
||||
"github.com/hashicorp/consul/acl"
|
||||
"github.com/hashicorp/consul/agent/consul/state"
|
||||
"github.com/hashicorp/consul/agent/structs"
|
||||
|
@ -13,6 +14,17 @@ import (
|
|||
"github.com/hashicorp/go-uuid"
|
||||
)
|
||||
|
||||
// SessionEndpointSummaries lists the Prometheus summary definitions for the
// session endpoint timings ("session.apply" and "session.renew").
var SessionEndpointSummaries = []prometheus.SummaryDefinition{
|
||||
{
|
||||
Name: []string{"session", "apply"},
|
||||
Help: "Measures the time spent applying a session update.",
|
||||
},
|
||||
{
|
||||
Name: []string{"session", "renew"},
|
||||
Help: "Measures the time spent renewing a session.",
|
||||
},
|
||||
}
|
||||
|
||||
// Session endpoint is used to manipulate sessions for KV
|
||||
type Session struct {
|
||||
srv *Server
|
||||
|
|
|
@ -5,9 +5,32 @@ import (
|
|||
"time"
|
||||
|
||||
"github.com/armon/go-metrics"
|
||||
"github.com/armon/go-metrics/prometheus"
|
||||
"github.com/hashicorp/consul/agent/structs"
|
||||
)
|
||||
|
||||
// SessionGauges lists the Prometheus gauge definitions declared in this file:
// the active session TTL gauge plus two raft index gauges.
// NOTE(review): the two raft entries have empty help text — consider filling
// them in.
var SessionGauges = []prometheus.GaugeDefinition{
|
||||
{
|
||||
Name: []string{"session_ttl", "active"},
|
||||
Help: "Tracks the active number of sessions being tracked.",
|
||||
},
|
||||
{
|
||||
Name: []string{"raft", "applied_index"},
|
||||
Help: "",
|
||||
},
|
||||
{
|
||||
Name: []string{"raft", "last_index"},
|
||||
Help: "",
|
||||
},
|
||||
}
|
||||
|
||||
// SessionSummaries lists the Prometheus summary definitions for session TTL
// handling; it currently holds only the invalidation timing summary.
var SessionSummaries = []prometheus.SummaryDefinition{
|
||||
{
|
||||
Name: []string{"session_ttl", "invalidate"},
|
||||
Help: "Measures the time spent invalidating an expired session.",
|
||||
},
|
||||
}
|
||||
|
||||
const (
|
||||
// maxInvalidateAttempts limits how many invalidate attempts are made
|
||||
maxInvalidateAttempts = 6
|
||||
|
|
|
@ -5,12 +5,24 @@ import (
|
|||
"time"
|
||||
|
||||
"github.com/armon/go-metrics"
|
||||
"github.com/armon/go-metrics/prometheus"
|
||||
"github.com/hashicorp/consul/acl"
|
||||
"github.com/hashicorp/consul/agent/structs"
|
||||
"github.com/hashicorp/consul/api"
|
||||
"github.com/hashicorp/go-hclog"
|
||||
)
|
||||
|
||||
// TxnSummaries lists the Prometheus summary definitions for the transaction
// endpoint timings ("txn.apply" and "txn.read").
var TxnSummaries = []prometheus.SummaryDefinition{
|
||||
{
|
||||
Name: []string{"txn", "apply"},
|
||||
Help: "Measures the time spent applying a transaction operation.",
|
||||
},
|
||||
{
|
||||
Name: []string{"txn", "read"},
|
||||
Help: "Measures the time spent returning a read transaction.",
|
||||
},
|
||||
}
|
||||
|
||||
// Txn endpoint is used to perform multi-object atomic transactions.
|
||||
type Txn struct {
|
||||
srv *Server
|
||||
|
|
|
@ -5,12 +5,29 @@ import (
|
|||
"errors"
|
||||
"time"
|
||||
|
||||
"github.com/armon/go-metrics/prometheus"
|
||||
|
||||
"github.com/armon/go-metrics"
|
||||
"github.com/hashicorp/consul/agent/consul/state"
|
||||
"github.com/hashicorp/consul/logging"
|
||||
"github.com/hashicorp/go-hclog"
|
||||
)
|
||||
|
||||
var Gauges = []prometheus.GaugeDefinition{
|
||||
{
|
||||
Name: []string{"consul", "state", "nodes"},
|
||||
Help: "Measures the current number of nodes registered with Consul. It is only emitted by Consul servers. Added in v1.9.0.",
|
||||
},
|
||||
{
|
||||
Name: []string{"consul", "state", "services"},
|
||||
Help: "Measures the current number of unique services registered with Consul, based on service name. It is only emitted by Consul servers. Added in v1.9.0.",
|
||||
},
|
||||
{
|
||||
Name: []string{"consul", "state", "service_instances"},
|
||||
Help: "Measures the current number of unique services registered with Consul, based on service name. It is only emitted by Consul servers. Added in v1.9.0.",
|
||||
},
|
||||
}
|
||||
|
||||
// Config holds the settings for various parameters for the
|
||||
// UsageMetricsReporter
|
||||
type Config struct {
|
||||
|
|
20
agent/dns.go
20
agent/dns.go
|
@ -10,6 +10,8 @@ import (
|
|||
"sync/atomic"
|
||||
"time"
|
||||
|
||||
"github.com/armon/go-metrics/prometheus"
|
||||
|
||||
metrics "github.com/armon/go-metrics"
|
||||
radix "github.com/armon/go-radix"
|
||||
"github.com/coredns/coredns/plugin/pkg/dnsutil"
|
||||
|
@ -26,6 +28,24 @@ import (
|
|||
"github.com/hashicorp/consul/logging"
|
||||
)
|
||||
|
||||
// DNSCounters lists the Prometheus counter definitions for the "dns" metrics;
// it currently holds only the stale query counter.
var DNSCounters = []prometheus.CounterDefinition{
|
||||
{
|
||||
Name: []string{"dns", "stale_queries"},
|
||||
Help: "Increments when an agent serves a query within the allowed stale threshold.",
|
||||
},
|
||||
}
|
||||
|
||||
// DNSSummaries lists the Prometheus summary definitions for the "dns" query
// timings ("dns.ptr_query" and "dns.domain_query").
var DNSSummaries = []prometheus.SummaryDefinition{
|
||||
{
|
||||
Name: []string{"dns", "ptr_query"},
|
||||
Help: "Measures the time spent handling a reverse DNS query for the given node.",
|
||||
},
|
||||
{
|
||||
Name: []string{"dns", "domain_query"},
|
||||
Help: "Measures the time spent handling a domain query for the given node.",
|
||||
},
|
||||
}
|
||||
|
||||
const (
|
||||
// UDP can fit ~25 A records in a 512B response, and ~14 AAAA
|
||||
// records. Limit further to prevent unintentional configuration
|
||||
|
|
|
@ -5,10 +5,48 @@ import (
|
|||
"sync/atomic"
|
||||
|
||||
"github.com/armon/go-metrics"
|
||||
"github.com/armon/go-metrics/prometheus"
|
||||
"google.golang.org/grpc"
|
||||
"google.golang.org/grpc/stats"
|
||||
)
|
||||
|
||||
// StatsGauges lists the Prometheus gauge definitions for the gRPC connection
// and stream gauges (server connections, client connections, server streams).
var StatsGauges = []prometheus.GaugeDefinition{
|
||||
{
|
||||
Name: []string{"grpc", "server", "connections"},
|
||||
Help: "Measures the number of active gRPC connections open on the server.",
|
||||
},
|
||||
{
|
||||
Name: []string{"grpc", "client", "connections"},
|
||||
Help: "Measures the number of active gRPC connections open from the client agent to any Consul servers.",
|
||||
},
|
||||
{
|
||||
Name: []string{"grpc", "server", "streams"},
|
||||
Help: "Measures the number of active gRPC streams handled by the server.",
|
||||
},
|
||||
}
|
||||
// StatsCounters lists the Prometheus counter definitions for the gRPC request,
// connection and stream counters, on both the client and the server side.
var StatsCounters = []prometheus.CounterDefinition{
|
||||
{
|
||||
Name: []string{"grpc", "client", "request", "count"},
|
||||
Help: "Counts the number of gRPC requests made by the client agent to a Consul server.",
|
||||
},
|
||||
{
|
||||
Name: []string{"grpc", "server", "request", "count"},
|
||||
Help: "Counts the number of gRPC requests received by the server.",
|
||||
},
|
||||
{
|
||||
Name: []string{"grpc", "client", "connection", "count"},
|
||||
Help: "Counts the number of new gRPC connections opened by the client agent to a Consul server.",
|
||||
},
|
||||
{
|
||||
Name: []string{"grpc", "server", "connection", "count"},
|
||||
Help: "Counts the number of new gRPC connections received by the server.",
|
||||
},
|
||||
{
|
||||
Name: []string{"grpc", "server", "stream", "count"},
|
||||
Help: "Counts the number of new gRPC streams received by the server.",
|
||||
},
|
||||
}
|
||||
|
||||
// defaultMetrics holds metrics.Default from go-metrics.
// NOTE(review): presumably kept in a package variable so it can be swapped
// out (e.g. in tests) — confirm against its uses.
var defaultMetrics = metrics.Default
|
||||
|
||||
// statsHandler is a grpc/stats.StatsHandler which emits connection and
|
||||
|
|
|
@ -17,6 +17,7 @@ import (
|
|||
|
||||
"github.com/NYTimes/gziphandler"
|
||||
"github.com/armon/go-metrics"
|
||||
"github.com/armon/go-metrics/prometheus"
|
||||
"github.com/hashicorp/consul/acl"
|
||||
"github.com/hashicorp/consul/agent/cache"
|
||||
"github.com/hashicorp/consul/agent/config"
|
||||
|
@ -31,6 +32,13 @@ import (
|
|||
"github.com/pkg/errors"
|
||||
)
|
||||
|
||||
// HTTPSummaries lists the Prometheus summary definitions for the HTTP API; it
// currently holds only the "api.http" request timing summary.
var HTTPSummaries = []prometheus.SummaryDefinition{
|
||||
{
|
||||
Name: []string{"api", "http"},
|
||||
Help: "Samples how long it takes to service the given HTTP request for the given verb and path.",
|
||||
},
|
||||
}
|
||||
|
||||
// MethodNotAllowedError should be returned by a handler when the HTTP method is not allowed.
|
||||
type MethodNotAllowedError struct {
|
||||
Method string
|
||||
|
|
|
@ -9,8 +9,8 @@ import (
|
|||
"sync/atomic"
|
||||
"time"
|
||||
|
||||
metrics "github.com/armon/go-metrics"
|
||||
|
||||
"github.com/armon/go-metrics"
|
||||
"github.com/armon/go-metrics/prometheus"
|
||||
"github.com/hashicorp/consul/acl"
|
||||
"github.com/hashicorp/consul/agent/structs"
|
||||
"github.com/hashicorp/consul/agent/token"
|
||||
|
@ -20,6 +20,33 @@ import (
|
|||
"github.com/hashicorp/go-hclog"
|
||||
)
|
||||
|
||||
// StateCounters lists the Prometheus counter definitions for the
// "acl.blocked" counters: one registration and one deregistration counter each
// for services, checks and nodes.
var StateCounters = []prometheus.CounterDefinition{
|
||||
{
|
||||
Name: []string{"acl", "blocked", "service", "registration"},
|
||||
Help: "Increments whenever a registration fails for a service (blocked by an ACL)",
|
||||
},
|
||||
{
|
||||
Name: []string{"acl", "blocked", "service", "deregistration"},
|
||||
Help: "Increments whenever a deregistration fails for a service (blocked by an ACL)",
|
||||
},
|
||||
{
|
||||
Name: []string{"acl", "blocked", "check", "registration"},
|
||||
Help: "Increments whenever a registration fails for a check (blocked by an ACL)",
|
||||
},
|
||||
{
|
||||
Name: []string{"acl", "blocked", "check", "deregistration"},
|
||||
Help: "Increments whenever a deregistration fails for a check (blocked by an ACL)",
|
||||
},
|
||||
{
|
||||
Name: []string{"acl", "blocked", "node", "registration"},
|
||||
Help: "Increments whenever a registration fails for a node (blocked by an ACL)",
|
||||
},
|
||||
{
|
||||
Name: []string{"acl", "blocked", "node", "deregistration"},
|
||||
Help: "Increments whenever a deregistration fails for a node (blocked by an ACL)",
|
||||
},
|
||||
}
|
||||
|
||||
// fullSyncReadMaxStale is fixed at two seconds.
// NOTE(review): by its name this is presumably the maximum staleness accepted
// for reads issued during a full sync — confirm against its uses.
const fullSyncReadMaxStale = 2 * time.Second
|
||||
|
||||
// Config is the configuration for the State.
|
||||
|
|
126
agent/setup.go
126
agent/setup.go
|
@ -8,6 +8,12 @@ import (
|
|||
"sync"
|
||||
"time"
|
||||
|
||||
"github.com/hashicorp/consul/agent/consul/fsm"
|
||||
|
||||
"github.com/armon/go-metrics/prometheus"
|
||||
"github.com/hashicorp/consul/agent/consul/usagemetrics"
|
||||
"github.com/hashicorp/consul/agent/local"
|
||||
|
||||
"github.com/hashicorp/go-hclog"
|
||||
"google.golang.org/grpc/grpclog"
|
||||
grpcresolver "google.golang.org/grpc/resolver"
|
||||
|
@ -72,6 +78,10 @@ func NewBaseDeps(configLoader ConfigLoader, logOut io.Writer) (BaseDeps, error)
|
|||
return d, fmt.Errorf("failed to setup node ID: %w", err)
|
||||
}
|
||||
|
||||
gauges, counters, summaries := getPrometheusDefs(cfg.Telemetry)
|
||||
cfg.Telemetry.PrometheusOpts.GaugeDefinitions = gauges
|
||||
cfg.Telemetry.PrometheusOpts.CounterDefinitions = counters
|
||||
cfg.Telemetry.PrometheusOpts.SummaryDefinitions = summaries
|
||||
d.MetricsHandler, err = lib.InitTelemetry(cfg.Telemetry)
|
||||
if err != nil {
|
||||
return d, fmt.Errorf("failed to initialize telemetry: %w", err)
|
||||
|
@ -177,3 +187,119 @@ func registerWithGRPC(b grpcresolver.Builder) {
|
|||
defer registerLock.Unlock()
|
||||
grpcresolver.Register(b)
|
||||
}
|
||||
|
||||
// getPrometheusDefs reaches into every slice of prometheus defs we've defined in each part of the agent, and appends
|
||||
// all of our slices into one nice slice of definitions per metric type for the Consul agent to pass to go-metrics.
|
||||
func getPrometheusDefs(cfg lib.TelemetryConfig) ([]prometheus.GaugeDefinition, []prometheus.CounterDefinition, []prometheus.SummaryDefinition) {
|
||||
// Build slice of slices for all gauge definitions
|
||||
var gauges = [][]prometheus.GaugeDefinition{
|
||||
cache.Gauges,
|
||||
consul.AutopilotGauges,
|
||||
consul.RPCGauges,
|
||||
consul.SessionGauges,
|
||||
grpc.StatsGauges,
|
||||
usagemetrics.Gauges,
|
||||
}
|
||||
// Flatten definitions
|
||||
// NOTE(kit): Do we actually want to create a set here so we can ensure definition names are unique?
|
||||
var gaugeDefs []prometheus.GaugeDefinition
|
||||
for _, g := range gauges {
|
||||
// Set Consul to each definition's namespace
|
||||
// TODO(kit): Prepending the service to each definition should be handled by go-metrics
|
||||
var withService []prometheus.GaugeDefinition
|
||||
for _, gauge := range g {
|
||||
gauge.Name = append([]string{cfg.MetricsPrefix}, gauge.Name...)
|
||||
withService = append(withService, gauge)
|
||||
}
|
||||
gaugeDefs = append(gaugeDefs, withService...)
|
||||
}
|
||||
|
||||
raftCounters := []prometheus.CounterDefinition{
|
||||
// TODO(kit): "raft..." metrics come from the raft lib and we should migrate these to a telemetry
|
||||
// package within. In the mean time, we're going to define a few here because they're key to monitoring Consul.
|
||||
{
|
||||
Name: []string{"raft", "apply"},
|
||||
Help: "This counts the number of Raft transactions occurring over the interval.",
|
||||
},
|
||||
{
|
||||
Name: []string{"raft", "state", "candidate"},
|
||||
Help: "This increments whenever a Consul server starts an election.",
|
||||
},
|
||||
{
|
||||
Name: []string{"raft", "state", "leader"},
|
||||
Help: "This increments whenever a Consul server becomes a leader.",
|
||||
},
|
||||
}
|
||||
|
||||
var counters = [][]prometheus.CounterDefinition{
|
||||
CatalogCounters,
|
||||
cache.Counters,
|
||||
consul.ACLCounters,
|
||||
consul.CatalogCounters,
|
||||
consul.ClientCounters,
|
||||
consul.RPCCounters,
|
||||
grpc.StatsCounters,
|
||||
local.StateCounters,
|
||||
raftCounters,
|
||||
}
|
||||
// Flatten definitions
|
||||
// NOTE(kit): Do we actually want to create a set here so we can ensure definition names are unique?
|
||||
var counterDefs []prometheus.CounterDefinition
|
||||
for _, c := range counters {
|
||||
// TODO(kit): Prepending the service to each definition should be handled by go-metrics
|
||||
var withService []prometheus.CounterDefinition
|
||||
for _, counter := range c {
|
||||
counter.Name = append([]string{cfg.MetricsPrefix}, counter.Name...)
|
||||
withService = append(withService, counter)
|
||||
}
|
||||
counterDefs = append(counterDefs, withService...)
|
||||
}
|
||||
|
||||
raftSummaries := []prometheus.SummaryDefinition{
|
||||
// TODO(kit): "raft..." metrics come from the raft lib and we should migrate these to a telemetry
|
||||
// package within. In the mean time, we're going to define a few here because they're key to monitoring Consul.
|
||||
{
|
||||
Name: []string{"raft", "commitTime"},
|
||||
Help: "This measures the time it takes to commit a new entry to the Raft log on the leader.",
|
||||
},
|
||||
{
|
||||
Name: []string{"raft", "leader", "lastContact"},
|
||||
Help: "Measures the time since the leader was last able to contact the follower nodes when checking its leader lease.",
|
||||
},
|
||||
}
|
||||
|
||||
var summaries = [][]prometheus.SummaryDefinition{
|
||||
HTTPSummaries,
|
||||
consul.ACLSummaries,
|
||||
consul.ACLEndpointSummaries,
|
||||
consul.ACLEndpointLegacySummaries,
|
||||
consul.CatalogSummaries,
|
||||
consul.FederationStateSummaries,
|
||||
consul.IntentionSummaries,
|
||||
consul.KVSummaries,
|
||||
consul.LeaderSummaries,
|
||||
consul.PreparedQuerySummaries,
|
||||
consul.RPCSummaries,
|
||||
consul.SegmentOSSSummaries,
|
||||
consul.SessionSummaries,
|
||||
consul.SessionEndpointSummaries,
|
||||
consul.TxnSummaries,
|
||||
fsm.CommandsSummaries,
|
||||
fsm.SnapshotSummaries,
|
||||
raftSummaries,
|
||||
}
|
||||
// Flatten definitions
|
||||
// NOTE(kit): Do we actually want to create a set here so we can ensure definition names are unique?
|
||||
var summaryDefs []prometheus.SummaryDefinition
|
||||
for _, s := range summaries {
|
||||
// TODO(kit): Prepending the service to each definition should be handled by go-metrics
|
||||
var withService []prometheus.SummaryDefinition
|
||||
for _, summary := range s {
|
||||
summary.Name = append([]string{cfg.MetricsPrefix}, summary.Name...)
|
||||
withService = append(withService, summary)
|
||||
}
|
||||
summaryDefs = append(summaryDefs, withService...)
|
||||
}
|
||||
|
||||
return gaugeDefs, counterDefs, summaryDefs
|
||||
}
|
||||
|
|
|
@ -54,6 +54,8 @@ func (p *Proxy) Serve() error {
|
|||
// Initial setup
|
||||
|
||||
// Setup telemetry if configured
|
||||
// NOTE(kit): As far as I can tell, all of the metrics in the proxy are generated at runtime, so we
|
||||
// don't have any static metrics we initialize at start.
|
||||
_, err := lib.InitTelemetry(newCfg.Telemetry)
|
||||
if err != nil {
|
||||
p.logger.Error("proxy telemetry config error", "error", err)
|
||||
|
|
|
@ -4,7 +4,7 @@ import (
|
|||
"reflect"
|
||||
"time"
|
||||
|
||||
metrics "github.com/armon/go-metrics"
|
||||
"github.com/armon/go-metrics"
|
||||
"github.com/armon/go-metrics/circonus"
|
||||
"github.com/armon/go-metrics/datadog"
|
||||
"github.com/armon/go-metrics/prometheus"
|
||||
|
@ -154,14 +154,6 @@ type TelemetryConfig struct {
|
|||
// hcl: telemetry { dogstatsd_tags = []string }
|
||||
DogstatsdTags []string `json:"dogstatsd_tags,omitempty" mapstructure:"dogstatsd_tags"`
|
||||
|
||||
// PrometheusRetentionTime is the retention time for prometheus metrics if greater than 0.
|
||||
// A value of 0 disable Prometheus support. Regarding Prometheus, it is considered a good
|
||||
// practice to put large values here (such as a few days), and at least the interval between
|
||||
// prometheus requests.
|
||||
//
|
||||
// hcl: telemetry { prometheus_retention_time = "duration" }
|
||||
PrometheusRetentionTime time.Duration `json:"prometheus_retention_time,omitempty" mapstructure:"prometheus_retention_time"`
|
||||
|
||||
// FilterDefault is the default for whether to allow a metric that's not
|
||||
// covered by the filter.
|
||||
//
|
||||
|
@ -199,10 +191,18 @@ type TelemetryConfig struct {
|
|||
//
|
||||
// hcl: telemetry { statsite_address = string }
|
||||
StatsiteAddr string `json:"statsite_address,omitempty" mapstructure:"statsite_address"`
|
||||
|
||||
// PrometheusOpts provides configuration for the PrometheusSink. Currently the only configuration
|
||||
// we acquire from hcl is the retention time. We also use definition slices that are set in agent setup
|
||||
// before being passed to InitTelemetry.
|
||||
//
|
||||
// hcl: telemetry { prometheus_retention_time = "duration" }
|
||||
PrometheusOpts prometheus.PrometheusOpts
|
||||
}
|
||||
|
||||
// MergeDefaults copies any non-zero field from defaults into the current
|
||||
// config.
|
||||
// TODO(kit): We no longer use this function and can probably delete it
|
||||
func (c *TelemetryConfig) MergeDefaults(defaults *TelemetryConfig) {
|
||||
if defaults == nil {
|
||||
return
|
||||
|
@ -221,6 +221,10 @@ func (c *TelemetryConfig) MergeDefaults(defaults *TelemetryConfig) {
|
|||
// implementing this for the types we actually have for now. Test failure
|
||||
// should catch the case where we add new types later.
|
||||
switch f.Kind() {
|
||||
case reflect.Struct:
|
||||
if f.Type() == reflect.TypeOf(prometheus.PrometheusOpts{}) {
|
||||
continue
|
||||
}
|
||||
case reflect.Slice:
|
||||
if !f.IsNil() {
|
||||
continue
|
||||
|
@ -277,80 +281,12 @@ func dogstatdSink(cfg TelemetryConfig, hostname string) (metrics.MetricSink, err
|
|||
}
|
||||
|
||||
func prometheusSink(cfg TelemetryConfig, hostname string) (metrics.MetricSink, error) {
|
||||
if cfg.PrometheusRetentionTime.Nanoseconds() < 1 {
|
||||
|
||||
if cfg.PrometheusOpts.Expiration.Nanoseconds() < 1 {
|
||||
return nil, nil
|
||||
}
|
||||
|
||||
// TODO(kit) define these in vars in the package/file they're used
|
||||
gaugeDefs := []prometheus.GaugeDefinition{
|
||||
{
|
||||
Name: []string{"consul", "autopilot", "healthy"},
|
||||
Help: "This tracks the overall health of the local server cluster. 1 if all servers are healthy, 0 if one or more are unhealthy.",
|
||||
},
|
||||
}
|
||||
|
||||
// TODO(kit) define these in vars in the package/file they're used
|
||||
counterDefs := []prometheus.CounterDefinition{
|
||||
{
|
||||
Name: []string{"consul", "raft", "apply"},
|
||||
Help: "This counts the number of Raft transactions occurring over the interval.",
|
||||
},
|
||||
{
|
||||
Name: []string{"consul", "raft", "state", "candidate"},
|
||||
Help: "This increments whenever a Consul server starts an election.",
|
||||
},
|
||||
{
|
||||
Name: []string{"consul", "raft", "state", "leader"},
|
||||
Help: "This increments whenever a Consul server becomes a leader.",
|
||||
},
|
||||
{
|
||||
Name: []string{"consul", "client", "api", "catalog_register"},
|
||||
Help: "Increments whenever a Consul agent receives a catalog register request.",
|
||||
},
|
||||
{
|
||||
Name: []string{"consul", "runtime", "total_gc_pause_ns"},
|
||||
Help: "Number of nanoseconds consumed by stop-the-world garbage collection (GC) pauses since Consul started.",
|
||||
},
|
||||
{
|
||||
Name: []string{"consul", "client", "rpc"},
|
||||
Help: "Increments whenever a Consul agent in client mode makes an RPC request to a Consul server.",
|
||||
},
|
||||
{
|
||||
Name: []string{"consul", "client", "rpc", "exceeded"},
|
||||
Help: "Increments whenever a Consul agent in client mode makes an RPC request to a Consul server gets rate limited by that agent's limits configuration.",
|
||||
},
|
||||
{
|
||||
Name: []string{"consul", "client", "rpc", "failed"},
|
||||
Help: "Increments whenever a Consul agent in client mode makes an RPC request to a Consul server and fails.",
|
||||
},
|
||||
}
|
||||
|
||||
// TODO(kit) define these in vars in the package/file they're used
|
||||
summaryDefs := []prometheus.SummaryDefinition{
|
||||
{
|
||||
Name: []string{"consul", "kvs", "apply"},
|
||||
Help: "This measures the time it takes to complete an update to the KV store.",
|
||||
},
|
||||
{
|
||||
Name: []string{"consul", "txn", "apply"},
|
||||
Help: "This measures the time spent applying a transaction operation.",
|
||||
},
|
||||
{
|
||||
Name: []string{"consul", "raft", "commitTime"},
|
||||
Help: "This measures the time it takes to commit a new entry to the Raft log on the leader.",
|
||||
},
|
||||
{
|
||||
Name: []string{"consul", "raft", "leader", "lastContact"},
|
||||
Help: "Measures the time since the leader was last able to contact the follower nodes when checking its leader lease.",
|
||||
},
|
||||
}
|
||||
prometheusOpts := prometheus.PrometheusOpts{
|
||||
Expiration: cfg.PrometheusRetentionTime,
|
||||
GaugeDefinitions: gaugeDefs,
|
||||
CounterDefinitions: counterDefs,
|
||||
SummaryDefinitions: summaryDefs,
|
||||
}
|
||||
sink, err := prometheus.NewPrometheusSinkFrom(prometheusOpts)
|
||||
sink, err := prometheus.NewPrometheusSinkFrom(cfg.PrometheusOpts)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
@ -440,6 +376,9 @@ func InitTelemetry(cfg TelemetryConfig) (*metrics.InmemSink, error) {
|
|||
if err := addSink(circonusSink); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
if err := addSink(circonusSink); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
if err := addSink(prometheusSink); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
|
|
@ -5,11 +5,14 @@ import (
|
|||
"testing"
|
||||
"time"
|
||||
|
||||
"github.com/armon/go-metrics/prometheus"
|
||||
|
||||
"github.com/stretchr/testify/require"
|
||||
)
|
||||
|
||||
func makeFullTelemetryConfig(t *testing.T) TelemetryConfig {
|
||||
var (
|
||||
promOpts = prometheus.PrometheusOpts{}
|
||||
strSliceVal = []string{"foo"}
|
||||
strVal = "foo"
|
||||
intVal = int64(1 * time.Second)
|
||||
|
@ -27,6 +30,12 @@ func makeFullTelemetryConfig(t *testing.T) TelemetryConfig {
|
|||
// now for brevity but will fail the test if a new field type is added since
|
||||
// this is likely not implemented in MergeDefaults either.
|
||||
switch f.Kind() {
|
||||
case reflect.Struct:
|
||||
if f.Type() != reflect.TypeOf(promOpts) {
|
||||
t.Fatalf("unknown struct type in TelemetryConfig: actual %v, expected: %v", f.Type(), reflect.TypeOf(promOpts))
|
||||
}
|
||||
// TODO(kit): This should delve into the fields and set them individually rather than using an empty struct
|
||||
f.Set(reflect.ValueOf(promOpts))
|
||||
case reflect.Slice:
|
||||
if f.Type() != reflect.TypeOf(strSliceVal) {
|
||||
t.Fatalf("unknown slice type in TelemetryConfig." +
|
||||
|
|
Loading…
Reference in New Issue