Merge pull request #9261 from hashicorp/telemetry/fix-missing-and-stale-docs-2

Telemetry/fix missing and stale docs
This commit is contained in:
Kit Patella 2020-11-23 13:34:19 -08:00 committed by GitHub
commit 0ba7f4ce39
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
5 changed files with 50 additions and 27 deletions

10
agent/cache/cache.go vendored
View File

@ -37,7 +37,7 @@ import (
var Gauges = []prometheus.GaugeDefinition{ var Gauges = []prometheus.GaugeDefinition{
{ {
Name: []string{"consul", "cache", "entries_count"}, Name: []string{"consul", "cache", "entries_count"},
Help: "", Help: "Represents the number of entries in this cache.",
}, },
} }
@ -45,19 +45,19 @@ var Gauges = []prometheus.GaugeDefinition{
var Counters = []prometheus.CounterDefinition{ var Counters = []prometheus.CounterDefinition{
{ {
Name: []string{"consul", "cache", "bypass"}, Name: []string{"consul", "cache", "bypass"},
Help: "", Help: "Counts how many times a request bypassed the cache because no cache-key was provided.",
}, },
{ {
Name: []string{"consul", "cache", "fetch_success"}, Name: []string{"consul", "cache", "fetch_success"},
Help: "", Help: "Counts the number of successful fetches by the cache.",
}, },
{ {
Name: []string{"consul", "cache", "fetch_error"}, Name: []string{"consul", "cache", "fetch_error"},
Help: "", Help: "Counts the number of failed fetches by the cache.",
}, },
{ {
Name: []string{"consul", "cache", "evict_expired"}, Name: []string{"consul", "cache", "evict_expired"},
Help: "", Help: "Counts the number of expired entries that are evicted.",
}, },
} }

View File

@ -25,7 +25,7 @@ var CatalogCounters = []prometheus.CounterDefinition{
}, },
{ {
Name: []string{"catalog", "connect", "query"}, Name: []string{"catalog", "connect", "query"},
Help: "", Help: "Increments for each connect-based catalog query for the given service.",
}, },
{ {
Name: []string{"catalog", "service", "query-tag"}, Name: []string{"catalog", "service", "query-tag"},
@ -33,7 +33,7 @@ var CatalogCounters = []prometheus.CounterDefinition{
}, },
{ {
Name: []string{"catalog", "connect", "query-tag"}, Name: []string{"catalog", "connect", "query-tag"},
Help: "", Help: "Increments for each connect-based catalog query for the given service with the given tag.",
}, },
{ {
Name: []string{"catalog", "service", "query-tags"}, Name: []string{"catalog", "service", "query-tags"},
@ -41,7 +41,7 @@ var CatalogCounters = []prometheus.CounterDefinition{
}, },
{ {
Name: []string{"catalog", "connect", "query-tags"}, Name: []string{"catalog", "connect", "query-tags"},
Help: "", Help: "Increments for each connect-based catalog query for the given service with the given tags.",
}, },
{ {
Name: []string{"catalog", "service", "not-found"}, Name: []string{"catalog", "service", "not-found"},
@ -49,7 +49,7 @@ var CatalogCounters = []prometheus.CounterDefinition{
}, },
{ {
Name: []string{"catalog", "connect", "not-found"}, Name: []string{"catalog", "connect", "not-found"},
Help: "", Help: "Increments for each connect-based catalog query where the given service could not be found.",
}, },
} }

View File

@ -53,43 +53,43 @@ var CommandsSummaries = []prometheus.SummaryDefinition{
}, },
{ {
Name: []string{"consul", "fsm", "intention"}, Name: []string{"consul", "fsm", "intention"},
Help: "", Help: "Deprecated - use fsm_intention instead",
}, },
{ {
Name: []string{"fsm", "intention"}, Name: []string{"fsm", "intention"},
Help: "", Help: "Measures the time it takes to apply an intention operation to the FSM.",
}, },
{ {
Name: []string{"consul", "fsm", "ca"}, Name: []string{"consul", "fsm", "ca"},
Help: "", Help: "Deprecated - use fsm_ca instead",
},
{
Name: []string{"fsm", "ca"},
Help: "Measures the time it takes to apply CA configuration operations to the FSM.",
}, },
{ {
Name: []string{"fsm", "ca", "leaf"}, Name: []string{"fsm", "ca", "leaf"},
Help: "", Help: "Measures the time it takes to apply an operation while signing a leaf certificate.",
}, },
{ {
Name: []string{"fsm", "acl", "token"}, Name: []string{"fsm", "acl", "token"},
Help: "", Help: "Measures the time it takes to apply an ACL token operation to the FSM.",
},
{
Name: []string{"fsm", "ca", "leaf"},
Help: "",
}, },
{ {
Name: []string{"fsm", "acl", "policy"}, Name: []string{"fsm", "acl", "policy"},
Help: "", Help: "Measures the time it takes to apply an ACL policy operation to the FSM.",
}, },
{ {
Name: []string{"fsm", "acl", "bindingrule"}, Name: []string{"fsm", "acl", "bindingrule"},
Help: "", Help: "Measures the time it takes to apply an ACL binding rule operation to the FSM.",
}, },
{ {
Name: []string{"fsm", "acl", "authmethod"}, Name: []string{"fsm", "acl", "authmethod"},
Help: "", Help: "Measures the time it takes to apply an ACL authmethod operation to the FSM.",
}, },
{ {
Name: []string{"fsm", "system_metadata"}, Name: []string{"fsm", "system_metadata"},
Help: "", Help: "Measures the time it takes to apply a system metadata operation to the FSM.",
}, },
// TODO(kit): We generate the config-entry fsm summaries by reading off of the request. It is // TODO(kit): We generate the config-entry fsm summaries by reading off of the request. It is
// possible to statically declare these when we know all of the names, but I didn't get to it // possible to statically declare these when we know all of the names, but I didn't get to it
@ -378,8 +378,12 @@ func (c *FSM) applyIntentionOperation(buf []byte, index uint64) interface{} {
panic(fmt.Errorf("failed to decode request: %v", err)) panic(fmt.Errorf("failed to decode request: %v", err))
} }
// TODO(kit): We should deprecate this first metric that writes the metrics_prefix itself,
// the config we use to flag this out, telemetry.disable_compat_1.9 is on the agent - how do
// we access it here?
defer metrics.MeasureSinceWithLabels([]string{"consul", "fsm", "intention"}, time.Now(), defer metrics.MeasureSinceWithLabels([]string{"consul", "fsm", "intention"}, time.Now(),
[]metrics.Label{{Name: "op", Value: string(req.Op)}}) []metrics.Label{{Name: "op", Value: string(req.Op)}})
defer metrics.MeasureSinceWithLabels([]string{"fsm", "intention"}, time.Now(), defer metrics.MeasureSinceWithLabels([]string{"fsm", "intention"}, time.Now(),
[]metrics.Label{{Name: "op", Value: string(req.Op)}}) []metrics.Label{{Name: "op", Value: string(req.Op)}})
@ -474,6 +478,7 @@ func (c *FSM) applyConnectCAOperation(buf []byte, index uint64) interface{} {
} }
} }
// applyConnectCALeafOperation applies an operation while signing a leaf certificate.
func (c *FSM) applyConnectCALeafOperation(buf []byte, index uint64) interface{} { func (c *FSM) applyConnectCALeafOperation(buf []byte, index uint64) interface{} {
var req structs.CALeafRequest var req structs.CALeafRequest
if err := structs.Decode(buf, &req); err != nil { if err := structs.Decode(buf, &req); err != nil {

View File

@ -16,11 +16,11 @@ var SessionGauges = []prometheus.GaugeDefinition{
}, },
{ {
Name: []string{"raft", "applied_index"}, Name: []string{"raft", "applied_index"},
Help: "", Help: "Represents the raft applied index.",
}, },
{ {
Name: []string{"raft", "last_index"}, Name: []string{"raft", "last_index"},
Help: "", Help: "Represents the raft last index.",
}, },
} }
@ -153,7 +153,7 @@ func (s *Server) clearAllSessionTimers() {
s.sessionTimers.StopAll() s.sessionTimers.StopAll()
} }
// updateMetrics is a long running routine used to uddate a // updateMetrics is a long running routine used to update a
// number of server periodic metrics // number of server periodic metrics
func (s *Server) updateMetrics() { func (s *Server) updateMetrics() {
for { for {

View File

@ -196,6 +196,10 @@ These metrics are used to monitor the health of the Consul servers.
| `consul.acl.ResolveTokenToIdentity` | This measures the time it takes to resolve an ACL token to an Identity. | ms | timer | | `consul.acl.ResolveTokenToIdentity` | This measures the time it takes to resolve an ACL token to an Identity. | ms | timer |
| `consul.acl.token.cache_hit` | Increments if Consul is able to resolve a token's identity, or a legacy token, from the cache. | cache read op | counter | | `consul.acl.token.cache_hit` | Increments if Consul is able to resolve a token's identity, or a legacy token, from the cache. | cache read op | counter |
| `consul.acl.token.cache_miss` | Increments if Consul cannot resolve a token's identity, or a legacy token, from the cache. | cache read op | counter | | `consul.acl.token.cache_miss` | Increments if Consul cannot resolve a token's identity, or a legacy token, from the cache. | cache read op | counter |
| `consul.cache.bypass` | Counts how many times a request bypassed the cache because no cache-key was provided. | counter | counter |
| `consul.cache.fetch_success` | Counts the number of successful fetches by the cache. | counter | counter |
| `consul.cache.fetch_error` | Counts the number of failed fetches by the cache. | counter | counter |
| `consul.cache.evict_expired` | Counts the number of expired entries that are evicted. | counter | counter |
| `consul.raft.fsm.snapshot` | This metric measures the time taken by the FSM to record the current state for the snapshot. | ms | timer | | `consul.raft.fsm.snapshot` | This metric measures the time taken by the FSM to record the current state for the snapshot. | ms | timer |
| `consul.raft.fsm.apply` | This metric gives the number of logs committed since the last interval. | commit logs / interval | counter | | `consul.raft.fsm.apply` | This metric gives the number of logs committed since the last interval. | commit logs / interval | counter |
| `consul.raft.commitNumLogs` | This metric measures the count of logs processed for application to the FSM in a single batch. | logs | gauge | | `consul.raft.commitNumLogs` | This metric measures the count of logs processed for application to the FSM in a single batch. | logs | gauge |
@ -207,6 +211,8 @@ These metrics are used to monitor the health of the Consul servers.
| `consul.raft.replication.heartbeat` | This metric measures the time taken to invoke appendEntries on a peer, so that it doesn't time out on a periodic basis. | ms | timer | | `consul.raft.replication.heartbeat` | This metric measures the time taken to invoke appendEntries on a peer, so that it doesn't time out on a periodic basis. | ms | timer |
| `consul.serf.snapshot.appendLine` | This metric measures the time taken by the Consul agent to append an entry into the existing log. | ms | timer | | `consul.serf.snapshot.appendLine` | This metric measures the time taken by the Consul agent to append an entry into the existing log. | ms | timer |
| `consul.serf.snapshot.compact` | This metric measures the time taken by the Consul agent to compact a log. This operation occurs only when the snapshot becomes large enough to justify the compaction. | ms | timer | | `consul.serf.snapshot.compact` | This metric measures the time taken by the Consul agent to compact a log. This operation occurs only when the snapshot becomes large enough to justify the compaction. | ms | timer |
| `consul.raft.applied_index` | Represents the raft applied index. | index | gauge |
| `consul.raft.last_index` | Represents the raft last index. | index | gauge |
| `consul.raft.state.leader` | This increments whenever a Consul server becomes a leader. If there are frequent leadership changes this may be an indication that the servers are overloaded and aren't meeting the soft real-time requirements for Raft, or that there are networking problems between the servers. | leadership transitions / interval | counter | | `consul.raft.state.leader` | This increments whenever a Consul server becomes a leader. If there are frequent leadership changes this may be an indication that the servers are overloaded and aren't meeting the soft real-time requirements for Raft, or that there are networking problems between the servers. | leadership transitions / interval | counter |
| `consul.raft.state.candidate` | This increments whenever a Consul server starts an election. If this increments without a leadership change occurring it could indicate that a single server is overloaded or is experiencing network connectivity issues. | election attempts / interval | counter | | `consul.raft.state.candidate` | This increments whenever a Consul server starts an election. If this increments without a leadership change occurring it could indicate that a single server is overloaded or is experiencing network connectivity issues. | election attempts / interval | counter |
| `consul.raft.apply` | This counts the number of Raft transactions occurring over the interval, which is a general indicator of the write load on the Consul servers. | raft transactions / interval | counter | | `consul.raft.apply` | This counts the number of Raft transactions occurring over the interval, which is a general indicator of the write load on the Consul servers. | raft transactions / interval | counter |
@ -243,6 +249,14 @@ These metrics are used to monitor the health of the Consul servers.
| `consul.fsm.txn` | This measures the time it takes to apply the given transaction update to the FSM. | ms | timer | | `consul.fsm.txn` | This measures the time it takes to apply the given transaction update to the FSM. | ms | timer |
| `consul.fsm.autopilot` | This measures the time it takes to apply the given autopilot update to the FSM. | ms | timer | | `consul.fsm.autopilot` | This measures the time it takes to apply the given autopilot update to the FSM. | ms | timer |
| `consul.fsm.persist` | This measures the time it takes to persist the FSM to a raft snapshot. | ms | timer | | `consul.fsm.persist` | This measures the time it takes to persist the FSM to a raft snapshot. | ms | timer |
| `consul.fsm.intention` | Measures the time it takes to apply an intention operation to the state store. | ms | timer |
| `consul.fsm.ca` | Measures the time it takes to apply CA configuration operations to the FSM. | ms | timer |
| `consul.fsm.ca.leaf` | Measures the time it takes to apply an operation while signing a leaf certificate. | ms | timer |
| `consul.fsm.acl.token` | Measures the time it takes to apply an ACL token operation to the FSM. | ms | timer |
| `consul.fsm.acl.policy` | Measures the time it takes to apply an ACL policy operation to the FSM. | ms | timer |
| `consul.fsm.acl.bindingrule` | Measures the time it takes to apply an ACL binding rule operation to the FSM. | ms | timer |
| `consul.fsm.acl.authmethod` | Measures the time it takes to apply an ACL authmethod operation to the FSM. | ms | timer |
| `consul.fsm.system_metadata` | Measures the time it takes to apply a system metadata operation to the FSM. | ms | timer |
| `consul.kvs.apply` | This measures the time it takes to complete an update to the KV store. | ms | timer | | `consul.kvs.apply` | This measures the time it takes to complete an update to the KV store. | ms | timer |
| `consul.leader.barrier` | This measures the time spent waiting for the raft barrier upon gaining leadership. | ms | timer | | `consul.leader.barrier` | This measures the time spent waiting for the raft barrier upon gaining leadership. | ms | timer |
| `consul.leader.reconcile` | This measures the time spent updating the raft store from the serf member information. | ms | timer | | `consul.leader.reconcile` | This measures the time spent updating the raft store from the serf member information. | ms | timer |
@ -306,6 +320,10 @@ These metrics give insight into the health of the cluster as a whole.
| `consul.catalog.service.query-tag..` | This increments for each catalog query for the given service with the given tag. | queries | counter | | `consul.catalog.service.query-tag..` | This increments for each catalog query for the given service with the given tag. | queries | counter |
| `consul.catalog.service.query-tags..` | This increments for each catalog query for the given service with the given tags. | queries | counter | | `consul.catalog.service.query-tags..` | This increments for each catalog query for the given service with the given tags. | queries | counter |
| `consul.catalog.service.not-found.` | This increments for each catalog query where the given service could not be found. | queries | counter | | `consul.catalog.service.not-found.` | This increments for each catalog query where the given service could not be found. | queries | counter |
| `consul.catalog.connect.query.` | This increments for each connect-based catalog query for the given service. | queries | counter |
| `consul.catalog.connect.query-tag..` | This increments for each connect-based catalog query for the given service with the given tag. | queries | counter |
| `consul.catalog.connect.query-tags..` | This increments for each connect-based catalog query for the given service with the given tags. | queries | counter |
| `consul.catalog.connect.not-found.` | This increments for each connect-based catalog query where the given service could not be found. | queries | counter |
## Connect Built-in Proxy Metrics ## Connect Built-in Proxy Metrics