// Copyright (c) HashiCorp, Inc.
// SPDX-License-Identifier: BUSL-1.1

package agent
import (
2022-06-03 13:07:37 -04:00
"context"
2021-10-27 15:23:29 -04:00
"crypto/x509"
"fmt"
2021-10-08 10:31:50 -07:00
"net/http"
"net/http/httptest"
2022-11-10 10:26:01 -06:00
"os"
2021-10-27 15:23:29 -04:00
"path/filepath"
2021-10-08 10:31:50 -07:00
"strings"
2023-11-07 10:15:50 -06:00
"sync/atomic"
2021-10-08 10:31:50 -07:00
"testing"
2021-10-27 15:23:29 -04:00
2022-06-29 09:46:37 -07:00
"github.com/stretchr/testify/require"
2022-03-31 10:49:37 -07:00
"github.com/hashicorp/consul/agent/rpc/middleware"
2021-10-27 15:23:29 -04:00
"github.com/hashicorp/consul/sdk/testutil"
2023-11-07 10:15:50 -06:00
"github.com/hashicorp/consul/sdk/testutil/retry"
2023-10-05 17:27:28 -04:00
testretry "github.com/hashicorp/consul/sdk/testutil/retry"
2021-10-27 15:56:38 -04:00
"github.com/hashicorp/consul/testrpc"
2021-10-27 15:23:29 -04:00
"github.com/hashicorp/consul/tlsutil"
2021-10-08 10:31:50 -07:00
)
2023-11-07 10:15:50 -06:00
// metricsPrefixCounter is the monotonically increasing source of the numeric
// suffix handed out by getUniqueMetricsPrefix.
var metricsPrefixCounter atomic.Uint64

// getUniqueMetricsPrefix returns a fresh "metrics_<n>" string on every call so
// that each test can run with its own metrics prefix. Distinct prefixes are
// required because go-metrics is effectively a global variable.
func getUniqueMetricsPrefix() string {
	id := metricsPrefixCounter.Add(1)
	return fmt.Sprintf("metrics_%d", id)
}
2021-10-27 15:23:29 -04:00
// skipIfShortTesting skips the calling test when the test binary runs with
// -short.
func skipIfShortTesting(t *testing.T) {
	// Attribute the skip to the calling test rather than to this helper.
	t.Helper()
	if testing.Short() {
		t.Skip("too slow for testing.Short")
	}
}
2023-10-05 17:27:28 -04:00
func recordPromMetrics ( t require . TestingT , a * TestAgent , respRec * httptest . ResponseRecorder ) {
if tt , ok := t . ( * testing . T ) ; ok {
tt . Helper ( )
}
2021-10-25 10:55:59 -07:00
req , err := http . NewRequest ( "GET" , "/v1/agent/metrics?format=prometheus" , nil )
require . NoError ( t , err , "Failed to generate new http request." )
2023-09-15 19:15:42 -04:00
a . srv . h . ServeHTTP ( respRec , req )
require . Equalf ( t , 200 , respRec . Code , "expected 200, got %d, body: %s" , respRec . Code , respRec . Body . String ( ) )
2021-10-25 10:55:59 -07:00
}
2022-03-17 16:02:26 -07:00
// assertMetricExists fails the test immediately unless the recorded
// /v1/agent/metrics response body is non-empty and contains metric as a
// substring.
func assertMetricExists(t *testing.T, respRec *httptest.ResponseRecorder, metric string) {
	// Report failures at the caller's line, not inside this helper.
	t.Helper()

	// Body.String() copies the buffer, so take the copy once.
	body := respRec.Body.String()
	if body == "" {
		t.Fatal("Response body is empty.")
	}
	if !strings.Contains(body, metric) {
		t.Fatalf("Could not find the metric \"%s\" in the /v1/agent/metrics response", metric)
	}
}
2022-09-15 18:04:37 +01:00
// assertMetricExistsWithLabels looks in the prometheus metrics response for the metric name and all the labels. eg:
// new_rpc_metrics_rpc_server_call{errored="false",method="Status.Ping",request_type="unknown",rpc_type="net/rpc"}
//
// The check passes as soon as one non-comment line contains metric and every
// entry of labelNames as substrings; otherwise the test fails immediately.
func assertMetricExistsWithLabels(t *testing.T, respRec *httptest.ResponseRecorder, metric string, labelNames []string) {
	// Report failures at the caller's line, not inside this helper.
	t.Helper()

	metrics := respRec.Body.String()
	if metrics == "" {
		t.Fatal("Response body is empty.")
	}
	if !strings.Contains(metrics, metric) {
		t.Fatalf("Could not find the metric \"%s\" in the /v1/agent/metrics response", metric)
	}

	// hasAllLabels reports whether every requested label name occurs in line.
	hasAllLabels := func(line string) bool {
		for _, labelName := range labelNames {
			if !strings.Contains(line, labelName) {
				return false
			}
		}
		return true
	}

	for _, line := range strings.Split(metrics, "\n") {
		// skip blank lines and "#" help/type lines
		if len(line) < 1 || line[0] == '#' {
			continue
		}
		if strings.Contains(line, metric) && hasAllLabels(line) {
			// done!
			return
		}
	}

	t.Fatalf("Could not verify that all named labels \"%s\" exist for the metric \"%s\" in the /v1/agent/metrics response", strings.Join(labelNames, ", "), metric)
}
// assertLabelWithValueForMetricExistsNTime counts the non-comment lines of the
// prometheus response that contain both `metric{` and `label="labelValue"`,
// and fails the test when fewer than occurrences such lines are found. More
// matches than occurrences are accepted.
func assertLabelWithValueForMetricExistsNTime(t *testing.T, respRec *httptest.ResponseRecorder, metric string, label string, labelValue string, occurrences int) {
	// Report failures at the caller's line, not inside this helper.
	t.Helper()

	metrics := respRec.Body.String()
	if metrics == "" {
		t.Fatal("Response body is empty.")
	}
	if !strings.Contains(metrics, metric) {
		t.Fatalf("Could not find the metric \"%s\" in the /v1/agent/metrics response", metric)
	}

	// don't look at _sum or _count or other aggregates
	metricTarget := metric + "{"
	// eg method="Status.Ping"
	labelWithValueTarget := label + "=" + "\"" + labelValue + "\""

	matchesFound := 0
	for _, line := range strings.Split(metrics, "\n") {
		// skip blank lines and "#" help/type lines
		if len(line) < 1 || line[0] == '#' {
			continue
		}
		if strings.Contains(line, metricTarget) && strings.Contains(line, labelWithValueTarget) {
			matchesFound++
		}
	}

	if matchesFound < occurrences {
		t.Fatalf("Only found metric \"%s\" %d times. Wanted %d times.", metric, matchesFound, occurrences)
	}
}
2021-10-25 10:55:59 -07:00
// assertMetricExistsWithValue fails the test unless the recorded response body
// is non-empty and contains the text "<metric> <value>".
//
// The match is a plain substring search, so value only has to be a prefix of
// the emitted sample value.
func assertMetricExistsWithValue(t *testing.T, respRec *httptest.ResponseRecorder, metric string, value string) {
	// Report failures at the caller's line, not inside this helper.
	t.Helper()

	body := respRec.Body.String()
	if body == "" {
		t.Fatal("Response body is empty.")
	}

	// eg "consul_autopilot_healthy NaN"
	target := metric + " " + value

	if !strings.Contains(body, target) {
		t.Fatalf("Could not find the metric \"%s\" with value \"%s\" in the /v1/agent/metrics response", metric, value)
	}
}
2022-09-15 18:04:37 +01:00
// assertMetricsWithLabelIsNonZero fails the test when any sample line carrying
// `label="labelValue"` reports the value "0".
//
// Lines that do not contain the label/value pair are ignored, so the check
// passes vacuously when no line matches.
func assertMetricsWithLabelIsNonZero(t *testing.T, respRec *httptest.ResponseRecorder, label, labelValue string) {
	// Report failures at the caller's line, not inside this helper.
	t.Helper()

	metrics := respRec.Body.String()
	if metrics == "" {
		t.Fatal("Response body is empty.")
	}

	labelWithValueTarget := label + "=" + "\"" + labelValue + "\""

	for _, line := range strings.Split(metrics, "\n") {
		// skip blank lines and "#" help/type lines
		if len(line) < 1 || line[0] == '#' {
			continue
		}
		if !strings.Contains(line, labelWithValueTarget) {
			continue
		}

		// The sample value follows the closing brace of the label set. Cutting
		// there, not at the first space, keeps label values containing spaces
		// from being mistaken for the sample value.
		closing := strings.LastIndexByte(line, '}')
		if closing < 0 {
			// Not a labelled sample line; nothing to compare.
			continue
		}

		// A line with no value after the label set yields no fields and is
		// skipped instead of panicking on an out-of-range index.
		fields := strings.Fields(line[closing+1:])
		if len(fields) > 0 && fields[0] == "0" {
			t.Fatalf("Metric with label provided \"%s:%s\" has the value 0", label, labelValue)
		}
	}
}
2021-10-25 10:55:59 -07:00
// assertMetricNotExists fails the test when the recorded response body is
// empty or contains metric as a substring.
func assertMetricNotExists(t *testing.T, respRec *httptest.ResponseRecorder, metric string) {
	// Report failures at the caller's line, not inside this helper.
	t.Helper()

	body := respRec.Body.String()
	if body == "" {
		t.Fatal("Response body is empty.")
	}
	if strings.Contains(body, metric) {
		t.Fatalf("Didn't expect to find the metric \"%s\" in the /v1/agent/metrics response", metric)
	}
}
2022-03-31 10:49:37 -07:00
// TestAgent_OneTwelveRPCMetrics test for the 1.12 style RPC metrics. These are the labeled metrics coming from
2022-03-17 16:02:26 -07:00
// agent.rpc.middleware.interceptors package.
2022-03-31 10:49:37 -07:00
func TestAgent_OneTwelveRPCMetrics ( t * testing . T ) {
2022-03-17 16:02:26 -07:00
skipIfShortTesting ( t )
// This test cannot use t.Parallel() since we modify global state, ie the global metrics instance
2022-03-31 10:49:37 -07:00
t . Run ( "Check that 1.12 rpc metrics are not emitted by default." , func ( t * testing . T ) {
2023-11-07 10:15:50 -06:00
metricsPrefix := getUniqueMetricsPrefix ( )
2022-03-17 16:02:26 -07:00
hcl := fmt . Sprintf ( `
telemetry = {
prometheus_retention_time = "5s"
disable_hostname = true
metrics_prefix = "%s"
}
` , metricsPrefix )
a := StartTestAgent ( t , TestAgent { HCL : hcl } )
defer a . Shutdown ( )
var out struct { }
2022-12-14 09:24:22 -06:00
err := a . RPC ( context . Background ( ) , "Status.Ping" , struct { } { } , & out )
2022-03-17 16:02:26 -07:00
require . NoError ( t , err )
respRec := httptest . NewRecorder ( )
recordPromMetrics ( t , a , respRec )
2022-03-31 10:49:37 -07:00
assertMetricNotExists ( t , respRec , metricsPrefix + "_rpc_server_call" )
} )
t . Run ( "Check that 1.12 rpc metrics are emitted when specified by operator." , func ( t * testing . T ) {
2023-11-07 10:15:50 -06:00
metricsPrefix := getUniqueMetricsPrefix ( )
2022-03-31 10:49:37 -07:00
allowRPCMetricRule := metricsPrefix + "." + strings . Join ( middleware . OneTwelveRPCSummary [ 0 ] . Name , "." )
hcl := fmt . Sprintf ( `
telemetry = {
prometheus_retention_time = "5s"
disable_hostname = true
metrics_prefix = "%s"
prefix_filter = [ "+%s" ]
}
` , metricsPrefix , allowRPCMetricRule )
a := StartTestAgent ( t , TestAgent { HCL : hcl } )
defer a . Shutdown ( )
var out struct { }
2022-12-14 09:24:22 -06:00
err := a . RPC ( context . Background ( ) , "Status.Ping" , struct { } { } , & out )
2022-03-31 10:49:37 -07:00
require . NoError ( t , err )
2022-12-14 09:24:22 -06:00
err = a . RPC ( context . Background ( ) , "Status.Ping" , struct { } { } , & out )
2022-03-31 10:49:37 -07:00
require . NoError ( t , err )
2022-12-14 09:24:22 -06:00
err = a . RPC ( context . Background ( ) , "Status.Ping" , struct { } { } , & out )
2022-03-31 10:49:37 -07:00
require . NoError ( t , err )
respRec := httptest . NewRecorder ( )
recordPromMetrics ( t , a , respRec )
// make sure the labels exist for this metric
2022-04-12 10:50:25 -07:00
assertMetricExistsWithLabels ( t , respRec , metricsPrefix + "_rpc_server_call" , [ ] string { "errored" , "method" , "request_type" , "rpc_type" , "leader" } )
2022-03-31 10:49:37 -07:00
// make sure we see 3 Status.Ping metrics corresponding to the calls we made above
assertLabelWithValueForMetricExistsNTime ( t , respRec , metricsPrefix + "_rpc_server_call" , "method" , "Status.Ping" , 3 )
2022-09-15 18:04:37 +01:00
// make sure rpc calls with elapsed time below 1ms are reported as decimal
assertMetricsWithLabelIsNonZero ( t , respRec , "method" , "Status.Ping" )
2022-03-17 16:02:26 -07:00
} )
}
2022-06-03 13:07:37 -04:00
func TestHTTPHandlers_AgentMetrics_LeaderShipMetrics ( t * testing . T ) {
skipIfShortTesting ( t )
// This test cannot use t.Parallel() since we modify global state, ie the global metrics instance
t . Run ( "check that metric isLeader is set properly on server" , func ( t * testing . T ) {
2023-11-07 10:15:50 -06:00
metricsPrefix1 := getUniqueMetricsPrefix ( )
metricsPrefix2 := getUniqueMetricsPrefix ( )
hcl1 := fmt . Sprintf ( `
server = true
2022-06-03 13:07:37 -04:00
telemetry = {
2023-11-07 10:15:50 -06:00
prometheus_retention_time = "25s" ,
disable_hostname = true
metrics_prefix = "%s"
2022-06-03 13:07:37 -04:00
}
2023-11-07 10:15:50 -06:00
` , metricsPrefix1 )
2022-06-03 13:07:37 -04:00
2023-11-07 10:15:50 -06:00
hcl2 := fmt . Sprintf ( `
server = true
telemetry = {
prometheus_retention_time = "25s" ,
disable_hostname = true
metrics_prefix = "%s"
}
` , metricsPrefix2 )
2022-06-03 13:07:37 -04:00
2023-11-07 10:15:50 -06:00
overrides := `
bootstrap = false
bootstrap_expect = 2
`
s1 := StartTestAgent ( t , TestAgent { Name : "s1" , HCL : hcl1 , Overrides : overrides } )
s2 := StartTestAgent ( t , TestAgent { Name : "s2" , HCL : hcl2 , Overrides : overrides } )
defer s1 . Shutdown ( )
defer s2 . Shutdown ( )
2022-06-03 13:07:37 -04:00
// agent hasn't become a leader
2023-11-07 10:15:50 -06:00
retry . RunWith ( retry . ThirtySeconds ( ) , t , func ( r * testretry . R ) {
respRec := httptest . NewRecorder ( )
recordPromMetrics ( r , s1 , respRec )
found := strings . Contains ( respRec . Body . String ( ) , metricsPrefix1 + "_server_isLeader 0" )
require . True ( r , found , "non-leader server should have isLeader 0" )
} )
2022-06-03 13:07:37 -04:00
2023-11-07 10:15:50 -06:00
_ , err := s2 . JoinLAN ( [ ] string { s1 . Config . SerfBindAddrLAN . String ( ) } , nil )
require . NoError ( t , err )
testrpc . WaitForLeader ( t , s1 . RPC , "dc1" )
testrpc . WaitForLeader ( t , s2 . RPC , "dc1" )
2022-06-03 13:07:37 -04:00
// Verify agent's isLeader metrics is 1
2023-11-07 10:15:50 -06:00
retry . RunWith ( retry . ThirtySeconds ( ) , t , func ( r * testretry . R ) {
respRec1 := httptest . NewRecorder ( )
recordPromMetrics ( r , s1 , respRec1 )
found1 := strings . Contains ( respRec1 . Body . String ( ) , metricsPrefix1 + "_server_isLeader 1" )
respRec2 := httptest . NewRecorder ( )
recordPromMetrics ( r , s2 , respRec2 )
found2 := strings . Contains ( respRec2 . Body . String ( ) , metricsPrefix2 + "_server_isLeader 1" )
require . True ( r , found1 || found2 , "leader server should have isLeader 1" )
} )
2022-06-03 13:07:37 -04:00
} )
}
2021-10-25 10:55:59 -07:00
// TestHTTPHandlers_AgentMetrics_ConsulAutopilot_Prometheus adds testing around
2021-10-08 10:31:50 -07:00
// the published autopilot metrics on https://www.consul.io/docs/agent/telemetry#autopilot
2021-10-25 10:55:59 -07:00
func TestHTTPHandlers_AgentMetrics_ConsulAutopilot_Prometheus ( t * testing . T ) {
2021-10-27 15:23:29 -04:00
skipIfShortTesting ( t )
2021-10-08 10:31:50 -07:00
// This test cannot use t.Parallel() since we modify global state, ie the global metrics instance
2021-10-25 10:55:59 -07:00
t . Run ( "Check consul_autopilot_* are not emitted metrics on clients" , func ( t * testing . T ) {
2023-11-07 10:15:50 -06:00
metricsPrefix := getUniqueMetricsPrefix ( )
hcl := fmt . Sprintf ( `
2021-10-25 10:55:59 -07:00
telemetry = {
prometheus_retention_time = "5s"
disable_hostname = true
2023-11-07 10:15:50 -06:00
metrics_prefix = "%s"
2021-10-25 10:55:59 -07:00
}
bootstrap = false
server = false
2023-11-07 10:15:50 -06:00
` , metricsPrefix )
2021-10-08 10:31:50 -07:00
2021-10-25 10:55:59 -07:00
a := StartTestAgent ( t , TestAgent { HCL : hcl } )
defer a . Shutdown ( )
2021-10-08 10:31:50 -07:00
2021-10-25 10:55:59 -07:00
respRec := httptest . NewRecorder ( )
recordPromMetrics ( t , a , respRec )
2021-10-08 10:31:50 -07:00
2023-11-07 10:15:50 -06:00
assertMetricNotExists ( t , respRec , metricsPrefix + "_autopilot_healthy" )
assertMetricNotExists ( t , respRec , metricsPrefix + "_autopilot_failure_tolerance" )
2021-10-25 10:55:59 -07:00
} )
2021-10-08 10:31:50 -07:00
t . Run ( "Check consul_autopilot_healthy metric value on startup" , func ( t * testing . T ) {
2023-11-07 10:15:50 -06:00
metricsPrefix := getUniqueMetricsPrefix ( )
hcl := fmt . Sprintf ( `
2021-10-25 10:55:59 -07:00
// don't bootstrap agent so as not to
// become a leader
telemetry = {
prometheus_retention_time = "5s" ,
disable_hostname = true
2023-11-07 10:15:50 -06:00
metrics_prefix = "%s"
2021-10-08 10:31:50 -07:00
}
2021-10-25 10:55:59 -07:00
bootstrap = false
2023-11-07 10:15:50 -06:00
` , metricsPrefix )
2021-10-08 10:31:50 -07:00
2021-10-25 10:55:59 -07:00
a := StartTestAgent ( t , TestAgent { HCL : hcl } )
defer a . Shutdown ( )
respRec := httptest . NewRecorder ( )
recordPromMetrics ( t , a , respRec )
2023-11-07 10:15:50 -06:00
assertMetricExistsWithValue ( t , respRec , metricsPrefix + "_autopilot_healthy" , "1" )
assertMetricExistsWithValue ( t , respRec , metricsPrefix + "_autopilot_failure_tolerance" , "0" )
2021-10-25 10:55:59 -07:00
} )
2021-10-08 10:31:50 -07:00
}
2021-10-27 15:23:29 -04:00
func TestHTTPHandlers_AgentMetrics_TLSCertExpiry_Prometheus ( t * testing . T ) {
skipIfShortTesting ( t )
// This test cannot use t.Parallel() since we modify global state, ie the global metrics instance
dir := testutil . TempDir ( t , "ca" )
caPEM , caPK , err := tlsutil . GenerateCA ( tlsutil . CAOpts { Days : 20 , Domain : "consul" } )
require . NoError ( t , err )
caPath := filepath . Join ( dir , "ca.pem" )
2022-11-10 10:26:01 -06:00
err = os . WriteFile ( caPath , [ ] byte ( caPEM ) , 0600 )
2021-10-27 15:23:29 -04:00
require . NoError ( t , err )
signer , err := tlsutil . ParseSigner ( caPK )
require . NoError ( t , err )
pem , key , err := tlsutil . GenerateCert ( tlsutil . CertOpts {
Signer : signer ,
CA : caPEM ,
Name : "server.dc1.consul" ,
Days : 20 ,
ExtKeyUsage : [ ] x509 . ExtKeyUsage { x509 . ExtKeyUsageClientAuth } ,
} )
require . NoError ( t , err )
certPath := filepath . Join ( dir , "cert.pem" )
2022-11-10 10:26:01 -06:00
err = os . WriteFile ( certPath , [ ] byte ( pem ) , 0600 )
2021-10-27 15:23:29 -04:00
require . NoError ( t , err )
keyPath := filepath . Join ( dir , "cert.key" )
2022-11-10 10:26:01 -06:00
err = os . WriteFile ( keyPath , [ ] byte ( key ) , 0600 )
2021-10-27 15:23:29 -04:00
require . NoError ( t , err )
2023-11-07 10:15:50 -06:00
metricsPrefix := getUniqueMetricsPrefix ( )
2021-10-27 15:23:29 -04:00
hcl := fmt . Sprintf ( `
telemetry = {
prometheus_retention_time = "5s" ,
disable_hostname = true
2023-11-07 10:15:50 -06:00
metrics_prefix = "%s"
2021-10-27 15:23:29 -04:00
}
ca_file = "%s"
cert_file = "%s"
key_file = "%s"
2023-11-07 10:15:50 -06:00
` , metricsPrefix , caPath , certPath , keyPath )
2021-10-27 15:23:29 -04:00
a := StartTestAgent ( t , TestAgent { HCL : hcl } )
defer a . Shutdown ( )
respRec := httptest . NewRecorder ( )
recordPromMetrics ( t , a , respRec )
2023-11-07 10:15:50 -06:00
require . Contains ( t , respRec . Body . String ( ) , metricsPrefix + "_agent_tls_cert_expiry 1.7" )
2021-10-27 15:23:29 -04:00
}
2021-10-27 15:56:38 -04:00
func TestHTTPHandlers_AgentMetrics_CACertExpiry_Prometheus ( t * testing . T ) {
skipIfShortTesting ( t )
// This test cannot use t.Parallel() since we modify global state, ie the global metrics instance
t . Run ( "non-leader emits NaN" , func ( t * testing . T ) {
2023-11-07 10:15:50 -06:00
metricsPrefix := getUniqueMetricsPrefix ( )
hcl := fmt . Sprintf ( `
2021-10-27 15:56:38 -04:00
telemetry = {
prometheus_retention_time = "5s" ,
disable_hostname = true
2023-11-07 10:15:50 -06:00
metrics_prefix = "%s"
2021-10-27 15:56:38 -04:00
}
connect {
enabled = true
}
bootstrap = false
2023-11-07 10:15:50 -06:00
` , metricsPrefix )
2021-10-27 15:56:38 -04:00
a := StartTestAgent ( t , TestAgent { HCL : hcl } )
defer a . Shutdown ( )
respRec := httptest . NewRecorder ( )
recordPromMetrics ( t , a , respRec )
2023-11-07 10:15:50 -06:00
require . Contains ( t , respRec . Body . String ( ) , metricsPrefix + "_mesh_active_root_ca_expiry NaN" )
require . Contains ( t , respRec . Body . String ( ) , metricsPrefix + "_mesh_active_signing_ca_expiry NaN" )
2021-10-27 15:56:38 -04:00
} )
t . Run ( "leader emits a value" , func ( t * testing . T ) {
2023-11-07 10:15:50 -06:00
metricsPrefix := getUniqueMetricsPrefix ( )
hcl := fmt . Sprintf ( `
2021-10-27 15:56:38 -04:00
telemetry = {
prometheus_retention_time = "5s" ,
disable_hostname = true
2023-11-07 10:15:50 -06:00
metrics_prefix = "%s"
2021-10-27 15:56:38 -04:00
}
connect {
enabled = true
}
2023-11-07 10:15:50 -06:00
` , metricsPrefix )
2021-10-27 15:56:38 -04:00
a := StartTestAgent ( t , TestAgent { HCL : hcl } )
defer a . Shutdown ( )
testrpc . WaitForLeader ( t , a . RPC , "dc1" )
respRec := httptest . NewRecorder ( )
recordPromMetrics ( t , a , respRec )
out := respRec . Body . String ( )
2023-11-07 10:15:50 -06:00
require . Contains ( t , out , metricsPrefix + "_mesh_active_root_ca_expiry 3.15" )
require . Contains ( t , out , metricsPrefix + "_mesh_active_signing_ca_expiry 3.15" )
2021-10-27 15:56:38 -04:00
} )
}
2023-02-23 14:07:17 +00:00
func TestHTTPHandlers_AgentMetrics_WAL_Prometheus ( t * testing . T ) {
skipIfShortTesting ( t )
// This test cannot use t.Parallel() since we modify global state, ie the global metrics instance
t . Run ( "client agent emits nothing" , func ( t * testing . T ) {
2023-11-07 10:15:50 -06:00
metricsPrefix := getUniqueMetricsPrefix ( )
hcl := fmt . Sprintf ( `
2023-02-23 14:07:17 +00:00
server = false
telemetry = {
prometheus_retention_time = "5s" ,
disable_hostname = true
2023-11-07 10:15:50 -06:00
metrics_prefix = "%s"
2023-02-23 14:07:17 +00:00
}
raft_logstore {
backend = "wal"
}
bootstrap = false
2023-11-07 10:15:50 -06:00
` , metricsPrefix )
2023-02-23 14:07:17 +00:00
a := StartTestAgent ( t , TestAgent { HCL : hcl } )
defer a . Shutdown ( )
respRec := httptest . NewRecorder ( )
recordPromMetrics ( t , a , respRec )
2023-11-07 10:15:50 -06:00
require . NotContains ( t , respRec . Body . String ( ) , metricsPrefix + "_raft_wal" )
2023-02-23 14:07:17 +00:00
} )
t . Run ( "server with WAL enabled emits WAL metrics" , func ( t * testing . T ) {
2023-11-07 10:15:50 -06:00
metricsPrefix := getUniqueMetricsPrefix ( )
hcl := fmt . Sprintf ( `
2023-02-23 14:07:17 +00:00
server = true
bootstrap = true
telemetry = {
prometheus_retention_time = "5s" ,
disable_hostname = true
2023-11-07 10:15:50 -06:00
metrics_prefix = "%s"
2023-02-23 14:07:17 +00:00
}
connect {
enabled = true
}
raft_logstore {
backend = "wal"
}
2023-11-07 10:15:50 -06:00
` , metricsPrefix )
2023-02-23 14:07:17 +00:00
a := StartTestAgent ( t , TestAgent { HCL : hcl } )
defer a . Shutdown ( )
testrpc . WaitForLeader ( t , a . RPC , "dc1" )
2023-10-05 17:27:28 -04:00
testretry . Run ( t , func ( r * testretry . R ) {
respRec := httptest . NewRecorder ( )
recordPromMetrics ( r , a , respRec )
out := respRec . Body . String ( )
2023-11-07 10:15:50 -06:00
require . Contains ( r , out , metricsPrefix + "_raft_wal_head_truncations" )
require . Contains ( r , out , metricsPrefix + "_raft_wal_last_segment_age_seconds" )
require . Contains ( r , out , metricsPrefix + "_raft_wal_log_appends" )
require . Contains ( r , out , metricsPrefix + "_raft_wal_log_entries_read" )
require . Contains ( r , out , metricsPrefix + "_raft_wal_log_entries_written" )
require . Contains ( r , out , metricsPrefix + "_raft_wal_log_entry_bytes_read" )
require . Contains ( r , out , metricsPrefix + "_raft_wal_log_entry_bytes_written" )
require . Contains ( r , out , metricsPrefix + "_raft_wal_segment_rotations" )
require . Contains ( r , out , metricsPrefix + "_raft_wal_stable_gets" )
require . Contains ( r , out , metricsPrefix + "_raft_wal_stable_sets" )
require . Contains ( r , out , metricsPrefix + "_raft_wal_tail_truncations" )
2023-10-05 17:27:28 -04:00
} )
2023-02-23 14:07:17 +00:00
} )
t . Run ( "server without WAL enabled emits no WAL metrics" , func ( t * testing . T ) {
2023-11-07 10:15:50 -06:00
metricsPrefix := getUniqueMetricsPrefix ( )
hcl := fmt . Sprintf ( `
2023-02-23 14:07:17 +00:00
server = true
bootstrap = true
telemetry = {
prometheus_retention_time = "5s" ,
disable_hostname = true
2023-11-07 10:15:50 -06:00
metrics_prefix = "%s"
2023-02-23 14:07:17 +00:00
}
connect {
enabled = true
}
raft_logstore {
backend = "boltdb"
}
2023-11-07 10:15:50 -06:00
` , metricsPrefix )
2023-02-23 14:07:17 +00:00
a := StartTestAgent ( t , TestAgent { HCL : hcl } )
defer a . Shutdown ( )
testrpc . WaitForLeader ( t , a . RPC , "dc1" )
respRec := httptest . NewRecorder ( )
recordPromMetrics ( t , a , respRec )
2023-11-07 10:15:50 -06:00
require . NotContains ( t , respRec . Body . String ( ) , metricsPrefix + "_raft_wal" )
2023-02-23 14:07:17 +00:00
} )
}
func TestHTTPHandlers_AgentMetrics_LogVerifier_Prometheus ( t * testing . T ) {
skipIfShortTesting ( t )
// This test cannot use t.Parallel() since we modify global state, ie the global metrics instance
t . Run ( "client agent emits nothing" , func ( t * testing . T ) {
2023-11-07 10:15:50 -06:00
metricsPrefix := getUniqueMetricsPrefix ( )
hcl := fmt . Sprintf ( `
2023-02-23 14:07:17 +00:00
server = false
telemetry = {
prometheus_retention_time = "5s" ,
disable_hostname = true
2023-11-07 10:15:50 -06:00
metrics_prefix = "%s"
2023-02-23 14:07:17 +00:00
}
raft_logstore {
verification {
enabled = true
interval = "1s"
}
}
bootstrap = false
2023-11-07 10:15:50 -06:00
` , metricsPrefix )
2023-02-23 14:07:17 +00:00
a := StartTestAgent ( t , TestAgent { HCL : hcl } )
defer a . Shutdown ( )
respRec := httptest . NewRecorder ( )
recordPromMetrics ( t , a , respRec )
2023-11-07 10:15:50 -06:00
require . NotContains ( t , respRec . Body . String ( ) , metricsPrefix + "_raft_logstore_verifier" )
2023-02-23 14:07:17 +00:00
} )
t . Run ( "server with verifier enabled emits all metrics" , func ( t * testing . T ) {
2023-11-07 10:15:50 -06:00
metricsPrefix := getUniqueMetricsPrefix ( )
hcl := fmt . Sprintf ( `
2023-02-23 14:07:17 +00:00
server = true
bootstrap = true
telemetry = {
prometheus_retention_time = "5s" ,
disable_hostname = true
2023-11-07 10:15:50 -06:00
metrics_prefix = "%s"
2023-02-23 14:07:17 +00:00
}
connect {
enabled = true
}
raft_logstore {
verification {
enabled = true
interval = "1s"
}
}
2023-11-07 10:15:50 -06:00
` , metricsPrefix )
2023-02-23 14:07:17 +00:00
a := StartTestAgent ( t , TestAgent { HCL : hcl } )
defer a . Shutdown ( )
testrpc . WaitForLeader ( t , a . RPC , "dc1" )
2023-10-05 17:27:28 -04:00
testretry . Run ( t , func ( r * testretry . R ) {
respRec := httptest . NewRecorder ( )
recordPromMetrics ( r , a , respRec )
out := respRec . Body . String ( )
2023-11-07 10:15:50 -06:00
require . Contains ( r , out , metricsPrefix + "_raft_logstore_verifier_checkpoints_written" )
require . Contains ( r , out , metricsPrefix + "_raft_logstore_verifier_dropped_reports" )
require . Contains ( r , out , metricsPrefix + "_raft_logstore_verifier_ranges_verified" )
require . Contains ( r , out , metricsPrefix + "_raft_logstore_verifier_read_checksum_failures" )
require . Contains ( r , out , metricsPrefix + "_raft_logstore_verifier_write_checksum_failures" )
2023-10-05 17:27:28 -04:00
} )
2023-02-23 14:07:17 +00:00
} )
t . Run ( "server with verifier disabled emits no extra metrics" , func ( t * testing . T ) {
2023-11-07 10:15:50 -06:00
metricsPrefix := getUniqueMetricsPrefix ( )
hcl := fmt . Sprintf ( `
2023-02-23 14:07:17 +00:00
server = true
bootstrap = true
telemetry = {
prometheus_retention_time = "5s" ,
disable_hostname = true
2023-11-07 10:15:50 -06:00
metrics_prefix = "%s"
2023-02-23 14:07:17 +00:00
}
connect {
enabled = true
}
raft_logstore {
verification {
enabled = false
}
}
2023-11-07 10:15:50 -06:00
` , metricsPrefix )
2023-02-23 14:07:17 +00:00
a := StartTestAgent ( t , TestAgent { HCL : hcl } )
defer a . Shutdown ( )
testrpc . WaitForLeader ( t , a . RPC , "dc1" )
respRec := httptest . NewRecorder ( )
recordPromMetrics ( t , a , respRec )
2023-11-07 10:15:50 -06:00
require . NotContains ( t , respRec . Body . String ( ) , metricsPrefix + "_raft_logstore_verifier" )
2023-02-23 14:07:17 +00:00
} )
}