troubleshoot: output messages for the troubleshoot proxy command (#16208)

This commit is contained in:
Nitya Dhanushkodi 2023-02-08 13:03:15 -08:00 committed by GitHub
parent 898e59b13c
commit 1f25289048
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
12 changed files with 211 additions and 99 deletions

View File

@ -55,7 +55,7 @@ func TestValidateUpstreams(t *testing.T) {
delete(ir.Index[xdscommon.ListenerType], listenerName)
return ir
},
err: "no listener",
err: "no listener for upstream \"db\"",
},
{
name: "tcp-missing-cluster",
@ -66,7 +66,7 @@ func TestValidateUpstreams(t *testing.T) {
delete(ir.Index[xdscommon.ClusterType], sni)
return ir
},
err: "no cluster",
err: "no cluster \"db.default.dc1.internal.11111111-2222-3333-4444-555555555555.consul\" for upstream \"db\"",
},
{
name: "http-success",
@ -124,7 +124,7 @@ func TestValidateUpstreams(t *testing.T) {
delete(ir.Index[xdscommon.RouteType], "db")
return ir
},
err: "no route",
err: "no route for upstream \"db\"",
},
{
name: "redirect",
@ -170,7 +170,7 @@ func TestValidateUpstreams(t *testing.T) {
delete(ir.Index[xdscommon.ClusterType], sni)
return ir
},
err: "no cluster",
err: "no cluster \"google.default.dc1.internal.11111111-2222-3333-4444-555555555555.consul\" for upstream \"240.0.0.1\"",
},
{
name: "tproxy-http-redirect-success",
@ -225,13 +225,17 @@ func TestValidateUpstreams(t *testing.T) {
// This only tests validation for listeners, routes, and clusters. Endpoints validation is done in a top
// level test that can parse the output of the /clusters endpoint. So for this test, we set clusters to nil.
err = troubleshoot.Validate(indexedResources, envoyID, vip, false, nil)
messages := troubleshoot.Validate(indexedResources, envoyID, vip, false, nil)
var outputErrors string
for _, msgError := range messages.Errors() {
outputErrors += msgError.Message
outputErrors += msgError.PossibleActions
}
if len(tt.err) == 0 {
require.NoError(t, err)
require.True(t, messages.Success())
} else {
require.Error(t, err)
require.Contains(t, err.Error(), tt.err)
require.Contains(t, outputErrors, tt.err)
}
})
}

View File

@ -76,14 +76,21 @@ func (c *cmd) Run(args []string) int {
c.UI.Error("error generating troubleshoot client: " + err.Error())
return 1
}
output, err := t.RunAllTests(c.upstream)
messages, err := t.RunAllTests(c.upstream)
if err != nil {
c.UI.Error("error running the tests: " + err.Error())
return 1
}
for _, o := range output {
c.UI.Output(o)
for _, o := range messages {
if o.Success {
c.UI.Output(o.Message)
} else {
c.UI.Error(o.Message)
if o.PossibleActions != "" {
c.UI.Output(o.PossibleActions)
}
}
}
return 0
}

View File

@ -16,6 +16,7 @@ require (
github.com/hashicorp/consul/api v1.18.0
github.com/hashicorp/consul/envoyextensions v0.0.0-00010101000000-000000000000
github.com/hashicorp/go-multierror v1.1.1
github.com/pkg/errors v0.9.1
github.com/stretchr/testify v1.8.0
google.golang.org/protobuf v1.28.1
)

View File

@ -166,6 +166,7 @@ github.com/pascaldekloe/goe v0.1.0/go.mod h1:lzWF7FIEvWOWxwDKqyGYQf6ZUaNfKdP144T
github.com/pkg/errors v0.8.0/go.mod h1:bwawxfHBFNV+L2hUp1rHADufV3IMtnDRdf1r5NINEl0=
github.com/pkg/errors v0.8.1/go.mod h1:bwawxfHBFNV+L2hUp1rHADufV3IMtnDRdf1r5NINEl0=
github.com/pkg/errors v0.9.1 h1:FEBLx1zS214owpjy7qsBeixbURkuhQAwrK5UwLGTwt4=
github.com/pkg/errors v0.9.1/go.mod h1:bwawxfHBFNV+L2hUp1rHADufV3IMtnDRdf1r5NINEl0=
github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4=
github.com/pmezard/go-difflib v1.0.1-0.20181226105442-5d4384ee4fb2 h1:Jamvg5psRIccs7FGNTlIRMkT8wgtp5eCXdBlqhYGL6U=
github.com/pmezard/go-difflib v1.0.1-0.20181226105442-5d4384ee4fb2/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4=

View File

@ -1,43 +1,58 @@
package troubleshoot
import (
"errors"
"fmt"
"time"
envoy_admin_v3 "github.com/envoyproxy/go-control-plane/envoy/admin/v3"
"github.com/hashicorp/go-multierror"
"github.com/hashicorp/consul/troubleshoot/validate"
"google.golang.org/protobuf/encoding/protojson"
)
func (t *Troubleshoot) validateCerts(certs *envoy_admin_v3.Certificates) error {
func (t *Troubleshoot) validateCerts(certs *envoy_admin_v3.Certificates) validate.Messages {
var certMessages validate.Messages
// TODO: we can probably warn if the expiration date is close
var resultErr error
now := time.Now()
if certs == nil {
return errors.New("certs object is nil")
msg := validate.Message{
Success: false,
Message: "certificate object is nil in the proxy configuration",
}
return []validate.Message{msg}
}
if len(certs.GetCertificates()) == 0 {
return errors.New("no certificates provided")
msg := validate.Message{
Success: false,
Message: "no certificates found",
}
return []validate.Message{msg}
}
for _, cert := range certs.GetCertificates() {
for _, cacert := range cert.GetCaCert() {
if now.After(cacert.GetExpirationTime().AsTime()) {
resultErr = multierror.Append(resultErr, fmt.Errorf("Ca cert is expired"))
msg := validate.Message{
Success: false,
Message: "ca certificate is expired",
}
certMessages = append(certMessages, msg)
}
}
for _, cc := range cert.GetCertChain() {
if now.After(cc.GetExpirationTime().AsTime()) {
resultErr = multierror.Append(resultErr, fmt.Errorf("cert chain is expired"))
msg := validate.Message{
Success: false,
Message: "certificate chain is expired",
}
certMessages = append(certMessages, msg)
}
}
}
return resultErr
return certMessages
}
func (t *Troubleshoot) getEnvoyCerts() (*envoy_admin_v3.Certificates, error) {

View File

@ -15,21 +15,21 @@ func TestValidateCerts(t *testing.T) {
anHourAgo := timestamppb.New(time.Now().Add(-1 * time.Hour))
x := []struct {
cases := map[string]struct {
certs *envoy_admin_v3.Certificates
expectedError string
}{
{
"cert is nil": {
certs: nil,
expectedError: "certs object is nil",
expectedError: "certificate object is nil in the proxy configuration",
},
{
"no certificates": {
certs: &envoy_admin_v3.Certificates{
Certificates: []*envoy_admin_v3.Certificate{},
},
expectedError: "no certificates provided",
expectedError: "no certificates found",
},
{
"ca expired": {
certs: &envoy_admin_v3.Certificates{
Certificates: []*envoy_admin_v3.Certificate{
{
@ -41,9 +41,9 @@ func TestValidateCerts(t *testing.T) {
},
},
},
expectedError: "Ca cert is expired",
expectedError: "ca certificate is expired",
},
{
"cert expired": {
certs: &envoy_admin_v3.Certificates{
Certificates: []*envoy_admin_v3.Certificate{
{
@ -55,17 +55,27 @@ func TestValidateCerts(t *testing.T) {
},
},
},
expectedError: "cert chain is expired",
expectedError: "certificate chain is expired",
},
}
ts := Troubleshoot{}
for _, tc := range x {
err := ts.validateCerts(tc.certs)
if tc.expectedError != "" {
require.Error(t, err)
require.Contains(t, err.Error(), tc.expectedError)
}
for name, tc := range cases {
t.Run(name, func(t *testing.T) {
messages := ts.validateCerts(tc.certs)
var outputErrors string
for _, msgError := range messages.Errors() {
outputErrors += msgError.Message
outputErrors += msgError.PossibleActions
}
if tc.expectedError == "" {
require.True(t, messages.Success())
} else {
require.Contains(t, outputErrors, tc.expectedError)
}
})
}
}

View File

@ -7,7 +7,7 @@ import (
envoy_admin_v3 "github.com/envoyproxy/go-control-plane/envoy/admin/v3"
"github.com/hashicorp/consul/api"
"github.com/hashicorp/go-multierror"
"github.com/hashicorp/consul/troubleshoot/validate"
)
const (
@ -52,27 +52,36 @@ func NewTroubleshoot(envoyIP *net.IPAddr, envoyPort string) (*Troubleshoot, erro
}, nil
}
func (t *Troubleshoot) RunAllTests(envoyID string) ([]string, error) {
var resultErr error
var output []string
func (t *Troubleshoot) RunAllTests(envoyID string) (validate.Messages, error) {
var allTestMessages validate.Messages
// Validate certs
// Get all info from proxy to set up validations.
err := t.GetEnvoyConfigDump()
if err != nil {
return nil, fmt.Errorf("unable to get Envoy config dump: cannot connect to Envoy: %w", err)
}
err = t.getEnvoyClusters()
if err != nil {
return nil, fmt.Errorf("unable to get Envoy clusters: cannot connect to Envoy: %w", err)
}
certs, err := t.getEnvoyCerts()
if err != nil {
resultErr = multierror.Append(resultErr, fmt.Errorf("unable to get certs: %w", err))
return nil, fmt.Errorf("unable to get Envoy certificates: cannot connect to Envoy: %w", err)
}
indexedResources, err := ProxyConfigDumpToIndexedResources(t.envoyConfigDump)
if err != nil {
return nil, fmt.Errorf("unable to index Envoy resources: %w", err)
}
if certs != nil && len(certs.GetCertificates()) != 0 {
err = t.validateCerts(certs)
if err != nil {
resultErr = multierror.Append(resultErr, fmt.Errorf("unable to validate certs: %w", err))
} else {
output = append(output, "certs are valid")
// Validate certs.
messages := t.validateCerts(certs)
allTestMessages = append(allTestMessages, messages...)
if errors := messages.Errors(); len(errors) == 0 {
msg := validate.Message{
Success: true,
Message: "certificates are valid",
}
} else {
resultErr = multierror.Append(resultErr, fmt.Errorf("no certificate found"))
allTestMessages = append(allTestMessages, msg)
}
// getStats usage example
@ -81,18 +90,16 @@ func (t *Troubleshoot) RunAllTests(envoyID string) ([]string, error) {
// resultErr = multierror.Append(resultErr, err)
// }
// Validate listeners, routes, clusters, endpoints
t.GetEnvoyConfigDump()
t.getEnvoyClusters()
indexedResources, err := ProxyConfigDumpToIndexedResources(t.envoyConfigDump)
if err != nil {
resultErr = multierror.Append(resultErr, fmt.Errorf("unable to index resources: %v", err))
// Validate listeners, routes, clusters, endpoints.
messages = Validate(indexedResources, envoyID, "", true, t.envoyClusters)
allTestMessages = append(allTestMessages, messages...)
if errors := messages.Errors(); len(errors) == 0 {
msg := validate.Message{
Success: true,
Message: "upstream resources are valid",
}
allTestMessages = append(allTestMessages, msg)
}
err = Validate(indexedResources, envoyID, "", true, t.envoyClusters)
if err != nil {
resultErr = multierror.Append(resultErr, fmt.Errorf("unable to validate proxy config: %v", err))
}
return output, resultErr
return allTestMessages, nil
}

View File

@ -54,7 +54,6 @@ func (t *Troubleshoot) GetEnvoyConfigDump() error {
if err != nil {
return err
}
// TODO: validate here
t.envoyConfigDump = config
return nil
}
@ -81,10 +80,10 @@ func (t *Troubleshoot) parseClusters(clusters *envoy_admin_v3.Clusters) ([]strin
return upstreams, nil
}
func (t *Troubleshoot) getEnvoyClusters() (*envoy_admin_v3.Clusters, error) {
func (t *Troubleshoot) getEnvoyClusters() error {
clustersRaw, err := t.request("clusters?format=json")
if err != nil {
return nil, err
return err
}
clusters := &envoy_admin_v3.Clusters{}
@ -93,10 +92,9 @@ func (t *Troubleshoot) getEnvoyClusters() (*envoy_admin_v3.Clusters, error) {
}
err = unmarshal.Unmarshal(clustersRaw, clusters)
if err != nil {
return nil, err
return err
}
// TODO: validate here
t.envoyClusters = clusters
return clusters, nil
return nil
}

View File

@ -51,7 +51,7 @@ func ParseClusters(rawClusters []byte) (*envoy_admin_v3.Clusters, error) {
// Validate validates the Envoy resources (indexedResources) for a given upstream service, peer, and vip. The peer
// should be "" for an upstream not on a remote peer. The vip is required for a transparent proxy upstream.
func Validate(indexedResources *xdscommon.IndexedResources, envoyID string, vip string, validateEndpoints bool, clusters *envoy_admin_v3.Clusters) error {
func Validate(indexedResources *xdscommon.IndexedResources, envoyID string, vip string, validateEndpoints bool, clusters *envoy_admin_v3.Clusters) validate.Messages {
// Get all SNIs from the clusters in the configuration. Not all SNIs will need to be validated, but this ensures we
// capture SNIs which aren't directly identical to the upstream service name, but are still used for that upstream
// service. For example, in the case of having a splitter/redirect or another L7 config entry, the upstream service
@ -91,19 +91,19 @@ func Validate(indexedResources *xdscommon.IndexedResources, envoyID string, vip
}
basicExtension, err := validate.MakeValidate(extConfig)
if err != nil {
return err
return []validate.Message{{Message: err.Error()}}
}
extender := extensioncommon.BasicEnvoyExtender{
Extension: basicExtension,
}
err = extender.Validate(&extConfig)
if err != nil {
return err
return []validate.Message{{Message: err.Error()}}
}
_, err = extender.Extend(indexedResources, &extConfig)
if err != nil {
return err
return []validate.Message{{Message: err.Error()}}
}
v, ok := extender.Extension.(*validate.Validate)
@ -111,7 +111,7 @@ func Validate(indexedResources *xdscommon.IndexedResources, envoyID string, vip
panic("validate plugin was not correctly created")
}
return v.Errors(validateEndpoints, validate.DoEndpointValidation, clusters)
return v.GetMessages(validateEndpoints, validate.DoEndpointValidation, clusters)
}
func ProxyConfigDumpToIndexedResources(config *envoy_admin_v3.ConfigDump) (*xdscommon.IndexedResources, error) {

View File

@ -17,8 +17,8 @@ import (
func TestValidateFromJSON(t *testing.T) {
indexedResources := getConfig(t)
clusters := getClusters(t)
err := Validate(indexedResources, "backend", "", true, clusters)
require.NoError(t, err)
messages := Validate(indexedResources, "backend", "", true, clusters)
require.True(t, messages.Success())
}
// TODO: Manually inspect the config and clusters files and hardcode the list of expected resource names for higher

View File

@ -11,7 +11,6 @@ import (
envoy_aggregate_cluster_v3 "github.com/envoyproxy/go-control-plane/envoy/extensions/clusters/aggregate/v3"
envoy_resource_v3 "github.com/envoyproxy/go-control-plane/pkg/resource/v3"
"github.com/hashicorp/consul/api"
"github.com/hashicorp/go-multierror"
"google.golang.org/protobuf/proto"
"google.golang.org/protobuf/types/known/anypb"
@ -26,6 +25,10 @@ type Validate struct {
// envoyID is an argument to the Validate plugin and identifies which listener to begin the validation with.
envoyID string
// vip is an argument to the Validate plugin and identifies which transparent proxy listener to begin the validation
// with.
vip string
// snis is all of the upstream SNIs for this proxy. It is set via ExtensionConfiguration.
snis map[string]struct{}
@ -93,6 +96,7 @@ func MakeValidate(ext extensioncommon.RuntimeConfig) (extensioncommon.BasicExten
if mainEnvoyID == "" && vip == "" {
return nil, fmt.Errorf("envoyID or virtual IP is required")
}
plugin.vip = vip
plugin.envoyID = mainEnvoyID
plugin.snis = snis
plugin.resources = make(map[string]*resource)
@ -100,15 +104,61 @@ func MakeValidate(ext extensioncommon.RuntimeConfig) (extensioncommon.BasicExten
return &plugin, resultErr
}
// Errors returns the error based only on Validate's state.
func (v *Validate) Errors(validateEndpoints bool, endpointValidator EndpointValidator, clusters *envoy_admin_v3.Clusters) error {
var resultErr error
type Messages []Message
type Message struct {
Success bool
Message string
PossibleActions string
}
func (m Messages) Success() bool {
for _, message := range m {
if !message.Success {
return false
}
}
return true
}
func (m Messages) Errors() Messages {
var errors Messages
for _, message := range m {
if !message.Success {
errors = append(errors, message)
}
}
return errors
}
// GetMessages returns the error based only on Validate's state.
func (v *Validate) GetMessages(validateEndpoints bool, endpointValidator EndpointValidator, clusters *envoy_admin_v3.Clusters) Messages {
var messages Messages
var upstream string
upstream = v.envoyID
if v.envoyID == "" {
upstream = v.vip
}
if !v.listener {
resultErr = multierror.Append(resultErr, fmt.Errorf("no listener"))
messages = append(messages, Message{Message: fmt.Sprintf("no listener for upstream %q", upstream)})
} else {
messages = append(messages, Message{
Message: fmt.Sprintf("listener for upstream %q found", upstream),
Success: true,
})
}
if v.usesRDS && !v.route {
resultErr = multierror.Append(resultErr, fmt.Errorf("no route"))
messages = append(messages, Message{Message: fmt.Sprintf("no route for upstream %q", upstream)})
} else {
messages = append(messages, Message{
Message: fmt.Sprintf("route for upstream %q found", upstream),
Success: true,
})
}
numRequiredResources := 0
@ -122,8 +172,13 @@ func (v *Validate) Errors(validateEndpoints bool, endpointValidator EndpointVali
_, ok := v.snis[sni]
if !ok || !resource.cluster {
resultErr = multierror.Append(resultErr, fmt.Errorf("no cluster for sni %s", sni))
messages = append(messages, Message{Message: fmt.Sprintf("no cluster %q for upstream %q", sni, upstream)})
continue
} else {
messages = append(messages, Message{
Message: fmt.Sprintf("cluster %q for upstream %q found", sni, upstream),
Success: true,
})
}
if validateEndpoints {
@ -140,16 +195,25 @@ func (v *Validate) Errors(validateEndpoints bool, endpointValidator EndpointVali
}
}
if !oneClusterHasEndpoints {
resultErr = multierror.Append(resultErr, fmt.Errorf("zero healthy endpoints for aggregate cluster %s", sni))
messages = append(messages, Message{Message: fmt.Sprintf("no healthy endpoints for aggregate cluster %q for upstream %q", sni, upstream)})
} else {
messages = append(messages, Message{
Message: fmt.Sprintf("healthy endpoints for aggregate cluster %q for upstream %q", sni, upstream),
Success: true,
})
}
} else if resource.parentCluster == "" {
// Top-level non-aggregate cluster case: check for load assignment and healthy endpoints.
endpointValidator(resource, sni, clusters)
if resource.usesEDS && !resource.loadAssignment {
resultErr = multierror.Append(resultErr, fmt.Errorf("no cluster load assignment for cluster %s", sni))
}
if resource.endpoints == 0 {
resultErr = multierror.Append(resultErr, fmt.Errorf("zero healthy endpoints for cluster %s", sni))
if (resource.usesEDS && !resource.loadAssignment) || resource.endpoints == 0 {
messages = append(messages, Message{
Message: fmt.Sprintf("no healthy endpoints for cluster %q for upstream %q", sni, upstream),
})
} else {
messages = append(messages, Message{
Message: fmt.Sprintf("healthy endpoints for cluster %q for upstream %q", sni, upstream),
Success: true,
})
}
} else {
// Child cluster case: skip, since it'll be verified by the parent aggregate cluster.
@ -160,10 +224,10 @@ func (v *Validate) Errors(validateEndpoints bool, endpointValidator EndpointVali
}
if numRequiredResources == 0 {
resultErr = multierror.Append(resultErr, fmt.Errorf("no clusters found on route or listener"))
messages = append(messages, Message{Message: fmt.Sprintf("no clusters found on route or listener")})
}
return resultErr
return messages
}
// DoEndpointValidation implements the EndpointVerifier function type.

View File

@ -86,7 +86,7 @@ func TestErrors(t *testing.T) {
endpointValidator: func(r *resource, s string, clusters *envoy_admin_v3.Clusters) {
r.loadAssignment = true
},
err: "zero healthy endpoints",
err: "no healthy endpoints for cluster \"db-sni\" for upstream \"db\"",
},
"success: aggregate cluster with one target with endpoints": {
validate: func() *Validate {
@ -169,21 +169,26 @@ func TestErrors(t *testing.T) {
r.loadAssignment = true
r.endpoints = 0
},
err: "zero healthy endpoints for aggregate cluster",
err: "no healthy endpoints for aggregate cluster \"db-sni\" for upstream \"db\"",
},
}
for n, tc := range cases {
t.Run(n, func(t *testing.T) {
v := tc.validate()
err := v.Errors(true, tc.endpointValidator, nil)
messages := v.GetMessages(true, tc.endpointValidator, nil)
if len(tc.err) == 0 {
require.NoError(t, err)
} else {
require.Error(t, err)
require.Contains(t, err.Error(), tc.err)
var outputErrors string
for _, msgError := range messages.Errors() {
outputErrors += msgError.Message
outputErrors += msgError.PossibleActions
}
if tc.err == "" {
require.True(t, messages.Success())
} else {
require.Contains(t, outputErrors, tc.err)
}
})
}