From a6981423252f0cffceb641e8bf84494a93af3902 Mon Sep 17 00:00:00 2001
From: Derek Menteer <105233703+hashi-derek@users.noreply.github.com>
Date: Fri, 1 Sep 2023 12:29:09 -0500
Subject: [PATCH] Add extra logging for mesh health endpoints. (#18647)

---
 agent/proxycfg-glue/health_blocking.go | 13 +++++++++++++
 agent/proxycfg/upstreams.go            |  4 ++++
 agent/xds/endpoints.go                 |  4 +++-
 3 files changed, 20 insertions(+), 1 deletion(-)

diff --git a/agent/proxycfg-glue/health_blocking.go b/agent/proxycfg-glue/health_blocking.go
index bb2fe948b4..8ed384f837 100644
--- a/agent/proxycfg-glue/health_blocking.go
+++ b/agent/proxycfg-glue/health_blocking.go
@@ -106,6 +106,12 @@ func (h *serverHealthBlocking) Notify(ctx context.Context, args *structs.Service
 			// their data, rather than holding onto the last-known list of healthy nodes indefinitely.
 			if hadResults {
 				hadResults = false
+				h.deps.Logger.Debug("serverHealthBlocking emitting zero check-service-nodes due to insufficient ACL privileges",
+					"serviceName", structs.NewServiceName(args.ServiceName, &args.EnterpriseMeta),
+					"correlationID", correlationID,
+					"connect", args.Connect,
+					"ingress", args.Ingress,
+				)
 				return 0, &structs.IndexedCheckServiceNodes{}, watch.ErrorACLResetData
 			}
 			return 0, nil, acl.ErrPermissionDenied
@@ -132,6 +138,13 @@ func (h *serverHealthBlocking) Notify(ctx context.Context, args *structs.Service
 			}
 
 			hadResults = true
+			h.deps.Logger.Trace("serverHealthBlocking emitting check-service-nodes",
+				"serviceName", structs.NewServiceName(args.ServiceName, &args.EnterpriseMeta),
+				"correlationID", correlationID,
+				"connect", args.Connect,
+				"ingress", args.Ingress,
+				"nodes", len(thisReply.Nodes),
+			)
 			return thisReply.Index, &thisReply, nil
 		},
 		dispatchBlockingQueryUpdate[*structs.IndexedCheckServiceNodes](ch),
diff --git a/agent/proxycfg/upstreams.go b/agent/proxycfg/upstreams.go
index ff2cbd212a..fe2d502339 100644
--- a/agent/proxycfg/upstreams.go
+++ b/agent/proxycfg/upstreams.go
@@ -136,6 +136,10 @@ func (s *handlerUpstreams) handleUpdateUpstreams(ctx context.Context, u UpdateEv
 
 		uid := UpstreamIDFromString(uidString)
 
+		s.logger.Debug("upstream-target watch fired",
+			"correlationID", correlationID,
+			"nodes", len(resp.Nodes),
+		)
 		if _, ok := upstreamsSnapshot.WatchedUpstreamEndpoints[uid]; !ok {
 			upstreamsSnapshot.WatchedUpstreamEndpoints[uid] = make(map[string]structs.CheckServiceNodes)
 		}
diff --git a/agent/xds/endpoints.go b/agent/xds/endpoints.go
index a2c36f06bd..ff486f3228 100644
--- a/agent/xds/endpoints.go
+++ b/agent/xds/endpoints.go
@@ -750,6 +750,7 @@ func (s *ResourceGenerator) endpointsFromDiscoveryChain(
 		}
 		switch len(groupedTarget.Targets) {
 		case 0:
+			s.Logger.Trace("skipping endpoint generation for zero-length target group", "cluster", clusterName)
 			continue
 		case 1:
 			// We expect one target so this passes through to continue setting the load assignment up.
@@ -757,7 +758,7 @@ func (s *ResourceGenerator) endpointsFromDiscoveryChain(
 			return nil, fmt.Errorf("cannot have more than one target")
 		}
 		ti := groupedTarget.Targets[0]
-		s.Logger.Debug("generating endpoints for", "cluster", clusterName, "targetID", ti.TargetID)
+		s.Logger.Trace("generating endpoints for", "cluster", clusterName, "targetID", ti.TargetID, "gatewayKey", gatewayKey)
 		targetUID := proxycfg.NewUpstreamIDFromTargetID(ti.TargetID)
 		if targetUID.Peer != "" {
 			loadAssignment, err := s.makeUpstreamLoadAssignmentForPeerService(cfgSnap, clusterName, targetUID, mgwMode)
@@ -779,6 +780,7 @@ func (s *ResourceGenerator) endpointsFromDiscoveryChain(
 			forMeshGateway,
 		)
 		if !valid {
+			s.Logger.Trace("skipping endpoint generation for invalid target group", "cluster", clusterName)
 			continue // skip the cluster if we're still populating the snapshot
 		}
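
Note on the logging style used by the added lines: they follow HashiCorp's structured key/value logger (go-hclog), where a message is followed by alternating keys and values, and Trace-level lines are only emitted when the logger is configured at Trace or lower. The standalone Go sketch below illustrates that pattern only; the logger name and key/value data are illustrative placeholders, not taken from Consul or from this patch.

package main

import (
	"os"

	hclog "github.com/hashicorp/go-hclog"
)

func main() {
	// Configure the logger at Trace level so that both Debug- and
	// Trace-level lines (like the ones added by this patch) are emitted.
	logger := hclog.New(&hclog.LoggerOptions{
		Name:   "proxycfg-demo", // illustrative name, not a real Consul logger name
		Level:  hclog.Trace,
		Output: os.Stderr,
	})

	// Structured key/value logging: message first, then alternating keys and values.
	logger.Debug("upstream-target watch fired",
		"correlationID", "upstream-target:db", // example value only
		"nodes", 3,
	)
	logger.Trace("emitting check-service-nodes",
		"serviceName", "db", // example value only
		"nodes", 3,
	)
}

On a running Consul agent, the new Trace lines become visible when the agent log level is lowered, e.g. with -log-level=trace or log_level = "trace" in the agent configuration.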