diff --git a/.changelog/20168.txt b/.changelog/20168.txt new file mode 100644 index 0000000000..354991e324 --- /dev/null +++ b/.changelog/20168.txt @@ -0,0 +1,7 @@ +```release-note:enhancement +ProxyCfg: avoid setting a watch on `Internal.ServiceDump` when mesh gateway is not used. +``` + +```release-note:enhancement +ProxyCfg: only return the nodes list when querying the `Internal.ServiceDump` watch from proxycfg +``` diff --git a/agent/consul/internal_endpoint.go b/agent/consul/internal_endpoint.go index 1a8a0fca30..84ea48d056 100644 --- a/agent/consul/internal_endpoint.go +++ b/agent/consul/internal_endpoint.go @@ -197,43 +197,44 @@ func (m *Internal) ServiceDump(args *structs.ServiceDumpRequest, reply *structs. } reply.Nodes = nodes - // get a list of all peerings - index, listedPeerings, err := state.PeeringList(ws, args.EnterpriseMeta) - if err != nil { - return fmt.Errorf("could not list peers for service dump %w", err) - } - - if index > maxIndex { - maxIndex = index - } - - for _, p := range listedPeerings { - // Note we fetch imported services with wildcard namespace because imported services' namespaces - // are in a different locality; regardless of our local namespace, we return all imported services - // of the local partition. - index, importedNodes, err := state.ServiceDump(ws, args.ServiceKind, args.UseServiceKind, args.EnterpriseMeta.WithWildcardNamespace(), p.Name) + if !args.NodesOnly { + // get a list of all peerings + index, listedPeerings, err := state.PeeringList(ws, args.EnterpriseMeta) if err != nil { - return fmt.Errorf("could not get a service dump for peer %q: %w", p.Name, err) + return fmt.Errorf("could not list peers for service dump %w", err) } if index > maxIndex { maxIndex = index } - reply.ImportedNodes = append(reply.ImportedNodes, importedNodes...) - } - // Get, store, and filter gateway services - idx, gatewayServices, err := state.DumpGatewayServices(ws) - if err != nil { - return err - } - reply.Gateways = gatewayServices + for _, p := range listedPeerings { + // Note we fetch imported services with wildcard namespace because imported services' namespaces + // are in a different locality; regardless of our local namespace, we return all imported services + // of the local partition. + index, importedNodes, err := state.ServiceDump(ws, args.ServiceKind, args.UseServiceKind, args.EnterpriseMeta.WithWildcardNamespace(), p.Name) + if err != nil { + return fmt.Errorf("could not get a service dump for peer %q: %w", p.Name, err) + } - if idx > maxIndex { - maxIndex = idx + if index > maxIndex { + maxIndex = index + } + reply.ImportedNodes = append(reply.ImportedNodes, importedNodes...) + } + + // Get, store, and filter gateway services + idx, gatewayServices, err := state.DumpGatewayServices(ws) + if err != nil { + return err + } + reply.Gateways = gatewayServices + + if idx > maxIndex { + maxIndex = idx + } } reply.Index = maxIndex - raw, err := filter.Execute(reply.Nodes) if err != nil { return fmt.Errorf("could not filter local service dump: %w", err) @@ -241,12 +242,13 @@ func (m *Internal) ServiceDump(args *structs.ServiceDumpRequest, reply *structs. reply.Nodes = raw.(structs.CheckServiceNodes) } - importedRaw, err := filter.Execute(reply.ImportedNodes) - if err != nil { - return fmt.Errorf("could not filter peer service dump: %w", err) + if !args.NodesOnly { + importedRaw, err := filter.Execute(reply.ImportedNodes) + if err != nil { + return fmt.Errorf("could not filter peer service dump: %w", err) + } + reply.ImportedNodes = importedRaw.(structs.CheckServiceNodes) } - reply.ImportedNodes = importedRaw.(structs.CheckServiceNodes) - // Note: we filter the results with ACLs *after* applying the user-supplied // bexpr filter, to ensure QueryMeta.ResultsFilteredByACLs does not include // results that would be filtered out even if the user did have permission. diff --git a/agent/consul/internal_endpoint_test.go b/agent/consul/internal_endpoint_test.go index 3f853df7ce..c11f43494c 100644 --- a/agent/consul/internal_endpoint_test.go +++ b/agent/consul/internal_endpoint_test.go @@ -1779,10 +1779,11 @@ func TestInternal_ServiceDump_Peering(t *testing.T) { // prep the cluster with some data we can use in our filters registerTestCatalogEntries(t, codec) - doRequest := func(t *testing.T, filter string) structs.IndexedNodesWithGateways { + doRequest := func(t *testing.T, filter string, onlyNodes bool) structs.IndexedNodesWithGateways { t.Helper() - args := structs.DCSpecificRequest{ + args := structs.ServiceDumpRequest{ QueryOptions: structs.QueryOptions{Filter: filter}, + NodesOnly: onlyNodes, } var out structs.IndexedNodesWithGateways @@ -1792,7 +1793,7 @@ func TestInternal_ServiceDump_Peering(t *testing.T) { } t.Run("No peerings", func(t *testing.T) { - nodes := doRequest(t, "") + nodes := doRequest(t, "", false) // redis (3), web (3), critical (1), warning (1) and consul (1) require.Len(t, nodes.Nodes, 9) require.Len(t, nodes.ImportedNodes, 0) @@ -1809,19 +1810,27 @@ func TestInternal_ServiceDump_Peering(t *testing.T) { require.NoError(t, err) t.Run("peerings", func(t *testing.T) { - nodes := doRequest(t, "") + nodes := doRequest(t, "", false) // redis (3), web (3), critical (1), warning (1) and consul (1) require.Len(t, nodes.Nodes, 9) // service (1) require.Len(t, nodes.ImportedNodes, 1) }) + t.Run("peerings onlynodes", func(t *testing.T) { + nodes := doRequest(t, "", true) + // redis (3), web (3), critical (1), warning (1) and consul (1) + require.Len(t, nodes.Nodes, 9) + // service (1) + require.Len(t, nodes.ImportedNodes, 0) + }) + t.Run("peerings w filter", func(t *testing.T) { - nodes := doRequest(t, "Node.PeerName == foo") + nodes := doRequest(t, "Node.PeerName == foo", false) require.Len(t, nodes.Nodes, 0) require.Len(t, nodes.ImportedNodes, 0) - nodes2 := doRequest(t, "Node.PeerName == peer1") + nodes2 := doRequest(t, "Node.PeerName == peer1", false) require.Len(t, nodes2.Nodes, 0) require.Len(t, nodes2.ImportedNodes, 1) }) diff --git a/agent/proxycfg/mesh_gateway.go b/agent/proxycfg/mesh_gateway.go index 0237e84232..c9fe60a892 100644 --- a/agent/proxycfg/mesh_gateway.go +++ b/agent/proxycfg/mesh_gateway.go @@ -295,6 +295,7 @@ func (s *handlerMeshGateway) handleUpdate(ctx context.Context, u UpdateEvent, sn QueryOptions: structs.QueryOptions{Token: s.token}, ServiceKind: structs.ServiceKindMeshGateway, UseServiceKind: true, + NodesOnly: true, Source: *s.source, EnterpriseMeta: *entMeta, }, fmt.Sprintf("mesh-gateway:%s", gk.String()), s.ch) diff --git a/agent/proxycfg/state.go b/agent/proxycfg/state.go index 853dca5a9f..a113903c15 100644 --- a/agent/proxycfg/state.go +++ b/agent/proxycfg/state.go @@ -544,6 +544,7 @@ func watchMeshGateway(ctx context.Context, opts gatewayWatchOpts) error { QueryOptions: structs.QueryOptions{Token: opts.token}, ServiceKind: structs.ServiceKindMeshGateway, UseServiceKind: true, + NodesOnly: true, Source: opts.source, EnterpriseMeta: *structs.DefaultEnterpriseMetaInPartition(opts.key.Partition), }, correlationId, opts.notifyCh) diff --git a/agent/proxycfg/state_test.go b/agent/proxycfg/state_test.go index 2a6792aa22..43743eb40a 100644 --- a/agent/proxycfg/state_test.go +++ b/agent/proxycfg/state_test.go @@ -838,6 +838,282 @@ func TestState_WatchesAndUpdates(t *testing.T) { stages: []verificationStage{stage0, stage1}, } } + newConnectProxyCaseMeshDefault := func() testCase { + ns := structs.NodeService{ + Kind: structs.ServiceKindConnectProxy, + ID: "web-sidecar-proxy", + Service: "web-sidecar-proxy", + Address: "10.0.1.1", + Port: 443, + Proxy: structs.ConnectProxyConfig{ + DestinationServiceName: "web", + Upstreams: structs.Upstreams{ + structs.Upstream{ + DestinationType: structs.UpstreamDestTypePreparedQuery, + DestinationName: "query", + LocalBindPort: 10001, + }, + structs.Upstream{ + DestinationType: structs.UpstreamDestTypeService, + DestinationName: "api", + LocalBindPort: 10002, + }, + structs.Upstream{ + DestinationType: structs.UpstreamDestTypeService, + DestinationName: "api-failover-direct", + Datacenter: "dc2", + LocalBindPort: 10005, + MeshGateway: structs.MeshGatewayConfig{ + Mode: structs.MeshGatewayModeNone, + }, + }, + structs.Upstream{ + DestinationType: structs.UpstreamDestTypeService, + DestinationName: "api-failover-to-peer", + LocalBindPort: 10007, + }, + structs.Upstream{ + DestinationType: structs.UpstreamDestTypeService, + DestinationName: "api-dc2", + LocalBindPort: 10006, + }, + }, + }, + } + + ixnMatch := TestIntentions() + + stage0 := verificationStage{ + requiredWatches: map[string]verifyWatchRequest{ + intentionsWatchID: genVerifyIntentionWatch("web", "dc1"), + meshConfigEntryID: genVerifyMeshConfigWatch("dc1"), + fmt.Sprintf("discovery-chain:%s", apiUID.String()): genVerifyDiscoveryChainWatch(&structs.DiscoveryChainRequest{ + Name: "api", + EvaluateInDatacenter: "dc1", + EvaluateInNamespace: "default", + EvaluateInPartition: "default", + Datacenter: "dc1", + OverrideMeshGateway: structs.MeshGatewayConfig{ + Mode: structs.MeshGatewayModeDefault, + }, + QueryOptions: structs.QueryOptions{ + Token: aclToken, + }, + }), + fmt.Sprintf("discovery-chain:%s-failover-direct?dc=dc2", apiUID.String()): genVerifyDiscoveryChainWatch(&structs.DiscoveryChainRequest{ + Name: "api-failover-direct", + EvaluateInDatacenter: "dc2", + EvaluateInNamespace: "default", + EvaluateInPartition: "default", + Datacenter: "dc1", + OverrideMeshGateway: structs.MeshGatewayConfig{ + Mode: structs.MeshGatewayModeNone, + }, + QueryOptions: structs.QueryOptions{ + Token: aclToken, + }, + }), + fmt.Sprintf("discovery-chain:%s-failover-to-peer", apiUID.String()): genVerifyDiscoveryChainWatch(&structs.DiscoveryChainRequest{ + Name: "api-failover-to-peer", + EvaluateInDatacenter: "dc1", + EvaluateInNamespace: "default", + EvaluateInPartition: "default", + Datacenter: "dc1", + OverrideMeshGateway: structs.MeshGatewayConfig{ + Mode: structs.MeshGatewayModeDefault, + }, + QueryOptions: structs.QueryOptions{ + Token: aclToken, + }, + }), + fmt.Sprintf("discovery-chain:%s-dc2", apiUID.String()): genVerifyDiscoveryChainWatch(&structs.DiscoveryChainRequest{ + Name: "api-dc2", + EvaluateInDatacenter: "dc1", + EvaluateInNamespace: "default", + EvaluateInPartition: "default", + Datacenter: "dc1", + OverrideMeshGateway: structs.MeshGatewayConfig{ + Mode: structs.MeshGatewayModeDefault, + }, + QueryOptions: structs.QueryOptions{ + Token: aclToken, + }, + }), + "upstream:" + pqUID.String(): genVerifyPreparedQueryWatch("query", "dc1"), + rootsWatchID: genVerifyDCSpecificWatch("dc1"), + leafWatchID: genVerifyLeafWatch("web", "dc1"), + }, + events: []UpdateEvent{ + rootWatchEvent(), + { + CorrelationID: leafWatchID, + Result: issuedCert, + Err: nil, + }, + { + CorrelationID: intentionsWatchID, + Result: ixnMatch, + Err: nil, + }, + { + CorrelationID: meshConfigEntryID, + Result: &structs.ConfigEntryResponse{}, + }, + { + CorrelationID: fmt.Sprintf("discovery-chain:%s", apiUID.String()), + Result: &structs.DiscoveryChainResponse{ + Chain: discoverychain.TestCompileConfigEntries(t, "api", "default", "default", "dc1", "trustdomain.consul", + func(req *discoverychain.CompileRequest) { + req.OverrideMeshGateway.Mode = structs.MeshGatewayModeDefault + }, nil), + }, + Err: nil, + }, + { + CorrelationID: fmt.Sprintf("discovery-chain:%s-failover-remote?dc=dc2", apiUID.String()), + Result: &structs.DiscoveryChainResponse{ + Chain: discoverychain.TestCompileConfigEntries(t, "api-failover-remote", "default", "default", "dc2", "trustdomain.consul", + func(req *discoverychain.CompileRequest) { + req.OverrideMeshGateway.Mode = structs.MeshGatewayModeDefault + }, nil), + }, + Err: nil, + }, + { + CorrelationID: fmt.Sprintf("discovery-chain:%s-failover-local?dc=dc2", apiUID.String()), + Result: &structs.DiscoveryChainResponse{ + Chain: discoverychain.TestCompileConfigEntries(t, "api-failover-local", "default", "default", "dc2", "trustdomain.consul", + func(req *discoverychain.CompileRequest) { + req.OverrideMeshGateway.Mode = structs.MeshGatewayModeDefault + }, nil), + }, + Err: nil, + }, + { + CorrelationID: fmt.Sprintf("discovery-chain:%s-failover-direct?dc=dc2", apiUID.String()), + Result: &structs.DiscoveryChainResponse{ + Chain: discoverychain.TestCompileConfigEntries(t, "api-failover-direct", "default", "default", "dc2", "trustdomain.consul", + func(req *discoverychain.CompileRequest) { + req.OverrideMeshGateway.Mode = structs.MeshGatewayModeDefault + }, nil), + }, + Err: nil, + }, + { + CorrelationID: fmt.Sprintf("discovery-chain:%s-dc2", apiUID.String()), + Result: &structs.DiscoveryChainResponse{ + Chain: discoverychain.TestCompileConfigEntries(t, "api-dc2", "default", "default", "dc1", "trustdomain.consul", + func(req *discoverychain.CompileRequest) { + req.OverrideMeshGateway.Mode = structs.MeshGatewayModeDefault + }, discoChainSetWithEntries(&structs.ServiceResolverConfigEntry{ + Kind: structs.ServiceResolver, + Name: "api-dc2", + Redirect: &structs.ServiceResolverRedirect{ + Service: "api", + Datacenter: "dc2", + }, + })), + }, + Err: nil, + }, + { + CorrelationID: fmt.Sprintf("discovery-chain:%s-failover-to-peer", apiUID.String()), + Result: &structs.DiscoveryChainResponse{ + Chain: discoverychain.TestCompileConfigEntries(t, "api-failover-to-peer", "default", "default", "dc1", "trustdomain.consul", + func(req *discoverychain.CompileRequest) { + req.OverrideMeshGateway.Mode = structs.MeshGatewayModeDefault + }, discoChainSetWithEntries(&structs.ServiceResolverConfigEntry{ + Kind: structs.ServiceResolver, + Name: "api-failover-to-peer", + Failover: map[string]structs.ServiceResolverFailover{ + "*": { + Targets: []structs.ServiceResolverFailoverTarget{ + {Peer: "cluster-01"}, + }, + }, + }, + })), + }, + Err: nil, + }, + { + CorrelationID: "mesh-gateway:dc1", + Result: &structs.IndexedCheckServiceNodes{ + Nodes: structs.CheckServiceNodes{ + { + Node: &structs.Node{ + Node: "node1", + Address: "10.1.2.3", + }, + Service: structs.TestNodeServiceMeshGateway(t), + }, + }, + }, + }, + }, + verifySnapshot: func(t testing.TB, snap *ConfigSnapshot) { + require.True(t, snap.Valid()) + require.True(t, snap.MeshGateway.isEmpty()) + require.Equal(t, indexedRoots, snap.Roots) + + require.Equal(t, issuedCert, snap.ConnectProxy.Leaf) + require.Len(t, snap.ConnectProxy.DiscoveryChain, 4, "%+v", snap.ConnectProxy.DiscoveryChain) + require.Len(t, snap.ConnectProxy.WatchedUpstreams, 4, "%+v", snap.ConnectProxy.WatchedUpstreams) + require.Len(t, snap.ConnectProxy.WatchedUpstreamEndpoints, 4, "%+v", snap.ConnectProxy.WatchedUpstreamEndpoints) + require.Len(t, snap.ConnectProxy.WatchedGateways, 4, "%+v", snap.ConnectProxy.WatchedGateways) + require.Len(t, snap.ConnectProxy.WatchedGatewayEndpoints, 4, "%+v", snap.ConnectProxy.WatchedGatewayEndpoints) + + require.Len(t, snap.ConnectProxy.WatchedServiceChecks, 0, "%+v", snap.ConnectProxy.WatchedServiceChecks) + require.Len(t, snap.ConnectProxy.PreparedQueryEndpoints, 0, "%+v", snap.ConnectProxy.PreparedQueryEndpoints) + + require.Equal(t, 1, snap.ConnectProxy.ConfigSnapshotUpstreams.PeerUpstreamEndpoints.Len()) + require.Equal(t, 1, snap.ConnectProxy.ConfigSnapshotUpstreams.UpstreamPeerTrustBundles.Len()) + + require.True(t, snap.ConnectProxy.IntentionsSet) + require.Equal(t, ixnMatch, snap.ConnectProxy.Intentions) + require.True(t, snap.ConnectProxy.MeshConfigSet) + + // No event is expected as all services use default or remote mode + require.Equal(t, 0, snap.ConnectProxy.WatchedLocalGWEndpoints.Len()) + }, + } + + stage1 := verificationStage{ + requiredWatches: map[string]verifyWatchRequest{ + fmt.Sprintf("upstream-target:api.default.default.dc1:%s", apiUID.String()): genVerifyServiceSpecificRequest("api", "", "dc1", true), + fmt.Sprintf("upstream-target:api-failover-direct.default.default.dc2:%s-failover-direct?dc=dc2", apiUID.String()): genVerifyServiceSpecificRequest("api-failover-direct", "", "dc2", true), + upstreamPeerWatchIDPrefix + fmt.Sprintf("%s-failover-to-peer?peer=cluster-01", apiUID.String()): genVerifyServiceSpecificPeeredRequest("api-failover-to-peer", "", "dc1", "cluster-01", true), + }, + verifySnapshot: func(t testing.TB, snap *ConfigSnapshot) { + require.True(t, snap.Valid()) + require.True(t, snap.MeshGateway.isEmpty()) + require.Equal(t, indexedRoots, snap.Roots) + + require.Equal(t, issuedCert, snap.ConnectProxy.Leaf) + require.Len(t, snap.ConnectProxy.DiscoveryChain, 4, "%+v", snap.ConnectProxy.DiscoveryChain) + require.Len(t, snap.ConnectProxy.WatchedUpstreams, 4, "%+v", snap.ConnectProxy.WatchedUpstreams) + require.Len(t, snap.ConnectProxy.WatchedUpstreamEndpoints, 4, "%+v", snap.ConnectProxy.WatchedUpstreamEndpoints) + require.Len(t, snap.ConnectProxy.WatchedGateways, 4, "%+v", snap.ConnectProxy.WatchedGateways) + require.Len(t, snap.ConnectProxy.WatchedGatewayEndpoints, 4, "%+v", snap.ConnectProxy.WatchedGatewayEndpoints) + + require.Len(t, snap.ConnectProxy.WatchedServiceChecks, 0, "%+v", snap.ConnectProxy.WatchedServiceChecks) + require.Len(t, snap.ConnectProxy.PreparedQueryEndpoints, 0, "%+v", snap.ConnectProxy.PreparedQueryEndpoints) + + require.Equal(t, 1, snap.ConnectProxy.ConfigSnapshotUpstreams.PeerUpstreamEndpoints.Len()) + require.Equal(t, 1, snap.ConnectProxy.ConfigSnapshotUpstreams.UpstreamPeerTrustBundles.Len()) + + require.True(t, snap.ConnectProxy.IntentionsSet) + require.Equal(t, ixnMatch, snap.ConnectProxy.Intentions) + + }, + } + + return testCase{ + ns: ns, + sourceDC: "dc1", + stages: []verificationStage{stage0, stage1}, + } + } dbIxnMatch := structs.SimplifiedIntentions{ { @@ -3444,8 +3720,9 @@ func TestState_WatchesAndUpdates(t *testing.T) { }, }, }, - "connect-proxy": newConnectProxyCase(structs.MeshGatewayModeDefault), - "connect-proxy-mesh-gateway-local": newConnectProxyCase(structs.MeshGatewayModeLocal), + "connect-proxy": newConnectProxyCase(structs.MeshGatewayModeDefault), + "connect-proxy-mesh-gateway-default": newConnectProxyCaseMeshDefault(), + "connect-proxy-mesh-gateway-local": newConnectProxyCase(structs.MeshGatewayModeLocal), "connect-proxy-with-peers": { ns: structs.NodeService{ Kind: structs.ServiceKindConnectProxy, diff --git a/agent/proxycfg/upstreams.go b/agent/proxycfg/upstreams.go index fe2d502339..209a3446d9 100644 --- a/agent/proxycfg/upstreams.go +++ b/agent/proxycfg/upstreams.go @@ -354,6 +354,14 @@ func (s *handlerUpstreams) resetWatchesFromChain( Partition: s.proxyID.PartitionOrDefault(), Datacenter: s.source.Datacenter, } + default: + // if target.MeshGateway.Mode is not set and target is not peered we don't want to set up watches for the gateway. + // This is important specifically in wan-fed without mesh gateway use case, as for this case + //the source and target DC could be different but there is not mesh-gateway so no need to watch + // a costly watch (Internal.ServiceDump) + if target.Peer == "" { + continue + } } if s.source.Datacenter != target.Datacenter || s.proxyID.PartitionOrDefault() != target.Partition { needGateways[gk.String()] = struct{}{} @@ -393,6 +401,7 @@ func (s *handlerUpstreams) resetWatchesFromChain( if _, ok := snap.WatchedGateways[uid][key]; ok { continue } + gwKey := gatewayKeyFromString(key) s.logger.Trace("initializing watch of mesh gateway", @@ -411,13 +420,14 @@ func (s *handlerUpstreams) resetWatchesFromChain( key: gwKey, upstreamID: uid, } + err := watchMeshGateway(ctx, opts) if err != nil { cancel() return err } - snap.WatchedGateways[uid][key] = cancel + } for key, cancelFn := range snap.WatchedGateways[uid] { diff --git a/agent/structs/structs.go b/agent/structs/structs.go index 9b2685946c..d9fbac2bfe 100644 --- a/agent/structs/structs.go +++ b/agent/structs/structs.go @@ -666,6 +666,7 @@ type ServiceDumpRequest struct { Datacenter string ServiceKind ServiceKind UseServiceKind bool + NodesOnly bool Source QuerySource acl.EnterpriseMeta `hcl:",squash" mapstructure:",squash"` PeerName string @@ -698,6 +699,7 @@ func (r *ServiceDumpRequest) CacheInfo() cache.RequestInfo { v, err := hashstructure.Hash([]interface{}{ keyKind, r.UseServiceKind, + r.NodesOnly, r.Filter, r.EnterpriseMeta, }, nil)