Fix to not create a watch to `Internal.ServiceDump` when mesh gateway is not used (#20168)

This add a fix to properly verify the gateway mode before creating a watch specific to mesh gateways. This watch have a high performance cost and when mesh gateways are not used is not used.

This also adds an optimization to only return the nodes when watching the Internal.ServiceDump RPC to avoid unnecessary disco chain compilation. As watches in proxy config only need the nodes.
This commit is contained in:
Dhia Ayachi 2024-01-18 17:44:53 -05:00 committed by GitHub
parent 539659508a
commit d641998641
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
8 changed files with 350 additions and 41 deletions

7
.changelog/20168.txt Normal file
View File

@ -0,0 +1,7 @@
```release-note:enhancement
ProxyCfg: avoid setting a watch on `Internal.ServiceDump` when mesh gateway is not used.
```
```release-note:enhancement
ProxyCfg: only return the nodes list when querying the `Internal.ServiceDump` watch from proxycfg
```

View File

@ -197,43 +197,44 @@ func (m *Internal) ServiceDump(args *structs.ServiceDumpRequest, reply *structs.
}
reply.Nodes = nodes
// get a list of all peerings
index, listedPeerings, err := state.PeeringList(ws, args.EnterpriseMeta)
if err != nil {
return fmt.Errorf("could not list peers for service dump %w", err)
}
if index > maxIndex {
maxIndex = index
}
for _, p := range listedPeerings {
// Note we fetch imported services with wildcard namespace because imported services' namespaces
// are in a different locality; regardless of our local namespace, we return all imported services
// of the local partition.
index, importedNodes, err := state.ServiceDump(ws, args.ServiceKind, args.UseServiceKind, args.EnterpriseMeta.WithWildcardNamespace(), p.Name)
if !args.NodesOnly {
// get a list of all peerings
index, listedPeerings, err := state.PeeringList(ws, args.EnterpriseMeta)
if err != nil {
return fmt.Errorf("could not get a service dump for peer %q: %w", p.Name, err)
return fmt.Errorf("could not list peers for service dump %w", err)
}
if index > maxIndex {
maxIndex = index
}
reply.ImportedNodes = append(reply.ImportedNodes, importedNodes...)
}
// Get, store, and filter gateway services
idx, gatewayServices, err := state.DumpGatewayServices(ws)
if err != nil {
return err
}
reply.Gateways = gatewayServices
for _, p := range listedPeerings {
// Note we fetch imported services with wildcard namespace because imported services' namespaces
// are in a different locality; regardless of our local namespace, we return all imported services
// of the local partition.
index, importedNodes, err := state.ServiceDump(ws, args.ServiceKind, args.UseServiceKind, args.EnterpriseMeta.WithWildcardNamespace(), p.Name)
if err != nil {
return fmt.Errorf("could not get a service dump for peer %q: %w", p.Name, err)
}
if idx > maxIndex {
maxIndex = idx
if index > maxIndex {
maxIndex = index
}
reply.ImportedNodes = append(reply.ImportedNodes, importedNodes...)
}
// Get, store, and filter gateway services
idx, gatewayServices, err := state.DumpGatewayServices(ws)
if err != nil {
return err
}
reply.Gateways = gatewayServices
if idx > maxIndex {
maxIndex = idx
}
}
reply.Index = maxIndex
raw, err := filter.Execute(reply.Nodes)
if err != nil {
return fmt.Errorf("could not filter local service dump: %w", err)
@ -241,12 +242,13 @@ func (m *Internal) ServiceDump(args *structs.ServiceDumpRequest, reply *structs.
reply.Nodes = raw.(structs.CheckServiceNodes)
}
importedRaw, err := filter.Execute(reply.ImportedNodes)
if err != nil {
return fmt.Errorf("could not filter peer service dump: %w", err)
if !args.NodesOnly {
importedRaw, err := filter.Execute(reply.ImportedNodes)
if err != nil {
return fmt.Errorf("could not filter peer service dump: %w", err)
}
reply.ImportedNodes = importedRaw.(structs.CheckServiceNodes)
}
reply.ImportedNodes = importedRaw.(structs.CheckServiceNodes)
// Note: we filter the results with ACLs *after* applying the user-supplied
// bexpr filter, to ensure QueryMeta.ResultsFilteredByACLs does not include
// results that would be filtered out even if the user did have permission.

View File

@ -1779,10 +1779,11 @@ func TestInternal_ServiceDump_Peering(t *testing.T) {
// prep the cluster with some data we can use in our filters
registerTestCatalogEntries(t, codec)
doRequest := func(t *testing.T, filter string) structs.IndexedNodesWithGateways {
doRequest := func(t *testing.T, filter string, onlyNodes bool) structs.IndexedNodesWithGateways {
t.Helper()
args := structs.DCSpecificRequest{
args := structs.ServiceDumpRequest{
QueryOptions: structs.QueryOptions{Filter: filter},
NodesOnly: onlyNodes,
}
var out structs.IndexedNodesWithGateways
@ -1792,7 +1793,7 @@ func TestInternal_ServiceDump_Peering(t *testing.T) {
}
t.Run("No peerings", func(t *testing.T) {
nodes := doRequest(t, "")
nodes := doRequest(t, "", false)
// redis (3), web (3), critical (1), warning (1) and consul (1)
require.Len(t, nodes.Nodes, 9)
require.Len(t, nodes.ImportedNodes, 0)
@ -1809,19 +1810,27 @@ func TestInternal_ServiceDump_Peering(t *testing.T) {
require.NoError(t, err)
t.Run("peerings", func(t *testing.T) {
nodes := doRequest(t, "")
nodes := doRequest(t, "", false)
// redis (3), web (3), critical (1), warning (1) and consul (1)
require.Len(t, nodes.Nodes, 9)
// service (1)
require.Len(t, nodes.ImportedNodes, 1)
})
t.Run("peerings onlynodes", func(t *testing.T) {
nodes := doRequest(t, "", true)
// redis (3), web (3), critical (1), warning (1) and consul (1)
require.Len(t, nodes.Nodes, 9)
// service (1)
require.Len(t, nodes.ImportedNodes, 0)
})
t.Run("peerings w filter", func(t *testing.T) {
nodes := doRequest(t, "Node.PeerName == foo")
nodes := doRequest(t, "Node.PeerName == foo", false)
require.Len(t, nodes.Nodes, 0)
require.Len(t, nodes.ImportedNodes, 0)
nodes2 := doRequest(t, "Node.PeerName == peer1")
nodes2 := doRequest(t, "Node.PeerName == peer1", false)
require.Len(t, nodes2.Nodes, 0)
require.Len(t, nodes2.ImportedNodes, 1)
})

View File

@ -295,6 +295,7 @@ func (s *handlerMeshGateway) handleUpdate(ctx context.Context, u UpdateEvent, sn
QueryOptions: structs.QueryOptions{Token: s.token},
ServiceKind: structs.ServiceKindMeshGateway,
UseServiceKind: true,
NodesOnly: true,
Source: *s.source,
EnterpriseMeta: *entMeta,
}, fmt.Sprintf("mesh-gateway:%s", gk.String()), s.ch)

View File

@ -544,6 +544,7 @@ func watchMeshGateway(ctx context.Context, opts gatewayWatchOpts) error {
QueryOptions: structs.QueryOptions{Token: opts.token},
ServiceKind: structs.ServiceKindMeshGateway,
UseServiceKind: true,
NodesOnly: true,
Source: opts.source,
EnterpriseMeta: *structs.DefaultEnterpriseMetaInPartition(opts.key.Partition),
}, correlationId, opts.notifyCh)

View File

@ -838,6 +838,282 @@ func TestState_WatchesAndUpdates(t *testing.T) {
stages: []verificationStage{stage0, stage1},
}
}
newConnectProxyCaseMeshDefault := func() testCase {
ns := structs.NodeService{
Kind: structs.ServiceKindConnectProxy,
ID: "web-sidecar-proxy",
Service: "web-sidecar-proxy",
Address: "10.0.1.1",
Port: 443,
Proxy: structs.ConnectProxyConfig{
DestinationServiceName: "web",
Upstreams: structs.Upstreams{
structs.Upstream{
DestinationType: structs.UpstreamDestTypePreparedQuery,
DestinationName: "query",
LocalBindPort: 10001,
},
structs.Upstream{
DestinationType: structs.UpstreamDestTypeService,
DestinationName: "api",
LocalBindPort: 10002,
},
structs.Upstream{
DestinationType: structs.UpstreamDestTypeService,
DestinationName: "api-failover-direct",
Datacenter: "dc2",
LocalBindPort: 10005,
MeshGateway: structs.MeshGatewayConfig{
Mode: structs.MeshGatewayModeNone,
},
},
structs.Upstream{
DestinationType: structs.UpstreamDestTypeService,
DestinationName: "api-failover-to-peer",
LocalBindPort: 10007,
},
structs.Upstream{
DestinationType: structs.UpstreamDestTypeService,
DestinationName: "api-dc2",
LocalBindPort: 10006,
},
},
},
}
ixnMatch := TestIntentions()
stage0 := verificationStage{
requiredWatches: map[string]verifyWatchRequest{
intentionsWatchID: genVerifyIntentionWatch("web", "dc1"),
meshConfigEntryID: genVerifyMeshConfigWatch("dc1"),
fmt.Sprintf("discovery-chain:%s", apiUID.String()): genVerifyDiscoveryChainWatch(&structs.DiscoveryChainRequest{
Name: "api",
EvaluateInDatacenter: "dc1",
EvaluateInNamespace: "default",
EvaluateInPartition: "default",
Datacenter: "dc1",
OverrideMeshGateway: structs.MeshGatewayConfig{
Mode: structs.MeshGatewayModeDefault,
},
QueryOptions: structs.QueryOptions{
Token: aclToken,
},
}),
fmt.Sprintf("discovery-chain:%s-failover-direct?dc=dc2", apiUID.String()): genVerifyDiscoveryChainWatch(&structs.DiscoveryChainRequest{
Name: "api-failover-direct",
EvaluateInDatacenter: "dc2",
EvaluateInNamespace: "default",
EvaluateInPartition: "default",
Datacenter: "dc1",
OverrideMeshGateway: structs.MeshGatewayConfig{
Mode: structs.MeshGatewayModeNone,
},
QueryOptions: structs.QueryOptions{
Token: aclToken,
},
}),
fmt.Sprintf("discovery-chain:%s-failover-to-peer", apiUID.String()): genVerifyDiscoveryChainWatch(&structs.DiscoveryChainRequest{
Name: "api-failover-to-peer",
EvaluateInDatacenter: "dc1",
EvaluateInNamespace: "default",
EvaluateInPartition: "default",
Datacenter: "dc1",
OverrideMeshGateway: structs.MeshGatewayConfig{
Mode: structs.MeshGatewayModeDefault,
},
QueryOptions: structs.QueryOptions{
Token: aclToken,
},
}),
fmt.Sprintf("discovery-chain:%s-dc2", apiUID.String()): genVerifyDiscoveryChainWatch(&structs.DiscoveryChainRequest{
Name: "api-dc2",
EvaluateInDatacenter: "dc1",
EvaluateInNamespace: "default",
EvaluateInPartition: "default",
Datacenter: "dc1",
OverrideMeshGateway: structs.MeshGatewayConfig{
Mode: structs.MeshGatewayModeDefault,
},
QueryOptions: structs.QueryOptions{
Token: aclToken,
},
}),
"upstream:" + pqUID.String(): genVerifyPreparedQueryWatch("query", "dc1"),
rootsWatchID: genVerifyDCSpecificWatch("dc1"),
leafWatchID: genVerifyLeafWatch("web", "dc1"),
},
events: []UpdateEvent{
rootWatchEvent(),
{
CorrelationID: leafWatchID,
Result: issuedCert,
Err: nil,
},
{
CorrelationID: intentionsWatchID,
Result: ixnMatch,
Err: nil,
},
{
CorrelationID: meshConfigEntryID,
Result: &structs.ConfigEntryResponse{},
},
{
CorrelationID: fmt.Sprintf("discovery-chain:%s", apiUID.String()),
Result: &structs.DiscoveryChainResponse{
Chain: discoverychain.TestCompileConfigEntries(t, "api", "default", "default", "dc1", "trustdomain.consul",
func(req *discoverychain.CompileRequest) {
req.OverrideMeshGateway.Mode = structs.MeshGatewayModeDefault
}, nil),
},
Err: nil,
},
{
CorrelationID: fmt.Sprintf("discovery-chain:%s-failover-remote?dc=dc2", apiUID.String()),
Result: &structs.DiscoveryChainResponse{
Chain: discoverychain.TestCompileConfigEntries(t, "api-failover-remote", "default", "default", "dc2", "trustdomain.consul",
func(req *discoverychain.CompileRequest) {
req.OverrideMeshGateway.Mode = structs.MeshGatewayModeDefault
}, nil),
},
Err: nil,
},
{
CorrelationID: fmt.Sprintf("discovery-chain:%s-failover-local?dc=dc2", apiUID.String()),
Result: &structs.DiscoveryChainResponse{
Chain: discoverychain.TestCompileConfigEntries(t, "api-failover-local", "default", "default", "dc2", "trustdomain.consul",
func(req *discoverychain.CompileRequest) {
req.OverrideMeshGateway.Mode = structs.MeshGatewayModeDefault
}, nil),
},
Err: nil,
},
{
CorrelationID: fmt.Sprintf("discovery-chain:%s-failover-direct?dc=dc2", apiUID.String()),
Result: &structs.DiscoveryChainResponse{
Chain: discoverychain.TestCompileConfigEntries(t, "api-failover-direct", "default", "default", "dc2", "trustdomain.consul",
func(req *discoverychain.CompileRequest) {
req.OverrideMeshGateway.Mode = structs.MeshGatewayModeDefault
}, nil),
},
Err: nil,
},
{
CorrelationID: fmt.Sprintf("discovery-chain:%s-dc2", apiUID.String()),
Result: &structs.DiscoveryChainResponse{
Chain: discoverychain.TestCompileConfigEntries(t, "api-dc2", "default", "default", "dc1", "trustdomain.consul",
func(req *discoverychain.CompileRequest) {
req.OverrideMeshGateway.Mode = structs.MeshGatewayModeDefault
}, discoChainSetWithEntries(&structs.ServiceResolverConfigEntry{
Kind: structs.ServiceResolver,
Name: "api-dc2",
Redirect: &structs.ServiceResolverRedirect{
Service: "api",
Datacenter: "dc2",
},
})),
},
Err: nil,
},
{
CorrelationID: fmt.Sprintf("discovery-chain:%s-failover-to-peer", apiUID.String()),
Result: &structs.DiscoveryChainResponse{
Chain: discoverychain.TestCompileConfigEntries(t, "api-failover-to-peer", "default", "default", "dc1", "trustdomain.consul",
func(req *discoverychain.CompileRequest) {
req.OverrideMeshGateway.Mode = structs.MeshGatewayModeDefault
}, discoChainSetWithEntries(&structs.ServiceResolverConfigEntry{
Kind: structs.ServiceResolver,
Name: "api-failover-to-peer",
Failover: map[string]structs.ServiceResolverFailover{
"*": {
Targets: []structs.ServiceResolverFailoverTarget{
{Peer: "cluster-01"},
},
},
},
})),
},
Err: nil,
},
{
CorrelationID: "mesh-gateway:dc1",
Result: &structs.IndexedCheckServiceNodes{
Nodes: structs.CheckServiceNodes{
{
Node: &structs.Node{
Node: "node1",
Address: "10.1.2.3",
},
Service: structs.TestNodeServiceMeshGateway(t),
},
},
},
},
},
verifySnapshot: func(t testing.TB, snap *ConfigSnapshot) {
require.True(t, snap.Valid())
require.True(t, snap.MeshGateway.isEmpty())
require.Equal(t, indexedRoots, snap.Roots)
require.Equal(t, issuedCert, snap.ConnectProxy.Leaf)
require.Len(t, snap.ConnectProxy.DiscoveryChain, 4, "%+v", snap.ConnectProxy.DiscoveryChain)
require.Len(t, snap.ConnectProxy.WatchedUpstreams, 4, "%+v", snap.ConnectProxy.WatchedUpstreams)
require.Len(t, snap.ConnectProxy.WatchedUpstreamEndpoints, 4, "%+v", snap.ConnectProxy.WatchedUpstreamEndpoints)
require.Len(t, snap.ConnectProxy.WatchedGateways, 4, "%+v", snap.ConnectProxy.WatchedGateways)
require.Len(t, snap.ConnectProxy.WatchedGatewayEndpoints, 4, "%+v", snap.ConnectProxy.WatchedGatewayEndpoints)
require.Len(t, snap.ConnectProxy.WatchedServiceChecks, 0, "%+v", snap.ConnectProxy.WatchedServiceChecks)
require.Len(t, snap.ConnectProxy.PreparedQueryEndpoints, 0, "%+v", snap.ConnectProxy.PreparedQueryEndpoints)
require.Equal(t, 1, snap.ConnectProxy.ConfigSnapshotUpstreams.PeerUpstreamEndpoints.Len())
require.Equal(t, 1, snap.ConnectProxy.ConfigSnapshotUpstreams.UpstreamPeerTrustBundles.Len())
require.True(t, snap.ConnectProxy.IntentionsSet)
require.Equal(t, ixnMatch, snap.ConnectProxy.Intentions)
require.True(t, snap.ConnectProxy.MeshConfigSet)
// No event is expected as all services use default or remote mode
require.Equal(t, 0, snap.ConnectProxy.WatchedLocalGWEndpoints.Len())
},
}
stage1 := verificationStage{
requiredWatches: map[string]verifyWatchRequest{
fmt.Sprintf("upstream-target:api.default.default.dc1:%s", apiUID.String()): genVerifyServiceSpecificRequest("api", "", "dc1", true),
fmt.Sprintf("upstream-target:api-failover-direct.default.default.dc2:%s-failover-direct?dc=dc2", apiUID.String()): genVerifyServiceSpecificRequest("api-failover-direct", "", "dc2", true),
upstreamPeerWatchIDPrefix + fmt.Sprintf("%s-failover-to-peer?peer=cluster-01", apiUID.String()): genVerifyServiceSpecificPeeredRequest("api-failover-to-peer", "", "dc1", "cluster-01", true),
},
verifySnapshot: func(t testing.TB, snap *ConfigSnapshot) {
require.True(t, snap.Valid())
require.True(t, snap.MeshGateway.isEmpty())
require.Equal(t, indexedRoots, snap.Roots)
require.Equal(t, issuedCert, snap.ConnectProxy.Leaf)
require.Len(t, snap.ConnectProxy.DiscoveryChain, 4, "%+v", snap.ConnectProxy.DiscoveryChain)
require.Len(t, snap.ConnectProxy.WatchedUpstreams, 4, "%+v", snap.ConnectProxy.WatchedUpstreams)
require.Len(t, snap.ConnectProxy.WatchedUpstreamEndpoints, 4, "%+v", snap.ConnectProxy.WatchedUpstreamEndpoints)
require.Len(t, snap.ConnectProxy.WatchedGateways, 4, "%+v", snap.ConnectProxy.WatchedGateways)
require.Len(t, snap.ConnectProxy.WatchedGatewayEndpoints, 4, "%+v", snap.ConnectProxy.WatchedGatewayEndpoints)
require.Len(t, snap.ConnectProxy.WatchedServiceChecks, 0, "%+v", snap.ConnectProxy.WatchedServiceChecks)
require.Len(t, snap.ConnectProxy.PreparedQueryEndpoints, 0, "%+v", snap.ConnectProxy.PreparedQueryEndpoints)
require.Equal(t, 1, snap.ConnectProxy.ConfigSnapshotUpstreams.PeerUpstreamEndpoints.Len())
require.Equal(t, 1, snap.ConnectProxy.ConfigSnapshotUpstreams.UpstreamPeerTrustBundles.Len())
require.True(t, snap.ConnectProxy.IntentionsSet)
require.Equal(t, ixnMatch, snap.ConnectProxy.Intentions)
},
}
return testCase{
ns: ns,
sourceDC: "dc1",
stages: []verificationStage{stage0, stage1},
}
}
dbIxnMatch := structs.SimplifiedIntentions{
{
@ -3444,8 +3720,9 @@ func TestState_WatchesAndUpdates(t *testing.T) {
},
},
},
"connect-proxy": newConnectProxyCase(structs.MeshGatewayModeDefault),
"connect-proxy-mesh-gateway-local": newConnectProxyCase(structs.MeshGatewayModeLocal),
"connect-proxy": newConnectProxyCase(structs.MeshGatewayModeDefault),
"connect-proxy-mesh-gateway-default": newConnectProxyCaseMeshDefault(),
"connect-proxy-mesh-gateway-local": newConnectProxyCase(structs.MeshGatewayModeLocal),
"connect-proxy-with-peers": {
ns: structs.NodeService{
Kind: structs.ServiceKindConnectProxy,

View File

@ -354,6 +354,14 @@ func (s *handlerUpstreams) resetWatchesFromChain(
Partition: s.proxyID.PartitionOrDefault(),
Datacenter: s.source.Datacenter,
}
default:
// if target.MeshGateway.Mode is not set and target is not peered we don't want to set up watches for the gateway.
// This is important specifically in wan-fed without mesh gateway use case, as for this case
//the source and target DC could be different but there is not mesh-gateway so no need to watch
// a costly watch (Internal.ServiceDump)
if target.Peer == "" {
continue
}
}
if s.source.Datacenter != target.Datacenter || s.proxyID.PartitionOrDefault() != target.Partition {
needGateways[gk.String()] = struct{}{}
@ -393,6 +401,7 @@ func (s *handlerUpstreams) resetWatchesFromChain(
if _, ok := snap.WatchedGateways[uid][key]; ok {
continue
}
gwKey := gatewayKeyFromString(key)
s.logger.Trace("initializing watch of mesh gateway",
@ -411,13 +420,14 @@ func (s *handlerUpstreams) resetWatchesFromChain(
key: gwKey,
upstreamID: uid,
}
err := watchMeshGateway(ctx, opts)
if err != nil {
cancel()
return err
}
snap.WatchedGateways[uid][key] = cancel
}
for key, cancelFn := range snap.WatchedGateways[uid] {

View File

@ -666,6 +666,7 @@ type ServiceDumpRequest struct {
Datacenter string
ServiceKind ServiceKind
UseServiceKind bool
NodesOnly bool
Source QuerySource
acl.EnterpriseMeta `hcl:",squash" mapstructure:",squash"`
PeerName string
@ -698,6 +699,7 @@ func (r *ServiceDumpRequest) CacheInfo() cache.RequestInfo {
v, err := hashstructure.Hash([]interface{}{
keyKind,
r.UseServiceKind,
r.NodesOnly,
r.Filter,
r.EnterpriseMeta,
}, nil)