From 4c9577678e5870f8cbd63f24ad8bd57004fc9de1 Mon Sep 17 00:00:00 2001 From: Matt Keeler Date: Wed, 19 Feb 2020 11:57:55 -0500 Subject: [PATCH] xDS Mesh Gateway Resolver Subset Fixes (#7294) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * xDS Mesh Gateway Resolver Subset Fixes The first fix was that clusters were being generated for every service resolver subset regardless of there being any service instances of the associated service in that dc. The previous logic didn’t care at all but now it will omit generating those clusters unless we also have service instances that should be proxied. The second fix was to respect the DefaultSubset of a service resolver so that mesh-gateways would configure the endpoints of the unnamed subset cluster to only those endpoints matched by the default subsets filters. * Refactor the gateway endpoint generation to be a little easier to read --- agent/proxycfg/snapshot.go | 21 +- agent/xds/clusters.go | 19 +- agent/xds/clusters_test.go | 36 +++ agent/xds/endpoints.go | 70 +++--- agent/xds/endpoints_test.go | 36 +++ ...mesh-gateway-ignore-extra-resolvers.golden | 87 +++++++ ...mesh-gateway-default-service-subset.golden | 221 ++++++++++++++++++ 7 files changed, 448 insertions(+), 42 deletions(-) create mode 100644 agent/xds/testdata/clusters/mesh-gateway-ignore-extra-resolvers.golden create mode 100644 agent/xds/testdata/endpoints/mesh-gateway-default-service-subset.golden diff --git a/agent/proxycfg/snapshot.go b/agent/proxycfg/snapshot.go index 1de0b0f3a4..52d8a33ab6 100644 --- a/agent/proxycfg/snapshot.go +++ b/agent/proxycfg/snapshot.go @@ -34,12 +34,25 @@ func (c *configSnapshotConnectProxy) IsEmpty() bool { } type configSnapshotMeshGateway struct { - WatchedServices map[structs.ServiceID]context.CancelFunc + // map of service id to a cancel function. This cancel function is tied to the watch of + // connect enabled services for the given id. If the main datacenter services watch would + // indicate the removal of a service all together we then cancel watching that service for + // its connect endpoints. + WatchedServices map[structs.ServiceID]context.CancelFunc + // Indicates that the watch on the datacenters services has completed. Even when there + // are no connect services, this being set (and the Connect roots being available) will be enough for + // the config snapshot to be considered valid. In the case of Envoy, this allows it to start its listeners + // even when no services would be proxied and allow its health check to pass. WatchedServicesSet bool + // map of datacenter name to a cancel function. This cancel function is tied + // to the watch of mesh-gateway services in that datacenter. WatchedDatacenters map[string]context.CancelFunc - ServiceGroups map[structs.ServiceID]structs.CheckServiceNodes - ServiceResolvers map[structs.ServiceID]*structs.ServiceResolverConfigEntry - GatewayGroups map[string]structs.CheckServiceNodes + // map of service id to the service instances of that service in the local datacenter + ServiceGroups map[structs.ServiceID]structs.CheckServiceNodes + // map of service id to an associated service-resolver config entry for that service + ServiceResolvers map[structs.ServiceID]*structs.ServiceResolverConfigEntry + // map of datacenter names to services of kind mesh-gateway in that datacenter + GatewayGroups map[string]structs.CheckServiceNodes } func (c *configSnapshotMeshGateway) IsEmpty() bool { diff --git a/agent/xds/clusters.go b/agent/xds/clusters.go index f8f78c846a..df1bf2994f 100644 --- a/agent/xds/clusters.go +++ b/agent/xds/clusters.go @@ -136,18 +136,19 @@ func (s *Server) clustersFromSnapshotMeshGateway(cfgSnap *proxycfg.ConfigSnapsho return nil, err } clusters = append(clusters, cluster) - } - // generate the service subset clusters - for svc, resolver := range cfgSnap.MeshGateway.ServiceResolvers { - for subsetName, _ := range resolver.Subsets { - clusterName := connect.ServiceSNI(svc.ID, subsetName, svc.NamespaceOrDefault(), cfgSnap.Datacenter, cfgSnap.Roots.TrustDomain) + // if there is a service-resolver for this service then also setup subset clusters for it + if resolver, ok := cfgSnap.MeshGateway.ServiceResolvers[svc]; ok { + // generate 1 cluster for each service subset + for subsetName, _ := range resolver.Subsets { + clusterName := connect.ServiceSNI(svc.ID, subsetName, svc.NamespaceOrDefault(), cfgSnap.Datacenter, cfgSnap.Roots.TrustDomain) - cluster, err := s.makeMeshGatewayCluster(clusterName, cfgSnap) - if err != nil { - return nil, err + cluster, err := s.makeMeshGatewayCluster(clusterName, cfgSnap) + if err != nil { + return nil, err + } + clusters = append(clusters, cluster) } - clusters = append(clusters, cluster) } } diff --git a/agent/xds/clusters_test.go b/agent/xds/clusters_test.go index 1ef4f6dbaa..831b59d405 100644 --- a/agent/xds/clusters_test.go +++ b/agent/xds/clusters_test.go @@ -275,6 +275,42 @@ func TestClustersFromSnapshot(t *testing.T) { } }, }, + { + name: "mesh-gateway-ignore-extra-resolvers", + create: proxycfg.TestConfigSnapshotMeshGateway, + setup: func(snap *proxycfg.ConfigSnapshot) { + snap.MeshGateway.ServiceResolvers = map[structs.ServiceID]*structs.ServiceResolverConfigEntry{ + structs.NewServiceID("bar", nil): &structs.ServiceResolverConfigEntry{ + Kind: structs.ServiceResolver, + Name: "bar", + DefaultSubset: "v2", + Subsets: map[string]structs.ServiceResolverSubset{ + "v1": structs.ServiceResolverSubset{ + Filter: "Service.Meta.Version == 1", + }, + "v2": structs.ServiceResolverSubset{ + Filter: "Service.Meta.Version == 2", + OnlyPassing: true, + }, + }, + }, + structs.NewServiceID("notfound", nil): &structs.ServiceResolverConfigEntry{ + Kind: structs.ServiceResolver, + Name: "notfound", + DefaultSubset: "v2", + Subsets: map[string]structs.ServiceResolverSubset{ + "v1": structs.ServiceResolverSubset{ + Filter: "Service.Meta.Version == 1", + }, + "v2": structs.ServiceResolverSubset{ + Filter: "Service.Meta.Version == 2", + OnlyPassing: true, + }, + }, + }, + } + }, + }, } for _, tt := range tests { diff --git a/agent/xds/endpoints.go b/agent/xds/endpoints.go index 607ccda7f6..2e64e70beb 100644 --- a/agent/xds/endpoints.go +++ b/agent/xds/endpoints.go @@ -17,6 +17,10 @@ import ( bexpr "github.com/hashicorp/go-bexpr" ) +const ( + UnnamedSubset = "" +) + // endpointsFromSnapshot returns the xDS API representation of the "endpoints" func (s *Server) endpointsFromSnapshot(cfgSnap *proxycfg.ConfigSnapshot, token string) ([]proto.Message, error) { if cfgSnap == nil { @@ -149,6 +153,23 @@ func (s *Server) endpointsFromSnapshotConnectProxy(cfgSnap *proxycfg.ConfigSnaps return resources, nil } +func (s *Server) filterSubsetEndpoints(subset *structs.ServiceResolverSubset, endpoints structs.CheckServiceNodes) (structs.CheckServiceNodes, error) { + // locally execute the subsets filter + if subset.Filter != "" { + filter, err := bexpr.CreateFilter(subset.Filter, nil, endpoints) + if err != nil { + return nil, err + } + + raw, err := filter.Execute(endpoints) + if err != nil { + return nil, err + } + return raw.(structs.CheckServiceNodes), nil + } + return endpoints, nil +} + func (s *Server) endpointsFromSnapshotMeshGateway(cfgSnap *proxycfg.ConfigSnapshot, token string) ([]proto.Message, error) { resources := make([]proto.Message, 0, len(cfgSnap.MeshGateway.GatewayGroups)+len(cfgSnap.MeshGateway.ServiceGroups)) @@ -165,47 +186,38 @@ func (s *Server) endpointsFromSnapshotMeshGateway(cfgSnap *proxycfg.ConfigSnapsh resources = append(resources, la) } - // generate the endpoints for the local service groups + // Generate the endpoints for each service and its subsets for svc, endpoints := range cfgSnap.MeshGateway.ServiceGroups { - clusterName := connect.ServiceSNI(svc.ID, "", svc.NamespaceOrDefault(), cfgSnap.Datacenter, cfgSnap.Roots.TrustDomain) - la := makeLoadAssignment( - clusterName, - []loadAssignmentEndpointGroup{ - {Endpoints: endpoints}, - }, - cfgSnap.Datacenter, - ) - resources = append(resources, la) - } + clusterEndpoints := make(map[string]loadAssignmentEndpointGroup) + clusterEndpoints[UnnamedSubset] = loadAssignmentEndpointGroup{Endpoints: endpoints, OnlyPassing: false} - // generate the endpoints for the service subsets - for svc, resolver := range cfgSnap.MeshGateway.ServiceResolvers { - for subsetName, subset := range resolver.Subsets { - clusterName := connect.ServiceSNI(svc.ID, subsetName, svc.NamespaceOrDefault(), cfgSnap.Datacenter, cfgSnap.Roots.TrustDomain) - - endpoints := cfgSnap.MeshGateway.ServiceGroups[svc] - - // locally execute the subsets filter - if subset.Filter != "" { - filter, err := bexpr.CreateFilter(subset.Filter, nil, endpoints) + // Collect all of the loadAssignmentEndpointGroups for the various subsets. We do this before generating + // the endpoints for the default/unnamed subset so that we can take into account the DefaultSubset on the + // service-resolver which may prevent the default/unnamed cluster from creating endpoints for all service + // instances. + if resolver, hasResolver := cfgSnap.MeshGateway.ServiceResolvers[svc]; hasResolver { + for subsetName, subset := range resolver.Subsets { + subsetEndpoints, err := s.filterSubsetEndpoints(&subset, endpoints) if err != nil { return nil, err } + group := loadAssignmentEndpointGroup{Endpoints: subsetEndpoints, OnlyPassing: subset.OnlyPassing} + clusterEndpoints[subsetName] = group - raw, err := filter.Execute(endpoints) - if err != nil { - return nil, err + // if this subset is the default then override the unnamed subset with this configuration + if subsetName == resolver.DefaultSubset { + clusterEndpoints[UnnamedSubset] = group } - endpoints = raw.(structs.CheckServiceNodes) } + } + // now generate the load assignment for all subsets + for subsetName, group := range clusterEndpoints { + clusterName := connect.ServiceSNI(svc.ID, subsetName, svc.NamespaceOrDefault(), cfgSnap.Datacenter, cfgSnap.Roots.TrustDomain) la := makeLoadAssignment( clusterName, []loadAssignmentEndpointGroup{ - { - Endpoints: endpoints, - OnlyPassing: subset.OnlyPassing, - }, + group, }, cfgSnap.Datacenter, ) diff --git a/agent/xds/endpoints_test.go b/agent/xds/endpoints_test.go index ebb4da4f54..e3460b25da 100644 --- a/agent/xds/endpoints_test.go +++ b/agent/xds/endpoints_test.go @@ -340,6 +340,42 @@ func Test_endpointsFromSnapshot(t *testing.T) { } }, }, + { + name: "mesh-gateway-default-service-subset", + create: proxycfg.TestConfigSnapshotMeshGateway, + setup: func(snap *proxycfg.ConfigSnapshot) { + snap.MeshGateway.ServiceResolvers = map[structs.ServiceID]*structs.ServiceResolverConfigEntry{ + structs.NewServiceID("bar", nil): &structs.ServiceResolverConfigEntry{ + Kind: structs.ServiceResolver, + Name: "bar", + DefaultSubset: "v2", + Subsets: map[string]structs.ServiceResolverSubset{ + "v1": structs.ServiceResolverSubset{ + Filter: "Service.Meta.version == 1", + }, + "v2": structs.ServiceResolverSubset{ + Filter: "Service.Meta.version == 2", + OnlyPassing: true, + }, + }, + }, + structs.NewServiceID("foo", nil): &structs.ServiceResolverConfigEntry{ + Kind: structs.ServiceResolver, + Name: "foo", + DefaultSubset: "v2", + Subsets: map[string]structs.ServiceResolverSubset{ + "v1": structs.ServiceResolverSubset{ + Filter: "Service.Meta.version == 1", + }, + "v2": structs.ServiceResolverSubset{ + Filter: "Service.Meta.version == 2", + OnlyPassing: true, + }, + }, + }, + } + }, + }, } for _, tt := range tests { diff --git a/agent/xds/testdata/clusters/mesh-gateway-ignore-extra-resolvers.golden b/agent/xds/testdata/clusters/mesh-gateway-ignore-extra-resolvers.golden new file mode 100644 index 0000000000..aa8d426119 --- /dev/null +++ b/agent/xds/testdata/clusters/mesh-gateway-ignore-extra-resolvers.golden @@ -0,0 +1,87 @@ +{ + "versionInfo": "00000001", + "resources": [ + { + "@type": "type.googleapis.com/envoy.api.v2.Cluster", + "name": "bar.default.dc1.internal.11111111-2222-3333-4444-555555555555.consul", + "type": "EDS", + "edsClusterConfig": { + "edsConfig": { + "ads": { + + } + } + }, + "connectTimeout": "5s", + "outlierDetection": { + + } + }, + { + "@type": "type.googleapis.com/envoy.api.v2.Cluster", + "name": "dc2.internal.11111111-2222-3333-4444-555555555555.consul", + "type": "EDS", + "edsClusterConfig": { + "edsConfig": { + "ads": { + + } + } + }, + "connectTimeout": "5s", + "outlierDetection": { + + } + }, + { + "@type": "type.googleapis.com/envoy.api.v2.Cluster", + "name": "foo.default.dc1.internal.11111111-2222-3333-4444-555555555555.consul", + "type": "EDS", + "edsClusterConfig": { + "edsConfig": { + "ads": { + + } + } + }, + "connectTimeout": "5s", + "outlierDetection": { + + } + }, + { + "@type": "type.googleapis.com/envoy.api.v2.Cluster", + "name": "v1.bar.default.dc1.internal.11111111-2222-3333-4444-555555555555.consul", + "type": "EDS", + "edsClusterConfig": { + "edsConfig": { + "ads": { + + } + } + }, + "connectTimeout": "5s", + "outlierDetection": { + + } + }, + { + "@type": "type.googleapis.com/envoy.api.v2.Cluster", + "name": "v2.bar.default.dc1.internal.11111111-2222-3333-4444-555555555555.consul", + "type": "EDS", + "edsClusterConfig": { + "edsConfig": { + "ads": { + + } + } + }, + "connectTimeout": "5s", + "outlierDetection": { + + } + } + ], + "typeUrl": "type.googleapis.com/envoy.api.v2.Cluster", + "nonce": "00000001" +} \ No newline at end of file diff --git a/agent/xds/testdata/endpoints/mesh-gateway-default-service-subset.golden b/agent/xds/testdata/endpoints/mesh-gateway-default-service-subset.golden new file mode 100644 index 0000000000..9f48144918 --- /dev/null +++ b/agent/xds/testdata/endpoints/mesh-gateway-default-service-subset.golden @@ -0,0 +1,221 @@ +{ + "versionInfo": "00000001", + "resources": [ + { + "@type": "type.googleapis.com/envoy.api.v2.ClusterLoadAssignment", + "clusterName": "bar.default.dc1.internal.11111111-2222-3333-4444-555555555555.consul", + "endpoints": [ + { + "lbEndpoints": [ + { + "endpoint": { + "address": { + "socketAddress": { + "address": "172.16.1.8", + "portValue": 2222 + } + } + }, + "healthStatus": "HEALTHY", + "loadBalancingWeight": 1 + } + ] + } + ] + }, + { + "@type": "type.googleapis.com/envoy.api.v2.ClusterLoadAssignment", + "clusterName": "dc2.internal.11111111-2222-3333-4444-555555555555.consul", + "endpoints": [ + { + "lbEndpoints": [ + { + "endpoint": { + "address": { + "socketAddress": { + "address": "198.18.1.1", + "portValue": 443 + } + } + }, + "healthStatus": "HEALTHY", + "loadBalancingWeight": 1 + }, + { + "endpoint": { + "address": { + "socketAddress": { + "address": "198.18.1.2", + "portValue": 443 + } + } + }, + "healthStatus": "HEALTHY", + "loadBalancingWeight": 1 + } + ] + } + ] + }, + { + "@type": "type.googleapis.com/envoy.api.v2.ClusterLoadAssignment", + "clusterName": "foo.default.dc1.internal.11111111-2222-3333-4444-555555555555.consul", + "endpoints": [ + { + "lbEndpoints": [ + { + "endpoint": { + "address": { + "socketAddress": { + "address": "172.16.1.5", + "portValue": 2222 + } + } + }, + "healthStatus": "HEALTHY", + "loadBalancingWeight": 1 + }, + { + "endpoint": { + "address": { + "socketAddress": { + "address": "172.16.1.9", + "portValue": 2222 + } + } + }, + "healthStatus": "UNHEALTHY", + "loadBalancingWeight": 1 + } + ] + } + ] + }, + { + "@type": "type.googleapis.com/envoy.api.v2.ClusterLoadAssignment", + "clusterName": "v1.bar.default.dc1.internal.11111111-2222-3333-4444-555555555555.consul", + "endpoints": [ + { + "lbEndpoints": [ + { + "endpoint": { + "address": { + "socketAddress": { + "address": "172.16.1.6", + "portValue": 2222 + } + } + }, + "healthStatus": "HEALTHY", + "loadBalancingWeight": 1 + }, + { + "endpoint": { + "address": { + "socketAddress": { + "address": "172.16.1.7", + "portValue": 2222 + } + } + }, + "healthStatus": "HEALTHY", + "loadBalancingWeight": 1 + } + ] + } + ] + }, + { + "@type": "type.googleapis.com/envoy.api.v2.ClusterLoadAssignment", + "clusterName": "v1.foo.default.dc1.internal.11111111-2222-3333-4444-555555555555.consul", + "endpoints": [ + { + "lbEndpoints": [ + { + "endpoint": { + "address": { + "socketAddress": { + "address": "172.16.1.3", + "portValue": 2222 + } + } + }, + "healthStatus": "HEALTHY", + "loadBalancingWeight": 1 + }, + { + "endpoint": { + "address": { + "socketAddress": { + "address": "172.16.1.4", + "portValue": 2222 + } + } + }, + "healthStatus": "HEALTHY", + "loadBalancingWeight": 1 + } + ] + } + ] + }, + { + "@type": "type.googleapis.com/envoy.api.v2.ClusterLoadAssignment", + "clusterName": "v2.bar.default.dc1.internal.11111111-2222-3333-4444-555555555555.consul", + "endpoints": [ + { + "lbEndpoints": [ + { + "endpoint": { + "address": { + "socketAddress": { + "address": "172.16.1.8", + "portValue": 2222 + } + } + }, + "healthStatus": "HEALTHY", + "loadBalancingWeight": 1 + } + ] + } + ] + }, + { + "@type": "type.googleapis.com/envoy.api.v2.ClusterLoadAssignment", + "clusterName": "v2.foo.default.dc1.internal.11111111-2222-3333-4444-555555555555.consul", + "endpoints": [ + { + "lbEndpoints": [ + { + "endpoint": { + "address": { + "socketAddress": { + "address": "172.16.1.5", + "portValue": 2222 + } + } + }, + "healthStatus": "HEALTHY", + "loadBalancingWeight": 1 + }, + { + "endpoint": { + "address": { + "socketAddress": { + "address": "172.16.1.9", + "portValue": 2222 + } + } + }, + "healthStatus": "UNHEALTHY", + "loadBalancingWeight": 1 + } + ] + } + ] + } + ], + "typeUrl": "type.googleapis.com/envoy.api.v2.ClusterLoadAssignment", + "nonce": "00000001" +} \ No newline at end of file