From f0e6e4e69784589fb0056bb0ae2ff0b8287fdce4 Mon Sep 17 00:00:00 2001 From: "R.B. Boyer" <4903+rboyer@users.noreply.github.com> Date: Tue, 12 Jul 2022 11:17:33 -0500 Subject: [PATCH] state: prohibit changing an exported tcp discovery chain in a way that would break SAN validation (#13727) For L4/tcp exported services the mesh gateways will not be terminating TLS. A caller in one peer will be directly establishing TLS connections to the ultimate exported service in the other peer. The caller will be doing SAN validation using the replicated SpiffeID values shipped from the exporting side. There are a class of discovery chain edits that could be done on the exporting side that would cause the introduction of a new SpiffeID value. In between the time of the config entry update on the exporting side and the importing side getting updated peer stream data requests to the exported service would fail due to SAN validation errors. This is unacceptable so instead prohibit the exporting peer from making changes that would break peering in this way. --- agent/consul/state/config_entry.go | 56 ++++++++++++++-- agent/consul/state/config_entry_test.go | 19 ++++++ .../peerstream/subscription_manager_test.go | 64 ++++--------------- 3 files changed, 83 insertions(+), 56 deletions(-) diff --git a/agent/consul/state/config_entry.go b/agent/consul/state/config_entry.go index 7da7b21767..af0fe21133 100644 --- a/agent/consul/state/config_entry.go +++ b/agent/consul/state/config_entry.go @@ -804,7 +804,7 @@ func validateProposedConfigEntryInServiceGraph( exportedServicesByPartition = make(map[string]map[structs.ServiceName]struct{}) ) for chain := range checkChains { - protocol, topNode, err := testCompileDiscoveryChain(tx, chain.ID, overrides, &chain.EnterpriseMeta) + protocol, topNode, newTargets, err := testCompileDiscoveryChain(tx, chain.ID, overrides, &chain.EnterpriseMeta) if err != nil { return err } @@ -830,6 +830,27 @@ func validateProposedConfigEntryInServiceGraph( if err := validateChainIsPeerExportSafe(tx, chainSvc, overrides); err != nil { return err } + + // If a TCP (L4) discovery chain is peer exported we have to take + // care to prohibit certain edits to service-resolvers. + if !structs.IsProtocolHTTPLike(protocol) { + _, _, oldTargets, err := testCompileDiscoveryChain(tx, chain.ID, nil, &chain.EnterpriseMeta) + if err != nil { + return fmt.Errorf("error compiling current discovery chain for %q: %w", chainSvc, err) + } + + // Ensure that you can't introduce any new targets that would + // produce a new SpiffeID for this L4 service. + oldSpiffeIDs := convertTargetsToTestSpiffeIDs(oldTargets) + newSpiffeIDs := convertTargetsToTestSpiffeIDs(newTargets) + for id, targetID := range newSpiffeIDs { + if _, exists := oldSpiffeIDs[id]; !exists { + return fmt.Errorf("peer exported service %q uses protocol=%q and cannot introduce new discovery chain targets like %q", + chainSvc, protocol, targetID, + ) + } + } + } } } @@ -964,10 +985,10 @@ func testCompileDiscoveryChain( chainName string, overrides map[configentry.KindName]structs.ConfigEntry, entMeta *acl.EnterpriseMeta, -) (string, *structs.DiscoveryGraphNode, error) { +) (string, *structs.DiscoveryGraphNode, map[string]*structs.DiscoveryTarget, error) { _, speculativeEntries, err := readDiscoveryChainConfigEntriesTxn(tx, nil, chainName, overrides, entMeta) if err != nil { - return "", nil, err + return "", nil, nil, err } // Note we use an arbitrary namespace and datacenter as those would not @@ -984,10 +1005,10 @@ func testCompileDiscoveryChain( } chain, err := discoverychain.Compile(req) if err != nil { - return "", nil, err + return "", nil, nil, err } - return chain.Protocol, chain.Nodes[chain.StartNode], nil + return chain.Protocol, chain.Nodes[chain.StartNode], chain.Targets, nil } func (s *Store) ServiceDiscoveryChain( @@ -1633,7 +1654,7 @@ func protocolForService( EvaluateInPartition: svc.PartitionOrDefault(), EvaluateInDatacenter: "dc1", // Use a dummy trust domain since that won't affect the protocol here. - EvaluateInTrustDomain: "b6fc9da3-03d4-4b5a-9134-c045e9b20152.consul", + EvaluateInTrustDomain: dummyTrustDomain, Entries: entries, } chain, err := discoverychain.Compile(req) @@ -1643,6 +1664,8 @@ func protocolForService( return maxIdx, chain.Protocol, nil } +const dummyTrustDomain = "b6fc9da3-03d4-4b5a-9134-c045e9b20152.consul" + func newConfigEntryQuery(c structs.ConfigEntry) configentry.KindName { return configentry.NewKindName(c.GetKind(), c.GetName(), c.GetEnterpriseMeta()) } @@ -1664,3 +1687,24 @@ func (q ConfigEntryKindQuery) NamespaceOrDefault() string { func (q ConfigEntryKindQuery) PartitionOrDefault() string { return q.EnterpriseMeta.PartitionOrDefault() } + +// convertTargetsToTestSpiffeIDs indexes the provided targets by their eventual +// spiffeid values using a dummy trust domain. Returns a map of SpiffeIDs to +// targetID values which can be used for error output. +func convertTargetsToTestSpiffeIDs(targets map[string]*structs.DiscoveryTarget) map[string]string { + out := make(map[string]string) + for tid, t := range targets { + testSpiffeID := connect.SpiffeIDService{ + Host: dummyTrustDomain, + Partition: t.Partition, + Namespace: t.Namespace, + Datacenter: t.Datacenter, + Service: t.Service, + } + uri := testSpiffeID.URI().String() + if _, ok := out[uri]; !ok { + out[uri] = tid + } + } + return out +} diff --git a/agent/consul/state/config_entry_test.go b/agent/consul/state/config_entry_test.go index 92f2a88601..78140d021b 100644 --- a/agent/consul/state/config_entry_test.go +++ b/agent/consul/state/config_entry_test.go @@ -1575,6 +1575,25 @@ func TestStore_ConfigEntry_GraphValidation(t *testing.T) { }, expectErr: `contains cross-datacenter failover`, }, + "cannot redirect a peer exported tcp service": { + entries: []structs.ConfigEntry{ + &structs.ExportedServicesConfigEntry{ + Name: "default", + Services: []structs.ExportedService{{ + Name: "main", + Consumers: []structs.ServiceConsumer{{PeerName: "my-peer"}}, + }}, + }, + }, + opAdd: &structs.ServiceResolverConfigEntry{ + Kind: structs.ServiceResolver, + Name: "main", + Redirect: &structs.ServiceResolverRedirect{ + Service: "other", + }, + }, + expectErr: `cannot introduce new discovery chain targets like`, + }, } for name, tc := range cases { diff --git a/agent/grpc/public/services/peerstream/subscription_manager_test.go b/agent/grpc/public/services/peerstream/subscription_manager_test.go index ea35eaccc2..82b1a7e5f1 100644 --- a/agent/grpc/public/services/peerstream/subscription_manager_test.go +++ b/agent/grpc/public/services/peerstream/subscription_manager_test.go @@ -53,6 +53,19 @@ func TestSubscriptionManager_RegisterDeregister(t *testing.T) { checkEvent(t, got, gatewayCorrID, 0) }) + // Initially add in L4 failover so that later we can test removing it. We + // cannot do the other way around because it would fail validation to + // remove a target. + backend.ensureConfigEntry(t, &structs.ServiceResolverConfigEntry{ + Kind: structs.ServiceResolver, + Name: "mysql", + Failover: map[string]structs.ServiceResolverFailover{ + "*": { + Service: "failover", + }, + }, + }) + testutil.RunStep(t, "initial export syncs empty instance lists", func(t *testing.T) { backend.ensureConfigEntry(t, &structs.ExportedServicesConfigEntry{ Name: "default", @@ -262,6 +275,7 @@ func TestSubscriptionManager_RegisterDeregister(t *testing.T) { }, SpiffeID: []string{ "spiffe://11111111-2222-3333-4444-555555555555.consul/ns/default/dc/dc1/svc/mysql", + "spiffe://11111111-2222-3333-4444-555555555555.consul/ns/default/dc/dc1/svc/failover", }, Protocol: "tcp", }, @@ -287,60 +301,10 @@ func TestSubscriptionManager_RegisterDeregister(t *testing.T) { backend.ensureConfigEntry(t, &structs.ServiceResolverConfigEntry{ Kind: structs.ServiceResolver, Name: "mysql", - Failover: map[string]structs.ServiceResolverFailover{ - "*": { - Service: "failover", - }, - }, }) // ensure we get updated peer meta - expectEvents(t, subCh, - func(t *testing.T, got cache.UpdateEvent) { - require.Equal(t, mysqlProxyCorrID, got.CorrelationID) - res := got.Result.(*pbservice.IndexedCheckServiceNodes) - require.Equal(t, uint64(0), res.Index) - - require.Len(t, res.Nodes, 1) - prototest.AssertDeepEqual(t, &pbservice.CheckServiceNode{ - Node: pbNode("mgw", "10.1.1.1", partition), - Service: &pbservice.NodeService{ - Kind: "connect-proxy", - ID: "mysql-sidecar-proxy-instance-0", - Service: "mysql-sidecar-proxy", - Port: 8443, - Weights: &pbservice.Weights{ - Passing: 1, - Warning: 1, - }, - EnterpriseMeta: pbcommon.DefaultEnterpriseMeta, - Proxy: &pbservice.ConnectProxyConfig{ - DestinationServiceID: "mysql-instance-0", - DestinationServiceName: "mysql", - }, - Connect: &pbservice.ServiceConnect{ - PeerMeta: &pbservice.PeeringServiceMeta{ - SNI: []string{ - "mysql.default.default.my-peering.external.11111111-2222-3333-4444-555555555555.consul", - }, - SpiffeID: []string{ - "spiffe://11111111-2222-3333-4444-555555555555.consul/ns/default/dc/dc1/svc/mysql", - "spiffe://11111111-2222-3333-4444-555555555555.consul/ns/default/dc/dc1/svc/failover", - }, - Protocol: "tcp", - }, - }, - }, - }, res.Nodes[0]) - }, - ) - - // reset so the next subtest is valid - backend.deleteConfigEntry(t, structs.ServiceResolver, "mysql") - - // ensure we get peer meta is restored - expectEvents(t, subCh, func(t *testing.T, got cache.UpdateEvent) { require.Equal(t, mysqlProxyCorrID, got.CorrelationID)