From 43193a35c68fc375b695ff95a56891c87c7d3b30 Mon Sep 17 00:00:00 2001 From: "R.B. Boyer" Date: Mon, 8 Feb 2021 10:19:57 -0600 Subject: [PATCH] xds: prevent LDS flaps in mesh gateways due to unstable datacenter lists (#9651) Also fix a similar issue in Terminating Gateways that was masked by an overzealous test. --- .changelog/9651.txt | 3 +++ agent/proxycfg/snapshot.go | 5 +++++ agent/xds/listeners.go | 10 ++++++++++ agent/xds/listeners_test.go | 18 ++++-------------- 4 files changed, 22 insertions(+), 14 deletions(-) create mode 100644 .changelog/9651.txt diff --git a/.changelog/9651.txt b/.changelog/9651.txt new file mode 100644 index 0000000000..eb3be8db67 --- /dev/null +++ b/.changelog/9651.txt @@ -0,0 +1,3 @@ +```release-note:bug +xds: prevent LDS flaps in mesh gateways due to unstable datacenter lists; also prevent some flaps in terminating gateways as well +``` diff --git a/agent/proxycfg/snapshot.go b/agent/proxycfg/snapshot.go index 03d548fc91..08cb91da52 100644 --- a/agent/proxycfg/snapshot.go +++ b/agent/proxycfg/snapshot.go @@ -3,6 +3,7 @@ package proxycfg import ( "context" "fmt" + "sort" "github.com/hashicorp/consul/agent/structs" "github.com/mitchellh/copystructure" @@ -245,6 +246,10 @@ func (c *configSnapshotMeshGateway) Datacenters() []string { dcs = append(dcs, dc) } } + + // Always sort the results to ensure we generate deterministic things over + // xDS, such as mesh-gateway listener filter chains. + sort.Strings(dcs) return dcs } diff --git a/agent/xds/listeners.go b/agent/xds/listeners.go index 23f6613d82..4b9b6defaf 100644 --- a/agent/xds/listeners.go +++ b/agent/xds/listeners.go @@ -777,6 +777,16 @@ func (s *Server) makeTerminatingGatewayListener( } } + // Before we add the fallback, sort these chains by the matched name. All + // of these filter chains are independent, but envoy requires them to be in + // some order. If we put them in a random order then every xDS iteration + // envoy will force the listener to be replaced. Sorting these has no + // effect on how they operate, but it does mean that we won't churn + // listeners at idle. + sort.Slice(l.FilterChains, func(i, j int) bool { + return l.FilterChains[i].FilterChainMatch.ServerNames[0] < l.FilterChains[j].FilterChainMatch.ServerNames[0] + }) + // This fallback catch-all filter ensures a listener will be present for health checks to pass // Envoy will reset these connections since known endpoints are caught by filter chain matches above tcpProxy, err := makeTCPProxyFilter(name, "", "terminating_gateway.") diff --git a/agent/xds/listeners_test.go b/agent/xds/listeners_test.go index 0e9c4b9326..2772a99665 100644 --- a/agent/xds/listeners_test.go +++ b/agent/xds/listeners_test.go @@ -525,24 +525,14 @@ func TestListenersFromSnapshot(t *testing.T) { ProxyFeatures: sf, } listeners, err := s.listenersFromSnapshot(cInfo, snap) + require.NoError(err) + + // The order of listeners returned via LDS isn't relevant, so it's safe + // to sort these for the purposes of test comparisons. sort.Slice(listeners, func(i, j int) bool { return listeners[i].(*envoy.Listener).Name < listeners[j].(*envoy.Listener).Name }) - // For terminating gateways we create filter chain matches for services/subsets from the ServiceGroups map - for i := 0; i < len(listeners); i++ { - l := listeners[i].(*envoy.Listener) - - if l.FilterChains != nil { - // Sort chains by the matched name with the exception of the last one - // The last chain is a fallback and does not have a FilterChainMatch - sort.Slice(l.FilterChains[:len(l.FilterChains)-1], func(i, j int) bool { - return l.FilterChains[i].FilterChainMatch.ServerNames[0] < l.FilterChains[j].FilterChainMatch.ServerNames[0] - }) - } - } - - require.NoError(err) r, err := createResponse(ListenerType, "00000001", "00000001", listeners) require.NoError(err)