diff --git a/.changelog/9651.txt b/.changelog/9651.txt new file mode 100644 index 0000000000..eb3be8db67 --- /dev/null +++ b/.changelog/9651.txt @@ -0,0 +1,3 @@ +```release-note:bug +xds: prevent LDS flaps in mesh gateways due to unstable datacenter lists; also prevent some flaps in terminating gateways as well +``` diff --git a/agent/proxycfg/snapshot.go b/agent/proxycfg/snapshot.go index 03d548fc91..08cb91da52 100644 --- a/agent/proxycfg/snapshot.go +++ b/agent/proxycfg/snapshot.go @@ -3,6 +3,7 @@ package proxycfg import ( "context" "fmt" + "sort" "github.com/hashicorp/consul/agent/structs" "github.com/mitchellh/copystructure" @@ -245,6 +246,10 @@ func (c *configSnapshotMeshGateway) Datacenters() []string { dcs = append(dcs, dc) } } + + // Always sort the results to ensure we generate deterministic things over + // xDS, such as mesh-gateway listener filter chains. + sort.Strings(dcs) return dcs } diff --git a/agent/xds/listeners.go b/agent/xds/listeners.go index 28f395a945..9c57ed4811 100644 --- a/agent/xds/listeners.go +++ b/agent/xds/listeners.go @@ -775,6 +775,16 @@ func (s *Server) makeTerminatingGatewayListener( } } + // Before we add the fallback, sort these chains by the matched name. All + // of these filter chains are independent, but envoy requires them to be in + // some order. If we put them in a random order then every xDS iteration + // envoy will force the listener to be replaced. Sorting these has no + // effect on how they operate, but it does mean that we won't churn + // listeners at idle. + sort.Slice(l.FilterChains, func(i, j int) bool { + return l.FilterChains[i].FilterChainMatch.ServerNames[0] < l.FilterChains[j].FilterChainMatch.ServerNames[0] + }) + // This fallback catch-all filter ensures a listener will be present for health checks to pass // Envoy will reset these connections since known endpoints are caught by filter chain matches above tcpProxy, err := makeTCPProxyFilter(name, "", "terminating_gateway.") diff --git a/agent/xds/listeners_test.go b/agent/xds/listeners_test.go index b30d0b80d4..5d900958f6 100644 --- a/agent/xds/listeners_test.go +++ b/agent/xds/listeners_test.go @@ -513,24 +513,14 @@ func TestListenersFromSnapshot(t *testing.T) { ProxyFeatures: sf, } listeners, err := s.listenersFromSnapshot(cInfo, snap) + require.NoError(err) + + // The order of listeners returned via LDS isn't relevant, so it's safe + // to sort these for the purposes of test comparisons. sort.Slice(listeners, func(i, j int) bool { return listeners[i].(*envoy.Listener).Name < listeners[j].(*envoy.Listener).Name }) - // For terminating gateways we create filter chain matches for services/subsets from the ServiceGroups map - for i := 0; i < len(listeners); i++ { - l := listeners[i].(*envoy.Listener) - - if l.FilterChains != nil { - // Sort chains by the matched name with the exception of the last one - // The last chain is a fallback and does not have a FilterChainMatch - sort.Slice(l.FilterChains[:len(l.FilterChains)-1], func(i, j int) bool { - return l.FilterChains[i].FilterChainMatch.ServerNames[0] < l.FilterChains[j].FilterChainMatch.ServerNames[0] - }) - } - } - - require.NoError(err) r, err := createResponse(ListenerType, "00000001", "00000001", listeners) require.NoError(err)