From 72f90754ae8c6dceeac08da4c1154b64e71dd184 Mon Sep 17 00:00:00 2001 From: Eric Haberkorn Date: Mon, 29 Aug 2022 13:46:41 -0400 Subject: [PATCH] Update max_ejection_percent on outlier detection for peered clusters to 100% (#14373) We can't trust health checks on peered services when service resolvers, splitters and routers are used. --- .changelog/14373.txt | 3 +++ agent/xds/clusters.go | 9 ++++++++- .../connect-proxy-with-peered-upstreams.latest.golden | 4 ++-- ...transparent-proxy-with-peered-upstreams.latest.golden | 6 +++--- 4 files changed, 16 insertions(+), 6 deletions(-) create mode 100644 .changelog/14373.txt diff --git a/.changelog/14373.txt b/.changelog/14373.txt new file mode 100644 index 0000000000..d9531b09ec --- /dev/null +++ b/.changelog/14373.txt @@ -0,0 +1,3 @@ +```release-note:improvement +xds: Set `max_ejection_percent` on Envoy's outlier detection to 100% for peered services. +``` diff --git a/agent/xds/clusters.go b/agent/xds/clusters.go index adde810d37..c3ac718472 100644 --- a/agent/xds/clusters.go +++ b/agent/xds/clusters.go @@ -772,6 +772,13 @@ func (s *ResourceGenerator) makeUpstreamClusterForPeerService( clusterName := generatePeeredClusterName(uid, tbs) + outlierDetection := ToOutlierDetection(cfg.PassiveHealthCheck) + // We can't rely on health checks for services on cluster peers because they + // don't take into account service resolvers, splitters and routers. Setting + // MaxEjectionPercent too 100% gives outlier detection the power to eject the + // entire cluster. + outlierDetection.MaxEjectionPercent = &wrappers.UInt32Value{Value: 100} + s.Logger.Trace("generating cluster for", "cluster", clusterName) if c == nil { c = &envoy_cluster_v3.Cluster{ @@ -785,7 +792,7 @@ func (s *ResourceGenerator) makeUpstreamClusterForPeerService( CircuitBreakers: &envoy_cluster_v3.CircuitBreakers{ Thresholds: makeThresholdsIfNeeded(cfg.Limits), }, - OutlierDetection: ToOutlierDetection(cfg.PassiveHealthCheck), + OutlierDetection: outlierDetection, } if cfg.Protocol == "http2" || cfg.Protocol == "grpc" { if err := s.setHttp2ProtocolOptions(c); err != nil { diff --git a/agent/xds/testdata/clusters/connect-proxy-with-peered-upstreams.latest.golden b/agent/xds/testdata/clusters/connect-proxy-with-peered-upstreams.latest.golden index d7c23515fc..29059b1435 100644 --- a/agent/xds/testdata/clusters/connect-proxy-with-peered-upstreams.latest.golden +++ b/agent/xds/testdata/clusters/connect-proxy-with-peered-upstreams.latest.golden @@ -58,7 +58,7 @@ "dnsRefreshRate": "10s", "dnsLookupFamily": "V4_ONLY", "outlierDetection": { - + "maxEjectionPercent": 100 }, "commonLbConfig": { "healthyPanicThreshold": { @@ -115,7 +115,7 @@ }, "outlierDetection": { - + "maxEjectionPercent": 100 }, "commonLbConfig": { "healthyPanicThreshold": { diff --git a/agent/xds/testdata/clusters/transparent-proxy-with-peered-upstreams.latest.golden b/agent/xds/testdata/clusters/transparent-proxy-with-peered-upstreams.latest.golden index 0dbbf4277d..d1f6d0bb0e 100644 --- a/agent/xds/testdata/clusters/transparent-proxy-with-peered-upstreams.latest.golden +++ b/agent/xds/testdata/clusters/transparent-proxy-with-peered-upstreams.latest.golden @@ -18,7 +18,7 @@ }, "outlierDetection": { - + "maxEjectionPercent": 100 }, "commonLbConfig": { "healthyPanicThreshold": { @@ -75,7 +75,7 @@ }, "outlierDetection": { - + "maxEjectionPercent": 100 }, "commonLbConfig": { "healthyPanicThreshold": { @@ -157,7 +157,7 @@ }, "outlierDetection": { - + "maxEjectionPercent": 100 }, "commonLbConfig": { "healthyPanicThreshold": {