Derek Menteer 0ac8ae6c3b
Fix xDS deadlock due to syncLoop termination. (#20867)
* Fix xDS deadlock due to syncLoop termination.

This fixes an issue where agentless xDS streams can deadlock permanently until
a server is restarted. When this issue occurs, no new proxies are able to
successfully connect to the server.

Effectively, the trigger for this deadlock stems from the following return
statement:
https://github.com/hashicorp/consul/blob/v1.18.0/agent/proxycfg-sources/catalog/config_source.go#L199-L202

When this happens, the entire `syncLoop()` terminates and stops consuming from
the following channel:
https://github.com/hashicorp/consul/blob/v1.18.0/agent/proxycfg-sources/catalog/config_source.go#L182-L192

Which results in the `ConfigSource.cleanup()` function never receiving a
response and holding a mutex indefinitely:
https://github.com/hashicorp/consul/blob/v1.18.0/agent/proxycfg-sources/catalog/config_source.go#L241-L247

Because this mutex is shared, it effectively deadlocks the server's ability to
process new xDS streams.

----

The fix to this issue involves removing the `chan chan struct{}` used like an
RPC-over-channels pattern and replacing it with two distinct channels:

+ `stopSyncLoopCh` - indicates that the `syncLoop()` should terminate soon.  +
`syncLoopDoneCh` - indicates that the `syncLoop()` has terminated.

Splitting these two concepts out and deferring a `close(syncLoopDoneCh)` in the
`syncLoop()` function ensures that the deadlock above should no longer occur.

We also now evict xDS connections of all proxies for the corresponding
`syncLoop()` whenever it encounters an irrecoverable error. This is done by
hoisting the new `syncLoopDoneCh` upwards so that it's visible to the xDS delta
processing. Prior to this fix, the behavior was to simply orphan them so they
would never receive catalog-registration or service-defaults updates.

* Add changelog.
2024-03-15 13:57:11 -05:00

87 lines
2.7 KiB
Go

// Code generated by mockery v2.37.1. DO NOT EDIT.
package catalog
import (
limiter "github.com/hashicorp/consul/agent/grpc-external/limiter"
mock "github.com/stretchr/testify/mock"
pbresource "github.com/hashicorp/consul/proto-public/pbresource"
proxycfg "github.com/hashicorp/consul/agent/proxycfg"
proxysnapshot "github.com/hashicorp/consul/internal/mesh/proxy-snapshot"
)
// MockWatcher is an autogenerated mock type for the Watcher type
type MockWatcher struct {
mock.Mock
}
// Watch provides a mock function with given fields: proxyID, nodeName, token
func (_m *MockWatcher) Watch(proxyID *pbresource.ID, nodeName string, token string) (<-chan proxysnapshot.ProxySnapshot, limiter.SessionTerminatedChan, proxycfg.SrcTerminatedChan, proxysnapshot.CancelFunc, error) {
ret := _m.Called(proxyID, nodeName, token)
var r0 <-chan proxysnapshot.ProxySnapshot
var r1 limiter.SessionTerminatedChan
var r2 proxycfg.SrcTerminatedChan
var r3 proxysnapshot.CancelFunc
var r4 error
if rf, ok := ret.Get(0).(func(*pbresource.ID, string, string) (<-chan proxysnapshot.ProxySnapshot, limiter.SessionTerminatedChan, proxycfg.SrcTerminatedChan, proxysnapshot.CancelFunc, error)); ok {
return rf(proxyID, nodeName, token)
}
if rf, ok := ret.Get(0).(func(*pbresource.ID, string, string) <-chan proxysnapshot.ProxySnapshot); ok {
r0 = rf(proxyID, nodeName, token)
} else {
if ret.Get(0) != nil {
r0 = ret.Get(0).(<-chan proxysnapshot.ProxySnapshot)
}
}
if rf, ok := ret.Get(1).(func(*pbresource.ID, string, string) limiter.SessionTerminatedChan); ok {
r1 = rf(proxyID, nodeName, token)
} else {
if ret.Get(1) != nil {
r1 = ret.Get(1).(limiter.SessionTerminatedChan)
}
}
if rf, ok := ret.Get(2).(func(*pbresource.ID, string, string) proxycfg.SrcTerminatedChan); ok {
r2 = rf(proxyID, nodeName, token)
} else {
if ret.Get(2) != nil {
r2 = ret.Get(2).(proxycfg.SrcTerminatedChan)
}
}
if rf, ok := ret.Get(3).(func(*pbresource.ID, string, string) proxysnapshot.CancelFunc); ok {
r3 = rf(proxyID, nodeName, token)
} else {
if ret.Get(3) != nil {
r3 = ret.Get(3).(proxysnapshot.CancelFunc)
}
}
if rf, ok := ret.Get(4).(func(*pbresource.ID, string, string) error); ok {
r4 = rf(proxyID, nodeName, token)
} else {
r4 = ret.Error(4)
}
return r0, r1, r2, r3, r4
}
// NewMockWatcher creates a new instance of MockWatcher. It also registers a testing interface on the mock and a cleanup function to assert the mocks expectations.
// The first argument is typically a *testing.T value.
func NewMockWatcher(t interface {
mock.TestingT
Cleanup(func())
}) *MockWatcher {
mock := &MockWatcher{}
mock.Mock.Test(t)
t.Cleanup(func() { mock.AssertExpectations(t) })
return mock
}