mirror of
https://github.com/status-im/consul.git
synced 2025-01-10 22:06:20 +00:00
Makes server manager shift away from failed servers from Serf events.
Because this code was doing pointer equality checks, it would work for the case of a failed attempted RPC because the objects are from the manager itself: https://github.com/hashicorp/consul/blob/v1.0.3/agent/consul/rpc.go#L283-L302 But the pointer check would always fail for events coming in from the Serf path because the server object is newly-created: https://github.com/hashicorp/consul/blob/v1.0.3/agent/router/serf_adapter.go#L14-L40 This means that we didn't proactively shift RPC traffic away from a failed server, we'd have to wait for an RPC to fail, which exposes the error to the calling client. By switching over to a name check vs. a pointer check we get the correct behavior. We added a DEBUG log as well to help observe this behavior during integrated testing. Related to #3863 since the fix here needed the same logic duplicated, owing to the complicated atomic stuff. /cc @dadgar for a heads up in case this also affects Nomad.
This commit is contained in:
parent
0123d9db2e
commit
d9a6e2a901
@ -256,7 +256,7 @@ func (m *Manager) NotifyFailedServer(s *metadata.Server) {
|
||||
// the server to the end of the list.
|
||||
|
||||
// Only rotate the server list when there is more than one server
|
||||
if len(l.servers) > 1 && l.servers[0] == s &&
|
||||
if len(l.servers) > 1 && l.servers[0].Name == s.Name &&
|
||||
// Use atomic.CAS to emulate a TryLock().
|
||||
atomic.CompareAndSwapInt32(&m.notifyFailedBarrier, 0, 1) {
|
||||
defer atomic.StoreInt32(&m.notifyFailedBarrier, 0)
|
||||
@ -267,9 +267,10 @@ func (m *Manager) NotifyFailedServer(s *metadata.Server) {
|
||||
defer m.listLock.Unlock()
|
||||
l = m.getServerList()
|
||||
|
||||
if len(l.servers) > 1 && l.servers[0] == s {
|
||||
if len(l.servers) > 1 && l.servers[0].Name == s.Name {
|
||||
l.servers = l.cycleServer()
|
||||
m.saveServerList(l)
|
||||
m.logger.Printf(`[DEBUG] manager: cycled away from server "%s"`, s.Name)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
Loading…
x
Reference in New Issue
Block a user