From 0e73777ce2327289020b0b15d40d603a4a6362e0 Mon Sep 17 00:00:00 2001 From: Preetha Appan Date: Tue, 15 Aug 2017 18:07:28 -0500 Subject: [PATCH] Update memberlist for a deadlock fix --- .../hashicorp/memberlist/memberlist.go | 41 +++++++++++-------- .../github.com/hashicorp/memberlist/state.go | 4 +- vendor/vendor.json | 2 +- 3 files changed, 28 insertions(+), 19 deletions(-) diff --git a/vendor/github.com/hashicorp/memberlist/memberlist.go b/vendor/github.com/hashicorp/memberlist/memberlist.go index db02845330..d5c175e5b2 100644 --- a/vendor/github.com/hashicorp/memberlist/memberlist.go +++ b/vendor/github.com/hashicorp/memberlist/memberlist.go @@ -22,9 +22,10 @@ import ( "strconv" "strings" "sync" + "sync/atomic" "time" - "github.com/hashicorp/go-multierror" + multierror "github.com/hashicorp/go-multierror" sockaddr "github.com/hashicorp/go-sockaddr" "github.com/miekg/dns" ) @@ -35,11 +36,14 @@ type Memberlist struct { numNodes uint32 // Number of known nodes (estimate) config *Config - shutdown bool + shutdown int32 // Used as an atomic boolean value shutdownCh chan struct{} - leave bool + leave int32 // Used as an atomic boolean value leaveBroadcast chan struct{} + shutdownLock sync.Mutex // Serializes calls to Shutdown + leaveLock sync.Mutex // Serializes calls to Leave + transport Transport handoff chan msgHandoff @@ -554,18 +558,17 @@ func (m *Memberlist) NumMembers() (alive int) { // This method is safe to call multiple times, but must not be called // after the cluster is already shut down. func (m *Memberlist) Leave(timeout time.Duration) error { - m.nodeLock.Lock() - // We can't defer m.nodeLock.Unlock() because m.deadNode will also try to - // acquire a lock so we need to Unlock before that. + m.leaveLock.Lock() + defer m.leaveLock.Unlock() - if m.shutdown { - m.nodeLock.Unlock() + if m.hasShutdown() { panic("leave after shutdown") } - if !m.leave { - m.leave = true + if !m.hasLeft() { + atomic.StoreInt32(&m.leave, 1) + m.nodeLock.Lock() state, ok := m.nodeMap[m.config.Name] m.nodeLock.Unlock() if !ok { @@ -591,8 +594,6 @@ func (m *Memberlist) Leave(timeout time.Duration) error { return fmt.Errorf("timeout waiting for leave broadcast") } } - } else { - m.nodeLock.Unlock() } return nil @@ -634,10 +635,10 @@ func (m *Memberlist) ProtocolVersion() uint8 { // // This method is safe to call multiple times. func (m *Memberlist) Shutdown() error { - m.nodeLock.Lock() - defer m.nodeLock.Unlock() + m.shutdownLock.Lock() + defer m.shutdownLock.Unlock() - if m.shutdown { + if m.hasShutdown() { return nil } @@ -647,8 +648,16 @@ func (m *Memberlist) Shutdown() error { m.transport.Shutdown() // Now tear down everything else. - m.shutdown = true + atomic.StoreInt32(&m.shutdown, 1) close(m.shutdownCh) m.deschedule() return nil } + +func (m *Memberlist) hasShutdown() bool { + return atomic.LoadInt32(&m.shutdown) == 1 +} + +func (m *Memberlist) hasLeft() bool { + return atomic.LoadInt32(&m.leave) == 1 +} diff --git a/vendor/github.com/hashicorp/memberlist/state.go b/vendor/github.com/hashicorp/memberlist/state.go index 8513361b1a..29fe5f1cf1 100644 --- a/vendor/github.com/hashicorp/memberlist/state.go +++ b/vendor/github.com/hashicorp/memberlist/state.go @@ -835,7 +835,7 @@ func (m *Memberlist) aliveNode(a *alive, notify chan struct{}, bootstrap bool) { // in-queue to be processed but blocked by the locks above. If we let // that aliveMsg process, it'll cause us to re-join the cluster. This // ensures that we don't. - if m.leave && a.Node == m.config.Name { + if m.hasLeft() && a.Node == m.config.Name { return } @@ -1111,7 +1111,7 @@ func (m *Memberlist) deadNode(d *dead) { // Check if this is us if state.Name == m.config.Name { // If we are not leaving we need to refute - if !m.leave { + if !m.hasLeft() { m.refute(state, d.Incarnation) m.logger.Printf("[WARN] memberlist: Refuting a dead message (from: %s)", d.From) return // Do not mark ourself dead diff --git a/vendor/vendor.json b/vendor/vendor.json index 9e53cfc17c..d8a5c9a255 100644 --- a/vendor/vendor.json +++ b/vendor/vendor.json @@ -55,7 +55,7 @@ {"checksumSHA1":"kqCMCHy2b+RBMKC+ER+OPqp8C3E=","path":"github.com/hashicorp/hil","revision":"1e86c6b523c55d1fa6c6e930ce80b548664c95c2","revisionTime":"2016-07-11T23:18:37Z"}, {"checksumSHA1":"UICubs001+Q4MsUf9zl2vcMzWQQ=","path":"github.com/hashicorp/hil/ast","revision":"1e86c6b523c55d1fa6c6e930ce80b548664c95c2","revisionTime":"2016-07-11T23:18:37Z"}, {"checksumSHA1":"vt+P9D2yWDO3gdvdgCzwqunlhxU=","path":"github.com/hashicorp/logutils","revision":"0dc08b1671f34c4250ce212759ebd880f743d883","revisionTime":"2015-06-09T07:04:31Z"}, - {"checksumSHA1":"ALN/cUj3330lnFRKFE3G58Z8p+E=","path":"github.com/hashicorp/memberlist","revision":"ea4ef7f066304a8e6f28bdb958888fe899f3b44e","revisionTime":"2017-08-07T23:34:30Z"}, + {"checksumSHA1":"ml0MTqOsKTrsqv/mZhy78Vz4SfA=","path":"github.com/hashicorp/memberlist","revision":"d6c1fb0b99c33d0a8e22acea9da9709b369b5d39","revisionTime":"2017-08-15T22:46:17Z"}, {"checksumSHA1":"qnlqWJYV81ENr61SZk9c65R1mDo=","path":"github.com/hashicorp/net-rpc-msgpackrpc","revision":"a14192a58a694c123d8fe5481d4a4727d6ae82f3","revisionTime":"2015-11-16T02:03:38Z"}, {"checksumSHA1":"5GHIYEtOr1rsHOZUac6RA/82d3I=","path":"github.com/hashicorp/raft","revision":"0a6e1b039ba3d8057e9f16c919d2afb813884f74","revisionTime":"2017-08-04T15:11:58Z","version":"library-v2-stage-one","versionExact":"library-v2-stage-one"}, {"checksumSHA1":"QAxukkv54/iIvLfsUP6IK4R0m/A=","path":"github.com/hashicorp/raft-boltdb","revision":"d1e82c1ec3f15ee991f7cc7ffd5b67ff6f5bbaee","revisionTime":"2015-02-01T20:08:39Z"},