Merge pull request #3396 from hashicorp/memberlist_deadlock

Update memberlist for a deadlock fix
This commit is contained in:
preetapan 2017-08-15 18:08:40 -05:00 committed by GitHub
commit 3327abdbf4
3 changed files with 28 additions and 19 deletions

View File

@ -22,9 +22,10 @@ import (
"strconv"
"strings"
"sync"
"sync/atomic"
"time"
"github.com/hashicorp/go-multierror"
multierror "github.com/hashicorp/go-multierror"
sockaddr "github.com/hashicorp/go-sockaddr"
"github.com/miekg/dns"
)
@ -35,11 +36,14 @@ type Memberlist struct {
numNodes uint32 // Number of known nodes (estimate)
config *Config
shutdown bool
shutdown int32 // Used as an atomic boolean value
shutdownCh chan struct{}
leave bool
leave int32 // Used as an atomic boolean value
leaveBroadcast chan struct{}
shutdownLock sync.Mutex // Serializes calls to Shutdown
leaveLock sync.Mutex // Serializes calls to Leave
transport Transport
handoff chan msgHandoff
@ -554,18 +558,17 @@ func (m *Memberlist) NumMembers() (alive int) {
// This method is safe to call multiple times, but must not be called
// after the cluster is already shut down.
func (m *Memberlist) Leave(timeout time.Duration) error {
m.nodeLock.Lock()
// We can't defer m.nodeLock.Unlock() because m.deadNode will also try to
// acquire a lock so we need to Unlock before that.
m.leaveLock.Lock()
defer m.leaveLock.Unlock()
if m.shutdown {
m.nodeLock.Unlock()
if m.hasShutdown() {
panic("leave after shutdown")
}
if !m.leave {
m.leave = true
if !m.hasLeft() {
atomic.StoreInt32(&m.leave, 1)
m.nodeLock.Lock()
state, ok := m.nodeMap[m.config.Name]
m.nodeLock.Unlock()
if !ok {
@ -591,8 +594,6 @@ func (m *Memberlist) Leave(timeout time.Duration) error {
return fmt.Errorf("timeout waiting for leave broadcast")
}
}
} else {
m.nodeLock.Unlock()
}
return nil
@ -634,10 +635,10 @@ func (m *Memberlist) ProtocolVersion() uint8 {
//
// This method is safe to call multiple times.
func (m *Memberlist) Shutdown() error {
m.nodeLock.Lock()
defer m.nodeLock.Unlock()
m.shutdownLock.Lock()
defer m.shutdownLock.Unlock()
if m.shutdown {
if m.hasShutdown() {
return nil
}
@ -647,8 +648,16 @@ func (m *Memberlist) Shutdown() error {
m.transport.Shutdown()
// Now tear down everything else.
m.shutdown = true
atomic.StoreInt32(&m.shutdown, 1)
close(m.shutdownCh)
m.deschedule()
return nil
}
// hasShutdown reports whether the shutdown flag is set. The flag is an int32
// used as an atomic boolean, so it is read with an atomic load and compared
// against 1, the value that marks the flag as set.
func (m *Memberlist) hasShutdown() bool {
	flag := atomic.LoadInt32(&m.shutdown)
	return flag == 1
}
// hasLeft reports whether the leave flag is set. The flag is an int32 used as
// an atomic boolean, so it is read with an atomic load and compared against 1,
// the value that marks the flag as set.
func (m *Memberlist) hasLeft() bool {
	flag := atomic.LoadInt32(&m.leave)
	return flag == 1
}

View File

@ -835,7 +835,7 @@ func (m *Memberlist) aliveNode(a *alive, notify chan struct{}, bootstrap bool) {
// in-queue to be processed but blocked by the locks above. If we let
// that aliveMsg process, it'll cause us to re-join the cluster. This
// ensures that we don't.
if m.leave && a.Node == m.config.Name {
if m.hasLeft() && a.Node == m.config.Name {
return
}
@ -1111,7 +1111,7 @@ func (m *Memberlist) deadNode(d *dead) {
// Check if this is us
if state.Name == m.config.Name {
// If we are not leaving we need to refute
if !m.leave {
if !m.hasLeft() {
m.refute(state, d.Incarnation)
m.logger.Printf("[WARN] memberlist: Refuting a dead message (from: %s)", d.From)
return // Do not mark ourself dead

2
vendor/vendor.json vendored
View File

@ -55,7 +55,7 @@
{"checksumSHA1":"kqCMCHy2b+RBMKC+ER+OPqp8C3E=","path":"github.com/hashicorp/hil","revision":"1e86c6b523c55d1fa6c6e930ce80b548664c95c2","revisionTime":"2016-07-11T23:18:37Z"},
{"checksumSHA1":"UICubs001+Q4MsUf9zl2vcMzWQQ=","path":"github.com/hashicorp/hil/ast","revision":"1e86c6b523c55d1fa6c6e930ce80b548664c95c2","revisionTime":"2016-07-11T23:18:37Z"},
{"checksumSHA1":"vt+P9D2yWDO3gdvdgCzwqunlhxU=","path":"github.com/hashicorp/logutils","revision":"0dc08b1671f34c4250ce212759ebd880f743d883","revisionTime":"2015-06-09T07:04:31Z"},
{"checksumSHA1":"ALN/cUj3330lnFRKFE3G58Z8p+E=","path":"github.com/hashicorp/memberlist","revision":"ea4ef7f066304a8e6f28bdb958888fe899f3b44e","revisionTime":"2017-08-07T23:34:30Z"},
{"checksumSHA1":"ml0MTqOsKTrsqv/mZhy78Vz4SfA=","path":"github.com/hashicorp/memberlist","revision":"d6c1fb0b99c33d0a8e22acea9da9709b369b5d39","revisionTime":"2017-08-15T22:46:17Z"},
{"checksumSHA1":"qnlqWJYV81ENr61SZk9c65R1mDo=","path":"github.com/hashicorp/net-rpc-msgpackrpc","revision":"a14192a58a694c123d8fe5481d4a4727d6ae82f3","revisionTime":"2015-11-16T02:03:38Z"},
{"checksumSHA1":"5GHIYEtOr1rsHOZUac6RA/82d3I=","path":"github.com/hashicorp/raft","revision":"0a6e1b039ba3d8057e9f16c919d2afb813884f74","revisionTime":"2017-08-04T15:11:58Z","version":"library-v2-stage-one","versionExact":"library-v2-stage-one"},
{"checksumSHA1":"QAxukkv54/iIvLfsUP6IK4R0m/A=","path":"github.com/hashicorp/raft-boltdb","revision":"d1e82c1ec3f15ee991f7cc7ffd5b67ff6f5bbaee","revisionTime":"2015-02-01T20:08:39Z"},