Handle deadlock in peerpool

There was another deadlock in the peer pool.
Because we made the event handler asynchrnous, another deadlock popped
up, as the loop locks the global peerpool lock before processing events.
But the handlers also take the global look, effectively resulting in the
same situation we had before, i.e the loop is not running.

THE LOOP MUST BE RUNNING AT ALL TIMES OTHERWISE THE SERVER HANGS.
This commit is contained in:
Andrea Maria Piana 2021-02-01 14:39:41 +01:00
parent b4e5bf417b
commit 4685b9eaa9
2 changed files with 14 additions and 5 deletions

View File

@ -38,7 +38,7 @@ func SubscribeServerEvents(ctx context.Context, node *node.Node) error {
// https://github.com/status-im/status-go/blob/e60f425b45d00d3880b42fdd77b460ec465a9f55/vendor/github.com/ethereum/go-ethereum/p2p/server.go#L746
// If there's back-pressure on the peer event feed
// https://github.com/status-im/status-go/blob/e60f425b45d00d3880b42fdd77b460ec465a9f55/vendor/github.com/ethereum/go-ethereum/p2p/server.go#L783
// The event channel above might become while updateNodeMetrics
// The event channel above might become full while updateNodeMetrics
// is called, which means is never consumed, the server blocks on publishing on
// it, and the two will deadlock (server waits for the channel above to be consumed,
// this code waits for the server to respond to peerCount, which is in the same

View File

@ -103,6 +103,7 @@ type PeerPool struct {
cache *Cache
mu sync.RWMutex
timeoutMu sync.RWMutex
topics []TopicPoolInterface
serverSubscription event.Subscription
events chan *p2p.PeerEvent
@ -123,6 +124,8 @@ func NewPeerPool(discovery discovery.Discovery, config map[discv5.Topic]params.L
}
func (p *PeerPool) setDiscoveryTimeout() {
p.timeoutMu.Lock()
defer p.timeoutMu.Unlock()
if p.opts.AllowStop && p.opts.DiscServerTimeout > 0 {
p.timeout = time.After(p.opts.DiscServerTimeout)
}
@ -217,9 +220,9 @@ func (p *PeerPool) stopDiscovery(server *p2p.Server) {
t.StopSearch(server)
}
p.mu.Lock()
p.timeoutMu.Lock()
p.timeout = nil
p.mu.Unlock()
p.timeoutMu.Unlock()
signal.SendDiscoveryStopped()
}
@ -275,9 +278,15 @@ func (p *PeerPool) handleServerPeers(server *p2p.Server, events <-chan *p2p.Peer
}
for {
p.mu.RLock()
// We use a separate lock for timeout, as this loop should
// always be running, otherwise the p2p.Server will hang.
// Because the handler of events might potentially hang on the
// server, deadlocking if this loop is waiting for the global lock.
// NOTE: this code probably needs to be refactored and simplified
// as it's difficult to follow the asynchronous nature of it.
p.timeoutMu.RLock()
timeout := p.timeout
p.mu.RUnlock()
p.timeoutMu.RUnlock()
select {
case <-p.quit: