Bug/fix pending peers segfault (#2004)

* Replace mclock with time in peer pools

we used mclock as golang before 1.9 did not support monotonic clocks,
https://github.com/gavv/monotime, it does now https://golang.org/pkg/time/
so we can fallback on the system implementation, which will return
nanoseconds with a resolution that is system dependent.

* Handle case where peer have same discovered time

In some system the resolution of the clock is not high enough so
multiple peers are added on the same nanosecond.

This result in the peer just added being immediately removed.

This code adds a check making sure we don't assume that a peer is added.

Another approach would be to make sure to include the peer in the list,
so prevent the peer just being added to be evicted, but it's slightly
more complicated and the resolution is generally accurate enough for our
purpose so that peers will be fresh enough if they have the same
discovered time.

It also adds a regression test, I had to use an interface to stub the
clock.

Fixes: https://github.com/status-im/nim-status-client/issues/522

* bump version to 0.55.3
This commit is contained in:
Andrea Maria Piana 2020-07-24 17:14:05 +02:00 committed by GitHub
parent 26978ca03d
commit 58fcf809ea
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
6 changed files with 73 additions and 31 deletions

View File

@ -1 +1 @@
0.55.2
0.55.3

View File

@ -7,7 +7,6 @@ import (
"time"
"github.com/ethereum/go-ethereum/common"
"github.com/ethereum/go-ethereum/common/mclock"
"github.com/ethereum/go-ethereum/event"
"github.com/ethereum/go-ethereum/log"
"github.com/ethereum/go-ethereum/p2p"
@ -77,7 +76,7 @@ func NewDefaultOptions() *Options {
type peerInfo struct {
// discoveredTime last time when node was found by v5
discoveredTime mclock.AbsTime
discoveredTime time.Time
// dismissed is true when our node requested a disconnect
dismissed bool
// added is true when the node tries to add this peer to a server

View File

@ -16,7 +16,7 @@ var _ heap.Interface = (*peerPriorityQueue)(nil)
func (q peerPriorityQueue) Len() int { return len(q) }
func (q peerPriorityQueue) Less(i, j int) bool {
return q[i].discoveredTime > q[j].discoveredTime
return q[i].discoveredTime.After(q[j].discoveredTime)
}
func (q peerPriorityQueue) Swap(i, j int) {

View File

@ -4,19 +4,18 @@ import (
"container/heap"
"math/rand"
"testing"
"time"
"github.com/stretchr/testify/require"
"github.com/ethereum/go-ethereum/common/mclock"
)
func TestPeerPriorityQueueSorting(t *testing.T) {
count := 5
discTimes := make([]mclock.AbsTime, count)
discTimes := make([]time.Time, count)
// generate a slice of monotonic times
for i := 0; i < count; i++ {
discTimes[i] = mclock.Now()
discTimes[i] = time.Now()
}
// shuffle discTimes
@ -41,7 +40,7 @@ func TestPeerPriorityQueueSorting(t *testing.T) {
for q.Len() > 0 {
newItem := heap.Pop(&q).(*peerInfoItem)
if item != nil {
require.True(t, item.discoveredTime > newItem.discoveredTime)
require.True(t, item.discoveredTime.After(newItem.discoveredTime))
}
item = newItem
}
@ -54,13 +53,13 @@ func TestPeerPriorityQueueIndexUpdating(t *testing.T) {
item1 := &peerInfoItem{
index: -1,
peerInfo: &peerInfo{
discoveredTime: mclock.Now(),
discoveredTime: time.Now(),
},
}
item2 := &peerInfoItem{
index: -1,
peerInfo: &peerInfo{
discoveredTime: mclock.Now(),
discoveredTime: time.Now(),
},
}

View File

@ -6,7 +6,6 @@ import (
"sync/atomic"
"time"
"github.com/ethereum/go-ethereum/common/mclock"
"github.com/ethereum/go-ethereum/log"
"github.com/ethereum/go-ethereum/p2p"
"github.com/ethereum/go-ethereum/p2p/discv5"
@ -46,6 +45,14 @@ type TopicPoolInterface interface {
readyToStopSearch() bool
}
type Clock interface {
Now() time.Time
}
type realClock struct{}
func (realClock) Now() time.Time { return time.Now() }
// newTopicPool returns instance of TopicPool.
func newTopicPool(discovery discovery.Discovery, topic discv5.Topic, limits params.Limits, slowMode, fastMode time.Duration, cache *Cache) *TopicPool {
pool := TopicPool{
@ -62,6 +69,7 @@ func newTopicPool(discovery discovery.Discovery, topic discv5.Topic, limits para
cache: cache,
maxCachedPeers: limits.Max * maxCachedPeersMultiplier,
maxPendingPeers: limits.Max * maxPendingPeersMultiplier,
clock: realClock{},
}
heap.Init(&pool.discoveredPeersQueue)
@ -100,6 +108,8 @@ type TopicPool struct {
maxPendingPeers int
maxCachedPeers int
cache *Cache
clock Clock
}
func (t *TopicPool) addToPendingPeers(peer *peerInfo) {
@ -118,7 +128,7 @@ func (t *TopicPool) addToPendingPeers(peer *peerInfo) {
var oldestPeer *peerInfo
for _, i := range t.pendingPeers {
if oldestPeer != nil && oldestPeer.discoveredTime < i.peerInfo.discoveredTime {
if oldestPeer != nil && oldestPeer.discoveredTime.Before(i.peerInfo.discoveredTime) {
continue
}
@ -162,12 +172,12 @@ func (t *TopicPool) removeFromPendingPeers(nodeID enode.ID) {
}
}
func (t *TopicPool) updatePendingPeer(nodeID enode.ID, time mclock.AbsTime) {
func (t *TopicPool) updatePendingPeer(nodeID enode.ID) {
peer, ok := t.pendingPeers[nodeID]
if !ok {
return
}
peer.discoveredTime = mclock.Now()
peer.discoveredTime = t.clock.Now()
if peer.index != notQueuedIndex {
heap.Fix(&t.discoveredPeersQueue, peer.index)
}
@ -217,7 +227,7 @@ func (t *TopicPool) setStopSearchTimeout(delay time.Duration) {
if t.stopSearchTimeout != nil {
return
}
now := time.Now().Add(delay)
now := t.clock.Now().Add(delay)
t.stopSearchTimeout = &now
}
@ -227,7 +237,7 @@ func (t *TopicPool) isStopSearchDelayExpired() bool {
if t.stopSearchTimeout == nil {
return false
}
return t.stopSearchTimeout.Before(time.Now())
return t.stopSearchTimeout.Before(t.clock.Now())
}
// readyToStopSearch return true if all conditions to stop search are ok.
@ -391,7 +401,7 @@ func (t *TopicPool) AddPeerFromTable(server *p2p.Server) *discv5.Node {
// TODO(adam): investigate if it's worth to keep the peer in the queue
// until the server confirms it is added and in the meanwhile only adjust its priority.
peer := t.popFromQueue()
if peer != nil && mclock.Now() < peer.discoveredTime+mclock.AbsTime(expirationPeriod) {
if peer != nil && t.clock.Now().Before(peer.discoveredTime.Add(expirationPeriod)) {
t.addServerPeer(server, peer)
return peer.node
}
@ -483,15 +493,15 @@ func (t *TopicPool) processFoundNode(server *p2p.Server, node *discv5.Node) erro
// peer is already connected so update only discoveredTime
if peer, ok := t.connectedPeers[nodeID]; ok {
peer.discoveredTime = mclock.Now()
peer.discoveredTime = t.clock.Now()
return nil
}
if _, ok := t.pendingPeers[nodeID]; ok {
t.updatePendingPeer(nodeID, mclock.Now())
t.updatePendingPeer(nodeID)
} else {
t.addToPendingPeers(&peerInfo{
discoveredTime: mclock.Now(),
discoveredTime: t.clock.Now(),
node: node,
publicKey: pk,
})
@ -499,6 +509,22 @@ func (t *TopicPool) processFoundNode(server *p2p.Server, node *discv5.Node) erro
log.Debug(
"adding peer to a server", "peer", node.ID.String(),
"connected", len(t.connectedPeers), "max", t.maxCachedPeers)
// This can happen when the monotonic clock is not precise enough and
// multiple peers gets added at the same clock time, resulting in all
// of them having the same discoveredTime.
// At which point a random peer will be removed, sometimes being the
// peer we just added.
// We could make sure that the latest added peer is not removed,
// but this is simpler, and peers will be fresh enough as resolution
// should be quite high (ms at least).
// This has been reported on windows builds
// only https://github.com/status-im/nim-status-client/issues/522
if t.pendingPeers[nodeID] == nil {
log.Debug("peer added has just been removed", "peer", nodeID)
return nil
}
// the upper limit is not reached, so let's add this peer
if len(t.connectedPeers) < t.maxCachedPeers {
t.addServerPeer(server, t.pendingPeers[nodeID].peerInfo)

View File

@ -9,7 +9,6 @@ import (
"github.com/stretchr/testify/suite"
"github.com/ethereum/go-ethereum/common"
"github.com/ethereum/go-ethereum/common/mclock"
"github.com/ethereum/go-ethereum/crypto"
"github.com/ethereum/go-ethereum/p2p"
"github.com/ethereum/go-ethereum/p2p/discv5"
@ -193,6 +192,25 @@ func (s *TopicPoolSuite) TestNewPeerSelectedOnDrop() {
s.Len(s.topicPool.discoveredPeersQueue, 0)
}
type mockConstantClock struct{}
func (mockConstantClock) Now() time.Time { return time.Unix(0, 0) }
// Regression test for https://github.com/status-im/nim-status-client/issues/522
func (s *TopicPoolSuite) TestNewPeerLackOfClockPrecision() {
_, peer1 := s.createDiscV5Node(s.peer.Self().IP(), 32311)
_, peer2 := s.createDiscV5Node(s.peer.Self().IP(), 32311)
_, peer3 := s.createDiscV5Node(s.peer.Self().IP(), 32311)
s.topicPool.maxPendingPeers = 2
s.topicPool.clock = mockConstantClock{}
s.Require().NoError(s.topicPool.processFoundNode(s.peer, peer1))
s.Require().NoError(s.topicPool.processFoundNode(s.peer, peer2))
s.Require().NoError(s.topicPool.processFoundNode(s.peer, peer3))
}
func (s *TopicPoolSuite) TestRequestedDoesntRemove() {
// max limit is 1 because we test that 2nd peer will stay in local table
// when we request to drop it
@ -238,7 +256,7 @@ func (s *TopicPoolSuite) TestTheMostRecentPeerIsSelected() {
// peer1 has dropped
s.topicPool.ConfirmDropped(s.peer, nodeID1)
// and peer3 is take from the pool as the most recent
s.True(s.topicPool.pendingPeers[nodeID2].discoveredTime < s.topicPool.pendingPeers[nodeID3].discoveredTime)
s.True(s.topicPool.pendingPeers[nodeID2].discoveredTime.Before(s.topicPool.pendingPeers[nodeID3].discoveredTime))
s.Equal(peer3.ID, s.topicPool.AddPeerFromTable(s.peer).ID)
}
@ -353,9 +371,9 @@ func (s *TopicPoolSuite) TestMaxPendingPeers() {
pk2, _ := peer2.ID.Pubkey()
pk3, _ := peer3.ID.Pubkey()
s.topicPool.addToPendingPeers(&peerInfo{discoveredTime: mclock.Now(), node: peer1, publicKey: pk1})
s.topicPool.addToPendingPeers(&peerInfo{discoveredTime: mclock.Now(), node: peer2, publicKey: pk2})
s.topicPool.addToPendingPeers(&peerInfo{discoveredTime: mclock.Now(), node: peer3, publicKey: pk3})
s.topicPool.addToPendingPeers(&peerInfo{discoveredTime: time.Now(), node: peer1, publicKey: pk1})
s.topicPool.addToPendingPeers(&peerInfo{discoveredTime: time.Now(), node: peer2, publicKey: pk2})
s.topicPool.addToPendingPeers(&peerInfo{discoveredTime: time.Now(), node: peer3, publicKey: pk3})
s.Equal(2, len(s.topicPool.pendingPeers))
s.Require().NotContains(s.topicPool.pendingPeers, nodeID1)
@ -364,7 +382,7 @@ func (s *TopicPoolSuite) TestMaxPendingPeers() {
// maxPendingPeers = 0 means no limits.
s.topicPool.maxPendingPeers = 0
s.topicPool.addToPendingPeers(&peerInfo{discoveredTime: mclock.Now(), node: peer1, publicKey: pk1})
s.topicPool.addToPendingPeers(&peerInfo{discoveredTime: time.Now(), node: peer1, publicKey: pk1})
s.Equal(3, len(s.topicPool.pendingPeers))
s.Require().Contains(s.topicPool.pendingPeers, nodeID1)
@ -377,11 +395,11 @@ func (s *TopicPoolSuite) TestQueueDuplicatePeers() {
_, peer2 := s.createDiscV5Node(s.peer.Self().IP(), 32311)
pk1, _ := peer1.ID.Pubkey()
pk2, _ := peer2.ID.Pubkey()
peerInfo1 := &peerInfo{discoveredTime: mclock.Now(), node: peer1, publicKey: pk1}
peerInfo2 := &peerInfo{discoveredTime: mclock.Now(), node: peer2, publicKey: pk2}
peerInfo1 := &peerInfo{discoveredTime: time.Now(), node: peer1, publicKey: pk1}
peerInfo2 := &peerInfo{discoveredTime: time.Now(), node: peer2, publicKey: pk2}
s.topicPool.addToPendingPeers(&peerInfo{discoveredTime: mclock.Now(), node: peer1, publicKey: pk1})
s.topicPool.addToPendingPeers(&peerInfo{discoveredTime: mclock.Now(), node: peer2, publicKey: pk2})
s.topicPool.addToPendingPeers(&peerInfo{discoveredTime: time.Now(), node: peer1, publicKey: pk1})
s.topicPool.addToPendingPeers(&peerInfo{discoveredTime: time.Now(), node: peer2, publicKey: pk2})
s.topicPool.addToQueue(peerInfo1)
s.topicPool.addToQueue(peerInfo2)