Peerpool failover (#801)

Dmitry Shulyak 2018-04-12 16:08:49 +03:00 committed by GitHub
parent ecd9762648
commit a7a2e01b4a
5 changed files with 186 additions and 49 deletions

geth/peers/discv5.go (new file)

@@ -0,0 +1,31 @@
package peers

import (
	"net"

	"github.com/ethereum/go-ethereum/p2p"
	"github.com/ethereum/go-ethereum/p2p/discv5"
)

// StartDiscv5 starts a discv5 UDP listener.
// This is done here to avoid patching the p2p server; we can't hold its lock,
// but no other sub-process should be using discovery.
func StartDiscv5(server *p2p.Server) (*discv5.Network, error) {
	addr, err := net.ResolveUDPAddr("udp", server.ListenAddr)
	if err != nil {
		return nil, err
	}
	conn, err := net.ListenUDP("udp", addr)
	if err != nil {
		return nil, err
	}
	realaddr := conn.LocalAddr().(*net.UDPAddr)
	ntab, err := discv5.ListenUDP(server.PrivateKey, conn, realaddr, "", server.NetRestrict)
	if err != nil {
		return nil, err
	}
	if err := ntab.SetFallbackNodes(server.BootstrapNodesV5); err != nil {
		return nil, err
	}
	return ntab, nil
}
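For orientation, here is a minimal sketch (not part of this diff) of how a caller can bring discovery back on a running server with StartDiscv5; it mirrors what PeerPool.restartDiscovery does later in this commit, and the helper name restoreDiscovery is illustrative:

package peers

import "github.com/ethereum/go-ethereum/p2p"

// restoreDiscovery is an illustrative wrapper: if the server's discv5
// instance was closed, start a fresh listener and attach it back.
func restoreDiscovery(server *p2p.Server) error {
	if server.DiscV5 != nil {
		return nil // discovery is already running
	}
	ntab, err := StartDiscv5(server)
	if err != nil {
		return err
	}
	server.DiscV5 = ntab
	return nil
}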

geth/peers/peerpool.go

@@ -9,6 +9,7 @@ import (
	"github.com/ethereum/go-ethereum/event"
	"github.com/ethereum/go-ethereum/log"
	"github.com/ethereum/go-ethereum/p2p"
+	"github.com/ethereum/go-ethereum/p2p/discover"
	"github.com/ethereum/go-ethereum/p2p/discv5"

	"github.com/status-im/status-go/geth/params"
@@ -22,6 +23,8 @@ var (
const (
	// expirationPeriod is an amount of time while peer is considered as a connectable
	expirationPeriod = 60 * time.Minute
+	// discoveryRestartTimeout defines how often loop will try to start discovery server
+	discoveryRestartTimeout = 2 * time.Second
	// DefaultFastSync is a recommended value for aggressive peers search.
	DefaultFastSync = 3 * time.Second
	// DefaultSlowSync is a recommended value for slow (background) peers search.
@@ -44,6 +47,8 @@ type peerInfo struct {
	discoveredTime mclock.AbsTime
	// connected is true if node is added as a static peer
	connected bool
+	// requested is true when our node requested a disconnect
+	requested bool

	node *discv5.Node
}
@@ -92,40 +97,96 @@ func (p *PeerPool) Start(server *p2p.Server) error {
	return nil
}
+
+// restartDiscovery restarts discv5 and starts a search for every topic whose peer count is below the min limit.
+func (p *PeerPool) restartDiscovery(server *p2p.Server) error {
+	if server.DiscV5 == nil {
+		ntab, err := StartDiscv5(server)
+		if err != nil {
+			log.Error("starting discv5 failed", "error", err, "retry in", discoveryRestartTimeout)
+			return err
+		}
+		log.Debug("restarted discovery from peer pool")
+		server.DiscV5 = ntab
+	}
+	for _, t := range p.topics {
+		if !t.BelowMin() || t.SearchRunning() {
+			continue
+		}
+		err := t.StartSearch(server)
+		if err != nil {
+			log.Error("search failed to start", "error", err)
+		}
+	}
+	return nil
+}
+
// handleServerPeers watches server peer events, notifies topic pools about changes
// in the peer set and stops the discv5 if all topic pools collected enough peers.
func (p *PeerPool) handleServerPeers(server *p2p.Server, events <-chan *p2p.PeerEvent) {
+	var retryDiscv5 <-chan time.Time
	for {
		select {
		case <-p.quit:
			return
+		case <-retryDiscv5:
+			if err := p.restartDiscovery(server); err != nil {
+				retryDiscv5 = time.After(discoveryRestartTimeout)
+			}
		case event := <-events:
			switch event.Type {
			case p2p.PeerEventTypeDrop:
-				p.mu.Lock()
-				for _, t := range p.topics {
-					t.ConfirmDropped(server, event.Peer, event.Error)
-					// TODO(dshulyak) restart discv5 if peers number dropped too low
-				}
-				p.mu.Unlock()
+				log.Debug("confirm peer dropped", "ID", event.Peer)
+				if p.stopOnMax && p.handleDroppedPeer(server, event.Peer) {
+					retryDiscv5 = time.After(0)
+				}
			case p2p.PeerEventTypeAdd:
-				p.mu.Lock()
-				total := 0
-				for _, t := range p.topics {
-					t.ConfirmAdded(server, event.Peer)
-					if p.stopOnMax && t.MaxReached() {
-						total++
-						t.StopSearch()
-					}
-				}
-				if p.stopOnMax && total == len(p.config) {
-					log.Debug("closing discv5 connection")
-					server.DiscV5.Close()
-				}
-				p.mu.Unlock()
+				log.Debug("confirm peer added", "ID", event.Peer)
+				if p.stopOnMax && p.handleAddedPeer(server, event.Peer) {
+					log.Debug("closing discv5 connection", "server", server.Self())
+					server.DiscV5.Close()
+					server.DiscV5 = nil
+				}
			}
		}
	}
}
+
+// handleAddedPeer notifies all topics about the added peer and returns true if all topics have reached their max limit of connections.
+func (p *PeerPool) handleAddedPeer(server *p2p.Server, nodeID discover.NodeID) (all bool) {
+	p.mu.Lock()
+	defer p.mu.Unlock()
+	all = true
+	for _, t := range p.topics {
+		t.ConfirmAdded(server, nodeID)
+		if p.stopOnMax && t.MaxReached() {
+			t.StopSearch()
+		} else {
+			all = false
+		}
+	}
+	return all
+}
+
+// handleDroppedPeer notifies every topic about the dropped peer and returns true if any topic has
+// fewer connections than its min limit.
+func (p *PeerPool) handleDroppedPeer(server *p2p.Server, nodeID discover.NodeID) (any bool) {
+	p.mu.Lock()
+	defer p.mu.Unlock()
+	for _, t := range p.topics {
+		confirmed := t.ConfirmDropped(server, nodeID)
+		if confirmed {
+			newPeer := t.AddPeerFromTable(server)
+			if newPeer != nil {
+				log.Debug("added peer from local table", "ID", newPeer.ID)
+			}
+		}
+		log.Debug("search", "topic", t.topic, "below min", t.BelowMin())
+		if t.BelowMin() && !t.SearchRunning() {
+			any = true
+		}
+	}
+	return any
+}

// Stop closes pool quit channel and all channels that are watched by search queries // Stop closes pool quit channel and all channels that are watched by search queries
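A note on the retry wiring above: retryDiscv5 starts out as a nil channel, and a receive from a nil channel blocks forever, so the restart case stays disabled until the drop handler arms it with time.After(0); a failed restart re-arms it with discoveryRestartTimeout. Below is a standalone sketch of that pattern; the timings, names and the explicit reset to nil on success are illustrative, not taken from this commit:

package main

import (
	"errors"
	"fmt"
	"time"
)

// retryLoop shows the timer pattern used by handleServerPeers: a nil channel
// disables its select case; time.After re-arms the case after a failure.
func retryLoop(restart func() error, quit <-chan struct{}) {
	var retry <-chan time.Time // nil: the retry case is disabled
	retry = time.After(0)      // arm immediately for the first attempt
	for {
		select {
		case <-quit:
			return
		case <-retry:
			if err := restart(); err != nil {
				retry = time.After(10 * time.Millisecond) // re-arm on failure
			} else {
				retry = nil // success: stay idle until re-armed elsewhere
			}
		}
	}
}

func main() {
	attempts := 0
	quit := make(chan struct{})
	go func() { time.Sleep(100 * time.Millisecond); close(quit) }()
	retryLoop(func() error {
		attempts++
		if attempts < 3 {
			return errors.New("discovery not ready yet")
		}
		return nil
	}, quit)
	fmt.Println("attempts:", attempts) // prints 3
}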

geth/peers/peerpool_test.go

@@ -10,6 +10,7 @@ import (
	"github.com/ethereum/go-ethereum/common"
	"github.com/ethereum/go-ethereum/crypto"
	"github.com/ethereum/go-ethereum/p2p"
+	"github.com/ethereum/go-ethereum/p2p/discover"
	"github.com/ethereum/go-ethereum/p2p/discv5"

	"github.com/stretchr/testify/suite"
@@ -46,7 +47,8 @@ func (s *PeerPoolSimulationSuite) SetupTest() {
	s.Require().NoError(s.bootnode.Start())
	bootnodeV5 := discv5.NewNode(s.bootnode.DiscV5.Self().ID, net.ParseIP("127.0.0.1"), uint16(port), uint16(port))

-	s.peers = make([]*p2p.Server, 2)
+	// 1 peer to initiate connection, 1 peer as a first candidate, 1 peer - for failover
+	s.peers = make([]*p2p.Server, 3)
	for i := range s.peers {
		key, _ := crypto.GenerateKey()
		peer := &p2p.Server{
@@ -66,14 +68,26 @@ func (s *PeerPoolSimulationSuite) SetupTest() {
	}
}

-func (s *PeerPoolSimulationSuite) TestSingleTopicDiscovery() {
+func (s *PeerPoolSimulationSuite) getPeerFromEvent(events <-chan *p2p.PeerEvent, etype p2p.PeerEventType) (nodeID discover.NodeID) {
+	select {
+	case ev := <-events:
+		if ev.Type == etype {
+			return ev.Peer
+		}
+	case <-time.After(5 * time.Second):
+		s.Fail("timed out waiting for a peer")
+		return
+	}
+	return
+}
+
+func (s *PeerPoolSimulationSuite) TestSingleTopicDiscoveryWithFailover() {
	topic := discv5.Topic("cap=test")
-	expectedConnections := 1
	// simulation should only rely on fast sync
	config := map[discv5.Topic]params.Limits{
-		topic: {expectedConnections, expectedConnections},
+		topic: {1, 1}, // limits are chosen for simplicity of the simulation
	}
-	peerPool := NewPeerPool(config, 100*time.Millisecond, 100*time.Millisecond, nil, false)
+	peerPool := NewPeerPool(config, 100*time.Millisecond, 100*time.Millisecond, nil, true)
	register := NewRegister(topic)
	s.Require().NoError(register.Start(s.peers[0]))
	defer register.Stop()
@@ -84,20 +98,20 @@ func (s *PeerPoolSimulationSuite) TestSingleTopicDiscovery() {
	defer subscription.Unsubscribe()
	s.NoError(peerPool.Start(s.peers[1]))
	defer peerPool.Stop()
-	connected := 0
-	for {
-		select {
-		case ev := <-events:
-			if ev.Type == p2p.PeerEventTypeAdd {
-				connected++
-			}
-		case <-time.After(5 * time.Second):
-			s.Require().FailNowf("waiting for peers timed out", strconv.Itoa(connected))
-		}
-		if connected == expectedConnections {
-			break
-		}
-	}
+	connected := s.getPeerFromEvent(events, p2p.PeerEventTypeAdd)
+	s.Equal(s.peers[0].Self().ID, connected)
+	time.Sleep(100 * time.Millisecond)
+	s.Require().Nil(s.peers[1].DiscV5)
+	s.peers[0].Stop()
+	disconnected := s.getPeerFromEvent(events, p2p.PeerEventTypeDrop)
+	s.Equal(connected, disconnected)
+	time.Sleep(100 * time.Millisecond)
+	s.Require().NotNil(s.peers[1].DiscV5)
+	register = NewRegister(topic)
+	s.Require().NoError(register.Start(s.peers[2]))
+	defer register.Stop()
+	newConnected := s.getPeerFromEvent(events, p2p.PeerEventTypeAdd)
+	s.Equal(s.peers[2].Self().ID, newConnected)
}

func (s *PeerPoolSimulationSuite) TearDown() {
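The failover test above drives NewPeerPool with stopOnMax enabled and a single topic limited to {1, 1}. For comparison, a hedged sketch of the same wiring with less degenerate limits; the topic name and numbers are made up, and it assumes NewPeerPool returns *PeerPool as the test usage suggests (params.Limits is indexed as [min, max] by the pool code):

package peers

import (
	"github.com/ethereum/go-ethereum/p2p/discv5"
	"github.com/status-im/status-go/geth/params"
)

// newExamplePool is illustrative only; the values are not from this commit.
func newExamplePool() *PeerPool {
	config := map[discv5.Topic]params.Limits{
		discv5.Topic("cap=test"): {2, 3}, // keep at least 2 peers, stop discovery at 3
	}
	// Recommended sync periods, nil passed through as in the test above, and
	// stopOnMax=true so discv5 is shut down once every topic reaches its max
	// and restarted by the pool when a topic falls below its min.
	return NewPeerPool(config, DefaultFastSync, DefaultSlowSync, nil, true)
}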

geth/peers/topicpool.go

@@ -56,6 +56,13 @@ func (t *TopicPool) MaxReached() bool {
	return t.connected == t.limits[1]
}

+// BelowMin returns true if the current number of peers is below the min limit.
+func (t *TopicPool) BelowMin() bool {
+	t.mu.RLock()
+	defer t.mu.RUnlock()
+	return t.connected < t.limits[0]
+}
+
// ConfirmAdded called when peer was added by p2p Server.
// 1. Skip a peer if it is not in our peer table
// 2. Add a peer to a cache.
@@ -80,6 +87,7 @@ func (t *TopicPool) ConfirmAdded(server *p2p.Server, nodeID discover.NodeID) {
	// when max limit is reached drop every peer after
	if t.connected == t.limits[1] {
		log.Debug("max limit is reached drop the peer", "ID", nodeID, "topic", t.topic)
+		peer.requested = true
		t.removePeer(server, peer)
		return
	}
@@ -99,8 +107,9 @@ func (t *TopicPool) ConfirmAdded(server *p2p.Server, nodeID discover.NodeID) {
// 2. If disconnect request - we could drop that peer ourselves.
// 3. If connected number will drop below min limit - switch to fast mode.
// 4. Delete a peer from cache and peer table.
-// 5. Connect with another valid peer, if such is available.
-func (t *TopicPool) ConfirmDropped(server *p2p.Server, nodeID discover.NodeID, reason string) (new bool) {
+// Returns false if peer is not in our table or we requested removal of this peer.
+// Otherwise peer is removed and true is returned.
+func (t *TopicPool) ConfirmDropped(server *p2p.Server, nodeID discover.NodeID) bool {
	t.mu.Lock()
	defer t.mu.Unlock()
	// either inbound or connected from another topic
@@ -108,9 +117,8 @@ func (t *TopicPool) ConfirmDropped(server *p2p.Server, nodeID discover.NodeID, r
	if !exist {
		return false
	}
-	log.Debug("disconnect reason", "peer", nodeID, "reason", reason)
-	// if requested - we don't need to remove peer from cache and look for a replacement
-	if reason == p2p.DiscRequested.Error() {
+	log.Debug("disconnect", "ID", nodeID)
+	if peer.requested {
		return false
	}
	if t.SearchRunning() && t.connected == t.limits[0] {
@@ -124,14 +132,21 @@ func (t *TopicPool) ConfirmDropped(server *p2p.Server, nodeID discover.NodeID, r
			log.Error("failed to remove peer from cache", "error", err)
		}
	}
+	return true
+}
+
+// AddPeerFromTable checks if there is a valid peer in local table and adds it to a server.
+func (t *TopicPool) AddPeerFromTable(server *p2p.Server) *discv5.Node {
+	t.mu.RLock()
+	defer t.mu.RUnlock()
	// TODO use a heap queue and always get a peer that was discovered recently
	for _, peer := range t.peers {
		if !peer.connected && mclock.Now() < peer.discoveredTime+mclock.AbsTime(expirationPeriod) {
			t.addPeer(server, peer)
-			return true
+			return peer.node
		}
	}
-	return false
+	return nil
}

// StartSearch creates discv5 queries and runs a loop to consume found peers.
@@ -169,7 +184,11 @@ func (t *TopicPool) StartSearch(server *p2p.Server) error {
}

func (t *TopicPool) handleFoundPeers(server *p2p.Server, found <-chan *discv5.Node, lookup <-chan bool) {
-	t.period <- t.fastSync
+	if t.connected >= t.limits[0] {
+		t.period <- t.slowSync
+	} else {
+		t.period <- t.fastSync
+	}
	selfID := discv5.NodeID(server.Self().ID)
	for {
		select {
@@ -226,6 +245,9 @@ func (t *TopicPool) removePeer(server *p2p.Server, info *peerInfo) {
// StopSearch stops the search and closes the quit channel.
func (t *TopicPool) StopSearch() {
+	if !t.SearchRunning() {
+		return
+	}
	if t.quit == nil {
		return
	}
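Taken together, the two TopicPool predicates drive the pool's discovery lifecycle in this commit: MaxReached lets handleAddedPeer stop a topic's search (and close discv5 once every topic is at max), while BelowMin lets handleDroppedPeer ask for a discovery restart. A simplified, illustrative mapping, ignoring SearchRunning and the stopOnMax flag:

package peers

// discoveryAction is an illustrative summary of how PeerPool reacts to a
// topic's state; the real logic lives in handleAddedPeer and handleDroppedPeer.
func discoveryAction(t *TopicPool) string {
	switch {
	case t.MaxReached():
		return "stop searching; discv5 can be closed once all topics are at max"
	case t.BelowMin():
		return "restart discv5 and start a search for this topic"
	default:
		return "between min and max; keep the current peers"
	}
}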

geth/peers/topicpool_test.go

@@ -63,7 +63,7 @@ func (s *TopicPoolSuite) TestSyncSwitches() {
	s.topicPool.ConfirmAdded(s.peer, discover.NodeID(testPeer.ID))
	s.AssertConsumed(s.topicPool.period, s.topicPool.slowSync, time.Second)
	s.True(s.topicPool.peers[testPeer.ID].connected)
-	s.topicPool.ConfirmDropped(s.peer, discover.NodeID(testPeer.ID), p2p.DiscProtocolError.Error())
+	s.topicPool.ConfirmDropped(s.peer, discover.NodeID(testPeer.ID))
	s.AssertConsumed(s.topicPool.period, s.topicPool.fastSync, time.Second)
}
@@ -82,15 +82,24 @@ func (s *TopicPoolSuite) TestNewPeerSelectedOnDrop() {
	s.topicPool.ConfirmAdded(s.peer, discover.NodeID(peer3.ID))
	s.False(s.topicPool.peers[peer3.ID].connected)

-	s.True(s.topicPool.ConfirmDropped(s.peer, discover.NodeID(peer1.ID), p2p.DiscNetworkError.Error()))
+	s.True(s.topicPool.ConfirmDropped(s.peer, discover.NodeID(peer1.ID)))
+	s.Equal(peer3.ID, s.topicPool.AddPeerFromTable(s.peer).ID)
}

func (s *TopicPoolSuite) TestRequestedDoesntRemove() {
+	// max limit is 1 because we test that 2nd peer will stay in local table
+	// when we request to drop it
+	s.topicPool.limits = params.Limits{1, 1}
	peer1 := discv5.NewNode(discv5.NodeID{1}, s.peer.Self().IP, 32311, 32311)
+	peer2 := discv5.NewNode(discv5.NodeID{2}, s.peer.Self().IP, 32311, 32311)
	s.topicPool.processFoundNode(s.peer, peer1)
+	s.topicPool.processFoundNode(s.peer, peer2)
	s.topicPool.ConfirmAdded(s.peer, discover.NodeID(peer1.ID))
-	s.topicPool.ConfirmDropped(s.peer, discover.NodeID(peer1.ID), p2p.DiscRequested.Error())
-	s.Contains(s.topicPool.peers, peer1.ID)
-	s.topicPool.ConfirmDropped(s.peer, discover.NodeID(peer1.ID), p2p.DiscProtocolError.Error())
+	s.topicPool.ConfirmAdded(s.peer, discover.NodeID(peer2.ID))
+	s.False(s.topicPool.peers[peer1.ID].requested)
+	s.True(s.topicPool.peers[peer2.ID].requested)
+	s.topicPool.ConfirmDropped(s.peer, discover.NodeID(peer2.ID))
+	s.Contains(s.topicPool.peers, peer2.ID)
+	s.topicPool.ConfirmDropped(s.peer, discover.NodeID(peer1.ID))
	s.NotContains(s.topicPool.peers, peer1.ID)
}