2018-04-10 09:44:09 +03:00
|
|
|
package peers
|
|
|
|
|
|
|
|
import (
|
2018-11-14 08:03:58 +01:00
|
|
|
"crypto/ecdsa"
|
2018-04-10 09:44:09 +03:00
|
|
|
"errors"
|
|
|
|
"sync"
|
|
|
|
"time"
|
|
|
|
|
2018-08-20 15:55:43 +02:00
|
|
|
"github.com/ethereum/go-ethereum/common"
|
2018-04-10 09:44:09 +03:00
|
|
|
"github.com/ethereum/go-ethereum/event"
|
|
|
|
"github.com/ethereum/go-ethereum/log"
|
|
|
|
"github.com/ethereum/go-ethereum/p2p"
|
|
|
|
"github.com/ethereum/go-ethereum/p2p/discv5"
|
2018-11-14 08:03:58 +01:00
|
|
|
"github.com/ethereum/go-ethereum/p2p/enode"
|
2018-04-10 09:44:09 +03:00
|
|
|
|
2018-08-20 15:55:43 +02:00
|
|
|
"github.com/status-im/status-go/contracts"
|
2018-07-04 13:51:47 +03:00
|
|
|
"github.com/status-im/status-go/discovery"
|
2018-08-20 15:55:43 +02:00
|
|
|
"github.com/status-im/status-go/mailserver/registry"
|
2018-06-08 13:29:50 +02:00
|
|
|
"github.com/status-im/status-go/params"
|
2018-07-25 16:48:02 +02:00
|
|
|
"github.com/status-im/status-go/peers/verifier"
|
2018-05-03 09:35:58 +02:00
|
|
|
"github.com/status-im/status-go/signal"
|
2018-04-10 09:44:09 +03:00
|
|
|
)
|
|
|
|
|
|
|
|
// ErrDiscv5NotRunning is the error PeerPool.Start returns when the discovery
// service is not running (or was never enabled) at the time the pool starts.
var ErrDiscv5NotRunning = errors.New("Discovery v5 is not running")
|
|
|
|
|
2018-04-13 11:34:30 +03:00
|
|
|
// PoolEvent is the string type used for peer pool events.
type PoolEvent string
|
|
|
|
|
2018-04-10 09:44:09 +03:00
|
|
|
// Timing parameters used by the peer pool and its default Options.
const (
	// immediately is a zero delay; it is used to queue a discovery retry
	// without waiting.
	immediately time.Duration = 0
	// expirationPeriod is how long a peer is considered connectable.
	expirationPeriod = time.Hour
	// discoveryRestartTimeout is the delay before the event loop retries
	// starting the discovery server after a failed attempt.
	discoveryRestartTimeout = 2 * time.Second

	// DefaultFastSync is the recommended period for aggressive peer search.
	DefaultFastSync = 3 * time.Second
	// DefaultSlowSync is the recommended period for slow (background) peer search.
	DefaultSlowSync = 30 * time.Second
	// DefaultDiscV5Timeout is the time after which Discv5 is stopped.
	DefaultDiscV5Timeout = 3 * time.Minute
	// DefaultTopicFastModeTimeout is the time after which the sync mode is
	// switched to slow mode.
	DefaultTopicFastModeTimeout = 30 * time.Second
	// DefaultTopicStopSearchDelay is the default delay applied when stopping
	// a topic search.
	DefaultTopicStopSearchDelay = 10 * time.Second
)
|
|
|
|
|
2018-05-15 11:16:25 +02:00
|
|
|
// Options is a struct with PeerPool configuration.
|
|
|
|
type Options struct {
|
|
|
|
FastSync time.Duration
|
|
|
|
SlowSync time.Duration
|
|
|
|
// After this time, Discovery is stopped even if max peers is not reached.
|
|
|
|
DiscServerTimeout time.Duration
|
|
|
|
// AllowStop allows stopping Discovery when reaching max peers or after timeout.
|
|
|
|
AllowStop bool
|
2018-06-06 15:39:27 +02:00
|
|
|
// TopicStopSearchDelay time stopSearch will be waiting for max cached peers to be
|
|
|
|
// filled before really stopping the search.
|
|
|
|
TopicStopSearchDelay time.Duration
|
2018-07-25 16:48:02 +02:00
|
|
|
// TrustedMailServers is a list of trusted nodes.
|
2018-11-14 08:03:58 +01:00
|
|
|
TrustedMailServers []enode.ID
|
2018-08-20 15:55:43 +02:00
|
|
|
// MailServerRegistryAddress is the MailServerRegistry contract address
|
|
|
|
MailServerRegistryAddress string
|
2018-05-15 11:16:25 +02:00
|
|
|
}
|
|
|
|
|
2018-06-06 15:39:27 +02:00
|
|
|
// NewDefaultOptions returns a struct with default Options.
|
2018-05-15 11:16:25 +02:00
|
|
|
func NewDefaultOptions() *Options {
|
|
|
|
return &Options{
|
2018-06-06 15:39:27 +02:00
|
|
|
FastSync: DefaultFastSync,
|
|
|
|
SlowSync: DefaultSlowSync,
|
|
|
|
DiscServerTimeout: DefaultDiscV5Timeout,
|
|
|
|
AllowStop: false,
|
|
|
|
TopicStopSearchDelay: DefaultTopicStopSearchDelay,
|
2018-04-10 09:44:09 +03:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
type peerInfo struct {
|
|
|
|
// discoveredTime last time when node was found by v5
|
2020-07-24 17:14:05 +02:00
|
|
|
discoveredTime time.Time
|
2018-04-19 17:18:49 +02:00
|
|
|
// dismissed is true when our node requested a disconnect
|
|
|
|
dismissed bool
|
2018-11-13 14:58:26 +01:00
|
|
|
// added is true when the node tries to add this peer to a server
|
|
|
|
added bool
|
2018-04-10 09:44:09 +03:00
|
|
|
|
|
|
|
node *discv5.Node
|
2018-11-14 08:03:58 +01:00
|
|
|
// store public key separately to make peerInfo more independent from discv5
|
|
|
|
publicKey *ecdsa.PublicKey
|
|
|
|
}
|
|
|
|
|
|
|
|
func (p *peerInfo) NodeID() enode.ID {
|
|
|
|
return enode.PubkeyToIDV4(p.publicKey)
|
2018-04-10 09:44:09 +03:00
|
|
|
}
|
|
|
|
|
|
|
|
// PeerPool manages discovered peers and connects them to p2p server
|
|
|
|
type PeerPool struct {
|
2018-05-15 11:16:25 +02:00
|
|
|
opts *Options
|
|
|
|
|
2018-07-04 13:51:47 +03:00
|
|
|
discovery discovery.Discovery
|
2018-07-03 14:27:04 +03:00
|
|
|
|
2018-04-10 09:44:09 +03:00
|
|
|
// config can be set only once per pool life cycle
|
2018-05-15 11:16:25 +02:00
|
|
|
config map[discv5.Topic]params.Limits
|
|
|
|
cache *Cache
|
2018-04-10 09:44:09 +03:00
|
|
|
|
|
|
|
mu sync.RWMutex
|
2021-02-01 14:39:41 +01:00
|
|
|
timeoutMu sync.RWMutex
|
2018-07-16 09:40:40 +02:00
|
|
|
topics []TopicPoolInterface
|
2018-04-10 09:44:09 +03:00
|
|
|
serverSubscription event.Subscription
|
2018-05-01 17:19:11 +02:00
|
|
|
events chan *p2p.PeerEvent
|
2018-04-10 09:44:09 +03:00
|
|
|
quit chan struct{}
|
2018-05-10 14:45:51 +03:00
|
|
|
wg sync.WaitGroup
|
2018-05-01 17:19:11 +02:00
|
|
|
timeout <-chan time.Time
|
2018-07-16 09:40:40 +02:00
|
|
|
updateTopic chan *updateTopicRequest
|
2018-04-10 09:44:09 +03:00
|
|
|
}
|
|
|
|
|
2018-05-15 11:16:25 +02:00
|
|
|
// NewPeerPool creates instance of PeerPool
|
2018-07-04 13:51:47 +03:00
|
|
|
func NewPeerPool(discovery discovery.Discovery, config map[discv5.Topic]params.Limits, cache *Cache, options *Options) *PeerPool {
|
2018-05-15 11:16:25 +02:00
|
|
|
return &PeerPool{
|
2018-07-03 14:27:04 +03:00
|
|
|
opts: options,
|
|
|
|
discovery: discovery,
|
|
|
|
config: config,
|
|
|
|
cache: cache,
|
2018-05-15 11:16:25 +02:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
func (p *PeerPool) setDiscoveryTimeout() {
|
2021-02-01 14:39:41 +01:00
|
|
|
p.timeoutMu.Lock()
|
|
|
|
defer p.timeoutMu.Unlock()
|
2018-05-15 11:16:25 +02:00
|
|
|
if p.opts.AllowStop && p.opts.DiscServerTimeout > 0 {
|
|
|
|
p.timeout = time.After(p.opts.DiscServerTimeout)
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2018-04-10 09:44:09 +03:00
|
|
|
// Start creates topic pool for each topic in config and subscribes to server events.
|
2018-08-20 15:55:43 +02:00
|
|
|
func (p *PeerPool) Start(server *p2p.Server, rpcClient contracts.RPCClient) error {
|
2018-07-03 14:27:04 +03:00
|
|
|
if !p.discovery.Running() {
|
2018-04-10 09:44:09 +03:00
|
|
|
return ErrDiscv5NotRunning
|
|
|
|
}
|
2018-05-15 11:16:25 +02:00
|
|
|
|
2018-04-10 09:44:09 +03:00
|
|
|
p.mu.Lock()
|
|
|
|
defer p.mu.Unlock()
|
2018-05-15 11:16:25 +02:00
|
|
|
|
|
|
|
// init channels
|
2018-04-10 09:44:09 +03:00
|
|
|
p.quit = make(chan struct{})
|
2018-07-16 09:40:40 +02:00
|
|
|
p.updateTopic = make(chan *updateTopicRequest)
|
2018-05-15 11:16:25 +02:00
|
|
|
p.setDiscoveryTimeout()
|
|
|
|
|
2018-05-26 09:37:13 +02:00
|
|
|
// subscribe to peer events
|
|
|
|
p.events = make(chan *p2p.PeerEvent, 20)
|
|
|
|
p.serverSubscription = server.SubscribeEvents(p.events)
|
|
|
|
p.wg.Add(1)
|
|
|
|
go func() {
|
|
|
|
p.handleServerPeers(server, p.events)
|
|
|
|
p.wg.Done()
|
|
|
|
}()
|
|
|
|
|
2018-05-15 11:16:25 +02:00
|
|
|
// collect topics and start searching for nodes
|
2018-07-16 09:40:40 +02:00
|
|
|
p.topics = make([]TopicPoolInterface, 0, len(p.config))
|
2018-04-10 09:44:09 +03:00
|
|
|
for topic, limits := range p.config {
|
2018-07-16 09:40:40 +02:00
|
|
|
var topicPool TopicPoolInterface
|
|
|
|
t := newTopicPool(p.discovery, topic, limits, p.opts.SlowSync, p.opts.FastSync, p.cache)
|
|
|
|
if topic == MailServerDiscoveryTopic {
|
2018-08-20 15:55:43 +02:00
|
|
|
v, err := p.initVerifier(rpcClient)
|
|
|
|
if err != nil {
|
|
|
|
return err
|
|
|
|
}
|
|
|
|
topicPool = newCacheOnlyTopicPool(t, v)
|
2018-07-16 09:40:40 +02:00
|
|
|
} else {
|
|
|
|
topicPool = t
|
|
|
|
}
|
2018-04-10 09:44:09 +03:00
|
|
|
if err := topicPool.StartSearch(server); err != nil {
|
|
|
|
return err
|
|
|
|
}
|
|
|
|
p.topics = append(p.topics, topicPool)
|
|
|
|
}
|
|
|
|
|
2018-05-15 11:16:25 +02:00
|
|
|
// discovery must be already started when pool is started
|
|
|
|
signal.SendDiscoveryStarted()
|
2018-05-01 17:19:11 +02:00
|
|
|
|
2018-04-10 09:44:09 +03:00
|
|
|
return nil
|
|
|
|
}
|
|
|
|
|
2018-08-20 15:55:43 +02:00
|
|
|
func (p *PeerPool) initVerifier(rpcClient contracts.RPCClient) (v Verifier, err error) {
|
|
|
|
if addr := p.opts.MailServerRegistryAddress; addr != "" {
|
|
|
|
caller := contracts.NewContractCaller(rpcClient)
|
|
|
|
addrBytes := common.FromHex(addr)
|
|
|
|
return registry.NewVerifier(caller, common.BytesToAddress(addrBytes))
|
|
|
|
}
|
|
|
|
|
|
|
|
return verifier.NewLocalVerifier(p.opts.TrustedMailServers), nil
|
|
|
|
}
|
|
|
|
|
2018-07-03 14:27:04 +03:00
|
|
|
func (p *PeerPool) startDiscovery() error {
|
|
|
|
if p.discovery.Running() {
|
2018-05-01 17:19:11 +02:00
|
|
|
return nil
|
|
|
|
}
|
|
|
|
|
2018-07-03 14:27:04 +03:00
|
|
|
if err := p.discovery.Start(); err != nil {
|
2018-05-01 17:19:11 +02:00
|
|
|
return err
|
|
|
|
}
|
2018-05-15 11:16:25 +02:00
|
|
|
|
2018-05-01 17:19:11 +02:00
|
|
|
p.mu.Lock()
|
2018-05-15 11:16:25 +02:00
|
|
|
p.setDiscoveryTimeout()
|
2018-05-01 17:19:11 +02:00
|
|
|
p.mu.Unlock()
|
|
|
|
|
2018-05-03 09:35:58 +02:00
|
|
|
signal.SendDiscoveryStarted()
|
2018-05-01 17:19:11 +02:00
|
|
|
|
|
|
|
return nil
|
|
|
|
}
|
|
|
|
|
2018-07-16 09:40:40 +02:00
|
|
|
func (p *PeerPool) stopDiscovery(server *p2p.Server) {
|
2018-07-03 14:27:04 +03:00
|
|
|
if !p.discovery.Running() {
|
2018-05-01 17:19:11 +02:00
|
|
|
return
|
|
|
|
}
|
|
|
|
|
2018-09-24 09:52:26 +03:00
|
|
|
if err := p.discovery.Stop(); err != nil {
|
|
|
|
log.Error("discovery errored when stopping", "err", err)
|
|
|
|
}
|
2018-05-10 14:45:51 +03:00
|
|
|
for _, t := range p.topics {
|
2018-07-16 09:40:40 +02:00
|
|
|
t.StopSearch(server)
|
2018-05-10 14:45:51 +03:00
|
|
|
}
|
|
|
|
|
2021-02-01 14:39:41 +01:00
|
|
|
p.timeoutMu.Lock()
|
2018-05-01 17:19:11 +02:00
|
|
|
p.timeout = nil
|
2021-02-01 14:39:41 +01:00
|
|
|
p.timeoutMu.Unlock()
|
2018-05-01 17:19:11 +02:00
|
|
|
|
2018-05-03 09:35:58 +02:00
|
|
|
signal.SendDiscoveryStopped()
|
2018-05-01 17:19:11 +02:00
|
|
|
}
|
|
|
|
|
2018-04-12 16:08:49 +03:00
|
|
|
// restartDiscovery and search for topics that have peer count below min
|
|
|
|
func (p *PeerPool) restartDiscovery(server *p2p.Server) error {
|
2018-07-03 14:27:04 +03:00
|
|
|
if !p.discovery.Running() {
|
|
|
|
if err := p.startDiscovery(); err != nil {
|
2018-04-12 16:08:49 +03:00
|
|
|
return err
|
|
|
|
}
|
|
|
|
log.Debug("restarted discovery from peer pool")
|
|
|
|
}
|
|
|
|
for _, t := range p.topics {
|
|
|
|
if !t.BelowMin() || t.SearchRunning() {
|
|
|
|
continue
|
|
|
|
}
|
|
|
|
err := t.StartSearch(server)
|
|
|
|
if err != nil {
|
|
|
|
log.Error("search failed to start", "error", err)
|
|
|
|
}
|
|
|
|
}
|
|
|
|
return nil
|
|
|
|
}
|
|
|
|
|
2018-04-10 09:44:09 +03:00
|
|
|
// handleServerPeers watches server peer events, notifies topic pools about changes
|
|
|
|
// in the peer set and stops the discv5 if all topic pools collected enough peers.
|
2018-05-01 17:19:11 +02:00
|
|
|
//
|
|
|
|
// @TODO(adam): split it into peers and discovery management loops. This should
|
|
|
|
// simplify the whole logic and allow to remove `timeout` field from `PeerPool`.
|
2018-04-10 09:44:09 +03:00
|
|
|
func (p *PeerPool) handleServerPeers(server *p2p.Server, events <-chan *p2p.PeerEvent) {
|
Fix deadlock in peerpoool
This is a bit complicated, so:
1) Peerpool was subscribing to `event.Feed`, which is a global event
emitter for ethereum.
2) The p2p.Server was publshing on `event.Feed`, this triggered in the
same routine a publish on `event.Feed`.
3) Peerpool was listening to `event.Feed`, react on it, and in the same
routine, trigger some code on p2p.Server that would publish on
`event.Feed`
This meant that if the size of the channel was unbufferred, it would deadlock, as
peerPool would not be consuming when it would publish (the same go
routine publishes and listen effectively, through a lot of indirection
and non-buffered channels, p2p.Server->event.Feed)
The channel though was a buffered channel with size 10, and this meant that most of the times is
fine.
The issue is that peerpool is not the only producer to this channel.
So it's possible that while is processing an event, the buffer would
fill up, and it would hange trying to publish, and nobody is listening
to the channel, hanging EVERYTHING.
At least that's what I think, needs to be tested, but definitely an
issue.
I kept the code changes to a minimum, this code is a bit hairy, but it's
fairly critical so I don't want to make too many changes.
2021-01-29 13:44:25 +01:00
|
|
|
retryDiscv5 := make(chan struct{}, 1)
|
|
|
|
stopDiscv5 := make(chan struct{}, 1)
|
|
|
|
|
|
|
|
queueRetry := func(d time.Duration) {
|
|
|
|
go func() {
|
|
|
|
time.Sleep(d)
|
|
|
|
select {
|
|
|
|
case retryDiscv5 <- struct{}{}:
|
|
|
|
default:
|
|
|
|
}
|
|
|
|
}()
|
|
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
queueStop := func() {
|
|
|
|
go func() {
|
|
|
|
select {
|
|
|
|
case stopDiscv5 <- struct{}{}:
|
|
|
|
default:
|
|
|
|
}
|
|
|
|
}()
|
|
|
|
|
|
|
|
}
|
2018-04-12 16:08:49 +03:00
|
|
|
|
2018-04-10 09:44:09 +03:00
|
|
|
for {
|
2021-02-01 14:39:41 +01:00
|
|
|
// We use a separate lock for timeout, as this loop should
|
|
|
|
// always be running, otherwise the p2p.Server will hang.
|
|
|
|
// Because the handler of events might potentially hang on the
|
|
|
|
// server, deadlocking if this loop is waiting for the global lock.
|
|
|
|
// NOTE: this code probably needs to be refactored and simplified
|
|
|
|
// as it's difficult to follow the asynchronous nature of it.
|
|
|
|
p.timeoutMu.RLock()
|
2018-05-01 17:19:11 +02:00
|
|
|
timeout := p.timeout
|
2021-02-01 14:39:41 +01:00
|
|
|
p.timeoutMu.RUnlock()
|
2018-05-01 17:19:11 +02:00
|
|
|
|
2018-04-10 09:44:09 +03:00
|
|
|
select {
|
|
|
|
case <-p.quit:
|
2018-07-03 14:27:04 +03:00
|
|
|
log.Debug("stopping DiscV5 because of quit")
|
2018-07-16 09:40:40 +02:00
|
|
|
p.stopDiscovery(server)
|
2018-04-10 09:44:09 +03:00
|
|
|
return
|
2018-05-01 17:19:11 +02:00
|
|
|
case <-timeout:
|
2018-07-03 14:27:04 +03:00
|
|
|
log.Info("DiscV5 timed out")
|
2018-07-16 09:40:40 +02:00
|
|
|
p.stopDiscovery(server)
|
2018-04-12 16:08:49 +03:00
|
|
|
case <-retryDiscv5:
|
|
|
|
if err := p.restartDiscovery(server); err != nil {
|
2018-05-01 17:19:11 +02:00
|
|
|
log.Error("starting discv5 failed", "error", err, "retry", discoveryRestartTimeout)
|
Fix deadlock in peerpoool
This is a bit complicated, so:
1) Peerpool was subscribing to `event.Feed`, which is a global event
emitter for ethereum.
2) The p2p.Server was publshing on `event.Feed`, this triggered in the
same routine a publish on `event.Feed`.
3) Peerpool was listening to `event.Feed`, react on it, and in the same
routine, trigger some code on p2p.Server that would publish on
`event.Feed`
This meant that if the size of the channel was unbufferred, it would deadlock, as
peerPool would not be consuming when it would publish (the same go
routine publishes and listen effectively, through a lot of indirection
and non-buffered channels, p2p.Server->event.Feed)
The channel though was a buffered channel with size 10, and this meant that most of the times is
fine.
The issue is that peerpool is not the only producer to this channel.
So it's possible that while is processing an event, the buffer would
fill up, and it would hange trying to publish, and nobody is listening
to the channel, hanging EVERYTHING.
At least that's what I think, needs to be tested, but definitely an
issue.
I kept the code changes to a minimum, this code is a bit hairy, but it's
fairly critical so I don't want to make too many changes.
2021-01-29 13:44:25 +01:00
|
|
|
queueRetry(discoveryRestartTimeout)
|
2018-04-12 16:08:49 +03:00
|
|
|
}
|
2018-06-06 15:39:27 +02:00
|
|
|
case <-stopDiscv5:
|
2018-07-16 09:40:40 +02:00
|
|
|
p.handleStopTopics(server)
|
|
|
|
case req := <-p.updateTopic:
|
|
|
|
if p.updateTopicLimits(server, req) == nil {
|
|
|
|
if !p.discovery.Running() {
|
Fix deadlock in peerpoool
This is a bit complicated, so:
1) Peerpool was subscribing to `event.Feed`, which is a global event
emitter for ethereum.
2) The p2p.Server was publshing on `event.Feed`, this triggered in the
same routine a publish on `event.Feed`.
3) Peerpool was listening to `event.Feed`, react on it, and in the same
routine, trigger some code on p2p.Server that would publish on
`event.Feed`
This meant that if the size of the channel was unbufferred, it would deadlock, as
peerPool would not be consuming when it would publish (the same go
routine publishes and listen effectively, through a lot of indirection
and non-buffered channels, p2p.Server->event.Feed)
The channel though was a buffered channel with size 10, and this meant that most of the times is
fine.
The issue is that peerpool is not the only producer to this channel.
So it's possible that while is processing an event, the buffer would
fill up, and it would hange trying to publish, and nobody is listening
to the channel, hanging EVERYTHING.
At least that's what I think, needs to be tested, but definitely an
issue.
I kept the code changes to a minimum, this code is a bit hairy, but it's
fairly critical so I don't want to make too many changes.
2021-01-29 13:44:25 +01:00
|
|
|
queueRetry(immediately)
|
2018-07-16 09:40:40 +02:00
|
|
|
}
|
|
|
|
}
|
2018-04-10 09:44:09 +03:00
|
|
|
case event := <-events:
|
Fix deadlock in peerpoool
This is a bit complicated, so:
1) Peerpool was subscribing to `event.Feed`, which is a global event
emitter for ethereum.
2) The p2p.Server was publshing on `event.Feed`, this triggered in the
same routine a publish on `event.Feed`.
3) Peerpool was listening to `event.Feed`, react on it, and in the same
routine, trigger some code on p2p.Server that would publish on
`event.Feed`
This meant that if the size of the channel was unbufferred, it would deadlock, as
peerPool would not be consuming when it would publish (the same go
routine publishes and listen effectively, through a lot of indirection
and non-buffered channels, p2p.Server->event.Feed)
The channel though was a buffered channel with size 10, and this meant that most of the times is
fine.
The issue is that peerpool is not the only producer to this channel.
So it's possible that while is processing an event, the buffer would
fill up, and it would hange trying to publish, and nobody is listening
to the channel, hanging EVERYTHING.
At least that's what I think, needs to be tested, but definitely an
issue.
I kept the code changes to a minimum, this code is a bit hairy, but it's
fairly critical so I don't want to make too many changes.
2021-01-29 13:44:25 +01:00
|
|
|
// NOTE: handlePeerEventType needs to be called asynchronously
|
|
|
|
// as it publishes on the <-events channel, leading to a deadlock
|
|
|
|
// if events channel is full.
|
|
|
|
go p.handlePeerEventType(server, event, queueRetry, queueStop)
|
2018-04-10 09:44:09 +03:00
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
Fix deadlock in peerpool
This is a bit complicated, so:
1) Peerpool was subscribing to `event.Feed`, which is a global event
emitter for ethereum.
2) The p2p.Server was publishing on `event.Feed`, this triggered in the
same routine a publish on `event.Feed`.
3) Peerpool was listening to `event.Feed`, reacting to it, and in the same
routine, triggering some code on p2p.Server that would publish on
`event.Feed`
This meant that if the channel was unbuffered, it would deadlock, as
peerPool would not be consuming when it would publish (the same go
routine publishes and listens effectively, through a lot of indirection
and non-buffered channels, p2p.Server->event.Feed)
The channel though was a buffered channel with size 10, and this meant that most of the time it is
fine.
The issue is that peerpool is not the only producer to this channel.
So it's possible that while it is processing an event, the buffer would
fill up, and it would hang trying to publish, and nobody is listening
to the channel, hanging EVERYTHING.
At least that's what I think, needs to be tested, but definitely an
issue.
I kept the code changes to a minimum, this code is a bit hairy, but it's
fairly critical so I don't want to make too many changes.
2021-01-29 13:44:25 +01:00
|
|
|
func (p *PeerPool) handlePeerEventType(server *p2p.Server, event *p2p.PeerEvent, queueRetry func(time.Duration), queueStop func()) {
|
2018-04-12 16:08:49 +03:00
|
|
|
p.mu.Lock()
|
|
|
|
defer p.mu.Unlock()
|
Fix deadlock in peerpoool
This is a bit complicated, so:
1) Peerpool was subscribing to `event.Feed`, which is a global event
emitter for ethereum.
2) The p2p.Server was publshing on `event.Feed`, this triggered in the
same routine a publish on `event.Feed`.
3) Peerpool was listening to `event.Feed`, react on it, and in the same
routine, trigger some code on p2p.Server that would publish on
`event.Feed`
This meant that if the size of the channel was unbufferred, it would deadlock, as
peerPool would not be consuming when it would publish (the same go
routine publishes and listen effectively, through a lot of indirection
and non-buffered channels, p2p.Server->event.Feed)
The channel though was a buffered channel with size 10, and this meant that most of the times is
fine.
The issue is that peerpool is not the only producer to this channel.
So it's possible that while is processing an event, the buffer would
fill up, and it would hange trying to publish, and nobody is listening
to the channel, hanging EVERYTHING.
At least that's what I think, needs to be tested, but definitely an
issue.
I kept the code changes to a minimum, this code is a bit hairy, but it's
fairly critical so I don't want to make too many changes.
2021-01-29 13:44:25 +01:00
|
|
|
|
2021-02-05 14:25:58 +01:00
|
|
|
var shouldRetry bool
|
|
|
|
var shouldStop bool
|
Fix deadlock in peerpoool
This is a bit complicated, so:
1) Peerpool was subscribing to `event.Feed`, which is a global event
emitter for ethereum.
2) The p2p.Server was publshing on `event.Feed`, this triggered in the
same routine a publish on `event.Feed`.
3) Peerpool was listening to `event.Feed`, react on it, and in the same
routine, trigger some code on p2p.Server that would publish on
`event.Feed`
This meant that if the size of the channel was unbufferred, it would deadlock, as
peerPool would not be consuming when it would publish (the same go
routine publishes and listen effectively, through a lot of indirection
and non-buffered channels, p2p.Server->event.Feed)
The channel though was a buffered channel with size 10, and this meant that most of the times is
fine.
The issue is that peerpool is not the only producer to this channel.
So it's possible that while is processing an event, the buffer would
fill up, and it would hange trying to publish, and nobody is listening
to the channel, hanging EVERYTHING.
At least that's what I think, needs to be tested, but definitely an
issue.
I kept the code changes to a minimum, this code is a bit hairy, but it's
fairly critical so I don't want to make too many changes.
2021-01-29 13:44:25 +01:00
|
|
|
switch event.Type {
|
|
|
|
case p2p.PeerEventTypeDrop:
|
|
|
|
log.Debug("confirm peer dropped", "ID", event.Peer)
|
|
|
|
if p.handleDroppedPeer(server, event.Peer) {
|
2021-02-05 14:25:58 +01:00
|
|
|
shouldRetry = true
|
Fix deadlock in peerpoool
This is a bit complicated, so:
1) Peerpool was subscribing to `event.Feed`, which is a global event
emitter for ethereum.
2) The p2p.Server was publshing on `event.Feed`, this triggered in the
same routine a publish on `event.Feed`.
3) Peerpool was listening to `event.Feed`, react on it, and in the same
routine, trigger some code on p2p.Server that would publish on
`event.Feed`
This meant that if the size of the channel was unbufferred, it would deadlock, as
peerPool would not be consuming when it would publish (the same go
routine publishes and listen effectively, through a lot of indirection
and non-buffered channels, p2p.Server->event.Feed)
The channel though was a buffered channel with size 10, and this meant that most of the times is
fine.
The issue is that peerpool is not the only producer to this channel.
So it's possible that while is processing an event, the buffer would
fill up, and it would hange trying to publish, and nobody is listening
to the channel, hanging EVERYTHING.
At least that's what I think, needs to be tested, but definitely an
issue.
I kept the code changes to a minimum, this code is a bit hairy, but it's
fairly critical so I don't want to make too many changes.
2021-01-29 13:44:25 +01:00
|
|
|
}
|
|
|
|
case p2p.PeerEventTypeAdd: // skip other events
|
|
|
|
log.Debug("confirm peer added", "ID", event.Peer)
|
|
|
|
p.handleAddedPeer(server, event.Peer)
|
2021-02-05 14:25:58 +01:00
|
|
|
shouldStop = true
|
Fix deadlock in peerpoool
This is a bit complicated, so:
1) Peerpool was subscribing to `event.Feed`, which is a global event
emitter for ethereum.
2) The p2p.Server was publshing on `event.Feed`, this triggered in the
same routine a publish on `event.Feed`.
3) Peerpool was listening to `event.Feed`, react on it, and in the same
routine, trigger some code on p2p.Server that would publish on
`event.Feed`
This meant that if the size of the channel was unbufferred, it would deadlock, as
peerPool would not be consuming when it would publish (the same go
routine publishes and listen effectively, through a lot of indirection
and non-buffered channels, p2p.Server->event.Feed)
The channel though was a buffered channel with size 10, and this meant that most of the times is
fine.
The issue is that peerpool is not the only producer to this channel.
So it's possible that while is processing an event, the buffer would
fill up, and it would hange trying to publish, and nobody is listening
to the channel, hanging EVERYTHING.
At least that's what I think, needs to be tested, but definitely an
issue.
I kept the code changes to a minimum, this code is a bit hairy, but it's
fairly critical so I don't want to make too many changes.
2021-01-29 13:44:25 +01:00
|
|
|
default:
|
|
|
|
return
|
|
|
|
}
|
2021-02-05 14:25:58 +01:00
|
|
|
|
|
|
|
// First we send the discovery summary
|
Fix deadlock in peerpoool
This is a bit complicated, so:
1) Peerpool was subscribing to `event.Feed`, which is a global event
emitter for ethereum.
2) The p2p.Server was publshing on `event.Feed`, this triggered in the
same routine a publish on `event.Feed`.
3) Peerpool was listening to `event.Feed`, react on it, and in the same
routine, trigger some code on p2p.Server that would publish on
`event.Feed`
This meant that if the size of the channel was unbufferred, it would deadlock, as
peerPool would not be consuming when it would publish (the same go
routine publishes and listen effectively, through a lot of indirection
and non-buffered channels, p2p.Server->event.Feed)
The channel though was a buffered channel with size 10, and this meant that most of the times is
fine.
The issue is that peerpool is not the only producer to this channel.
So it's possible that while is processing an event, the buffer would
fill up, and it would hange trying to publish, and nobody is listening
to the channel, hanging EVERYTHING.
At least that's what I think, needs to be tested, but definitely an
issue.
I kept the code changes to a minimum, this code is a bit hairy, but it's
fairly critical so I don't want to make too many changes.
2021-01-29 13:44:25 +01:00
|
|
|
SendDiscoverySummary(server.PeersInfo())
|
2021-02-05 14:25:58 +01:00
|
|
|
|
|
|
|
// then we send the stop event
|
|
|
|
if shouldRetry {
|
|
|
|
queueRetry(immediately)
|
|
|
|
} else if shouldStop {
|
|
|
|
queueStop()
|
|
|
|
}
|
Fix deadlock in peerpoool
This is a bit complicated, so:
1) Peerpool was subscribing to `event.Feed`, which is a global event
emitter for ethereum.
2) The p2p.Server was publshing on `event.Feed`, this triggered in the
same routine a publish on `event.Feed`.
3) Peerpool was listening to `event.Feed`, react on it, and in the same
routine, trigger some code on p2p.Server that would publish on
`event.Feed`
This meant that if the size of the channel was unbufferred, it would deadlock, as
peerPool would not be consuming when it would publish (the same go
routine publishes and listen effectively, through a lot of indirection
and non-buffered channels, p2p.Server->event.Feed)
The channel though was a buffered channel with size 10, and this meant that most of the times is
fine.
The issue is that peerpool is not the only producer to this channel.
So it's possible that while is processing an event, the buffer would
fill up, and it would hange trying to publish, and nobody is listening
to the channel, hanging EVERYTHING.
At least that's what I think, needs to be tested, but definitely an
issue.
I kept the code changes to a minimum, this code is a bit hairy, but it's
fairly critical so I don't want to make too many changes.
2021-01-29 13:44:25 +01:00
|
|
|
}
|
|
|
|
|
|
|
|
// handleAddedPeer notifies all topics about added peer.
|
|
|
|
func (p *PeerPool) handleAddedPeer(server *p2p.Server, nodeID enode.ID) {
|
2018-04-12 16:08:49 +03:00
|
|
|
for _, t := range p.topics {
|
|
|
|
t.ConfirmAdded(server, nodeID)
|
2018-05-15 11:16:25 +02:00
|
|
|
if p.opts.AllowStop && t.MaxReached() {
|
2018-06-06 15:39:27 +02:00
|
|
|
t.setStopSearchTimeout(p.opts.TopicStopSearchDelay)
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
// handleStopTopics stops the search on any topics having reached its max cached
|
|
|
|
// limit or its delay stop is expired, additionally will stop discovery if all
|
|
|
|
// peers are stopped.
|
2018-07-16 09:40:40 +02:00
|
|
|
func (p *PeerPool) handleStopTopics(server *p2p.Server) {
|
2018-06-06 15:39:27 +02:00
|
|
|
if !p.opts.AllowStop {
|
|
|
|
return
|
|
|
|
}
|
|
|
|
for _, t := range p.topics {
|
|
|
|
if t.readyToStopSearch() {
|
2018-07-16 09:40:40 +02:00
|
|
|
t.StopSearch(server)
|
2018-06-06 15:39:27 +02:00
|
|
|
}
|
|
|
|
}
|
|
|
|
if p.allTopicsStopped() {
|
2018-07-03 14:27:04 +03:00
|
|
|
log.Debug("closing discv5 connection because all topics reached max limit")
|
2018-07-16 09:40:40 +02:00
|
|
|
p.stopDiscovery(server)
|
2018-06-06 15:39:27 +02:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
// allTopicsStopped returns true if all topics are stopped.
|
|
|
|
func (p *PeerPool) allTopicsStopped() (all bool) {
|
|
|
|
if !p.opts.AllowStop {
|
|
|
|
return false
|
|
|
|
}
|
|
|
|
all = true
|
|
|
|
for _, t := range p.topics {
|
|
|
|
if !t.isStopped() {
|
2018-04-12 16:08:49 +03:00
|
|
|
all = false
|
|
|
|
}
|
|
|
|
}
|
|
|
|
return all
|
|
|
|
}
|
|
|
|
|
|
|
|
// handleDroppedPeer notifies every topic about dropped peer and returns true if any peer have connections
|
|
|
|
// below min limit
|
2018-11-14 08:03:58 +01:00
|
|
|
func (p *PeerPool) handleDroppedPeer(server *p2p.Server, nodeID enode.ID) (any bool) {
|
2018-04-12 16:08:49 +03:00
|
|
|
for _, t := range p.topics {
|
|
|
|
confirmed := t.ConfirmDropped(server, nodeID)
|
|
|
|
if confirmed {
|
|
|
|
newPeer := t.AddPeerFromTable(server)
|
|
|
|
if newPeer != nil {
|
|
|
|
log.Debug("added peer from local table", "ID", newPeer.ID)
|
|
|
|
}
|
|
|
|
}
|
2018-07-16 09:40:40 +02:00
|
|
|
log.Debug("search", "topic", t.Topic(), "below min", t.BelowMin())
|
2018-04-12 16:08:49 +03:00
|
|
|
if t.BelowMin() && !t.SearchRunning() {
|
|
|
|
any = true
|
|
|
|
}
|
|
|
|
}
|
|
|
|
return any
|
|
|
|
}
|
|
|
|
|
2018-04-10 09:44:09 +03:00
|
|
|
// Stop closes pool quit channel and all channels that are watched by search queries
|
|
|
|
// and waits till all goroutines will exit.
|
|
|
|
func (p *PeerPool) Stop() {
|
|
|
|
// pool wasn't started
|
|
|
|
if p.quit == nil {
|
|
|
|
return
|
|
|
|
}
|
|
|
|
select {
|
|
|
|
case <-p.quit:
|
|
|
|
return
|
|
|
|
default:
|
|
|
|
log.Debug("started closing peer pool")
|
|
|
|
close(p.quit)
|
|
|
|
}
|
|
|
|
p.serverSubscription.Unsubscribe()
|
2018-05-10 14:45:51 +03:00
|
|
|
p.wg.Wait()
|
2018-04-10 09:44:09 +03:00
|
|
|
}
|
2018-07-16 09:40:40 +02:00
|
|
|
|
|
|
|
type updateTopicRequest struct {
|
|
|
|
Topic string
|
|
|
|
Limits params.Limits
|
|
|
|
}
|
|
|
|
|
|
|
|
// UpdateTopic updates the pre-existing TopicPool limits.
|
|
|
|
func (p *PeerPool) UpdateTopic(topic string, limits params.Limits) error {
|
|
|
|
if _, err := p.getTopic(topic); err != nil {
|
|
|
|
return err
|
|
|
|
}
|
|
|
|
|
|
|
|
p.updateTopic <- &updateTopicRequest{
|
|
|
|
Topic: topic,
|
|
|
|
Limits: limits,
|
|
|
|
}
|
|
|
|
|
|
|
|
return nil
|
|
|
|
}
|
|
|
|
|
|
|
|
func (p *PeerPool) updateTopicLimits(server *p2p.Server, req *updateTopicRequest) error {
|
|
|
|
t, err := p.getTopic(req.Topic)
|
|
|
|
if err != nil {
|
|
|
|
return err
|
|
|
|
}
|
|
|
|
t.SetLimits(req.Limits)
|
|
|
|
return nil
|
|
|
|
}
|
|
|
|
|
|
|
|
func (p *PeerPool) getTopic(topic string) (TopicPoolInterface, error) {
|
|
|
|
for _, t := range p.topics {
|
|
|
|
if t.Topic() == discv5.Topic(topic) {
|
|
|
|
return t, nil
|
|
|
|
}
|
|
|
|
}
|
|
|
|
return nil, errors.New("topic not found")
|
|
|
|
}
|