status-go/_assets/patches/geth/0028-p2p-watchdog.patch

120 lines
3.1 KiB
Diff

diff --git c/p2p/peer.go w/p2p/peer.go
index 73e33418e..322268b28 100644
--- c/p2p/peer.go
+++ w/p2p/peer.go
@@ -22,6 +22,7 @@ import (
"net"
"sort"
"sync"
+ "sync/atomic"
"time"
"github.com/ethereum/go-ethereum/common/mclock"
@@ -38,7 +39,10 @@ const (
snappyProtocolVersion = 5
- pingInterval = 15 * time.Second
+ pingInterval = 1 * time.Second
+ // watchdogInterval intentionally lower than ping interval.
+ // this way we reduce potential flaky window size.
+ watchdogInterval = 200 * time.Millisecond
)
const (
@@ -100,6 +104,7 @@ type Peer struct {
log log.Logger
created mclock.AbsTime
+ flaky int32
wg sync.WaitGroup
protoErr chan error
closed chan struct{}
@@ -118,6 +123,11 @@ func NewPeer(id discover.NodeID, name string, caps []Cap) *Peer {
return peer
}
+// IsFlaky returns true if there was no incoming traffic recently.
+func (p *Peer) IsFlaky() bool {
+ return atomic.LoadInt32(&p.flaky) == 1
+}
+
// ID returns the node's public key.
func (p *Peer) ID() discover.NodeID {
return p.rw.id
@@ -188,8 +198,10 @@ func (p *Peer) run() (remoteRequested bool, err error) {
readErr = make(chan error, 1)
reason DiscReason // sent to the peer
)
- p.wg.Add(2)
- go p.readLoop(readErr)
+ p.wg.Add(3)
+ reads := make(chan struct{}, 10) // channel for reads
+ go p.readLoop(readErr, reads)
+ go p.watchdogLoop(reads)
go p.pingLoop()
// Start all protocol handlers.
@@ -248,7 +260,24 @@ func (p *Peer) pingLoop() {
}
}
-func (p *Peer) readLoop(errc chan<- error) {
+func (p *Peer) watchdogLoop(reads <-chan struct{}) {
+ defer p.wg.Done()
+ hb := time.NewTimer(watchdogInterval)
+ defer hb.Stop()
+ for {
+ select {
+ case <-reads:
+ atomic.StoreInt32(&p.flaky, 0)
+ case <-hb.C:
+ atomic.StoreInt32(&p.flaky, 1)
+ case <-p.closed:
+ return
+ }
+ hb.Reset(watchdogInterval)
+ }
+}
+
+func (p *Peer) readLoop(errc chan<- error, reads chan<- struct{}) {
defer p.wg.Done()
for {
msg, err := p.rw.ReadMsg()
@@ -261,6 +290,7 @@ func (p *Peer) readLoop(errc chan<- error) {
errc <- err
return
}
+ reads <- struct{}{}
}
}
diff --git c/p2p/server.go w/p2p/server.go
index c41d1dc15..04c6f7147 100644
--- c/p2p/server.go
+++ w/p2p/server.go
@@ -45,7 +45,7 @@ const (
// Maximum time allowed for reading a complete message.
// This is effectively the amount of time a connection can be idle.
- frameReadTimeout = 30 * time.Second
+ frameReadTimeout = 10 * time.Second
// Maximum amount of time allowed for writing a complete message.
frameWriteTimeout = 20 * time.Second
diff --git c/whisper/whisperv6/peer.go w/whisper/whisperv6/peer.go
index 427127290..c30e92d1c 100644
--- c/whisper/whisperv6/peer.go
+++ w/whisper/whisperv6/peer.go
@@ -187,6 +187,10 @@ func (peer *Peer) expire() {
// broadcast iterates over the collection of envelopes and transmits yet unknown
// ones over the network.
func (peer *Peer) broadcast() error {
+ if peer.peer.IsFlaky() {
+ log.Trace("Waiting for a peer to restore communication", "ID", peer.peer.ID())
+ return nil
+ }
envelopes := peer.host.Envelopes()
bundle := make([]*Envelope, 0, len(envelopes))
for _, envelope := range envelopes {