chore: keepAlive will not immediatly disconnect peers when waking up from sleep, but do it only if pinging the peer fails

This commit is contained in:
Richard Ramos 2023-08-08 10:44:38 -04:00 committed by richΛrd
parent e0e4a2fa87
commit 9e52e09dd5
2 changed files with 22 additions and 25 deletions

View File

@ -5,7 +5,6 @@ import (
"sync" "sync"
"time" "time"
"github.com/libp2p/go-libp2p/core/host"
"github.com/libp2p/go-libp2p/core/network" "github.com/libp2p/go-libp2p/core/network"
"github.com/libp2p/go-libp2p/core/peer" "github.com/libp2p/go-libp2p/core/peer"
"github.com/libp2p/go-libp2p/p2p/protocol/ping" "github.com/libp2p/go-libp2p/p2p/protocol/ping"
@ -15,15 +14,10 @@ import (
const maxAllowedPingFailures = 2 const maxAllowedPingFailures = 2
func disconnectPeers(host host.Host, logger *zap.Logger) { // If the difference between the last time the keep alive code was executed and now is greater
logger.Warn("keep alive hasnt been executed recently. Killing all connections to peers") // than sleepDectectionIntervalFactor * keepAlivePeriod, force the ping verification to disconnect
for _, p := range host.Network().Peers() { // the peers if they don't reply back
err := host.Network().ClosePeer(p) const sleepDetectionIntervalFactor = 3
if err != nil {
logger.Warn("while disconnecting peer", zap.Error(err))
}
}
}
// startKeepAlive creates a go routine that periodically pings connected peers. // startKeepAlive creates a go routine that periodically pings connected peers.
// This is necessary because TCP connections are automatically closed due to inactivity, // This is necessary because TCP connections are automatically closed due to inactivity,
@ -36,15 +30,17 @@ func (w *WakuNode) startKeepAlive(ctx context.Context, t time.Duration) {
lastTimeExecuted := w.timesource.Now() lastTimeExecuted := w.timesource.Now()
sleepDetectionInterval := int64(t) * 3 sleepDetectionInterval := int64(t) * sleepDetectionIntervalFactor
for { for {
select { select {
case <-ticker.C: case <-ticker.C:
difference := w.timesource.Now().UnixNano() - lastTimeExecuted.UnixNano() difference := w.timesource.Now().UnixNano() - lastTimeExecuted.UnixNano()
forceDisconnectOnPingFailure := false
if difference > sleepDetectionInterval { if difference > sleepDetectionInterval {
disconnectPeers(w.host, w.log) forceDisconnectOnPingFailure = true
lastTimeExecuted = w.timesource.Now() lastTimeExecuted = w.timesource.Now()
w.log.Warn("keep alive hasnt been executed recently. Killing connections to peers if ping fails")
continue continue
} }
@ -55,7 +51,7 @@ func (w *WakuNode) startKeepAlive(ctx context.Context, t time.Duration) {
pingWg.Add(len(peersToPing)) pingWg.Add(len(peersToPing))
for _, p := range peersToPing { for _, p := range peersToPing {
if p != w.host.ID() { if p != w.host.ID() {
go w.pingPeer(ctx, &pingWg, p) go w.pingPeer(ctx, &pingWg, p, forceDisconnectOnPingFailure)
} }
} }
pingWg.Wait() pingWg.Wait()
@ -68,41 +64,41 @@ func (w *WakuNode) startKeepAlive(ctx context.Context, t time.Duration) {
} }
} }
func (w *WakuNode) pingPeer(ctx context.Context, wg *sync.WaitGroup, peer peer.ID) { func (w *WakuNode) pingPeer(ctx context.Context, wg *sync.WaitGroup, peerID peer.ID, forceDisconnectOnFail bool) {
defer wg.Done() defer wg.Done()
ctx, cancel := context.WithTimeout(ctx, 7*time.Second) ctx, cancel := context.WithTimeout(ctx, 7*time.Second)
defer cancel() defer cancel()
logger := w.log.With(logging.HostID("peer", peer)) logger := w.log.With(logging.HostID("peer", peerID))
logger.Debug("pinging") logger.Debug("pinging")
pr := ping.Ping(ctx, w.host, peer) pr := ping.Ping(ctx, w.host, peerID)
select { select {
case res := <-pr: case res := <-pr:
if res.Error != nil { if res.Error != nil {
w.keepAliveMutex.Lock() w.keepAliveMutex.Lock()
w.keepAliveFails[peer]++ w.keepAliveFails[peerID]++
w.keepAliveMutex.Unlock() w.keepAliveMutex.Unlock()
logger.Debug("could not ping", zap.Error(res.Error)) logger.Debug("could not ping", zap.Error(res.Error))
} else { } else {
w.keepAliveMutex.Lock() w.keepAliveMutex.Lock()
delete(w.keepAliveFails, peer) delete(w.keepAliveFails, peerID)
w.keepAliveMutex.Unlock() w.keepAliveMutex.Unlock()
} }
case <-ctx.Done(): case <-ctx.Done():
w.keepAliveMutex.Lock() w.keepAliveMutex.Lock()
w.keepAliveFails[peer]++ w.keepAliveFails[peerID]++
w.keepAliveMutex.Unlock() w.keepAliveMutex.Unlock()
logger.Debug("could not ping (context done)", zap.Error(ctx.Err())) logger.Debug("could not ping (context done)", zap.Error(ctx.Err()))
} }
w.keepAliveMutex.Lock() w.keepAliveMutex.Lock()
if w.keepAliveFails[peer] > maxAllowedPingFailures && w.host.Network().Connectedness(peer) == network.Connected { if (forceDisconnectOnFail || w.keepAliveFails[peerID] > maxAllowedPingFailures) && w.host.Network().Connectedness(peerID) == network.Connected {
logger.Info("disconnecting peer") logger.Info("disconnecting peer")
if err := w.host.Network().ClosePeer(peer); err != nil { if err := w.host.Network().ClosePeer(peerID); err != nil {
logger.Debug("closing conn to peer", zap.Error(err)) logger.Debug("closing conn to peer", zap.Error(err))
} }
w.keepAliveFails[peer] = 0 w.keepAliveFails[peerID] = 0
} }
w.keepAliveMutex.Unlock() w.keepAliveMutex.Unlock()
} }

View File

@ -21,10 +21,11 @@ func TestKeepAlive(t *testing.T) {
require.NoError(t, err) require.NoError(t, err)
host2, err := libp2p.New(libp2p.DefaultTransports, libp2p.ListenAddrStrings("/ip4/0.0.0.0/tcp/0")) host2, err := libp2p.New(libp2p.DefaultTransports, libp2p.ListenAddrStrings("/ip4/0.0.0.0/tcp/0"))
require.NoError(t, err) require.NoError(t, err)
peerID2 := host2.ID()
host1.Peerstore().AddAddrs(host2.ID(), host2.Addrs(), peerstore.PermanentAddrTTL) host1.Peerstore().AddAddrs(peerID2, host2.Addrs(), peerstore.PermanentAddrTTL)
err = host1.Connect(ctx, host1.Peerstore().PeerInfo(host2.ID())) err = host1.Connect(ctx, host1.Peerstore().PeerInfo(peerID2))
require.NoError(t, err) require.NoError(t, err)
require.Len(t, host1.Network().Peers(), 1) require.Len(t, host1.Network().Peers(), 1)
@ -42,7 +43,7 @@ func TestKeepAlive(t *testing.T) {
} }
w.wg.Add(1) w.wg.Add(1)
w.pingPeer(ctx2, w.wg, host2.ID()) w.pingPeer(ctx2, w.wg, peerID2, false)
require.NoError(t, ctx.Err()) require.NoError(t, ctx.Err())
} }