go-waku/waku/v2/node/keepalive.go

97 lines
2.5 KiB
Go
Raw Normal View History

2021-12-08 14:21:30 +00:00
package node
import (
"context"
"time"
2022-10-19 15:39:32 -04:00
"github.com/libp2p/go-libp2p/core/network"
"github.com/libp2p/go-libp2p/core/peer"
2021-12-08 14:21:30 +00:00
"github.com/libp2p/go-libp2p/p2p/protocol/ping"
"github.com/waku-org/go-waku/logging"
2022-05-27 09:25:06 -04:00
"go.uber.org/zap"
2021-12-08 14:21:30 +00:00
)
const (
	// maxAllowedPingFailures is the failure count a peer may accumulate in
	// keepAliveFails; pingPeer closes the connection to a still-connected
	// peer once its count exceeds this value.
	maxAllowedPingFailures = 2

	// maxPublishAttempt is not referenced in this file.
	// NOTE(review): presumably an upper bound on publish retries used
	// elsewhere in the package — confirm against its callers.
	maxPublishAttempt = 5
)
// startKeepAlive creates a go routine that periodically pings connected peers.
// This is necessary because TCP connections are automatically closed due to inactivity,
// and doing a ping will avoid this (with a small bandwidth cost)
func (w *WakuNode) startKeepAlive(t time.Duration) {
go func() {
defer w.wg.Done()
2022-05-27 09:25:06 -04:00
w.log.Info("setting up ping protocol", zap.Duration("duration", t))
2021-12-08 14:21:30 +00:00
ticker := time.NewTicker(t)
defer ticker.Stop()
2022-12-08 23:08:04 -04:00
lastTimeExecuted := w.timesource.Now()
2022-12-08 15:48:16 -04:00
sleepDetectionInterval := int64(t) * 3
2021-12-08 14:21:30 +00:00
for {
select {
case <-ticker.C:
2022-12-08 23:08:04 -04:00
difference := w.timesource.Now().UnixNano() - lastTimeExecuted.UnixNano()
if difference > sleepDetectionInterval {
w.log.Warn("keep alive hasnt been executed recently. Killing all connections to peers")
for _, p := range w.host.Network().Peers() {
err := w.host.Network().ClosePeer(p)
if err != nil {
w.log.Warn("while disconnecting peer", zap.Error(err))
}
}
2022-12-08 23:08:04 -04:00
lastTimeExecuted = w.timesource.Now()
continue
}
// Network's peers collection,
// contains only currently active peers
for _, p := range w.host.Network().Peers() {
2021-12-08 14:21:30 +00:00
if p != w.host.ID() {
w.wg.Add(1)
go w.pingPeer(p)
}
}
2022-12-08 23:08:04 -04:00
lastTimeExecuted = w.timesource.Now()
case <-w.ctx.Done():
w.log.Info("stopping ping protocol")
2021-12-08 14:21:30 +00:00
return
}
}
}()
}
func (w *WakuNode) pingPeer(peer peer.ID) {
w.keepAliveMutex.Lock()
defer w.keepAliveMutex.Unlock()
defer w.wg.Done()
ctx, cancel := context.WithTimeout(w.ctx, 3*time.Second)
defer cancel()
2022-05-27 09:25:06 -04:00
logger := w.log.With(logging.HostID("peer", peer))
logger.Debug("pinging")
2021-12-08 14:21:30 +00:00
pr := ping.Ping(ctx, w.host, peer)
select {
case res := <-pr:
if res.Error != nil {
w.keepAliveFails[peer]++
2022-05-27 09:25:06 -04:00
logger.Debug("could not ping", zap.Error(res.Error))
2021-12-08 14:21:30 +00:00
} else {
delete(w.keepAliveFails, peer)
2021-12-08 14:21:30 +00:00
}
case <-ctx.Done():
w.keepAliveFails[peer]++
2022-05-27 09:25:06 -04:00
logger.Debug("could not ping (context done)", zap.Error(ctx.Err()))
2021-12-08 14:21:30 +00:00
}
if w.keepAliveFails[peer] > maxAllowedPingFailures && w.host.Network().Connectedness(peer) == network.Connected {
2022-05-27 09:25:06 -04:00
logger.Info("disconnecting peer")
2021-12-08 14:21:30 +00:00
if err := w.host.Network().ClosePeer(peer); err != nil {
2022-05-27 09:25:06 -04:00
logger.Debug("closing conn to peer", zap.Error(err))
2021-12-08 14:21:30 +00:00
}
w.keepAliveFails[peer] = 0
}
}