2023-08-03 21:51:15 +05:30
package peermanager
2023-01-13 19:58:22 -04:00
// Adapted from github.com/libp2p/go-libp2p@v0.23.2/p2p/discovery/backoff/backoffconnector.go
import (
"context"
"errors"
2023-08-30 21:33:57 +07:00
"math/rand"
2023-01-13 19:58:22 -04:00
"sync"
2023-09-19 07:39:39 +07:00
"sync/atomic"
2023-01-13 19:58:22 -04:00
"time"
"github.com/libp2p/go-libp2p/core/host"
"github.com/libp2p/go-libp2p/core/network"
"github.com/libp2p/go-libp2p/core/peer"
2023-06-05 10:39:38 -04:00
2023-01-13 19:58:22 -04:00
"github.com/libp2p/go-libp2p/p2p/discovery/backoff"
"github.com/waku-org/go-waku/logging"
2024-06-25 11:28:04 -04:00
"github.com/waku-org/go-waku/waku/v2/onlinechecker"
2023-08-03 21:51:15 +05:30
wps "github.com/waku-org/go-waku/waku/v2/peerstore"
2023-11-07 22:43:19 +05:30
"github.com/waku-org/go-waku/waku/v2/service"
2023-06-05 10:39:38 -04:00
2023-01-13 19:58:22 -04:00
"go.uber.org/zap"
lru "github.com/hashicorp/golang-lru"
)
2023-08-15 06:57:51 +05:30
// PeerConnectionStrategy is a utility to connect to peers,
// but only if we have not recently tried connecting to them already
2023-01-13 19:58:22 -04:00
type PeerConnectionStrategy struct {
2024-06-25 11:28:04 -04:00
mux sync . Mutex
cache * lru . TwoQueueCache
host host . Host
pm * PeerManager
onlineChecker onlinechecker . OnlineChecker
2023-09-19 07:39:39 +07:00
paused atomic . Bool
dialTimeout time . Duration
2023-11-07 22:43:19 +05:30
* service . CommonDiscoveryService
2023-10-25 21:25:56 -04:00
subscriptions [ ] subscription
2023-01-13 19:58:22 -04:00
backoff backoff . BackoffFactory
logger * zap . Logger
}
2023-10-25 21:25:56 -04:00
type subscription struct {
ctx context . Context
2023-11-07 22:43:19 +05:30
ch <- chan service . PeerData
2023-10-25 21:25:56 -04:00
}
2023-08-30 21:33:57 +07:00
// getBackOff builds the factory for the strategy used to decide how long to
// back off after previously attempting to connect to a peer: an exponential
// backoff with full jitter, configured with a one minute minimum and a one
// hour maximum, driven by its own randomly seeded generator.
func getBackOff() backoff.BackoffFactory {
	const (
		minBackoff = time.Minute
		maxBackoff = time.Hour
	)

	rng := rand.New(rand.NewSource(rand.Int63()))
	return backoff.NewExponentialBackoff(minBackoff, maxBackoff, backoff.FullJitter, time.Second, 5.0, 0, rng)
}
2023-08-15 06:57:51 +05:30
// NewPeerConnectionStrategy creates a utility to connect to peers,
// but only if we have not recently tried connecting to them already.
//
2023-01-13 19:58:22 -04:00
// dialTimeout is how long we attempt to connect to a peer before giving up
// minPeers is the minimum number of peers that the node should have
2024-06-25 11:28:04 -04:00
func NewPeerConnectionStrategy (
pm * PeerManager ,
onlineChecker onlinechecker . OnlineChecker ,
dialTimeout time . Duration ,
logger * zap . Logger ,
) ( * PeerConnectionStrategy , error ) {
2023-08-30 21:33:57 +07:00
// cacheSize is the size of a TwoQueueCache
cacheSize := 600
2023-01-13 19:58:22 -04:00
cache , err := lru . New2Q ( cacheSize )
if err != nil {
return nil , err
}
2023-08-30 21:33:57 +07:00
//
2023-08-28 10:47:48 +04:00
pc := & PeerConnectionStrategy {
2023-09-19 07:39:39 +07:00
cache : cache ,
dialTimeout : dialTimeout ,
2023-11-07 22:43:19 +05:30
CommonDiscoveryService : service . NewCommonDiscoveryService ( ) ,
2024-06-25 11:28:04 -04:00
onlineChecker : onlineChecker ,
2023-09-19 07:39:39 +07:00
pm : pm ,
backoff : getBackOff ( ) ,
logger : logger . Named ( "discovery-connector" ) ,
2023-08-28 10:47:48 +04:00
}
pm . SetPeerConnector ( pc )
return pc , nil
2023-01-13 19:58:22 -04:00
}
// connCacheData is the per-peer backoff state stored in the connection cache.
type connCacheData struct {
	// nextTry is the earliest time at which the peer may be dialed again.
	nextTry time.Time
	// strat yields the delay to apply after each failed attempt.
	strat backoff.BackoffStrategy
}
2023-07-07 08:35:22 -04:00
// Subscribe receives channels on which discovered peers should be pushed
2023-11-07 22:43:19 +05:30
func ( c * PeerConnectionStrategy ) Subscribe ( ctx context . Context , ch <- chan service . PeerData ) {
2023-09-19 07:39:39 +07:00
// if not running yet, store the subscription and return
if err := c . ErrOnNotRunning ( ) ; err != nil {
c . mux . Lock ( )
2023-10-25 21:25:56 -04:00
c . subscriptions = append ( c . subscriptions , subscription { ctx , ch } )
2023-09-19 07:39:39 +07:00
c . mux . Unlock ( )
return
2023-07-07 08:35:22 -04:00
}
2023-09-19 07:39:39 +07:00
// if running start a goroutine to consume the subscription
c . WaitGroup ( ) . Add ( 1 )
go func ( ) {
defer c . WaitGroup ( ) . Done ( )
2023-10-25 21:25:56 -04:00
c . consumeSubscription ( subscription { ctx , ch } )
2023-09-19 07:39:39 +07:00
} ( )
2023-07-07 08:35:22 -04:00
}
2023-10-25 21:25:56 -04:00
func ( c * PeerConnectionStrategy ) consumeSubscription ( s subscription ) {
2023-07-07 08:35:22 -04:00
for {
2023-08-28 10:47:48 +04:00
// for returning from the loop when peerConnector is paused.
2023-07-07 08:35:22 -04:00
select {
2023-09-19 07:39:39 +07:00
case <- c . Context ( ) . Done ( ) :
2023-07-07 08:35:22 -04:00
return
2023-10-25 21:25:56 -04:00
case <- s . ctx . Done ( ) :
return
2023-08-28 10:47:48 +04:00
default :
}
//
if ! c . isPaused ( ) {
2023-07-07 08:35:22 -04:00
select {
2023-09-19 07:39:39 +07:00
case <- c . Context ( ) . Done ( ) :
2023-07-07 08:35:22 -04:00
return
2023-10-25 21:25:56 -04:00
case <- s . ctx . Done ( ) :
return
case p , ok := <- s . ch :
2023-08-28 10:47:48 +04:00
if ! ok {
return
}
2023-09-27 12:16:37 +05:30
triggerImmediateConnection := false
//Not connecting to peer as soon as it is discovered,
// rather expecting this to be pushed from PeerManager based on the need.
2024-06-26 06:18:44 +05:30
if len ( c . host . Network ( ) . Peers ( ) ) < c . pm . OutPeersTarget {
2023-09-27 12:16:37 +05:30
triggerImmediateConnection = true
}
c . pm . AddDiscoveredPeer ( p , triggerImmediateConnection )
2023-08-28 10:47:48 +04:00
case <- time . After ( 1 * time . Second ) :
// This timeout is to not lock the goroutine
break
2023-07-07 08:35:22 -04:00
}
2023-08-28 10:47:48 +04:00
} else {
time . Sleep ( 1 * time . Second ) // sleep while the peerConnector is paused.
2023-07-07 08:35:22 -04:00
}
}
2023-01-13 19:58:22 -04:00
}
2023-08-15 06:57:51 +05:30
// SetHost sets the host to be able to mount or consume a protocol
2023-04-16 20:04:12 -04:00
func ( c * PeerConnectionStrategy ) SetHost ( h host . Host ) {
c . host = h
}
2023-08-15 06:57:51 +05:30
// Start attempts to connect to the peers passed in by peerCh.
// Will not connect to peers if they are within the backoff period.
2023-01-13 19:58:22 -04:00
func ( c * PeerConnectionStrategy ) Start ( ctx context . Context ) error {
2023-09-19 07:39:39 +07:00
return c . CommonDiscoveryService . Start ( ctx , c . start )
2023-01-13 19:58:22 -04:00
2023-09-19 07:39:39 +07:00
}
func ( c * PeerConnectionStrategy ) start ( ) error {
2023-09-27 12:16:37 +05:30
c . WaitGroup ( ) . Add ( 1 )
2023-09-19 07:39:39 +07:00
go c . dialPeers ( )
2023-01-13 19:58:22 -04:00
2023-09-19 07:39:39 +07:00
c . consumeSubscriptions ( )
2023-07-07 08:35:22 -04:00
2023-01-13 19:58:22 -04:00
return nil
}
2023-08-15 06:57:51 +05:30
// Stop terminates the peer-connector
2023-01-13 19:58:22 -04:00
func ( c * PeerConnectionStrategy ) Stop ( ) {
2023-09-19 07:39:39 +07:00
c . CommonDiscoveryService . Stop ( func ( ) { } )
2023-01-13 19:58:22 -04:00
}
func ( c * PeerConnectionStrategy ) isPaused ( ) bool {
2023-08-28 10:47:48 +04:00
return c . paused . Load ( )
2023-01-13 19:58:22 -04:00
}
2024-06-25 11:28:04 -04:00
// SetPaused pauses (true) or resumes (false) the connector. While paused,
// subscription channels are not read.
func (c *PeerConnectionStrategy) SetPaused(paused bool) {
	c.paused.Store(paused)
}
2023-08-28 10:47:48 +04:00
// it might happen Subscribe is called before peerConnector has started so store these subscriptions in subscriptions array and custom after c.cancel is set.
2023-09-19 07:39:39 +07:00
func ( c * PeerConnectionStrategy ) consumeSubscriptions ( ) {
2023-07-07 08:35:22 -04:00
for _ , subs := range c . subscriptions {
2023-09-19 07:39:39 +07:00
c . WaitGroup ( ) . Add ( 1 )
2023-10-25 21:25:56 -04:00
go func ( s subscription ) {
2023-09-19 07:39:39 +07:00
defer c . WaitGroup ( ) . Done ( )
c . consumeSubscription ( s )
2023-07-07 08:35:22 -04:00
} ( subs )
}
2023-08-28 10:47:48 +04:00
c . subscriptions = nil
2023-07-07 08:35:22 -04:00
}
2023-07-06 10:19:51 -04:00
// maxActiveDials is the upper bound on the number of dial goroutines that
// dialPeers allows to run at the same time.
const maxActiveDials = 5
2023-08-28 10:47:48 +04:00
// c.cache is thread safe
// only reason why mutex is used: if canDialPeer is queried twice for the same peer.
2023-08-15 06:57:51 +05:30
func ( c * PeerConnectionStrategy ) canDialPeer ( pi peer . AddrInfo ) bool {
c . mux . Lock ( )
2023-08-28 10:47:48 +04:00
defer c . mux . Unlock ( )
2023-08-15 06:57:51 +05:30
val , ok := c . cache . Get ( pi . ID )
if ok {
tv := val . ( * connCacheData )
now := time . Now ( )
if now . Before ( tv . nextTry ) {
2024-01-30 17:20:35 +05:30
c . logger . Debug ( "Skipping connecting to peer due to backoff strategy" ,
2024-08-14 06:17:00 +05:30
logging . UTCTime ( "currentTime" , now ) , logging . UTCTime ( "until" , tv . nextTry ) )
2023-08-15 06:57:51 +05:30
return false
}
2024-01-30 17:20:35 +05:30
c . logger . Debug ( "Proceeding with connecting to peer" ,
2024-08-14 06:17:00 +05:30
logging . UTCTime ( "currentTime" , now ) , logging . UTCTime ( "nextTry" , tv . nextTry ) )
2024-05-13 15:07:08 -04:00
}
return true
}
func ( c * PeerConnectionStrategy ) addConnectionBackoff ( peerID peer . ID ) {
c . mux . Lock ( )
defer c . mux . Unlock ( )
val , ok := c . cache . Get ( peerID )
var cachedPeer * connCacheData
if ok {
tv := val . ( * connCacheData )
tv . nextTry = time . Now ( ) . Add ( tv . strat . Delay ( ) )
2023-08-15 06:57:51 +05:30
} else {
cachedPeer = & connCacheData { strat : c . backoff ( ) }
cachedPeer . nextTry = time . Now ( ) . Add ( cachedPeer . strat . Delay ( ) )
2024-01-30 17:20:35 +05:30
c . logger . Debug ( "Initializing connectionCache for peer " ,
2024-08-14 06:17:00 +05:30
logging . HostID ( "peerID" , peerID ) , logging . UTCTime ( "until" , cachedPeer . nextTry ) )
2024-05-13 15:07:08 -04:00
c . cache . Add ( peerID , cachedPeer )
2023-08-15 06:57:51 +05:30
}
}
2023-09-19 07:39:39 +07:00
func ( c * PeerConnectionStrategy ) dialPeers ( ) {
defer c . WaitGroup ( ) . Done ( )
2023-07-06 10:19:51 -04:00
2024-06-26 06:18:44 +05:30
maxGoRoutines := c . pm . OutPeersTarget
2023-07-06 10:19:51 -04:00
if maxGoRoutines > maxActiveDials {
maxGoRoutines = maxActiveDials
}
sem := make ( chan struct { } , maxGoRoutines )
2023-01-13 19:58:22 -04:00
for {
select {
2024-06-25 11:28:04 -04:00
case <- c . Context ( ) . Done ( ) :
return
2023-09-19 07:39:39 +07:00
case pd , ok := <- c . GetListeningChan ( ) :
2023-01-13 19:58:22 -04:00
if ! ok {
return
}
2024-06-25 11:28:04 -04:00
if ! c . onlineChecker . IsOnline ( ) {
continue
}
2023-09-19 07:39:39 +07:00
addrInfo := pd . AddrInfo
2023-01-13 19:58:22 -04:00
2023-09-19 07:39:39 +07:00
if addrInfo . ID == c . host . ID ( ) || addrInfo . ID == "" ||
c . host . Network ( ) . Connectedness ( addrInfo . ID ) == network . Connected {
2023-06-30 14:29:24 -04:00
continue
}
2023-09-19 07:39:39 +07:00
if c . canDialPeer ( addrInfo ) {
2023-08-15 06:57:51 +05:30
sem <- struct { } { }
2023-09-19 07:39:39 +07:00
c . WaitGroup ( ) . Add ( 1 )
go c . dialPeer ( addrInfo , sem )
2023-01-13 19:58:22 -04:00
}
}
}
}
2023-08-15 06:57:51 +05:30
2023-09-19 07:39:39 +07:00
func ( c * PeerConnectionStrategy ) dialPeer ( pi peer . AddrInfo , sem chan struct { } ) {
defer c . WaitGroup ( ) . Done ( )
ctx , cancel := context . WithTimeout ( c . Context ( ) , c . dialTimeout )
2023-08-15 06:57:51 +05:30
defer cancel ( )
err := c . host . Connect ( ctx , pi )
if err != nil && ! errors . Is ( err , context . Canceled ) {
2024-05-13 15:07:08 -04:00
c . addConnectionBackoff ( pi . ID )
2023-08-15 06:57:51 +05:30
c . host . Peerstore ( ) . ( wps . WakuPeerstore ) . AddConnFailure ( pi )
c . logger . Warn ( "connecting to peer" , logging . HostID ( "peerID" , pi . ID ) , zap . Error ( err ) )
}
<- sem
}