mirror of https://github.com/waku-org/nwaku.git
feat(networking): prune peers from same ip beyond colocation limit (#1765)
commit 047d1cf095
parent ffac77611c
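This change makes the peer manager track which connected peers share an IP address and periodically disconnect any surplus beyond a configurable colocation limit. A minimal, self-contained Nim sketch of that idea (hypothetical names and data, not the PeerManager API from this diff):

import std/tables

const colocationLimit = 5   # mirrors the new ColocationLimit default below

# hypothetical helper: given ip -> connected peer ids, return the ids to drop
proc peersToPrune(ipTable: Table[string, seq[string]]): seq[string] =
  for ip, peers in ipTable.pairs:
    if peers.len > colocationLimit:
      # keep colocationLimit peers per ip, drop the rest
      result.add peers[0 ..< peers.len - colocationLimit]

let observed = {"203.0.113.7": @["p1", "p2", "p3", "p4", "p5", "p6", "p7"],
                "198.51.100.2": @["p8"]}.toTable
doAssert peersToPrune(observed) == @["p1", "p2"]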
@@ -727,3 +727,40 @@ procSuite "Peer Manager":
         .build(),
       maxFailedAttempts = 5,
       storage = nil)
+
+  asyncTest "colocationLimit is enforced by pruneConnsByIp()":
+    # Create 5 nodes
+    let nodes = toSeq(0..<5).mapIt(newTestWakuNode(generateSecp256k1Key(), ValidIpAddress.init("0.0.0.0"), Port(0)))
+
+    # Start them with relay + filter
+    await allFutures(nodes.mapIt(it.start()))
+    await allFutures(nodes.mapIt(it.mountRelay()))
+
+    let pInfos = nodes.mapIt(it.switch.peerInfo.toRemotePeerInfo())
+
+    # 2 in connections
+    discard await nodes[1].peerManager.connectRelay(pInfos[0])
+    discard await nodes[2].peerManager.connectRelay(pInfos[0])
+
+    # 2 out connections
+    discard await nodes[0].peerManager.connectRelay(pInfos[3])
+    discard await nodes[0].peerManager.connectRelay(pInfos[4])
+
+    # force max 1 conn per ip
+    nodes[0].peerManager.colocationLimit = 1
+    nodes[0].peerManager.updateIpTable()
+
+    # table is updated and we have 4 conns (2 in, 2 out)
+    check:
+      nodes[0].peerManager.ipTable["127.0.0.1"].len == 4
+      nodes[0].peerManager.switch.connManager.getConnections().len == 4
+      nodes[0].peerManager.peerStore.peers().len == 4
+
+    await nodes[0].peerManager.pruneConnsByIp()
+
+    # peers are pruned, max 1 conn per ip
+    nodes[0].peerManager.updateIpTable()
+    check:
+      nodes[0].peerManager.ipTable["127.0.0.1"].len == 1
+      nodes[0].peerManager.switch.connManager.getConnections().len == 1
+      nodes[0].peerManager.peerStore.peers().len == 1
@@ -10,7 +10,8 @@ import
   chronicles,
   metrics,
   libp2p/multistream,
-  libp2p/muxers/muxer
+  libp2p/muxers/muxer,
+  libp2p/nameresolving/nameresolver
 import
   ../../../common/nimchronos,
   ../../waku_core,
@@ -54,11 +55,14 @@ const
   # How often the peer store is pruned
   PrunePeerStoreInterval = chronos.minutes(5)

-  # How often the peer store is updated with metrics
-  UpdateMetricsInterval = chronos.seconds(15)
+  # How often metrics and logs are shown/updated
+  LogAndMetricsInterval = chronos.seconds(60)

-  # How often to log peer manager metrics
-  LogSummaryInterval = chronos.seconds(60)
+  # Prune by ip interval
+  PruneByIpInterval = chronos.seconds(30)
+
+  # Max peers that we allow from the same IP
+  ColocationLimit = 5

 type
   PeerManager* = ref object of RootObj
@@ -70,6 +74,8 @@ type
     storage: PeerStorage
     serviceSlots*: Table[string, RemotePeerInfo]
    outPeersTarget*: int
+    ipTable*: Table[string, seq[PeerId]]
+    colocationLimit*: int
    started: bool

 proc protocolMatcher*(codec: string): Matcher =
@@ -278,13 +284,8 @@ proc canBeConnected*(pm: PeerManager,
 # Initialisation #
 ##################

-# currently disabled. note that peer connection state connected/disconnected
-# cant be tracked using this handler when more than one conn is allowed and
-# when using autonat. eg if a peer has 2 conns and one is disconnected we cant
-# assume that the peer is disconnected, because the other one might still be active.
-# note that even with maxconn = 1, autonat forces more than one connection.
+# called when a connection i) is created or ii) is closed
 proc onConnEvent(pm: PeerManager, peerId: PeerID, event: ConnEvent) {.async.} =

   case event.kind
   of ConnEventKind.Connected:
     let direction = if event.incoming: Inbound else: Outbound
@@ -292,30 +293,51 @@ proc onConnEvent(pm: PeerManager, peerId: PeerID, event: ConnEvent) {.async.} =
   of ConnEventKind.Disconnected:
     discard

+# called when a peer i) first connects to us ii) disconnects all connections from us
 proc onPeerEvent(pm: PeerManager, peerId: PeerId, event: PeerEvent) {.async.} =
+  var direction: PeerDirection
+  var connectedness: Connectedness

   if event.kind == PeerEventKind.Joined:
-    let direction = if event.initiator: Outbound else: Inbound
-    pm.peerStore[ConnectionBook][peerId] = Connected
-    pm.peerStore[DirectionBook][peerId] = direction
-
-    if not pm.storage.isNil:
-      pm.storage.insertOrReplace(peerId, pm.peerStore.get(peerId), Connected)
-    return
-
+    direction = if event.initiator: Outbound else: Inbound
+    connectedness = Connected
   elif event.kind == PeerEventKind.Left:
-    pm.peerStore[DirectionBook][peerId] = UnknownDirection
-    pm.peerStore[ConnectionBook][peerId] = CanConnect
+    direction = UnknownDirection
+    connectedness = CanConnect

-    if not pm.storage.isNil:
-      pm.storage.insertOrReplace(peerId, pm.peerStore.get(peerId), CanConnect, getTime().toUnix)
-    return
+  pm.peerStore[ConnectionBook][peerId] = connectedness
+  pm.peerStore[DirectionBook][peerId] = direction
+  if not pm.storage.isNil:
+    pm.storage.insertOrReplace(peerId, pm.peerStore.get(peerId), connectedness, getTime().toUnix)
+
+proc updateIpTable*(pm: PeerManager) =
+  # clean table
+  pm.ipTable = initTable[string, seq[PeerId]]()
+
+  # populate ip->peerIds from existing out/in connections
+  for peerId, conn in pm.switch.connManager.getConnections():
+    if conn.len == 0:
+      continue
+
+    # we may want to enable it only in inbound peers
+    #if conn[0].connection.transportDir != In:
+    #  continue
+
+    # assumes just one physical connection per peer
+    let observedAddr = conn[0].connection.observedAddr
+    if observedAddr.isSome:
+      # TODO: think if circuit relay ips should be handled differently
+      let ip = observedAddr.get.getHostname()
+      pm.ipTable.mgetOrPut(ip, newSeq[PeerId]()).add(peerId)

 proc new*(T: type PeerManager,
           switch: Switch,
           storage: PeerStorage = nil,
           initialBackoffInSec = InitialBackoffInSec,
           backoffFactor = BackoffFactor,
-          maxFailedAttempts = MaxFailedAttempts,): PeerManager =
+          maxFailedAttempts = MaxFailedAttempts,
+          colocationLimit = ColocationLimit,): PeerManager =

   let capacity = switch.peerStore.capacity
   let maxConnections = switch.connManager.inSema.size
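updateIpTable leans on std/tables' mgetOrPut to build the ip -> peerIds mapping in a single pass: if the key is missing, the supplied default is inserted first, and either way a mutable reference to the stored value is returned, so .add appends in place. A small self-contained illustration of that behaviour (generic ints instead of PeerId):

import std/tables

var buckets = initTable[string, seq[int]]()

# key absent: the empty seq is inserted, then returned by reference
buckets.mgetOrPut("10.0.0.1", newSeq[int]()).add(1)
# key present: the existing seq is returned and the default is ignored
buckets.mgetOrPut("10.0.0.1", newSeq[int]()).add(2)

doAssert buckets["10.0.0.1"] == @[1, 2]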
@@ -338,7 +360,8 @@ proc new*(T: type PeerManager,
     initialBackoffInSec: initialBackoffInSec,
     backoffFactor: backoffFactor,
     outPeersTarget: max(maxConnections div 10, 10),
-    maxFailedAttempts: maxFailedAttempts)
+    maxFailedAttempts: maxFailedAttempts,
+    colocationLimit: colocationLimit)

   proc connHook(peerId: PeerID, event: ConnEvent): Future[void] {.gcsafe.} =
     onConnEvent(pm, peerId, event)
@@ -360,6 +383,7 @@ proc new*(T: type PeerManager,
   pm.peerStore[AddressBook].addHandler(peerStoreChanged)

   pm.serviceSlots = initTable[string, RemotePeerInfo]()
+  pm.ipTable = initTable[string, seq[PeerId]]()

   if not storage.isNil():
     debug "found persistent peer storage"
|
||||||
for p in inRelayPeers[0..<connsToPrune]:
|
for p in inRelayPeers[0..<connsToPrune]:
|
||||||
await pm.switch.disconnect(p)
|
await pm.switch.disconnect(p)
|
||||||
|
|
||||||
|
proc pruneConnsByIp*(pm: PeerManager) {.async.} =
|
||||||
|
## prunes connections based on ip colocation, allowing no more
|
||||||
|
## than ColocationLimit inbound connections from same ip
|
||||||
|
##
|
||||||
|
|
||||||
|
# update the table tracking ip and the connected peers
|
||||||
|
pm.updateIpTable()
|
||||||
|
|
||||||
|
# trigger disconnections based on colocationLimit
|
||||||
|
for ip, peersInIp in pm.ipTable.pairs:
|
||||||
|
if peersInIp.len > pm.colocationLimit:
|
||||||
|
let connsToPrune = peersInIp.len - pm.colocationLimit
|
||||||
|
for peerId in peersInIp[0..<connsToPrune]:
|
||||||
|
debug "Pruning connection due to ip colocation", peerId = peerId, ip = ip
|
||||||
|
await pm.switch.disconnect(peerId)
|
||||||
|
pm.peerStore.delete(peerId)
|
||||||
|
|
||||||
proc connectToRelayPeers*(pm: PeerManager) {.async.} =
|
proc connectToRelayPeers*(pm: PeerManager) {.async.} =
|
||||||
let (inRelayPeers, outRelayPeers) = pm.connectedPeers(WakuRelayCodec)
|
let (inRelayPeers, outRelayPeers) = pm.connectedPeers(WakuRelayCodec)
|
||||||
let maxConnections = pm.switch.connManager.inSema.size
|
let maxConnections = pm.switch.connManager.inSema.size
|
||||||
|
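Tying this back to the new test above: nodes[0] ends up with 4 connections that all share the observed IP 127.0.0.1, so with colocationLimit = 1 the loop computes connsToPrune = 4 - 1 = 3 and disconnects three peers, which is why the final checks expect exactly one remaining connection and peer. The same arithmetic as a tiny standalone sketch (plain Nim, hypothetical values):

let colocationLimit = 1
let peersInIp = @["peerA", "peerB", "peerC", "peerD"]   # the 4 test connections

doAssert peersInIp.len - colocationLimit == 3           # connsToPrune
doAssert peersInIp[0 ..< peersInIp.len - colocationLimit] == @["peerA", "peerB", "peerC"]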
@@ -603,6 +644,12 @@ proc selectPeer*(pm: PeerManager, proto: string): Option[RemotePeerInfo] =
     debug "No peer found for protocol", protocol=proto
     return none(RemotePeerInfo)

+proc pruneConnsByIpLoop(pm: PeerManager) {.async.} =
+  debug "Starting prune peer by ip loop"
+  while pm.started:
+    await pm.pruneConnsByIp()
+    await sleepAsync(PruneByIpInterval)
+
 # Prunes peers from peerstore to remove old/stale ones
 proc prunePeerStoreLoop(pm: PeerManager) {.async.} =
   debug "Starting prune peerstore loop"
@@ -617,8 +664,9 @@ proc relayConnectivityLoop*(pm: PeerManager) {.async.} =
     await pm.connectToRelayPeers()
     await sleepAsync(ConnectivityLoopInterval)

-proc logSummary*(pm: PeerManager) {.async.} =
-  heartbeat "Log peer manager summary", LogSummaryInterval:
+proc logAndMetrics(pm: PeerManager) {.async.} =
+  heartbeat "Scheduling log and metrics run", LogAndMetricsInterval:
+    # log metrics
     let (inRelayPeers, outRelayPeers) = pm.connectedPeers(WakuRelayCodec)
     let maxConnections = pm.switch.connManager.inSema.size
     let totalRelayPeers = inRelayPeers.len + outRelayPeers.len
@@ -634,8 +682,7 @@ proc logSummary*(pm: PeerManager) {.async.} =
       notConnectedPeers = notConnectedPeers.len,
       outsideBackoffPeers = outsideBackoffPeers.len

-proc updateMetrics(pm: PeerManager) {.async.} =
-  heartbeat "Scheduling updateMetrics run", UpdateMetricsInterval:
+    # update prometheus metrics
     for proto in pm.peerStore.getWakuProtos():
       let (protoConnsIn, protoConnsOut) = pm.connectedPeers(proto)
       let (protoStreamsIn, protoStreamsOut) = pm.getNumStreams(proto)
@@ -646,10 +693,10 @@ proc updateMetrics(pm: PeerManager) {.async.} =

 proc start*(pm: PeerManager) =
   pm.started = true
-  asyncSpawn pm.updateMetrics()
   asyncSpawn pm.relayConnectivityLoop()
   asyncSpawn pm.prunePeerStoreLoop()
-  asyncSpawn pm.logSummary()
+  asyncSpawn pm.pruneConnsByIpLoop()
+  asyncSpawn pm.logAndMetrics()

 proc stop*(pm: PeerManager) =
   pm.started = false
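Because the constructor now takes colocationLimit (defaulting to ColocationLimit, i.e. 5) and stores it on the object, the limit can be tuned per node. A hedged sketch of such a call site; the switch value and the imports are placeholders, only the new keyword argument comes from this diff:

# assuming `switch` is an already-built libp2p Switch and the peer manager
# module is imported; setup omitted for brevity
let pm = PeerManager.new(
  switch = switch,
  storage = nil,
  colocationLimit = 3)  # tolerate at most 3 connections from the same IP
pm.start()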