feat: add max number of elements to non-prio queue (#1077)

This commit is contained in:
diegomrsantos 2024-03-25 22:00:11 +01:00 committed by GitHub
parent 458b0885dd
commit 1a707e1264
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
4 changed files with 65 additions and 31 deletions

View File

@ -83,7 +83,8 @@ proc init*(_: type[GossipSubParams]): GossipSubParams =
enablePX: false, enablePX: false,
bandwidthEstimatebps: 100_000_000, # 100 Mbps or 12.5 MBps bandwidthEstimatebps: 100_000_000, # 100 Mbps or 12.5 MBps
overheadRateLimit: Opt.none(tuple[bytes: int, interval: Duration]), overheadRateLimit: Opt.none(tuple[bytes: int, interval: Duration]),
disconnectPeerAboveRateLimit: false disconnectPeerAboveRateLimit: false,
maxNumElementsInNonPriorityQueue: DefaultMaxNumElementsInNonPriorityQueue
) )
proc validateParameters*(parameters: GossipSubParams): Result[void, cstring] = proc validateParameters*(parameters: GossipSubParams): Result[void, cstring] =
@ -172,10 +173,10 @@ method onNewPeer*(g: GossipSub, peer: PubSubPeer) =
method onPubSubPeerEvent*(p: GossipSub, peer: PubSubPeer, event: PubSubPeerEvent) {.gcsafe.} = method onPubSubPeerEvent*(p: GossipSub, peer: PubSubPeer, event: PubSubPeerEvent) {.gcsafe.} =
case event.kind case event.kind
of PubSubPeerEventKind.Connected: of PubSubPeerEventKind.StreamOpened:
discard discard
of PubSubPeerEventKind.Disconnected: of PubSubPeerEventKind.StreamClosed:
# If a send connection is lost, it's better to remove peer from the mesh - # If a send stream is lost, it's better to remove peer from the mesh -
# if it gets reestablished, the peer will be readded to the mesh, and if it # if it gets reestablished, the peer will be readded to the mesh, and if it
# doesn't, well.. then we hope the peer is going away! # doesn't, well.. then we hope the peer is going away!
for topic, peers in p.mesh.mpairs(): for topic, peers in p.mesh.mpairs():
@ -183,6 +184,8 @@ method onPubSubPeerEvent*(p: GossipSub, peer: PubSubPeer, event: PubSubPeerEvent
peers.excl(peer) peers.excl(peer)
for _, peers in p.fanout.mpairs(): for _, peers in p.fanout.mpairs():
peers.excl(peer) peers.excl(peer)
of PubSubPeerEventKind.DisconnectionRequested:
asyncSpawn p.disconnectPeer(peer) # this should unsubscribePeer the peer too
procCall FloodSub(p).onPubSubPeerEvent(peer, event) procCall FloodSub(p).onPubSubPeerEvent(peer, event)
@ -750,4 +753,5 @@ method getOrCreatePeer*(
let peer = procCall PubSub(g).getOrCreatePeer(peerId, protos) let peer = procCall PubSub(g).getOrCreatePeer(peerId, protos)
g.parameters.overheadRateLimit.withValue(overheadRateLimit): g.parameters.overheadRateLimit.withValue(overheadRateLimit):
peer.overheadRateLimitOpt = Opt.some(TokenBucket.new(overheadRateLimit.bytes, overheadRateLimit.interval)) peer.overheadRateLimitOpt = Opt.some(TokenBucket.new(overheadRateLimit.bytes, overheadRateLimit.interval))
peer.maxNumElementsInNonPriorityQueue = g.parameters.maxNumElementsInNonPriorityQueue
return peer return peer

View File

@ -147,6 +147,9 @@ type
overheadRateLimit*: Opt[tuple[bytes: int, interval: Duration]] overheadRateLimit*: Opt[tuple[bytes: int, interval: Duration]]
disconnectPeerAboveRateLimit*: bool disconnectPeerAboveRateLimit*: bool
# Max number of elements allowed in the non-priority queue. When this limit has been reached, the peer will be disconnected.
maxNumElementsInNonPriorityQueue*: int
BackoffTable* = Table[string, Table[PeerId, Moment]] BackoffTable* = Table[string, Table[PeerId, Moment]]
ValidationSeenTable* = Table[MessageId, HashSet[PubSubPeer]] ValidationSeenTable* = Table[MessageId, HashSet[PubSubPeer]]

View File

@ -287,11 +287,14 @@ method onNewPeer(p: PubSub, peer: PubSubPeer) {.base, gcsafe.} = discard
method onPubSubPeerEvent*(p: PubSub, peer: PubSubPeer, event: PubSubPeerEvent) {.base, gcsafe.} = method onPubSubPeerEvent*(p: PubSub, peer: PubSubPeer, event: PubSubPeerEvent) {.base, gcsafe.} =
# Peer event is raised for the send connection in particular # Peer event is raised for the send connection in particular
case event.kind case event.kind
of PubSubPeerEventKind.Connected: of PubSubPeerEventKind.StreamOpened:
if p.topics.len > 0: if p.topics.len > 0:
p.sendSubs(peer, toSeq(p.topics.keys), true) p.sendSubs(peer, toSeq(p.topics.keys), true)
of PubSubPeerEventKind.Disconnected: of PubSubPeerEventKind.StreamClosed:
discard discard
of PubSubPeerEventKind.DisconnectionRequested:
discard
method getOrCreatePeer*( method getOrCreatePeer*(
p: PubSub, p: PubSub,

View File

@ -35,6 +35,11 @@ when defined(pubsubpeer_queue_metrics):
declareGauge(libp2p_gossipsub_priority_queue_size, "the number of messages in the priority queue", labels = ["id"]) declareGauge(libp2p_gossipsub_priority_queue_size, "the number of messages in the priority queue", labels = ["id"])
declareGauge(libp2p_gossipsub_non_priority_queue_size, "the number of messages in the non-priority queue", labels = ["id"]) declareGauge(libp2p_gossipsub_non_priority_queue_size, "the number of messages in the non-priority queue", labels = ["id"])
declareCounter(libp2p_pubsub_disconnects_over_non_priority_queue_limit, "number of peers disconnected due to over non-prio queue capacity")
const
# Default cap on the number of elements in a peer's non-priority queue.
# It is the default for `GossipSubParams.maxNumElementsInNonPriorityQueue`
# and for the `maxNumElementsInNonPriorityQueue` argument of `PubSubPeer.new`.
DefaultMaxNumElementsInNonPriorityQueue* = 1024
type type
PeerRateLimitError* = object of CatchableError PeerRateLimitError* = object of CatchableError
@ -43,8 +48,9 @@ type
onSend*: proc(peer: PubSubPeer; msgs: var RPCMsg) {.gcsafe, raises: [].} onSend*: proc(peer: PubSubPeer; msgs: var RPCMsg) {.gcsafe, raises: [].}
PubSubPeerEventKind* {.pure.} = enum PubSubPeerEventKind* {.pure.} = enum
Connected StreamOpened
Disconnected StreamClosed
DisconnectionRequested # tells gossipsub that the transport connection to the peer should be closed
PubSubPeerEvent* = object PubSubPeerEvent* = object
kind*: PubSubPeerEventKind kind*: PubSubPeerEventKind
@ -83,6 +89,8 @@ type
overheadRateLimitOpt*: Opt[TokenBucket] overheadRateLimitOpt*: Opt[TokenBucket]
rpcmessagequeue: RpcMessageQueue rpcmessagequeue: RpcMessageQueue
maxNumElementsInNonPriorityQueue*: int # The max number of elements allowed in the non-priority queue.
disconnected: bool
RPCHandler* = proc(peer: PubSubPeer, data: seq[byte]): Future[void] RPCHandler* = proc(peer: PubSubPeer, data: seq[byte]): Future[void]
{.gcsafe, raises: [].} {.gcsafe, raises: [].}
@ -181,6 +189,24 @@ proc handle*(p: PubSubPeer, conn: Connection) {.async.} =
debug "exiting pubsub read loop", debug "exiting pubsub read loop",
conn, peer = p, closed = conn.closed conn, peer = p, closed = conn.closed
proc closeSendConn(p: PubSubPeer, event: PubSubPeerEventKind) {.async.} =
## Closes the peer's send connection (if there is one) and reports `event`
## to the peer's `onEvent` listener.
##
## `event` is chosen by the caller: `connectOnce` passes `StreamClosed` from
## its `finally` branch, and `sendEncoded` passes `DisconnectionRequested`
## when the non-priority queue limit is reached.
##
## NOTE(review): indentation is not preserved in this view, so the nesting
## of the statements below is inferred — confirm against the real file.
# Close and forget the current send connection, if any.
if p.sendConn != nil:
trace "Removing send connection", p, conn = p.sendConn
await p.sendConn.close()
p.sendConn = nil
# Complete `connectedFut` if it has not finished yet
# (presumably so that awaiters are not left pending — TODO confirm).
if not p.connectedFut.finished:
p.connectedFut.complete()
# Notify the listener, when one is registered. Cancellation is re-raised;
# any other catchable error is only logged at debug level.
try:
if p.onEvent != nil:
p.onEvent(p, PubSubPeerEvent(kind: event))
except CancelledError as exc:
raise exc
except CatchableError as exc:
debug "Errors during diconnection events", error = exc.msg
# don't cleanup p.address else we leak some gossip stat table
proc connectOnce(p: PubSubPeer): Future[void] {.async.} = proc connectOnce(p: PubSubPeer): Future[void] {.async.} =
try: try:
if p.connectedFut.finished: if p.connectedFut.finished:
@ -203,27 +229,11 @@ proc connectOnce(p: PubSubPeer): Future[void] {.async.} =
p.address = if p.sendConn.observedAddr.isSome: some(p.sendConn.observedAddr.get) else: none(MultiAddress) p.address = if p.sendConn.observedAddr.isSome: some(p.sendConn.observedAddr.get) else: none(MultiAddress)
if p.onEvent != nil: if p.onEvent != nil:
p.onEvent(p, PubSubPeerEvent(kind: PubSubPeerEventKind.Connected)) p.onEvent(p, PubSubPeerEvent(kind: PubSubPeerEventKind.StreamOpened))
await handle(p, newConn) await handle(p, newConn)
finally: finally:
if p.sendConn != nil: await p.closeSendConn(PubSubPeerEventKind.StreamClosed)
trace "Removing send connection", p, conn = p.sendConn
await p.sendConn.close()
p.sendConn = nil
if not p.connectedFut.finished:
p.connectedFut.complete()
try:
if p.onEvent != nil:
p.onEvent(p, PubSubPeerEvent(kind: PubSubPeerEventKind.Disconnected))
except CancelledError as exc:
raise exc
except CatchableError as exc:
debug "Errors during diconnection events", error = exc.msg
# don't cleanup p.address else we leak some gossip stat table
proc connectImpl(p: PubSubPeer) {.async.} = proc connectImpl(p: PubSubPeer) {.async.} =
try: try:
@ -231,6 +241,10 @@ proc connectImpl(p: PubSubPeer) {.async.} =
# send connection might get disconnected due to a timeout or an unrelated # send connection might get disconnected due to a timeout or an unrelated
# issue so we try to get a new one # issue so we try to get a new one
while true: while true:
if p.disconnected:
if not p.connectedFut.finished:
p.connectedFut.complete()
return
await connectOnce(p) await connectOnce(p)
except CatchableError as exc: # never cancelled except CatchableError as exc: # never cancelled
debug "Could not establish send connection", msg = exc.msg debug "Could not establish send connection", msg = exc.msg
@ -336,6 +350,14 @@ proc sendEncoded*(p: PubSubPeer, msg: seq[byte], isHighPriority: bool): Future[v
when defined(pubsubpeer_queue_metrics): when defined(pubsubpeer_queue_metrics):
libp2p_gossipsub_priority_queue_size.inc(labelValues = [$p.peerId]) libp2p_gossipsub_priority_queue_size.inc(labelValues = [$p.peerId])
f f
else:
if len(p.rpcmessagequeue.nonPriorityQueue) >= p.maxNumElementsInNonPriorityQueue:
if not p.disconnected:
p.disconnected = true
libp2p_pubsub_disconnects_over_non_priority_queue_limit.inc()
p.closeSendConn(PubSubPeerEventKind.DisconnectionRequested)
else:
Future[void].completed()
else: else:
let f = p.rpcmessagequeue.nonPriorityQueue.addLast(msg) let f = p.rpcmessagequeue.nonPriorityQueue.addLast(msg)
when defined(pubsubpeer_queue_metrics): when defined(pubsubpeer_queue_metrics):
@ -457,7 +479,7 @@ proc stopSendNonPriorityTask*(p: PubSubPeer) =
proc new(T: typedesc[RpcMessageQueue]): T = proc new(T: typedesc[RpcMessageQueue]): T =
return T( return T(
sendPriorityQueue: initDeque[Future[void]](), sendPriorityQueue: initDeque[Future[void]](),
nonPriorityQueue: newAsyncQueue[seq[byte]](), nonPriorityQueue: newAsyncQueue[seq[byte]]()
) )
proc new*( proc new*(
@ -467,6 +489,7 @@ proc new*(
onEvent: OnEvent, onEvent: OnEvent,
codec: string, codec: string,
maxMessageSize: int, maxMessageSize: int,
maxNumElementsInNonPriorityQueue: int = DefaultMaxNumElementsInNonPriorityQueue,
overheadRateLimitOpt: Opt[TokenBucket] = Opt.none(TokenBucket)): T = overheadRateLimitOpt: Opt[TokenBucket] = Opt.none(TokenBucket)): T =
result = T( result = T(
@ -478,6 +501,7 @@ proc new*(
maxMessageSize: maxMessageSize, maxMessageSize: maxMessageSize,
overheadRateLimitOpt: overheadRateLimitOpt, overheadRateLimitOpt: overheadRateLimitOpt,
rpcmessagequeue: RpcMessageQueue.new(), rpcmessagequeue: RpcMessageQueue.new(),
maxNumElementsInNonPriorityQueue: maxNumElementsInNonPriorityQueue
) )
result.sentIHaves.addFirst(default(HashSet[MessageId])) result.sentIHaves.addFirst(default(HashSet[MessageId]))
result.heDontWants.addFirst(default(HashSet[MessageId])) result.heDontWants.addFirst(default(HashSet[MessageId]))