nim-libp2p/libp2p/protocols/pubsub/pubsubpeer.nim

Ignoring revisions in .git-blame-ignore-revs. Click here to bypass and see the normal blame view.

561 lines
20 KiB
Nim
Raw Normal View History

2022-07-01 18:19:57 +00:00
# Nim-LibP2P
2023-01-20 14:47:40 +00:00
# Copyright (c) 2023 Status Research & Development GmbH
2022-07-01 18:19:57 +00:00
# Licensed under either of
# * Apache License, version 2.0, ([LICENSE-APACHE](LICENSE-APACHE))
# * MIT license ([LICENSE-MIT](LICENSE-MIT))
# at your option.
# This file may not be copied, modified, or distributed except according to
# those terms.
2019-09-10 02:15:52 +00:00
2023-06-07 11:12:49 +00:00
{.push raises: [].}
import std/[sequtils, strutils, tables, hashes, options, sets, deques]
import stew/results
2020-06-11 18:09:34 +00:00
import chronos, chronicles, nimcrypto/sha2, metrics
2023-09-22 14:45:08 +00:00
import chronos/ratelimit
import
rpc/[messages, message, protobuf],
../../peerid,
../../peerinfo,
../../stream/connection,
2019-09-12 05:46:08 +00:00
../../crypto/crypto,
../../protobuf/minprotobuf,
../../utility
2019-09-10 02:15:52 +00:00
2023-07-28 08:58:05 +00:00
export peerid, connection, deques
2019-09-10 02:15:52 +00:00
logScope:
2020-12-01 17:34:27 +00:00
topics = "libp2p pubsubpeer"
2019-09-10 02:15:52 +00:00
when defined(libp2p_expensive_metrics):
declareCounter(
libp2p_pubsub_sent_messages, "number of messages sent", labels = ["id", "topic"]
)
declareCounter(
libp2p_pubsub_skipped_received_messages,
"number of received skipped messages",
labels = ["id"],
)
declareCounter(
libp2p_pubsub_skipped_sent_messages,
"number of sent skipped messages",
labels = ["id"],
)
when defined(pubsubpeer_queue_metrics):
declareGauge(
libp2p_gossipsub_priority_queue_size,
"the number of messages in the priority queue",
labels = ["id"],
)
declareGauge(
libp2p_gossipsub_non_priority_queue_size,
"the number of messages in the non-priority queue",
labels = ["id"],
)
declareCounter(
libp2p_pubsub_disconnects_over_non_priority_queue_limit,
"number of peers disconnected due to over non-prio queue capacity",
)
const DefaultMaxNumElementsInNonPriorityQueue* = 1024
2019-09-10 02:15:52 +00:00
type
2023-09-22 14:45:08 +00:00
PeerRateLimitError* = object of CatchableError
PubSubObserver* = ref object
2023-06-07 11:12:49 +00:00
onRecv*: proc(peer: PubSubPeer, msgs: var RPCMsg) {.gcsafe, raises: [].}
onSend*: proc(peer: PubSubPeer, msgs: var RPCMsg) {.gcsafe, raises: [].}
onValidated*:
proc(peer: PubSubPeer, msg: Message, msgId: MessageId) {.gcsafe, raises: [].}
2019-09-10 02:15:52 +00:00
PubSubPeerEventKind* {.pure.} = enum
StreamOpened
StreamClosed
DisconnectionRequested
# tells gossipsub that the transport connection to the peer should be closed
PubSubPeerEvent* = object
kind*: PubSubPeerEventKind
2023-06-07 11:12:49 +00:00
GetConn* = proc(): Future[Connection] {.gcsafe, raises: [].}
DropConn* = proc(peer: PubSubPeer) {.gcsafe, raises: [].}
# have to pass peer as it's unknown during init
OnEvent* = proc(peer: PubSubPeer, event: PubSubPeerEvent) {.gcsafe, raises: [].}
RpcMessageQueue* = ref object
# Tracks async tasks for sending high-priority peer-published messages.
sendPriorityQueue: Deque[Future[void]]
# Queue for lower-priority messages, like "IWANT" replies and relay messages.
nonPriorityQueue: AsyncQueue[seq[byte]]
# Task for processing non-priority message queue.
sendNonPriorityTask: Future[void]
PubSubPeer* = ref object of RootObj
getConn*: GetConn # callback to establish a new send connection
onEvent*: OnEvent # Connectivity updates for peer
codec*: string # the protocol that this peer joined from
sendConn*: Connection # cached send connection
connectedFut: Future[void]
address*: Option[MultiAddress]
2021-12-16 10:05:20 +00:00
peerId*: PeerId
handler*: RPCHandler
observers*: ref seq[PubSubObserver] # ref as in smart_ptr
Gossip one one (#240) * allow multiple codecs per protocol (without breaking things) * add 1.1 protocol to gossip * explicit peering part 1 * explicit peering part 2 * explicit peering part 3 * PeerInfo and ControlPrune protocols * fix encodePrune * validated always, even explicit peers * prune by score (score is stub still) * add a way to pass parameters to gossip * standard setup fixes * take into account explicit direct peers in publish * add floodPublish logic * small fixes, publish still half broken * make sure to waitsub in sparse test * use var semantics to optimize table access * wip... lvalues don't work properly sadly... * big publish refactor, replenish and balance * fix internal tests * use g.peers for fanout (todo: don't include flood peers) * exclude non gossip from fanout * internal test fixes * fix flood tests * fix test's trypublish * test interop fixes * make sure to not remove peers from gossip table * restore old replenishFanout * cleanups * restore utility module import * restore trace vs debug in gossip * improve fanout replenish behavior further * triage publish nil peers (issue is on master too but just hidden behind a if/in) * getGossipPeers fixes * remove topics from pubsubpeer (was unused) * simplify rebalanceMesh (following spec) and make it finally reach D_high * better diagnostics * merge new pubsubpeer, copy 1.1 to new module * fix up merge * conditional enable gossip11 module * add back topics in peers, re-enable flood publish * add more heartbeat locking to prevent races * actually lock the heartbeat * minor fixes * with sugar * merge 1.0 * remove assertion in publish * fix multistream 1.1 multi proto * Fix merge oops * wip * fix gossip 11 upstream * gossipsub11 -> gossipsub * support interop testing * tests fixing * fix directchat build * control prune updates (pb) * wip parameters * gossip internal tests fixes * parameters wip * finishup with params * cleanups/wip * small sugar * grafted and pruned procs * wip updateScores * wip * fix logging issue * pubsubpeer, chronicles explicit override * fix internal gossip tests * wip * tables troubleshooting * score wip * score wip * fixes * fix test utils generateNodes * don't delete while iterating in score update * fix grafted defect * add a handleConnect in subscribeTopic * pruning improvements * wip * score fixes * post merge - builds gossip tests * further merge fixes * rebalance improvements and opportunistic grafting * fix test for now * restore explicit peering * implement peer exchange graft message * add an hard cap to PX * backoff time management * IWANT cap/budget * Adaptive gossip dissemination * outbound mesh quota, internal tests fixing * oversub prune score based, finish outbound quota * finishup with score and ihave budget * use go daemon 0.3.0 * import fixes * byScore cleanup score sorting * remove pointless scaling in `/` Duration operator * revert using libp2p org for daemon * interop fixes * fixes and cleanup * remove heartbeat assertion, minor debug fixes * logging improvements and cleaning up * (to revert) add some traces * add explicit topic to gossip rpcs * pubsub merge fixes and type fix in switch * Revert "(to revert) add some traces" This reverts commit 4663eaab6cc336c81cee50bc54025cf0b7bcbd99. * cleanup some now irrelevant todo * shuffle peers anyway as score might be disabled * add missing shuffle * old merge fix * more merge fixes * debug improvements * re-enable gossip internal tests * add gossip10 fallback (dormant but tested) * split gossipsub internal tests into 1.0 and 1.1 Co-authored-by: Dmitriy Ryajov <dryajov@gmail.com>
2020-09-21 09:16:29 +00:00
score*: float64
sentIHaves*: Deque[HashSet[MessageId]]
iDontWants*: Deque[HashSet[SaltedId]]
2024-05-02 10:18:55 +00:00
## IDONTWANT contains unvalidated message id:s which may be long and/or
## expensive to look up, so we apply the same salting to them as during
## unvalidated message processing
Gossip one one (#240) * allow multiple codecs per protocol (without breaking things) * add 1.1 protocol to gossip * explicit peering part 1 * explicit peering part 2 * explicit peering part 3 * PeerInfo and ControlPrune protocols * fix encodePrune * validated always, even explicit peers * prune by score (score is stub still) * add a way to pass parameters to gossip * standard setup fixes * take into account explicit direct peers in publish * add floodPublish logic * small fixes, publish still half broken * make sure to waitsub in sparse test * use var semantics to optimize table access * wip... lvalues don't work properly sadly... * big publish refactor, replenish and balance * fix internal tests * use g.peers for fanout (todo: don't include flood peers) * exclude non gossip from fanout * internal test fixes * fix flood tests * fix test's trypublish * test interop fixes * make sure to not remove peers from gossip table * restore old replenishFanout * cleanups * restore utility module import * restore trace vs debug in gossip * improve fanout replenish behavior further * triage publish nil peers (issue is on master too but just hidden behind a if/in) * getGossipPeers fixes * remove topics from pubsubpeer (was unused) * simplify rebalanceMesh (following spec) and make it finally reach D_high * better diagnostics * merge new pubsubpeer, copy 1.1 to new module * fix up merge * conditional enable gossip11 module * add back topics in peers, re-enable flood publish * add more heartbeat locking to prevent races * actually lock the heartbeat * minor fixes * with sugar * merge 1.0 * remove assertion in publish * fix multistream 1.1 multi proto * Fix merge oops * wip * fix gossip 11 upstream * gossipsub11 -> gossipsub * support interop testing * tests fixing * fix directchat build * control prune updates (pb) * wip parameters * gossip internal tests fixes * parameters wip * finishup with params * cleanups/wip * small sugar * grafted and pruned procs * wip updateScores * wip * fix logging issue * pubsubpeer, chronicles explicit override * fix internal gossip tests * wip * tables troubleshooting * score wip * score wip * fixes * fix test utils generateNodes * don't delete while iterating in score update * fix grafted defect * add a handleConnect in subscribeTopic * pruning improvements * wip * score fixes * post merge - builds gossip tests * further merge fixes * rebalance improvements and opportunistic grafting * fix test for now * restore explicit peering * implement peer exchange graft message * add an hard cap to PX * backoff time management * IWANT cap/budget * Adaptive gossip dissemination * outbound mesh quota, internal tests fixing * oversub prune score based, finish outbound quota * finishup with score and ihave budget * use go daemon 0.3.0 * import fixes * byScore cleanup score sorting * remove pointless scaling in `/` Duration operator * revert using libp2p org for daemon * interop fixes * fixes and cleanup * remove heartbeat assertion, minor debug fixes * logging improvements and cleaning up * (to revert) add some traces * add explicit topic to gossip rpcs * pubsub merge fixes and type fix in switch * Revert "(to revert) add some traces" This reverts commit 4663eaab6cc336c81cee50bc54025cf0b7bcbd99. * cleanup some now irrelevant todo * shuffle peers anyway as score might be disabled * add missing shuffle * old merge fix * more merge fixes * debug improvements * re-enable gossip internal tests * add gossip10 fallback (dormant but tested) * split gossipsub internal tests into 1.0 and 1.1 Co-authored-by: Dmitriy Ryajov <dryajov@gmail.com>
2020-09-21 09:16:29 +00:00
iHaveBudget*: int
2023-06-21 08:40:10 +00:00
pingBudget*: int
maxMessageSize: int
Gossip one one (#240) * allow multiple codecs per protocol (without breaking things) * add 1.1 protocol to gossip * explicit peering part 1 * explicit peering part 2 * explicit peering part 3 * PeerInfo and ControlPrune protocols * fix encodePrune * validated always, even explicit peers * prune by score (score is stub still) * add a way to pass parameters to gossip * standard setup fixes * take into account explicit direct peers in publish * add floodPublish logic * small fixes, publish still half broken * make sure to waitsub in sparse test * use var semantics to optimize table access * wip... lvalues don't work properly sadly... * big publish refactor, replenish and balance * fix internal tests * use g.peers for fanout (todo: don't include flood peers) * exclude non gossip from fanout * internal test fixes * fix flood tests * fix test's trypublish * test interop fixes * make sure to not remove peers from gossip table * restore old replenishFanout * cleanups * restore utility module import * restore trace vs debug in gossip * improve fanout replenish behavior further * triage publish nil peers (issue is on master too but just hidden behind a if/in) * getGossipPeers fixes * remove topics from pubsubpeer (was unused) * simplify rebalanceMesh (following spec) and make it finally reach D_high * better diagnostics * merge new pubsubpeer, copy 1.1 to new module * fix up merge * conditional enable gossip11 module * add back topics in peers, re-enable flood publish * add more heartbeat locking to prevent races * actually lock the heartbeat * minor fixes * with sugar * merge 1.0 * remove assertion in publish * fix multistream 1.1 multi proto * Fix merge oops * wip * fix gossip 11 upstream * gossipsub11 -> gossipsub * support interop testing * tests fixing * fix directchat build * control prune updates (pb) * wip parameters * gossip internal tests fixes * parameters wip * finishup with params * cleanups/wip * small sugar * grafted and pruned procs * wip updateScores * wip * fix logging issue * pubsubpeer, chronicles explicit override * fix internal gossip tests * wip * tables troubleshooting * score wip * score wip * fixes * fix test utils generateNodes * don't delete while iterating in score update * fix grafted defect * add a handleConnect in subscribeTopic * pruning improvements * wip * score fixes * post merge - builds gossip tests * further merge fixes * rebalance improvements and opportunistic grafting * fix test for now * restore explicit peering * implement peer exchange graft message * add an hard cap to PX * backoff time management * IWANT cap/budget * Adaptive gossip dissemination * outbound mesh quota, internal tests fixing * oversub prune score based, finish outbound quota * finishup with score and ihave budget * use go daemon 0.3.0 * import fixes * byScore cleanup score sorting * remove pointless scaling in `/` Duration operator * revert using libp2p org for daemon * interop fixes * fixes and cleanup * remove heartbeat assertion, minor debug fixes * logging improvements and cleaning up * (to revert) add some traces * add explicit topic to gossip rpcs * pubsub merge fixes and type fix in switch * Revert "(to revert) add some traces" This reverts commit 4663eaab6cc336c81cee50bc54025cf0b7bcbd99. * cleanup some now irrelevant todo * shuffle peers anyway as score might be disabled * add missing shuffle * old merge fix * more merge fixes * debug improvements * re-enable gossip internal tests * add gossip10 fallback (dormant but tested) * split gossipsub internal tests into 1.0 and 1.1 Co-authored-by: Dmitriy Ryajov <dryajov@gmail.com>
2020-09-21 09:16:29 +00:00
appScore*: float64 # application specific score
behaviourPenalty*: float64 # the eventual penalty score
2023-09-22 14:45:08 +00:00
overheadRateLimitOpt*: Opt[TokenBucket]
rpcmessagequeue: RpcMessageQueue
maxNumElementsInNonPriorityQueue*: int
# The max number of elements allowed in the non-priority queue.
disconnected: bool
2023-09-22 14:45:08 +00:00
RPCHandler* =
proc(peer: PubSubPeer, data: seq[byte]): Future[void] {.gcsafe, raises: [].}
2019-09-10 02:15:52 +00:00
when defined(libp2p_agents_metrics):
func shortAgent*(p: PubSubPeer): string =
if p.sendConn.isNil or p.sendConn.getWrapped().isNil:
"unknown"
else:
#TODO the sendConn is setup before identify,
#so we have to read the parents short agent..
p.sendConn.getWrapped().shortAgent
proc getAgent*(peer: PubSubPeer): string =
return
when defined(libp2p_agents_metrics):
if peer.shortAgent.len > 0: peer.shortAgent else: "unknown"
else:
"unknown"
func hash*(p: PubSubPeer): Hash =
2021-02-26 10:19:15 +00:00
p.peerId.hash
func `==`*(a, b: PubSubPeer): bool =
a.peerId == b.peerId
func shortLog*(p: PubSubPeer): string =
if p.isNil:
"PubSubPeer(nil)"
else:
shortLog(p.peerId)
chronicles.formatIt(PubSubPeer):
shortLog(it)
PubSub (Gossip & Flood) Implementation (#36) This adds gossipsub and floodsub, as well as basic interop testing with the go libp2p daemon. * add close event * wip: gossipsub * splitting rpc message * making message handling more consistent * initial gossipsub implementation * feat: nim 1.0 cleanup * wip: gossipsub protobuf * adding encoding/decoding of gossipsub messages * add disconnect handler * add proper gossipsub msg handling * misc: cleanup for nim 1.0 * splitting floodsub and gossipsub tests * feat: add mesh rebalansing * test pubsub * add mesh rebalansing tests * testing mesh maintenance * finishing mcache implementatin * wip: commenting out broken tests * wip: don't run heartbeat for now * switchout debug for trace logging * testing gossip peer selection algorithm * test stream piping * more work around message amplification * get the peerid from message * use timed cache as backing store * allow setting timeout in constructor * several changes to improve performance * more through testing of msg amplification * prevent gc issues * allow piping to self and prevent deadlocks * improove floodsub * allow running hook on cache eviction * prevent race conditions * prevent race conditions and improove tests * use hashes as cache keys * removing useless file * don't create a new seq * re-enable pubsub tests * fix imports * reduce number of runs to speed up tests * break out control message processing * normalize sleeps between steps * implement proper transport filtering * initial interop testing * clean up floodsub publish logic * allow dialing without a protocol * adding multiple reads/writes * use protobuf varint in mplex * don't loose conn's peerInfo * initial interop pubsub tests * don't duplicate connections/peers * bring back interop tests * wip: interop * re-enable interop and daemon tests * add multiple read write tests from handlers * don't cleanup channel prematurely * use correct channel to send/receive msgs * adjust tests with latest changes * include interop tests * remove temp logging output * fix ci * use correct public key serialization * additional tests for pubsub interop
2019-12-06 02:16:18 +00:00
proc connected*(p: PubSubPeer): bool =
not p.sendConn.isNil and not (p.sendConn.closed or p.sendConn.atEof)
proc hasObservers*(p: PubSubPeer): bool =
p.observers != nil and anyIt(p.observers[], it != nil)
func outbound*(p: PubSubPeer): bool =
# gossipsub 1.1 spec requires us to know if the transport is outgoing
# in order to give priotity to connections we make
# https://github.com/libp2p/specs/blob/master/pubsub/gossipsub/gossipsub-v1.1.md#outbound-mesh-quotas
# This behaviour is presrcibed to counter sybil attacks and ensures that a coordinated inbound attack can never fully take over the mesh
if not p.sendConn.isNil and p.sendConn.transportDir == Direction.Out: true else: false
2023-09-22 14:45:08 +00:00
proc recvObservers*(p: PubSubPeer, msg: var RPCMsg) =
# trigger hooks
if not (isNil(p.observers)) and p.observers[].len > 0:
for obs in p.observers[]:
if not (isNil(obs)): # TODO: should never be nil, but...
if not (isNil(obs.onRecv)):
obs.onRecv(p, msg)
proc validatedObservers*(p: PubSubPeer, msg: Message, msgId: MessageId) =
# trigger hooks
if not (isNil(p.observers)) and p.observers[].len > 0:
for obs in p.observers[]:
if not (isNil(obs.onValidated)):
obs.onValidated(p, msg, msgId)
proc sendObservers(p: PubSubPeer, msg: var RPCMsg) =
# trigger hooks
if not (isNil(p.observers)) and p.observers[].len > 0:
for obs in p.observers[]:
if not (isNil(obs)): # TODO: should never be nil, but...
if not (isNil(obs.onSend)):
obs.onSend(p, msg)
proc handle*(p: PubSubPeer, conn: Connection) {.async.} =
debug "starting pubsub read loop", conn, peer = p, closed = conn.closed
2019-09-10 02:15:52 +00:00
try:
2020-05-25 14:33:24 +00:00
try:
while not conn.atEof:
trace "waiting for data", conn, peer = p, closed = conn.closed
var data = await conn.readLp(p.maxMessageSize)
trace "read data from peer",
conn, peer = p, closed = conn.closed, data = data.shortLog
2020-05-25 14:33:24 +00:00
2023-09-22 14:45:08 +00:00
await p.handler(p, data)
data = newSeq[byte]() # Release memory
except PeerRateLimitError as exc:
debug "Peer rate limit exceeded, exiting read while",
conn, peer = p, description = exc.msg
2023-09-22 14:45:08 +00:00
except CatchableError as exc:
debug "Exception occurred in PubSubPeer.handle",
conn, peer = p, closed = conn.closed, description = exc.msg
2020-05-25 14:33:24 +00:00
finally:
await conn.close()
except CancelledError:
# This is top-level procedure which will work as separate task, so it
2020-11-23 21:02:23 +00:00
# do not need to propagate CancelledError.
trace "Unexpected cancellation in PubSubPeer.handle"
PubSub (Gossip & Flood) Implementation (#36) This adds gossipsub and floodsub, as well as basic interop testing with the go libp2p daemon. * add close event * wip: gossipsub * splitting rpc message * making message handling more consistent * initial gossipsub implementation * feat: nim 1.0 cleanup * wip: gossipsub protobuf * adding encoding/decoding of gossipsub messages * add disconnect handler * add proper gossipsub msg handling * misc: cleanup for nim 1.0 * splitting floodsub and gossipsub tests * feat: add mesh rebalansing * test pubsub * add mesh rebalansing tests * testing mesh maintenance * finishing mcache implementatin * wip: commenting out broken tests * wip: don't run heartbeat for now * switchout debug for trace logging * testing gossip peer selection algorithm * test stream piping * more work around message amplification * get the peerid from message * use timed cache as backing store * allow setting timeout in constructor * several changes to improve performance * more through testing of msg amplification * prevent gc issues * allow piping to self and prevent deadlocks * improove floodsub * allow running hook on cache eviction * prevent race conditions * prevent race conditions and improove tests * use hashes as cache keys * removing useless file * don't create a new seq * re-enable pubsub tests * fix imports * reduce number of runs to speed up tests * break out control message processing * normalize sleeps between steps * implement proper transport filtering * initial interop testing * clean up floodsub publish logic * allow dialing without a protocol * adding multiple reads/writes * use protobuf varint in mplex * don't loose conn's peerInfo * initial interop pubsub tests * don't duplicate connections/peers * bring back interop tests * wip: interop * re-enable interop and daemon tests * add multiple read write tests from handlers * don't cleanup channel prematurely * use correct channel to send/receive msgs * adjust tests with latest changes * include interop tests * remove temp logging output * fix ci * use correct public key serialization * additional tests for pubsub interop
2019-12-06 02:16:18 +00:00
except CatchableError as exc:
trace "Exception occurred in PubSubPeer.handle",
conn, peer = p, closed = conn.closed, description = exc.msg
finally:
debug "exiting pubsub read loop", conn, peer = p, closed = conn.closed
2019-09-10 02:15:52 +00:00
proc closeSendConn(p: PubSubPeer, event: PubSubPeerEventKind) {.async.} =
if p.sendConn != nil:
trace "Removing send connection", p, conn = p.sendConn
await p.sendConn.close()
p.sendConn = nil
if not p.connectedFut.finished:
p.connectedFut.complete()
try:
if p.onEvent != nil:
p.onEvent(p, PubSubPeerEvent(kind: event))
except CancelledError as exc:
raise exc
except CatchableError as exc:
debug "Errors during diconnection events", description = exc.msg
# don't cleanup p.address else we leak some gossip stat table
proc connectOnce(p: PubSubPeer): Future[void] {.async.} =
try:
if p.connectedFut.finished:
p.connectedFut = newFuture[void]()
let newConn = await p.getConn().wait(5.seconds)
if newConn.isNil:
raise (ref LPError)(msg: "Cannot establish send connection")
# When the send channel goes up, subscriptions need to be sent to the
# remote peer - if we had multiple channels up and one goes down, all
# stop working so we make an effort to only keep a single channel alive
trace "Get new send connection", p, newConn
2023-03-08 11:30:19 +00:00
# Careful to race conditions here.
# Topic subscription relies on either connectedFut
# to be completed, or onEvent to be called later
p.connectedFut.complete()
p.sendConn = newConn
p.address =
if p.sendConn.observedAddr.isSome:
some(p.sendConn.observedAddr.get)
else:
none(MultiAddress)
if p.onEvent != nil:
p.onEvent(p, PubSubPeerEvent(kind: PubSubPeerEventKind.StreamOpened))
await handle(p, newConn)
finally:
await p.closeSendConn(PubSubPeerEventKind.StreamClosed)
proc connectImpl(p: PubSubPeer) {.async.} =
try:
# Keep trying to establish a connection while it's possible to do so - the
# send connection might get disconnected due to a timeout or an unrelated
# issue so we try to get a new on
while true:
if p.disconnected:
if not p.connectedFut.finished:
p.connectedFut.complete()
return
await connectOnce(p)
except CatchableError as exc: # never cancelled
debug "Could not establish send connection", description = exc.msg
proc connect*(p: PubSubPeer) =
if p.connected:
return
asyncSpawn connectImpl(p)
2023-03-08 11:30:19 +00:00
proc hasSendConn*(p: PubSubPeer): bool =
p.sendConn != nil
2020-11-23 21:02:23 +00:00
template sendMetrics(msg: RPCMsg): untyped =
when defined(libp2p_expensive_metrics):
for x in msg.messages:
# metrics
libp2p_pubsub_sent_messages.inc(labelValues = [$p.peerId, x.topic])
2020-11-23 21:02:23 +00:00
proc clearSendPriorityQueue(p: PubSubPeer) =
if p.rpcmessagequeue.sendPriorityQueue.len == 0:
return # fast path
while p.rpcmessagequeue.sendPriorityQueue.len > 0 and
p.rpcmessagequeue.sendPriorityQueue[0].finished:
discard p.rpcmessagequeue.sendPriorityQueue.popFirst()
while p.rpcmessagequeue.sendPriorityQueue.len > 0 and
p.rpcmessagequeue.sendPriorityQueue[^1].finished:
discard p.rpcmessagequeue.sendPriorityQueue.popLast()
when defined(pubsubpeer_queue_metrics):
libp2p_gossipsub_priority_queue_size.set(
value = p.rpcmessagequeue.sendPriorityQueue.len.int64, labelValues = [$p.peerId]
)
proc sendMsgContinue(conn: Connection, msgFut: Future[void]) {.async.} =
# Continuation for a pending `sendMsg` future from below
try:
await msgFut
trace "sent pubsub message to remote", conn
except CatchableError as exc: # never cancelled
# Because we detach the send call from the currently executing task using
# asyncSpawn, no exceptions may leak out of it
trace "Unable to send to remote", conn, description = exc.msg
# Next time sendConn is used, it will be have its close flag set and thus
# will be recycled
await conn.close() # This will clean up the send connection
proc sendMsgSlow(p: PubSubPeer, msg: seq[byte]) {.async.} =
# Slow path of `sendMsg` where msg is held in memory while send connection is
# being set up
if p.sendConn == nil:
# Wait for a send conn to be setup. `connectOnce` will
# complete this even if the sendConn setup failed
discard await race(p.connectedFut)
var conn = p.sendConn
if conn == nil or conn.closed():
debug "No send connection", p, payload = shortLog(msg)
return
trace "sending encoded msg to peer", conn, encoded = shortLog(msg)
await sendMsgContinue(conn, conn.writeLp(msg))
proc sendMsg(p: PubSubPeer, msg: seq[byte]): Future[void] =
if p.sendConn != nil and not p.sendConn.closed():
# Fast path that avoids copying msg (which happens for {.async.})
let conn = p.sendConn
trace "sending encoded msg to peer", conn, encoded = shortLog(msg)
let f = conn.writeLp(msg)
if not f.completed():
sendMsgContinue(conn, f)
else:
f
else:
sendMsgSlow(p, msg)
proc sendEncoded*(p: PubSubPeer, msg: seq[byte], isHighPriority: bool): Future[void] =
## Asynchronously sends an encoded message to a specified `PubSubPeer`.
##
## Parameters:
## - `p`: The `PubSubPeer` instance to which the message is to be sent.
## - `msg`: The message to be sent, encoded as a sequence of bytes (`seq[byte]`).
## - `isHighPriority`: A boolean indicating whether the message should be treated as high priority.
## High priority messages are sent immediately, while low priority messages are queued and sent only after all high
## priority messages have been sent.
doAssert(not isNil(p), "pubsubpeer nil!")
p.clearSendPriorityQueue()
# When queues are empty, skipping the non-priority queue for low priority
# messages reduces latency
let emptyQueues =
(
p.rpcmessagequeue.sendPriorityQueue.len() +
p.rpcmessagequeue.nonPriorityQueue.len()
) == 0
if msg.len <= 0:
debug "empty message, skipping", p, payload = shortLog(msg)
Future[void].completed()
elif msg.len > p.maxMessageSize:
info "trying to send a msg too big for pubsub",
maxSize = p.maxMessageSize, msgSize = msg.len
Future[void].completed()
elif isHighPriority or emptyQueues:
let f = p.sendMsg(msg)
if not f.finished:
p.rpcmessagequeue.sendPriorityQueue.addLast(f)
when defined(pubsubpeer_queue_metrics):
libp2p_gossipsub_priority_queue_size.inc(labelValues = [$p.peerId])
f
else:
if len(p.rpcmessagequeue.nonPriorityQueue) >= p.maxNumElementsInNonPriorityQueue:
if not p.disconnected:
p.disconnected = true
libp2p_pubsub_disconnects_over_non_priority_queue_limit.inc()
p.closeSendConn(PubSubPeerEventKind.DisconnectionRequested)
else:
Future[void].completed()
else:
let f = p.rpcmessagequeue.nonPriorityQueue.addLast(msg)
when defined(pubsubpeer_queue_metrics):
libp2p_gossipsub_non_priority_queue_size.inc(labelValues = [$p.peerId])
f
iterator splitRPCMsg(
peer: PubSubPeer, rpcMsg: RPCMsg, maxSize: int, anonymize: bool
): seq[byte] =
## This iterator takes an `RPCMsg` and sequentially repackages its Messages into new `RPCMsg` instances.
## Each new `RPCMsg` accumulates Messages until reaching the specified `maxSize`. If a single Message
## exceeds the `maxSize` when trying to fit into an empty `RPCMsg`, the latter is skipped as too large to send.
## Every constructed `RPCMsg` is then encoded, optionally anonymized, and yielded as a sequence of bytes.
var currentRPCMsg = rpcMsg
currentRPCMsg.messages = newSeq[Message]()
var currentSize = byteSize(currentRPCMsg)
for msg in rpcMsg.messages:
let msgSize = byteSize(msg)
# Check if adding the next message will exceed maxSize
if float(currentSize + msgSize) * 1.1 > float(maxSize):
# Guessing 10% protobuf overhead
if currentRPCMsg.messages.len == 0:
trace "message too big to sent", peer, rpcMsg = shortLog(currentRPCMsg)
continue # Skip this message
trace "sending msg to peer", peer, rpcMsg = shortLog(currentRPCMsg)
yield encodeRpcMsg(currentRPCMsg, anonymize)
currentRPCMsg = RPCMsg()
currentSize = 0
currentRPCMsg.messages.add(msg)
currentSize += msgSize
# Check if there is a non-empty currentRPCMsg left to be added
if currentSize > 0 and currentRPCMsg.messages.len > 0:
trace "sending msg to peer", peer, rpcMsg = shortLog(currentRPCMsg)
yield encodeRpcMsg(currentRPCMsg, anonymize)
else:
trace "message too big to sent", peer, rpcMsg = shortLog(currentRPCMsg)
proc send*(
p: PubSubPeer, msg: RPCMsg, anonymize: bool, isHighPriority: bool
) {.raises: [].} =
## Asynchronously sends an `RPCMsg` to a specified `PubSubPeer` with an option for anonymization.
##
## Parameters:
## - `p`: The `PubSubPeer` instance to which the message is to be sent.
## - `msg`: The `RPCMsg` instance representing the message to be sent.
## - `anonymize`: A boolean flag indicating whether the message should be sent with anonymization.
## - `isHighPriority`: A boolean flag indicating whether the message should be treated as high priority.
## High priority messages are sent immediately, while low priority messages are queued and sent only after all high
## priority messages have been sent.
# When sending messages, we take care to re-encode them with the right
# anonymization flag to ensure that we're not penalized for sending invalid
# or malicious data on the wire - in particular, re-encoding protects against
# some forms of valid but redundantly encoded protobufs with unknown or
# duplicated fields
let encoded =
if p.hasObservers():
2020-11-23 21:02:23 +00:00
var mm = msg
# trigger send hooks
p.sendObservers(mm)
2020-11-23 21:02:23 +00:00
sendMetrics(mm)
encodeRpcMsg(mm, anonymize)
else:
# If there are no send hooks, we redundantly re-encode the message to
# protobuf for every peer - this could easily be improved!
2020-11-23 21:02:23 +00:00
sendMetrics(msg)
encodeRpcMsg(msg, anonymize)
if encoded.len > p.maxMessageSize and msg.messages.len > 1:
for encodedSplitMsg in splitRPCMsg(p, msg, p.maxMessageSize, anonymize):
asyncSpawn p.sendEncoded(encodedSplitMsg, isHighPriority)
else:
# If the message size is within limits, send it as is
trace "sending msg to peer", peer = p, rpcMsg = shortLog(msg)
asyncSpawn p.sendEncoded(encoded, isHighPriority)
PubSub (Gossip & Flood) Implementation (#36) This adds gossipsub and floodsub, as well as basic interop testing with the go libp2p daemon. * add close event * wip: gossipsub * splitting rpc message * making message handling more consistent * initial gossipsub implementation * feat: nim 1.0 cleanup * wip: gossipsub protobuf * adding encoding/decoding of gossipsub messages * add disconnect handler * add proper gossipsub msg handling * misc: cleanup for nim 1.0 * splitting floodsub and gossipsub tests * feat: add mesh rebalansing * test pubsub * add mesh rebalansing tests * testing mesh maintenance * finishing mcache implementatin * wip: commenting out broken tests * wip: don't run heartbeat for now * switchout debug for trace logging * testing gossip peer selection algorithm * test stream piping * more work around message amplification * get the peerid from message * use timed cache as backing store * allow setting timeout in constructor * several changes to improve performance * more through testing of msg amplification * prevent gc issues * allow piping to self and prevent deadlocks * improove floodsub * allow running hook on cache eviction * prevent race conditions * prevent race conditions and improove tests * use hashes as cache keys * removing useless file * don't create a new seq * re-enable pubsub tests * fix imports * reduce number of runs to speed up tests * break out control message processing * normalize sleeps between steps * implement proper transport filtering * initial interop testing * clean up floodsub publish logic * allow dialing without a protocol * adding multiple reads/writes * use protobuf varint in mplex * don't loose conn's peerInfo * initial interop pubsub tests * don't duplicate connections/peers * bring back interop tests * wip: interop * re-enable interop and daemon tests * add multiple read write tests from handlers * don't cleanup channel prematurely * use correct channel to send/receive msgs * adjust tests with latest changes * include interop tests * remove temp logging output * fix ci * use correct public key serialization * additional tests for pubsub interop
2019-12-06 02:16:18 +00:00
proc canAskIWant*(p: PubSubPeer, msgId: MessageId): bool =
for sentIHave in p.sentIHaves.mitems():
if msgId in sentIHave:
sentIHave.excl(msgId)
return true
return false
proc sendNonPriorityTask(p: PubSubPeer) {.async.} =
while true:
# we send non-priority messages only if there are no pending priority messages
let msg = await p.rpcmessagequeue.nonPriorityQueue.popFirst()
while p.rpcmessagequeue.sendPriorityQueue.len > 0:
p.clearSendPriorityQueue()
# waiting for the last future minimizes the number of times we have to
# wait for something (each wait = performance cost) -
# clearSendPriorityQueue ensures we're not waiting for an already-finished
# future
if p.rpcmessagequeue.sendPriorityQueue.len > 0:
2024-03-20 10:54:32 +00:00
# `race` prevents `p.rpcmessagequeue.sendPriorityQueue[^1]` from being
# cancelled when this task is cancelled
discard await race(p.rpcmessagequeue.sendPriorityQueue[^1])
when defined(pubsubpeer_queue_metrics):
libp2p_gossipsub_non_priority_queue_size.dec(labelValues = [$p.peerId])
await p.sendMsg(msg)
proc startSendNonPriorityTask(p: PubSubPeer) =
debug "starting sendNonPriorityTask", p
if p.rpcmessagequeue.sendNonPriorityTask.isNil:
p.rpcmessagequeue.sendNonPriorityTask = p.sendNonPriorityTask()
proc stopSendNonPriorityTask*(p: PubSubPeer) =
if not p.rpcmessagequeue.sendNonPriorityTask.isNil:
debug "stopping sendNonPriorityTask", p
p.rpcmessagequeue.sendNonPriorityTask.cancelSoon()
p.rpcmessagequeue.sendNonPriorityTask = nil
p.rpcmessagequeue.sendPriorityQueue.clear()
p.rpcmessagequeue.nonPriorityQueue.clear()
when defined(pubsubpeer_queue_metrics):
libp2p_gossipsub_priority_queue_size.set(labelValues = [$p.peerId], value = 0)
libp2p_gossipsub_non_priority_queue_size.set(labelValues = [$p.peerId], value = 0)
proc new(T: typedesc[RpcMessageQueue]): T =
return T(
sendPriorityQueue: initDeque[Future[void]](),
nonPriorityQueue: newAsyncQueue[seq[byte]](),
)
proc new*(
T: typedesc[PubSubPeer],
2021-12-16 10:05:20 +00:00
peerId: PeerId,
getConn: GetConn,
onEvent: OnEvent,
codec: string,
2023-09-22 14:45:08 +00:00
maxMessageSize: int,
maxNumElementsInNonPriorityQueue: int = DefaultMaxNumElementsInNonPriorityQueue,
2023-09-22 14:45:08 +00:00
overheadRateLimitOpt: Opt[TokenBucket] = Opt.none(TokenBucket),
): T =
result = T(
getConn: getConn,
onEvent: onEvent,
codec: codec,
peerId: peerId,
connectedFut: newFuture[void](),
2023-09-22 14:45:08 +00:00
maxMessageSize: maxMessageSize,
overheadRateLimitOpt: overheadRateLimitOpt,
rpcmessagequeue: RpcMessageQueue.new(),
maxNumElementsInNonPriorityQueue: maxNumElementsInNonPriorityQueue,
)
result.sentIHaves.addFirst(default(HashSet[MessageId]))
result.iDontWants.addFirst(default(HashSet[SaltedId]))
result.startSendNonPriorityTask()