2019-09-09 20:15:52 -06:00
|
|
|
## Nim-LibP2P
|
2019-09-24 11:48:23 -06:00
|
|
|
## Copyright (c) 2019 Status Research & Development GmbH
|
2019-09-09 20:15:52 -06:00
|
|
|
## Licensed under either of
|
|
|
|
## * Apache License, version 2.0, ([LICENSE-APACHE](LICENSE-APACHE))
|
|
|
|
## * MIT license ([LICENSE-MIT](LICENSE-MIT))
|
|
|
|
## at your option.
|
|
|
|
## This file may not be copied, modified, or distributed except according to
|
|
|
|
## those terms.
|
|
|
|
|
2020-09-24 18:43:20 +02:00
|
|
|
import std/[sequtils, strutils, tables, hashes]
|
2020-06-11 12:09:34 -06:00
|
|
|
import chronos, chronicles, nimcrypto/sha2, metrics
|
2019-12-10 14:50:35 -06:00
|
|
|
import rpc/[messages, message, protobuf],
|
2020-07-01 15:25:09 +09:00
|
|
|
../../peerid,
|
2019-09-24 10:16:39 -06:00
|
|
|
../../peerinfo,
|
2020-06-19 11:29:43 -06:00
|
|
|
../../stream/connection,
|
2019-09-11 23:46:08 -06:00
|
|
|
../../crypto/crypto,
|
2020-03-23 15:03:36 +09:00
|
|
|
../../protobuf/minprotobuf,
|
|
|
|
../../utility
|
2019-09-09 20:15:52 -06:00
|
|
|
|
2020-09-24 18:43:20 +02:00
|
|
|
export peerid, connection
|
|
|
|
|
2019-09-09 20:15:52 -06:00
|
|
|
logScope:
|
2020-12-01 11:34:27 -06:00
|
|
|
topics = "libp2p pubsubpeer"
|
2019-09-09 20:15:52 -06:00
|
|
|
|
2020-08-05 01:27:59 +02:00
|
|
|
when defined(libp2p_expensive_metrics):
|
|
|
|
declareCounter(libp2p_pubsub_sent_messages, "number of messages sent", labels = ["id", "topic"])
|
|
|
|
declareCounter(libp2p_pubsub_received_messages, "number of messages received", labels = ["id", "topic"])
|
|
|
|
declareCounter(libp2p_pubsub_skipped_received_messages, "number of received skipped messages", labels = ["id"])
|
|
|
|
declareCounter(libp2p_pubsub_skipped_sent_messages, "number of sent skipped messages", labels = ["id"])
|
2020-06-11 20:20:58 -06:00
|
|
|
|
2019-09-09 20:15:52 -06:00
|
|
|
type
|
2020-04-30 22:22:31 +09:00
|
|
|
PubSubObserver* = ref object
|
2020-05-29 10:46:27 -06:00
|
|
|
onRecv*: proc(peer: PubSubPeer; msgs: var RPCMsg) {.gcsafe, raises: [Defect].}
|
|
|
|
onSend*: proc(peer: PubSubPeer; msgs: var RPCMsg) {.gcsafe, raises: [Defect].}
|
2019-09-09 20:15:52 -06:00
|
|
|
|
2020-09-22 09:05:53 +02:00
|
|
|
PubSubPeerEventKind* {.pure.} = enum
|
|
|
|
Connected
|
|
|
|
Disconnected
|
|
|
|
|
|
|
|
PubsubPeerEvent* = object
|
|
|
|
kind*: PubSubPeerEventKind
|
|
|
|
|
|
|
|
GetConn* = proc(): Future[Connection] {.gcsafe.}
|
2021-03-03 08:23:40 +09:00
|
|
|
DropConn* = proc(peer: PubsubPeer) {.gcsafe.} # have to pass peer as it's unknown during init
|
2020-09-22 09:05:53 +02:00
|
|
|
OnEvent* = proc(peer: PubSubPeer, event: PubsubPeerEvent) {.gcsafe.}
|
2020-09-01 09:33:03 +02:00
|
|
|
|
2020-04-30 22:22:31 +09:00
|
|
|
PubSubPeer* = ref object of RootObj
|
2020-09-01 09:33:03 +02:00
|
|
|
getConn*: GetConn # callback to establish a new send connection
|
2021-03-03 08:23:40 +09:00
|
|
|
dropConn*: DropConn # Function pointer to use to drop connections
|
2020-09-22 09:05:53 +02:00
|
|
|
onEvent*: OnEvent # Connectivity updates for peer
|
2020-08-11 18:05:49 -06:00
|
|
|
codec*: string # the protocol that this peer joined from
|
2020-09-22 09:05:53 +02:00
|
|
|
sendConn*: Connection # cached send connection
|
2021-02-27 23:49:56 +09:00
|
|
|
address*: Option[MultiAddress]
|
2020-08-11 18:05:49 -06:00
|
|
|
peerId*: PeerID
|
2020-04-30 22:22:31 +09:00
|
|
|
handler*: RPCHandler
|
|
|
|
observers*: ref seq[PubSubObserver] # ref as in smart_ptr
|
|
|
|
|
2020-09-21 18:16:29 +09:00
|
|
|
score*: float64
|
|
|
|
iWantBudget*: int
|
|
|
|
iHaveBudget*: int
|
|
|
|
appScore*: float64 # application specific score
|
|
|
|
behaviourPenalty*: float64 # the eventual penalty score
|
2021-01-15 13:48:03 +09:00
|
|
|
|
2021-01-08 14:21:24 +09:00
|
|
|
when defined(libp2p_agents_metrics):
|
|
|
|
shortAgent*: string
|
2020-09-21 18:16:29 +09:00
|
|
|
|
2020-09-01 09:33:03 +02:00
|
|
|
RPCHandler* = proc(peer: PubSubPeer, msg: RPCMsg): Future[void] {.gcsafe.}
|
2019-09-09 20:15:52 -06:00
|
|
|
|
2020-09-22 09:05:53 +02:00
|
|
|
func hash*(p: PubSubPeer): Hash =
|
2021-02-26 19:19:15 +09:00
|
|
|
p.peerId.hash
|
|
|
|
|
|
|
|
func `==`*(a, b: PubSubPeer): bool =
|
|
|
|
a.peerId == b.peerId
|
2020-07-13 22:32:38 +09:00
|
|
|
|
2020-09-06 10:31:47 +02:00
|
|
|
func shortLog*(p: PubSubPeer): string =
|
|
|
|
if p.isNil: "PubSubPeer(nil)"
|
|
|
|
else: shortLog(p.peerId)
|
|
|
|
chronicles.formatIt(PubSubPeer): shortLog(it)
|
2019-12-05 20:16:18 -06:00
|
|
|
|
2020-06-29 09:15:31 -06:00
|
|
|
proc connected*(p: PubSubPeer): bool =
|
2020-08-11 18:05:49 -06:00
|
|
|
not p.sendConn.isNil and not
|
|
|
|
(p.sendConn.closed or p.sendConn.atEof)
|
2020-07-07 18:33:05 -06:00
|
|
|
|
2021-04-18 10:08:33 +02:00
|
|
|
proc hasObservers*(p: PubSubPeer): bool =
|
2020-09-24 18:43:20 +02:00
|
|
|
p.observers != nil and anyIt(p.observers[], it != nil)
|
|
|
|
|
2021-02-06 09:13:04 +09:00
|
|
|
func outbound*(p: PubSubPeer): bool =
|
2021-03-03 08:23:40 +09:00
|
|
|
# gossipsub 1.1 spec requires us to know if the transport is outgoing
|
|
|
|
# in order to give priotity to connections we make
|
|
|
|
# https://github.com/libp2p/specs/blob/master/pubsub/gossipsub/gossipsub-v1.1.md#outbound-mesh-quotas
|
|
|
|
# This behaviour is presrcibed to counter sybil attacks and ensures that a coordinated inbound attack can never fully take over the mesh
|
|
|
|
if not p.sendConn.isNil and p.sendConn.transportDir == Direction.Out:
|
2021-02-06 09:13:04 +09:00
|
|
|
true
|
|
|
|
else:
|
|
|
|
false
|
|
|
|
|
2020-05-29 10:46:27 -06:00
|
|
|
proc recvObservers(p: PubSubPeer, msg: var RPCMsg) =
|
|
|
|
# trigger hooks
|
|
|
|
if not(isNil(p.observers)) and p.observers[].len > 0:
|
|
|
|
for obs in p.observers[]:
|
2020-06-24 09:08:44 -06:00
|
|
|
if not(isNil(obs)): # TODO: should never be nil, but...
|
|
|
|
obs.onRecv(p, msg)
|
2020-05-29 10:46:27 -06:00
|
|
|
|
|
|
|
proc sendObservers(p: PubSubPeer, msg: var RPCMsg) =
|
|
|
|
# trigger hooks
|
|
|
|
if not(isNil(p.observers)) and p.observers[].len > 0:
|
|
|
|
for obs in p.observers[]:
|
2020-06-24 09:08:44 -06:00
|
|
|
if not(isNil(obs)): # TODO: should never be nil, but...
|
|
|
|
obs.onSend(p, msg)
|
2020-05-29 10:46:27 -06:00
|
|
|
|
2019-12-16 23:24:03 -06:00
|
|
|
proc handle*(p: PubSubPeer, conn: Connection) {.async.} =
|
2020-09-06 10:31:47 +02:00
|
|
|
debug "starting pubsub read loop",
|
|
|
|
conn, peer = p, closed = conn.closed
|
2019-09-09 20:15:52 -06:00
|
|
|
try:
|
2020-05-25 08:33:24 -06:00
|
|
|
try:
|
2020-08-02 23:20:11 -06:00
|
|
|
while not conn.atEof:
|
2020-09-06 10:31:47 +02:00
|
|
|
trace "waiting for data", conn, peer = p, closed = conn.closed
|
|
|
|
|
2020-08-11 18:05:49 -06:00
|
|
|
let data = await conn.readLp(64 * 1024)
|
2020-09-06 10:31:47 +02:00
|
|
|
trace "read data from peer",
|
|
|
|
conn, peer = p, closed = conn.closed,
|
|
|
|
data = data.shortLog
|
2020-05-25 08:33:24 -06:00
|
|
|
|
2020-07-15 11:25:39 +03:00
|
|
|
var rmsg = decodeRpcMsg(data)
|
|
|
|
if rmsg.isErr():
|
2020-09-06 10:31:47 +02:00
|
|
|
notice "failed to decode msg from peer",
|
|
|
|
conn, peer = p, closed = conn.closed,
|
|
|
|
err = rmsg.error()
|
2020-07-15 11:25:39 +03:00
|
|
|
break
|
|
|
|
|
2020-09-06 10:31:47 +02:00
|
|
|
trace "decoded msg from peer",
|
|
|
|
conn, peer = p, closed = conn.closed,
|
|
|
|
msg = rmsg.get().shortLog
|
2020-05-25 08:33:24 -06:00
|
|
|
# trigger hooks
|
2020-09-04 08:10:32 +02:00
|
|
|
p.recvObservers(rmsg.get())
|
2020-06-02 17:53:38 -06:00
|
|
|
|
2020-08-05 01:27:59 +02:00
|
|
|
when defined(libp2p_expensive_metrics):
|
2020-09-04 08:10:32 +02:00
|
|
|
for m in rmsg.get().messages:
|
2020-08-05 01:27:59 +02:00
|
|
|
for t in m.topicIDs:
|
|
|
|
# metrics
|
2020-09-06 10:31:47 +02:00
|
|
|
libp2p_pubsub_received_messages.inc(labelValues = [$p.peerId, t])
|
2020-06-11 12:09:34 -06:00
|
|
|
|
2020-09-04 08:10:32 +02:00
|
|
|
await p.handler(p, rmsg.get())
|
2020-05-25 08:33:24 -06:00
|
|
|
finally:
|
|
|
|
await conn.close()
|
2020-09-04 19:30:45 +03:00
|
|
|
except CancelledError:
|
|
|
|
# This is top-level procedure which will work as separate task, so it
|
2020-11-23 15:02:23 -06:00
|
|
|
# do not need to propagate CancelledError.
|
2020-09-04 19:30:45 +03:00
|
|
|
trace "Unexpected cancellation in PubSubPeer.handle"
|
2019-12-05 20:16:18 -06:00
|
|
|
except CatchableError as exc:
|
2020-09-06 10:31:47 +02:00
|
|
|
trace "Exception occurred in PubSubPeer.handle",
|
|
|
|
conn, peer = p, closed = conn.closed, exc = exc.msg
|
2020-09-01 09:33:03 +02:00
|
|
|
finally:
|
2020-09-06 10:31:47 +02:00
|
|
|
debug "exiting pubsub read loop",
|
|
|
|
conn, peer = p, closed = conn.closed
|
2019-09-09 20:15:52 -06:00
|
|
|
|
2020-09-22 09:05:53 +02:00
|
|
|
proc connectOnce(p: PubSubPeer): Future[void] {.async.} =
|
2020-08-17 22:39:39 +03:00
|
|
|
try:
|
2020-09-22 09:05:53 +02:00
|
|
|
let newConn = await p.getConn()
|
2020-08-17 22:39:39 +03:00
|
|
|
if newConn.isNil:
|
2020-09-22 09:05:53 +02:00
|
|
|
raise (ref CatchableError)(msg: "Cannot establish send connection")
|
2020-08-17 22:39:39 +03:00
|
|
|
|
2020-09-22 09:05:53 +02:00
|
|
|
# When the send channel goes up, subscriptions need to be sent to the
|
|
|
|
# remote peer - if we had multiple channels up and one goes down, all
|
|
|
|
# stop working so we make an effort to only keep a single channel alive
|
2020-09-01 09:33:03 +02:00
|
|
|
|
2020-09-22 09:05:53 +02:00
|
|
|
trace "Get new send connection", p, newConn
|
2020-08-17 22:39:39 +03:00
|
|
|
p.sendConn = newConn
|
2021-02-27 23:49:56 +09:00
|
|
|
p.address = some(p.sendConn.observedAddr)
|
2020-09-22 09:05:53 +02:00
|
|
|
|
|
|
|
if p.onEvent != nil:
|
|
|
|
p.onEvent(p, PubsubPeerEvent(kind: PubSubPeerEventKind.Connected))
|
|
|
|
|
|
|
|
await handle(p, newConn)
|
2020-08-17 22:39:39 +03:00
|
|
|
finally:
|
2020-09-22 09:05:53 +02:00
|
|
|
if p.sendConn != nil:
|
|
|
|
trace "Removing send connection", p, conn = p.sendConn
|
|
|
|
await p.sendConn.close()
|
2021-03-03 08:23:40 +09:00
|
|
|
p.sendConn = nil
|
remove send lock (#334)
* remove send lock
When mplex receives data it will block until a reader has processed the
data. Thus, when a large message is received, such as a gossipsub
subscription table, all of mplex will be blocked until all reading is
finished.
However, if at the same time a `dial` to establish a gossipsub send
connection is ongoing, that `dial` will be blocked because mplex is no
longer reading data - specifically, it might indeed be the connection
that's processing the previous data that is waiting for a send
connection.
There are other problems with the current code:
* If an exception is raised, it is not necessarily raised for the same
connection as `p.sendConn`, so resetting `p.sendConn` in the exception
handling is wrong
* `p.isConnected` is checked before taking the lock - thus, if it
returns false, a new dial will be started. If a new task enters `send`
before dial is finished, it will also determine `p.isConnected` is
false, then get stuck on the lock - when the previous task finishes and
releases the lock, the new task will _also_ dial and thus reset
`p.sendConn` causing a leak.
* prefer existing connection
simplifies flow
2020-08-17 12:38:27 +02:00
|
|
|
|
2021-03-03 08:23:40 +09:00
|
|
|
try:
|
|
|
|
if p.onEvent != nil:
|
|
|
|
p.onEvent(p, PubsubPeerEvent(kind: PubSubPeerEventKind.Disconnected))
|
|
|
|
except CancelledError:
|
|
|
|
raise
|
|
|
|
except CatchableError as exc:
|
|
|
|
debug "Errors during diconnection events", error = exc.msg
|
2021-02-28 04:53:17 +00:00
|
|
|
|
2021-03-03 08:23:40 +09:00
|
|
|
# don't cleanup p.address else we leak some gossip stat table
|
2020-09-22 09:05:53 +02:00
|
|
|
|
|
|
|
proc connectImpl(p: PubSubPeer) {.async.} =
|
2020-09-01 09:33:03 +02:00
|
|
|
try:
|
2020-09-22 09:05:53 +02:00
|
|
|
# Keep trying to establish a connection while it's possible to do so - the
|
|
|
|
# send connection might get disconnected due to a timeout or an unrelated
|
|
|
|
# issue so we try to get a new on
|
|
|
|
while true:
|
|
|
|
await connectOnce(p)
|
2021-03-09 13:22:52 +01:00
|
|
|
except CatchableError as exc: # never cancelled
|
2020-09-22 09:05:53 +02:00
|
|
|
debug "Could not establish send connection", msg = exc.msg
|
2021-03-03 08:23:40 +09:00
|
|
|
finally:
|
|
|
|
# drop the connection, else we end up with ghost peers
|
|
|
|
if p.dropConn != nil: p.dropConn(p)
|
2020-09-01 09:33:03 +02:00
|
|
|
|
|
|
|
proc connect*(p: PubSubPeer) =
|
2020-09-22 09:05:53 +02:00
|
|
|
asyncSpawn connectImpl(p)
|
2020-08-11 18:05:49 -06:00
|
|
|
|
2020-09-24 18:43:20 +02:00
|
|
|
proc sendImpl(conn: Connection, encoded: seq[byte]) {.async.} =
|
2020-08-11 18:05:49 -06:00
|
|
|
try:
|
2020-09-06 10:31:47 +02:00
|
|
|
trace "sending encoded msgs to peer", conn, encoded = shortLog(encoded)
|
2020-09-01 09:33:03 +02:00
|
|
|
await conn.writeLp(encoded)
|
2020-09-06 10:31:47 +02:00
|
|
|
trace "sent pubsub message to remote", conn
|
2020-08-02 23:20:11 -06:00
|
|
|
|
2021-03-09 13:22:52 +01:00
|
|
|
except CatchableError as exc: # never cancelled
|
2020-09-01 09:33:03 +02:00
|
|
|
# Because we detach the send call from the currently executing task using
|
2020-09-22 09:05:53 +02:00
|
|
|
# asyncSpawn, no exceptions may leak out of it
|
2020-09-24 18:43:20 +02:00
|
|
|
trace "Unable to send to remote", conn, msg = exc.msg
|
remove send lock (#334)
* remove send lock
When mplex receives data it will block until a reader has processed the
data. Thus, when a large message is received, such as a gossipsub
subscription table, all of mplex will be blocked until all reading is
finished.
However, if at the same time a `dial` to establish a gossipsub send
connection is ongoing, that `dial` will be blocked because mplex is no
longer reading data - specifically, it might indeed be the connection
that's processing the previous data that is waiting for a send
connection.
There are other problems with the current code:
* If an exception is raised, it is not necessarily raised for the same
connection as `p.sendConn`, so resetting `p.sendConn` in the exception
handling is wrong
* `p.isConnected` is checked before taking the lock - thus, if it
returns false, a new dial will be started. If a new task enters `send`
before dial is finished, it will also determine `p.isConnected` is
false, then get stuck on the lock - when the previous task finishes and
releases the lock, the new task will _also_ dial and thus reset
`p.sendConn` causing a leak.
* prefer existing connection
simplifies flow
2020-08-17 12:38:27 +02:00
|
|
|
# Next time sendConn is used, it will be have its close flag set and thus
|
|
|
|
# will be recycled
|
2020-07-16 12:06:57 +02:00
|
|
|
|
2020-09-22 09:05:53 +02:00
|
|
|
await conn.close() # This will clean up the send connection
|
2020-09-01 09:33:03 +02:00
|
|
|
|
2020-11-23 15:02:23 -06:00
|
|
|
template sendMetrics(msg: RPCMsg): untyped =
|
|
|
|
when defined(libp2p_expensive_metrics):
|
|
|
|
for x in msg.messages:
|
|
|
|
for t in x.topicIDs:
|
|
|
|
# metrics
|
|
|
|
libp2p_pubsub_sent_messages.inc(labelValues = [$p.peerId, t])
|
|
|
|
|
2021-04-18 10:08:33 +02:00
|
|
|
proc sendEncoded*(p: PubSubPeer, msg: seq[byte]) {.raises: [Defect].} =
|
2020-09-24 18:43:20 +02:00
|
|
|
doAssert(not isNil(p), "pubsubpeer nil!")
|
|
|
|
|
2021-04-18 10:08:33 +02:00
|
|
|
if msg.len <= 0:
|
|
|
|
debug "empty message, skipping", p, msg = shortLog(msg)
|
|
|
|
return
|
|
|
|
|
2020-09-24 18:43:20 +02:00
|
|
|
let conn = p.sendConn
|
|
|
|
if conn == nil or conn.closed():
|
2021-04-18 10:08:33 +02:00
|
|
|
trace "No send connection, skipping message", p, msg = shortLog(msg)
|
2020-09-24 18:43:20 +02:00
|
|
|
return
|
|
|
|
|
2021-04-18 10:08:33 +02:00
|
|
|
# To limit the size of the closure, we only pass the encoded message and
|
|
|
|
# connection to the spawned send task
|
|
|
|
asyncSpawn(try:
|
|
|
|
sendImpl(conn, msg)
|
|
|
|
except Exception as exc: # TODO chronos Exception
|
|
|
|
raiseAssert exc.msg)
|
|
|
|
|
|
|
|
proc send*(p: PubSubPeer, msg: RPCMsg, anonymize: bool) {.raises: [Defect].} =
|
2020-09-24 18:43:20 +02:00
|
|
|
trace "sending msg to peer", peer = p, rpcMsg = shortLog(msg)
|
|
|
|
|
2020-09-25 18:39:34 +02:00
|
|
|
# When sending messages, we take care to re-encode them with the right
|
|
|
|
# anonymization flag to ensure that we're not penalized for sending invalid
|
|
|
|
# or malicious data on the wire - in particular, re-encoding protects against
|
|
|
|
# some forms of valid but redundantly encoded protobufs with unknown or
|
|
|
|
# duplicated fields
|
2020-09-24 18:43:20 +02:00
|
|
|
let encoded = if p.hasObservers():
|
2020-11-23 15:02:23 -06:00
|
|
|
var mm = msg
|
2020-09-24 18:43:20 +02:00
|
|
|
# trigger send hooks
|
|
|
|
p.sendObservers(mm)
|
2020-11-23 15:02:23 -06:00
|
|
|
sendMetrics(mm)
|
2020-09-25 18:39:34 +02:00
|
|
|
encodeRpcMsg(mm, anonymize)
|
2020-09-24 18:43:20 +02:00
|
|
|
else:
|
|
|
|
# If there are no send hooks, we redundantly re-encode the message to
|
|
|
|
# protobuf for every peer - this could easily be improved!
|
2020-11-23 15:02:23 -06:00
|
|
|
sendMetrics(msg)
|
2020-09-25 18:39:34 +02:00
|
|
|
encodeRpcMsg(msg, anonymize)
|
2020-09-24 18:43:20 +02:00
|
|
|
|
2021-04-18 10:08:33 +02:00
|
|
|
p.sendEncoded(encoded)
|
2019-12-05 20:16:18 -06:00
|
|
|
|
2020-08-11 18:05:49 -06:00
|
|
|
proc newPubSubPeer*(peerId: PeerID,
|
2020-09-01 09:33:03 +02:00
|
|
|
getConn: GetConn,
|
2021-03-03 08:23:40 +09:00
|
|
|
dropConn: DropConn,
|
2020-09-22 09:05:53 +02:00
|
|
|
onEvent: OnEvent,
|
2020-08-11 18:05:49 -06:00
|
|
|
codec: string): PubSubPeer =
|
2020-09-22 09:05:53 +02:00
|
|
|
PubSubPeer(
|
|
|
|
getConn: getConn,
|
2021-03-03 08:23:40 +09:00
|
|
|
dropConn: dropConn,
|
2020-09-22 09:05:53 +02:00
|
|
|
onEvent: onEvent,
|
|
|
|
codec: codec,
|
|
|
|
peerId: peerId,
|
|
|
|
)
|