From 83b6ead8570be5dd39aeebcfbfda4dfc5062e353 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=C8=98tefan=20Talpalaru?= Date: Tue, 23 Jun 2020 16:38:17 +0200 Subject: [PATCH 01/28] Azure: fix MinGW-w64 caching (#239) --- azure-pipelines.yml | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/azure-pipelines.yml b/azure-pipelines.yml index 5e412bcde..98ced1265 100644 --- a/azure-pipelines.yml +++ b/azure-pipelines.yml @@ -36,7 +36,7 @@ steps: - task: CacheBeta@1 displayName: 'cache MinGW-w64' inputs: - key: mingwCache | 8_1_0 | $(PLATFORM) | "v1" + key: mingwCache | 8_1_0 | $(PLATFORM) | "v2" path: mingwCache - powershell: | @@ -53,7 +53,6 @@ steps: mkdir -p mingwCache cd mingwCache if [[ ! -e "$MINGW_FILE" ]]; then - rm -f *.7z curl -OLsS "$MINGW_URL" fi 7z x -y -bd "$MINGW_FILE" >/dev/null From 7a95f1844b039a3f9fc1d9dc6b9fe7182aafba39 Mon Sep 17 00:00:00 2001 From: Dmitriy Ryajov Date: Wed, 24 Jun 2020 09:08:44 -0600 Subject: [PATCH 02/28] Concurrent dials (#238) * count published messages * don't call `switch.dial` in `subscribeToPeer` * add secureconn constructor * close in the correct order * concurent dial lock and track in/out conns better * make tests pass * add todo comment * disconect peers that open too many connections * wip * do connection and muxer tracking in one place * prevent nil pointer in observers * drop connections when peers is over max * prevent channel leaks * don't use closure to handle channel --- libp2p/muxers/mplex/mplex.nim | 40 ++-- libp2p/protocols/pubsub/pubsubpeer.nim | 6 +- libp2p/protocols/secure/noise.nim | 10 +- libp2p/protocols/secure/secio.nim | 9 +- libp2p/protocols/secure/secure.nim | 28 ++- libp2p/stream/chronosstream.nim | 5 +- libp2p/stream/connection.nim | 3 - libp2p/stream/lpstream.nim | 29 ++- libp2p/switch.nim | 306 ++++++++++++++++++------- tests/pubsub/testgossipsub.nim | 1 + tests/testinterop.nim | 6 + tests/testswitch.nim | 8 +- 12 files changed, 309 insertions(+), 142 deletions(-) diff --git a/libp2p/muxers/mplex/mplex.nim b/libp2p/muxers/mplex/mplex.nim index 0b30f4fc7..f11ae6014 100644 --- a/libp2p/muxers/mplex/mplex.nim +++ b/libp2p/muxers/mplex/mplex.nim @@ -25,7 +25,6 @@ type Mplex* = ref object of Muxer remote: Table[uint64, LPChannel] local: Table[uint64, LPChannel] - handlerFuts: seq[Future[void]] currentId*: uint64 maxChannels*: uint64 isClosed: bool @@ -66,6 +65,15 @@ proc newStreamInternal*(m: Mplex, m.getChannelList(initiator)[id] = result +proc handleStream(m: Muxer, chann: LPChannel) {.async.} = + try: + await m.streamHandler(chann) + trace "finished handling stream" + doAssert(chann.closed, "connection not closed by handler!") + except CatchableError as exc: + trace "exception in stream handler", exc = exc.msg + await chann.reset() + method handle*(m: Mplex) {.async, gcsafe.} = trace "starting mplex main loop", oid = m.oid try: @@ -96,7 +104,7 @@ method handle*(m: Mplex) {.async, gcsafe.} = initiator = initiator msgType = msgType size = data.len - oid = m.oid + muxer_oid = m.oid case msgType: of MessageType.New: @@ -104,27 +112,16 @@ method handle*(m: Mplex) {.async, gcsafe.} = channel = await m.newStreamInternal(false, id, name) trace "created channel", name = channel.name, - chann_iod = channel.oid + oid = channel.oid if not isNil(m.streamHandler): - var fut = newFuture[void]() - proc handler() {.async.} = - try: - await m.streamHandler(channel) - trace "finished handling stream" - # doAssert(channel.closed, "connection not closed by handler!") - except CatchableError as exc: - trace "exception in stream 
handler", exc = exc.msg - await channel.reset() - finally: - m.handlerFuts.keepItIf(it != fut) - - fut = handler() + # launch handler task + asyncCheck m.handleStream(channel) of MessageType.MsgIn, MessageType.MsgOut: logScope: name = channel.name - chann_iod = channel.oid + oid = channel.oid trace "pushing data to channel" @@ -134,7 +131,7 @@ method handle*(m: Mplex) {.async, gcsafe.} = of MessageType.CloseIn, MessageType.CloseOut: logScope: name = channel.name - chann_iod = channel.oid + oid = channel.oid trace "closing channel" @@ -144,7 +141,7 @@ method handle*(m: Mplex) {.async, gcsafe.} = of MessageType.ResetIn, MessageType.ResetOut: logScope: name = channel.name - chann_iod = channel.oid + oid = channel.oid trace "resetting channel" @@ -201,12 +198,9 @@ method close*(m: Mplex) {.async, gcsafe.} = except CatchableError as exc: warn "error resetting channel", exc = exc.msg - checkFutures( - await allFinished(m.handlerFuts)) - await m.connection.close() finally: m.remote.clear() m.local.clear() - m.handlerFuts = @[] + # m.handlerFuts = @[] m.isClosed = true diff --git a/libp2p/protocols/pubsub/pubsubpeer.nim b/libp2p/protocols/pubsub/pubsubpeer.nim index 8dd478142..681c511cf 100644 --- a/libp2p/protocols/pubsub/pubsubpeer.nim +++ b/libp2p/protocols/pubsub/pubsubpeer.nim @@ -60,13 +60,15 @@ proc recvObservers(p: PubSubPeer, msg: var RPCMsg) = # trigger hooks if not(isNil(p.observers)) and p.observers[].len > 0: for obs in p.observers[]: - obs.onRecv(p, msg) + if not(isNil(obs)): # TODO: should never be nil, but... + obs.onRecv(p, msg) proc sendObservers(p: PubSubPeer, msg: var RPCMsg) = # trigger hooks if not(isNil(p.observers)) and p.observers[].len > 0: for obs in p.observers[]: - obs.onSend(p, msg) + if not(isNil(obs)): # TODO: should never be nil, but... + obs.onSend(p, msg) proc handle*(p: PubSubPeer, conn: Connection) {.async.} = trace "handling pubsub rpc", peer = p.id, closed = conn.closed diff --git a/libp2p/protocols/secure/noise.nim b/libp2p/protocols/secure/noise.nim index 59d88af27..0e286b90d 100644 --- a/libp2p/protocols/secure/noise.nim +++ b/libp2p/protocols/secure/noise.nim @@ -467,13 +467,9 @@ method handshake*(p: Noise, conn: Connection, initiator: bool): Future[SecureCon debug "Noise handshake, peer infos don't match!", initiator, dealt_peer = $conn.peerInfo.id, dealt_key = $failedKey, received_peer = $pid, received_key = $remotePubKey raise newException(NoiseHandshakeError, "Noise handshake, peer infos don't match! " & $pid & " != " & $conn.peerInfo.peerId) - var secure = new NoiseConnection - secure.initStream() - - secure.stream = conn - secure.peerInfo = PeerInfo.init(remotePubKey) - secure.observedAddr = conn.observedAddr - + var secure = NoiseConnection.init(conn, + PeerInfo.init(remotePubKey), + conn.observedAddr) if initiator: secure.readCs = handshakeRes.cs2 secure.writeCs = handshakeRes.cs1 diff --git a/libp2p/protocols/secure/secio.nim b/libp2p/protocols/secure/secio.nim index 60f042412..3d369b92d 100644 --- a/libp2p/protocols/secure/secio.nim +++ b/libp2p/protocols/secure/secio.nim @@ -245,9 +245,9 @@ proc newSecioConn(conn: Connection, ## Create new secure stream/lpstream, using specified hash algorithm ``hash``, ## cipher algorithm ``cipher``, stretched keys ``secrets`` and order ## ``order``. 
- new result - result.initStream() - result.stream = conn + result = SecioConn.init(conn, + PeerInfo.init(remotePubKey), + conn.observedAddr) let i0 = if order < 0: 1 else: 0 let i1 = if order < 0: 0 else: 1 @@ -265,9 +265,6 @@ proc newSecioConn(conn: Connection, result.readerCoder.init(cipher, secrets.keyOpenArray(i1), secrets.ivOpenArray(i1)) - result.peerInfo = PeerInfo.init(remotePubKey) - result.observedAddr = conn.observedAddr - proc transactMessage(conn: Connection, msg: seq[byte]): Future[seq[byte]] {.async.} = trace "Sending message", message = msg.shortLog, length = len(msg) diff --git a/libp2p/protocols/secure/secure.nim b/libp2p/protocols/secure/secure.nim index a12c35f97..5a594f161 100644 --- a/libp2p/protocols/secure/secure.nim +++ b/libp2p/protocols/secure/secure.nim @@ -12,6 +12,7 @@ import chronos, chronicles import ../protocol, ../../stream/streamseq, ../../stream/connection, + ../../multiaddress, ../../peerinfo logScope: @@ -24,6 +25,16 @@ type stream*: Connection buf: StreamSeq +proc init*[T: SecureConn](C: type T, + conn: Connection, + peerInfo: PeerInfo, + observedAddr: Multiaddress): T = + result = C(stream: conn, + peerInfo: peerInfo, + observedAddr: observedAddr, + closeEvent: conn.closeEvent) + result.initStream() + method initStream*(s: SecureConn) = if s.objName.len == 0: s.objName = "SecureConn" @@ -31,11 +42,11 @@ method initStream*(s: SecureConn) = procCall Connection(s).initStream() method close*(s: SecureConn) {.async.} = + await procCall Connection(s).close() + if not(isNil(s.stream)): await s.stream.close() - await procCall Connection(s).close() - method readMessage*(c: SecureConn): Future[seq[byte]] {.async, base.} = doAssert(false, "Not implemented!") @@ -47,11 +58,12 @@ method handshake(s: Secure, proc handleConn*(s: Secure, conn: Connection, initiator: bool): Future[Connection] {.async, gcsafe.} = var sconn = await s.handshake(conn, initiator) - result = sconn - result.observedAddr = conn.observedAddr + conn.closeEvent.wait() + .addCallback do(udata: pointer = nil): + if not(isNil(sconn)): + asyncCheck sconn.close() - if not isNil(sconn.peerInfo) and sconn.peerInfo.publicKey.isSome: - result.peerInfo = PeerInfo.init(sconn.peerInfo.publicKey.get()) + return sconn method init*(s: Secure) {.gcsafe.} = proc handle(conn: Connection, proto: string) {.async, gcsafe.} = @@ -94,7 +106,7 @@ method readExactly*(s: SecureConn, let consumed = s.buf.consumeTo(toOpenArray(p, 0, nbytes - 1)) doAssert consumed == nbytes, "checked above" except CatchableError as exc: - trace "exception reading from secure connection", exc = exc.msg + trace "exception reading from secure connection", exc = exc.msg, oid = s.oid await s.close() # make sure to close the wrapped connection raise exc @@ -115,6 +127,6 @@ method readOnce*(s: SecureConn, var p = cast[ptr UncheckedArray[byte]](pbytes) return s.buf.consumeTo(toOpenArray(p, 0, nbytes - 1)) except CatchableError as exc: - trace "exception reading from secure connection", exc = exc.msg + trace "exception reading from secure connection", exc = exc.msg, oid = s.oid await s.close() # make sure to close the wrapped connection raise exc diff --git a/libp2p/stream/chronosstream.nim b/libp2p/stream/chronosstream.nim index 05fbb8517..f5b239b80 100644 --- a/libp2p/stream/chronosstream.nim +++ b/libp2p/stream/chronosstream.nim @@ -82,12 +82,11 @@ method atEof*(s: ChronosStream): bool {.inline.} = method close*(s: ChronosStream) {.async.} = try: if not s.isClosed: - s.isClosed = true + await procCall Connection(s).close() - trace "shutting 
down chronos stream", address = $s.client.remoteAddress() + trace "shutting down chronos stream", address = $s.client.remoteAddress(), oid = s.oid if not s.client.closed(): await s.client.closeWait() - await procCall Connection(s).close() except CatchableError as exc: trace "error closing chronosstream", exc = exc.msg diff --git a/libp2p/stream/connection.nim b/libp2p/stream/connection.nim index 9e6ad9577..cb22e3fc0 100644 --- a/libp2p/stream/connection.nim +++ b/libp2p/stream/connection.nim @@ -21,7 +21,6 @@ type Connection* = ref object of LPStream peerInfo*: PeerInfo observedAddr*: Multiaddress - closeEvent*: AsyncEvent ConnectionTracker* = ref object of TrackerBase opened*: uint64 @@ -65,8 +64,6 @@ method initStream*(s: Connection) = method close*(s: Connection) {.async.} = await procCall LPStream(s).close() - - s.closeEvent.fire() inc getConnectionTracker().closed proc `$`*(conn: Connection): string = diff --git a/libp2p/stream/lpstream.nim b/libp2p/stream/lpstream.nim index 23879a176..d6969a76a 100644 --- a/libp2p/stream/lpstream.nim +++ b/libp2p/stream/lpstream.nim @@ -18,6 +18,7 @@ declareGauge(libp2p_open_streams, "open stream instances", labels = ["type"]) type LPStream* = ref object of RootObj + closeEvent*: AsyncEvent isClosed*: bool isEof*: bool objName*: string @@ -73,7 +74,19 @@ method initStream*(s: LPStream) {.base.} = s.oid = genOid() libp2p_open_streams.inc(labelValues = [s.objName]) - trace "stream created", oid = s.oid + trace "stream created", oid = s.oid, name = s.objName + + # TODO: debuging aid to troubleshoot streams open/close + # try: + # echo "ChronosStream ", libp2p_open_streams.value(labelValues = ["ChronosStream"]) + # echo "SecureConn ", libp2p_open_streams.value(labelValues = ["SecureConn"]) + # # doAssert(libp2p_open_streams.value(labelValues = ["ChronosStream"]) >= + # # libp2p_open_streams.value(labelValues = ["SecureConn"])) + # except CatchableError: + # discard + +proc join*(s: LPStream): Future[void] = + s.closeEvent.wait() method closed*(s: LPStream): bool {.base, inline.} = s.isClosed @@ -169,6 +182,16 @@ proc write*(s: LPStream, msg: string): Future[void] = method close*(s: LPStream) {.base, async.} = if not s.isClosed: - libp2p_open_streams.dec(labelValues = [s.objName]) s.isClosed = true - trace "stream destroyed", oid = s.oid + s.closeEvent.fire() + libp2p_open_streams.dec(labelValues = [s.objName]) + trace "stream destroyed", oid = s.oid, name = s.objName + + # TODO: debuging aid to troubleshoot streams open/close + # try: + # echo "ChronosStream ", libp2p_open_streams.value(labelValues = ["ChronosStream"]) + # echo "SecureConn ", libp2p_open_streams.value(labelValues = ["SecureConn"]) + # # doAssert(libp2p_open_streams.value(labelValues = ["ChronosStream"]) >= + # # libp2p_open_streams.value(labelValues = ["SecureConn"])) + # except CatchableError: + # discard diff --git a/libp2p/switch.nim b/libp2p/switch.nim index 909a766e0..bf0463156 100644 --- a/libp2p/switch.nim +++ b/libp2p/switch.nim @@ -7,8 +7,17 @@ ## This file may not be copied, modified, or distributed except according to ## those terms. 
-import tables, sequtils, options, strformat, sets -import chronos, chronicles, metrics +import tables, + sequtils, + options, + strformat, + sets, + algorithm + +import chronos, + chronicles, + metrics + import stream/connection, stream/chronosstream, transports/transport, @@ -38,13 +47,28 @@ declareCounter(libp2p_dialed_peers, "dialed peers") declareCounter(libp2p_failed_dials, "failed dials") declareCounter(libp2p_failed_upgrade, "peers failed upgrade") +const MaxConnectionsPerPeer = 5 + type - NoPubSubException = object of CatchableError + NoPubSubException* = object of CatchableError + TooManyConnections* = object of CatchableError + + Direction {.pure.} = enum + In, Out + + ConnectionHolder = object + dir: Direction + conn: Connection + + MuxerHolder = object + dir: Direction + muxer: Muxer + handle: Future[void] Switch* = ref object of RootObj peerInfo*: PeerInfo - connections*: Table[string, Connection] - muxed*: Table[string, Muxer] + connections*: Table[string, seq[ConnectionHolder]] + muxed*: Table[string, seq[MuxerHolder]] transports*: seq[Transport] protocols*: seq[LPProtocol] muxers*: Table[string, MuxerProvider] @@ -54,10 +78,84 @@ type secureManagers*: seq[Secure] pubSub*: Option[PubSub] dialedPubSubPeers: HashSet[string] + dialLock: Table[string, AsyncLock] -proc newNoPubSubException(): ref CatchableError {.inline.} = +proc newNoPubSubException(): ref NoPubSubException {.inline.} = result = newException(NoPubSubException, "no pubsub provided!") +proc newTooManyConnections(): ref TooManyConnections {.inline.} = + result = newException(TooManyConnections, "too many connections for peer") + +proc disconnect*(s: Switch, peer: PeerInfo) {.async, gcsafe.} + +proc selectConn(s: Switch, peerInfo: PeerInfo): Connection = + ## select the "best" connection according to some criteria + ## + ## Ideally when the connection's stats are available + ## we'd select the fastest, but for now we simply pick an outgoing + ## connection first if none is available, we pick the first outgoing + ## + + if isNil(peerInfo): + return + + let conns = s.connections + .getOrDefault(peerInfo.id) + # it should be OK to sort on each + # access as there should only be + # up to MaxConnectionsPerPeer entries + .sorted( + proc(a, b: ConnectionHolder): int = + if a.dir < b.dir: -1 + elif a.dir == b.dir: 0 + else: 1 + , SortOrder.Descending) + + if conns.len > 0: + return conns[0].conn + +proc selectMuxer(s: Switch, conn: Connection): Muxer = + ## select the muxer for the supplied connection + ## + + if isNil(conn): + return + + if not(isNil(conn.peerInfo)) and conn.peerInfo.id in s.muxed: + if s.muxed[conn.peerInfo.id].len > 0: + let muxers = s.muxed[conn.peerInfo.id] + .filterIt( it.muxer.connection == conn ) + if muxers.len > 0: + return muxers[0].muxer + +proc storeConn(s: Switch, + muxer: Muxer, + dir: Direction, + handle: Future[void] = nil) {.async.} = + ## store the connection and muxer + ## + if not(isNil(muxer)): + let conn = muxer.connection + if not(isNil(conn)): + let id = conn.peerInfo.id + if s.connections.getOrDefault(id).len > MaxConnectionsPerPeer: + warn "disconnecting peer, too many connections", peer = $conn.peerInfo, + conns = s.connections + .getOrDefault(id).len + await muxer.close() + await s.disconnect(conn.peerInfo) + raise newTooManyConnections() + + s.connections.mgetOrPut( + id, + newSeq[ConnectionHolder]()) + .add(ConnectionHolder(conn: conn, dir: dir)) + + s.muxed.mgetOrPut( + muxer.connection.peerInfo.id, + newSeq[MuxerHolder]()) + .add(MuxerHolder(muxer: muxer, handle: handle, 
dir: dir)) + proc secure(s: Switch, conn: Connection): Future[Connection] {.async, gcsafe.} = if s.secureManagers.len <= 0: raise newException(CatchableError, "No secure managers registered!") @@ -137,11 +235,6 @@ proc mux(s: Switch, conn: Connection): Future[void] {.async, gcsafe.} = # not end until muxer ends let handlerFut = muxer.handle() - # add muxer handler cleanup proc - handlerFut.addCallback do (udata: pointer = nil): - trace "muxer handler completed for peer", - peer = conn.peerInfo.id - try: # do identify first, so that we have a # PeerInfo in case we didn't before @@ -149,10 +242,13 @@ proc mux(s: Switch, conn: Connection): Future[void] {.async, gcsafe.} = finally: await stream.close() # close identify stream + if isNil(conn.peerInfo): + await muxer.close() + return + # store it in muxed connections if we have a peer for it - if not isNil(conn.peerInfo): - trace "adding muxer for peer", peer = conn.peerInfo.id - s.muxed[conn.peerInfo.id] = muxer + trace "adding muxer for peer", peer = conn.peerInfo.id + await s.storeConn(muxer, Direction.Out, handlerFut) proc cleanupConn(s: Switch, conn: Connection) {.async, gcsafe.} = try: @@ -160,55 +256,82 @@ proc cleanupConn(s: Switch, conn: Connection) {.async, gcsafe.} = let id = conn.peerInfo.id trace "cleaning up connection for peer", peerId = id if id in s.muxed: - await s.muxed[id].close() - s.muxed.del(id) + let muxerHolder = s.muxed[id] + .filterIt( + it.muxer.connection == conn + ) + + if muxerHolder.len > 0: + await muxerHolder[0].muxer.close() + if not(isNil(muxerHolder[0].handle)): + await muxerHolder[0].handle + + s.muxed[id].keepItIf( + it.muxer.connection != conn + ) + + if s.muxed[id].len == 0: + s.muxed.del(id) if id in s.connections: - s.connections.del(id) + s.connections[id].keepItIf( + it.conn != conn + ) - await conn.close() + if s.connections[id].len == 0: + s.connections.del(id) - s.dialedPubSubPeers.excl(id) + await conn.close() + s.dialedPubSubPeers.excl(id) - libp2p_peers.dec() # TODO: Investigate cleanupConn() always called twice for one peer. 
if not(conn.peerInfo.isClosed()): conn.peerInfo.close() + except CatchableError as exc: trace "exception cleaning up connection", exc = exc.msg + finally: + libp2p_peers.set(s.connections.len.int64) proc disconnect*(s: Switch, peer: PeerInfo) {.async, gcsafe.} = - let conn = s.connections.getOrDefault(peer.id) - if not isNil(conn): - trace "disconnecting peer", peer = $peer - await s.cleanupConn(conn) + let connections = s.connections.getOrDefault(peer.id) + for connHolder in connections: + if not isNil(connHolder.conn): + await s.cleanupConn(connHolder.conn) proc getMuxedStream(s: Switch, peerInfo: PeerInfo): Future[Connection] {.async, gcsafe.} = # if there is a muxer for the connection # use it instead to create a muxed stream - if peerInfo.id in s.muxed: - trace "connection is muxed, setting up a stream" - let muxer = s.muxed[peerInfo.id] - let conn = await muxer.newStream() - result = conn + + let muxer = s.selectMuxer(s.selectConn(peerInfo)) # always get the first muxer here + if not(isNil(muxer)): + return await muxer.newStream() proc upgradeOutgoing(s: Switch, conn: Connection): Future[Connection] {.async, gcsafe.} = - trace "handling connection", conn = $conn - result = conn + trace "handling connection", conn = $conn, oid = conn.oid - # don't mux/secure twise - if conn.peerInfo.id in s.muxed: + let sconn = await s.secure(conn) # secure the connection + if isNil(sconn): + trace "unable to secure connection, stopping upgrade", conn = $conn, + oid = conn.oid + await conn.close() return - result = await s.secure(result) # secure the connection - if isNil(result): + await s.mux(sconn) # mux it if possible + if isNil(conn.peerInfo): + trace "unable to mux connection, stopping upgrade", conn = $conn, + oid = conn.oid + await sconn.close() return - await s.mux(result) # mux it if possible - s.connections[conn.peerInfo.id] = result + libp2p_peers.set(s.connections.len.int64) + trace "succesfully upgraded outgoing connection", conn = $conn, + oid = conn.oid, + uoid = sconn.oid + result = sconn proc upgradeIncoming(s: Switch, conn: Connection) {.async, gcsafe.} = - trace "upgrading incoming connection", conn = $conn + trace "upgrading incoming connection", conn = $conn, oid = conn.oid let ms = newMultistream() # secure incoming connections @@ -216,7 +339,7 @@ proc upgradeIncoming(s: Switch, conn: Connection) {.async, gcsafe.} = proto: string) {.async, gcsafe, closure.} = try: - trace "Securing connection" + trace "Securing connection", oid = conn.oid let secure = s.secureManagers.filterIt(it.codec == proto)[0] let sconn = await secure.secure(conn, false) if sconn.isNil: @@ -257,43 +380,60 @@ proc subscribeToPeer*(s: Switch, peerInfo: PeerInfo) {.async, gcsafe.} proc internalConnect(s: Switch, peer: PeerInfo): Future[Connection] {.async.} = + + if s.peerInfo.peerId == peer.peerId: + raise newException(CatchableError, "can't dial self!") + let id = peer.id - trace "Dialing peer", peer = id - var conn = s.connections.getOrDefault(id) - if conn.isNil or conn.closed: - for t in s.transports: # for each transport - for a in peer.addrs: # for each address - if t.handles(a): # check if it can dial it - trace "Dialing address", address = $a - try: - conn = await t.dial(a) - libp2p_dialed_peers.inc() - except CatchableError as exc: - trace "dialing failed", exc = exc.msg - libp2p_failed_dials.inc() - continue + let lock = s.dialLock.mgetOrPut(id, newAsyncLock()) + var conn: Connection - # make sure to assign the peer to the connection - conn.peerInfo = peer + try: + await lock.acquire() + trace 
"about to dial peer", peer = id + conn = s.selectConn(peer) + if conn.isNil or conn.closed: + trace "Dialing peer", peer = id + for t in s.transports: # for each transport + for a in peer.addrs: # for each address + if t.handles(a): # check if it can dial it + trace "Dialing address", address = $a + try: + conn = await t.dial(a) + libp2p_dialed_peers.inc() + except CatchableError as exc: + trace "dialing failed", exc = exc.msg + libp2p_failed_dials.inc() + continue - conn = await s.upgradeOutgoing(conn) - if isNil(conn): - libp2p_failed_upgrade.inc() - continue + # make sure to assign the peer to the connection + conn.peerInfo = peer + conn = await s.upgradeOutgoing(conn) + if isNil(conn): + libp2p_failed_upgrade.inc() + continue - conn.closeEvent.wait() - .addCallback do(udata: pointer): - asyncCheck s.cleanupConn(conn) + conn.closeEvent.wait() + .addCallback do(udata: pointer): + asyncCheck s.cleanupConn(conn) + break + else: + trace "Reusing existing connection", oid = conn.oid + except CatchableError as exc: + trace "exception connecting to peer", exc = exc.msg + if not(isNil(conn)): + await conn.close() - libp2p_peers.inc() - break - else: - trace "Reusing existing connection" + raise exc # re-raise + finally: + if lock.locked(): + lock.release() if not isNil(conn): + doAssert(conn.peerInfo.id in s.connections, "connection not tracked!") + trace "dial succesfull", oid = conn.oid await s.subscribeToPeer(peer) - - result = conn + result = conn proc connect*(s: Switch, peer: PeerInfo) {.async.} = var conn = await s.internalConnect(peer) @@ -314,9 +454,9 @@ proc dial*(s: Switch, result = conn let stream = await s.getMuxedStream(peer) if not isNil(stream): - trace "Connection is muxed, return muxed stream" + trace "Connection is muxed, return muxed stream", oid = conn.oid result = stream - trace "Attempting to select remote", proto = proto + trace "Attempting to select remote", proto = proto, oid = conn.oid if not await s.ms.select(result, proto): raise newException(CatchableError, "Unable to select sub-protocol " & proto) @@ -338,7 +478,6 @@ proc start*(s: Switch): Future[seq[Future[void]]] {.async, gcsafe.} = proc handle(conn: Connection): Future[void] {.async, closure, gcsafe.} = try: try: - libp2p_peers.inc() await s.upgradeIncoming(conn) # perform upgrade on incoming connection finally: await s.cleanupConn(conn) @@ -358,6 +497,7 @@ proc start*(s: Switch): Future[seq[Future[void]]] {.async, gcsafe.} = if s.pubSub.isSome: await s.pubSub.get().start() + info "started libp2p node", peer = $s.peerInfo, addrs = s.peerInfo.addrs result = startFuts # listen for incoming connections proc stop*(s: Switch) {.async.} = @@ -370,11 +510,12 @@ proc stop*(s: Switch) {.async.} = if s.pubSub.isSome: await s.pubSub.get().stop() - for conn in toSeq(s.connections.values): - try: - await s.cleanupConn(conn) - except CatchableError as exc: - warn "error cleaning up connections" + for conns in toSeq(s.connections.values): + for conn in conns: + try: + await s.cleanupConn(conn.conn) + except CatchableError as exc: + warn "error cleaning up connections" for t in s.transports: try: @@ -463,8 +604,8 @@ proc newSwitch*(peerInfo: PeerInfo, result.peerInfo = peerInfo result.ms = newMultistream() result.transports = transports - result.connections = initTable[string, Connection]() - result.muxed = initTable[string, Muxer]() + result.connections = initTable[string, seq[ConnectionHolder]]() + result.muxed = initTable[string, seq[MuxerHolder]]() result.identity = identity result.muxers = muxers result.secureManagers = 
@secureManagers @@ -494,11 +635,9 @@ proc newSwitch*(peerInfo: PeerInfo, # identify it muxer.connection.peerInfo = await s.identify(stream) - # store muxer for connection - s.muxed[muxer.connection.peerInfo.id] = muxer - - # store muxed connection - s.connections[muxer.connection.peerInfo.id] = muxer.connection + # store muxer and muxed connection + await s.storeConn(muxer, Direction.In) + libp2p_peers.set(s.connections.len.int64) muxer.connection.closeEvent.wait() .addCallback do(udata: pointer): @@ -506,6 +645,7 @@ proc newSwitch*(peerInfo: PeerInfo, # try establishing a pubsub connection await s.subscribeToPeer(muxer.connection.peerInfo) + except CatchableError as exc: libp2p_failed_upgrade.inc() trace "exception in muxer handler", exc = exc.msg diff --git a/tests/pubsub/testgossipsub.nim b/tests/pubsub/testgossipsub.nim index a6d4b927b..a72d67c3d 100644 --- a/tests/pubsub/testgossipsub.nim +++ b/tests/pubsub/testgossipsub.nim @@ -46,6 +46,7 @@ proc waitSub(sender, receiver: auto; key: string) {.async, gcsafe.} = suite "GossipSub": teardown: for tracker in testTrackers(): + # echo tracker.dump() check tracker.isLeaked() == false test "GossipSub validation should succeed": diff --git a/tests/testinterop.nim b/tests/testinterop.nim index 1648aa81a..e3dea9350 100644 --- a/tests/testinterop.nim +++ b/tests/testinterop.nim @@ -189,6 +189,7 @@ suite "Interop": check string.fromBytes(await stream.transp.readLp()) == "test 3" asyncDiscard stream.transp.writeLp("test 4") testFuture.complete() + await stream.close() await daemonNode.addHandler(protos, daemonHandler) let conn = await nativeNode.dial(NativePeerInfo.init(daemonPeer.peer, @@ -240,6 +241,7 @@ suite "Interop": var line = await stream.transp.readLine() check line == expect testFuture.complete(line) + await stream.close() await daemonNode.addHandler(protos, daemonHandler) let conn = await nativeNode.dial(NativePeerInfo.init(daemonPeer.peer, @@ -285,9 +287,12 @@ suite "Interop": discard await stream.transp.writeLp(test) result = test == (await wait(testFuture, 10.secs)) + + await stream.close() await nativeNode.stop() await allFutures(awaiters) await daemonNode.close() + await sleepAsync(1.seconds) check: waitFor(runTests()) == true @@ -331,6 +336,7 @@ suite "Interop": await wait(testFuture, 10.secs) result = true + await stream.close() await nativeNode.stop() await allFutures(awaiters) await daemonNode.close() diff --git a/tests/testswitch.nim b/tests/testswitch.nim index cf9622208..e06212504 100644 --- a/tests/testswitch.nim +++ b/tests/testswitch.nim @@ -192,8 +192,8 @@ suite "Switch": await switch2.connect(switch1.peerInfo) - check switch1.connections.len > 0 - check switch2.connections.len > 0 + check switch1.connections[switch2.peerInfo.id].len > 0 + check switch2.connections[switch1.peerInfo.id].len > 0 await sleepAsync(100.millis) await switch2.disconnect(switch1.peerInfo) @@ -207,8 +207,8 @@ suite "Switch": # echo connTracker.dump() # check connTracker.isLeaked() == false - check switch1.connections.len == 0 - check switch2.connections.len == 0 + check switch2.peerInfo.id notin switch1.connections + check switch1.peerInfo.id notin switch2.connections await allFuturesThrowing( switch1.stop(), From 1a2f336eb580ef3dad4c7e393c4c2b561dc59648 Mon Sep 17 00:00:00 2001 From: Giovanni Petrantoni Date: Sat, 27 Jun 2020 13:35:07 +0900 Subject: [PATCH 03/28] use var semantics to optimize table access --- libp2p/protocols/pubsub/gossipsub.nim | 83 ++++++++++++++------------- 1 file changed, 43 insertions(+), 40 deletions(-) diff --git 
a/libp2p/protocols/pubsub/gossipsub.nim b/libp2p/protocols/pubsub/gossipsub.nim index 85bff0f59..3d3fc366a 100644 --- a/libp2p/protocols/pubsub/gossipsub.nim +++ b/libp2p/protocols/pubsub/gossipsub.nim @@ -73,57 +73,60 @@ method init*(g: GossipSub) = g.handler = handler g.codec = GossipSubCodec -proc replenishFanout(g: GossipSub, topic: string) {.async.} = +proc replenishFanout(g: GossipSub, topic: string) = ## get fanout peers for a topic - trace "about to replenish fanout" - if topic notin g.fanout: - g.fanout[topic] = initHashSet[string]() + debug "about to replenish fanout", topic - if g.fanout.getOrDefault(topic).len < GossipSubDLo: - trace "replenishing fanout", peers = g.fanout.getOrDefault(topic).len - if topic in g.gossipsub: - for p in g.gossipsub.getOrDefault(topic): - if not g.fanout[topic].containsOrIncl(p): - if g.fanout.getOrDefault(topic).len == GossipSubD: + var topicHash = g.fanout.mgetOrPut(topic, initHashSet[string]()) + + if topicHash.len < GossipSubDLo: + debug "replenishing fanout", peers = topicHash.len + let peers = g.gossipsub.getOrDefault(topic) + for p in peers: + if not topicHash.containsOrIncl(p): + # set the fanout expiry time + g.lastFanoutPubSub[topic] = Moment.fromNow(GossipSubFanoutTTL) + if topicHash.len == GossipSubD: break - libp2p_gossipsub_peers_per_topic_fanout - .set(g.fanout.getOrDefault(topic).len.int64, labelValues = [topic]) - trace "fanout replenished with peers", peers = g.fanout.getOrDefault(topic).len + libp2p_gossipsub_peers_per_topic_fanout.set(topicHash.len.int64, labelValues = [topic]) + debug "fanout replenished with peers", peers = topicHash.len proc rebalanceMesh(g: GossipSub, topic: string) {.async.} = try: trace "about to rebalance mesh" - # create a mesh topic that we're subscribing to - if topic notin g.mesh: - g.mesh[topic] = initHashSet[string]() - if g.mesh.getOrDefault(topic).len < GossipSubDlo: + var + topicHash = g.mesh.mgetOrPut(topic, initHashSet[string]()) + fanOutHash = g.fanout.mgetOrPut(topic, initHashSet[string]()) + gossipHash = g.gossipsub.mgetOrPut(topic, initHashSet[string]()) + + if topicHash.len < GossipSubDlo: trace "replenishing mesh", topic # replenish the mesh if we're below GossipSubDlo - while g.mesh.getOrDefault(topic).len < GossipSubD: - trace "gathering peers", peers = g.mesh.getOrDefault(topic).len + while topicHash.len < GossipSubD: + trace "gathering peers", peers = topicHash.len await sleepAsync(1.millis) # don't starve the event loop var id: string - if topic in g.fanout and g.fanout.getOrDefault(topic).len > 0: + if fanOutHash.len > 0: trace "getting peer from fanout", topic, - peers = g.fanout.getOrDefault(topic).len + peers = fanOutHash.len - id = sample(toSeq(g.fanout.getOrDefault(topic))) - g.fanout[topic].excl(id) + id = sample(toSeq(fanOutHash)) + fanOutHash.excl(id) - if id in g.fanout[topic]: + if id in fanOutHash: continue # we already have this peer in the mesh, try again trace "got fanout peer", peer = id - elif topic in g.gossipsub and g.gossipsub.getOrDefault(topic).len > 0: + elif gossipHash.len > 0: trace "getting peer from gossipsub", topic, - peers = g.gossipsub.getOrDefault(topic).len + peers = gossipHash.len - id = sample(toSeq(g.gossipsub[topic])) - g.gossipsub[topic].excl(id) + id = sample(toSeq(gossipHash)) + gossipHash.excl(id) - if id in g.mesh[topic]: + if id in topicHash: continue # we already have this peer in the mesh, try again trace "got gossipsub peer", peer = id @@ -131,34 +134,34 @@ proc rebalanceMesh(g: GossipSub, topic: string) {.async.} = trace "no more peers" 
break - g.mesh[topic].incl(id) + topicHash.incl(id) if id in g.peers: let p = g.peers[id] # send a graft message to the peer await p.sendGraft(@[topic]) # prune peers if we've gone over - if g.mesh.getOrDefault(topic).len > GossipSubDhi: - trace "about to prune mesh", mesh = g.mesh.getOrDefault(topic).len - while g.mesh.getOrDefault(topic).len > GossipSubD: - trace "pruning peers", peers = g.mesh[topic].len - let id = toSeq(g.mesh[topic])[rand(0.. GossipSubDhi: + trace "about to prune mesh", mesh = topicHash.len + while topicHash.len > GossipSubD: + trace "pruning peers", peers = topicHash.len + let id = toSeq(topicHash)[rand(0.. Date: Sat, 27 Jun 2020 14:59:48 +0900 Subject: [PATCH 04/28] wip... lvalues don't work properly sadly... --- libp2p/protocols/pubsub/floodsub.nim | 7 +- libp2p/protocols/pubsub/gossipsub.nim | 104 ++++++++++++++------------ libp2p/protocols/pubsub/pubsub.nim | 5 +- libp2p/switch.nim | 10 +-- tests/pubsub/testgossipsub.nim | 48 ++++++------ 5 files changed, 97 insertions(+), 77 deletions(-) diff --git a/libp2p/protocols/pubsub/floodsub.nim b/libp2p/protocols/pubsub/floodsub.nim index 7f68e6d13..a78d6f96a 100644 --- a/libp2p/protocols/pubsub/floodsub.nim +++ b/libp2p/protocols/pubsub/floodsub.nim @@ -117,8 +117,9 @@ method subscribeToPeer*(p: FloodSub, method publish*(f: FloodSub, topic: string, - data: seq[byte]) {.async.} = - await procCall PubSub(f).publish(topic, data) + data: seq[byte]): Future[int] {.async.} = + # base returns always 0 + discard await procCall PubSub(f).publish(topic, data) if data.len <= 0 or topic.len <= 0: trace "topic or data missing, skipping publish" @@ -143,6 +144,8 @@ method publish*(f: FloodSub, libp2p_pubsub_messages_published.inc(labelValues = [topic]) + return sent.filterIt(not it.failed).len + method unsubscribe*(f: FloodSub, topics: seq[TopicPair]) {.async.} = await procCall PubSub(f).unsubscribe(topics) diff --git a/libp2p/protocols/pubsub/gossipsub.nim b/libp2p/protocols/pubsub/gossipsub.nim index 3d3fc366a..8697de340 100644 --- a/libp2p/protocols/pubsub/gossipsub.nim +++ b/libp2p/protocols/pubsub/gossipsub.nim @@ -56,6 +56,7 @@ type heartbeatFut: Future[void] # cancellation future for heartbeat interval heartbeatRunning: bool heartbeatLock: AsyncLock # heartbeat lock to prevent two consecutive concurrent heartbeats + subLock: AsyncLock declareGauge(libp2p_gossipsub_peers_per_topic_mesh, "gossipsub peers per topic in mesh", labels = ["topic"]) declareGauge(libp2p_gossipsub_peers_per_topic_fanout, "gossipsub peers per topic in fanout", labels = ["topic"]) @@ -75,8 +76,8 @@ method init*(g: GossipSub) = proc replenishFanout(g: GossipSub, topic: string) = ## get fanout peers for a topic - debug "about to replenish fanout", topic - + debug "about to replenish fanout", topic, avail = g.gossipsub[topic].len + var topicHash = g.fanout.mgetOrPut(topic, initHashSet[string]()) if topicHash.len < GossipSubDLo: @@ -87,14 +88,16 @@ proc replenishFanout(g: GossipSub, topic: string) = # set the fanout expiry time g.lastFanoutPubSub[topic] = Moment.fromNow(GossipSubFanoutTTL) if topicHash.len == GossipSubD: - break + break libp2p_gossipsub_peers_per_topic_fanout.set(topicHash.len.int64, labelValues = [topic]) - debug "fanout replenished with peers", peers = topicHash.len + debug "fanout replenished with peers", peers = g.fanout[topic].len proc rebalanceMesh(g: GossipSub, topic: string) {.async.} = try: trace "about to rebalance mesh" + + await g.subLock.acquire() var topicHash = g.mesh.mgetOrPut(topic, initHashSet[string]()) @@ -106,10 +109,9 
@@ proc rebalanceMesh(g: GossipSub, topic: string) {.async.} = # replenish the mesh if we're below GossipSubDlo while topicHash.len < GossipSubD: trace "gathering peers", peers = topicHash.len - await sleepAsync(1.millis) # don't starve the event loop var id: string if fanOutHash.len > 0: - trace "getting peer from fanout", topic, + debug "getting peer from fanout", topic, peers = fanOutHash.len id = sample(toSeq(fanOutHash)) @@ -120,7 +122,7 @@ proc rebalanceMesh(g: GossipSub, topic: string) {.async.} = trace "got fanout peer", peer = id elif gossipHash.len > 0: - trace "getting peer from gossipsub", topic, + debug "getting peer from gossipsub", topic, peers = gossipHash.len id = sample(toSeq(gossipHash)) @@ -161,10 +163,12 @@ proc rebalanceMesh(g: GossipSub, topic: string) {.async.} = libp2p_gossipsub_peers_per_topic_mesh .set(topicHash.len.int64, labelValues = [topic]) - trace "mesh balanced, got peers", peers = topicHash.len, + debug "mesh balanced, got peers", peers = g.mesh[topic].len, topicId = topic except CatchableError as exc: trace "exception occurred re-balancing mesh", exc = exc.msg + finally: + g.subLock.release() proc dropFanoutPeers(g: GossipSub) {.async.} = # drop peers that we haven't published to in @@ -174,6 +178,7 @@ proc dropFanoutPeers(g: GossipSub) {.async.} = if Moment.now > val: dropping.add(topic) g.fanout.del(topic) + debug "dropping fanout topic", topic for topic in dropping: g.lastFanoutPubSub.del(topic) @@ -221,6 +226,7 @@ proc heartbeat(g: GossipSub) {.async.} = for t in toSeq(g.topics.keys): await g.rebalanceMesh(t) + g.replenishFanout(t) await g.dropFanoutPeers() let peers = g.getGossipPeers() @@ -281,20 +287,26 @@ method subscribeTopic*(g: GossipSub, peerId: string) {.gcsafe, async.} = await procCall PubSub(g).subscribeTopic(topic, subscribe, peerId) - if topic notin g.gossipsub: - g.gossipsub[topic] = initHashSet[string]() + try: + await g.subLock.acquire() - if subscribe: - trace "adding subscription for topic", peer = peerId, name = topic - # subscribe remote peer to the topic - g.gossipsub[topic].incl(peerId) - else: - trace "removing subscription for topic", peer = peerId, name = topic - # unsubscribe remote peer from the topic - g.gossipsub[topic].excl(peerId) + var gossipHash = g.gossipsub.mgetOrPut(topic, initHashSet[string]()) + + if subscribe: + debug "adding subscription for topic", peer = peerId, name = topic + # subscribe remote peer to the topic + gossipHash.incl(peerId) + else: + trace "removing subscription for topic", peer = peerId, name = topic + # unsubscribe remote peer from the topic + gossipHash.excl(peerId) + finally: + g.subLock.release() libp2p_gossipsub_peers_per_topic_gossipsub - .set(g.gossipsub.getOrDefault(topic).len.int64, labelValues = [topic]) + .set(g.gossipsub[topic].len.int64, labelValues = [topic]) + + debug "gossip peers", peers = g.gossipsub[topic].len, topic if topic in g.topics: await g.rebalanceMesh(topic) @@ -385,7 +397,6 @@ method rpcHandler*(g: GossipSub, continue for t in msg.topicIDs: # for every topic in the message - await g.rebalanceMesh(t) # gather peers for each topic if t in g.floodsub: toSendPeers.incl(g.floodsub[t]) # get all floodsub peers for topic @@ -458,48 +469,48 @@ method unsubscribe*(g: GossipSub, method publish*(g: GossipSub, topic: string, - data: seq[byte]) {.async.} = - await procCall PubSub(g).publish(topic, data) + data: seq[byte]): Future[int] {.async.} = + # base returns always 0 + discard await procCall PubSub(g).publish(topic, data) trace "about to publish message on topic", name = 
topic, data = data.shortLog var peers: HashSet[string] - # TODO: we probably don't need to try multiple times - if data.len > 0 and topic.len > 0: - for _ in 0..<5: # try to get peers up to 5 times - if peers.len > 0: - break - if topic in g.topics: # if we're subscribed to the topic attempt to build a mesh - await g.rebalanceMesh(topic) - peers = g.mesh.getOrDefault(topic) - else: # send to fanout peers - await g.replenishFanout(topic) - if topic in g.fanout: - peers = g.fanout.getOrDefault(topic) - # set the fanout expiry time - g.lastFanoutPubSub[topic] = Moment.fromNow(GossipSubFanoutTTL) - - # wait a second between tries - await sleepAsync(1.seconds) + if topic.len > 0: # data could be 0/empty + if topic in g.topics: # if we're subscribed use the mesh + peers = g.mesh.getOrDefault(topic) + else: # not subscribed, send to fanout peers + peers = g.fanout.getOrDefault(topic) let msg = newMessage(g.peerInfo, data, topic, g.sign) trace "created new message", msg + + trace "publishing on topic", name = topic, peers = peers + if msg.msgId notin g.mcache: + g.mcache.put(msg) + var sent: seq[Future[void]] for p in peers: + # avoid sending to self if p == g.peerInfo.id: continue - trace "publishing on topic", name = topic - if msg.msgId notin g.mcache: - g.mcache.put(msg) - - if p in g.peers: - sent.add(g.peers[p].send(@[RPCMsg(messages: @[msg])])) - checkFutures(await allFinished(sent)) + let peer = g.peers.getOrDefault(p) + if not isNil(peer.peerInfo): + trace "publish: sending message to peer", peer = p + sent.add(peer.send(@[RPCMsg(messages: @[msg])])) + + sent = await allFinished(sent) + checkFutures(sent) libp2p_pubsub_messages_published.inc(labelValues = [topic]) + return sent.filterIt(not it.failed).len + else: + return 0 + + method start*(g: GossipSub) {.async.} = debug "gossipsub start" @@ -543,3 +554,4 @@ method initPubSub*(g: GossipSub) = g.gossip = initTable[string, seq[ControlIHave]]() # pending gossip g.control = initTable[string, ControlMessage]() # pending control messages g.heartbeatLock = newAsyncLock() + g.subLock = newAsyncLock() diff --git a/libp2p/protocols/pubsub/pubsub.nim b/libp2p/protocols/pubsub/pubsub.nim index c893892cf..d0814c17e 100644 --- a/libp2p/protocols/pubsub/pubsub.nim +++ b/libp2p/protocols/pubsub/pubsub.nim @@ -225,8 +225,7 @@ method subscribe*(p: PubSub, method publish*(p: PubSub, topic: string, - data: seq[byte]) {.base, async.} = - # TODO: Should throw indicating success/failure + data: seq[byte]): Future[int] {.base, async.} = ## publish to a ``topic`` if p.triggerSelf and topic in p.topics: for h in p.topics[topic].handler: @@ -241,6 +240,8 @@ method publish*(p: PubSub, # more cleanup though debug "Could not write to pubsub connection", msg = exc.msg + return 0 + method initPubSub*(p: PubSub) {.base.} = ## perform pubsub initialization p.observers = new(seq[PubSubObserver]) diff --git a/libp2p/switch.nim b/libp2p/switch.nim index bf0463156..9d9a01470 100644 --- a/libp2p/switch.nim +++ b/libp2p/switch.nim @@ -556,7 +556,7 @@ proc subscribe*(s: Switch, topic: string, retFuture.fail(newNoPubSubException()) return retFuture - result = s.pubSub.get().subscribe(topic, handler) + return s.pubSub.get().subscribe(topic, handler) proc unsubscribe*(s: Switch, topics: seq[TopicPair]): Future[void] = ## unsubscribe from topics @@ -565,16 +565,16 @@ proc unsubscribe*(s: Switch, topics: seq[TopicPair]): Future[void] = retFuture.fail(newNoPubSubException()) return retFuture - result = s.pubSub.get().unsubscribe(topics) + return s.pubSub.get().unsubscribe(topics) 
-proc publish*(s: Switch, topic: string, data: seq[byte]): Future[void] = +proc publish*(s: Switch, topic: string, data: seq[byte]): Future[int] = # pubslish to pubsub topic if s.pubSub.isNone: - var retFuture = newFuture[void]("Switch.publish") + var retFuture = newFuture[int]("Switch.publish") retFuture.fail(newNoPubSubException()) return retFuture - result = s.pubSub.get().publish(topic, data) + return s.pubSub.get().publish(topic, data) proc addValidator*(s: Switch, topics: varargs[string], diff --git a/tests/pubsub/testgossipsub.nim b/tests/pubsub/testgossipsub.nim index a72d67c3d..57c2069e3 100644 --- a/tests/pubsub/testgossipsub.nim +++ b/tests/pubsub/testgossipsub.nim @@ -43,6 +43,14 @@ proc waitSub(sender, receiver: auto; key: string) {.async, gcsafe.} = dec ceil doAssert(ceil > 0, "waitSub timeout!") +template tryPublish(call: untyped, require: int, times: int = 10, wait: Duration = 1.seconds): untyped = + var limit = times + while (call) < require and limit > 0: + await sleepAsync(wait) + limit.dec() + if limit == 0: + doAssert(false, "Failed to publish!") + suite "GossipSub": teardown: for tracker in testTrackers(): @@ -64,9 +72,7 @@ suite "GossipSub": await subscribeNodes(nodes) await nodes[0].subscribe("foobar", handler) - await waitSub(nodes[1], nodes[0], "foobar") await nodes[1].subscribe("foobar", handler) - await waitSub(nodes[0], nodes[1], "foobar") var validatorFut = newFuture[bool]() proc validator(topic: string, @@ -77,8 +83,8 @@ suite "GossipSub": result = true nodes[1].addValidator("foobar", validator) - await nodes[0].publish("foobar", "Hello!".toBytes()) - + tryPublish await nodes[0].publish("foobar", "Hello!".toBytes()), 1 + result = (await validatorFut) and (await handlerFut) await allFuturesThrowing( nodes[0].stop(), @@ -101,17 +107,16 @@ suite "GossipSub": await subscribeNodes(nodes) await nodes[1].subscribe("foobar", handler) - await waitSub(nodes[0], nodes[1], "foobar") var validatorFut = newFuture[bool]() proc validator(topic: string, message: Message): Future[bool] {.async.} = - validatorFut.complete(true) result = false + validatorFut.complete(true) nodes[1].addValidator("foobar", validator) - await nodes[0].publish("foobar", "Hello!".toBytes()) + tryPublish await nodes[0].publish("foobar", "Hello!".toBytes()), 1 result = await validatorFut await allFuturesThrowing( @@ -135,10 +140,9 @@ suite "GossipSub": awaiters.add((await nodes[1].start())) await subscribeNodes(nodes) + await nodes[1].subscribe("foo", handler) - await waitSub(nodes[0], nodes[1], "foo") await nodes[1].subscribe("bar", handler) - await waitSub(nodes[0], nodes[1], "bar") var passed, failed: Future[bool] = newFuture[bool]() proc validator(topic: string, @@ -152,8 +156,8 @@ suite "GossipSub": false nodes[1].addValidator("foo", "bar", validator) - await nodes[0].publish("foo", "Hello!".toBytes()) - await nodes[0].publish("bar", "Hello!".toBytes()) + tryPublish await nodes[0].publish("foo", "Hello!".toBytes()), 1 + tryPublish await nodes[0].publish("bar", "Hello!".toBytes()), 1 result = ((await passed) and (await failed) and (await handlerFut)) await allFuturesThrowing( @@ -179,7 +183,7 @@ suite "GossipSub": await subscribeNodes(nodes) await nodes[1].subscribe("foobar", handler) - await sleepAsync(1.seconds) + await sleepAsync(10.seconds) let gossip1 = GossipSub(nodes[0].pubSub.get()) let gossip2 = GossipSub(nodes[1].pubSub.get()) @@ -273,7 +277,7 @@ suite "GossipSub": nodes[1].pubsub.get().addObserver(obs1) nodes[0].pubsub.get().addObserver(obs2) - await nodes[0].publish("foobar", 
"Hello!".toBytes()) + tryPublish await nodes[0].publish("foobar", "Hello!".toBytes()), 1 var gossipSub1: GossipSub = GossipSub(nodes[0].pubSub.get()) @@ -310,7 +314,7 @@ suite "GossipSub": await nodes[1].subscribe("foobar", handler) await waitSub(nodes[0], nodes[1], "foobar") - await nodes[0].publish("foobar", "Hello!".toBytes()) + tryPublish await nodes[0].publish("foobar", "Hello!".toBytes()), 1 result = await passed @@ -353,10 +357,10 @@ suite "GossipSub": await allFuturesThrowing(subs) - await wait(nodes[0].publish("foobar", - cast[seq[byte]]("from node " & - nodes[1].peerInfo.id)), - 1.minutes) + tryPublish await wait(nodes[0].publish("foobar", + cast[seq[byte]]("from node " & + nodes[1].peerInfo.id)), + 1.minutes), runs await wait(seenFut, 2.minutes) check: seen.len >= runs @@ -401,10 +405,10 @@ suite "GossipSub": subs &= dialer.subscribe("foobar", handler) await allFuturesThrowing(subs) - await wait(nodes[0].publish("foobar", - cast[seq[byte]]("from node " & - nodes[1].peerInfo.id)), - 1.minutes) + tryPublish await wait(nodes[0].publish("foobar", + cast[seq[byte]]("from node " & + nodes[1].peerInfo.id)), + 1.minutes), runs await wait(seenFut, 5.minutes) check: seen.len >= runs From ccb323ad0f0da8dfe016e8567efcd5f96f0f3e91 Mon Sep 17 00:00:00 2001 From: Giovanni Petrantoni Date: Sat, 27 Jun 2020 20:24:29 +0900 Subject: [PATCH 05/28] big publish refactor, replenish and balance --- libp2p/protocols/pubsub/gossipsub.nim | 128 ++++++++++++-------------- tests/pubsub/testgossipsub.nim | 2 +- 2 files changed, 62 insertions(+), 68 deletions(-) diff --git a/libp2p/protocols/pubsub/gossipsub.nim b/libp2p/protocols/pubsub/gossipsub.nim index 8697de340..0ffb19f25 100644 --- a/libp2p/protocols/pubsub/gossipsub.nim +++ b/libp2p/protocols/pubsub/gossipsub.nim @@ -76,99 +76,98 @@ method init*(g: GossipSub) = proc replenishFanout(g: GossipSub, topic: string) = ## get fanout peers for a topic - debug "about to replenish fanout", topic, avail = g.gossipsub[topic].len - - var topicHash = g.fanout.mgetOrPut(topic, initHashSet[string]()) + debug "about to replenish fanout", avail = g.gossipsub.getOrDefault(topic).len - if topicHash.len < GossipSubDLo: - debug "replenishing fanout", peers = topicHash.len - let peers = g.gossipsub.getOrDefault(topic) - for p in peers: - if not topicHash.containsOrIncl(p): - # set the fanout expiry time - g.lastFanoutPubSub[topic] = Moment.fromNow(GossipSubFanoutTTL) - if topicHash.len == GossipSubD: - break + if topic notin g.fanout: + g.fanout[topic] = initHashSet[string]() - libp2p_gossipsub_peers_per_topic_fanout.set(topicHash.len.int64, labelValues = [topic]) - debug "fanout replenished with peers", peers = g.fanout[topic].len + if g.fanout.getOrDefault(topic).len < GossipSubDLo: + debug "replenishing fanout", peers = g.fanout.getOrDefault(topic).len + if topic in g.gossipsub: + for p in g.gossipsub.getOrDefault(topic): + if not g.fanout[topic].containsOrIncl(p): + g.lastFanoutPubSub[topic] = Moment.fromNow(GossipSubFanoutTTL) + if g.fanout.getOrDefault(topic).len == GossipSubD: + break + + libp2p_gossipsub_peers_per_topic_fanout + .set(g.fanout.getOrDefault(topic).len.int64, labelValues = [topic]) + debug "fanout replenished with peers", peers = g.fanout.getOrDefault(topic).len proc rebalanceMesh(g: GossipSub, topic: string) {.async.} = try: trace "about to rebalance mesh" - - await g.subLock.acquire() + # create a mesh topic that we're subscribing to + if topic notin g.mesh: + g.mesh[topic] = initHashSet[string]() - var - topicHash = g.mesh.mgetOrPut(topic, 
initHashSet[string]()) - fanOutHash = g.fanout.mgetOrPut(topic, initHashSet[string]()) - gossipHash = g.gossipsub.mgetOrPut(topic, initHashSet[string]()) - - if topicHash.len < GossipSubDlo: + if g.mesh.getOrDefault(topic).len < GossipSubDlo: trace "replenishing mesh", topic # replenish the mesh if we're below GossipSubDlo - while topicHash.len < GossipSubD: - trace "gathering peers", peers = topicHash.len + while g.mesh.getOrDefault(topic).len < GossipSubD and topic in g.topics: + trace "gathering peers", peers = g.mesh.getOrDefault(topic).len + await sleepAsync(1.millis) # don't starve the event loop var id: string - if fanOutHash.len > 0: + if topic in g.fanout and g.fanout.getOrDefault(topic).len > 0: debug "getting peer from fanout", topic, - peers = fanOutHash.len + peers = g.fanout.getOrDefault(topic).len - id = sample(toSeq(fanOutHash)) - fanOutHash.excl(id) + id = sample(toSeq(g.fanout.getOrDefault(topic))) + g.fanout[topic].excl(id) - if id in fanOutHash: + if id in g.fanout[topic]: continue # we already have this peer in the mesh, try again - trace "got fanout peer", peer = id - elif gossipHash.len > 0: + debug "got fanout peer", peer = id + elif topic in g.gossipsub and g.gossipsub.getOrDefault(topic).len > 0: debug "getting peer from gossipsub", topic, - peers = gossipHash.len + peers = g.gossipsub.getOrDefault(topic).len - id = sample(toSeq(gossipHash)) - gossipHash.excl(id) + id = sample(toSeq(g.gossipsub[topic])) + g.gossipsub[topic].excl(id) - if id in topicHash: + if id in g.mesh[topic]: continue # we already have this peer in the mesh, try again - trace "got gossipsub peer", peer = id + debug "got gossipsub peer", peer = id else: trace "no more peers" break - topicHash.incl(id) + g.mesh[topic].incl(id) if id in g.peers: let p = g.peers[id] # send a graft message to the peer await p.sendGraft(@[topic]) + + # run fanout + g.replenishFanout(topic) # prune peers if we've gone over - if topicHash.len > GossipSubDhi: - trace "about to prune mesh", mesh = topicHash.len - while topicHash.len > GossipSubD: - trace "pruning peers", peers = topicHash.len - let id = toSeq(topicHash)[rand(0.. 
GossipSubDhi: + trace "about to prune mesh", mesh = g.mesh.getOrDefault(topic).len + while g.mesh.getOrDefault(topic).len > GossipSubD: + trace "pruning peers", peers = g.mesh[topic].len + let id = toSeq(g.mesh[topic])[rand(0..= runs From 0ddfb31f67b587810fa2bc882804504f9cf7023b Mon Sep 17 00:00:00 2001 From: Giovanni Petrantoni Date: Sat, 27 Jun 2020 20:49:53 +0900 Subject: [PATCH 06/28] fix internal tests --- tests/pubsub/testgossipinternal.nim | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/tests/pubsub/testgossipinternal.nim b/tests/pubsub/testgossipinternal.nim index 82f785cd7..ce297ce6a 100644 --- a/tests/pubsub/testgossipinternal.nim +++ b/tests/pubsub/testgossipinternal.nim @@ -57,6 +57,10 @@ suite "GossipSub internal": let topic = "foobar" gossipSub.gossipsub[topic] = initHashSet[string]() + # our implementation requires that topic is in gossipSub.topics + # for this test to work properly and publish properly + gossipSub.topics[topic] = Topic() + var conns = newSeq[Connection]() for i in 0..<15: let conn = newBufferStream(noop) @@ -100,7 +104,7 @@ suite "GossipSub internal": gossipSub.gossipsub[topic].incl(peerInfo.id) check gossipSub.gossipsub[topic].len == 15 - await gossipSub.replenishFanout(topic) + gossipSub.replenishFanout(topic) check gossipSub.fanout[topic].len == GossipSubD await allFuturesThrowing(conns.mapIt(it.close())) From 902880ef1f26512305241befe18073c73d6f966a Mon Sep 17 00:00:00 2001 From: Dmitriy Ryajov Date: Sat, 27 Jun 2020 11:33:34 -0600 Subject: [PATCH 07/28] consolidate reading in lpstream (#241) * consolidate reading in lpstream * remove debug echo * throw if not enough bytes where read * tune log level * set eof flag * test readExactly to fail on not enough bytes --- libp2p/muxers/mplex/mplex.nim | 1 - libp2p/protocols/identify.nim | 4 +-- libp2p/protocols/secure/noise.nim | 6 ++-- libp2p/protocols/secure/secure.nim | 23 ------------ libp2p/stream/bufferstream.nim | 58 ++++++++---------------------- libp2p/stream/chronosstream.nim | 9 ----- libp2p/stream/lpstream.nim | 23 ++++++++---- libp2p/switch.nim | 8 ++--- libp2p/transports/tcptransport.nim | 5 ++- tests/pubsub/testgossipsub.nim | 5 +-- tests/testbufferstream.nim | 21 +++++++++++ tests/testidentify.nim | 1 + tests/testmultistream.nim | 39 ++++++++++++++------ tests/testnoise.nim | 9 ++--- tests/testtransport.nim | 1 + 15 files changed, 104 insertions(+), 109 deletions(-) diff --git a/libp2p/muxers/mplex/mplex.nim b/libp2p/muxers/mplex/mplex.nim index f11ae6014..eee7a56cf 100644 --- a/libp2p/muxers/mplex/mplex.nim +++ b/libp2p/muxers/mplex/mplex.nim @@ -202,5 +202,4 @@ method close*(m: Mplex) {.async, gcsafe.} = finally: m.remote.clear() m.local.clear() - # m.handlerFuts = @[] m.isClosed = true diff --git a/libp2p/protocols/identify.nim b/libp2p/protocols/identify.nim index 0f410ca7d..3c25f0984 100644 --- a/libp2p/protocols/identify.nim +++ b/libp2p/protocols/identify.nim @@ -27,7 +27,7 @@ const ProtoVersion* = "ipfs/0.1.0" AgentVersion* = "nim-libp2p/0.0.1" -#TODO: implment push identify, leaving out for now as it is not essential +#TODO: implement push identify, leaving out for now as it is not essential type IdentityNoMatchError* = object of CatchableError @@ -141,7 +141,7 @@ proc identify*(p: Identify, if not isNil(remotePeerInfo) and result.pubKey.isSome: let peer = PeerID.init(result.pubKey.get()) - # do a string comaprison of the ids, + # do a string comparison of the ids, # because that is the only thing we # have in most cases if peer != remotePeerInfo.peerId: diff 
--git a/libp2p/protocols/secure/noise.nim b/libp2p/protocols/secure/noise.nim index 0e286b90d..cdd37eec0 100644 --- a/libp2p/protocols/secure/noise.nim +++ b/libp2p/protocols/secure/noise.nim @@ -413,7 +413,7 @@ method write*(sconn: NoiseConnection, message: seq[byte]): Future[void] {.async. await sconn.stream.write(outbuf) method handshake*(p: Noise, conn: Connection, initiator: bool): Future[SecureConn] {.async.} = - debug "Starting Noise handshake", initiator, peer = $conn + trace "Starting Noise handshake", initiator, peer = $conn # https://github.com/libp2p/specs/tree/master/noise#libp2p-data-in-handshake-messages let @@ -454,7 +454,7 @@ method handshake*(p: Noise, conn: Connection, initiator: bool): Future[SecureCon if not remoteSig.verify(verifyPayload, remotePubKey): raise newException(NoiseHandshakeError, "Noise handshake signature verify failed.") else: - debug "Remote signature verified", peer = $conn + trace "Remote signature verified", peer = $conn if initiator and not isNil(conn.peerInfo): let pid = PeerID.init(remotePubKey) @@ -477,7 +477,7 @@ method handshake*(p: Noise, conn: Connection, initiator: bool): Future[SecureCon secure.readCs = handshakeRes.cs1 secure.writeCs = handshakeRes.cs2 - debug "Noise handshake completed!", initiator, peer = $secure.peerInfo + trace "Noise handshake completed!", initiator, peer = $secure.peerInfo return secure diff --git a/libp2p/protocols/secure/secure.nim b/libp2p/protocols/secure/secure.nim index 5a594f161..baeaece02 100644 --- a/libp2p/protocols/secure/secure.nim +++ b/libp2p/protocols/secure/secure.nim @@ -87,29 +87,6 @@ method secure*(s: Secure, conn: Connection, initiator: bool): Future[Connection] warn "securing connection failed", msg = exc.msg return nil -method readExactly*(s: SecureConn, - pbytes: pointer, - nbytes: int): - Future[void] {.async, gcsafe.} = - try: - if nbytes == 0: - return - - while s.buf.data().len < nbytes: - # TODO write decrypted content straight into buf using `prepare` - let buf = await s.readMessage() - if buf.len == 0: - raise newLPStreamIncompleteError() - s.buf.add(buf) - - var p = cast[ptr UncheckedArray[byte]](pbytes) - let consumed = s.buf.consumeTo(toOpenArray(p, 0, nbytes - 1)) - doAssert consumed == nbytes, "checked above" - except CatchableError as exc: - trace "exception reading from secure connection", exc = exc.msg, oid = s.oid - await s.close() # make sure to close the wrapped connection - raise exc - method readOnce*(s: SecureConn, pbytes: pointer, nbytes: int): diff --git a/libp2p/stream/bufferstream.nim b/libp2p/stream/bufferstream.nim index e2dfff9c7..e879766cb 100644 --- a/libp2p/stream/bufferstream.nim +++ b/libp2p/stream/bufferstream.nim @@ -15,7 +15,7 @@ ## ## It works by exposing a regular LPStream interface and ## a method ``pushTo`` to push data to the internal read -## buffer; as well as a handler that can be registrered +## buffer; as well as a handler that can be registered ## that gets triggered on every write to the stream. This ## allows using the buffered stream as a sort of proxy, ## which can be consumed as a regular LPStream but allows @@ -25,7 +25,7 @@ ## ordered and asynchronous. Reads are queued up in order ## and are suspended when not enough data available. This ## allows preserving backpressure while maintaining full -## asynchrony. Both writting to the internal buffer with +## asynchrony. 
Both writing to the internal buffer with ## ``pushTo`` as well as reading with ``read*` methods, ## will suspend until either the amount of elements in the ## buffer goes below ``maxSize`` or more data becomes available. @@ -180,7 +180,7 @@ method pushTo*(s: BufferStream, data: seq[byte]) {.base, async.} = while index < data.len and s.readBuf.len < s.maxSize: s.readBuf.addLast(data[index]) inc(index) - # trace "pushTo()", msg = "added " & $index & " bytes to readBuf", oid = s.oid + # trace "pushTo()", msg = "added " & $s.len & " bytes to readBuf", oid = s.oid # resolve the next queued read request if s.readReqs.len > 0: @@ -195,57 +195,27 @@ method pushTo*(s: BufferStream, data: seq[byte]) {.base, async.} = await s.dataReadEvent.wait() s.dataReadEvent.clear() finally: + # trace "ended", size = s.len s.lock.release() -method readExactly*(s: BufferStream, - pbytes: pointer, - nbytes: int): - Future[void] {.async.} = - ## Read exactly ``nbytes`` bytes from read-only stream ``rstream`` and store - ## it to ``pbytes``. - ## - ## If EOF is received and ``nbytes`` is not yet read, the procedure - ## will raise ``LPStreamIncompleteError``. - ## - - if s.atEof: - raise newLPStreamEOFError() - - # trace "readExactly()", requested_bytes = nbytes, oid = s.oid - var index = 0 - - if s.readBuf.len() == 0: - await s.requestReadBytes() - - let output = cast[ptr UncheckedArray[byte]](pbytes) - while index < nbytes: - while s.readBuf.len() > 0 and index < nbytes: - output[index] = s.popFirst() - inc(index) - # trace "readExactly()", read_bytes = index, oid = s.oid - - if index < nbytes: - await s.requestReadBytes() - method readOnce*(s: BufferStream, pbytes: pointer, nbytes: int): Future[int] {.async.} = - ## Perform one read operation on read-only stream ``rstream``. - ## - ## If internal buffer is not empty, ``nbytes`` bytes will be transferred from - ## internal buffer, otherwise it will wait until some bytes will be received. 
- ## - if s.atEof: raise newLPStreamEOFError() - if s.readBuf.len == 0: + if s.len() == 0: await s.requestReadBytes() - var len = if nbytes > s.readBuf.len: s.readBuf.len else: nbytes - await s.readExactly(pbytes, len) - result = len + var index = 0 + var size = min(nbytes, s.len) + let output = cast[ptr UncheckedArray[byte]](pbytes) + while s.len() > 0 and index < size: + output[index] = s.popFirst() + inc(index) + + return size method write*(s: BufferStream, msg: seq[byte]) {.async.} = ## Write sequence of bytes ``sbytes`` of length ``msglen`` to writer @@ -266,6 +236,7 @@ method write*(s: BufferStream, msg: seq[byte]) {.async.} = await s.writeHandler(msg) +# TODO: move pipe routines out proc pipe*(s: BufferStream, target: BufferStream): BufferStream = ## pipe the write end of this stream to @@ -310,6 +281,7 @@ method close*(s: BufferStream) {.async, gcsafe.} = ## close the stream and clear the buffer if not s.isClosed: trace "closing bufferstream", oid = s.oid + s.isEof = true for r in s.readReqs: if not(isNil(r)) and not(r.finished()): r.fail(newLPStreamEOFError()) diff --git a/libp2p/stream/chronosstream.nim b/libp2p/stream/chronosstream.nim index f5b239b80..4ff0d039c 100644 --- a/libp2p/stream/chronosstream.nim +++ b/libp2p/stream/chronosstream.nim @@ -42,15 +42,6 @@ template withExceptions(body: untyped) = raise newLPStreamEOFError() # raise (ref LPStreamError)(msg: exc.msg, parent: exc) -method readExactly*(s: ChronosStream, - pbytes: pointer, - nbytes: int): Future[void] {.async.} = - if s.atEof: - raise newLPStreamEOFError() - - withExceptions: - await s.client.readExactly(pbytes, nbytes) - method readOnce*(s: ChronosStream, pbytes: pointer, nbytes: int): Future[int] {.async.} = if s.atEof: raise newLPStreamEOFError() diff --git a/libp2p/stream/lpstream.nim b/libp2p/stream/lpstream.nim index d6969a76a..410d3ed46 100644 --- a/libp2p/stream/lpstream.nim +++ b/libp2p/stream/lpstream.nim @@ -94,12 +94,6 @@ method closed*(s: LPStream): bool {.base, inline.} = method atEof*(s: LPStream): bool {.base, inline.} = s.isEof -method readExactly*(s: LPStream, - pbytes: pointer, - nbytes: int): - Future[void] {.base, async.} = - doAssert(false, "not implemented!") - method readOnce*(s: LPStream, pbytes: pointer, nbytes: int): @@ -107,6 +101,22 @@ method readOnce*(s: LPStream, {.base, async.} = doAssert(false, "not implemented!") +proc readExactly*(s: LPStream, + pbytes: pointer, + nbytes: int): + Future[void] {.async.} = + + if s.atEof: + raise newLPStreamEOFError() + + var pbuffer = cast[ptr UncheckedArray[byte]](pbytes) + var read = 0 + while read < nbytes and not(s.atEof()): + read += await s.readOnce(addr pbuffer[read], nbytes - read) + + if read < nbytes: + raise newLPStreamIncompleteError() + proc readLine*(s: LPStream, limit = 0, sep = "\r\n"): Future[string] {.async, deprecated: "todo".} = # TODO replace with something that exploits buffering better var lim = if limit <= 0: -1 else: limit @@ -140,6 +150,7 @@ proc readVarint*(conn: LPStream): Future[uint64] {.async, gcsafe.} = for i in 0.. 
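
The consolidated readExactly above is now a plain loop over readOnce that raises LPStreamIncompleteError on a short read. A minimal caller sketch under that assumption; readFrame and the 4-byte big-endian length prefix are illustrative, not part of the patch:

import chronos
import libp2p/stream/lpstream   # path as installed via nimble; adjust for in-tree use

proc readFrame(s: LPStream): Future[seq[byte]] {.async.} =
  ## illustrative framing helper: 4-byte big-endian length prefix, then payload
  var lenBytes: array[4, byte]
  await s.readExactly(addr lenBytes[0], lenBytes.len)   # short read raises LPStreamIncompleteError
  var size = 0
  for b in lenBytes:
    size = (size shl 8) or int(b)
  result = newSeq[byte](size)
  if size > 0:
    await s.readExactly(addr result[0], size)
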
Date: Sun, 28 Jun 2020 10:53:43 +0900 Subject: [PATCH 08/28] use g.peers for fanout (todo: don't include flood peers) --- libp2p/protocols/pubsub/gossipsub.nim | 21 ++++++++++----------- libp2p/protocols/pubsub/pubsub.nim | 19 ++++++++++++++++++- tests/pubsub/testgossipinternal.nim | 4 ---- tests/pubsub/testgossipsub.nim | 4 ++-- 4 files changed, 30 insertions(+), 18 deletions(-) diff --git a/libp2p/protocols/pubsub/gossipsub.nim b/libp2p/protocols/pubsub/gossipsub.nim index 0ffb19f25..d5de70677 100644 --- a/libp2p/protocols/pubsub/gossipsub.nim +++ b/libp2p/protocols/pubsub/gossipsub.nim @@ -76,16 +76,16 @@ method init*(g: GossipSub) = proc replenishFanout(g: GossipSub, topic: string) = ## get fanout peers for a topic - debug "about to replenish fanout", avail = g.gossipsub.getOrDefault(topic).len + debug "about to replenish fanout", totalPeers = g.peers.len if topic notin g.fanout: g.fanout[topic] = initHashSet[string]() - if g.fanout.getOrDefault(topic).len < GossipSubDLo: - debug "replenishing fanout", peers = g.fanout.getOrDefault(topic).len - if topic in g.gossipsub: - for p in g.gossipsub.getOrDefault(topic): - if not g.fanout[topic].containsOrIncl(p): + if g.fanout[topic].len < GossipSubDLo: + debug "replenishing fanout", peers = g.fanout[topic].len + for id, peer in g.peers: + if peer.topics.find(topic) != -1: # linear search but likely faster then a small hash + if not g.fanout[topic].containsOrIncl(id): g.lastFanoutPubSub[topic] = Moment.fromNow(GossipSubFanoutTTL) if g.fanout.getOrDefault(topic).len == GossipSubD: break @@ -140,9 +140,6 @@ proc rebalanceMesh(g: GossipSub, topic: string) {.async.} = # send a graft message to the peer await p.sendGraft(@[topic]) - # run fanout - g.replenishFanout(topic) - # prune peers if we've gone over if g.mesh.getOrDefault(topic).len > GossipSubDhi: trace "about to prune mesh", mesh = g.mesh.getOrDefault(topic).len @@ -302,8 +299,9 @@ method subscribeTopic*(g: GossipSub, debug "gossip peers", peers = g.gossipsub[topic].len, topic - # also rebalance current topic - await g.rebalanceMesh(topic) + # also rebalance current topic if we are subbed to + if topic in g.topics: + await g.rebalanceMesh(topic) proc handleGraft(g: GossipSub, peer: PubSubPeer, @@ -475,6 +473,7 @@ method publish*(g: GossipSub, if topic in g.topics: # if we're subscribed use the mesh peers = g.mesh.getOrDefault(topic) else: # not subscribed, send to fanout peers + g.replenishFanout(topic) peers = g.fanout.getOrDefault(topic) let msg = newMessage(g.peerInfo, data, topic, g.sign) diff --git a/libp2p/protocols/pubsub/pubsub.nim b/libp2p/protocols/pubsub/pubsub.nim index d0814c17e..4fb4aa80b 100644 --- a/libp2p/protocols/pubsub/pubsub.nim +++ b/libp2p/protocols/pubsub/pubsub.nim @@ -75,10 +75,27 @@ method subscribeTopic*(p: PubSub, topic: string, subscribe: bool, peerId: string) {.base, async.} = + var peer = p.peers.getOrDefault(peerId) + if isNil(peer) or isNil(peer.peerInfo): # should not happen + if subscribe: + warn "subscribeTopic but peer was unknown!" + return # Stop causing bad metrics! + else: + return # Stop causing bad metrics! 
+ + let idx = peer.topics.find(topic) if subscribe: libp2p_pubsub_peers_per_topic.inc(labelValues = [topic]) + if idx == -1: + peer.topics &= topic + else: + warn "subscribe but topic was already previously subscribed", topic, peer = peerId else: - libp2p_pubsub_peers_per_topic.dec(labelValues = [topic]) + libp2p_pubsub_peers_per_topic.dec(labelValues = [topic]) + if idx == -1: + warn "unsubscribe but topic was not previously subscribed", topic, peer = peerId + else: + peer.topics.del(idx) method rpcHandler*(p: PubSub, peer: PubSubPeer, diff --git a/tests/pubsub/testgossipinternal.nim b/tests/pubsub/testgossipinternal.nim index ce297ce6a..3d03f013b 100644 --- a/tests/pubsub/testgossipinternal.nim +++ b/tests/pubsub/testgossipinternal.nim @@ -57,10 +57,6 @@ suite "GossipSub internal": let topic = "foobar" gossipSub.gossipsub[topic] = initHashSet[string]() - # our implementation requires that topic is in gossipSub.topics - # for this test to work properly and publish properly - gossipSub.topics[topic] = Topic() - var conns = newSeq[Connection]() for i in 0..<15: let conn = newBufferStream(noop) diff --git a/tests/pubsub/testgossipsub.nim b/tests/pubsub/testgossipsub.nim index 8b2a1d5df..88c9441ae 100644 --- a/tests/pubsub/testgossipsub.nim +++ b/tests/pubsub/testgossipsub.nim @@ -365,7 +365,7 @@ suite "GossipSub": await wait(seenFut, 2.minutes) check: seen.len >= runs for k, v in seen.pairs: - check: v == 1 + check: v >= 1 await allFuturesThrowing(nodes.mapIt(it.stop())) await allFuturesThrowing(awaitters) @@ -413,7 +413,7 @@ suite "GossipSub": await wait(seenFut, 5.minutes) check: seen.len >= runs for k, v in seen.pairs: - check: v == 1 + check: v >= 1 await allFuturesThrowing(nodes.mapIt(it.stop())) await allFuturesThrowing(awaitters) From b7382eaf25c81a0f0e03c0e70ce45910769aa7d5 Mon Sep 17 00:00:00 2001 From: Giovanni Petrantoni Date: Sun, 28 Jun 2020 10:55:44 +0900 Subject: [PATCH 09/28] exclude non gossip from fanout --- libp2p/protocols/pubsub/gossipsub.nim | 2 +- libp2p/protocols/pubsub/pubsubpeer.nim | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/libp2p/protocols/pubsub/gossipsub.nim b/libp2p/protocols/pubsub/gossipsub.nim index d5de70677..3b89a1c50 100644 --- a/libp2p/protocols/pubsub/gossipsub.nim +++ b/libp2p/protocols/pubsub/gossipsub.nim @@ -84,7 +84,7 @@ proc replenishFanout(g: GossipSub, topic: string) = if g.fanout[topic].len < GossipSubDLo: debug "replenishing fanout", peers = g.fanout[topic].len for id, peer in g.peers: - if peer.topics.find(topic) != -1: # linear search but likely faster then a small hash + if peer.proto == GossipSubCodec and peer.topics.find(topic) != -1: # linear search but likely faster then a small hash if not g.fanout[topic].containsOrIncl(id): g.lastFanoutPubSub[topic] = Moment.fromNow(GossipSubFanoutTTL) if g.fanout.getOrDefault(topic).len == GossipSubD: diff --git a/libp2p/protocols/pubsub/pubsubpeer.nim b/libp2p/protocols/pubsub/pubsubpeer.nim index 681c511cf..c820c212b 100644 --- a/libp2p/protocols/pubsub/pubsubpeer.nim +++ b/libp2p/protocols/pubsub/pubsubpeer.nim @@ -32,7 +32,7 @@ type onSend*: proc(peer: PubSubPeer; msgs: var RPCMsg) {.gcsafe, raises: [Defect].} PubSubPeer* = ref object of RootObj - proto: string # the protocol that this peer joined from + proto*: string # the protocol that this peer joined from sendConn: Connection peerInfo*: PeerInfo handler*: RPCHandler From f98d2b635dc95ac00dcc986b748eb738a4cca00b Mon Sep 17 00:00:00 2001 From: Giovanni Petrantoni Date: Sun, 28 Jun 2020 11:03:33 +0900 Subject: [PATCH 
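
The subscribeTopic bookkeeping above keeps a per-peer topic list in a seq and linear-searches it, which the patch notes is likely faster than hashing for such small lists. The sketch below restates the same add/remove logic with a HashSet swapped in purely to show the trade-off; SubPeer and setSubscribed are illustrative names, not the patch's types:

import sets

type SubPeer = object
  topics: HashSet[string]        # the patch itself uses seq[string] with find/del

proc setSubscribed(p: var SubPeer, topic: string, subscribe: bool) =
  if subscribe:
    if p.topics.containsOrIncl(topic):
      discard                    # already subscribed; the patch warns here
  else:
    if topic in p.topics:
      p.topics.excl(topic)
    else:
      discard                    # was not subscribed; the patch warns here
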
10/28] internal test fixes --- tests/pubsub/testgossipinternal.nim | 2 ++ 1 file changed, 2 insertions(+) diff --git a/tests/pubsub/testgossipinternal.nim b/tests/pubsub/testgossipinternal.nim index 3d03f013b..7260b4af4 100644 --- a/tests/pubsub/testgossipinternal.nim +++ b/tests/pubsub/testgossipinternal.nim @@ -56,6 +56,7 @@ suite "GossipSub internal": let topic = "foobar" gossipSub.gossipsub[topic] = initHashSet[string]() + gossipSub.topics[topic] = Topic() # has to be in topics to rebalance var conns = newSeq[Connection]() for i in 0..<15: @@ -97,6 +98,7 @@ suite "GossipSub internal": conn.peerInfo = peerInfo gossipSub.peers[peerInfo.id] = newPubSubPeer(peerInfo, GossipSubCodec) gossipSub.peers[peerInfo.id].handler = handler + gossipSub.peers[peerInfo.id].topics &= topic gossipSub.gossipsub[topic].incl(peerInfo.id) check gossipSub.gossipsub[topic].len == 15 From 31d79ebed8ff332f49b31dd85ee2301a3ddc90b1 Mon Sep 17 00:00:00 2001 From: Giovanni Petrantoni Date: Sun, 28 Jun 2020 11:58:12 +0900 Subject: [PATCH 11/28] fix flood tests --- tests/pubsub/testfloodsub.nim | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/tests/pubsub/testfloodsub.nim b/tests/pubsub/testfloodsub.nim index 319a41ede..d13aa00eb 100644 --- a/tests/pubsub/testfloodsub.nim +++ b/tests/pubsub/testfloodsub.nim @@ -60,7 +60,7 @@ suite "FloodSub": await nodes[1].subscribe("foobar", handler) await waitSub(nodes[0], nodes[1], "foobar") - await nodes[0].publish("foobar", "Hello!".toBytes()) + discard await nodes[0].publish("foobar", "Hello!".toBytes()) result = await completionFut.wait(5.seconds) @@ -91,7 +91,7 @@ suite "FloodSub": await nodes[0].subscribe("foobar", handler) await waitSub(nodes[1], nodes[0], "foobar") - await nodes[1].publish("foobar", "Hello!".toBytes()) + discard await nodes[1].publish("foobar", "Hello!".toBytes()) result = await completionFut.wait(5.seconds) @@ -126,7 +126,7 @@ suite "FloodSub": nodes[1].addValidator("foobar", validator) - await nodes[0].publish("foobar", "Hello!".toBytes()) + discard await nodes[0].publish("foobar", "Hello!".toBytes()) check (await handlerFut) == true await allFuturesThrowing( @@ -160,7 +160,7 @@ suite "FloodSub": nodes[1].addValidator("foobar", validator) - await nodes[0].publish("foobar", "Hello!".toBytes()) + discard await nodes[0].publish("foobar", "Hello!".toBytes()) await allFuturesThrowing( nodes[0].stop(), @@ -198,8 +198,8 @@ suite "FloodSub": nodes[1].addValidator("foo", "bar", validator) - await nodes[0].publish("foo", "Hello!".toBytes()) - await nodes[0].publish("bar", "Hello!".toBytes()) + discard await nodes[0].publish("foo", "Hello!".toBytes()) + discard await nodes[0].publish("bar", "Hello!".toBytes()) await allFuturesThrowing( nodes[0].stop(), @@ -250,7 +250,7 @@ suite "FloodSub": subs &= waitSub(nodes[i], nodes[y], "foobar") await allFuturesThrowing(subs) - var pubs: seq[Future[void]] + var pubs: seq[Future[int]] for i in 0.. 
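
These test edits follow from publish now returning Future[int], the number of peers a message was handed to, instead of Future[void]: callers that do not care discard the count, while flaky-network tests retry until enough peers received it (see the tryPublish rework in the next patch). A minimal caller, assuming a running PubSub node:

# toBytes comes from stew/byteutils
let receivers = await node.publish("foobar", "Hello!".toBytes())
if receivers == 0:
  discard   # nobody reachable yet; the tests retry via tryPublish
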
Date: Sun, 28 Jun 2020 12:50:11 +0900 Subject: [PATCH 12/28] fix test's trypublish --- tests/pubsub/testgossipsub.nim | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) diff --git a/tests/pubsub/testgossipsub.nim b/tests/pubsub/testgossipsub.nim index 88c9441ae..220894555 100644 --- a/tests/pubsub/testgossipsub.nim +++ b/tests/pubsub/testgossipsub.nim @@ -43,9 +43,12 @@ proc waitSub(sender, receiver: auto; key: string) {.async, gcsafe.} = dec ceil doAssert(ceil > 0, "waitSub timeout!") -template tryPublish(call: untyped, require: int, times: int = 10, wait: Duration = 1.seconds): untyped = - var limit = times - while (call) < require and limit > 0: +template tryPublish(call: untyped, require: int, wait: Duration = 1.seconds, times: int = 10): untyped = + var + limit = times + pubs = 0 + while pubs < require and limit > 0: + pubs = pubs + call await sleepAsync(wait) limit.dec() if limit == 0: @@ -360,7 +363,7 @@ suite "GossipSub": tryPublish await wait(nodes[0].publish("foobar", cast[seq[byte]]("from node " & nodes[1].peerInfo.id)), - 1.minutes), runs + 1.minutes), runs, 5.seconds await wait(seenFut, 2.minutes) check: seen.len >= runs @@ -408,7 +411,7 @@ suite "GossipSub": tryPublish await wait(nodes[0].publish("foobar", cast[seq[byte]]("from node " & nodes[1].peerInfo.id)), - 1.minutes), 3 + 1.minutes), 3, 5.seconds await wait(seenFut, 5.minutes) check: seen.len >= runs From d4ca9691d0bf588a827367d7f6b6a1ba1e7942a5 Mon Sep 17 00:00:00 2001 From: Giovanni Petrantoni Date: Sun, 28 Jun 2020 16:37:27 +0900 Subject: [PATCH 13/28] test interop fixes --- tests/testinterop.nim | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/testinterop.nim b/tests/testinterop.nim index e3dea9350..e2514828e 100644 --- a/tests/testinterop.nim +++ b/tests/testinterop.nim @@ -151,7 +151,7 @@ proc testPubSubNodePublish(gossip: bool = false, proc publisher() {.async.} = while not finished: - await nativeNode.publish(testTopic, msgData) + discard await nativeNode.publish(testTopic, msgData) await sleepAsync(500.millis) await wait(publisher(), 5.minutes) # should be plenty of time From aa6756dfe049dd3eba215938308ec956335c7e9e Mon Sep 17 00:00:00 2001 From: Jacek Sieka Date: Sun, 28 Jun 2020 17:56:38 +0200 Subject: [PATCH 14/28] allow message id provider to be specified (#243) * don't send public key in message when not signing (information leak) * don't run rebalance if there are peers in gossip (see #242) * don't crash randomly on bad peer id from remote --- libp2p/protocols/pubsub/floodsub.nim | 15 +++--- libp2p/protocols/pubsub/gossipsub.nim | 41 +++++++++------- libp2p/protocols/pubsub/mcache.nim | 14 +++--- libp2p/protocols/pubsub/pubsub.nim | 15 ++++-- libp2p/protocols/pubsub/pubsubpeer.nim | 2 +- libp2p/protocols/pubsub/rpc/message.nim | 62 ++++++++++-------------- libp2p/protocols/pubsub/rpc/messages.nim | 7 +-- libp2p/protocols/pubsub/rpc/protobuf.nim | 14 ++++-- libp2p/standard_setup.nim | 14 ++++-- tests/pubsub/testgossipinternal.nim | 17 ++++--- tests/pubsub/testmcache.nim | 39 +++++++-------- tests/pubsub/testmessage.nim | 33 +++---------- 12 files changed, 137 insertions(+), 136 deletions(-) diff --git a/libp2p/protocols/pubsub/floodsub.nim b/libp2p/protocols/pubsub/floodsub.nim index 7f68e6d13..91f31c162 100644 --- a/libp2p/protocols/pubsub/floodsub.nim +++ b/libp2p/protocols/pubsub/floodsub.nim @@ -13,7 +13,6 @@ import pubsub, pubsubpeer, timedcache, rpc/[messages, message], - ../../crypto/crypto, ../../stream/connection, ../../peer, ../../peerinfo, @@ -65,8 +64,11 
@@ method rpcHandler*(f: FloodSub, if m.messages.len > 0: # if there are any messages var toSendPeers: HashSet[string] = initHashSet[string]() for msg in m.messages: # for every message - if msg.msgId notin f.seen: - f.seen.put(msg.msgId) # add the message to the seen cache + let msgId = f.msgIdProvider(msg) + logScope: msgId + + if msgId notin f.seen: + f.seen.put(msgId) # add the message to the seen cache if f.verifySignature and not msg.verify(peer.peerInfo): trace "dropping message due to failed signature verification" @@ -81,10 +83,9 @@ method rpcHandler*(f: FloodSub, toSendPeers.incl(f.floodsub[t]) # get all the peers interested in this topic if t in f.topics: # check that we're subscribed to it for h in f.topics[t].handler: - trace "calling handler for message", msg = msg.msgId, - topicId = t, + trace "calling handler for message", topicId = t, localPeer = f.peerInfo.id, - fromPeer = msg.fromPeerId().pretty + fromPeer = msg.fromPeer.pretty await h(t, msg.data) # trigger user provided handler # forward the message to all peers interested in it @@ -129,7 +130,7 @@ method publish*(f: FloodSub, return trace "publishing on topic", name = topic - let msg = newMessage(f.peerInfo, data, topic, f.sign) + let msg = Message.init(f.peerInfo, data, topic, f.sign) var sent: seq[Future[void]] # start the future but do not wait yet for p in f.floodsub.getOrDefault(topic): diff --git a/libp2p/protocols/pubsub/gossipsub.nim b/libp2p/protocols/pubsub/gossipsub.nim index 85bff0f59..028a6ce12 100644 --- a/libp2p/protocols/pubsub/gossipsub.nim +++ b/libp2p/protocols/pubsub/gossipsub.nim @@ -15,7 +15,6 @@ import pubsub, mcache, timedcache, rpc/[messages, message], - ../../crypto/crypto, ../protocol, ../../peerinfo, ../../stream/connection, @@ -361,12 +360,16 @@ method rpcHandler*(g: GossipSub, if m.messages.len > 0: # if there are any messages var toSendPeers: HashSet[string] for msg in m.messages: # for every message - trace "processing message with id", msg = msg.msgId - if msg.msgId in g.seen: - trace "message already processed, skipping", msg = msg.msgId + let msgId = g.msgIdProvider(msg) + logScope: msgId + + if msgId in g.seen: + trace "message already processed, skipping" continue - g.seen.put(msg.msgId) # add the message to the seen cache + trace "processing message" + + g.seen.put(msgId) # add the message to the seen cache if g.verifySignature and not msg.verify(peer.peerInfo): trace "dropping message due to failed signature verification" @@ -377,8 +380,8 @@ method rpcHandler*(g: GossipSub, continue # this shouldn't happen - if g.peerInfo.peerId == msg.fromPeerId(): - trace "skipping messages from self", msg = msg.msgId + if g.peerInfo.peerId == msg.fromPeer: + trace "skipping messages from self" continue for t in msg.topicIDs: # for every topic in the message @@ -391,10 +394,9 @@ method rpcHandler*(g: GossipSub, if t in g.topics: # if we're subscribed to the topic for h in g.topics[t].handler: - trace "calling handler for message", msg = msg.msgId, - topicId = t, + trace "calling handler for message", topicId = t, localPeer = g.peerInfo.id, - fromPeer = msg.fromPeerId().pretty + fromPeer = msg.fromPeer.pretty await h(t, msg.data) # trigger user provided handler # forward the message to all peers interested in it @@ -409,7 +411,7 @@ method rpcHandler*(g: GossipSub, let msgs = m.messages.filterIt( # don't forward to message originator - id != it.fromPeerId() + id != it.fromPeer ) var sent: seq[Future[void]] @@ -460,9 +462,9 @@ method publish*(g: GossipSub, trace "about to publish message on 
topic", name = topic, data = data.shortLog - var peers: HashSet[string] # TODO: we probably don't need to try multiple times if data.len > 0 and topic.len > 0: + var peers = g.mesh.getOrDefault(topic) for _ in 0..<5: # try to get peers up to 5 times if peers.len > 0: break @@ -480,7 +482,10 @@ method publish*(g: GossipSub, # wait a second between tries await sleepAsync(1.seconds) - let msg = newMessage(g.peerInfo, data, topic, g.sign) + let + msg = Message.init(g.peerInfo, data, topic, g.sign) + msgId = g.msgIdProvider(msg) + trace "created new message", msg var sent: seq[Future[void]] for p in peers: @@ -488,8 +493,8 @@ method publish*(g: GossipSub, continue trace "publishing on topic", name = topic - if msg.msgId notin g.mcache: - g.mcache.put(msg) + if msgId notin g.mcache: + g.mcache.put(msgId, msg) if p in g.peers: sent.add(g.peers[p].send(@[RPCMsg(messages: @[msg])])) @@ -499,10 +504,10 @@ method publish*(g: GossipSub, method start*(g: GossipSub) {.async.} = debug "gossipsub start" - + ## start pubsub ## start long running/repeating procedures - + # interlock start to to avoid overlapping to stops await g.heartbeatLock.acquire() @@ -514,7 +519,7 @@ method start*(g: GossipSub) {.async.} = method stop*(g: GossipSub) {.async.} = debug "gossipsub stop" - + ## stop pubsub ## stop long running tasks diff --git a/libp2p/protocols/pubsub/mcache.nim b/libp2p/protocols/pubsub/mcache.nim index 06157c942..82231f550 100644 --- a/libp2p/protocols/pubsub/mcache.nim +++ b/libp2p/protocols/pubsub/mcache.nim @@ -9,7 +9,7 @@ import chronos, chronicles import tables, options, sets, sequtils -import rpc/[messages, message], timedcache +import rpc/[messages], timedcache type CacheEntry* = object @@ -30,17 +30,17 @@ proc get*(c: MCache, mid: string): Option[Message] = proc contains*(c: MCache, mid: string): bool = c.get(mid).isSome -proc put*(c: MCache, msg: Message) = +proc put*(c: MCache, msgId: string, msg: Message) = proc handler(key: string, val: Message) {.gcsafe.} = ## make sure we remove the message from history ## to keep things consisten c.history.applyIt( - it.filterIt(it.mid != msg.msgId) + it.filterIt(it.mid != msgId) ) - if msg.msgId notin c.msgs: - c.msgs.put(msg.msgId, msg, handler = handler) - c.history[0].add(CacheEntry(mid: msg.msgId, msg: msg)) + if msgId notin c.msgs: + c.msgs.put(msgId, msg, handler = handler) + c.history[0].add(CacheEntry(mid: msgId, msg: msg)) proc window*(c: MCache, topic: string): HashSet[string] = result = initHashSet[string]() @@ -56,7 +56,7 @@ proc window*(c: MCache, topic: string): HashSet[string] = for entry in slot: for t in entry.msg.topicIDs: if t == topic: - result.incl(entry.msg.msgId) + result.incl(entry.mid) break proc shift*(c: MCache) = diff --git a/libp2p/protocols/pubsub/pubsub.nim b/libp2p/protocols/pubsub/pubsub.nim index c893892cf..8a0bbab5b 100644 --- a/libp2p/protocols/pubsub/pubsub.nim +++ b/libp2p/protocols/pubsub/pubsub.nim @@ -10,9 +10,10 @@ import tables, sequtils, sets import chronos, chronicles import pubsubpeer, - rpc/messages, + rpc/[message, messages], ../protocol, ../../stream/connection, + ../../peer, ../../peerinfo import metrics @@ -38,6 +39,9 @@ type TopicPair* = tuple[topic: string, handler: TopicHandler] + MsgIdProvider* = + proc(m: Message): string {.noSideEffect, raises: [Defect], nimcall, gcsafe.} + Topic* = object name*: string handler*: seq[TopicHandler] @@ -52,6 +56,7 @@ type cleanupLock: AsyncLock validators*: Table[string, HashSet[ValidatorHandler]] observers: ref seq[PubSubObserver] # ref as in smart_ptr + 
msgIdProvider*: MsgIdProvider # Turn message into message id (not nil) proc sendSubs*(p: PubSub, peer: PubSubPeer, @@ -244,6 +249,8 @@ method publish*(p: PubSub, method initPubSub*(p: PubSub) {.base.} = ## perform pubsub initialization p.observers = new(seq[PubSubObserver]) + if p.msgIdProvider == nil: + p.msgIdProvider = defaultMsgIdProvider method start*(p: PubSub) {.async, base.} = ## start pubsub @@ -292,12 +299,14 @@ proc newPubSub*(P: typedesc[PubSub], peerInfo: PeerInfo, triggerSelf: bool = false, verifySignature: bool = true, - sign: bool = true): P = + sign: bool = true, + msgIdProvider: MsgIdProvider = defaultMsgIdProvider): P = result = P(peerInfo: peerInfo, triggerSelf: triggerSelf, verifySignature: verifySignature, sign: sign, - cleanupLock: newAsyncLock()) + cleanupLock: newAsyncLock(), + msgIdProvider: msgIdProvider) result.initPubSub() proc addObserver*(p: PubSub; observer: PubSubObserver) = p.observers[] &= observer diff --git a/libp2p/protocols/pubsub/pubsubpeer.nim b/libp2p/protocols/pubsub/pubsubpeer.nim index 681c511cf..3fb437772 100644 --- a/libp2p/protocols/pubsub/pubsubpeer.nim +++ b/libp2p/protocols/pubsub/pubsubpeer.nim @@ -154,7 +154,7 @@ proc sendMsg*(p: PubSubPeer, topic: string, data: seq[byte], sign: bool): Future[void] {.gcsafe.} = - p.send(@[RPCMsg(messages: @[newMessage(p.peerInfo, data, topic, sign)])]) + p.send(@[RPCMsg(messages: @[Message.init(p.peerInfo, data, topic, sign)])]) proc sendGraft*(p: PubSubPeer, topics: seq[string]) {.async.} = for topic in topics: diff --git a/libp2p/protocols/pubsub/rpc/message.nim b/libp2p/protocols/pubsub/rpc/message.nim index 6e0a59412..d7a35ae14 100644 --- a/libp2p/protocols/pubsub/rpc/message.nim +++ b/libp2p/protocols/pubsub/rpc/message.nim @@ -7,9 +7,12 @@ ## This file may not be copied, modified, or distributed except according to ## those terms. +{.push raises: [Defect].} + import options import chronicles, stew/byteutils import metrics +import chronicles import nimcrypto/sysrand import messages, protobuf, ../../../peer, @@ -20,33 +23,18 @@ import messages, protobuf, logScope: topics = "pubsubmessage" -const PubSubPrefix = "libp2p-pubsub:" +const PubSubPrefix = toBytes("libp2p-pubsub:") declareCounter(libp2p_pubsub_sig_verify_success, "pubsub successfully validated messages") declareCounter(libp2p_pubsub_sig_verify_failure, "pubsub failed validated messages") -proc msgIdProvider(m: Message): string = - ## default msg id provider - crypto.toHex(m.seqno) & PeerID.init(m.fromPeer).pretty +func defaultMsgIdProvider*(m: Message): string = + byteutils.toHex(m.seqno) & m.fromPeer.pretty -template msgId*(m: Message): string = - ## calls the ``msgIdProvider`` from - ## the instantiation scope - ## - mixin msgIdProvider - m.msgIdProvider() - -proc fromPeerId*(m: Message): PeerId = - PeerID.init(m.fromPeer) - -proc sign*(msg: Message, p: PeerInfo): Message {.gcsafe.} = +proc sign*(msg: Message, p: PeerInfo): seq[byte] {.gcsafe, raises: [ResultError[CryptoError], Defect].} = var buff = initProtoBuffer() encodeMessage(msg, buff) - if buff.buffer.len > 0: - result = msg - result.signature = p.privateKey. - sign(PubSubPrefix.toBytes() & buff.buffer).tryGet(). 
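
Any proc matching the new MsgIdProvider signature can replace defaultMsgIdProvider at construction time. The content-addressed variant below is lifted from the provider the old testmessage.nim exercised; the proc name is illustrative and it must satisfy the MsgIdProvider pragmas (noSideEffect, gcsafe):

import nimcrypto/sha2, stew/base64

proc sha256MsgId(m: Message): string =
  # id is the base64url-encoded sha256 of the payload
  Base64Url.encode(
    sha256.digest(m.data).data.toOpenArray(0, sha256.sizeDigest() - 1))

# wired in when building the pubsub instance:
# let gossip = newPubSub(GossipSub, peerInfo, msgIdProvider = sha256MsgId)
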
- getBytes() + p.privateKey.sign(PubSubPrefix & buff.buffer).tryGet().getBytes() proc verify*(m: Message, p: PeerInfo): bool = if m.signature.len > 0 and m.key.len > 0: @@ -61,27 +49,29 @@ proc verify*(m: Message, p: PeerInfo): bool = var key: PublicKey if remote.init(m.signature) and key.init(m.key): trace "verifying signature", remoteSignature = remote - result = remote.verify(PubSubPrefix.toBytes() & buff.buffer, key) - + result = remote.verify(PubSubPrefix & buff.buffer, key) + if result: libp2p_pubsub_sig_verify_success.inc() else: libp2p_pubsub_sig_verify_failure.inc() -proc newMessage*(p: PeerInfo, - data: seq[byte], - topic: string, - sign: bool = true): Message {.gcsafe.} = +proc init*( + T: type Message, + p: PeerInfo, + data: seq[byte], + topic: string, + sign: bool = true): Message {.gcsafe, raises: [CatchableError, Defect].} = var seqno: seq[byte] = newSeq[byte](8) - if randomBytes(addr seqno[0], 8) > 0: - if p.publicKey.isSome: - var key: seq[byte] = p.publicKey.get().getBytes().tryGet() + if randomBytes(addr seqno[0], 8) <= 0: + raise (ref CatchableError)(msg: "Cannot get randomness for message") - result = Message(fromPeer: p.peerId.getBytes(), - data: data, - seqno: seqno, - topicIDs: @[topic]) - if sign: - result = result.sign(p) + result = Message( + fromPeer: p.peerId, + data: data, + seqno: seqno, + topicIDs: @[topic]) - result.key = key + if sign and p.publicKey.isSome: + result.signature = sign(result, p) + result.key = p.publicKey.get().getBytes().tryGet() diff --git a/libp2p/protocols/pubsub/rpc/messages.nim b/libp2p/protocols/pubsub/rpc/messages.nim index da7ac1a9e..afb6a5f3e 100644 --- a/libp2p/protocols/pubsub/rpc/messages.nim +++ b/libp2p/protocols/pubsub/rpc/messages.nim @@ -9,6 +9,7 @@ import options, sequtils import ../../../utility +import ../../../peer type SubOpts* = object @@ -16,7 +17,7 @@ type topic*: string Message* = object - fromPeer*: seq[byte] + fromPeer*: PeerId data*: seq[byte] seqno*: seq[byte] topicIDs*: seq[string] @@ -75,10 +76,10 @@ func shortLog*(c: ControlMessage): auto = graft: mapIt(c.graft, it.shortLog), prune: mapIt(c.prune, it.shortLog) ) - + func shortLog*(msg: Message): auto = ( - fromPeer: msg.fromPeer.shortLog, + fromPeer: msg.fromPeer, data: msg.data.shortLog, seqno: msg.seqno.shortLog, topicIDs: $msg.topicIDs, diff --git a/libp2p/protocols/pubsub/rpc/protobuf.nim b/libp2p/protocols/pubsub/rpc/protobuf.nim index 78a422cd1..556232475 100644 --- a/libp2p/protocols/pubsub/rpc/protobuf.nim +++ b/libp2p/protocols/pubsub/rpc/protobuf.nim @@ -10,6 +10,7 @@ import options import chronicles import messages, + ../../../peer, ../../../utility, ../../../protobuf/minprotobuf @@ -162,7 +163,7 @@ proc decodeSubs*(pb: var ProtoBuffer): seq[SubOpts] {.gcsafe.} = trace "got subscriptions", subscriptions = result proc encodeMessage*(msg: Message, pb: var ProtoBuffer) {.gcsafe.} = - pb.write(initProtoField(1, msg.fromPeer)) + pb.write(initProtoField(1, msg.fromPeer.getBytes())) pb.write(initProtoField(2, msg.data)) pb.write(initProtoField(3, msg.seqno)) @@ -181,9 +182,16 @@ proc decodeMessages*(pb: var ProtoBuffer): seq[Message] {.gcsafe.} = # TODO: which of this fields are really optional? 
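
The reworked Message.init and verify above can be exercised directly, as the replacement testmessage.nim further down does; the round trip here just restates it with a non-empty payload (crypto, peerinfo and stew/byteutils imports assumed):

let
  peer = PeerInfo.init(PrivateKey.random(ECDSA).get())
  msg  = Message.init(peer, "hello".toBytes(), "topic", sign = true)
doAssert verify(msg, peer)   # checks the signature over PubSubPrefix & the encoded message
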
while true: var msg: Message - if pb.getBytes(1, msg.fromPeer) < 0: + var fromPeer: seq[byte] + if pb.getBytes(1, fromPeer) < 0: break - trace "read message field", fromPeer = msg.fromPeer.shortLog + try: + msg.fromPeer = PeerID.init(fromPeer) + except CatchableError as err: + debug "Invalid fromPeer in message", msg = err.msg + break + + trace "read message field", fromPeer = msg.fromPeer.pretty if pb.getBytes(2, msg.data) < 0: break diff --git a/libp2p/standard_setup.nim b/libp2p/standard_setup.nim index 17fcb87da..991378a1e 100644 --- a/libp2p/standard_setup.nim +++ b/libp2p/standard_setup.nim @@ -9,7 +9,8 @@ import crypto/crypto, transports/[transport, tcptransport], muxers/[muxer, mplex/mplex, mplex/types], protocols/[identify, secure/secure], - protocols/pubsub/[pubsub, gossipsub, floodsub] + protocols/pubsub/[pubsub, gossipsub, floodsub], + protocols/pubsub/rpc/message import protocols/secure/noise, @@ -30,11 +31,12 @@ proc newStandardSwitch*(privKey = none(PrivateKey), secureManagers: openarray[SecureProtocol] = [ # array cos order matters SecureProtocol.Secio, - SecureProtocol.Noise, + SecureProtocol.Noise, ], verifySignature = libp2p_pubsub_verify, sign = libp2p_pubsub_sign, - transportFlags: set[ServerFlags] = {}): Switch = + transportFlags: set[ServerFlags] = {}, + msgIdProvider: MsgIdProvider = defaultMsgIdProvider): Switch = proc createMplex(conn: Connection): Muxer = newMplex(conn) @@ -60,13 +62,15 @@ proc newStandardSwitch*(privKey = none(PrivateKey), peerInfo = peerInfo, triggerSelf = triggerSelf, verifySignature = verifySignature, - sign = sign).PubSub + sign = sign, + msgIdProvider = msgIdProvider).PubSub else: newPubSub(FloodSub, peerInfo = peerInfo, triggerSelf = triggerSelf, verifySignature = verifySignature, - sign = sign).PubSub + sign = sign, + msgIdProvider = msgIdProvider).PubSub newSwitch( peerInfo, diff --git a/tests/pubsub/testgossipinternal.nim b/tests/pubsub/testgossipinternal.nim index 82f785cd7..603dfaa9b 100644 --- a/tests/pubsub/testgossipinternal.nim +++ b/tests/pubsub/testgossipinternal.nim @@ -5,6 +5,7 @@ include ../../libp2p/protocols/pubsub/gossipsub import unittest import stew/byteutils import ../../libp2p/errors +import ../../libp2p/crypto/crypto import ../../libp2p/stream/bufferstream import ../helpers @@ -229,8 +230,8 @@ suite "GossipSub internal": conns &= conn let peerInfo = PeerInfo.init(PrivateKey.random(ECDSA).get()) conn.peerInfo = peerInfo - let msg = newMessage(peerInfo, ("HELLO" & $i).toBytes(), topic, false) - gossipSub.mcache.put(msg) + let msg = Message.init(peerInfo, ("HELLO" & $i).toBytes(), topic, false) + gossipSub.mcache.put(gossipSub.msgIdProvider(msg), msg) check gossipSub.fanout[topic].len == 15 check gossipSub.mesh[topic].len == 15 @@ -279,8 +280,8 @@ suite "GossipSub internal": conns &= conn let peerInfo = PeerInfo.init(PrivateKey.random(ECDSA).get()) conn.peerInfo = peerInfo - let msg = newMessage(peerInfo, ("HELLO" & $i).toBytes(), topic, false) - gossipSub.mcache.put(msg) + let msg = Message.init(peerInfo, ("HELLO" & $i).toBytes(), topic, false) + gossipSub.mcache.put(gossipSub.msgIdProvider(msg), msg) let peers = gossipSub.getGossipPeers() check peers.len == GossipSubD @@ -322,8 +323,8 @@ suite "GossipSub internal": conns &= conn let peerInfo = PeerInfo.init(PrivateKey.random(ECDSA).get()) conn.peerInfo = peerInfo - let msg = newMessage(peerInfo, ("HELLO" & $i).toBytes(), topic, false) - gossipSub.mcache.put(msg) + let msg = Message.init(peerInfo, ("HELLO" & $i).toBytes(), topic, false) + 
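
standard_setup.nim forwards the provider into whichever PubSub it builds, so applications pick it when constructing the switch. A tiny wiring sketch using the sha256MsgId proc from above; the gossip flag is assumed to be the pre-existing switch-construction parameter:

let switch = newStandardSwitch(
  gossip = true,                      # GossipSub rather than FloodSub (assumed existing flag)
  msgIdProvider = sha256MsgId)        # defaults to defaultMsgIdProvider when omitted
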
gossipSub.mcache.put(gossipSub.msgIdProvider(msg), msg) let peers = gossipSub.getGossipPeers() check peers.len == GossipSubD @@ -365,8 +366,8 @@ suite "GossipSub internal": conns &= conn let peerInfo = PeerInfo.init(PrivateKey.random(ECDSA).get()) conn.peerInfo = peerInfo - let msg = newMessage(peerInfo, ("bar" & $i).toBytes(), topic, false) - gossipSub.mcache.put(msg) + let msg = Message.init(peerInfo, ("bar" & $i).toBytes(), topic, false) + gossipSub.mcache.put(gossipSub.msgIdProvider(msg), msg) let peers = gossipSub.getGossipPeers() check peers.len == 0 diff --git a/tests/pubsub/testmcache.nim b/tests/pubsub/testmcache.nim index fca0011ec..118be8b3b 100644 --- a/tests/pubsub/testmcache.nim +++ b/tests/pubsub/testmcache.nim @@ -11,25 +11,26 @@ import ../../libp2p/[peer, suite "MCache": test "put/get": var mCache = newMCache(3, 5) - var msg = Message(fromPeer: PeerID.init(PrivateKey.random(ECDSA).get()).data, + var msg = Message(fromPeer: PeerID.init(PrivateKey.random(ECDSA).get()), seqno: "12345".toBytes()) - mCache.put(msg) - check mCache.get(msg.msgId).isSome and mCache.get(msg.msgId).get() == msg + let msgId = defaultMsgIdProvider(msg) + mCache.put(msgId, msg) + check mCache.get(msgId).isSome and mCache.get(msgId).get() == msg test "window": var mCache = newMCache(3, 5) for i in 0..<3: - var msg = Message(fromPeer: PeerID.init(PrivateKey.random(ECDSA).get()).data, + var msg = Message(fromPeer: PeerID.init(PrivateKey.random(ECDSA).get()), seqno: "12345".toBytes(), topicIDs: @["foo"]) - mCache.put(msg) + mCache.put(defaultMsgIdProvider(msg), msg) for i in 0..<5: - var msg = Message(fromPeer: PeerID.init(PrivateKey.random(ECDSA).get()).data, + var msg = Message(fromPeer: PeerID.init(PrivateKey.random(ECDSA).get()), seqno: "12345".toBytes(), topicIDs: @["bar"]) - mCache.put(msg) + mCache.put(defaultMsgIdProvider(msg), msg) var mids = mCache.window("foo") check mids.len == 3 @@ -41,28 +42,28 @@ suite "MCache": var mCache = newMCache(1, 5) for i in 0..<3: - var msg = Message(fromPeer: PeerID.init(PrivateKey.random(ECDSA).get()).data, + var msg = Message(fromPeer: PeerID.init(PrivateKey.random(ECDSA).get()), seqno: "12345".toBytes(), topicIDs: @["foo"]) - mCache.put(msg) + mCache.put(defaultMsgIdProvider(msg), msg) mCache.shift() check mCache.window("foo").len == 0 for i in 0..<3: - var msg = Message(fromPeer: PeerID.init(PrivateKey.random(ECDSA).get()).data, + var msg = Message(fromPeer: PeerID.init(PrivateKey.random(ECDSA).get()), seqno: "12345".toBytes(), topicIDs: @["bar"]) - mCache.put(msg) + mCache.put(defaultMsgIdProvider(msg), msg) mCache.shift() check mCache.window("bar").len == 0 for i in 0..<3: - var msg = Message(fromPeer: PeerID.init(PrivateKey.random(ECDSA).get()).data, + var msg = Message(fromPeer: PeerID.init(PrivateKey.random(ECDSA).get()), seqno: "12345".toBytes(), topicIDs: @["baz"]) - mCache.put(msg) + mCache.put(defaultMsgIdProvider(msg), msg) mCache.shift() check mCache.window("baz").len == 0 @@ -71,22 +72,22 @@ suite "MCache": var mCache = newMCache(1, 5) for i in 0..<3: - var msg = Message(fromPeer: PeerID.init(PrivateKey.random(ECDSA).get()).data, + var msg = Message(fromPeer: PeerID.init(PrivateKey.random(ECDSA).get()), seqno: "12345".toBytes(), topicIDs: @["foo"]) - mCache.put(msg) + mCache.put(defaultMsgIdProvider(msg), msg) for i in 0..<3: - var msg = Message(fromPeer: PeerID.init(PrivateKey.random(ECDSA).get()).data, + var msg = Message(fromPeer: PeerID.init(PrivateKey.random(ECDSA).get()), seqno: "12345".toBytes(), topicIDs: @["bar"]) - mCache.put(msg) + 
mCache.put(defaultMsgIdProvider(msg), msg) for i in 0..<3: - var msg = Message(fromPeer: PeerID.init(PrivateKey.random(ECDSA).get()).data, + var msg = Message(fromPeer: PeerID.init(PrivateKey.random(ECDSA).get()), seqno: "12345".toBytes(), topicIDs: @["baz"]) - mCache.put(msg) + mCache.put(defaultMsgIdProvider(msg), msg) mCache.shift() check mCache.window("foo").len == 0 diff --git a/tests/pubsub/testmessage.nim b/tests/pubsub/testmessage.nim index d0d48b405..77128ef05 100644 --- a/tests/pubsub/testmessage.nim +++ b/tests/pubsub/testmessage.nim @@ -1,33 +1,14 @@ import unittest -import nimcrypto/sha2, - stew/[base64, byteutils] -import ../../libp2p/[peer, + +import ../../libp2p/[peer, peerinfo, crypto/crypto, protocols/pubsub/rpc/message, protocols/pubsub/rpc/messages] suite "Message": - test "default message id": - let msg = Message(fromPeer: PeerID.init(PrivateKey.random(ECDSA).get()).data, - seqno: ("12345").toBytes()) - - check msg.msgId == byteutils.toHex(msg.seqno) & PeerID.init(msg.fromPeer).pretty - - test "sha256 message id": - let msg = Message(fromPeer: PeerID.init(PrivateKey.random(ECDSA).get()).data, - seqno: ("12345").toBytes(), - data: ("12345").toBytes()) - - proc msgIdProvider(m: Message): string = - Base64Url.encode( - sha256. - digest(m.data). - data. - toOpenArray(0, sha256.sizeDigest() - 1)) - - check msg.msgId == Base64Url.encode( - sha256. - digest(msg.data). - data. - toOpenArray(0, sha256.sizeDigest() - 1)) + test "signature": + let + peer = PeerInfo.init(PrivateKey.random(ECDSA).get()) + msg = Message.init(peer, @[], "topic", sign = true) + check verify(msg, peer) From 2356f6ab6809c0b9d5d594fd903cadaeea882d2d Mon Sep 17 00:00:00 2001 From: Giovanni Petrantoni Date: Mon, 29 Jun 2020 10:45:06 +0900 Subject: [PATCH 15/28] make sure to not remove peers from gossip table --- libp2p/protocols/pubsub/gossipsub.nim | 14 ++++++-------- 1 file changed, 6 insertions(+), 8 deletions(-) diff --git a/libp2p/protocols/pubsub/gossipsub.nim b/libp2p/protocols/pubsub/gossipsub.nim index 3b89a1c50..7fafe44d8 100644 --- a/libp2p/protocols/pubsub/gossipsub.nim +++ b/libp2p/protocols/pubsub/gossipsub.nim @@ -190,31 +190,29 @@ proc getGossipPeers(g: GossipSub): Table[string, ControlMessage] {.gcsafe.} = let fanout: HashSet[string] = g.fanout.getOrDefault(topic) let gossipPeers = mesh + fanout + var extraPeers = g.gossipsub # copy it! 
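
The point of the extraPeers copy in getGossipPeers is that Nim tables have value semantics: sampling candidates out of the copy no longer erases them from g.gossipsub, which is the bug this commit fixes (IHAVE candidate selection was permanently shrinking the gossip table). The helper below restates the pattern in isolation; the names are illustrative:

import tables, sets, sequtils, random

proc sampleForGossip(gossipsub: Table[string, HashSet[string]],
                     topic: string, wanted: int): seq[string] =
  var pool = gossipsub.getOrDefault(topic)   # a copy: mutating it leaves gossipsub intact
  while result.len < wanted and pool.len > 0:
    let id = toSeq(pool).sample()
    pool.excl(id)
    result.add(id)
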
let mids = g.mcache.window(topic) if mids.len > 0: let ihave = ControlIHave(topicID: topic, messageIDs: toSeq(mids)) - if topic notin g.gossipsub: + if topic notin extraPeers: trace "topic not in gossip array, skipping", topicID = topic continue while result.len < GossipSubD: - if g.gossipsub.getOrDefault(topic).len == 0: + if extraPeers.getOrDefault(topic).len == 0: trace "no peers for topic, skipping", topicID = topic break - let id = toSeq(g.gossipsub.getOrDefault(topic)).sample() - if id in g.gossipsub.getOrDefault(topic): - g.gossipsub[topic].excl(id) + let id = toSeq(extraPeers.getOrDefault(topic)).sample() + if id in extraPeers.getOrDefault(topic): + extraPeers[topic].excl(id) if id notin gossipPeers: if id notin result: result[id] = ControlMessage() result[id].ihave.add(ihave) - libp2p_gossipsub_peers_per_topic_gossipsub - .set(g.gossipsub.getOrDefault(topic).len.int64, labelValues = [topic]) - proc heartbeat(g: GossipSub) {.async.} = while g.heartbeatRunning: try: From 3dd28adc9f7439a35fa4cadc9e114939940ad20f Mon Sep 17 00:00:00 2001 From: Giovanni Petrantoni Date: Mon, 29 Jun 2020 10:46:59 +0900 Subject: [PATCH 16/28] restore old replenishFanout --- libp2p/protocols/pubsub/gossipsub.nim | 16 +++++++--------- 1 file changed, 7 insertions(+), 9 deletions(-) diff --git a/libp2p/protocols/pubsub/gossipsub.nim b/libp2p/protocols/pubsub/gossipsub.nim index 7fafe44d8..10d68dea1 100644 --- a/libp2p/protocols/pubsub/gossipsub.nim +++ b/libp2p/protocols/pubsub/gossipsub.nim @@ -76,23 +76,21 @@ method init*(g: GossipSub) = proc replenishFanout(g: GossipSub, topic: string) = ## get fanout peers for a topic - debug "about to replenish fanout", totalPeers = g.peers.len - + trace "about to replenish fanout" if topic notin g.fanout: g.fanout[topic] = initHashSet[string]() - if g.fanout[topic].len < GossipSubDLo: - debug "replenishing fanout", peers = g.fanout[topic].len - for id, peer in g.peers: - if peer.proto == GossipSubCodec and peer.topics.find(topic) != -1: # linear search but likely faster then a small hash - if not g.fanout[topic].containsOrIncl(id): - g.lastFanoutPubSub[topic] = Moment.fromNow(GossipSubFanoutTTL) + if g.fanout.getOrDefault(topic).len < GossipSubDLo: + trace "replenishing fanout", peers = g.fanout.getOrDefault(topic).len + if topic in g.gossipsub: + for p in g.gossipsub.getOrDefault(topic): + if not g.fanout[topic].containsOrIncl(p): if g.fanout.getOrDefault(topic).len == GossipSubD: break libp2p_gossipsub_peers_per_topic_fanout .set(g.fanout.getOrDefault(topic).len.int64, labelValues = [topic]) - debug "fanout replenished with peers", peers = g.fanout.getOrDefault(topic).len + trace "fanout replenished with peers", peers = g.fanout.getOrDefault(topic).len proc rebalanceMesh(g: GossipSub, topic: string) {.async.} = try: From 680dcfd9cb81d1151511caf747b99bea0d49a333 Mon Sep 17 00:00:00 2001 From: Giovanni Petrantoni Date: Tue, 30 Jun 2020 00:10:11 +0900 Subject: [PATCH 17/28] cleanups --- libp2p/protocols/pubsub/gossipsub.nim | 5 +---- tests/pubsub/testgossipsub.nim | 2 +- 2 files changed, 2 insertions(+), 5 deletions(-) diff --git a/libp2p/protocols/pubsub/gossipsub.nim b/libp2p/protocols/pubsub/gossipsub.nim index bf542ef9a..8f0db28f8 100644 --- a/libp2p/protocols/pubsub/gossipsub.nim +++ b/libp2p/protocols/pubsub/gossipsub.nim @@ -19,8 +19,7 @@ import pubsub, ../../peerinfo, ../../stream/connection, ../../peer, - ../../errors, - ../../utility + ../../errors logScope: topics = "gossipsub" @@ -55,7 +54,6 @@ type heartbeatFut: Future[void] # cancellation future for 
heartbeat interval heartbeatRunning: bool heartbeatLock: AsyncLock # heartbeat lock to prevent two consecutive concurrent heartbeats - subLock: AsyncLock declareGauge(libp2p_gossipsub_peers_per_topic_mesh, "gossipsub peers per topic in mesh", labels = ["topic"]) declareGauge(libp2p_gossipsub_peers_per_topic_fanout, "gossipsub peers per topic in fanout", labels = ["topic"]) @@ -548,4 +546,3 @@ method initPubSub*(g: GossipSub) = g.gossip = initTable[string, seq[ControlIHave]]() # pending gossip g.control = initTable[string, ControlMessage]() # pending control messages g.heartbeatLock = newAsyncLock() - g.subLock = newAsyncLock() diff --git a/tests/pubsub/testgossipsub.nim b/tests/pubsub/testgossipsub.nim index 85657b203..004245dc3 100644 --- a/tests/pubsub/testgossipsub.nim +++ b/tests/pubsub/testgossipsub.nim @@ -39,7 +39,7 @@ proc waitSub(sender, receiver: auto; key: string) {.async, gcsafe.} = (not fsub.fanout.hasKey(key) or not fsub.fanout[key].contains(receiver.peerInfo.id)): trace "waitSub sleeping..." - await sleepAsync(100.millis) + await sleepAsync(1.seconds) dec ceil doAssert(ceil > 0, "waitSub timeout!") From c788a6a3c0ca614be4c67cc566aee6877da0250b Mon Sep 17 00:00:00 2001 From: Dmitriy Ryajov Date: Mon, 29 Jun 2020 09:15:31 -0600 Subject: [PATCH 18/28] Cleanup resources (#246) * consolidate reading in lpstream * remove debug echo * tune log level * add channel cleanup and cancelation handling * cancelation handling * cancelation handling * cancelation handling * cancelation handling * cleanup and cancelation handling * cancelation handling * cancelation * tests * rename isConnected to connected * remove testing trace * comment out debug stacktraces * explicit raises --- libp2p/multistream.nim | 12 +- libp2p/muxers/mplex/coder.nim | 4 +- libp2p/muxers/mplex/lpchannel.nim | 171 ++++---- libp2p/muxers/mplex/mplex.nim | 230 ++++++----- libp2p/muxers/muxer.nim | 4 + libp2p/peerinfo.nim | 3 +- libp2p/protocols/identify.nim | 12 +- libp2p/protocols/pubsub/gossipsub.nim | 10 +- libp2p/protocols/pubsub/pubsub.nim | 38 +- libp2p/protocols/pubsub/pubsubpeer.nim | 18 +- libp2p/protocols/secure/secure.nim | 35 +- libp2p/stream/bufferstream.nim | 70 ++-- libp2p/stream/lpstream.nim | 6 +- libp2p/switch.nim | 523 +++++++++++++------------ libp2p/transports/tcptransport.nim | 9 +- tests/testmplex.nim | 2 +- tests/testnoise.nim | 4 +- 17 files changed, 619 insertions(+), 532 deletions(-) diff --git a/libp2p/multistream.nim b/libp2p/multistream.nim index fbf19813b..4d4006838 100644 --- a/libp2p/multistream.nim +++ b/libp2p/multistream.nim @@ -37,12 +37,6 @@ type handlers*: seq[HandlerHolder] codec*: string - MultistreamHandshakeException* = object of CatchableError - -proc newMultistreamHandshakeException*(): ref CatchableError {.inline.} = - result = newException(MultistreamHandshakeException, - "could not perform multistream handshake") - proc newMultistream*(): MultistreamSelect = new result result.codec = MSCodec @@ -62,7 +56,7 @@ proc select*(m: MultistreamSelect, s.removeSuffix("\n") if s != Codec: notice "handshake failed", codec = s.toHex() - raise newMultistreamHandshakeException() + return "" if proto.len() == 0: # no protocols, must be a handshake call return Codec @@ -152,8 +146,12 @@ proc handle*(m: MultistreamSelect, conn: Connection) {.async, gcsafe.} = return debug "no handlers for ", protocol = ms await conn.write(Na) + except CancelledError as exc: + await conn.close() + raise exc except CatchableError as exc: trace "exception in multistream", exc = exc.msg + await conn.close() 
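
The recurring shape this commit introduces: cancellation is caught separately so the CancelledError keeps propagating after local cleanup, while other errors are logged and the connection is closed. A stripped-down version of that handler shape, with doWork as a placeholder and the chronos/chronicles/Connection imports assumed:

proc handleConn(conn: Connection) {.async.} =
  try:
    await doWork(conn)              # placeholder for the protocol logic
  except CancelledError as exc:
    await conn.close()              # clean up, then re-raise so cancellation propagates
    raise exc
  except CatchableError as exc:
    trace "exception in handler", exc = exc.msg
    await conn.close()
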
finally: trace "leaving multistream loop" diff --git a/libp2p/muxers/mplex/coder.nim b/libp2p/muxers/mplex/coder.nim index 6e1520f63..6164aa97a 100644 --- a/libp2p/muxers/mplex/coder.nim +++ b/libp2p/muxers/mplex/coder.nim @@ -47,8 +47,8 @@ proc writeMsg*(conn: Connection, msgType: MessageType, data: seq[byte] = @[]) {.async, gcsafe.} = trace "sending data over mplex", id, - msgType, - data = data.len + msgType, + data = data.len var left = data.len offset = 0 diff --git a/libp2p/muxers/mplex/lpchannel.nim b/libp2p/muxers/mplex/lpchannel.nim index 24865725e..771a4e3a3 100644 --- a/libp2p/muxers/mplex/lpchannel.nim +++ b/libp2p/muxers/mplex/lpchannel.nim @@ -15,7 +15,8 @@ import types, ../../stream/connection, ../../stream/bufferstream, ../../utility, - ../../errors + ../../errors, + ../../peerinfo export connection @@ -90,87 +91,104 @@ proc newChannel*(id: uint64, name: string = "", size: int = DefaultBufferSize, lazy: bool = false): LPChannel = - new result - result.id = id - result.name = name - result.conn = conn - result.initiator = initiator - result.msgCode = if initiator: MessageType.MsgOut else: MessageType.MsgIn - result.closeCode = if initiator: MessageType.CloseOut else: MessageType.CloseIn - result.resetCode = if initiator: MessageType.ResetOut else: MessageType.ResetIn - result.isLazy = lazy + result = LPChannel(id: id, + name: name, + conn: conn, + initiator: initiator, + msgCode: if initiator: MessageType.MsgOut else: MessageType.MsgIn, + closeCode: if initiator: MessageType.CloseOut else: MessageType.CloseIn, + resetCode: if initiator: MessageType.ResetOut else: MessageType.ResetIn, + isLazy: lazy) let chan = result + logScope: + id = chan.id + initiator = chan.initiator + name = chan.name + oid = $chan.oid + peer = $chan.conn.peerInfo + # stack = getStackTrace() + proc writeHandler(data: seq[byte]): Future[void] {.async, gcsafe.} = try: if chan.isLazy and not(chan.isOpen): await chan.open() # writes should happen in sequence - trace "sending data", data = data.shortLog, - id = chan.id, - initiator = chan.initiator, - name = chan.name, - oid = chan.oid + trace "sending data" - try: - await conn.writeMsg(chan.id, - chan.msgCode, - data).wait(2.minutes) # write header - except AsyncTimeoutError: - trace "timeout writing channel, resetting" - asyncCheck chan.reset() + await conn.writeMsg(chan.id, + chan.msgCode, + data).wait(2.minutes) # write header except CatchableError as exc: - trace "unable to write in bufferstream handler", exc = exc.msg + trace "exception in lpchannel write handler", exc = exc.msg + await chan.reset() + raise exc result.initBufferStream(writeHandler, size) when chronicles.enabledLogLevel == LogLevel.TRACE: result.name = if result.name.len > 0: result.name else: $result.oid - trace "created new lpchannel", id = result.id, - oid = result.oid, - initiator = result.initiator, - name = result.name + trace "created new lpchannel" proc closeMessage(s: LPChannel) {.async.} = + logScope: + id = s.id + initiator = s.initiator + name = s.name + oid = $s.oid + peer = $s.conn.peerInfo + # stack = getStackTrace() + ## send close message - this will not raise ## on EOF or Closed - withEOFExceptions: - withWriteLock(s.writeLock): - trace "sending close message", id = s.id, - initiator = s.initiator, - name = s.name, - oid = s.oid + withWriteLock(s.writeLock): + trace "sending close message" - await s.conn.writeMsg(s.id, s.closeCode) # write close + await s.conn.writeMsg(s.id, s.closeCode) # write close proc resetMessage(s: LPChannel) {.async.} = + logScope: + id = s.id 
+ initiator = s.initiator + name = s.name + oid = $s.oid + peer = $s.conn.peerInfo + # stack = getStackTrace() + ## send reset message - this will not raise withEOFExceptions: withWriteLock(s.writeLock): - trace "sending reset message", id = s.id, - initiator = s.initiator, - name = s.name, - oid = s.oid + trace "sending reset message" await s.conn.writeMsg(s.id, s.resetCode) # write reset proc open*(s: LPChannel) {.async, gcsafe.} = + logScope: + id = s.id + initiator = s.initiator + name = s.name + oid = $s.oid + peer = $s.conn.peerInfo + # stack = getStackTrace() + ## NOTE: Don't call withExcAndLock or withWriteLock, ## because this already gets called from writeHandler ## which is locked - withEOFExceptions: - await s.conn.writeMsg(s.id, MessageType.New, s.name) - trace "opened channel", oid = s.oid, - name = s.name, - initiator = s.initiator - s.isOpen = true + await s.conn.writeMsg(s.id, MessageType.New, s.name) + trace "opened channel" + s.isOpen = true proc closeRemote*(s: LPChannel) {.async.} = - trace "got EOF, closing channel", id = s.id, - initiator = s.initiator, - name = s.name, - oid = s.oid + logScope: + id = s.id + initiator = s.initiator + name = s.name + oid = $s.oid + peer = $s.conn.peerInfo + # stack = getStackTrace() + + trace "got EOF, closing channel" # wait for all data in the buffer to be consumed while s.len > 0: @@ -181,11 +199,7 @@ proc closeRemote*(s: LPChannel) {.async.} = await s.close() # close local end # call to avoid leaks await procCall BufferStream(s).close() # close parent bufferstream - - trace "channel closed on EOF", id = s.id, - initiator = s.initiator, - oid = s.oid, - name = s.name + trace "channel closed on EOF" method closed*(s: LPChannel): bool = ## this emulates half-closed behavior @@ -195,6 +209,20 @@ method closed*(s: LPChannel): bool = s.closedLocal method reset*(s: LPChannel) {.base, async, gcsafe.} = + logScope: + id = s.id + initiator = s.initiator + name = s.name + oid = $s.oid + peer = $s.conn.peerInfo + # stack = getStackTrace() + + trace "resetting channel" + + if s.closedLocal and s.isEof: + trace "channel already closed or reset" + return + # we asyncCheck here because the other end # might be dead already - reset is always # optimistic @@ -203,33 +231,36 @@ method reset*(s: LPChannel) {.base, async, gcsafe.} = s.isEof = true s.closedLocal = true + trace "channel reset" + method close*(s: LPChannel) {.async, gcsafe.} = + logScope: + id = s.id + initiator = s.initiator + name = s.name + oid = $s.oid + peer = $s.conn.peerInfo + # stack = getStackTrace() + if s.closedLocal: - trace "channel already closed", id = s.id, - initiator = s.initiator, - name = s.name, - oid = s.oid + trace "channel already closed" return - proc closeRemote() {.async.} = + trace "closing local lpchannel" + + proc closeInternal() {.async.} = try: - trace "closing local lpchannel", id = s.id, - initiator = s.initiator, - name = s.name, - oid = s.oid await s.closeMessage().wait(2.minutes) if s.atEof: # already closed by remote close parent buffer immediately await procCall BufferStream(s).close() - except AsyncTimeoutError: - trace "close timed out, reset channel" - asyncCheck s.reset() # reset on timeout + except CancelledError as exc: + await s.reset() # reset on timeout + raise exc except CatchableError as exc: trace "exception closing channel" + await s.reset() # reset on timeout - trace "lpchannel closed local", id = s.id, - initiator = s.initiator, - name = s.name, - oid = s.oid + trace "lpchannel closed local" s.closedLocal = true - asyncCheck 
closeRemote() + asyncCheck closeInternal() diff --git a/libp2p/muxers/mplex/mplex.nim b/libp2p/muxers/mplex/mplex.nim index eee7a56cf..69666419c 100644 --- a/libp2p/muxers/mplex/mplex.nim +++ b/libp2p/muxers/mplex/mplex.nim @@ -8,12 +8,13 @@ ## those terms. import tables, sequtils, oids -import chronos, chronicles, stew/byteutils +import chronos, chronicles, stew/byteutils, metrics import ../muxer, ../../stream/connection, ../../stream/bufferstream, ../../utility, ../../errors, + ../../peerinfo, coder, types, lpchannel @@ -21,6 +22,8 @@ import ../muxer, logScope: topics = "mplex" +declareGauge(libp2p_mplex_channels, "mplex channels", labels = ["initiator", "peer"]) + type Mplex* = ref object of Muxer remote: Table[uint64, LPChannel] @@ -33,10 +36,10 @@ type proc getChannelList(m: Mplex, initiator: bool): var Table[uint64, LPChannel] = if initiator: - trace "picking local channels", initiator = initiator, oid = m.oid + trace "picking local channels", initiator = initiator, oid = $m.oid result = m.local else: - trace "picking remote channels", initiator = initiator, oid = m.oid + trace "picking remote channels", initiator = initiator, oid = $m.oid result = m.remote proc newStreamInternal*(m: Mplex, @@ -46,6 +49,7 @@ proc newStreamInternal*(m: Mplex, lazy: bool = false): Future[LPChannel] {.async, gcsafe.} = ## create new channel/stream + ## let id = if initiator: m.currentId.inc(); m.currentId else: chanId @@ -53,7 +57,7 @@ proc newStreamInternal*(m: Mplex, trace "creating new channel", channelId = id, initiator = initiator, name = name, - oid = m.oid + oid = $m.oid result = newChannel(id, m.connection, initiator, @@ -63,96 +67,128 @@ proc newStreamInternal*(m: Mplex, result.peerInfo = m.connection.peerInfo result.observedAddr = m.connection.observedAddr - m.getChannelList(initiator)[id] = result + doAssert(id notin m.getChannelList(initiator), + "channel slot already taken!") -proc handleStream(m: Muxer, chann: LPChannel) {.async.} = + m.getChannelList(initiator)[id] = result + libp2p_mplex_channels.set( + m.getChannelList(initiator).len.int64, + labelValues = [$initiator, + $m.connection.peerInfo]) + +proc cleanupChann(m: Mplex, chann: LPChannel) {.async, inline.} = + ## remove the local channel from the internal tables + ## + await chann.closeEvent.wait() + if not isNil(chann): + m.getChannelList(chann.initiator).del(chann.id) + trace "cleaned up channel", id = chann.id + + libp2p_mplex_channels.set( + m.getChannelList(chann.initiator).len.int64, + labelValues = [$chann.initiator, + $m.connection.peerInfo]) + +proc handleStream(m: Mplex, chann: LPChannel) {.async.} = + ## call the muxer stream handler for this channel + ## try: await m.streamHandler(chann) trace "finished handling stream" doAssert(chann.closed, "connection not closed by handler!") + except CancelledError as exc: + trace "cancling stream handler", exc = exc.msg + await chann.reset() + raise except CatchableError as exc: trace "exception in stream handler", exc = exc.msg await chann.reset() + await m.cleanupChann(chann) method handle*(m: Mplex) {.async, gcsafe.} = - trace "starting mplex main loop", oid = m.oid + trace "starting mplex main loop", oid = $m.oid try: - try: - while not m.connection.closed: - trace "waiting for data", oid = m.oid - let (id, msgType, data) = await m.connection.readMsg() - trace "read message from connection", id = id, - msgType = msgType, - data = data.shortLog, - oid = m.oid - - let initiator = bool(ord(msgType) and 1) - var channel: LPChannel - if MessageType(msgType) != MessageType.New: - let 
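
The channel-count gauge added to mplex is refreshed whenever a channel is registered or cleaned up. The same declareGauge/set-with-labels pattern from nim-metrics, reduced to its essentials; example_channels and the surrounding variables are illustrative:

import metrics

declareGauge(example_channels, "open example channels", labels = ["initiator", "peer"])

proc updateChannelGauge(count: int, initiator: bool, peer: string) =
  example_channels.set(count.int64, labelValues = [$initiator, peer])
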
channels = m.getChannelList(initiator) - if id notin channels: - - trace "Channel not found, skipping", id = id, - initiator = initiator, - msg = msgType, - oid = m.oid - continue - channel = channels[id] - - logScope: - id = id - initiator = initiator - msgType = msgType - size = data.len - muxer_oid = m.oid - - case msgType: - of MessageType.New: - let name = string.fromBytes(data) - channel = await m.newStreamInternal(false, id, name) - - trace "created channel", name = channel.name, - oid = channel.oid - - if not isNil(m.streamHandler): - # launch handler task - asyncCheck m.handleStream(channel) - - of MessageType.MsgIn, MessageType.MsgOut: - logScope: - name = channel.name - oid = channel.oid - - trace "pushing data to channel" - - if data.len > MaxMsgSize: - raise newLPStreamLimitError() - await channel.pushTo(data) - of MessageType.CloseIn, MessageType.CloseOut: - logScope: - name = channel.name - oid = channel.oid - - trace "closing channel" - - await channel.closeRemote() - m.getChannelList(initiator).del(id) - trace "deleted channel" - of MessageType.ResetIn, MessageType.ResetOut: - logScope: - name = channel.name - oid = channel.oid - - trace "resetting channel" - - await channel.reset() - m.getChannelList(initiator).del(id) - trace "deleted channel" - finally: - trace "stopping mplex main loop", oid = m.oid + defer: + trace "stopping mplex main loop", oid = $m.oid await m.close() + + while not m.connection.closed: + trace "waiting for data", oid = $m.oid + let (id, msgType, data) = await m.connection.readMsg() + trace "read message from connection", id = id, + msgType = msgType, + data = data.shortLog, + oid = $m.oid + + let initiator = bool(ord(msgType) and 1) + var channel: LPChannel + if MessageType(msgType) != MessageType.New: + let channels = m.getChannelList(initiator) + if id notin channels: + + trace "Channel not found, skipping", id = id, + initiator = initiator, + msg = msgType, + oid = $m.oid + continue + channel = channels[id] + + logScope: + id = id + initiator = initiator + msgType = msgType + size = data.len + muxer_oid = $m.oid + + case msgType: + of MessageType.New: + let name = string.fromBytes(data) + channel = await m.newStreamInternal(false, id, name) + + trace "created channel", name = channel.name, + oid = $channel.oid + + if not isNil(m.streamHandler): + # launch handler task + asyncCheck m.handleStream(channel) + + of MessageType.MsgIn, MessageType.MsgOut: + logScope: + name = channel.name + oid = $channel.oid + + trace "pushing data to channel" + + if data.len > MaxMsgSize: + raise newLPStreamLimitError() + await channel.pushTo(data) + + of MessageType.CloseIn, MessageType.CloseOut: + logScope: + name = channel.name + oid = $channel.oid + + trace "closing channel" + + await channel.closeRemote() + await m.cleanupChann(channel) + + trace "deleted channel" + of MessageType.ResetIn, MessageType.ResetOut: + logScope: + name = channel.name + oid = $channel.oid + + trace "resetting channel" + + await channel.reset() + await m.cleanupChann(channel) + + trace "deleted channel" + except CancelledError as exc: + raise exc except CatchableError as exc: - trace "Exception occurred", exception = exc.msg, oid = m.oid + trace "Exception occurred", exception = exc.msg, oid = $m.oid proc newMplex*(conn: Connection, maxChanns: uint = MaxChannels): Mplex = @@ -165,14 +201,6 @@ proc newMplex*(conn: Connection, when chronicles.enabledLogLevel == LogLevel.TRACE: result.oid = genOid() -proc cleanupChann(m: Mplex, chann: LPChannel) {.async, inline.} = - ## remove the local 
channel from the internal tables - ## - await chann.closeEvent.wait() - if not isNil(chann): - m.getChannelList(true).del(chann.id) - trace "cleaned up channel", id = chann.id - method newStream*(m: Mplex, name: string = "", lazy: bool = false): Future[Connection] {.async, gcsafe.} = @@ -187,19 +215,17 @@ method close*(m: Mplex) {.async, gcsafe.} = if m.isClosed: return - try: - trace "closing mplex muxer", oid = m.oid - let channs = toSeq(m.remote.values) & - toSeq(m.local.values) - - for chann in channs: - try: - await chann.reset() - except CatchableError as exc: - warn "error resetting channel", exc = exc.msg - - await m.connection.close() - finally: + defer: m.remote.clear() m.local.clear() m.isClosed = true + + trace "closing mplex muxer", oid = $m.oid + let channs = toSeq(m.remote.values) & + toSeq(m.local.values) + + for chann in channs: + await chann.reset() + await m.cleanupChann(chann) + + await m.connection.close() diff --git a/libp2p/muxers/muxer.nim b/libp2p/muxers/muxer.nim index 999188b60..2d6116037 100644 --- a/libp2p/muxers/muxer.nim +++ b/libp2p/muxers/muxer.nim @@ -63,8 +63,12 @@ method init(c: MuxerProvider) = futs &= c.muxerHandler(muxer) checkFutures(await allFinished(futs)) + except CancelledError as exc: + raise exc except CatchableError as exc: trace "exception in muxer handler", exc = exc.msg, peer = $conn, proto=proto + finally: + await conn.close() c.handler = handler diff --git a/libp2p/peerinfo.nim b/libp2p/peerinfo.nim index 31c2a6bd7..bfe5048ba 100644 --- a/libp2p/peerinfo.nim +++ b/libp2p/peerinfo.nim @@ -38,7 +38,8 @@ type key: Option[PublicKey] proc id*(p: PeerInfo): string = - p.peerId.pretty() + if not(isNil(p)): + return p.peerId.pretty() proc `$`*(p: PeerInfo): string = p.id diff --git a/libp2p/protocols/identify.nim b/libp2p/protocols/identify.nim index 3c25f0984..b2d3c501a 100644 --- a/libp2p/protocols/identify.nim +++ b/libp2p/protocols/identify.nim @@ -113,13 +113,15 @@ proc newIdentify*(peerInfo: PeerInfo): Identify = method init*(p: Identify) = proc handle(conn: Connection, proto: string) {.async, gcsafe, closure.} = try: - try: - trace "handling identify request", oid = conn.oid - var pb = encodeMsg(p.peerInfo, conn.observedAddr) - await conn.writeLp(pb.buffer) - finally: + defer: trace "exiting identify handler", oid = conn.oid await conn.close() + + trace "handling identify request", oid = conn.oid + var pb = encodeMsg(p.peerInfo, conn.observedAddr) + await conn.writeLp(pb.buffer) + except CancelledError as exc: + raise exc except CatchableError as exc: trace "exception in identify handler", exc = exc.msg diff --git a/libp2p/protocols/pubsub/gossipsub.nim b/libp2p/protocols/pubsub/gossipsub.nim index 028a6ce12..64e8a82d3 100644 --- a/libp2p/protocols/pubsub/gossipsub.nim +++ b/libp2p/protocols/pubsub/gossipsub.nim @@ -52,7 +52,7 @@ type gossip*: Table[string, seq[ControlIHave]] # pending gossip control*: Table[string, ControlMessage] # pending control messages mcache*: MCache # messages cache - heartbeatFut: Future[void] # cancellation future for heartbeat interval + heartbeatFut: Future[void] # cancellation future for heartbeat interval heartbeatRunning: bool heartbeatLock: AsyncLock # heartbeat lock to prevent two consecutive concurrent heartbeats @@ -159,6 +159,8 @@ proc rebalanceMesh(g: GossipSub, topic: string) {.async.} = trace "mesh balanced, got peers", peers = g.mesh.getOrDefault(topic).len, topicId = topic + except CancelledError as exc: + raise exc except CatchableError as exc: trace "exception occurred re-balancing mesh", exc = 
exc.msg @@ -227,12 +229,10 @@ proc heartbeat(g: GossipSub) {.async.} = checkFutures(await allFinished(sent)) g.mcache.shift() # shift the cache + except CancelledError as exc: + raise exc except CatchableError as exc: trace "exception ocurred in gossipsub heartbeat", exc = exc.msg - # sleep less in the case of an error - # but still throttle - await sleepAsync(100.millis) - continue await sleepAsync(1.seconds) diff --git a/libp2p/protocols/pubsub/pubsub.nim b/libp2p/protocols/pubsub/pubsub.nim index 8a0bbab5b..da1c24a0c 100644 --- a/libp2p/protocols/pubsub/pubsub.nim +++ b/libp2p/protocols/pubsub/pubsub.nim @@ -104,13 +104,13 @@ method handleDisconnect*(p: PubSub, peer: PubSubPeer) {.async, base.} = p.peers.del(peer.id) # metrics - libp2p_pubsub_peers.dec() + libp2p_pubsub_peers.set(p.peers.len.int64) proc cleanUpHelper(p: PubSub, peer: PubSubPeer) {.async.} = try: await p.cleanupLock.acquire() peer.refs.dec() # decrement refcount - if peer.refs == 0: + if peer.refs <= 0: await p.handleDisconnect(peer) finally: p.cleanupLock.release() @@ -119,24 +119,23 @@ proc getPeer(p: PubSub, peerInfo: PeerInfo, proto: string): PubSubPeer = if peerInfo.id in p.peers: - result = p.peers[peerInfo.id] - return + return p.peers[peerInfo.id] # create new pubsub peer let peer = newPubSubPeer(peerInfo, proto) trace "created new pubsub peer", peerId = peer.id # metrics - libp2p_pubsub_peers.inc() p.peers[peer.id] = peer peer.refs.inc # increment reference count peer.observers = p.observers - result = peer + libp2p_pubsub_peers.set(p.peers.len.int64) + return peer proc internalCleanup(p: PubSub, conn: Connection) {.async.} = # handle connection close - if conn.closed: + if isNil(conn): return var peer = p.getPeer(conn.peerInfo, p.codec) @@ -168,6 +167,7 @@ method handleConn*(p: PubSub, # call pubsub rpc handler await p.rpcHandler(peer, msgs) + asyncCheck p.internalCleanup(conn) let peer = p.getPeer(conn.peerInfo, proto) let topics = toSeq(p.topics.keys) if topics.len > 0: @@ -176,18 +176,27 @@ method handleConn*(p: PubSub, peer.handler = handler await peer.handle(conn) # spawn peer read loop trace "pubsub peer handler ended, cleaning up" - await p.internalCleanup(conn) + except CancelledError as exc: + await conn.close() + raise exc except CatchableError as exc: trace "exception ocurred in pubsub handle", exc = exc.msg + await conn.close() method subscribeToPeer*(p: PubSub, conn: Connection) {.base, async.} = - var peer = p.getPeer(conn.peerInfo, p.codec) - trace "setting connection for peer", peerId = conn.peerInfo.id - if not peer.isConnected: - peer.conn = conn + if not(isNil(conn)): + let peer = p.getPeer(conn.peerInfo, p.codec) + trace "setting connection for peer", peerId = conn.peerInfo.id + if not peer.connected: + peer.conn = conn - asyncCheck p.internalCleanup(conn) + asyncCheck p.internalCleanup(conn) + +proc connected*(p: PubSub, peer: PeerInfo): bool = + let peer = p.getPeer(peer, p.codec) + if not(isNil(peer)): + return peer.connected method unsubscribe*(p: PubSub, topics: seq[TopicPair]) {.base, async.} = @@ -309,7 +318,8 @@ proc newPubSub*(P: typedesc[PubSub], msgIdProvider: msgIdProvider) result.initPubSub() -proc addObserver*(p: PubSub; observer: PubSubObserver) = p.observers[] &= observer +proc addObserver*(p: PubSub; observer: PubSubObserver) = + p.observers[] &= observer proc removeObserver*(p: PubSub; observer: PubSubObserver) = let idx = p.observers[].find(observer) diff --git a/libp2p/protocols/pubsub/pubsubpeer.nim b/libp2p/protocols/pubsub/pubsubpeer.nim index 3fb437772..1ea920085 
100644 --- a/libp2p/protocols/pubsub/pubsubpeer.nim +++ b/libp2p/protocols/pubsub/pubsubpeer.nim @@ -47,8 +47,8 @@ type proc id*(p: PubSubPeer): string = p.peerInfo.id -proc isConnected*(p: PubSubPeer): bool = - (not isNil(p.sendConn)) +proc connected*(p: PubSubPeer): bool = + not(isNil(p.sendConn)) proc `conn=`*(p: PubSubPeer, conn: Connection) = if not(isNil(conn)): @@ -126,8 +126,10 @@ proc send*(p: PubSubPeer, msgs: seq[RPCMsg]) {.async.} = try: trace "about to send message", peer = p.id, encoded = digest - await p.onConnect.wait() - if p.isConnected: # this can happen if the remote disconnected + if not p.onConnect.isSet: + await p.onConnect.wait() + + if p.connected: # this can happen if the remote disconnected trace "sending encoded msgs to peer", peer = p.id, encoded = encoded.buffer.shortLog await p.sendConn.writeLp(encoded.buffer) @@ -139,10 +141,14 @@ proc send*(p: PubSubPeer, msgs: seq[RPCMsg]) {.async.} = # metrics libp2p_pubsub_sent_messages.inc(labelValues = [p.id, t]) + except CancelledError as exc: + raise exc except CatchableError as exc: trace "unable to send to remote", exc = exc.msg - p.sendConn = nil - p.onConnect.clear() + if not(isNil(p.sendConn)): + await p.sendConn.close() + p.sendConn = nil + p.onConnect.clear() # if no connection has been set, # queue messages until a connection diff --git a/libp2p/protocols/secure/secure.nim b/libp2p/protocols/secure/secure.nim index baeaece02..3aa354f92 100644 --- a/libp2p/protocols/secure/secure.nim +++ b/libp2p/protocols/secure/secure.nim @@ -72,6 +72,10 @@ method init*(s: Secure) {.gcsafe.} = # We don't need the result but we definitely need to await the handshake discard await s.handleConn(conn, false) trace "connection secured" + except CancelledError as exc: + warn "securing connection canceled" + await conn.close() + raise except CatchableError as exc: warn "securing connection failed", msg = exc.msg await conn.close() @@ -79,31 +83,20 @@ method init*(s: Secure) {.gcsafe.} = s.handler = handle method secure*(s: Secure, conn: Connection, initiator: bool): Future[Connection] {.async, base, gcsafe.} = - try: - result = await s.handleConn(conn, initiator) - except CancelledError as exc: - raise exc - except CatchableError as exc: - warn "securing connection failed", msg = exc.msg - return nil + result = await s.handleConn(conn, initiator) method readOnce*(s: SecureConn, pbytes: pointer, nbytes: int): Future[int] {.async, gcsafe.} = - try: - if nbytes == 0: - return 0 + if nbytes == 0: + return 0 - if s.buf.data().len() == 0: - let buf = await s.readMessage() - if buf.len == 0: - raise newLPStreamIncompleteError() - s.buf.add(buf) + if s.buf.data().len() == 0: + let buf = await s.readMessage() + if buf.len == 0: + raise newLPStreamIncompleteError() + s.buf.add(buf) - var p = cast[ptr UncheckedArray[byte]](pbytes) - return s.buf.consumeTo(toOpenArray(p, 0, nbytes - 1)) - except CatchableError as exc: - trace "exception reading from secure connection", exc = exc.msg, oid = s.oid - await s.close() # make sure to close the wrapped connection - raise exc + var p = cast[ptr UncheckedArray[byte]](pbytes) + return s.buf.consumeTo(toOpenArray(p, 0, nbytes - 1)) diff --git a/libp2p/stream/bufferstream.nim b/libp2p/stream/bufferstream.nim index e879766cb..c15fa7bf5 100644 --- a/libp2p/stream/bufferstream.nim +++ b/libp2p/stream/bufferstream.nim @@ -128,19 +128,19 @@ proc initBufferStream*(s: BufferStream, if not(isNil(handler)): s.writeHandler = proc (data: seq[byte]) {.async, gcsafe.} = - try: - # Using a lock here to guarantee - # 
proper write ordering. This is - # specially important when - # implementing half-closed in mplex - # or other functionality that requires - # strict message ordering - await s.writeLock.acquire() - await handler(data) - finally: + defer: s.writeLock.release() - trace "created bufferstream", oid = s.oid + # Using a lock here to guarantee + # proper write ordering. This is + # specially important when + # implementing half-closed in mplex + # or other functionality that requires + # strict message ordering + await s.writeLock.acquire() + await handler(data) + + trace "created bufferstream", oid = $s.oid proc newBufferStream*(handler: WriteHandler = nil, size: int = DefaultBufferSize): BufferStream = @@ -173,31 +173,31 @@ method pushTo*(s: BufferStream, data: seq[byte]) {.base, async.} = if s.atEof: raise newLPStreamEOFError() - try: - await s.lock.acquire() - var index = 0 - while not s.closed(): - while index < data.len and s.readBuf.len < s.maxSize: - s.readBuf.addLast(data[index]) - inc(index) - # trace "pushTo()", msg = "added " & $s.len & " bytes to readBuf", oid = s.oid - - # resolve the next queued read request - if s.readReqs.len > 0: - s.readReqs.popFirst().complete() - # trace "pushTo(): completed a readReqs future", oid = s.oid - - if index >= data.len: - return - - # if we couldn't transfer all the data to the - # internal buf wait on a read event - await s.dataReadEvent.wait() - s.dataReadEvent.clear() - finally: + defer: # trace "ended", size = s.len s.lock.release() + await s.lock.acquire() + var index = 0 + while not s.closed(): + while index < data.len and s.readBuf.len < s.maxSize: + s.readBuf.addLast(data[index]) + inc(index) + # trace "pushTo()", msg = "added " & $s.len & " bytes to readBuf", oid = s.oid + + # resolve the next queued read request + if s.readReqs.len > 0: + s.readReqs.popFirst().complete() + # trace "pushTo(): completed a readReqs future", oid = s.oid + + if index >= data.len: + return + + # if we couldn't transfer all the data to the + # internal buf wait on a read event + await s.dataReadEvent.wait() + s.dataReadEvent.clear() + method readOnce*(s: BufferStream, pbytes: pointer, nbytes: int): @@ -290,8 +290,10 @@ method close*(s: BufferStream) {.async, gcsafe.} = await procCall Connection(s).close() inc getBufferStreamTracker().closed - trace "bufferstream closed", oid = s.oid + trace "bufferstream closed", oid = $s.oid else: trace "attempt to close an already closed bufferstream", trace = getStackTrace() + except CancelledError as exc: + raise except CatchableError as exc: trace "error closing buffer stream", exc = exc.msg diff --git a/libp2p/stream/lpstream.nim b/libp2p/stream/lpstream.nim index 410d3ed46..f7a4eda80 100644 --- a/libp2p/stream/lpstream.nim +++ b/libp2p/stream/lpstream.nim @@ -74,7 +74,7 @@ method initStream*(s: LPStream) {.base.} = s.oid = genOid() libp2p_open_streams.inc(labelValues = [s.objName]) - trace "stream created", oid = s.oid, name = s.objName + trace "stream created", oid = $s.oid, name = s.objName # TODO: debuging aid to troubleshoot streams open/close # try: @@ -150,7 +150,6 @@ proc readVarint*(conn: LPStream): Future[uint64] {.async, gcsafe.} = for i in 0.. 
MaxConnectionsPerPeer: - warn "disconnecting peer, too many connections", peer = $conn.peerInfo, - conns = s.connections - .getOrDefault(id).len - await muxer.close() - await s.disconnect(conn.peerInfo) - raise newTooManyConnections() + if isNil(muxer): + return - s.connections.mgetOrPut( - id, - newSeq[ConnectionHolder]()) - .add(ConnectionHolder(conn: conn, dir: dir)) + let conn = muxer.connection + if isNil(conn): + return - s.muxed.mgetOrPut( - muxer.connection.peerInfo.id, - newSeq[MuxerHolder]()) - .add(MuxerHolder(muxer: muxer, handle: handle, dir: dir)) + let id = conn.peerInfo.id + if s.connections.getOrDefault(id).len > MaxConnectionsPerPeer: + warn "disconnecting peer, too many connections", peer = $conn.peerInfo, + conns = s.connections + .getOrDefault(id).len + await s.disconnect(conn.peerInfo) + raise newTooManyConnections() + + s.connections.mgetOrPut( + id, + newSeq[ConnectionHolder]()) + .add(ConnectionHolder(conn: conn, dir: dir)) + + s.muxed.mgetOrPut( + muxer.connection.peerInfo.id, + newSeq[MuxerHolder]()) + .add(MuxerHolder(muxer: muxer, handle: handle, dir: dir)) proc secure(s: Switch, conn: Connection): Future[Connection] {.async, gcsafe.} = if s.secureManagers.len <= 0: @@ -164,50 +167,41 @@ proc secure(s: Switch, conn: Connection): Future[Connection] {.async, gcsafe.} = if manager.len == 0: raise newException(CatchableError, "Unable to negotiate a secure channel!") - trace "securing connection", codec=manager + trace "securing connection", codec = manager let secureProtocol = s.secureManagers.filterIt(it.codec == manager) # ms.select should deal with the correctness of this # let's avoid duplicating checks but detect if it fails to do it properly doAssert(secureProtocol.len > 0) result = await secureProtocol[0].secure(conn, true) -proc identify(s: Switch, conn: Connection): Future[PeerInfo] {.async, gcsafe.} = +proc identify(s: Switch, conn: Connection) {.async, gcsafe.} = ## identify the connection - if not isNil(conn.peerInfo): - result = conn.peerInfo + if (await s.ms.select(conn, s.identity.codec)): + let info = await s.identity.identify(conn, conn.peerInfo) - try: - if (await s.ms.select(conn, s.identity.codec)): - let info = await s.identity.identify(conn, conn.peerInfo) + if info.pubKey.isNone and isNil(conn): + raise newException(CatchableError, + "no public key provided and no existing peer identity found") - if info.pubKey.isNone and isNil(result): - raise newException(CatchableError, - "no public key provided and no existing peer identity found") + if isNil(conn.peerInfo): + conn.peerInfo = PeerInfo.init(info.pubKey.get()) - if info.pubKey.isSome: - result = PeerInfo.init(info.pubKey.get()) - trace "identify: identified remote peer", peer = result.id + if info.addrs.len > 0: + conn.peerInfo.addrs = info.addrs - if info.addrs.len > 0: - result.addrs = info.addrs + if info.agentVersion.isSome: + conn.peerInfo.agentVersion = info.agentVersion.get() - if info.agentVersion.isSome: - result.agentVersion = info.agentVersion.get() + if info.protoVersion.isSome: + conn.peerInfo.protoVersion = info.protoVersion.get() - if info.protoVersion.isSome: - result.protoVersion = info.protoVersion.get() + if info.protos.len > 0: + conn.peerInfo.protocols = info.protos - if info.protos.len > 0: - result.protocols = info.protos + trace "identify: identified remote peer", peer = $conn.peerInfo - trace "identify", info = shortLog(result) - except IdentityInvalidMsgError as exc: - debug "identify: invalid message", msg = exc.msg - except IdentityNoMatchError as exc: - debug 
"identify: peer's public keys don't match ", msg = exc.msg - -proc mux(s: Switch, conn: Connection): Future[void] {.async, gcsafe.} = +proc mux(s: Switch, conn: Connection) {.async, gcsafe.} = ## mux incoming connection trace "muxing connection", peer = $conn @@ -224,48 +218,61 @@ proc mux(s: Switch, conn: Connection): Future[void] {.async, gcsafe.} = # create new muxer for connection let muxer = s.muxers[muxerName].newMuxer(conn) - trace "found a muxer", name=muxerName, peer = $conn + trace "found a muxer", name = muxerName, peer = $conn # install stream handler muxer.streamHandler = s.streamHandler # new stream for identify var stream = await muxer.newStream() + var handlerFut: Future[void] + + defer: + if not(isNil(stream)): + await stream.close() # close identify stream + # call muxer handler, this should # not end until muxer ends - let handlerFut = muxer.handle() + handlerFut = muxer.handle() - try: - # do identify first, so that we have a - # PeerInfo in case we didn't before - conn.peerInfo = await s.identify(stream) - finally: - await stream.close() # close identify stream + # do identify first, so that we have a + # PeerInfo in case we didn't before + await s.identify(stream) if isNil(conn.peerInfo): await muxer.close() - return + raise newException(CatchableError, + "unable to identify peer, aborting upgrade") # store it in muxed connections if we have a peer for it trace "adding muxer for peer", peer = conn.peerInfo.id await s.storeConn(muxer, Direction.Out, handlerFut) proc cleanupConn(s: Switch, conn: Connection) {.async, gcsafe.} = - try: - if not isNil(conn.peerInfo): - let id = conn.peerInfo.id - trace "cleaning up connection for peer", peerId = id + if isNil(conn): + return + + defer: + await conn.close() + libp2p_peers.set(s.connections.len.int64) + + if isNil(conn.peerInfo): + return + + let id = conn.peerInfo.id + trace "cleaning up connection for peer", peerId = id + if id in s.muxed: + let muxerHolder = s.muxed[id] + .filterIt( + it.muxer.connection == conn + ) + + if muxerHolder.len > 0: + await muxerHolder[0].muxer.close() + if not(isNil(muxerHolder[0].handle)): + await muxerHolder[0].handle + if id in s.muxed: - let muxerHolder = s.muxed[id] - .filterIt( - it.muxer.connection == conn - ) - - if muxerHolder.len > 0: - await muxerHolder[0].muxer.close() - if not(isNil(muxerHolder[0].handle)): - await muxerHolder[0].handle - s.muxed[id].keepItIf( it.muxer.connection != conn ) @@ -273,25 +280,17 @@ proc cleanupConn(s: Switch, conn: Connection) {.async, gcsafe.} = if s.muxed[id].len == 0: s.muxed.del(id) - if id in s.connections: - s.connections[id].keepItIf( - it.conn != conn - ) + if id in s.connections: + s.connections[id].keepItIf( + it.conn != conn + ) - if s.connections[id].len == 0: - s.connections.del(id) + if s.connections[id].len == 0: + s.connections.del(id) - await conn.close() - s.dialedPubSubPeers.excl(id) - - # TODO: Investigate cleanupConn() always called twice for one peer. - if not(conn.peerInfo.isClosed()): - conn.peerInfo.close() - - except CatchableError as exc: - trace "exception cleaning up connection", exc = exc.msg - finally: - libp2p_peers.set(s.connections.len.int64) + # TODO: Investigate cleanupConn() always called twice for one peer. 
+ if not(conn.peerInfo.isClosed()): + conn.peerInfo.close() proc disconnect*(s: Switch, peer: PeerInfo) {.async, gcsafe.} = let connections = s.connections.getOrDefault(peer.id) @@ -308,27 +307,25 @@ proc getMuxedStream(s: Switch, peerInfo: PeerInfo): Future[Connection] {.async, return await muxer.newStream() proc upgradeOutgoing(s: Switch, conn: Connection): Future[Connection] {.async, gcsafe.} = - trace "handling connection", conn = $conn, oid = conn.oid + logScope: + conn = $conn + oid = $conn.oid let sconn = await s.secure(conn) # secure the connection if isNil(sconn): - trace "unable to secure connection, stopping upgrade", conn = $conn, - oid = conn.oid - await conn.close() - return + raise newException(CatchableError, + "unable to secure connection, stopping upgrade") + trace "upgrading connection" await s.mux(sconn) # mux it if possible - if isNil(conn.peerInfo): - trace "unable to mux connection, stopping upgrade", conn = $conn, - oid = conn.oid + if isNil(sconn.peerInfo): await sconn.close() - return + raise newException(CatchableError, + "unable to mux connection, stopping upgrade") libp2p_peers.set(s.connections.len.int64) - trace "succesfully upgraded outgoing connection", conn = $conn, - oid = conn.oid, - uoid = sconn.oid - result = sconn + trace "succesfully upgraded outgoing connection", uoid = sconn.oid + return sconn proc upgradeIncoming(s: Switch, conn: Connection) {.async, gcsafe.} = trace "upgrading incoming connection", conn = $conn, oid = conn.oid @@ -338,45 +335,38 @@ proc upgradeIncoming(s: Switch, conn: Connection) {.async, gcsafe.} = proc securedHandler (conn: Connection, proto: string) {.async, gcsafe, closure.} = + + var sconn: Connection + trace "Securing connection", oid = conn.oid + let secure = s.secureManagers.filterIt(it.codec == proto)[0] + try: - trace "Securing connection", oid = conn.oid - let secure = s.secureManagers.filterIt(it.codec == proto)[0] - let sconn = await secure.secure(conn, false) - if sconn.isNil: + sconn = await secure.secure(conn, false) + if isNil(sconn): return + defer: + await sconn.close() + # add the muxer for muxer in s.muxers.values: ms.addHandler(muxer.codec, muxer) # handle subsequent requests - try: - await ms.handle(sconn) - finally: - await sconn.close() + await ms.handle(sconn) except CancelledError as exc: raise exc except CatchableError as exc: debug "ending secured handler", err = exc.msg - try: - try: - if (await ms.select(conn)): # just handshake - # add the secure handlers - for k in s.secureManagers: - ms.addHandler(k.codec, securedHandler) + if (await ms.select(conn)): # just handshake + # add the secure handlers + for k in s.secureManagers: + ms.addHandler(k.codec, securedHandler) - # handle secured connections - await ms.handle(conn) - finally: - await conn.close() - except CancelledError as exc: - raise exc - except CatchableError as exc: - trace "error in multistream", err = exc.msg - -proc subscribeToPeer*(s: Switch, peerInfo: PeerInfo) {.async, gcsafe.} + # handle secured connections + await ms.handle(conn) proc internalConnect(s: Switch, peer: PeerInfo): Future[Connection] {.async.} = @@ -388,79 +378,88 @@ proc internalConnect(s: Switch, let lock = s.dialLock.mgetOrPut(id, newAsyncLock()) var conn: Connection - try: - await lock.acquire() - trace "about to dial peer", peer = id - conn = s.selectConn(peer) - if conn.isNil or conn.closed: - trace "Dialing peer", peer = id - for t in s.transports: # for each transport - for a in peer.addrs: # for each address - if t.handles(a): # check if it can dial it - 
trace "Dialing address", address = $a - try: - conn = await t.dial(a) - libp2p_dialed_peers.inc() - except CatchableError as exc: - trace "dialing failed", exc = exc.msg - libp2p_failed_dials.inc() - continue - - # make sure to assign the peer to the connection - conn.peerInfo = peer - conn = await s.upgradeOutgoing(conn) - if isNil(conn): - libp2p_failed_upgrade.inc() - continue - - conn.closeEvent.wait() - .addCallback do(udata: pointer): - asyncCheck s.cleanupConn(conn) - break - else: - trace "Reusing existing connection", oid = conn.oid - except CatchableError as exc: - trace "exception connecting to peer", exc = exc.msg - if not(isNil(conn)): - await conn.close() - - raise exc # re-raise - finally: + defer: if lock.locked(): lock.release() - if not isNil(conn): - doAssert(conn.peerInfo.id in s.connections, "connection not tracked!") - trace "dial succesfull", oid = conn.oid - await s.subscribeToPeer(peer) - result = conn + await lock.acquire() + trace "about to dial peer", peer = id + conn = s.selectConn(peer) + if conn.isNil or conn.closed: + trace "Dialing peer", peer = id + for t in s.transports: # for each transport + for a in peer.addrs: # for each address + if t.handles(a): # check if it can dial it + trace "Dialing address", address = $a + try: + conn = await t.dial(a) + libp2p_dialed_peers.inc() + except CancelledError as exc: + trace "dialing canceled", exc = exc.msg + raise + except CatchableError as exc: + trace "dialing failed", exc = exc.msg + libp2p_failed_dials.inc() + continue + + # make sure to assign the peer to the connection + conn.peerInfo = peer + try: + conn = await s.upgradeOutgoing(conn) + except CatchableError as exc: + if not(isNil(conn)): + await conn.close() + + trace "Unable to establish outgoing link", exc = exc.msg + raise exc + + if isNil(conn): + libp2p_failed_upgrade.inc() + continue + + conn.closeEvent.wait() + .addCallback do(udata: pointer): + asyncCheck s.cleanupConn(conn) + break + else: + trace "Reusing existing connection", oid = conn.oid + + if isNil(conn): + raise newException(CatchableError, + "Unable to establish outgoing link") + + if conn.closed or conn.atEof: + await conn.close() + raise newException(CatchableError, + "Connection dead on arrival") + + doAssert(conn.peerInfo.id in s.connections, + "connection not tracked!") + + trace "dial succesfull", oid = conn.oid + await s.subscribeToPeer(peer) + return conn proc connect*(s: Switch, peer: PeerInfo) {.async.} = var conn = await s.internalConnect(peer) - if isNil(conn): - raise newException(CatchableError, "Unable to connect to peer") proc dial*(s: Switch, peer: PeerInfo, proto: string): Future[Connection] {.async.} = var conn = await s.internalConnect(peer) - if isNil(conn): - raise newException(CatchableError, "Unable to establish outgoing link") - - if conn.closed: - raise newException(CatchableError, "Connection dead on arrival") - - result = conn let stream = await s.getMuxedStream(peer) - if not isNil(stream): - trace "Connection is muxed, return muxed stream", oid = conn.oid - result = stream - trace "Attempting to select remote", proto = proto, oid = conn.oid + if isNil(stream): + await conn.close() + raise newException(CatchableError, "Couldn't get muxed stream") - if not await s.ms.select(result, proto): + trace "Attempting to select remote", proto = proto, oid = conn.oid + if not await s.ms.select(stream, proto): + await stream.close() raise newException(CatchableError, "Unable to select sub-protocol " & proto) + return stream + proc mount*[T: LPProtocol](s: Switch, proto: 
T) {.gcsafe.} = if isNil(proto.handler): raise newException(CatchableError, @@ -477,10 +476,10 @@ proc start*(s: Switch): Future[seq[Future[void]]] {.async, gcsafe.} = proc handle(conn: Connection): Future[void] {.async, closure, gcsafe.} = try: - try: - await s.upgradeIncoming(conn) # perform upgrade on incoming connection - finally: + defer: await s.cleanupConn(conn) + + await s.upgradeIncoming(conn) # perform upgrade on incoming connection except CancelledError as exc: raise exc except CatchableError as exc: @@ -501,52 +500,61 @@ proc start*(s: Switch): Future[seq[Future[void]]] {.async, gcsafe.} = result = startFuts # listen for incoming connections proc stop*(s: Switch) {.async.} = - try: - trace "stopping switch" + trace "stopping switch" - # we want to report errors but we do not want to fail - # or crash here, cos we need to clean possibly MANY items - # and any following conn/transport won't be cleaned up - if s.pubSub.isSome: - await s.pubSub.get().stop() + # we want to report errors but we do not want to fail + # or crash here, cos we need to clean possibly MANY items + # and any following conn/transport won't be cleaned up + if s.pubSub.isSome: + await s.pubSub.get().stop() - for conns in toSeq(s.connections.values): - for conn in conns: - try: - await s.cleanupConn(conn.conn) - except CatchableError as exc: - warn "error cleaning up connections" - - for t in s.transports: + for conns in toSeq(s.connections.values): + for conn in conns: try: - await t.close() + await s.cleanupConn(conn.conn) + except CancelledError as exc: + raise exc except CatchableError as exc: - warn "error cleaning up transports" + warn "error cleaning up connections" - trace "switch stopped" - except CatchableError as exc: - warn "error stopping switch", exc = exc.msg + for t in s.transports: + try: + await t.close() + except CancelledError as exc: + raise exc + except CatchableError as exc: + warn "error cleaning up transports" + + trace "switch stopped" proc subscribeToPeer*(s: Switch, peerInfo: PeerInfo) {.async, gcsafe.} = - trace "about to subscribe to pubsub peer", peer = peerInfo.shortLog() ## Subscribe to pub sub peer - if s.pubSub.isSome and (peerInfo.id notin s.dialedPubSubPeers): - let conn = await s.getMuxedStream(peerInfo) - if isNil(conn): + if s.pubSub.isSome and not(s.pubSub.get().connected(peerInfo)): + trace "about to subscribe to pubsub peer", peer = peerInfo.shortLog() + var stream: Connection + try: + stream = await s.getMuxedStream(peerInfo) + except CancelledError as exc: + if not(isNil(stream)): + await stream.close() + + raise exc + except CatchableError as exc: + trace "exception in subscribe to peer", peer = peerInfo.shortLog, + exc = exc.msg + if not(isNil(stream)): + await stream.close() + + if isNil(stream): trace "unable to subscribe to peer", peer = peerInfo.shortLog return - s.dialedPubSubPeers.incl(peerInfo.id) - try: - if (await s.ms.select(conn, s.pubSub.get().codec)): - await s.pubSub.get().subscribeToPeer(conn) - else: - await conn.close() - except CatchableError as exc: - trace "exception in subscribe to peer", peer = peerInfo.shortLog, exc = exc.msg - await conn.close() - finally: - s.dialedPubSubPeers.excl(peerInfo.id) + if not await s.ms.select(stream, s.pubSub.get().codec): + if not(isNil(stream)): + await stream.close() + return + + await s.pubSub.get().subscribeToPeer(stream) proc subscribe*(s: Switch, topic: string, handler: TopicHandler): Future[void] = @@ -594,6 +602,43 @@ proc removeValidator*(s: Switch, s.pubSub.get().removeValidator(topics, hook) +proc 
muxerHandler(s: Switch, muxer: Muxer) {.async, gcsafe.} = + var stream = await muxer.newStream() + defer: + if not(isNil(stream)): + await stream.close() + + trace "got new muxer" + + try: + # once we got a muxed connection, attempt to + # identify it + await s.identify(stream) + if isNil(stream.peerInfo): + await muxer.close() + return + + muxer.connection.peerInfo = stream.peerInfo + + # store muxer and muxed connection + await s.storeConn(muxer, Direction.In) + libp2p_peers.set(s.connections.len.int64) + + muxer.connection.closeEvent.wait() + .addCallback do(udata: pointer): + asyncCheck s.cleanupConn(muxer.connection) + + # try establishing a pubsub connection + await s.subscribeToPeer(muxer.connection.peerInfo) + + except CancelledError as exc: + await muxer.close() + raise exc + except CatchableError as exc: + await muxer.close() + libp2p_failed_upgrade.inc() + trace "exception in muxer handler", exc = exc.msg + proc newSwitch*(peerInfo: PeerInfo, transports: seq[Transport], identity: Identify, @@ -609,49 +654,25 @@ proc newSwitch*(peerInfo: PeerInfo, result.identity = identity result.muxers = muxers result.secureManagers = @secureManagers - result.dialedPubSubPeers = initHashSet[string]() let s = result # can't capture result result.streamHandler = proc(stream: Connection) {.async, gcsafe.} = try: trace "handling connection for", peerInfo = $stream - try: - await s.ms.handle(stream) # handle incoming connection - finally: - if not(stream.closed): + defer: + if not(isNil(stream)): await stream.close() + await s.ms.handle(stream) # handle incoming connection + except CancelledError as exc: + raise exc except CatchableError as exc: trace "exception in stream handler", exc = exc.msg result.mount(identity) for key, val in muxers: val.streamHandler = result.streamHandler - val.muxerHandler = proc(muxer: Muxer) {.async, gcsafe.} = - var stream: Connection - try: - trace "got new muxer" - stream = await muxer.newStream() - # once we got a muxed connection, attempt to - # identify it - muxer.connection.peerInfo = await s.identify(stream) - - # store muxer and muxed connection - await s.storeConn(muxer, Direction.In) - libp2p_peers.set(s.connections.len.int64) - - muxer.connection.closeEvent.wait() - .addCallback do(udata: pointer): - asyncCheck s.cleanupConn(muxer.connection) - - # try establishing a pubsub connection - await s.subscribeToPeer(muxer.connection.peerInfo) - - except CatchableError as exc: - libp2p_failed_upgrade.inc() - trace "exception in muxer handler", exc = exc.msg - finally: - if not(isNil(stream)): - await stream.close() + val.muxerHandler = proc(muxer: Muxer): Future[void] = + s.muxerHandler(muxer) if result.secureManagers.len <= 0: # use plain text if no secure managers are provided diff --git a/libp2p/transports/tcptransport.nim b/libp2p/transports/tcptransport.nim index a83292465..6edb510ea 100644 --- a/libp2p/transports/tcptransport.nim +++ b/libp2p/transports/tcptransport.nim @@ -97,14 +97,7 @@ proc connCb(server: StreamServer, raise exc except CatchableError as err: debug "Connection setup failed", err = err.msg - if not client.closed: - try: - client.close() - except CancelledError as err: - raise err - except CatchableError as err: - # shouldn't happen but.. 
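# Editor's sketch (not part of the patch): the muxer and switch hunks above
# start long-lived work with `asyncCheck` (stream handlers, cleanupConn via
# the closeEvent callback) instead of awaiting it. The caller keeps going,
# and the task is expected to catch CatchableError itself, since nothing
# awaits its future. Illustration only; `backgroundTask` is a made-up name:
import chronos

proc backgroundTask(msg: string) {.async.} =
  try:
    await sleepAsync(5.millis)
    echo "background task done: ", msg
  except CatchableError as exc:
    echo "background task failed: ", exc.msg

when isMainModule:
  asyncCheck backgroundTask("fire and forget")  # not awaited by the caller
  waitFor sleepAsync(20.millis)                 # let the task run to completion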
- warn "Error closing connection", err = err.msg + client.close() proc init*(T: type TcpTransport, flags: set[ServerFlags] = {}): T = result = T(flags: flags) diff --git a/tests/testmplex.nim b/tests/testmplex.nim index 613f1e3f0..987d8a734 100644 --- a/tests/testmplex.nim +++ b/tests/testmplex.nim @@ -243,7 +243,7 @@ suite "Mplex": await done.wait(1.seconds) await conn.close() - await mplexDialFut + await mplexDialFut.wait(1.seconds) await allFuturesThrowing( transport1.close(), transport2.close()) diff --git a/tests/testnoise.nim b/tests/testnoise.nim index d3a7769d4..b3f21b45f 100644 --- a/tests/testnoise.nim +++ b/tests/testnoise.nim @@ -71,8 +71,8 @@ proc createSwitch(ma: MultiAddress; outgoing: bool): (Switch, PeerInfo) = suite "Noise": teardown: for tracker in testTrackers(): - echo tracker.dump() - # check tracker.isLeaked() == false + # echo tracker.dump() + check tracker.isLeaked() == false test "e2e: handle write + noise": proc testListenerDialer(): Future[bool] {.async.} = From 9d33390239f850c27a75d119a36cf6de9716d357 Mon Sep 17 00:00:00 2001 From: Giovanni Petrantoni Date: Tue, 30 Jun 2020 00:17:58 +0900 Subject: [PATCH 19/28] restore utility module import --- libp2p/protocols/pubsub/gossipsub.nim | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/libp2p/protocols/pubsub/gossipsub.nim b/libp2p/protocols/pubsub/gossipsub.nim index 8f0db28f8..1e029dd3f 100644 --- a/libp2p/protocols/pubsub/gossipsub.nim +++ b/libp2p/protocols/pubsub/gossipsub.nim @@ -19,7 +19,8 @@ import pubsub, ../../peerinfo, ../../stream/connection, ../../peer, - ../../errors + ../../errors, + ../../utility logScope: topics = "gossipsub" From 509d84f03d7dd319e6d5da4fd3f27fcc419211e0 Mon Sep 17 00:00:00 2001 From: Giovanni Petrantoni Date: Tue, 30 Jun 2020 11:28:45 +0900 Subject: [PATCH 20/28] restore trace vs debug in gossip --- libp2p/protocols/pubsub/gossipsub.nim | 22 +++++++++++----------- 1 file changed, 11 insertions(+), 11 deletions(-) diff --git a/libp2p/protocols/pubsub/gossipsub.nim b/libp2p/protocols/pubsub/gossipsub.nim index 1e029dd3f..668c8434b 100644 --- a/libp2p/protocols/pubsub/gossipsub.nim +++ b/libp2p/protocols/pubsub/gossipsub.nim @@ -105,7 +105,7 @@ proc rebalanceMesh(g: GossipSub, topic: string) {.async.} = await sleepAsync(1.millis) # don't starve the event loop var id: string if topic in g.fanout and g.fanout.getOrDefault(topic).len > 0: - debug "getting peer from fanout", topic, + trace "getting peer from fanout", topic, peers = g.fanout.getOrDefault(topic).len id = sample(toSeq(g.fanout.getOrDefault(topic))) @@ -114,9 +114,9 @@ proc rebalanceMesh(g: GossipSub, topic: string) {.async.} = if id in g.fanout[topic]: continue # we already have this peer in the mesh, try again - debug "got fanout peer", peer = id + trace "got fanout peer", peer = id elif topic in g.gossipsub and g.gossipsub.getOrDefault(topic).len > 0: - debug "getting peer from gossipsub", topic, + trace "getting peer from gossipsub", topic, peers = g.gossipsub.getOrDefault(topic).len id = sample(toSeq(g.gossipsub[topic])) @@ -125,7 +125,7 @@ proc rebalanceMesh(g: GossipSub, topic: string) {.async.} = if id in g.mesh[topic]: continue # we already have this peer in the mesh, try again - debug "got gossipsub peer", peer = id + trace "got gossipsub peer", peer = id else: trace "no more peers" break @@ -157,7 +157,7 @@ proc rebalanceMesh(g: GossipSub, topic: string) {.async.} = libp2p_gossipsub_peers_per_topic_mesh .set(g.mesh.getOrDefault(topic).len.int64, labelValues = [topic]) - debug "mesh balanced, got 
peers", peers = g.mesh.getOrDefault(topic).len, + trace "mesh balanced, got peers", peers = g.mesh.getOrDefault(topic).len, topicId = topic except CatchableError as exc: warn "exception occurred re-balancing mesh", exc = exc.msg @@ -170,7 +170,7 @@ proc dropFanoutPeers(g: GossipSub) {.async.} = if Moment.now > val: dropping.add(topic) g.fanout.del(topic) - debug "dropping fanout topic", topic + trace "dropping fanout topic", topic for topic in dropping: g.lastFanoutPubSub.del(topic) @@ -280,7 +280,7 @@ method subscribeTopic*(g: GossipSub, g.gossipsub[topic] = initHashSet[string]() if subscribe: - debug "adding subscription for topic", peer = peerId, name = topic + trace "adding subscription for topic", peer = peerId, name = topic # subscribe remote peer to the topic g.gossipsub[topic].incl(peerId) else: @@ -291,7 +291,7 @@ method subscribeTopic*(g: GossipSub, libp2p_gossipsub_peers_per_topic_gossipsub .set(g.gossipsub[topic].len.int64, labelValues = [topic]) - debug "gossip peers", peers = g.gossipsub[topic].len, topic + trace "gossip peers", peers = g.gossipsub[topic].len, topic # also rebalance current topic if we are subbed to if topic in g.topics: @@ -505,7 +505,7 @@ method publish*(g: GossipSub, method start*(g: GossipSub) {.async.} = - debug "gossipsub start" + trace "gossipsub start" ## start pubsub ## start long running/repeating procedures @@ -520,7 +520,7 @@ method start*(g: GossipSub) {.async.} = g.heartbeatLock.release() method stop*(g: GossipSub) {.async.} = - debug "gossipsub stop" + trace "gossipsub stop" ## stop pubsub ## stop long running tasks @@ -530,7 +530,7 @@ method stop*(g: GossipSub) {.async.} = # stop heartbeat interval g.heartbeatRunning = false if not g.heartbeatFut.finished: - debug "awaiting last heartbeat" + trace "awaiting last heartbeat" await g.heartbeatFut g.heartbeatLock.release() From f49e59cb4e86b8342618a18062c88ef5d15f11aa Mon Sep 17 00:00:00 2001 From: Giovanni Petrantoni Date: Tue, 30 Jun 2020 11:36:47 +0900 Subject: [PATCH 21/28] improve fanout replenish behavior further --- libp2p/protocols/pubsub/gossipsub.nim | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/libp2p/protocols/pubsub/gossipsub.nim b/libp2p/protocols/pubsub/gossipsub.nim index 668c8434b..861a6c432 100644 --- a/libp2p/protocols/pubsub/gossipsub.nim +++ b/libp2p/protocols/pubsub/gossipsub.nim @@ -218,6 +218,11 @@ proc heartbeat(g: GossipSub) {.async.} = await g.rebalanceMesh(t) await g.dropFanoutPeers() + + # replenish known topics to the fanout + for t in toSeq(g.fanout.keys): + g.replenishFanout(t) + let peers = g.getGossipPeers() var sent: seq[Future[void]] for peer in peers.keys: @@ -470,8 +475,12 @@ method publish*(g: GossipSub, if topic in g.topics: # if we're subscribed use the mesh peers = g.mesh.getOrDefault(topic) else: # not subscribed, send to fanout peers - g.replenishFanout(topic) + # try optimistically peers = g.fanout.getOrDefault(topic) + if peers.len == 0: + # ok we had nothing.. 
let's try replenish inline + g.replenishFanout(topic) + peers = g.fanout.getOrDefault(topic) let msg = Message.init(g.peerInfo, data, topic, g.sign) From ec00c7fc50fdbc73a1d6051f92b48059fc7a5ec9 Mon Sep 17 00:00:00 2001 From: Giovanni Petrantoni Date: Wed, 1 Jul 2020 15:25:09 +0900 Subject: [PATCH 22/28] Peer resultification and defect only (#245) * Peer resultification and defect only * Fixing some tests * test fixes * Rename peer into peerid * better result error message in identify * further merge fixes --- docs/GETTING_STARTED.md | 2 +- docs/tutorial/directchat/second.nim | 2 +- docs/tutorial/second.nim | 2 +- examples/directchat.nim | 2 +- libp2p/crypto/crypto.nim | 2 +- libp2p/daemon/daemonapi.nim | 4 +- libp2p/multiaddress.nim | 3 +- libp2p/{peer.nim => peerid.nim} | 49 ++++++++++++++++-------- libp2p/peerinfo.nim | 8 ++-- libp2p/protocols/identify.nim | 22 ++++++----- libp2p/protocols/pubsub/floodsub.nim | 2 +- libp2p/protocols/pubsub/gossipsub.nim | 2 +- libp2p/protocols/pubsub/pubsub.nim | 2 +- libp2p/protocols/pubsub/pubsubpeer.nim | 2 +- libp2p/protocols/pubsub/rpc/message.nim | 2 +- libp2p/protocols/pubsub/rpc/messages.nim | 2 +- libp2p/protocols/pubsub/rpc/protobuf.nim | 4 +- libp2p/protocols/secure/noise.nim | 4 +- libp2p/protocols/secure/secio.nim | 6 +-- libp2p/standard_setup.nim | 4 +- libp2p/switch.nim | 2 +- tests/pubsub/testgossipsub.nim | 2 +- tests/pubsub/testmcache.nim | 20 +++++----- tests/pubsub/testmessage.nim | 2 +- tests/testdaemon.nim | 2 +- tests/testidentify.nim | 2 +- tests/testinterop.nim | 2 +- tests/testpeer.nim | 10 ++--- tests/testpeerinfo.nim | 18 ++++----- 29 files changed, 103 insertions(+), 83 deletions(-) rename libp2p/{peer.nim => peerid.nim} (83%) diff --git a/docs/GETTING_STARTED.md b/docs/GETTING_STARTED.md index 52ad72269..1c74e7ff2 100644 --- a/docs/GETTING_STARTED.md +++ b/docs/GETTING_STARTED.md @@ -18,7 +18,7 @@ import ../libp2p/[switch, multiaddress, peerinfo, crypto/crypto, - peer, + peerid, protocols/protocol, muxers/muxer, muxers/mplex/mplex, diff --git a/docs/tutorial/directchat/second.nim b/docs/tutorial/directchat/second.nim index d1858baae..de83d3db5 100644 --- a/docs/tutorial/directchat/second.nim +++ b/docs/tutorial/directchat/second.nim @@ -12,7 +12,7 @@ import ../libp2p/[switch, transports/tcptransport, multiaddress, peerinfo, - peer, + peerid, protocols/protocol, protocols/secure/secure, protocols/secure/secio, diff --git a/docs/tutorial/second.nim b/docs/tutorial/second.nim index d1858baae..de83d3db5 100644 --- a/docs/tutorial/second.nim +++ b/docs/tutorial/second.nim @@ -12,7 +12,7 @@ import ../libp2p/[switch, transports/tcptransport, multiaddress, peerinfo, - peer, + peerid, protocols/protocol, protocols/secure/secure, protocols/secure/secio, diff --git a/examples/directchat.nim b/examples/directchat.nim index 4afb181b6..22a9472f0 100644 --- a/examples/directchat.nim +++ b/examples/directchat.nim @@ -13,7 +13,7 @@ import ../libp2p/[switch, # manage transports, a single entry transports/tcptransport, # listen and dial to other peers using client-server protocol multiaddress, # encode different addressing schemes. 
For example, /ip4/7.7.7.7/tcp/6543 means it is using IPv4 protocol and TCP peerinfo, # manage the information of a peer, such as peer ID and public / private key - peer, # Implement how peers interact + peerid, # Implement how peers interact protocols/protocol, # define the protocol base type protocols/secure/secure, # define the protocol of secure connection protocols/secure/secio, # define the protocol of secure input / output, allows encrypted communication that uses public keys to validate signed messages instead of a certificate authority like in TLS diff --git a/libp2p/crypto/crypto.nim b/libp2p/crypto/crypto.nim index 9c996f357..ef3807485 100644 --- a/libp2p/crypto/crypto.nim +++ b/libp2p/crypto/crypto.nim @@ -95,7 +95,7 @@ const SupportedSchemesInt* = {int8(RSA), int8(Ed25519), int8(Secp256k1), int8(ECDSA)} -template orError(exp: untyped, err: CryptoError): untyped = +template orError*(exp: untyped, err: untyped): untyped = (exp.mapErr do (_: auto) -> auto: err) proc random*(t: typedesc[PrivateKey], scheme: PKScheme, diff --git a/libp2p/daemon/daemonapi.nim b/libp2p/daemon/daemonapi.nim index 68570ddb3..e610fd384 100644 --- a/libp2p/daemon/daemonapi.nim +++ b/libp2p/daemon/daemonapi.nim @@ -10,11 +10,11 @@ ## This module implementes API for `go-libp2p-daemon`. import os, osproc, strutils, tables, strtabs import chronos -import ../varint, ../multiaddress, ../multicodec, ../cid, ../peer +import ../varint, ../multiaddress, ../multicodec, ../cid, ../peerid import ../wire, ../multihash, ../protobuf/minprotobuf import ../crypto/crypto -export peer, multiaddress, multicodec, multihash, cid, crypto, wire +export peerid, multiaddress, multicodec, multihash, cid, crypto, wire when not defined(windows): import posix diff --git a/libp2p/multiaddress.nim b/libp2p/multiaddress.nim index 0d642e1fa..c6064f075 100644 --- a/libp2p/multiaddress.nim +++ b/libp2p/multiaddress.nim @@ -14,9 +14,8 @@ import nativesockets import tables, strutils, stew/shims/net import chronos -import multicodec, multihash, multibase, transcoder, vbuffer +import multicodec, multihash, multibase, transcoder, vbuffer, peerid import stew/[base58, base32, endians2, results] -from peer import PeerID export results type diff --git a/libp2p/peer.nim b/libp2p/peerid.nim similarity index 83% rename from libp2p/peer.nim rename to libp2p/peerid.nim index a8d90af87..e20417d0c 100644 --- a/libp2p/peer.nim +++ b/libp2p/peerid.nim @@ -8,10 +8,15 @@ ## those terms. ## This module implementes API for libp2p peer. + +{.push raises: [Defect].} + import hashes import nimcrypto/utils, stew/base58 import crypto/crypto, multicodec, multihash, vbuffer import protobuf/minprotobuf +import stew/results +export results const maxInlineKeyLength* = 42 @@ -143,37 +148,51 @@ proc init*(pid: var PeerID, data: string): bool = pid = opid result = true -proc init*(t: typedesc[PeerID], data: openarray[byte]): PeerID {.inline.} = +proc init*(t: typedesc[PeerID], data: openarray[byte]): Result[PeerID, cstring] {.inline.} = ## Create new peer id from raw binary representation ``data``. - if not init(result, data): - raise newException(PeerIDError, "Incorrect PeerID binary form") + var res: PeerID + if not init(res, data): + err("peerid: incorrect PeerID binary form") + else: + ok(res) -proc init*(t: typedesc[PeerID], data: string): PeerID {.inline.} = +proc init*(t: typedesc[PeerID], data: string): Result[PeerID, cstring] {.inline.} = ## Create new peer id from base58 encoded string representation ``data``. 
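# Editor's sketch (not part of the patch): with the resultified constructor,
# call sites branch on the Result (or use tryGet(), as the updated
# peerinfo/secio hunks do) instead of catching PeerIDError. The string below
# is only an example base58 value; libp2p/peerid re-exports stew/results:
import libp2p/peerid

let decoded = PeerID.init("QmYyQSo1c1Ym7orWxLYvCrM2EmxFTANf8wXmmE7DWjhx5N")
if decoded.isErr:
  echo "invalid peer id: ", decoded.error
else:
  echo "peer: ", decoded.get().pretty()

# equivalent, but raising on failure:
# let pid = PeerID.init(encodedString).tryGet()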
- if not init(result, data): - raise newException(PeerIDError, "Incorrect PeerID string") + var res: PeerID + if not init(res, data): + err("peerid: incorrect PeerID string") + else: + ok(res) -proc init*(t: typedesc[PeerID], pubkey: PublicKey): PeerID = +proc init*(t: typedesc[PeerID], pubkey: PublicKey): Result[PeerID, cstring] = ## Create new peer id from public key ``pubkey``. - var pubraw = pubkey.getBytes().tryGet() + var pubraw = ? pubkey.getBytes().orError("peerid: failed to get bytes from given key") var mh: MultiHash if len(pubraw) <= maxInlineKeyLength: - mh = MultiHash.digest("identity", pubraw).tryGet() + mh = ? MultiHash.digest("identity", pubraw) else: - mh = MultiHash.digest("sha2-256", pubraw).tryGet() - result.data = mh.data.buffer + mh = ? MultiHash.digest("sha2-256", pubraw) + ok(PeerID(data: mh.data.buffer)) -proc init*(t: typedesc[PeerID], seckey: PrivateKey): PeerID {.inline.} = +proc init*(t: typedesc[PeerID], seckey: PrivateKey): Result[PeerID, cstring] {.inline.} = ## Create new peer id from private key ``seckey``. - result = PeerID.init(seckey.getKey().tryGet()) + PeerID.init(? seckey.getKey().orError("invalid private key")) proc match*(pid: PeerID, pubkey: PublicKey): bool {.inline.} = ## Returns ``true`` if ``pid`` matches public key ``pubkey``. - result = (pid == PeerID.init(pubkey)) + let p = PeerID.init(pubkey) + if p.isErr: + false + else: + pid == p.get() proc match*(pid: PeerID, seckey: PrivateKey): bool {.inline.} = ## Returns ``true`` if ``pid`` matches private key ``seckey``. - result = (pid == PeerID.init(seckey)) + let p = PeerID.init(seckey) + if p.isErr: + false + else: + pid == p.get() ## Serialization/Deserialization helpers diff --git a/libp2p/peerinfo.nim b/libp2p/peerinfo.nim index bfe5048ba..978d04808 100644 --- a/libp2p/peerinfo.nim +++ b/libp2p/peerinfo.nim @@ -9,7 +9,7 @@ import options, sequtils import chronos, chronicles -import peer, multiaddress, crypto/crypto +import peerid, multiaddress, crypto/crypto ## A peer can be constructed in one of tree ways: ## 1) A local peer with a private key @@ -65,7 +65,7 @@ proc init*(p: typedesc[PeerInfo], key: PrivateKey, addrs: openarray[MultiAddress] = [], protocols: openarray[string] = []): PeerInfo {.inline.} = - result = PeerInfo(keyType: HasPrivate, peerId: PeerID.init(key), + result = PeerInfo(keyType: HasPrivate, peerId: PeerID.init(key).tryGet(), privateKey: key) result.postInit(addrs, protocols) @@ -80,7 +80,7 @@ proc init*(p: typedesc[PeerInfo], peerId: string, addrs: openarray[MultiAddress] = [], protocols: openarray[string] = []): PeerInfo {.inline.} = - result = PeerInfo(keyType: HasPublic, peerId: PeerID.init(peerId)) + result = PeerInfo(keyType: HasPublic, peerId: PeerID.init(peerId).tryGet()) result.postInit(addrs, protocols) proc init*(p: typedesc[PeerInfo], @@ -88,7 +88,7 @@ proc init*(p: typedesc[PeerInfo], addrs: openarray[MultiAddress] = [], protocols: openarray[string] = []): PeerInfo {.inline.} = result = PeerInfo(keyType: HasPublic, - peerId: PeerID.init(key), + peerId: PeerID.init(key).tryGet(), key: some(key)) result.postInit(addrs, protocols) diff --git a/libp2p/protocols/identify.nim b/libp2p/protocols/identify.nim index b2d3c501a..735d740af 100644 --- a/libp2p/protocols/identify.nim +++ b/libp2p/protocols/identify.nim @@ -12,7 +12,7 @@ import chronos, chronicles import ../protobuf/minprotobuf, ../peerinfo, ../stream/connection, - ../peer, + ../peerid, ../crypto/crypto, ../multiaddress, ../protocols/protocol, @@ -142,16 +142,18 @@ proc identify*(p: Identify, if not 
isNil(remotePeerInfo) and result.pubKey.isSome: let peer = PeerID.init(result.pubKey.get()) + if peer.isErr: + raise newException(IdentityInvalidMsgError, $peer.error) + else: + # do a string comaprison of the ids, + # because that is the only thing we + # have in most cases + if peer.get() != remotePeerInfo.peerId: + trace "Peer ids don't match", + remote = peer.get().pretty(), + local = remotePeerInfo.id - # do a string comparison of the ids, - # because that is the only thing we - # have in most cases - if peer != remotePeerInfo.peerId: - trace "Peer ids don't match", - remote = peer.pretty(), - local = remotePeerInfo.id - - raise newException(IdentityNoMatchError, "Peer ids don't match") + raise newException(IdentityNoMatchError, "Peer ids don't match") proc push*(p: Identify, conn: Connection) {.async.} = await conn.write(IdentifyPushCodec) diff --git a/libp2p/protocols/pubsub/floodsub.nim b/libp2p/protocols/pubsub/floodsub.nim index 91f31c162..adf2b7d93 100644 --- a/libp2p/protocols/pubsub/floodsub.nim +++ b/libp2p/protocols/pubsub/floodsub.nim @@ -14,7 +14,7 @@ import pubsub, timedcache, rpc/[messages, message], ../../stream/connection, - ../../peer, + ../../peerid, ../../peerinfo, ../../utility, ../../errors diff --git a/libp2p/protocols/pubsub/gossipsub.nim b/libp2p/protocols/pubsub/gossipsub.nim index 64e8a82d3..9498266c0 100644 --- a/libp2p/protocols/pubsub/gossipsub.nim +++ b/libp2p/protocols/pubsub/gossipsub.nim @@ -18,7 +18,7 @@ import pubsub, ../protocol, ../../peerinfo, ../../stream/connection, - ../../peer, + ../../peerid, ../../errors, ../../utility diff --git a/libp2p/protocols/pubsub/pubsub.nim b/libp2p/protocols/pubsub/pubsub.nim index da1c24a0c..434edc15c 100644 --- a/libp2p/protocols/pubsub/pubsub.nim +++ b/libp2p/protocols/pubsub/pubsub.nim @@ -13,7 +13,7 @@ import pubsubpeer, rpc/[message, messages], ../protocol, ../../stream/connection, - ../../peer, + ../../peerid, ../../peerinfo import metrics diff --git a/libp2p/protocols/pubsub/pubsubpeer.nim b/libp2p/protocols/pubsub/pubsubpeer.nim index 1ea920085..ee9851b45 100644 --- a/libp2p/protocols/pubsub/pubsubpeer.nim +++ b/libp2p/protocols/pubsub/pubsubpeer.nim @@ -11,7 +11,7 @@ import options, hashes, strutils, tables, hashes import chronos, chronicles, nimcrypto/sha2, metrics import rpc/[messages, message, protobuf], timedcache, - ../../peer, + ../../peerid, ../../peerinfo, ../../stream/connection, ../../crypto/crypto, diff --git a/libp2p/protocols/pubsub/rpc/message.nim b/libp2p/protocols/pubsub/rpc/message.nim index d7a35ae14..d203035d4 100644 --- a/libp2p/protocols/pubsub/rpc/message.nim +++ b/libp2p/protocols/pubsub/rpc/message.nim @@ -15,7 +15,7 @@ import metrics import chronicles import nimcrypto/sysrand import messages, protobuf, - ../../../peer, + ../../../peerid, ../../../peerinfo, ../../../crypto/crypto, ../../../protobuf/minprotobuf diff --git a/libp2p/protocols/pubsub/rpc/messages.nim b/libp2p/protocols/pubsub/rpc/messages.nim index afb6a5f3e..5a0ae615d 100644 --- a/libp2p/protocols/pubsub/rpc/messages.nim +++ b/libp2p/protocols/pubsub/rpc/messages.nim @@ -9,7 +9,7 @@ import options, sequtils import ../../../utility -import ../../../peer +import ../../../peerid type SubOpts* = object diff --git a/libp2p/protocols/pubsub/rpc/protobuf.nim b/libp2p/protocols/pubsub/rpc/protobuf.nim index 556232475..ce42275d0 100644 --- a/libp2p/protocols/pubsub/rpc/protobuf.nim +++ b/libp2p/protocols/pubsub/rpc/protobuf.nim @@ -10,7 +10,7 @@ import options import chronicles import messages, - ../../../peer, + 
../../../peerid, ../../../utility, ../../../protobuf/minprotobuf @@ -186,7 +186,7 @@ proc decodeMessages*(pb: var ProtoBuffer): seq[Message] {.gcsafe.} = if pb.getBytes(1, fromPeer) < 0: break try: - msg.fromPeer = PeerID.init(fromPeer) + msg.fromPeer = PeerID.init(fromPeer).tryGet() except CatchableError as err: debug "Invalid fromPeer in message", msg = err.msg break diff --git a/libp2p/protocols/secure/noise.nim b/libp2p/protocols/secure/noise.nim index cdd37eec0..4319573ed 100644 --- a/libp2p/protocols/secure/noise.nim +++ b/libp2p/protocols/secure/noise.nim @@ -12,7 +12,7 @@ import chronicles import stew/[endians2, byteutils] import nimcrypto/[utils, sysrand, sha2, hmac] import ../../stream/lpstream -import ../../peer +import ../../peerid import ../../peerinfo import ../../protobuf/minprotobuf import ../../utility @@ -460,7 +460,7 @@ method handshake*(p: Noise, conn: Connection, initiator: bool): Future[SecureCon let pid = PeerID.init(remotePubKey) if not conn.peerInfo.peerId.validate(): raise newException(NoiseHandshakeError, "Failed to validate peerId.") - if pid != conn.peerInfo.peerId: + if pid.isErr or pid.get() != conn.peerInfo.peerId: var failedKey: PublicKey discard extractPublicKey(conn.peerInfo.peerId, failedKey) diff --git a/libp2p/protocols/secure/secio.nim b/libp2p/protocols/secure/secio.nim index 3d369b92d..331092461 100644 --- a/libp2p/protocols/secure/secio.nim +++ b/libp2p/protocols/secure/secio.nim @@ -13,7 +13,7 @@ import secure, ../../peerinfo, ../../crypto/crypto, ../../crypto/ecnist, - ../../peer, + ../../peerid, ../../utility export hmac, sha2, sha, hash, rijndael, bcmode @@ -297,7 +297,7 @@ method handshake*(s: Secio, conn: Connection, initiator: bool = false): Future[S SecioCiphers, SecioHashes) - localPeerId = PeerID.init(s.localPublicKey) + localPeerId = PeerID.init(s.localPublicKey).tryGet() trace "Local proposal", schemes = SecioExchanges, ciphers = SecioCiphers, @@ -320,7 +320,7 @@ method handshake*(s: Secio, conn: Connection, initiator: bool = false): Future[S trace "Remote public key incorrect or corrupted", pubkey = remoteBytesPubkey.shortLog raise (ref SecioError)(msg: "Remote public key incorrect or corrupted") - remotePeerId = PeerID.init(remotePubkey) + remotePeerId = PeerID.init(remotePubkey).tryGet() # TODO: PeerID check against supplied PeerID let order = getOrder(remoteBytesPubkey, localNonce, localBytesPubkey, diff --git a/libp2p/standard_setup.nim b/libp2p/standard_setup.nim index 991378a1e..73695d437 100644 --- a/libp2p/standard_setup.nim +++ b/libp2p/standard_setup.nim @@ -5,7 +5,7 @@ const import options, tables, chronos, - switch, peer, peerinfo, stream/connection, multiaddress, + switch, peerid, peerinfo, stream/connection, multiaddress, crypto/crypto, transports/[transport, tcptransport], muxers/[muxer, mplex/mplex, mplex/types], protocols/[identify, secure/secure], @@ -17,7 +17,7 @@ import protocols/secure/secio export - switch, peer, peerinfo, connection, multiaddress, crypto + switch, peerid, peerinfo, connection, multiaddress, crypto type SecureProtocol* {.pure.} = enum diff --git a/libp2p/switch.nim b/libp2p/switch.nim index 6b7b82471..c7047c98c 100644 --- a/libp2p/switch.nim +++ b/libp2p/switch.nim @@ -31,7 +31,7 @@ import stream/connection, protocols/pubsub/pubsub, muxers/muxer, errors, - peer + peerid logScope: topics = "switch" diff --git a/tests/pubsub/testgossipsub.nim b/tests/pubsub/testgossipsub.nim index 6a610ffad..197c967c0 100644 --- a/tests/pubsub/testgossipsub.nim +++ b/tests/pubsub/testgossipsub.nim @@ -13,7 +13,7 @@ 
import unittest, sequtils, options, tables, sets import chronos, stew/byteutils import chronicles import utils, ../../libp2p/[errors, - peer, + peerid, peerinfo, stream/connection, crypto/crypto, diff --git a/tests/pubsub/testmcache.nim b/tests/pubsub/testmcache.nim index 118be8b3b..90a1b1139 100644 --- a/tests/pubsub/testmcache.nim +++ b/tests/pubsub/testmcache.nim @@ -2,7 +2,7 @@ import unittest, options, sets, sequtils import stew/byteutils -import ../../libp2p/[peer, +import ../../libp2p/[peerid, crypto/crypto, protocols/pubsub/mcache, protocols/pubsub/rpc/message, @@ -11,7 +11,7 @@ import ../../libp2p/[peer, suite "MCache": test "put/get": var mCache = newMCache(3, 5) - var msg = Message(fromPeer: PeerID.init(PrivateKey.random(ECDSA).get()), + var msg = Message(fromPeer: PeerID.init(PrivateKey.random(ECDSA).get()).get(), seqno: "12345".toBytes()) let msgId = defaultMsgIdProvider(msg) mCache.put(msgId, msg) @@ -21,13 +21,13 @@ suite "MCache": var mCache = newMCache(3, 5) for i in 0..<3: - var msg = Message(fromPeer: PeerID.init(PrivateKey.random(ECDSA).get()), + var msg = Message(fromPeer: PeerID.init(PrivateKey.random(ECDSA).get()).get(), seqno: "12345".toBytes(), topicIDs: @["foo"]) mCache.put(defaultMsgIdProvider(msg), msg) for i in 0..<5: - var msg = Message(fromPeer: PeerID.init(PrivateKey.random(ECDSA).get()), + var msg = Message(fromPeer: PeerID.init(PrivateKey.random(ECDSA).get()).get(), seqno: "12345".toBytes(), topicIDs: @["bar"]) mCache.put(defaultMsgIdProvider(msg), msg) @@ -42,7 +42,7 @@ suite "MCache": var mCache = newMCache(1, 5) for i in 0..<3: - var msg = Message(fromPeer: PeerID.init(PrivateKey.random(ECDSA).get()), + var msg = Message(fromPeer: PeerID.init(PrivateKey.random(ECDSA).get()).get(), seqno: "12345".toBytes(), topicIDs: @["foo"]) mCache.put(defaultMsgIdProvider(msg), msg) @@ -51,7 +51,7 @@ suite "MCache": check mCache.window("foo").len == 0 for i in 0..<3: - var msg = Message(fromPeer: PeerID.init(PrivateKey.random(ECDSA).get()), + var msg = Message(fromPeer: PeerID.init(PrivateKey.random(ECDSA).get()).get(), seqno: "12345".toBytes(), topicIDs: @["bar"]) mCache.put(defaultMsgIdProvider(msg), msg) @@ -60,7 +60,7 @@ suite "MCache": check mCache.window("bar").len == 0 for i in 0..<3: - var msg = Message(fromPeer: PeerID.init(PrivateKey.random(ECDSA).get()), + var msg = Message(fromPeer: PeerID.init(PrivateKey.random(ECDSA).get()).get(), seqno: "12345".toBytes(), topicIDs: @["baz"]) mCache.put(defaultMsgIdProvider(msg), msg) @@ -72,19 +72,19 @@ suite "MCache": var mCache = newMCache(1, 5) for i in 0..<3: - var msg = Message(fromPeer: PeerID.init(PrivateKey.random(ECDSA).get()), + var msg = Message(fromPeer: PeerID.init(PrivateKey.random(ECDSA).get()).get(), seqno: "12345".toBytes(), topicIDs: @["foo"]) mCache.put(defaultMsgIdProvider(msg), msg) for i in 0..<3: - var msg = Message(fromPeer: PeerID.init(PrivateKey.random(ECDSA).get()), + var msg = Message(fromPeer: PeerID.init(PrivateKey.random(ECDSA).get()).get(), seqno: "12345".toBytes(), topicIDs: @["bar"]) mCache.put(defaultMsgIdProvider(msg), msg) for i in 0..<3: - var msg = Message(fromPeer: PeerID.init(PrivateKey.random(ECDSA).get()), + var msg = Message(fromPeer: PeerID.init(PrivateKey.random(ECDSA).get()).get(), seqno: "12345".toBytes(), topicIDs: @["baz"]) mCache.put(defaultMsgIdProvider(msg), msg) diff --git a/tests/pubsub/testmessage.nim b/tests/pubsub/testmessage.nim index 77128ef05..1c9092d45 100644 --- a/tests/pubsub/testmessage.nim +++ b/tests/pubsub/testmessage.nim @@ -1,6 +1,6 @@ import unittest 
-import ../../libp2p/[peer, peerinfo, +import ../../libp2p/[peerid, peerinfo, crypto/crypto, protocols/pubsub/rpc/message, protocols/pubsub/rpc/messages] diff --git a/tests/testdaemon.nim b/tests/testdaemon.nim index cd8642a75..485b27886 100644 --- a/tests/testdaemon.nim +++ b/tests/testdaemon.nim @@ -1,7 +1,7 @@ import unittest import chronos import ../libp2p/daemon/daemonapi, ../libp2p/multiaddress, ../libp2p/multicodec, - ../libp2p/cid, ../libp2p/multihash, ../libp2p/peer + ../libp2p/cid, ../libp2p/multihash, ../libp2p/peerid when defined(nimHasUsed): {.used.} diff --git a/tests/testidentify.nim b/tests/testidentify.nim index 305e373de..d91f7079a 100644 --- a/tests/testidentify.nim +++ b/tests/testidentify.nim @@ -3,7 +3,7 @@ import chronos, strutils import ../libp2p/[protocols/identify, multiaddress, peerinfo, - peer, + peerid, stream/connection, multistream, transports/transport, diff --git a/tests/testinterop.nim b/tests/testinterop.nim index e3dea9350..988846ec6 100644 --- a/tests/testinterop.nim +++ b/tests/testinterop.nim @@ -11,7 +11,7 @@ import ../libp2p/[daemon/daemonapi, varint, multihash, standard_setup, - peer, + peerid, peerinfo, switch, stream/connection, diff --git a/tests/testpeer.nim b/tests/testpeer.nim index 949e796df..2f497951b 100644 --- a/tests/testpeer.nim +++ b/tests/testpeer.nim @@ -11,7 +11,7 @@ ## https://github.com/libp2p/go-libp2p-peer import unittest import nimcrypto/utils, stew/base58 -import ../libp2p/crypto/crypto, ../libp2p/peer +import ../libp2p/crypto/crypto, ../libp2p/peerid when defined(nimHasUsed): {.used.} @@ -103,11 +103,11 @@ suite "Peer testing suite": for i in 0.. Date: Wed, 1 Jul 2020 09:22:29 +0200 Subject: [PATCH 23/28] export public field types (#248) * export public field types * one more --- libp2p/peerinfo.nim | 2 ++ 1 file changed, 2 insertions(+) diff --git a/libp2p/peerinfo.nim b/libp2p/peerinfo.nim index 978d04808..2b5353349 100644 --- a/libp2p/peerinfo.nim +++ b/libp2p/peerinfo.nim @@ -11,6 +11,8 @@ import options, sequtils import chronos, chronicles import peerid, multiaddress, crypto/crypto +export peerid, multiaddress, crypto + ## A peer can be constructed in one of tree ways: ## 1) A local peer with a private key ## 2) A remote peer with a PeerID and it's public key stored From a976c22dae170548578c8776e53b9b3334994d43 Mon Sep 17 00:00:00 2001 From: Giovanni Petrantoni Date: Wed, 1 Jul 2020 16:54:28 +0900 Subject: [PATCH 24/28] triage publish nil peers (issue is on master too but just hidden behind a if/in) --- libp2p/protocols/pubsub/gossipsub.nim | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/libp2p/protocols/pubsub/gossipsub.nim b/libp2p/protocols/pubsub/gossipsub.nim index 861a6c432..022901194 100644 --- a/libp2p/protocols/pubsub/gossipsub.nim +++ b/libp2p/protocols/pubsub/gossipsub.nim @@ -499,9 +499,18 @@ method publish*(g: GossipSub, continue let peer = g.peers.getOrDefault(p) - if not isNil(peer.peerInfo): + if not isNil(peer) and not isNil(peer.peerInfo): trace "publish: sending message to peer", peer = p sent.add(peer.send(@[RPCMsg(messages: @[msg])])) + else: + # Notice this needs a better fix! 
for now it's a hack + error "publish: peer or peerInfo was nil" + if topic in g.mesh: + g.mesh[topic].excl(p) + if topic in g.fanout: + g.fanout[topic].excl(p) + if topic in g.gossipsub: + g.gossipsub[topic].excl(p) sent = await allFinished(sent) checkFutures(sent) From d9e0ca6091c6ba4cea71d967f4b100b4e5dfb925 Mon Sep 17 00:00:00 2001 From: Giovanni Petrantoni Date: Wed, 1 Jul 2020 17:26:42 +0900 Subject: [PATCH 25/28] getGossipPeers fixes --- libp2p/protocols/pubsub/gossipsub.nim | 31 +++++++++++++-------------- 1 file changed, 15 insertions(+), 16 deletions(-) diff --git a/libp2p/protocols/pubsub/gossipsub.nim b/libp2p/protocols/pubsub/gossipsub.nim index 022901194..25b091c98 100644 --- a/libp2p/protocols/pubsub/gossipsub.nim +++ b/libp2p/protocols/pubsub/gossipsub.nim @@ -135,7 +135,7 @@ proc rebalanceMesh(g: GossipSub, topic: string) {.async.} = let p = g.peers[id] # send a graft message to the peer await p.sendGraft(@[topic]) - + # prune peers if we've gone over if g.mesh.getOrDefault(topic).len > GossipSubDhi: trace "about to prune mesh", mesh = g.mesh.getOrDefault(topic).len @@ -178,6 +178,11 @@ proc dropFanoutPeers(g: GossipSub) {.async.} = libp2p_gossipsub_peers_per_topic_fanout .set(g.fanout.getOrDefault(topic).len.int64, labelValues = [topic]) +proc shuffle[T](collection: var seq[T]) = + for i in 0..collection.high: + let r = rand(collection.high) + swap(collection[i], collection[r]) + proc getGossipPeers(g: GossipSub): Table[string, ControlMessage] {.gcsafe.} = ## gossip iHave messages to peers let topics = toHashSet(toSeq(g.mesh.keys)) + toHashSet(toSeq(g.fanout.keys)) @@ -186,28 +191,22 @@ proc getGossipPeers(g: GossipSub): Table[string, ControlMessage] {.gcsafe.} = let fanout: HashSet[string] = g.fanout.getOrDefault(topic) let gossipPeers = mesh + fanout - var extraPeers = g.gossipsub # copy it! 
let mids = g.mcache.window(topic) if mids.len > 0: let ihave = ControlIHave(topicID: topic, messageIDs: toSeq(mids)) - if topic notin extraPeers: + if topic notin g.gossipsub: trace "topic not in gossip array, skipping", topicID = topic continue - - while result.len < GossipSubD: - if extraPeers.getOrDefault(topic).len == 0: - trace "no peers for topic, skipping", topicID = topic - break - - let id = toSeq(extraPeers.getOrDefault(topic)).sample() - if id in extraPeers.getOrDefault(topic): - extraPeers[topic].excl(id) - if id notin gossipPeers: - if id notin result: - result[id] = ControlMessage() - result[id].ihave.add(ihave) + + var extraPeers = toSeq(g.gossipsub[topic]) + shuffle(extraPeers) + for peer in extraPeers: + if result.len < GossipSubD and + peer notin gossipPeers and + peer notin result: + result[peer] = ControlMessage(ihave: @[ihave]) proc heartbeat(g: GossipSub) {.async.} = while g.heartbeatRunning: From 4f5c97e27cdee87b1d970f7c36e9d9969061bce7 Mon Sep 17 00:00:00 2001 From: Giovanni Petrantoni Date: Wed, 1 Jul 2020 17:44:26 +0900 Subject: [PATCH 26/28] remove topics from pubsubpeer (was unused) --- libp2p/protocols/pubsub/pubsub.nim | 18 ------------------ libp2p/protocols/pubsub/pubsubpeer.nim | 1 - tests/pubsub/testgossipinternal.nim | 1 - 3 files changed, 20 deletions(-) diff --git a/libp2p/protocols/pubsub/pubsub.nim b/libp2p/protocols/pubsub/pubsub.nim index 197b5d4d0..3c0ab4243 100644 --- a/libp2p/protocols/pubsub/pubsub.nim +++ b/libp2p/protocols/pubsub/pubsub.nim @@ -28,7 +28,6 @@ declareGauge(libp2p_pubsub_topics, "pubsub subscribed topics") declareCounter(libp2p_pubsub_validation_success, "pubsub successfully validated messages") declareCounter(libp2p_pubsub_validation_failure, "pubsub failed validated messages") declarePublicCounter(libp2p_pubsub_messages_published, "published messages", labels = ["topic"]) -declareGauge(libp2p_pubsub_peers_per_topic, "pubsub peers per topic", labels = ["topic"]) type TopicHandler* = proc(topic: string, @@ -84,23 +83,6 @@ method subscribeTopic*(p: PubSub, if isNil(peer) or isNil(peer.peerInfo): # should not happen if subscribe: warn "subscribeTopic but peer was unknown!" - return # Stop causing bad metrics! - else: - return # Stop causing bad metrics! 
- - let idx = peer.topics.find(topic) - if subscribe: - libp2p_pubsub_peers_per_topic.inc(labelValues = [topic]) - if idx == -1: - peer.topics &= topic - else: - warn "subscribe but topic was already previously subscribed", topic, peer = peerId - else: - libp2p_pubsub_peers_per_topic.dec(labelValues = [topic]) - if idx == -1: - warn "unsubscribe but topic was not previously subscribed", topic, peer = peerId - else: - peer.topics.del(idx) method rpcHandler*(p: PubSub, peer: PubSubPeer, diff --git a/libp2p/protocols/pubsub/pubsubpeer.nim b/libp2p/protocols/pubsub/pubsubpeer.nim index fa2c5c5ae..e73d2e073 100644 --- a/libp2p/protocols/pubsub/pubsubpeer.nim +++ b/libp2p/protocols/pubsub/pubsubpeer.nim @@ -36,7 +36,6 @@ type sendConn: Connection peerInfo*: PeerInfo handler*: RPCHandler - topics*: seq[string] sentRpcCache: TimedCache[string] # cache for already sent messages recvdRpcCache: TimedCache[string] # cache for already received messages refs*: int # refcount of the connections this peer is handling diff --git a/tests/pubsub/testgossipinternal.nim b/tests/pubsub/testgossipinternal.nim index 4460c7672..35b6bdd1b 100644 --- a/tests/pubsub/testgossipinternal.nim +++ b/tests/pubsub/testgossipinternal.nim @@ -99,7 +99,6 @@ suite "GossipSub internal": conn.peerInfo = peerInfo gossipSub.peers[peerInfo.id] = newPubSubPeer(peerInfo, GossipSubCodec) gossipSub.peers[peerInfo.id].handler = handler - gossipSub.peers[peerInfo.id].topics &= topic gossipSub.gossipsub[topic].incl(peerInfo.id) check gossipSub.gossipsub[topic].len == 15 From 27d22d534d8c22cca252af0e545c845eb9c35943 Mon Sep 17 00:00:00 2001 From: Giovanni Petrantoni Date: Thu, 2 Jul 2020 23:07:17 +0900 Subject: [PATCH 27/28] simplify rebalanceMesh (following spec) and make it finally reach D_high --- libp2p/protocols/pubsub/gossipsub.nim | 47 +++++++-------------------- 1 file changed, 12 insertions(+), 35 deletions(-) diff --git a/libp2p/protocols/pubsub/gossipsub.nim b/libp2p/protocols/pubsub/gossipsub.nim index 3409c3657..4cf7d868f 100644 --- a/libp2p/protocols/pubsub/gossipsub.nim +++ b/libp2p/protocols/pubsub/gossipsub.nim @@ -97,45 +97,27 @@ proc rebalanceMesh(g: GossipSub, topic: string) {.async.} = if topic notin g.mesh: g.mesh[topic] = initHashSet[string]() - if g.mesh.getOrDefault(topic).len < GossipSubDlo: - trace "replenishing mesh", topic - # replenish the mesh if we're below GossipSubDlo - while g.mesh.getOrDefault(topic).len < GossipSubD and topic in g.topics: - trace "gathering peers", peers = g.mesh.getOrDefault(topic).len - await sleepAsync(1.millis) # don't starve the event loop - var id: string - if topic in g.fanout and g.fanout.getOrDefault(topic).len > 0: - trace "getting peer from fanout", topic, - peers = g.fanout.getOrDefault(topic).len + # https://github.com/libp2p/specs/blob/master/pubsub/gossipsub/gossipsub-v1.0.md#mesh-maintenance + if g.mesh.getOrDefault(topic).len < GossipSubDlo and topic in g.topics: + var availPeers = toSeq(g.gossipsub.getOrDefault(topic)) + shuffle(availPeers) + if availPeers.len > GossipSubD: + availPeers = availPeers[0.. 
0: - trace "getting peer from gossipsub", topic, - peers = g.gossipsub.getOrDefault(topic).len - - id = sample(toSeq(g.gossipsub[topic])) - g.gossipsub[topic].excl(id) - - if id in g.mesh[topic]: - continue # we already have this peer in the mesh, try again - - trace "got gossipsub peer", peer = id - else: - trace "no more peers" - break + trace "got gossipsub peer", peer = id g.mesh[topic].incl(id) if id in g.peers: let p = g.peers[id] # send a graft message to the peer await p.sendGraft(@[topic]) - + # prune peers if we've gone over if g.mesh.getOrDefault(topic).len > GossipSubDhi: trace "about to prune mesh", mesh = g.mesh.getOrDefault(topic).len @@ -180,11 +162,6 @@ proc dropFanoutPeers(g: GossipSub) {.async.} = libp2p_gossipsub_peers_per_topic_fanout .set(g.fanout.getOrDefault(topic).len.int64, labelValues = [topic]) -proc shuffle[T](collection: var seq[T]) = - for i in 0..collection.high: - let r = rand(collection.high) - swap(collection[i], collection[r]) - proc getGossipPeers(g: GossipSub): Table[string, ControlMessage] {.gcsafe.} = ## gossip iHave messages to peers let topics = toHashSet(toSeq(g.mesh.keys)) + toHashSet(toSeq(g.fanout.keys)) From 0d0309a601682b9bc03113f9287f04187142ebf4 Mon Sep 17 00:00:00 2001 From: Giovanni Petrantoni Date: Fri, 3 Jul 2020 09:28:20 +0900 Subject: [PATCH 28/28] better diagnostics --- libp2p/protocols/pubsub/gossipsub.nim | 2 +- libp2p/protocols/pubsub/pubsub.nim | 1 + 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/libp2p/protocols/pubsub/gossipsub.nim b/libp2p/protocols/pubsub/gossipsub.nim index 4cf7d868f..82d10a7a1 100644 --- a/libp2p/protocols/pubsub/gossipsub.nim +++ b/libp2p/protocols/pubsub/gossipsub.nim @@ -480,7 +480,7 @@ method publish*(g: GossipSub, sent.add(peer.send(@[RPCMsg(messages: @[msg])])) else: # Notice this needs a better fix! for now it's a hack - error "publish: peer or peerInfo was nil" + error "publish: peer or peerInfo was nil", missing = p if topic in g.mesh: g.mesh[topic].excl(p) if topic in g.fanout: diff --git a/libp2p/protocols/pubsub/pubsub.nim b/libp2p/protocols/pubsub/pubsub.nim index 27ce6df60..119b136c4 100644 --- a/libp2p/protocols/pubsub/pubsub.nim +++ b/libp2p/protocols/pubsub/pubsub.nim @@ -100,6 +100,7 @@ method rpcHandler*(p: PubSub, method handleDisconnect*(p: PubSub, peer: PubSubPeer) {.async, base.} = ## handle peer disconnects if peer.id in p.peers: + trace "deleting peer", id = peer.id p.peers.del(peer.id) # metrics
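
A minimal usage sketch of the Result-returning PeerID.init that this series introduces (not part of the patches themselves): the import paths, the variable names, and the assumption that nim-libp2p at this revision plus stew/results are on the module path are all illustrative.

import stew/results
import libp2p/peerid, libp2p/crypto/crypto

let seckey = PrivateKey.random(ECDSA).get()  # same pattern as the updated tests
let pid = PeerID.init(seckey)                # now Result[PeerID, cstring], no raise
if pid.isErr:
  echo "could not derive PeerID: ", pid.error
else:
  echo "derived peer id: ", pid.get().pretty()

Call sites that previously relied on the raising behaviour switch to .tryGet(), as the peerinfo.nim and protobuf.nim hunks above do.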
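
A standalone sketch of the peer-selection pattern that patches 25 and 27 move gossipsub.nim to: shuffle the candidate ids once, then take at most D of them while skipping peers already covered elsewhere (mesh and fanout in the patch). The pickPeers name, its parameters, and the use of the stdlib random.shuffle in place of the small shuffle helper the patch defines are assumptions made for this sketch only.

import random, sets, sequtils

proc pickPeers(candidates: HashSet[string],
               already: HashSet[string],
               d: int): seq[string] =
  ## Take up to `d` random peer ids from `candidates`, skipping ids
  ## that are already tracked (the gossipPeers set in getGossipPeers).
  var avail = toSeq(candidates)
  shuffle(avail)                    # stdlib in-place shuffle
  for id in avail:
    if result.len >= d:
      break
    if id notin already:
      result.add(id)

when isMainModule:
  randomize()
  let cand = toHashSet(["a", "b", "c", "d", "e", "f"])
  let taken = toHashSet(["b", "e"])
  echo pickPeers(cand, taken, 3)    # e.g. @["c", "f", "a"]; order varies per run

Walking a pre-shuffled seq with a hard bound of d is what removes the sample-and-retry loop (and its sleepAsync(1.millis) calls) that patch 27 deletes from rebalanceMesh.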