nim-libp2p/libp2p/switch.nim
Dmitriy Ryajov b6877b8aac
increase send timeout for prune and graft msgs (#306)
* increase send timeout for prune and graft msgs

* use trace logs for subscribe monitor
2020-08-03 17:55:42 -06:00

670 lines
20 KiB
Nim

## Nim-LibP2P
## Copyright (c) 2019 Status Research & Development GmbH
## Licensed under either of
## * Apache License, version 2.0, ([LICENSE-APACHE](LICENSE-APACHE))
## * MIT license ([LICENSE-MIT](LICENSE-MIT))
## at your option.
## This file may not be copied, modified, or distributed except according to
## those terms.
import tables,
sequtils,
options,
sets,
oids
import chronos,
chronicles,
metrics
import stream/connection,
transports/transport,
multistream,
multiaddress,
protocols/protocol,
protocols/secure/secure,
peerinfo,
protocols/identify,
protocols/pubsub/pubsub,
muxers/muxer,
connmanager,
peerid,
errors
# Tag log statements emitted from this module with the "switch" topic.
logScope:
  topics = "switch"

# TODO: General note - use a finite state machine to manage the different
# steps of establishing and upgrading connections. This makes everything
# more robust and less prone to ordering attacks - i.e. muxing can happen if
# and only if the channel has been secured (i.e. if a secure manager has been
# previously provided).

# Counters bumped by the dial and upgrade code paths below.
declareCounter(libp2p_dialed_peers, "dialed peers")
declareCounter(libp2p_failed_dials, "failed dials")
declareCounter(libp2p_failed_upgrade, "peers failed upgrade")

const
  # NOTE(review): exported but not referenced anywhere in this module -
  # presumably used by importers or left over from older reconnect logic;
  # confirm before relying on it.
  MaxPubsubReconnectAttempts* = 10
type
  # Raised by the pubsub convenience procs of `Switch` when no pubsub
  # instance was supplied.
  NoPubSubException* = object of CatchableError

  # Connection lifecycle stages for which hooks can be registered.
  Lifecycle* {.pure.} = enum
    Connected,    ## fired right after a transport level connection exists
    Upgraded,     ## fired once the connection has been upgraded
    Disconnected  ## fired when the connection's close event triggers

  # Callback invoked with the affected peer and the lifecycle stage.
  Hook* = proc(peer: PeerInfo, cycle: Lifecycle): Future[void] {.gcsafe.}

  # Central object tying transports, security, muxing, protocol negotiation
  # and (optionally) pubsub together.
  Switch* = ref object of RootObj
    peerInfo*: PeerInfo                  ## identity/addresses of the local peer
    connManager: ConnManager             ## tracks connections and their muxers
    transports*: seq[Transport]          ## transports used to dial and listen
    protocols*: seq[LPProtocol]          ## NOTE(review): never written or read in this module - confirm usage
    muxers*: Table[string, MuxerProvider] ## muxer providers keyed by codec name
    ms*: MultistreamSelect               ## protocol negotiation and dispatch
    identity*: Identify                  ## identify protocol instance
    streamHandler*: StreamHandler        ## handler installed on muxers for new streams
    secureManagers*: seq[Secure]         ## available secure channel implementations
    pubSub*: Option[PubSub]              ## optional pubsub instance
    dialLock: Table[string, AsyncLock]   ## one lock per peer id, taken while dialing
    hooks: Table[Lifecycle, HashSet[Hook]] ## registered lifecycle hooks
    pubsubMonitors: Table[PeerId, Future[void]] ## running `pubsubMonitor` future per peer
proc newNoPubSubException(): ref NoPubSubException {.inline.} =
  ## Builds the error raised when a pubsub operation is requested on a
  ## switch that has no pubsub instance.
  newException(NoPubSubException, "no pubsub provided!")
proc addHook*(s: Switch, hook: Hook, cycle: Lifecycle) =
  ## Registers `hook` so that it is invoked whenever `cycle` is triggered.
  ## The hook set for `cycle` is created on first use.
  if cycle notin s.hooks:
    s.hooks[cycle] = initHashSet[Hook]()

  s.hooks[cycle].incl(hook)
proc removeHook*(s: Switch, hook: Hook, cycle: Lifecycle) =
  ## Unregisters `hook` from the given lifecycle `cycle`.
  ##
  ## Does nothing when no hook was ever registered for `cycle`; in
  ## particular it no longer inserts an empty hook set into the table as a
  ## side effect of a removal.
  s.hooks.withValue(cycle, registered):
    registered[].excl(hook)
proc triggerHooks(s: Switch, peer: PeerInfo, cycle: Lifecycle) {.async, gcsafe.} =
  ## Invokes every non-nil hook registered for `cycle` with `peer` and
  ## waits until all of them have finished.
  ##
  ## Cancellation is propagated; any other catchable error is only logged.
  try:
    if cycle notin s.hooks:
      return

    var pending: seq[Future[void]]
    for hook in s.hooks[cycle]:
      if isNil(hook):
        continue
      pending.add(hook(peer, cycle))

    checkFutures(await allFinished(pending))
  except CancelledError as exc:
    raise exc
  except CatchableError as exc:
    trace "exception in trigger hooks", exc = exc.msg
# Forward declarations; the bodies of these procs appear further down in
# this module.
proc disconnect*(s: Switch, peer: PeerInfo) {.async, gcsafe.}
proc subscribePeer*(s: Switch, peerInfo: PeerInfo) {.async, gcsafe.}
proc subscribePeerInternal(s: Switch, peerInfo: PeerInfo) {.async, gcsafe.}
proc cleanupPubSubPeer(s: Switch, conn: Connection) {.async.} =
  ## Waits for `conn` to close, then cancels the peer's pubsub monitor (if
  ## one is still running) and unsubscribes the peer from pubsub.
  ##
  ## Cancellation is propagated; any other catchable error is only logged.
  try:
    await conn.closeEvent.wait()
    trace "about to cleanup pubsub peer"
    if s.pubSub.isNone:
      return

    let monitor = s.pubsubMonitors.getOrDefault(conn.peerInfo.peerId)
    if not(isNil(monitor) or monitor.finished):
      monitor.cancel()

    await s.pubSub.get().unsubscribePeer(conn.peerInfo)
  except CancelledError as exc:
    raise exc
  except CatchableError as exc:
    trace "exception cleaning pubsub peer", exc = exc.msg
proc isConnected*(s: Switch, peer: PeerInfo): bool =
  ## Tells whether the connection manager currently knows
  ## the peer, i.e. it has one or more associated connections.
  ##
  result = s.connManager.contains(peer.peerId)
proc secure(s: Switch, conn: Connection): Future[Connection] {.async, gcsafe.} =
  ## Negotiates one of the registered secure channel codecs over `conn`
  ## and returns the connection produced by that secure manager.
  ##
  ## Raises `CatchableError` when no secure manager is registered or when
  ## no codec could be agreed on.
  if s.secureManagers.len <= 0:
    raise newException(CatchableError, "No secure managers registered!")

  let codecs = s.secureManagers.mapIt(it.codec)
  let negotiated = await s.ms.select(conn, codecs)
  if negotiated.len == 0:
    raise newException(CatchableError, "Unable to negotiate a secure channel!")

  trace "securing connection", codec = negotiated

  # ms.select is expected to only ever return a codec we offered;
  # the assertion merely detects a misbehaving select.
  let secureProtocol = s.secureManagers.filterIt(it.codec == negotiated)
  doAssert(secureProtocol.len > 0)

  return await secureProtocol[0].secure(conn, true)
proc identify(s: Switch, conn: Connection) {.async, gcsafe.} =
  ## Runs the identify protocol over `conn` and copies the reported
  ## information (addresses, agent/protocol version, protocols) into
  ## `conn.peerInfo`, creating the peer info from the reported public key
  ## when the connection has none yet.
  ##
  ## Nothing is done when the identify codec cannot be selected.
  ## Raises `CatchableError` when the remote supplied no public key and the
  ## connection carries no previously known peer identity.

  if (await s.ms.select(conn, s.identity.codec)):
    let info = await s.identity.identify(conn, conn.peerInfo)

    # `conn` itself cannot be nil here (it was just used to negotiate); the
    # relevant question is whether a peer identity already exists. Without
    # this guard the `info.pubKey.get()` below would be called on an empty
    # option instead of producing the intended error.
    if info.pubKey.isNone and isNil(conn.peerInfo):
      raise newException(CatchableError,
        "no public key provided and no existing peer identity found")

    if isNil(conn.peerInfo):
      conn.peerInfo = PeerInfo.init(info.pubKey.get())

    # only overwrite fields the remote actually reported
    if info.addrs.len > 0:
      conn.peerInfo.addrs = info.addrs

    if info.agentVersion.isSome:
      conn.peerInfo.agentVersion = info.agentVersion.get()

    if info.protoVersion.isSome:
      conn.peerInfo.protoVersion = info.protoVersion.get()

    if info.protos.len > 0:
      conn.peerInfo.protocols = info.protos

    trace "identify: identified remote peer", peer = $conn.peerInfo
proc mux(s: Switch, conn: Connection) {.async, gcsafe.} =
  ## Negotiates a muxer over `conn`, starts it and identifies the remote
  ## peer over a fresh muxed stream.
  ##
  ## Returns without doing anything when no muxers are registered or none
  ## could be negotiated. Raises `CatchableError` when the peer is still
  ## unknown after identify; the muxer is closed first in that case.

  trace "muxing connection", peer = $conn
  if s.muxers.len == 0:
    warn "no muxers registered, skipping upgrade flow"
    return

  let muxerName = await s.ms.select(conn, toSeq(s.muxers.keys()))
  if muxerName.len == 0 or muxerName == "na":
    debug "no muxer available, early exit", peer = $conn
    return

  # create new muxer for connection
  let muxer = s.muxers[muxerName].newMuxer(conn)
  # first registration happens without a handler future; it is stored
  # again with the handler once identify has succeeded (see end of proc)
  s.connManager.storeMuxer(muxer)

  trace "found a muxer", name = muxerName, peer = $conn

  # install stream handler
  muxer.streamHandler = s.streamHandler

  # new stream for identify
  var stream = await muxer.newStream()

  # the identify stream is closed on every exit path of this proc
  defer:
    if not(isNil(stream)):
      await stream.close() # close identify stream

  # call muxer handler, this should
  # not end until muxer ends
  let handlerFut = muxer.handle()

  # do identify first, so that we have a
  # PeerInfo in case we didn't before
  await s.identify(stream)

  if isNil(conn.peerInfo):
    await muxer.close()
    raise newException(CatchableError,
      "unable to identify peer, aborting upgrade")

  # store it in muxed connections if we have a peer for it
  trace "adding muxer for peer", peer = conn.peerInfo.id
  s.connManager.storeMuxer(muxer, handlerFut) # update muxer with handler
proc disconnect*(s: Switch, peer: PeerInfo) {.async, gcsafe.} =
  ## Asks the connection manager to drop the given peer.
  ## A nil `peer` is silently ignored.
  if isNil(peer):
    return

  await s.connManager.dropPeer(peer.peerId)
proc upgradeOutgoing(s: Switch, conn: Connection): Future[Connection] {.async, gcsafe.} =
  ## Secures and then muxes an outgoing connection, returning the secured
  ## connection.
  ##
  ## Raises `CatchableError` when the connection cannot be secured or when
  ## the remote peer is still unidentified after muxing (the secured
  ## connection is closed first in the latter case).
  logScope:
    conn = $conn
    oid = $conn.oid

  # step 1: secure the raw connection
  let secured = await s.secure(conn)
  if secured.isNil:
    raise newException(CatchableError,
      "unable to secure connection, stopping upgrade")

  trace "upgrading connection"

  # step 2: mux it if possible
  await s.mux(secured)

  if secured.peerInfo.isNil:
    await secured.close()
    raise newException(CatchableError,
      "unable to identify connection, stopping upgrade")

  trace "successfully upgraded outgoing connection", oid = secured.oid
  return secured
proc upgradeIncoming(s: Switch, conn: Connection) {.async, gcsafe.} =
  ## Upgrades an incoming connection using a dedicated multistream
  ## instance: after the initial handshake the secure managers are offered,
  ## and once one of them has secured the connection the muxers are offered
  ## on the secured connection.
  trace "upgrading incoming connection", conn = $conn, oid = $conn.oid
  let ms = newMultistream()

  # secure incoming connections
  proc securedHandler (conn: Connection,
                       proto: string)
                       {.async, gcsafe, closure.} =
    ## Invoked by `ms` when the remote picked the secure codec `proto`.

    var sconn: Connection
    trace "Securing connection", oid = $conn.oid
    # handlers are only registered for codecs of known secure managers,
    # so a match is expected to exist
    let secure = s.secureManagers.filterIt(it.codec == proto)[0]

    try:
      sconn = await secure.secure(conn, false)
      if isNil(sconn):
        return

      # close the secured connection whenever this handler exits
      defer:
        await sconn.close()

      # add the muxer
      for muxer in s.muxers.values:
        ms.addHandler(muxer.codec, muxer)

      # handle subsequent secure requests
      await ms.handle(sconn)

    except CancelledError as exc:
      raise exc
    except CatchableError as exc:
      debug "ending secured handler", err = exc.msg

  if (await ms.select(conn)): # just handshake
    # add the secure handlers
    for k in s.secureManagers:
      ms.addHandler(k.codec, securedHandler)

  # handle un-secured connections
  # we handshaked above, set this ms handler as active
  await ms.handle(conn, active = true)
proc internalConnect(s: Switch,
                     peer: PeerInfo): Future[Connection] {.async.} =
  ## Returns a live, tracked connection to `peer`, reusing an existing one
  ## when possible and otherwise dialing and upgrading a new one.
  ##
  ## Dials to the same peer id are serialized through a per-peer lock.
  ## Raises `CatchableError` when dialing self, when no connection could be
  ## established, or when the resulting connection is already closed/at EOF.

  if s.peerInfo.peerId == peer.peerId:
    raise newException(CatchableError, "can't dial self!")

  let id = peer.id
  var conn: Connection
  # one lock per peer id; NOTE(review): entries are never removed from
  # `dialLock` in this module
  let lock = s.dialLock.mgetOrPut(id, newAsyncLock())

  try:
    await lock.acquire()
    trace "about to dial peer", peer = id
    conn = s.connManager.selectConn(peer.peerId)
    if conn.isNil or (conn.closed or conn.atEof):
      trace "Dialing peer", peer = id
      for t in s.transports: # for each transport
        for a in peer.addrs: # for each address
          if t.handles(a):   # check if it can dial it
            trace "Dialing address", address = $a, peer = id
            try:
              conn = await t.dial(a)
              # make sure to assign the peer to the connection
              conn.peerInfo = peer

              # fire the Disconnected hooks once the connection closes
              conn.closeEvent.wait()
              .addCallback do(udata: pointer):
                asyncCheck s.triggerHooks(
                  conn.peerInfo,
                  Lifecycle.Disconnected)

              asyncCheck s.triggerHooks(conn.peerInfo, Lifecycle.Connected)
              libp2p_dialed_peers.inc()
            except CancelledError as exc:
              trace "dialing canceled", exc = exc.msg
              raise
            except CatchableError as exc:
              # a failed dial is not fatal, try the next address
              trace "dialing failed", exc = exc.msg
              libp2p_failed_dials.inc()
              continue

            try:
              let uconn = await s.upgradeOutgoing(conn)
              s.connManager.storeOutgoing(uconn)
              asyncCheck s.triggerHooks(uconn.peerInfo, Lifecycle.Upgraded)
              conn = uconn
              trace "dial successful", oid = $conn.oid, peer = $conn.peerInfo
            except CatchableError as exc:
              # a failed upgrade aborts the whole dial attempt
              if not(isNil(conn)):
                await conn.close()

              trace "Unable to establish outgoing link", exc = exc.msg
              raise exc

            # NOTE(review): `upgradeOutgoing` raises rather than returning
            # nil, so this branch looks unreachable - confirm
            if isNil(conn):
              libp2p_failed_upgrade.inc()
              continue

            # NOTE(review): this only leaves the address loop; the
            # transport loop keeps iterating - confirm that is intended
            break
    else:
      trace "Reusing existing connection", oid = $conn.oid,
                                           direction = $conn.dir,
                                           peer = $conn.peerInfo
  finally:
    if lock.locked():
      lock.release()

  if isNil(conn):
    raise newException(CatchableError,
      "Unable to establish outgoing link")

  if conn.closed or conn.atEof:
    await conn.close()
    raise newException(CatchableError,
      "Connection dead on arrival")

  doAssert(conn in s.connManager, "connection not tracked!")

  trace "dial successful", oid = $conn.oid,
                           peer = $conn.peerInfo

  # pubsub bookkeeping runs in the background and is not awaited here
  asyncCheck s.cleanupPubSubPeer(conn)
  asyncCheck s.subscribePeer(conn.peerInfo)

  trace "got connection", oid = $conn.oid,
                          direction = $conn.dir,
                          peer = $conn.peerInfo
  return conn
proc connect*(s: Switch, peer: PeerInfo) {.async.} =
  ## Makes sure a connection to `peer` exists; the connection itself is
  ## not handed back to the caller.
  let established = await s.internalConnect(peer)
  discard established
proc dial*(s: Switch,
           peer: PeerInfo,
           proto: string):
           Future[Connection] {.async.} =
  ## Connects to `peer` (reusing a connection when possible), opens a
  ## muxed stream on it and negotiates `proto` over that stream.
  ##
  ## Returns the negotiated stream. Raises `CatchableError` when no muxed
  ## stream could be obtained or when `proto` could not be selected. On any
  ## failure, including cancellation, both the stream and the underlying
  ## connection are closed before the error is re-raised.
  let conn = await s.internalConnect(peer)
  let stream = await s.connManager.getMuxedStream(conn)

  proc cleanup() {.async.} =
    ## Closes whatever was acquired so far.
    if not(isNil(stream)):
      await stream.close()

    if not(isNil(conn)):
      await conn.close()

  try:
    if isNil(stream):
      await conn.close()
      raise newException(CatchableError, "Couldn't get muxed stream")

    trace "Attempting to select remote", proto = proto,
                                         streamOid = $stream.oid,
                                         oid = $conn.oid
    if not await s.ms.select(stream, proto):
      await stream.close()
      # note the trailing space: the protocol id used to be glued
      # directly onto the message text
      raise newException(CatchableError,
        "Unable to select sub-protocol " & proto)

    return stream
  except CancelledError as exc:
    trace "dial canceled"
    await cleanup()
    raise exc
  except CatchableError as exc:
    trace "error dialing", exc = exc.msg
    await cleanup()
    raise exc
proc mount*[T: LPProtocol](s: Switch, proto: T) {.gcsafe.} =
  ## Registers `proto` with the switch's multistream under its codec.
  ##
  ## Raises `CatchableError` when the protocol has no handler or an empty
  ## codec string.
  if proto.handler.isNil:
    raise newException(CatchableError,
      "Protocol has to define a handle method or proc")

  if proto.codec.len == 0:
    raise newException(CatchableError,
      "Protocol has to define a codec string")

  s.ms.addHandler(proto.codec, proto)
proc start*(s: Switch): Future[seq[Future[void]]] {.async, gcsafe.} =
  ## Starts listening on every local address that one of the transports
  ## handles and starts pubsub when present.
  ##
  ## Returns the futures handed back by `listen`, one per address that is
  ## being listened on.
  trace "starting switch for peer", peerInfo = shortLog(s.peerInfo)

  proc handle(conn: Connection): Future[void] {.async, closure, gcsafe.} =
    ## Per-connection handler passed to the transports.
    try:
      # fire the Disconnected hooks once the connection closes
      conn.closeEvent.wait()
      .addCallback do(udata: pointer):
        asyncCheck s.triggerHooks(
          conn.peerInfo,
          Lifecycle.Disconnected)

      asyncCheck s.triggerHooks(conn.peerInfo, Lifecycle.Connected)
      await s.upgradeIncoming(conn) # perform upgrade on incoming connection
    except CancelledError as exc:
      raise exc
    except CatchableError as exc:
      trace "Exception occurred in Switch.start", exc = exc.msg
    finally:
      # the raw connection is always closed once handling ends
      await conn.close()

  var startFuts: seq[Future[void]]
  for t in s.transports: # for each transport
    for i, a in s.peerInfo.addrs:
      if t.handles(a): # check if it handles the multiaddr
        var server = await t.listen(a, handle)
        # replace the configured address with the one the transport reports
        s.peerInfo.addrs[i] = t.ma # update peer's address
        startFuts.add(server)

  if s.pubSub.isSome:
    await s.pubSub.get().start()

  debug "started libp2p node", peer = $s.peerInfo, addrs = s.peerInfo.addrs
  result = startFuts # listen for incoming connections
proc stop*(s: Switch) {.async.} =
  ## Stops pubsub (when present), closes all managed connections and then
  ## closes every transport.
  ##
  ## A failure to close one transport is logged together with its cause
  ## and does not prevent the remaining transports from being closed.
  ## Cancellation is propagated.
  trace "stopping switch"

  # we want to report errors but we do not want to fail
  # or crash here, cos we need to clean possibly MANY items
  # and any following conn/transport won't be cleaned up
  if s.pubSub.isSome:
    await s.pubSub.get().stop()

  # close and cleanup all connections
  await s.connManager.close()

  for t in s.transports:
    try:
      await t.close()
    except CancelledError as exc:
      raise exc
    except CatchableError as exc:
      # include the cause, otherwise the failure cannot be diagnosed
      warn "error cleaning up transports", exc = exc.msg

  trace "switch stopped"
proc subscribePeerInternal(s: Switch, peerInfo: PeerInfo) {.async, gcsafe.} =
  ## Opens a muxed stream to the peer, negotiates the pubsub codec on it,
  ## hands the stream to pubsub and then waits until the stream closes.
  ##
  ## Does nothing when there is no pubsub instance or pubsub already
  ## reports the peer as connected. The stream is closed on every failure.
  if s.pubSub.isNone or s.pubSub.get().connected(peerInfo):
    return

  trace "about to subscribe to pubsub peer", peer = peerInfo.shortLog()
  var stream: Connection
  try:
    stream = await s.connManager.getMuxedStream(peerInfo.peerId)
    if stream.isNil:
      trace "unable to subscribe to peer", peer = peerInfo.shortLog
      return

    let accepted = await s.ms.select(stream, s.pubSub.get().codec)
    if not accepted:
      if not stream.isNil:
        trace "couldn't select pubsub", codec = s.pubSub.get().codec
        await stream.close()
      return

    s.pubSub.get().subscribePeer(stream)
    # stay here for as long as the pubsub stream is alive
    await stream.closeEvent.wait()
  except CancelledError as exc:
    if not stream.isNil:
      await stream.close()

    raise exc
  except CatchableError as exc:
    trace "exception in subscribe to peer", peer = peerInfo.shortLog,
                                            exc = exc.msg
    if not stream.isNil:
      await stream.close()
proc pubsubMonitor(s: Switch, peer: PeerInfo) {.async.} =
  ## while peer connected maintain a
  ## pubsub connection as well
  ##
  ## Each iteration runs `subscribePeerInternal`, which returns once the
  ## pubsub stream has closed or could not be set up, and then sleeps for
  ## one second before the connection state is checked again.

  while s.isConnected(peer):
    try:
      trace "subscribing to pubsub peer", peer = $peer
      await s.subscribePeerInternal(peer)
    except CancelledError as exc:
      raise exc
    except CatchableError as exc:
      trace "exception in pubsub monitor", peer = $peer, exc = exc.msg
    finally:
      # NOTE(review): being in `finally`, this sleep also runs on the
      # cancellation path before the error propagates - confirm intended
      trace "sleeping before trying pubsub peer", peer = $peer
      await sleepAsync(1.seconds) # allow the peer to cooldown

  trace "exiting pubsub monitor", peer = $peer
proc subscribePeer*(s: Switch, peerInfo: PeerInfo) {.async, gcsafe.} =
  ## Makes sure a pubsub monitor is running for the given peer.
  ##
  ## A new monitor is started when none is stored for the peer or when the
  ## stored one has already finished (it exited because the peer went away,
  ## or it was cancelled during cleanup). Monitor entries are never removed
  ## from the table, so a presence-only check would leave a reconnecting
  ## peer without a pubsub monitor.
  let monitor = s.pubsubMonitors.getOrDefault(peerInfo.peerId)
  if isNil(monitor) or monitor.finished:
    s.pubsubMonitors[peerInfo.peerId] = s.pubsubMonitor(peerInfo)

  # NOTE(review): kept as in the original; how this assignment interacts
  # with the async transformation of a void proc should be confirmed
  result = s.pubsubMonitors.getOrDefault(peerInfo.peerId)
proc subscribe*(s: Switch, topic: string,
                handler: TopicHandler) {.async.} =
  ## Subscribes `handler` to the pubsub `topic`.
  ##
  ## Raises `NoPubSubException` when the switch has no pubsub instance.
  if not s.pubSub.isSome:
    raise newNoPubSubException()

  let router = s.pubSub.get()
  await router.subscribe(topic, handler)
proc unsubscribe*(s: Switch, topics: seq[TopicPair]) {.async.} =
  ## Removes the given topic/handler pairs from pubsub.
  ##
  ## Raises `NoPubSubException` when the switch has no pubsub instance.
  if not s.pubSub.isSome:
    raise newNoPubSubException()

  let router = s.pubSub.get()
  await router.unsubscribe(topics)
proc unsubscribeAll*(s: Switch, topic: string) {.async.} =
  ## Forwards to pubsub's `unsubscribeAll` for `topic`.
  ##
  ## Raises `NoPubSubException` when the switch has no pubsub instance.
  if not s.pubSub.isSome:
    raise newNoPubSubException()

  let router = s.pubSub.get()
  await router.unsubscribeAll(topic)
proc publish*(s: Switch,
              topic: string,
              data: seq[byte],
              timeout: Duration = InfiniteDuration): Future[int] {.async.} =
  ## Publishes `data` on the pubsub `topic` and returns whatever the
  ## pubsub instance's `publish` returns.
  ##
  ## Raises `NoPubSubException` when the switch has no pubsub instance.
  if not s.pubSub.isSome:
    raise newNoPubSubException()

  let delivered = await s.pubSub.get().publish(topic, data, timeout)
  return delivered
proc addValidator*(s: Switch,
                   topics: varargs[string],
                   hook: ValidatorHandler) =
  ## Registers the validator `hook` for the given topics.
  ##
  ## Raises `NoPubSubException` when the switch has no pubsub instance.
  if not s.pubSub.isSome:
    raise newNoPubSubException()

  let router = s.pubSub.get()
  router.addValidator(topics, hook)
proc removeValidator*(s: Switch,
                      topics: varargs[string],
                      hook: ValidatorHandler) =
  ## Removes the validator `hook` from the given topics.
  ##
  ## Raises `NoPubSubException` when the switch has no pubsub instance.
  if not s.pubSub.isSome:
    raise newNoPubSubException()

  let router = s.pubSub.get()
  router.removeValidator(topics, hook)
proc muxerHandler(s: Switch, muxer: Muxer) {.async, gcsafe.} =
  ## Installed on every muxer provider by `newSwitch`. Identifies the
  ## remote peer over a fresh stream of `muxer`, registers the connection
  ## and muxer with the connection manager and kicks off the Upgraded
  ## hooks and pubsub bookkeeping.
  ##
  ## The muxer is closed when the peer cannot be identified or when any
  ## error occurs; cancellation is propagated, other errors are logged.
  var stream = await muxer.newStream()
  # the identify stream is closed on every exit path
  defer:
    if not(isNil(stream)):
      await stream.close()

  try:
    # once we got a muxed connection, attempt to
    # identify it
    await s.identify(stream)
    if isNil(stream.peerInfo):
      await muxer.close()
      return

    # propagate the identified peer to the underlying connection
    muxer.connection.peerInfo = stream.peerInfo

    # store incoming connection
    s.connManager.storeIncoming(muxer.connection)

    # store muxer and muxed connection
    s.connManager.storeMuxer(muxer)

    trace "got new muxer", peer = $muxer.connection.peerInfo
    asyncCheck s.triggerHooks(muxer.connection.peerInfo, Lifecycle.Upgraded)

    # try establishing a pubsub connection
    asyncCheck s.cleanupPubSubPeer(muxer.connection)
    asyncCheck s.subscribePeer(muxer.connection.peerInfo)

  except CancelledError as exc:
    await muxer.close()
    raise exc
  except CatchableError as exc:
    await muxer.close()
    libp2p_failed_upgrade.inc()
    trace "exception in muxer handler", exc = exc.msg
proc newSwitch*(peerInfo: PeerInfo,
                transports: seq[Transport],
                identity: Identify,
                muxers: Table[string, MuxerProvider],
                secureManagers: openarray[Secure] = [],
                pubSub: Option[PubSub] = none(PubSub)): Switch =
  ## Creates a switch from the given components, mounts the identify
  ## protocol (and pubsub when supplied) and wires the stream and muxer
  ## handlers into every muxer provider.
  ##
  ## Raises `CatchableError` when `secureManagers` is empty.
  if secureManagers.len == 0:
    raise (ref CatchableError)(msg: "Provide at least one secure manager")

  result = Switch(
    peerInfo: peerInfo,
    ms: newMultistream(),
    transports: transports,
    connManager: ConnManager.init(),
    identity: identity,
    muxers: muxers,
    secureManagers: @secureManagers,
  )

  let s = result # can't capture result
  # handler for every new muxed stream: dispatch it through multistream
  # and close the stream once handling ends
  result.streamHandler = proc(stream: Connection) {.async, gcsafe.} =
    try:
      trace "handling connection for", peerInfo = $stream
      defer:
        if not(isNil(stream)):
          await stream.close()
      await s.ms.handle(stream) # handle incoming connection
    except CancelledError as exc:
      raise exc
    except CatchableError as exc:
      trace "exception in stream handler", exc = exc.msg

  result.mount(identity)
  # hook the switch's handlers into every muxer provider
  for key, val in muxers:
    val.streamHandler = result.streamHandler
    val.muxerHandler = proc(muxer: Muxer): Future[void] =
      s.muxerHandler(muxer)

  # pubsub is optional; when given it is mounted like any other protocol
  if pubSub.isSome:
    result.pubSub = pubSub
    result.mount(pubSub.get())