peer hooks -> events (#320)

* peer hooks -> events

* peerinfo -> peerid
* include connection direction in event
* check connection status after event
* lock connmanager lookup also when dialling peer
* clean up un-upgraded connection when upgrade fails
* await peer eventing

* remove join/lifetime future from peerinfo

PeerInfo instances are not unique per peer, so the lifetime future is
misleading: it fires when an arbitrary connection is closed, not the
"last" one

* document switch values

* naming

* peerevent -> connevent
Jacek Sieka, 2020-08-08 08:52:20 +02:00 (committed via GitHub)
parent fbb59c3638
commit f303954989
5 changed files with 135 additions and 157 deletions
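For orientation, a minimal sketch of the event API this change introduces. The handler signature, ConnEvent, ConnEventKind and addConnEventHandler are taken from the switch.nim diff below; the standard_setup helper and the surrounding setup are assumptions, not part of this commit:

  import chronos, chronicles
  import libp2p/[standard_setup, switch, peerid]

  proc onConn(peerId: PeerID, event: ConnEvent) {.async, gcsafe.} =
    # Handlers must not raise - the switch only logs exceptions they leak.
    case event.kind
    of ConnEventKind.Connected:
      # `incoming` is only present on Connected events
      info "peer connected", peer = $peerId, incoming = event.incoming
    of ConnEventKind.Disconnected:
      info "peer disconnected", peer = $peerId

  let node = newStandardSwitch()  # assumed helper from libp2p/standard_setup
  node.addConnEventHandler(onConn, ConnEventKind.Connected)
  node.addConnEventHandler(onConn, ConnEventKind.Disconnected)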

File: libp2p/peerinfo.nim

@@ -7,6 +7,8 @@
 ## This file may not be copied, modified, or distributed except according to
 ## those terms.
 
+{.push raises: [Defect].}
+
 import options, sequtils, hashes
 import chronos, chronicles
 import peerid, multiaddress, crypto/crypto
@@ -30,7 +32,6 @@ type
     peerId*: PeerID
     addrs*: seq[MultiAddress]
     protocols*: seq[string]
-    lifefut: Future[void]
     protoVersion*: string
    agentVersion*: string
     secure*: string
@@ -62,12 +63,12 @@ template postInit(peerinfo: PeerInfo,
     peerinfo.addrs = @addrs
   if len(protocols) > 0:
     peerinfo.protocols = @protocols
-  peerinfo.lifefut = newFuture[void]("libp2p.peerinfo.lifetime")
 
 proc init*(p: typedesc[PeerInfo],
            key: PrivateKey,
            addrs: openarray[MultiAddress] = [],
-           protocols: openarray[string] = []): PeerInfo {.inline.} =
+           protocols: openarray[string] = []): PeerInfo {.
+    raises: [Defect, ResultError[cstring]].} =
   result = PeerInfo(keyType: HasPrivate, peerId: PeerID.init(key).tryGet(),
                     privateKey: key)
   result.postInit(addrs, protocols)
@@ -75,55 +76,31 @@ proc init*(p: typedesc[PeerInfo],
 proc init*(p: typedesc[PeerInfo],
            peerId: PeerID,
            addrs: openarray[MultiAddress] = [],
-           protocols: openarray[string] = []): PeerInfo {.inline.} =
+           protocols: openarray[string] = []): PeerInfo =
   result = PeerInfo(keyType: HasPublic, peerId: peerId)
   result.postInit(addrs, protocols)
 
 proc init*(p: typedesc[PeerInfo],
            peerId: string,
            addrs: openarray[MultiAddress] = [],
-           protocols: openarray[string] = []): PeerInfo {.inline.} =
+           protocols: openarray[string] = []): PeerInfo {.
+    raises: [Defect, ResultError[cstring]].} =
   result = PeerInfo(keyType: HasPublic, peerId: PeerID.init(peerId).tryGet())
   result.postInit(addrs, protocols)
 
 proc init*(p: typedesc[PeerInfo],
            key: PublicKey,
            addrs: openarray[MultiAddress] = [],
-           protocols: openarray[string] = []): PeerInfo {.inline.} =
+           protocols: openarray[string] = []): PeerInfo {.
+    raises: [Defect, ResultError[cstring]].} =
   result = PeerInfo(keyType: HasPublic,
                     peerId: PeerID.init(key).tryGet(),
                     key: some(key))
   result.postInit(addrs, protocols)
 
-proc close*(p: PeerInfo) {.inline.} =
-  if not p.lifefut.finished:
-    p.lifefut.complete()
-  else:
-    # TODO this should ideally not happen
-    notice "Closing closed peer", peer = p.id
-
-proc join*(p: PeerInfo): Future[void] {.inline.} =
-  var retFuture = newFuture[void]()
-  proc continuation(udata: pointer) {.gcsafe.} =
-    if not(retFuture.finished()):
-      retFuture.complete()
-  proc cancellation(udata: pointer) {.gcsafe.} =
-    p.lifefut.removeCallback(continuation)
-  if p.lifefut.finished:
-    retFuture.complete()
-  else:
-    p.lifefut.addCallback(continuation)
-    retFuture.cancelCallback = cancellation
-  return retFuture
-
-proc isClosed*(p: PeerInfo): bool {.inline.} =
-  result = p.lifefut.finished()
-
-proc lifeFuture*(p: PeerInfo): Future[void] {.inline.} =
-  result = p.lifefut
-
-proc publicKey*(p: PeerInfo): Option[PublicKey] {.inline.} =
+proc publicKey*(p: PeerInfo): Option[PublicKey] {.
+    raises: [Defect, ResultError[CryptoError]].} =
   if p.keyType == HasPublic:
     if p.peerId.hasPublicKey():
       var pubKey: PublicKey
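What the new raises annotations mean in practice: under {.push raises: [Defect].}, PeerID decoding goes through tryGet(), so invalid input surfaces as stew/results' ResultError and raises-checked callers must handle it explicitly. A hedged sketch - the input string is a deliberately invalid placeholder:

  import stew/results
  import libp2p/peerinfo

  let info =
    try:
      PeerInfo.init("not-a-valid-peer-id")  # placeholder input
    except ResultError[cstring] as exc:
      echo "rejected peer id: ", exc.msg
      quit(1)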

File: libp2p/protocols/secure/noise.nim

@@ -134,7 +134,7 @@ proc decryptWithAd(state: var CipherState, ad, data: openArray[byte]): seq[byte]
   ChaChaPoly.decrypt(state.k, nonce, tagOut, result, ad)
   trace "decryptWithAd", tagIn = tagIn.shortLog, tagOut = tagOut.shortLog, nonce = state.n
   if tagIn != tagOut:
-    error "decryptWithAd failed", data = byteutils.toHex(data)
+    debug "decryptWithAd failed", data = shortLog(data)
     raise newException(NoiseDecryptTagError, "decryptWithAd failed tag authentication.")
   inc state.n
   if state.n > NonceMax:

File: libp2p/switch.nim

@@ -50,12 +50,22 @@ const
 type
   NoPubSubException* = object of CatchableError
 
-  Lifecycle* {.pure.} = enum
-    Connected,
-    Upgraded,
-    Disconnected
+  ConnEventKind* {.pure.} = enum
+    Connected,    # A connection was made and securely upgraded - there may be
+                  # more than one concurrent connection thus more than one upgrade
+                  # event per peer.
+    Disconnected  # Peer disconnected - this event is fired once per upgrade
+                  # when the associated connection is terminated.
 
-  Hook* = proc(peer: PeerInfo, cycle: Lifecycle): Future[void] {.gcsafe.}
+  ConnEvent* = object
+    case kind*: ConnEventKind
+    of ConnEventKind.Connected:
+      incoming*: bool
+    else:
+      discard
+
+  ConnEventHandler* =
+    proc(peerId: PeerID, event: ConnEvent): Future[void] {.gcsafe.}
 
   Switch* = ref object of RootObj
     peerInfo*: PeerInfo
@@ -69,31 +79,35 @@
     secureManagers*: seq[Secure]
     pubSub*: Option[PubSub]
     dialLock: Table[PeerID, AsyncLock]
-    hooks: Table[Lifecycle, HashSet[Hook]]
+    ConnEvents: Table[ConnEventKind, HashSet[ConnEventHandler]]
     pubsubMonitors: Table[PeerId, Future[void]]
 
 proc newNoPubSubException(): ref NoPubSubException {.inline.} =
   result = newException(NoPubSubException, "no pubsub provided!")
 
-proc addHook*(s: Switch, hook: Hook, cycle: Lifecycle) =
-  s.hooks.mgetOrPut(cycle, initHashSet[Hook]()).incl(hook)
+proc addConnEventHandler*(s: Switch,
+                          handler: ConnEventHandler, kind: ConnEventKind) =
+  ## Add peer event handler - handlers must not raise exceptions!
+  if isNil(handler): return
+  s.ConnEvents.mgetOrPut(kind, initHashSet[ConnEventHandler]()).incl(handler)
 
-proc removeHook*(s: Switch, hook: Hook, cycle: Lifecycle) =
-  s.hooks.mgetOrPut(cycle, initHashSet[Hook]()).excl(hook)
+proc removeConnEventHandler*(s: Switch,
+                             handler: ConnEventHandler, kind: ConnEventKind) =
+  s.ConnEvents.withValue(kind, handlers) do:
+    handlers[].excl(handler)
 
-proc triggerHooks(s: Switch, peer: PeerInfo, cycle: Lifecycle) {.async, gcsafe.} =
+proc triggerConnEvent(s: Switch, peerId: PeerID, event: ConnEvent) {.async, gcsafe.} =
   try:
-    if cycle in s.hooks:
-      var hooks: seq[Future[void]]
-      for h in s.hooks[cycle]:
-        if not(isNil(h)):
-          hooks.add(h(peer, cycle))
+    if event.kind in s.ConnEvents:
+      var ConnEvents: seq[Future[void]]
+      for h in s.ConnEvents[event.kind]:
+        ConnEvents.add(h(peerId, event))
 
-      checkFutures(await allFinished(hooks))
+      checkFutures(await allFinished(ConnEvents))
   except CancelledError as exc:
     raise exc
-  except CatchableError as exc:
-    trace "exception in trigger hooks", exc = exc.msg
+  except CatchableError as exc: # handlers should not raise!
+    warn "exception in trigger ConnEvents", exc = exc.msg
 
 proc disconnect*(s: Switch, peerId: PeerID) {.async, gcsafe.}
 proc subscribePeer*(s: Switch, peerId: PeerID) {.async, gcsafe.}
@@ -280,86 +294,96 @@ proc upgradeIncoming(s: Switch, conn: Connection) {.async, gcsafe.} =
 proc internalConnect(s: Switch,
                      peerId: PeerID,
                      addrs: seq[MultiAddress]): Future[Connection] {.async.} =
+  logScope: peer = peerId
+
   if s.peerInfo.peerId == peerId:
     raise newException(CatchableError, "can't dial self!")
 
-  var conn = s.connManager.selectConn(peerId)
-  if conn != nil and not conn.atEof and not conn.closed:
-    trace "Reusing existing connection", oid = $conn.oid,
-                                         direction = $conn.dir,
-                                         peer = peerId
-    return conn
-
+  var conn: Connection
+  # Ensure there's only one in-flight attempt per peer
   let lock = s.dialLock.mgetOrPut(peerId, newAsyncLock())
   try:
     await lock.acquire()
-    trace "Dialing peer", peer = peerId
+
+    # Check if we have a connection already and try to reuse it
+    conn = s.connManager.selectConn(peerId)
+    if conn != nil:
+      if conn.atEof or conn.closed:
+        # This connection should already have been removed from the connection
+        # manager - it's essentially a bug that we end up here - we'll fail
+        # for now, hoping that they will clean themselves up later...
+        warn "dead connection in connection manager"
+        await conn.close()
+        raise newException(CatchableError, "Zombie connection encountered")
+
+      trace "Reusing existing connection", oid = $conn.oid,
+                                           direction = $conn.dir
+      return conn
+
+    trace "Dialing peer"
     for t in s.transports: # for each transport
       for a in addrs: # for each address
         if t.handles(a): # check if it can dial it
-          trace "Dialing address", address = $a, peer = peerId
-          try:
-            conn = await t.dial(a)
-            # make sure to assign the peer to the connection
-            conn.peerInfo = PeerInfo.init(peerId, addrs)
-
-            conn.closeEvent.wait()
-              .addCallback do(udata: pointer):
-                asyncCheck s.triggerHooks(
-                  conn.peerInfo,
-                  Lifecycle.Disconnected)
-
-            asyncCheck s.triggerHooks(conn.peerInfo, Lifecycle.Connected)
-            libp2p_dialed_peers.inc()
-          except CancelledError as exc:
-            trace "dialing canceled", exc = exc.msg, peer = peerId
-            raise exc
-          except CatchableError as exc:
-            trace "dialing failed", exc = exc.msg, peer = peerId
-            libp2p_failed_dials.inc()
-            continue
-
-          try:
-            let uconn = await s.upgradeOutgoing(conn)
-            s.connManager.storeOutgoing(uconn)
-            asyncCheck s.triggerHooks(uconn.peerInfo, Lifecycle.Upgraded)
-            conn = uconn
-            trace "dial successful", oid = $conn.oid, peer = $conn.peerInfo
-          except CatchableError as exc:
-            if not(isNil(conn)):
-              await conn.close()
-
-            trace "Unable to establish outgoing link", exc = exc.msg, peer = peerId
-            raise exc
-
-          if isNil(conn):
-            libp2p_failed_upgrade.inc()
-            continue
-
+          trace "Dialing address", address = $a
+          let dialed = try:
+              await t.dial(a)
+            except CancelledError as exc:
+              trace "dialing canceled", exc = exc.msg
+              raise exc
+            except CatchableError as exc:
+              trace "dialing failed", exc = exc.msg
+              libp2p_failed_dials.inc()
+              continue # Try the next address
+
+          # make sure to assign the peer to the connection
+          dialed.peerInfo = PeerInfo.init(peerId, addrs)
+
+          libp2p_dialed_peers.inc()
+
+          let upgraded = try:
+              await s.upgradeOutgoing(dialed)
+            except CatchableError as exc:
+              # If we failed to establish the connection through one transport,
+              # we won't succeed through another - no use in trying again
+              await dialed.close()
+              debug "upgrade failed", exc = exc.msg
+              if exc isnot CancelledError:
+                libp2p_failed_upgrade.inc()
+              raise exc
+
+          doAssert not isNil(upgraded), "checked in upgradeOutgoing"
+
+          s.connManager.storeOutgoing(upgraded)
+          conn = upgraded
+
+          trace "dial successful",
+            oid = $conn.oid,
+            peerInfo = shortLog(upgraded.peerInfo)
+
           break
   finally:
     if lock.locked():
       lock.release()
 
-  if isNil(conn):
+  if isNil(conn): # None of the addresses connected
     raise newException(CatchableError, "Unable to establish outgoing link")
 
-  if conn.closed or conn.atEof:
-    await conn.close()
-    raise newException(CatchableError, "Connection dead on arrival")
-
-  doAssert(conn in s.connManager, "connection not tracked!")
-
-  trace "dial successful", oid = $conn.oid,
-                           peer = shortLog(conn.peerInfo)
+  conn.closeEvent.wait()
+    .addCallback do(udata: pointer):
+      asyncCheck s.triggerConnEvent(
+        peerId, ConnEvent(kind: ConnEventKind.Disconnected))
+
+  await s.triggerConnEvent(
+    peerId, ConnEvent(kind: ConnEventKind.Connected, incoming: false))
+
+  if conn.closed():
+    # This can happen if one of the peer event handlers deems the peer
+    # unworthy and disconnects it
+    raise newException(CatchableError, "Connection closed during handshake")
 
   asyncCheck s.cleanupPubSubPeer(conn)
   asyncCheck s.subscribePeer(peerId)
 
+  trace "got connection", oid = $conn.oid,
+                          direction = $conn.dir,
+                          peer = shortLog(conn.peerInfo)
   return conn
 
 proc connect*(s: Switch, peerId: PeerID, addrs: seq[MultiAddress]) {.async.} =
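The hunk above boils down to a per-peer single-flight guard: one AsyncLock per PeerID, with the reuse check moved under the lock, so concurrent dials to the same peer queue behind the first attempt and find its connection instead of racing it. A reduced sketch with illustrative names, not part of the diff:

  import std/tables
  import chronos

  proc withDialLock(locks: TableRef[string, AsyncLock],
                    peer: string,
                    dial: proc(): Future[void] {.gcsafe.}) {.async.} =
    # One lock per peer: later callers wait here rather than dialing twice
    let lock = locks.mgetOrPut(peer, newAsyncLock())
    try:
      await lock.acquire()
      await dial()  # under the lock: reuse an existing connection, or dial + upgrade
    finally:
      if lock.locked():
        lock.release()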
@@ -418,13 +442,6 @@ proc start*(s: Switch): Future[seq[Future[void]]] {.async, gcsafe.} =
   proc handle(conn: Connection): Future[void] {.async, closure, gcsafe.} =
     try:
-      conn.closeEvent.wait()
-        .addCallback do(udata: pointer):
-          asyncCheck s.triggerHooks(
-            conn.peerInfo,
-            Lifecycle.Disconnected)
-
-      asyncCheck s.triggerHooks(conn.peerInfo, Lifecycle.Connected)
       await s.upgradeIncoming(conn) # perform upgrade on incoming connection
     except CancelledError as exc:
       raise exc
@@ -616,7 +633,10 @@ proc muxerHandler(s: Switch, muxer: Muxer) {.async, gcsafe.} =
       await muxer.close()
       return
 
-    muxer.connection.peerInfo = stream.peerInfo
+    let
+      peerInfo = stream.peerInfo
+      peerId = peerInfo.peerId
+    muxer.connection.peerInfo = peerInfo
 
     # store incoming connection
     s.connManager.storeIncoming(muxer.connection)
@@ -624,12 +644,19 @@ proc muxerHandler(s: Switch, muxer: Muxer) {.async, gcsafe.} =
     # store muxer and muxed connection
     s.connManager.storeMuxer(muxer)
 
-    trace "got new muxer", peer = $muxer.connection.peerInfo
+    trace "got new muxer", peer = shortLog(peerInfo)
 
-    asyncCheck s.triggerHooks(muxer.connection.peerInfo, Lifecycle.Upgraded)
+    muxer.connection.closeEvent.wait()
+      .addCallback do(udata: pointer):
+        asyncCheck s.triggerConnEvent(
+          peerId, ConnEvent(kind: ConnEventKind.Disconnected))
+
+    asyncCheck s.triggerConnEvent(
+      peerId, ConnEvent(kind: ConnEventKind.Connected, incoming: true))
 
     # try establishing a pubsub connection
     asyncCheck s.cleanupPubSubPeer(muxer.connection)
-    asyncCheck s.subscribePeer(muxer.connection.peerInfo.peerId)
+    asyncCheck s.subscribePeer(peerId)
 
   except CancelledError as exc:
     await muxer.close()

File: tests/testpeerinfo.nim

@@ -55,16 +55,3 @@ suite "PeerInfo":
   test "Should return some if pubkey is present in id":
     let peerInfo = PeerInfo.init(PeerID.init(PrivateKey.random(Ed25519, rng[]).get()).get())
     check peerInfo.publicKey.isSome
-
-  test "join() and isClosed() test":
-    proc testJoin(): Future[bool] {.async, gcsafe.} =
-      let peerInfo = PeerInfo.init(PeerID.init(PrivateKey.random(Ed25519, rng[]).get()).get())
-      check peerInfo.isClosed() == false
-      var joinFut = peerInfo.join()
-      check joinFut.finished() == false
-      peerInfo.close()
-      await wait(joinFut, 100.milliseconds)
-      check peerInfo.isClosed() == true
-      check (joinFut.finished() == true) and (joinFut.cancelled() == false)
-      result = true
-    check waitFor(testJoin()) == true

File: tests/testswitch.nim

@@ -237,38 +237,26 @@ suite "Switch":
     let switch2 = newStandardSwitch(secureManagers = [SecureProtocol.Secio])
 
     var step = 0
-    var cycles: set[Lifecycle]
-    proc hook(peer: PeerInfo, cycle: Lifecycle) {.async, gcsafe.} =
-      cycles = cycles + {cycle}
+    var kinds: set[ConnEventKind]
+    proc hook(peerId: PeerID, event: ConnEvent) {.async, gcsafe.} =
+      kinds = kinds + {event.kind}
       case step:
       of 0:
-        check cycle == Lifecycle.Connected
-        check if not(isNil(peer)):
-            peer.peerId == switch2.peerInfo.peerId
-          else:
-            true
+        check:
+          event.kind == ConnEventKind.Connected
+          peerId == switch2.peerInfo.peerId
       of 1:
-        assert(isNil(peer) == false)
         check:
-          cycle == Lifecycle.Upgraded
-          peer.peerId == switch2.peerInfo.peerId
-      of 2:
-        check:
-          cycle == Lifecycle.Disconnected
-        check if not(isNil(peer)):
-            peer.peerId == switch2.peerInfo.peerId
-          else:
-            true
+          event.kind == ConnEventKind.Disconnected
+        check peerId == switch2.peerInfo.peerId
       else:
-        echo "unkown cycle! ", $cycle
         check false
 
       step.inc()
 
-    switch1.addHook(hook, Lifecycle.Connected)
-    switch1.addHook(hook, Lifecycle.Upgraded)
-    switch1.addHook(hook, Lifecycle.Disconnected)
+    switch1.addConnEventHandler(hook, ConnEventKind.Connected)
+    switch1.addConnEventHandler(hook, ConnEventKind.Disconnected)
 
     awaiters.add(await switch1.start())
     awaiters.add(await switch2.start())
@@ -294,10 +282,9 @@ suite "Switch":
     check connTracker.isLeaked() == false
 
     check:
-      cycles == {
-        Lifecycle.Connected,
-        Lifecycle.Upgraded,
-        Lifecycle.Disconnected
+      kinds == {
+        ConnEventKind.Connected,
+        ConnEventKind.Disconnected
       }
 
     await allFuturesThrowing(