Add rlpx metrics and avoid immediate peer reconnections (#585)

* Add metrics related to devp2p peer connections

* Avoid reconnecting to peers that just failed connection

- Add SeenTable to avoid reconnecting to peers immediately after
a failed connect. Depending on the failure, the amount of time is
different. This is similar to what is done in nimbus-eth2.
- Attempt to rework rlpxConnect at the same time, in order to
make sure that errors are properly handled. The current structure
is far from ideal, but it is hopefully a small step in the right
direction. Too many oddities in there right now to really rework
rlpxConnect properly.

* Fix rlpx thunk fuzzer
This commit is contained in:
Kim De Mey 2023-03-16 16:45:12 +01:00 committed by GitHub
parent 29b14749fa
commit d2ba753792
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
7 changed files with 274 additions and 109 deletions

View File

@ -9,12 +9,15 @@
import import
std/[tables, hashes, times, algorithm, sets, sequtils], std/[tables, hashes, times, algorithm, sets, sequtils],
chronos, chronicles, stint, nimcrypto/keccak, chronos, chronicles, stint, nimcrypto/keccak, metrics,
../keys, ./discoveryv5/random2, ../keys, ./discoveryv5/random2,
./enode ./enode
export sets # TODO: This should not be needed, but compilation fails otherwise export sets # TODO: This should not be needed, but compilation fails otherwise
declarePublicGauge routing_table_nodes,
"Discovery routing table nodes"
logScope: logScope:
topics = "eth p2p kademlia" topics = "eth p2p kademlia"
@ -215,6 +218,7 @@ proc add(k: KBucket, n: Node): Node =
k.nodes.add(n) k.nodes.add(n)
elif k.len < BUCKET_SIZE: elif k.len < BUCKET_SIZE:
k.nodes.add(n) k.nodes.add(n)
routing_table_nodes.inc()
else: else:
k.replacementCache.add(n) k.replacementCache.add(n)
return k.head return k.head
@ -222,7 +226,9 @@ proc add(k: KBucket, n: Node): Node =
proc removeNode(k: KBucket, n: Node) = proc removeNode(k: KBucket, n: Node) =
let i = k.nodes.find(n) let i = k.nodes.find(n)
if i != -1: k.nodes.delete(i) if i != -1:
routing_table_nodes.dec()
k.nodes.delete(i)
proc split(k: KBucket): tuple[lower, upper: KBucket] = proc split(k: KBucket): tuple[lower, upper: KBucket] =
## Split at the median id ## Split at the median id

View File

@ -26,6 +26,42 @@ const
maxConcurrentConnectionRequests = 40 maxConcurrentConnectionRequests = 40
sleepBeforeTryingARandomBootnode = chronos.milliseconds(3000) sleepBeforeTryingARandomBootnode = chronos.milliseconds(3000)
## Period of time for dead / unreachable peers.
SeenTableTimeDeadPeer = chronos.minutes(10)
## Period of time for Useless peers, either because of no matching
## capabilities or on an irrelevant network.
SeenTableTimeUselessPeer = chronos.hours(24)
## Period of time for peers with a protocol error.
SeenTableTimeProtocolError = chronos.minutes(30)
## Period of time for peers with general disconnections / transport errors.
SeenTableTimeReconnect = chronos.minutes(5)
proc isSeen(p: PeerPool, nodeId: NodeId): bool =
## Returns ``true`` if ``nodeId`` present in SeenTable and time period is not
## yet expired.
let currentTime = now(chronos.Moment)
if nodeId notin p.seenTable:
false
else:
let item = try: p.seenTable[nodeId]
except KeyError: raiseAssert "checked with notin"
if currentTime >= item.stamp:
# Peer is in SeenTable, but the time period has expired.
p.seenTable.del(nodeId)
false
else:
true
proc addSeen(
p: PeerPool, nodeId: NodeId, period: chronos.Duration) =
## Adds peer with NodeId ``nodeId`` to SeenTable and timeout ``period``.
let item = SeenNode(nodeId: nodeId, stamp: now(chronos.Moment) + period)
withValue(p.seenTable, nodeId, entry) do:
if entry.stamp < item.stamp:
entry.stamp = item.stamp
do:
p.seenTable[nodeId] = item
proc newPeerPool*( proc newPeerPool*(
network: EthereumNode, networkId: NetworkId, keyPair: KeyPair, network: EthereumNode, networkId: NetworkId, keyPair: KeyPair,
discovery: DiscoveryProtocol, clientId: string, minPeers = 10): PeerPool = discovery: DiscoveryProtocol, clientId: string, minPeers = 10): PeerPool =
@ -84,28 +120,36 @@ proc connect(p: PeerPool, remote: Node): Future[Peer] {.async.} =
# debug "skipping connection" # debug "skipping connection"
return nil return nil
if p.isSeen(remote.id):
return nil
trace "Connecting to node", remote trace "Connecting to node", remote
p.connectingNodes.incl(remote) p.connectingNodes.incl(remote)
result = await p.network.rlpxConnect(remote) let res = await p.network.rlpxConnect(remote)
p.connectingNodes.excl(remote) p.connectingNodes.excl(remote)
# expected_exceptions = ( # TODO: Probably should move all this logic to rlpx.nim
# UnreachablePeer, TimeoutError, PeerConnectionLost, HandshakeFailure) if res.isOk():
# try: rlpx_connect_success.inc()
# self.logger.debug("Connecting to %s...", remote) return res.get()
# peer = await wait_with_token( else:
# handshake(remote, self.privkey, self.peer_class, self.network_id), rlpx_connect_failure.inc()
# token=self.cancel_token, rlpx_connect_failure.inc(labelValues = [$res.error])
# timeout=HANDSHAKE_TIMEOUT) case res.error():
# return peer of UselessRlpxPeerError:
# except OperationCancelled: p.addSeen(remote.id, SeenTableTimeUselessPeer)
# # Pass it on to instruct our main loop to stop. of TransportConnectError:
# raise p.addSeen(remote.id, SeenTableTimeDeadPeer)
# except expected_exceptions as e: of RlpxHandshakeError, ProtocolError, InvalidIdentityError:
# self.logger.debug("Could not complete handshake with %s: %s", remote, repr(e)) p.addSeen(remote.id, SeenTableTimeProtocolError)
# except Exception: of RlpxHandshakeTransportError,
# self.logger.exception("Unexpected error during auth/p2p handshake with %s", remote) P2PHandshakeError,
# return None P2PTransportError,
PeerDisconnectedError,
TooManyPeersError:
p.addSeen(remote.id, SeenTableTimeReconnect)
return nil
proc lookupRandomNode(p: PeerPool) {.async.} = proc lookupRandomNode(p: PeerPool) {.async.} =
discard await p.discovery.lookupRandom() discard await p.discovery.lookupRandom()
@ -118,7 +162,7 @@ proc getRandomBootnode(p: PeerPool): Option[Node] =
proc addPeer*(pool: PeerPool, peer: Peer) {.gcsafe.} = proc addPeer*(pool: PeerPool, peer: Peer) {.gcsafe.} =
doAssert(peer.remote notin pool.connectedNodes) doAssert(peer.remote notin pool.connectedNodes)
pool.connectedNodes[peer.remote] = peer pool.connectedNodes[peer.remote] = peer
connected_peers.inc() rlpx_connected_peers.inc()
for o in pool.observers.values: for o in pool.observers.values:
if not o.onPeerConnected.isNil: if not o.onPeerConnected.isNil:
if o.protocol.isNil or peer.supports(o.protocol): if o.protocol.isNil or peer.supports(o.protocol):

View File

@ -53,6 +53,10 @@ type
when useSnappy: when useSnappy:
snappyEnabled*: bool snappyEnabled*: bool
SeenNode* = object
nodeId*: NodeId
stamp*: chronos.Moment
PeerPool* = ref object PeerPool* = ref object
# Private fields: # Private fields:
network*: EthereumNode network*: EthereumNode
@ -63,6 +67,7 @@ type
discovery*: DiscoveryProtocol discovery*: DiscoveryProtocol
lastLookupTime*: float lastLookupTime*: float
connQueue*: AsyncQueue[Node] connQueue*: AsyncQueue[Node]
seenTable*: Table[NodeId, SeenNode]
connectedNodes*: Table[Node, Peer] connectedNodes*: Table[Node, Peer]
connectingNodes*: HashSet[Node] connectingNodes*: HashSet[Node]
running*: bool running*: bool

View File

@ -8,8 +8,8 @@
{.push raises: [Defect].} {.push raises: [Defect].}
import import
std/[tables, algorithm, deques, hashes, options, typetraits], std/[tables, algorithm, deques, hashes, options, typetraits, os],
stew/shims/macros, chronicles, nimcrypto/utils, chronos, stew/shims/macros, chronicles, nimcrypto/utils, chronos, metrics,
".."/[rlp, common, keys, async_utils], ".."/[rlp, common, keys, async_utils],
./private/p2p_types, "."/[kademlia, auth, rlpxcrypt, enode, p2p_protocol_dsl] ./private/p2p_types, "."/[kademlia, auth, rlpxcrypt, enode, p2p_protocol_dsl]
@ -24,6 +24,9 @@ const
# github.com/ethereum/devp2p/blob/master/rlpx.md#framing # github.com/ethereum/devp2p/blob/master/rlpx.md#framing
allowObsoletedChunkedMessages = defined(chunked_rlpx_enabled) allowObsoletedChunkedMessages = defined(chunked_rlpx_enabled)
# TODO: This doesn't get enabled currently in any of the builds, so we send a
# devp2p protocol handshake message with version. Need to check if some peers
# drop us because of this.
when useSnappy: when useSnappy:
import snappy import snappy
const devp2pSnappyVersion* = 5 const devp2pSnappyVersion* = 5
@ -34,7 +37,20 @@ when useSnappy:
export export
options, p2pProtocol, rlp, chronicles options, p2pProtocol, rlp, chronicles
declarePublicGauge connected_peers, "number of peers in the pool" declarePublicGauge rlpx_connected_peers,
"Number of connected peers in the pool"
declarePublicCounter rlpx_connect_success,
"Number of successfull rlpx connects"
declarePublicCounter rlpx_connect_failure,
"Number of rlpx connects that failed", labels = ["reason"]
declarePublicCounter rlpx_accept_success,
"Number of successful rlpx accepted peers"
declarePublicCounter rlpx_accept_failure,
"Number of rlpx accept attempts that failed", labels = ["reason"]
logScope: logScope:
topics = "eth p2p rlpx" topics = "eth p2p rlpx"
@ -185,6 +201,10 @@ proc handshakeImpl[T](peer: Peer,
doAssert timeout.milliseconds > 0 doAssert timeout.milliseconds > 0
yield responseFut or sleepAsync(timeout) yield responseFut or sleepAsync(timeout)
if not responseFut.finished: if not responseFut.finished:
# TODO: Really shouldn't disconnect and raise everywhere. In order to avoid
# understanding what error occured where.
# And also, incoming and outgoing disconnect errors should be seperated,
# probably by seperating the actual disconnect call to begin with.
await disconnectAndRaise(peer, HandshakeTimeout, await disconnectAndRaise(peer, HandshakeTimeout,
"Protocol handshake was not received in time.") "Protocol handshake was not received in time.")
elif responseFut.failed: elif responseFut.failed:
@ -708,6 +728,7 @@ proc waitSingleMsg(peer: Peer, MsgType: type): Future[MsgType] {.async.} =
"Invalid RLPx message body") "Invalid RLPx message body")
elif nextMsgId == 1: # p2p.disconnect elif nextMsgId == 1: # p2p.disconnect
# TODO: can still raise RlpError here...?
let reasonList = nextMsgData.read(DisconnectionReasonList) let reasonList = nextMsgData.read(DisconnectionReasonList)
let reason = reasonList.value let reason = reasonList.value
await peer.disconnect(reason) await peer.disconnect(reason)
@ -716,6 +737,7 @@ proc waitSingleMsg(peer: Peer, MsgType: type): Future[MsgType] {.async.} =
else: else:
warn "Dropped RLPX message", warn "Dropped RLPX message",
msg = peer.dispatcher.messages[nextMsgId].name msg = peer.dispatcher.messages[nextMsgId].name
# TODO: This is breach of protocol?
proc nextMsg*(peer: Peer, MsgType: type): Future[MsgType] = proc nextMsg*(peer: Peer, MsgType: type): Future[MsgType] =
## This procs awaits a specific RLPx message. ## This procs awaits a specific RLPx message.
@ -1048,7 +1070,7 @@ proc removePeer(network: EthereumNode, peer: Peer) =
if network.peerPool != nil and not peer.remote.isNil and if network.peerPool != nil and not peer.remote.isNil and
peer.remote in network.peerPool.connectedNodes: peer.remote in network.peerPool.connectedNodes:
network.peerPool.connectedNodes.del(peer.remote) network.peerPool.connectedNodes.del(peer.remote)
connected_peers.dec() rlpx_connected_peers.dec()
# Note: we need to do this check as disconnect (and thus removePeer) # Note: we need to do this check as disconnect (and thus removePeer)
# currently can get called before the dispatcher is initialized. # currently can get called before the dispatcher is initialized.
@ -1104,11 +1126,11 @@ proc disconnect*(peer: Peer, reason: DisconnectionReason,
peer.connectionState = Disconnected peer.connectionState = Disconnected
removePeer(peer.network, peer) removePeer(peer.network, peer)
proc validatePubKeyInHello(msg: DevP2P.hello, pubKey: PublicKey): bool = func validatePubKeyInHello(msg: DevP2P.hello, pubKey: PublicKey): bool =
let pk = PublicKey.fromRaw(msg.nodeId) let pk = PublicKey.fromRaw(msg.nodeId)
pk.isOk and pk[] == pubKey pk.isOk and pk[] == pubKey
proc checkUselessPeer(peer: Peer) {.raises: [UselessPeerError, Defect].} = func checkUselessPeer(peer: Peer) {.raises: [UselessPeerError, Defect].} =
if peer.dispatcher.numProtocols == 0: if peer.dispatcher.numProtocols == 0:
# XXX: Send disconnect + UselessPeer # XXX: Send disconnect + UselessPeer
raise newException(UselessPeerError, "Useless peer") raise newException(UselessPeerError, "Useless peer")
@ -1183,13 +1205,12 @@ template `^`(arr): auto =
# variable as an open array # variable as an open array
arr.toOpenArray(0, `arr Len` - 1) arr.toOpenArray(0, `arr Len` - 1)
proc initSecretState(hs: var Handshake, authMsg, ackMsg: openArray[byte], proc initSecretState(p: Peer, hs: Handshake, authMsg, ackMsg: openArray[byte]) =
p: Peer) =
var secrets = hs.getSecrets(authMsg, ackMsg) var secrets = hs.getSecrets(authMsg, ackMsg)
initSecretState(secrets, p.secretsState) initSecretState(secrets, p.secretsState)
burnMem(secrets) burnMem(secrets)
template checkSnappySupport(node: EthereumNode, handshake: Handshake, peer: Peer) = template setSnappySupport(peer: Peer, node: EthereumNode, handshake: Handshake) =
when useSnappy: when useSnappy:
peer.snappyEnabled = node.protocolVersion >= devp2pSnappyVersion.uint and peer.snappyEnabled = node.protocolVersion >= devp2pSnappyVersion.uint and
handshake.version >= devp2pSnappyVersion.uint handshake.version >= devp2pSnappyVersion.uint
@ -1213,106 +1234,170 @@ template baseProtocolVersion(peer: Peer): uint =
else: else:
devp2pVersion devp2pVersion
proc rlpxConnect*(node: EthereumNode, remote: Node): Future[Peer] {.async.} = type
RlpxError* = enum
TransportConnectError,
RlpxHandshakeTransportError,
RlpxHandshakeError,
ProtocolError,
P2PHandshakeError,
P2PTransportError,
InvalidIdentityError,
UselessRlpxPeerError,
PeerDisconnectedError,
TooManyPeersError
proc rlpxConnect*(node: EthereumNode, remote: Node):
Future[Result[Peer, RlpxError]] {.async.} =
# TODO: Should we not set some timeouts on the `connect` and `readExactly`s?
# Or should we have a general timeout on the whole rlpxConnect where it gets
# called?
# Now, some parts could potential hang until a tcp timeout is hit?
initTracing(devp2pInfo, node.protocols) initTracing(devp2pInfo, node.protocols)
let peer = Peer(remote: remote, network: node) let peer = Peer(remote: remote, network: node)
let ta = initTAddress(remote.node.address.ip, remote.node.address.tcpPort) let ta = initTAddress(remote.node.address.ip, remote.node.address.tcpPort)
var ok = false var error = true
try:
peer.transport = await connect(ta)
var handshake = Handshake.init(
node.rng[], node.keys, {Initiator, EIP8}, node.baseProtocolVersion)
var authMsg: array[AuthMessageMaxEIP8, byte] defer:
var authMsgLen = 0 if error: # TODO: Not sure if I like this much
if not isNil(peer.transport):
if not peer.transport.closed:
peer.transport.close()
peer.transport =
try:
await connect(ta)
except TransportError:
return err(TransportConnectError)
except CatchableError as e:
# Aside from TransportOsError, seems raw CatchableError can also occur?
debug "TCP connect with peer failed", err = $e.name, errMsg = $e.msg
return err(TransportConnectError)
# RLPx initial handshake
var
handshake = Handshake.init(
node.rng[], node.keys, {Initiator, EIP8}, node.baseProtocolVersion)
authMsg: array[AuthMessageMaxEIP8, byte]
authMsgLen = 0
# TODO: Rework this so we won't have to pass an array as parameter?
authMessage( authMessage(
handshake, node.rng[], remote.node.pubkey, authMsg, authMsgLen).tryGet() handshake, node.rng[], remote.node.pubkey, authMsg, authMsgLen).tryGet()
var res = await peer.transport.write(addr authMsg[0], authMsgLen)
if res != authMsgLen: let writeRes =
raisePeerDisconnected("Unexpected disconnect while authenticating", try:
TcpError) await peer.transport.write(addr authMsg[0], authMsgLen)
except TransportError:
return err(RlpxHandshakeTransportError)
except CatchableError as e: # TODO: Only TransportErrors can occur?
raiseAssert($e.name & " " & $e.msg)
if writeRes != authMsgLen:
return err(RlpxHandshakeTransportError)
let initialSize = handshake.expectedLength let initialSize = handshake.expectedLength
var ackMsg = newSeqOfCap[byte](1024) var ackMsg = newSeqOfCap[byte](1024)
ackMsg.setLen(initialSize) ackMsg.setLen(initialSize)
# TODO: Should we not set some timeouts on these `readExactly`s? try:
await peer.transport.readExactly(addr ackMsg[0], len(ackMsg)) await peer.transport.readExactly(addr ackMsg[0], len(ackMsg))
except TransportError:
return err(RlpxHandshakeTransportError)
except CatchableError as e:
raiseAssert($e.name & " " & $e.msg)
var ret = handshake.decodeAckMessage(ackMsg) let res = handshake.decodeAckMessage(ackMsg)
if ret.isErr and ret.error == AuthError.IncompleteError: if res.isErr and res.error == AuthError.IncompleteError:
ackMsg.setLen(handshake.expectedLength) ackMsg.setLen(handshake.expectedLength)
try:
await peer.transport.readExactly(addr ackMsg[initialSize], await peer.transport.readExactly(addr ackMsg[initialSize],
len(ackMsg) - initialSize) len(ackMsg) - initialSize)
ret = handshake.decodeAckMessage(ackMsg) except TransportError:
return err(RlpxHandshakeTransportError)
except CatchableError as e: # TODO: Only TransportErrors can occur?
raiseAssert($e.name & " " & $e.msg)
if ret.isErr(): # TODO: Bullet 1 of https://github.com/status-im/nim-eth/issues/559
debug "rlpxConnect handshake error", error = ret.error let res = handshake.decodeAckMessage(ackMsg)
if not isNil(peer.transport): if res.isErr():
peer.transport.close() debug "rlpxConnect handshake error", error = res.error
return nil return err(RlpxHandshakeError)
ret.get() peer.setSnappySupport(node, handshake)
peer.initSecretState(handshake, ^authMsg, ackMsg)
node.checkSnappySupport(handshake, peer)
initSecretState(handshake, ^authMsg, ackMsg, peer)
# if handshake.remoteHPubkey != remote.node.pubKey:
# raise newException(Exception, "Remote pubkey is wrong")
logConnectedPeer peer logConnectedPeer peer
var sendHelloFut = peer.hello( # RLPx p2p capability handshake: After the initial handshake, both sides of
# the connection must send either Hello or a Disconnect message.
let
sendHelloFut = peer.hello(
handshake.getVersion(), handshake.getVersion(),
node.clientId, node.clientId,
node.capabilities, node.capabilities,
uint(node.address.tcpPort), uint(node.address.tcpPort),
node.keys.pubkey.toRaw()) node.keys.pubkey.toRaw())
var response = await peer.handshakeImpl( receiveHelloFut = peer.waitSingleMsg(DevP2P.hello)
response =
try:
await peer.handshakeImpl(
sendHelloFut, sendHelloFut,
peer.waitSingleMsg(DevP2P.hello), receiveHelloFut,
10.seconds) 10.seconds)
except RlpError:
return err(ProtocolError)
except PeerDisconnected as e:
return err(PeerDisconnectedError)
# TODO: Strange compiler error
# case e.reason:
# of HandshakeTimeout:
# # Yeah, a bit odd but in this case PeerDisconnected comes from a
# # timeout on the P2P Hello message. TODO: Clean-up that handshakeImpl
# return err(P2PHandshakeError)
# of TooManyPeers:
# return err(TooManyPeersError)
# else:
# return err(PeerDisconnectedError)
except TransportError:
return err(P2PTransportError)
except CatchableError as e:
raiseAssert($e.name & " " & $e.msg)
if not validatePubKeyInHello(response, remote.node.pubkey): if not validatePubKeyInHello(response, remote.node.pubkey):
warn "Remote nodeId is not its public key" # XXX: Do we care? warn "Wrong devp2p identity in Hello message"
return err(InvalidIdentityError)
trace "DevP2P handshake completed", peer = remote, trace "DevP2P handshake completed", peer = remote,
clientId = response.clientId clientId = response.clientId
try:
await postHelloSteps(peer, response) await postHelloSteps(peer, response)
ok = true except RlpError:
trace "Peer fully connected", peer = remote, clientId = response.clientId return err(ProtocolError)
except PeerDisconnected as e: except PeerDisconnected as e:
case e.reason case e.reason:
of AlreadyConnected, TooManyPeers, MessageTimeout: of TooManyPeers:
trace "Disconnect during rlpxConnect", reason = e.reason, peer = remote return err(TooManyPeersError)
else: else:
debug "Unexpected disconnect during rlpxConnect", reason = e.reason, return err(PeerDisconnectedError)
msg = e.msg, peer = remote
except TransportIncompleteError:
trace "Connection dropped in rlpxConnect", remote
except UselessPeerError: except UselessPeerError:
trace "Disconnecting useless peer", peer = remote return err(UselessRlpxPeerError)
except RlpTypeMismatch: except TransportError:
# Some peers report capabilities with names longer than 3 chars. We ignore return err(P2PTransportError)
# those for now. Maybe we should allow this though.
debug "Rlp error in rlpxConnect"
except TransportOsError as e:
trace "TransportOsError", err = e.msg
except CatchableError as e: except CatchableError as e:
error "Unexpected exception in rlpxConnect", remote, exc = e.name, raiseAssert($e.name & " " & $e.msg)
err = e.msg
if not ok: debug "Peer fully connected", peer = remote, clientId = response.clientId
if not isNil(peer.transport):
peer.transport.close()
return nil
else:
return peer
proc rlpxAccept*(node: EthereumNode, error = false
transport: StreamTransport): Future[Peer] {.async.} =
return ok(peer)
# TODO: rework rlpxAccept similar to rlpxConnect.
proc rlpxAccept*(
node: EthereumNode, transport: StreamTransport): Future[Peer] {.async.} =
initTracing(devp2pInfo, node.protocols) initTracing(devp2pInfo, node.protocols)
let peer = Peer(transport: transport, network: node) let peer = Peer(transport: transport, network: node)
@ -1340,11 +1425,14 @@ proc rlpxAccept*(node: EthereumNode,
trace "rlpxAccept handshake error", error = ret.error trace "rlpxAccept handshake error", error = ret.error
if not isNil(peer.transport): if not isNil(peer.transport):
peer.transport.close() peer.transport.close()
rlpx_accept_failure.inc()
rlpx_accept_failure.inc(labelValues = ["handshake_error"])
return nil return nil
ret.get() ret.get()
node.checkSnappySupport(handshake, peer) peer.setSnappySupport(node, handshake)
handshake.version = uint8(peer.baseProtocolVersion) handshake.version = uint8(peer.baseProtocolVersion)
var ackMsg: array[AckMessageMaxEIP8, byte] var ackMsg: array[AckMessageMaxEIP8, byte]
@ -1355,7 +1443,7 @@ proc rlpxAccept*(node: EthereumNode,
raisePeerDisconnected("Unexpected disconnect while authenticating", raisePeerDisconnected("Unexpected disconnect while authenticating",
TcpError) TcpError)
initSecretState(handshake, authMsg, ^ackMsg, peer) peer.initSecretState(handshake, authMsg, ^ackMsg)
let listenPort = transport.localAddress().port let listenPort = transport.localAddress().port
@ -1407,24 +1495,37 @@ proc rlpxAccept*(node: EthereumNode,
else: else:
debug "Unexpected disconnect during rlpxAccept", reason = e.reason, debug "Unexpected disconnect during rlpxAccept", reason = e.reason,
msg = e.msg, peer = peer.remote msg = e.msg, peer = peer.remote
rlpx_accept_failure.inc(labelValues = [$e.reason])
except TransportIncompleteError: except TransportIncompleteError:
trace "Connection dropped in rlpxAccept", remote = peer.remote trace "Connection dropped in rlpxAccept", remote = peer.remote
rlpx_accept_failure.inc(labelValues = [$TransportIncompleteError])
except UselessPeerError: except UselessPeerError:
trace "Disconnecting useless peer", peer = peer.remote trace "Disconnecting useless peer", peer = peer.remote
except RlpTypeMismatch: rlpx_accept_failure.inc(labelValues = [$UselessPeerError])
except RlpTypeMismatch as e:
# Some peers report capabilities with names longer than 3 chars. We ignore # Some peers report capabilities with names longer than 3 chars. We ignore
# those for now. Maybe we should allow this though. # those for now. Maybe we should allow this though.
debug "Rlp error in rlpxAccept" debug "Rlp error in rlpxAccept", err = e.msg, errName = e.name
rlpx_accept_failure.inc(labelValues = [$RlpTypeMismatch])
except TransportOsError as e: except TransportOsError as e:
trace "TransportOsError", err = e.msg trace "TransportOsError", err = e.msg, errName = e.name
if e.code == OSErrorCode(110):
rlpx_accept_failure.inc(labelValues = ["tcp_timeout"])
else:
rlpx_accept_failure.inc(labelValues = [$e.name])
except CatchableError as e: except CatchableError as e:
error "Unexpected exception in rlpxAccept", exc = e.name, err = e.msg error "Unexpected exception in rlpxAccept", exc = e.name, err = e.msg
rlpx_accept_failure.inc(labelValues = [$e.name])
if not ok: if not ok:
if not isNil(peer.transport): if not isNil(peer.transport):
peer.transport.close() peer.transport.close()
rlpx_accept_failure.inc()
return nil return nil
else: else:
rlpx_accept_success.inc()
return peer return peer
when isMainModule: when isMainModule:

View File

@ -18,7 +18,11 @@ init:
node2 = setupTestNode(rng, eth) node2 = setupTestNode(rng, eth)
node2.startListening() node2.startListening()
peer = waitFor node1.rlpxConnect(newNode(node2.toENode())) let res = waitFor node1.rlpxConnect(newNode(node2.toENode()))
if res.isErr():
quit 1
else:
peer = res.get()
test: test:
aflLoop: # This appears to have unstable results with afl-clang-fast, probably aflLoop: # This appears to have unstable results with afl-clang-fast, probably

View File

@ -65,9 +65,11 @@ suite "Testing protocol handlers":
var node2 = setupTestNode(rng, abc, xyz) var node2 = setupTestNode(rng, abc, xyz)
node2.startListening() node2.startListening()
let peer = await node1.rlpxConnect(newNode(node2.toENode())) let res = await node1.rlpxConnect(newNode(node2.toENode()))
check: check:
peer.isNil == false res.isOk()
let peer = res.get()
await peer.disconnect(SubprotocolReason, true) await peer.disconnect(SubprotocolReason, true)
check: check:
@ -82,9 +84,9 @@ suite "Testing protocol handlers":
var node1 = setupTestNode(rng, hah) var node1 = setupTestNode(rng, hah)
var node2 = setupTestNode(rng, hah) var node2 = setupTestNode(rng, hah)
node2.startListening() node2.startListening()
let peer = await node1.rlpxConnect(newNode(node2.toENode())) let res = await node1.rlpxConnect(newNode(node2.toENode()))
check: check:
peer.isNil == true res.isErr()
# To check if the disconnection handler did not run # To check if the disconnection handler did not run
node1.protocolState(hah).count == 0 node1.protocolState(hah).count == 0

View File

@ -16,7 +16,10 @@ var
node2 = setupTestNode(rng, eth) node2 = setupTestNode(rng, eth)
node2.startListening() node2.startListening()
var peer = waitFor node1.rlpxConnect(newNode(node2.toENode())) let res = waitFor node1.rlpxConnect(newNode(node2.toENode()))
check res.isOk()
let peer = res.get()
proc testThunk(payload: openArray[byte]) = proc testThunk(payload: openArray[byte]) =
var (msgId, msgData) = recvMsgMock(payload) var (msgId, msgData) = recvMsgMock(payload)