mirror of https://github.com/status-im/nim-eth.git
Add rlpx metrics and avoid immediate peer reconnections (#585)
* Add metrics related to devp2p peer connections

* Avoid reconnecting to peers that just failed connection

  - Add SeenTable to avoid reconnecting to peers immediately after a failed
    connect. Depending on the failure, the amount of time is different. This
    is similar to what is done in nimbus-eth2.
  - Attempt to rework rlpxConnect at the same time, in order to make sure
    that errors are properly handled. The current structure is far from
    ideal, but it is hopefully a small step in the right direction. Too many
    oddities in there right now to really rework rlpxConnect properly.

* Fix rlpx thunk fuzzer
parent 29b14749fa
commit d2ba753792
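The second bullet above is easy to state on its own: a failed connect records the remote's node id together with an expiry stamp chosen by failure kind, and the connect path bails out early while the stamp lies in the future. The sketch below illustrates the idea; it uses std/times in place of chronos, a module-level table in place of the PeerPool field, and narrows NodeId to an int, so it is illustrative rather than the committed code (the real isSeen/addSeen are in the peer pool diff further down).

# Sketch only: std/times stands in for chronos.Moment/Duration, and a
# module-level table stands in for the PeerPool.seenTable field.
import std/[tables, times]

type
  NodeId = int              # stand-in for the real 256-bit discovery node id
  SeenNode = object
    stamp: Time             # moment at which the ban period expires

var seenTable: Table[NodeId, SeenNode]

proc addSeen(nodeId: NodeId, period: Duration) =
  # Ban nodeId for period; keep the longer ban if an entry already exists.
  let item = SeenNode(stamp: getTime() + period)
  if nodeId notin seenTable or seenTable[nodeId].stamp < item.stamp:
    seenTable[nodeId] = item

proc isSeen(nodeId: NodeId): bool =
  # True while nodeId is inside its ban period; expired entries are pruned.
  if nodeId notin seenTable:
    false
  elif getTime() >= seenTable[nodeId].stamp:
    seenTable.del(nodeId)
    false
  else:
    true

when isMainModule:
  addSeen(1, initDuration(minutes = 10))  # e.g. a dead / unreachable peer
  doAssert isSeen(1)
  doAssert not isSeen(2)

The committed version keys the ban length to the failure: 10 minutes for dead peers, 24 hours for useless peers, 30 minutes for protocol errors and 5 minutes for ordinary disconnects or transport errors, as the constants in the peer pool diff show.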
eth/p2p/kademlia.nim

@@ -9,12 +9,15 @@
 import
   std/[tables, hashes, times, algorithm, sets, sequtils],
-  chronos, chronicles, stint, nimcrypto/keccak,
+  chronos, chronicles, stint, nimcrypto/keccak, metrics,
   ../keys, ./discoveryv5/random2,
   ./enode

 export sets # TODO: This should not be needed, but compilation fails otherwise

+declarePublicGauge routing_table_nodes,
+  "Discovery routing table nodes"
+
 logScope:
   topics = "eth p2p kademlia"

@@ -215,6 +218,7 @@ proc add(k: KBucket, n: Node): Node =
     k.nodes.add(n)
   elif k.len < BUCKET_SIZE:
     k.nodes.add(n)
+    routing_table_nodes.inc()
   else:
     k.replacementCache.add(n)
     return k.head
@@ -222,7 +226,9 @@ proc add(k: KBucket, n: Node): Node =

 proc removeNode(k: KBucket, n: Node) =
   let i = k.nodes.find(n)
-  if i != -1: k.nodes.delete(i)
+  if i != -1:
+    routing_table_nodes.dec()
+    k.nodes.delete(i)

 proc split(k: KBucket): tuple[lower, upper: KBucket] =
   ## Split at the median id
eth/p2p/peer_pool.nim

@@ -26,6 +26,42 @@
   maxConcurrentConnectionRequests = 40
   sleepBeforeTryingARandomBootnode = chronos.milliseconds(3000)

+  ## Period of time for dead / unreachable peers.
+  SeenTableTimeDeadPeer = chronos.minutes(10)
+  ## Period of time for useless peers, either because of no matching
+  ## capabilities or on an irrelevant network.
+  SeenTableTimeUselessPeer = chronos.hours(24)
+  ## Period of time for peers with a protocol error.
+  SeenTableTimeProtocolError = chronos.minutes(30)
+  ## Period of time for peers with general disconnections / transport errors.
+  SeenTableTimeReconnect = chronos.minutes(5)
+
+proc isSeen(p: PeerPool, nodeId: NodeId): bool =
+  ## Returns ``true`` if ``nodeId`` is present in the SeenTable and its time
+  ## period has not yet expired.
+  let currentTime = now(chronos.Moment)
+  if nodeId notin p.seenTable:
+    false
+  else:
+    let item = try: p.seenTable[nodeId]
+               except KeyError: raiseAssert "checked with notin"
+    if currentTime >= item.stamp:
+      # Peer is in SeenTable, but the time period has expired.
+      p.seenTable.del(nodeId)
+      false
+    else:
+      true
+
+proc addSeen(
+    p: PeerPool, nodeId: NodeId, period: chronos.Duration) =
+  ## Adds peer with NodeId ``nodeId`` to the SeenTable with timeout ``period``.
+  let item = SeenNode(nodeId: nodeId, stamp: now(chronos.Moment) + period)
+  withValue(p.seenTable, nodeId, entry) do:
+    if entry.stamp < item.stamp:
+      entry.stamp = item.stamp
+  do:
+    p.seenTable[nodeId] = item
+
 proc newPeerPool*(
     network: EthereumNode, networkId: NetworkId, keyPair: KeyPair,
     discovery: DiscoveryProtocol, clientId: string, minPeers = 10): PeerPool =
@@ -84,28 +120,36 @@ proc connect(p: PeerPool, remote: Node): Future[Peer] {.async.} =
     # debug "skipping connection"
     return nil

+  if p.isSeen(remote.id):
+    return nil
+
   trace "Connecting to node", remote
   p.connectingNodes.incl(remote)
-  result = await p.network.rlpxConnect(remote)
+  let res = await p.network.rlpxConnect(remote)
   p.connectingNodes.excl(remote)

-  # expected_exceptions = (
-  #   UnreachablePeer, TimeoutError, PeerConnectionLost, HandshakeFailure)
-  # try:
-  #   self.logger.debug("Connecting to %s...", remote)
-  #   peer = await wait_with_token(
-  #     handshake(remote, self.privkey, self.peer_class, self.network_id),
-  #     token=self.cancel_token,
-  #     timeout=HANDSHAKE_TIMEOUT)
-  #   return peer
-  # except OperationCancelled:
-  #   # Pass it on to instruct our main loop to stop.
-  #   raise
-  # except expected_exceptions as e:
-  #   self.logger.debug("Could not complete handshake with %s: %s", remote, repr(e))
-  # except Exception:
-  #   self.logger.exception("Unexpected error during auth/p2p handshake with %s", remote)
-  # return None
+  # TODO: Probably should move all this logic to rlpx.nim
+  if res.isOk():
+    rlpx_connect_success.inc()
+    return res.get()
+  else:
+    rlpx_connect_failure.inc()
+    rlpx_connect_failure.inc(labelValues = [$res.error])
+    case res.error():
+    of UselessRlpxPeerError:
+      p.addSeen(remote.id, SeenTableTimeUselessPeer)
+    of TransportConnectError:
+      p.addSeen(remote.id, SeenTableTimeDeadPeer)
+    of RlpxHandshakeError, ProtocolError, InvalidIdentityError:
+      p.addSeen(remote.id, SeenTableTimeProtocolError)
+    of RlpxHandshakeTransportError,
+        P2PHandshakeError,
+        P2PTransportError,
+        PeerDisconnectedError,
+        TooManyPeersError:
+      p.addSeen(remote.id, SeenTableTimeReconnect)
+
+    return nil

 proc lookupRandomNode(p: PeerPool) {.async.} =
   discard await p.discovery.lookupRandom()
@@ -118,7 +162,7 @@ proc getRandomBootnode(p: PeerPool): Option[Node] =
 proc addPeer*(pool: PeerPool, peer: Peer) {.gcsafe.} =
   doAssert(peer.remote notin pool.connectedNodes)
   pool.connectedNodes[peer.remote] = peer
-  connected_peers.inc()
+  rlpx_connected_peers.inc()
   for o in pool.observers.values:
     if not o.onPeerConnected.isNil:
       if o.protocol.isNil or peer.supports(o.protocol):
eth/p2p/private/p2p_types.nim

@@ -53,6 +53,10 @@ type
     when useSnappy:
       snappyEnabled*: bool

+  SeenNode* = object
+    nodeId*: NodeId
+    stamp*: chronos.Moment
+
   PeerPool* = ref object
     # Private fields:
     network*: EthereumNode
@@ -63,6 +67,7 @@ type
     discovery*: DiscoveryProtocol
     lastLookupTime*: float
     connQueue*: AsyncQueue[Node]
+    seenTable*: Table[NodeId, SeenNode]
     connectedNodes*: Table[Node, Peer]
     connectingNodes*: HashSet[Node]
     running*: bool
eth/p2p/rlpx.nim (243 changed lines)

@@ -8,8 +8,8 @@
 {.push raises: [Defect].}

 import
-  std/[tables, algorithm, deques, hashes, options, typetraits],
-  stew/shims/macros, chronicles, nimcrypto/utils, chronos,
+  std/[tables, algorithm, deques, hashes, options, typetraits, os],
+  stew/shims/macros, chronicles, nimcrypto/utils, chronos, metrics,
   ".."/[rlp, common, keys, async_utils],
   ./private/p2p_types, "."/[kademlia, auth, rlpxcrypt, enode, p2p_protocol_dsl]

@@ -24,6 +24,9 @@ const
   # github.com/ethereum/devp2p/blob/master/rlpx.md#framing
   allowObsoletedChunkedMessages = defined(chunked_rlpx_enabled)

+# TODO: This doesn't get enabled currently in any of the builds, so we send a
+# devp2p protocol handshake message with version. Need to check if some peers
+# drop us because of this.
 when useSnappy:
   import snappy
   const devp2pSnappyVersion* = 5
@@ -34,7 +37,20 @@ when useSnappy:
 export
   options, p2pProtocol, rlp, chronicles

-declarePublicGauge connected_peers, "number of peers in the pool"
+declarePublicGauge rlpx_connected_peers,
+  "Number of connected peers in the pool"
+
+declarePublicCounter rlpx_connect_success,
+  "Number of successful rlpx connects"
+
+declarePublicCounter rlpx_connect_failure,
+  "Number of rlpx connects that failed", labels = ["reason"]
+
+declarePublicCounter rlpx_accept_success,
+  "Number of successful rlpx accepted peers"
+
+declarePublicCounter rlpx_accept_failure,
+  "Number of rlpx accept attempts that failed", labels = ["reason"]

 logScope:
   topics = "eth p2p rlpx"
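The declarations above follow the usual nim-metrics pattern: declarePublicGauge/declarePublicCounter create Prometheus collectors, and a counter declared with labels = ["reason"] keeps one time series per distinct label value passed to inc(labelValues = [...]). A minimal standalone sketch of that pattern, with placeholder example_* names (assuming the metrics package; to my understanding the collectors are only active when compiling with -d:metrics, and otherwise become no-ops):

import metrics

# A plain gauge: one series, moved up and down with inc()/dec().
declarePublicGauge example_connected_peers,
  "Number of connected peers in the example pool"

# A labelled counter: one monotonically increasing series per "reason" value.
declarePublicCounter example_connect_failure,
  "Number of example connects that failed", labels = ["reason"]

example_connected_peers.inc()
example_connected_peers.dec()
example_connect_failure.inc(labelValues = ["timeout"])
example_connect_failure.inc(labelValues = ["useless_peer"])

The peer pool code in this commit bumps rlpx_connect_failure twice on failure: once without labels for the overall total, and once with the stringified error as the label value.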
@@ -185,6 +201,10 @@ proc handshakeImpl[T](peer: Peer,
   doAssert timeout.milliseconds > 0
   yield responseFut or sleepAsync(timeout)
   if not responseFut.finished:
+    # TODO: Really shouldn't disconnect and raise everywhere, as that makes it
+    # hard to understand what error occurred where.
+    # Also, incoming and outgoing disconnect errors should be separated,
+    # probably by separating the actual disconnect call to begin with.
     await disconnectAndRaise(peer, HandshakeTimeout,
                              "Protocol handshake was not received in time.")
   elif responseFut.failed:
@@ -708,6 +728,7 @@ proc waitSingleMsg(peer: Peer, MsgType: type): Future[MsgType] {.async.} =
       "Invalid RLPx message body")

   elif nextMsgId == 1: # p2p.disconnect
+    # TODO: can still raise RlpError here...?
     let reasonList = nextMsgData.read(DisconnectionReasonList)
     let reason = reasonList.value
     await peer.disconnect(reason)
@@ -716,6 +737,7 @@ proc waitSingleMsg(peer: Peer, MsgType: type): Future[MsgType] {.async.} =
   else:
     warn "Dropped RLPX message",
       msg = peer.dispatcher.messages[nextMsgId].name
+    # TODO: This is a breach of protocol?

 proc nextMsg*(peer: Peer, MsgType: type): Future[MsgType] =
   ## This proc awaits a specific RLPx message.
@@ -1048,7 +1070,7 @@ proc removePeer(network: EthereumNode, peer: Peer) =
   if network.peerPool != nil and not peer.remote.isNil and
       peer.remote in network.peerPool.connectedNodes:
     network.peerPool.connectedNodes.del(peer.remote)
-    connected_peers.dec()
+    rlpx_connected_peers.dec()

     # Note: we need to do this check as disconnect (and thus removePeer)
     # currently can get called before the dispatcher is initialized.
@@ -1104,11 +1126,11 @@ proc disconnect*(peer: Peer, reason: DisconnectionReason,
     peer.connectionState = Disconnected
     removePeer(peer.network, peer)

-proc validatePubKeyInHello(msg: DevP2P.hello, pubKey: PublicKey): bool =
+func validatePubKeyInHello(msg: DevP2P.hello, pubKey: PublicKey): bool =
   let pk = PublicKey.fromRaw(msg.nodeId)
   pk.isOk and pk[] == pubKey

-proc checkUselessPeer(peer: Peer) {.raises: [UselessPeerError, Defect].} =
+func checkUselessPeer(peer: Peer) {.raises: [UselessPeerError, Defect].} =
   if peer.dispatcher.numProtocols == 0:
     # XXX: Send disconnect + UselessPeer
     raise newException(UselessPeerError, "Useless peer")
@@ -1183,13 +1205,12 @@ template `^`(arr): auto =
   # variable as an open array
   arr.toOpenArray(0, `arr Len` - 1)

-proc initSecretState(hs: var Handshake, authMsg, ackMsg: openArray[byte],
-                     p: Peer) =
+proc initSecretState(p: Peer, hs: Handshake, authMsg, ackMsg: openArray[byte]) =
   var secrets = hs.getSecrets(authMsg, ackMsg)
   initSecretState(secrets, p.secretsState)
   burnMem(secrets)

-template checkSnappySupport(node: EthereumNode, handshake: Handshake, peer: Peer) =
+template setSnappySupport(peer: Peer, node: EthereumNode, handshake: Handshake) =
   when useSnappy:
     peer.snappyEnabled = node.protocolVersion >= devp2pSnappyVersion.uint and
                          handshake.version >= devp2pSnappyVersion.uint
@@ -1213,106 +1234,170 @@ template baseProtocolVersion(peer: Peer): uint =
   else:
     devp2pVersion

-proc rlpxConnect*(node: EthereumNode, remote: Node): Future[Peer] {.async.} =
+type
+  RlpxError* = enum
+    TransportConnectError,
+    RlpxHandshakeTransportError,
+    RlpxHandshakeError,
+    ProtocolError,
+    P2PHandshakeError,
+    P2PTransportError,
+    InvalidIdentityError,
+    UselessRlpxPeerError,
+    PeerDisconnectedError,
+    TooManyPeersError
+
+proc rlpxConnect*(node: EthereumNode, remote: Node):
+    Future[Result[Peer, RlpxError]] {.async.} =
+  # TODO: Should we not set some timeouts on the `connect` and `readExactly`s?
+  # Or should we have a general timeout on the whole rlpxConnect where it gets
+  # called?
+  # Now, some parts could potentially hang until a tcp timeout is hit?
   initTracing(devp2pInfo, node.protocols)

   let peer = Peer(remote: remote, network: node)
   let ta = initTAddress(remote.node.address.ip, remote.node.address.tcpPort)
-  var ok = false
-  try:
-    peer.transport = await connect(ta)
-    var handshake = Handshake.init(
-      node.rng[], node.keys, {Initiator, EIP8}, node.baseProtocolVersion)
-
-    var authMsg: array[AuthMessageMaxEIP8, byte]
-    var authMsgLen = 0
+  var error = true
+  defer:
+    if error: # TODO: Not sure if I like this much
+      if not isNil(peer.transport):
+        if not peer.transport.closed:
+          peer.transport.close()
+
+  peer.transport =
+    try:
+      await connect(ta)
+    except TransportError:
+      return err(TransportConnectError)
+    except CatchableError as e:
+      # Aside from TransportOsError, seems raw CatchableError can also occur?
+      debug "TCP connect with peer failed", err = $e.name, errMsg = $e.msg
+      return err(TransportConnectError)
+
+  # RLPx initial handshake
+  var
+    handshake = Handshake.init(
+      node.rng[], node.keys, {Initiator, EIP8}, node.baseProtocolVersion)
+    authMsg: array[AuthMessageMaxEIP8, byte]
+    authMsgLen = 0
+  # TODO: Rework this so we won't have to pass an array as parameter?
   authMessage(
     handshake, node.rng[], remote.node.pubkey, authMsg, authMsgLen).tryGet()
-  var res = await peer.transport.write(addr authMsg[0], authMsgLen)
-  if res != authMsgLen:
-    raisePeerDisconnected("Unexpected disconnect while authenticating",
-                          TcpError)
+
+  let writeRes =
+    try:
+      await peer.transport.write(addr authMsg[0], authMsgLen)
+    except TransportError:
+      return err(RlpxHandshakeTransportError)
+    except CatchableError as e: # TODO: Only TransportErrors can occur?
+      raiseAssert($e.name & " " & $e.msg)
+  if writeRes != authMsgLen:
+    return err(RlpxHandshakeTransportError)

   let initialSize = handshake.expectedLength
   var ackMsg = newSeqOfCap[byte](1024)
   ackMsg.setLen(initialSize)

-  # TODO: Should we not set some timeouts on these `readExactly`s?
-  await peer.transport.readExactly(addr ackMsg[0], len(ackMsg))
+  try:
+    await peer.transport.readExactly(addr ackMsg[0], len(ackMsg))
+  except TransportError:
+    return err(RlpxHandshakeTransportError)
+  except CatchableError as e:
+    raiseAssert($e.name & " " & $e.msg)

-  var ret = handshake.decodeAckMessage(ackMsg)
-  if ret.isErr and ret.error == AuthError.IncompleteError:
+  let res = handshake.decodeAckMessage(ackMsg)
+  if res.isErr and res.error == AuthError.IncompleteError:
     ackMsg.setLen(handshake.expectedLength)
-    await peer.transport.readExactly(addr ackMsg[initialSize],
-                                     len(ackMsg) - initialSize)
-    ret = handshake.decodeAckMessage(ackMsg)
-
-  if ret.isErr():
-    debug "rlpxConnect handshake error", error = ret.error
-    if not isNil(peer.transport):
-      peer.transport.close()
-    return nil
-
-  ret.get()
+    try:
+      await peer.transport.readExactly(addr ackMsg[initialSize],
+                                       len(ackMsg) - initialSize)
+    except TransportError:
+      return err(RlpxHandshakeTransportError)
+    except CatchableError as e: # TODO: Only TransportErrors can occur?
+      raiseAssert($e.name & " " & $e.msg)
+
+    # TODO: Bullet 1 of https://github.com/status-im/nim-eth/issues/559
+    let res = handshake.decodeAckMessage(ackMsg)
+    if res.isErr():
+      debug "rlpxConnect handshake error", error = res.error
+      return err(RlpxHandshakeError)

-  node.checkSnappySupport(handshake, peer)
-  initSecretState(handshake, ^authMsg, ackMsg, peer)
+  peer.setSnappySupport(node, handshake)
+  peer.initSecretState(handshake, ^authMsg, ackMsg)

-  # if handshake.remoteHPubkey != remote.node.pubKey:
-  #   raise newException(Exception, "Remote pubkey is wrong")
   logConnectedPeer peer

-  var sendHelloFut = peer.hello(
-    handshake.getVersion(),
-    node.clientId,
-    node.capabilities,
-    uint(node.address.tcpPort),
-    node.keys.pubkey.toRaw())
-
-  var response = await peer.handshakeImpl(
-    sendHelloFut,
-    peer.waitSingleMsg(DevP2P.hello),
-    10.seconds)
+  # RLPx p2p capability handshake: After the initial handshake, both sides of
+  # the connection must send either Hello or a Disconnect message.
+  let
+    sendHelloFut = peer.hello(
+      handshake.getVersion(),
+      node.clientId,
+      node.capabilities,
+      uint(node.address.tcpPort),
+      node.keys.pubkey.toRaw())
+
+    receiveHelloFut = peer.waitSingleMsg(DevP2P.hello)
+
+    response =
+      try:
+        await peer.handshakeImpl(
+          sendHelloFut,
+          receiveHelloFut,
+          10.seconds)
+      except RlpError:
+        return err(ProtocolError)
+      except PeerDisconnected as e:
+        return err(PeerDisconnectedError)
+        # TODO: Strange compiler error
+        # case e.reason:
+        # of HandshakeTimeout:
+        #   # Yeah, a bit odd but in this case PeerDisconnected comes from a
+        #   # timeout on the P2P Hello message. TODO: Clean-up that handshakeImpl
+        #   return err(P2PHandshakeError)
+        # of TooManyPeers:
+        #   return err(TooManyPeersError)
+        # else:
+        #   return err(PeerDisconnectedError)
+      except TransportError:
+        return err(P2PTransportError)
+      except CatchableError as e:
+        raiseAssert($e.name & " " & $e.msg)

   if not validatePubKeyInHello(response, remote.node.pubkey):
-    warn "Remote nodeId is not its public key" # XXX: Do we care?
+    warn "Wrong devp2p identity in Hello message"
+    return err(InvalidIdentityError)

   trace "DevP2P handshake completed", peer = remote,
     clientId = response.clientId

+  try:
     await postHelloSteps(peer, response)
-    ok = true
-    trace "Peer fully connected", peer = remote, clientId = response.clientId
+  except RlpError:
+    return err(ProtocolError)
   except PeerDisconnected as e:
-    case e.reason
-    of AlreadyConnected, TooManyPeers, MessageTimeout:
-      trace "Disconnect during rlpxConnect", reason = e.reason, peer = remote
+    case e.reason:
+    of TooManyPeers:
+      return err(TooManyPeersError)
     else:
-      debug "Unexpected disconnect during rlpxConnect", reason = e.reason,
-        msg = e.msg, peer = remote
-  except TransportIncompleteError:
-    trace "Connection dropped in rlpxConnect", remote
+      return err(PeerDisconnectedError)
   except UselessPeerError:
-    trace "Disconnecting useless peer", peer = remote
-  except RlpTypeMismatch:
-    # Some peers report capabilities with names longer than 3 chars. We ignore
-    # those for now. Maybe we should allow this though.
-    debug "Rlp error in rlpxConnect"
-  except TransportOsError as e:
-    trace "TransportOsError", err = e.msg
+    return err(UselessRlpxPeerError)
+  except TransportError:
+    return err(P2PTransportError)
   except CatchableError as e:
-    error "Unexpected exception in rlpxConnect", remote, exc = e.name,
-      err = e.msg
-
-  if not ok:
-    if not isNil(peer.transport):
-      peer.transport.close()
-    return nil
-  else:
-    return peer
+    raiseAssert($e.name & " " & $e.msg)
+
+  debug "Peer fully connected", peer = remote, clientId = response.clientId
+
+  error = false
+
+  return ok(peer)

-proc rlpxAccept*(node: EthereumNode,
-                 transport: StreamTransport): Future[Peer] {.async.} =
+# TODO: rework rlpxAccept similar to rlpxConnect.
+proc rlpxAccept*(
+    node: EthereumNode, transport: StreamTransport): Future[Peer] {.async.} =
   initTracing(devp2pInfo, node.protocols)

   let peer = Peer(transport: transport, network: node)
@@ -1340,11 +1425,14 @@ proc rlpxAccept*(node: EthereumNode,
     trace "rlpxAccept handshake error", error = ret.error
     if not isNil(peer.transport):
       peer.transport.close()
+
+    rlpx_accept_failure.inc()
+    rlpx_accept_failure.inc(labelValues = ["handshake_error"])
     return nil

   ret.get()

-  node.checkSnappySupport(handshake, peer)
+  peer.setSnappySupport(node, handshake)
   handshake.version = uint8(peer.baseProtocolVersion)

   var ackMsg: array[AckMessageMaxEIP8, byte]
@@ -1355,7 +1443,7 @@ proc rlpxAccept*(node: EthereumNode,
     raisePeerDisconnected("Unexpected disconnect while authenticating",
                           TcpError)

-  initSecretState(handshake, authMsg, ^ackMsg, peer)
+  peer.initSecretState(handshake, authMsg, ^ackMsg)

   let listenPort = transport.localAddress().port

@@ -1407,24 +1495,37 @@ proc rlpxAccept*(node: EthereumNode,
     else:
       debug "Unexpected disconnect during rlpxAccept", reason = e.reason,
         msg = e.msg, peer = peer.remote
+
+    rlpx_accept_failure.inc(labelValues = [$e.reason])
   except TransportIncompleteError:
     trace "Connection dropped in rlpxAccept", remote = peer.remote
+    rlpx_accept_failure.inc(labelValues = [$TransportIncompleteError])
   except UselessPeerError:
     trace "Disconnecting useless peer", peer = peer.remote
-  except RlpTypeMismatch:
+    rlpx_accept_failure.inc(labelValues = [$UselessPeerError])
+  except RlpTypeMismatch as e:
     # Some peers report capabilities with names longer than 3 chars. We ignore
     # those for now. Maybe we should allow this though.
-    debug "Rlp error in rlpxAccept"
+    debug "Rlp error in rlpxAccept", err = e.msg, errName = e.name
+    rlpx_accept_failure.inc(labelValues = [$RlpTypeMismatch])
   except TransportOsError as e:
-    trace "TransportOsError", err = e.msg
+    trace "TransportOsError", err = e.msg, errName = e.name
+    if e.code == OSErrorCode(110):
+      rlpx_accept_failure.inc(labelValues = ["tcp_timeout"])
+    else:
+      rlpx_accept_failure.inc(labelValues = [$e.name])
   except CatchableError as e:
     error "Unexpected exception in rlpxAccept", exc = e.name, err = e.msg
+    rlpx_accept_failure.inc(labelValues = [$e.name])

   if not ok:
     if not isNil(peer.transport):
       peer.transport.close()
+
+    rlpx_accept_failure.inc()
     return nil
   else:
+    rlpx_accept_success.inc()
     return peer

 when isMainModule:
@@ -18,7 +18,11 @@
   node2 = setupTestNode(rng, eth)

   node2.startListening()
-  peer = waitFor node1.rlpxConnect(newNode(node2.toENode()))
+  let res = waitFor node1.rlpxConnect(newNode(node2.toENode()))
+  if res.isErr():
+    quit 1
+  else:
+    peer = res.get()

 test:
   aflLoop: # This appears to have unstable results with afl-clang-fast, probably
@@ -65,9 +65,11 @@ suite "Testing protocol handlers":
     var node2 = setupTestNode(rng, abc, xyz)

     node2.startListening()
-    let peer = await node1.rlpxConnect(newNode(node2.toENode()))
+    let res = await node1.rlpxConnect(newNode(node2.toENode()))
     check:
-      peer.isNil == false
+      res.isOk()
+
+    let peer = res.get()

     await peer.disconnect(SubprotocolReason, true)
     check:
@@ -82,9 +84,9 @@ suite "Testing protocol handlers":
     var node1 = setupTestNode(rng, hah)
     var node2 = setupTestNode(rng, hah)
     node2.startListening()
-    let peer = await node1.rlpxConnect(newNode(node2.toENode()))
+    let res = await node1.rlpxConnect(newNode(node2.toENode()))
     check:
-      peer.isNil == true
+      res.isErr()
       # To check if the disconnection handler did not run
       node1.protocolState(hah).count == 0
@@ -16,7 +16,10 @@ var
   node2 = setupTestNode(rng, eth)

 node2.startListening()
-var peer = waitFor node1.rlpxConnect(newNode(node2.toENode()))
+let res = waitFor node1.rlpxConnect(newNode(node2.toENode()))
+check res.isOk()
+
+let peer = res.get()

 proc testThunk(payload: openArray[byte]) =
   var (msgId, msgData) = recvMsgMock(payload)