2020-04-21 21:23:29 +02:00
|
|
|
# nim-eth - Node Discovery Protocol v5
|
|
|
|
# Copyright (c) 2020 Status Research & Development GmbH
|
|
|
|
# Licensed under either of
|
|
|
|
# * Apache License, version 2.0, (LICENSE-APACHEv2)
|
|
|
|
# * MIT license (LICENSE-MIT)
|
|
|
|
# at your option. This file may not be copied, modified, or distributed except
|
|
|
|
# according to those terms.
|
|
|
|
|
|
|
|
## Node Discovery Protocol v5
|
|
|
|
##
|
|
|
|
## Node discovery protocol implementation as per specification:
|
|
|
|
## https://github.com/ethereum/devp2p/blob/master/discv5/discv5.md
|
|
|
|
##
|
|
|
|
## This node discovery protocol implementation uses the same underlying
|
|
|
|
## implementation of routing table as is also used for the discovery v4
|
|
|
|
## implementation, which is the same or similar as the one described in the
|
|
|
|
## original Kademlia paper:
|
|
|
|
## https://pdos.csail.mit.edu/~petar/papers/maymounkov-kademlia-lncs.pdf
|
|
|
|
##
|
|
|
|
## This might not be the most optimal implementation for the node discovery
|
|
|
|
## protocol v5. Why?
|
|
|
|
##
|
|
|
|
## The Kademlia paper describes an implementation that starts off from one
|
|
|
|
## k-bucket, and keeps splitting the bucket as more nodes are discovered and
|
|
|
|
## added. The bucket splits only on the part of the binary tree where our own
|
|
|
|
## node's id belongs to (same prefix). Resulting eventually in a k-bucket per
|
|
|
|
## logarithmic distance (log base2 distance). Well, not really, as nodes with
|
|
|
|
## ids in the closer distance ranges will never be found. And because of this an
|
|
|
|
## optimisation is done where buckets will also split sometimes even if the
|
|
|
|
## node's own id does not have the same prefix (this is to avoid creating highly
|
|
|
|
## unbalanced branches which would require longer lookups).
|
|
|
|
##
|
|
|
|
## Now, some implementations take a more simplified approach. They just create
|
|
|
|
## directly a bucket for each possible logarithmic distance (e.g. here 1->256).
|
|
|
|
## Some implementations also don't create buckets with logarithmic distance
|
|
|
|
## lower than a certain value (e.g. only 1/15th of the highest buckets),
|
|
|
|
## because the closer to the node (the lower the distance), the less chance
|
|
|
|
## there is to still find nodes.
|
|
|
|
##
|
|
|
|
## The `FindNode` call of the discovery protocol v4 will request the k closest
|
|
|
|
## nodes. As does original Kademlia. This effectively puts the work at the node
|
|
|
|
## that gets the request. This node will have to check its buckets and gather
|
|
|
|
## the closest. Some implementations go over all the nodes in all the buckets
|
|
|
|
## for this (e.g. go-ethereum discovery v4). However, in our bucket splitting
|
|
|
|
## approach, this search is improved.
|
|
|
|
##
|
|
|
|
## In the discovery protocol v5 the `FindNode` call is changed and now the
|
|
|
|
## logarithmic distance is passed as parameter instead of the NodeId. And only
|
|
|
|
## nodes that match that logarithmic distance are allowed to be returned.
|
|
|
|
## This change was made to not put the trust at the requested node for selecting
|
|
|
|
## the closest nodes. To counter a possible (mistaken) difference in
|
|
|
|
## implementation, but more importantly for security reasons. See also:
|
|
|
|
## https://github.com/ethereum/devp2p/blob/master/discv5/discv5-rationale.md#115-guard-against-kademlia-implementation-flaws
|
|
|
|
##
|
|
|
|
## The result is that in an implementation which just stores buckets per
|
|
|
|
## logarithmic distance, it simply needs to return the right bucket. In our
|
|
|
|
## split-bucket implementation, this cannot be done as such and thus the closest
|
|
|
|
## neighbours search is still done. And to do this, a reverse calculation of an
|
|
|
|
## id at given logarithmic distance is needed (which is why there is the
|
|
|
|
## `idAtDistance` proc). Next, nodes with invalid distances need to be filtered
|
|
|
|
## out to be compliant to the specification. This can most likely get further
|
|
|
|
## optimised, but it sounds likely better to switch away from the split-bucket
|
|
|
|
## approach. I believe that the main benefit it has is improved lookups
|
|
|
|
## (due to no unbalanced branches), and it looks like this will be negated by
|
|
|
|
## limiting the returned nodes to only the ones of the requested logarithmic
|
|
|
|
## distance for the `FindNode` call.
|
|
|
|
|
|
|
|
## This `FindNode` change in discovery v5 will also have an effect on the
|
|
|
|
## efficiency of the network. Work will be moved from the receiver of
|
|
|
|
## `FindNode` to the requester. But this also means more network traffic,
|
|
|
|
## as fewer nodes will potentially be passed around per `FindNode` call, and thus
|
|
|
|
## more requests will be needed for a lookup (adding bandwidth and latency).
|
|
|
|
## This might be a concern for mobile devices.
|
|
|
|
|
2019-12-18 01:16:28 +02:00
|
|
|
import
|
2020-03-05 01:25:21 +01:00
|
|
|
std/[tables, sets, options, math, random],
|
|
|
|
json_serialization/std/net,
|
|
|
|
stew/[byteutils, endians2], chronicles, chronos, stint,
|
2020-03-02 14:10:19 +01:00
|
|
|
eth/[rlp, keys], ../enode, types, encoding, node, routing_table, enr
|
2019-12-18 01:16:28 +02:00
|
|
|
|
2019-12-16 21:38:45 +02:00
|
|
|
import nimcrypto except toHex
|
|
|
|
|
2020-03-30 13:21:32 +02:00
|
|
|
export options
|
|
|
|
|
2020-02-19 13:11:19 +01:00
|
|
|
# Attach the "discv5" topic to the log statements of this module.
logScope:
  topics = "discv5"
|
|
|
|
|
2020-02-27 13:45:12 +01:00
|
|
|
const
  alpha = 3 ## Kademlia concurrency factor
  lookupRequestLimit = 3 ## Number of `FindNode` requests a lookup worker
  ## sends to one node, and number of distances `lookupDistances` collects
  findNodeResultLimit = 15 # applies in FINDNODE handler
  # NOTE(review): in this file the limit above is only read by `lookupWorker`.
  maxNodesPerPacket = 3 ## Divisor used to compute the `total` field of a
  ## NODES reply
  lookupInterval = 60.seconds ## Interval of launching a random lookup to
  ## populate the routing table. go-ethereum seems to do 3 runs every 30
  ## minutes. Trinity starts one every minute.
  handshakeTimeout* = 2.seconds ## timeout for the reply on the
  ## whoareyou message
  responseTimeout* = 2.seconds ## timeout for the response of a request-response
  ## call
  magicSize = 32 ## size of the magic which is the start of the whoareyou
  ## message
|
|
|
|
|
2019-12-16 21:38:45 +02:00
|
|
|
type
  Protocol* = ref object
    ## State of one discovery v5 node.
    transp: DatagramTransport ## UDP transport, created in `open`
    localNode*: Node ## Node object describing ourselves
    privateKey: PrivateKey
    whoareyouMagic: array[magicSize, byte] ## Prefix identifying whoareyou
    ## packets addressed to the local node
    idHash: array[32, byte] ## sha256 of the local node id, xor-ed with the
    ## packet tag to recover the sender id
    pendingRequests: Table[AuthTag, PendingRequest] ## Sent requests, kept so
    ## they can be re-encoded when a whoareyou arrives for their auth tag
    db: Database
    routingTable: RoutingTable
    codec*: Codec
    awaitedPackets: Table[(NodeId, RequestId), Future[Option[Packet]]]
    ## Futures completed by `receive` when the matching response arrives
    lookupLoop: Future[void]
    revalidateLoop: Future[void]
    bootstrapRecords*: seq[Record] ## Records added to the routing table in
    ## `open`; never removed by `revalidateNode`

  PendingRequest = object
    ## A request that was sent out, stored by auth tag.
    node: Node ## Destination of the request
    packet: seq[byte] ## Encoded (not yet encrypted) request packet

  RandomSourceDepleted* = object of CatchableError
    ## Raised when the random source did not deliver the requested bytes.
|
|
|
|
|
2020-03-13 17:48:03 +01:00
|
|
|
proc addNode*(d: Protocol, node: Node) =
  ## Add `node` to the routing table, ignoring the value returned by the
  ## table.
  discard addNode(d.routingTable, node)
|
|
|
|
|
|
|
|
template addNode*(d: Protocol, enode: ENode) =
  ## Wrap `enode` in a `Node` and add it to the routing table.
  addNode(d, newNode(enode))
|
|
|
|
|
|
|
|
template addNode*(d: Protocol, r: Record) =
  ## Wrap the ENR `r` in a `Node` and add it to the routing table.
  addNode(d, newNode(r))
|
|
|
|
|
|
|
|
proc addNode*(d: Protocol, enr: EnrUri) =
  ## Parse the ENR URI `enr` and add the resulting node to the routing table.
  ## Asserts when the URI cannot be parsed.
  var record: Record
  let res = fromUri(record, enr)
  doAssert(res)
  d.addNode(newNode(record))
|
|
|
|
|
2020-04-20 13:50:22 +02:00
|
|
|
proc getNode*(d: Protocol, id: NodeId): Option[Node] =
  ## Look up `id` in the local routing table.
  result = getNode(d.routingTable, id)
|
2020-03-13 17:48:03 +01:00
|
|
|
|
2020-03-20 16:38:46 +01:00
|
|
|
proc randomNodes*(d: Protocol, count: int): seq[Node] =
  ## Forward to the routing table's `randomNodes` with the given `count`.
  result = randomNodes(d.routingTable, count)
|
|
|
|
|
|
|
|
proc neighbours*(d: Protocol, id: NodeId, k: int = BUCKET_SIZE): seq[Node] =
  ## Forward to the routing table's `neighbours` for `id`, with `k`
  ## defaulting to `BUCKET_SIZE`.
  result = neighbours(d.routingTable, id, k)
|
|
|
|
|
|
|
|
proc nodesDiscovered*(d: Protocol): int {.inline.} =
  ## Length of the routing table.
  len(d.routingTable)
|
2020-03-13 17:48:03 +01:00
|
|
|
|
2020-03-23 10:50:00 +01:00
|
|
|
func privKey*(d: Protocol): lent PrivateKey = d.privateKey
  ## Borrowed view of the private key stored in the protocol object.
|
2020-03-17 18:58:11 +02:00
|
|
|
|
2019-12-16 21:38:45 +02:00
|
|
|
proc send(d: Protocol, a: Address, data: seq[byte]) =
  ## Send `data` as a datagram to the UDP endpoint of `a`. The send is not
  ## awaited; a failure is only logged.
  # debug "Sending bytes", amount = data.len, to = a
  let
    dest = initTAddress(a.ip, a.udpPort)
    sendFut = d.transp.sendTo(dest, data)
  sendFut.callback = proc(data: pointer) {.gcsafe.} =
    if sendFut.failed:
      debug "Discovery send failed", msg = sendFut.readError.msg
|
|
|
|
|
|
|
|
proc send(d: Protocol, n: Node, data: seq[byte]) =
  ## Send `data` to the address stored in the ENode of `n`.
  send(d, n.node.address, data)
|
|
|
|
|
|
|
|
proc `xor`[N: static[int], T](a, b: array[N, T]): array[N, T] =
  ## Element-wise `xor` of two arrays of equal length.
  for idx, left in a:
    result[idx] = left xor b[idx]
|
|
|
|
|
2020-03-23 10:50:00 +01:00
|
|
|
proc whoareyouMagic(toNode: NodeId): array[magicSize, byte] =
  ## Compute sha256(big-endian bytes of `toNode` || "WHOAREYOU").
  const
    prefix = "WHOAREYOU"
    idLen = sizeof(NodeId)
  var data: array[prefix.len + idLen, byte]
  # Node id first, followed by the ASCII bytes of the prefix string.
  data[0 .. idLen - 1] = toNode.toByteArrayBE()
  for i, c in prefix:
    data[idLen + i] = byte(c)
  result = sha256.digest(data).data
|
|
|
|
|
2020-04-20 20:14:39 +02:00
|
|
|
proc isWhoAreYou(d: Protocol, msg: openArray[byte]): bool =
  ## True when `msg` is longer than the magic and starts with the whoareyou
  ## magic of the local node.
  msg.len > d.whoareyouMagic.len and
    d.whoareyouMagic == msg.toOpenArray(0, magicSize - 1)
|
2019-12-16 21:38:45 +02:00
|
|
|
|
2020-04-20 20:14:39 +02:00
|
|
|
proc decodeWhoAreYou(d: Protocol, msg: openArray[byte]): Whoareyou =
  ## RLP-decode the part of `msg` that follows the magic into a freshly
  ## allocated `Whoareyou`.
  new(result)
  let body = rlp.decode(msg.toOpenArray(magicSize, msg.high), WhoareyouObj)
  result[] = body
|
2019-12-16 21:38:45 +02:00
|
|
|
|
2020-02-27 13:45:12 +01:00
|
|
|
proc sendWhoareyou(d: Protocol, address: Address, toNode: NodeId, authTag: AuthTag) =
  ## Create a whoareyou challenge for `toNode`, register it as a pending
  ## handshake and send it to `address`.
  ##
  ## Raises `RandomSourceDepleted` when the id nonce cannot be randomized.
  trace "sending who are you", to = $toNode, toAddress = $address
  let challenge = Whoareyou(authTag: authTag, recordSeq: 0)

  let nonceLen = challenge.idNonce.len
  if randomBytes(challenge.idNonce) != nonceLen:
    raise newException(RandomSourceDepleted, "Could not randomize bytes")

  # A handshake that is already in progress for this node id wins: the new
  # one is dropped. Handshakes are cleaned up after `handshakeTimeout`.
  # Allowing a handshake to be overwritten instead would require cancelling
  # the handshake timeout each time.
  # TODO: could also clean up handshakes in a separate call, e.g. triggered
  # in a loop.
  # The key combines toNode and address to make it more difficult for an
  # attacker to occupy the handshake of another node.
  let key = HandShakeKey(nodeId: toNode, address: $address)
  if d.codec.handshakes.hasKeyOrPut(key, challenge):
    return

  sleepAsync(handshakeTimeout).addCallback() do(data: pointer):
    # TODO: should we still provide cancellation in case handshake completes
    # correctly?
    d.codec.handshakes.del(key)

  var data = @(whoareyouMagic(toNode))
  data.add(rlp.encode(challenge[]))
  d.send(address, data)
|
2019-12-16 21:38:45 +02:00
|
|
|
|
2020-03-18 16:24:23 +01:00
|
|
|
proc sendNodes(d: Protocol, toId: NodeId, toAddr: Address, reqId: RequestId,
    nodes: openarray[Node]) =
  ## Answer request `reqId` of node `toId` with the records of `nodes`,
  ## split over NODES packets of at most `maxNodesPerPacket` records each.
  ## Every packet carries the total number of packets in the reply.
  ##
  ## An empty `nodes` list still yields one (empty) NODES packet so that the
  ## requester gets an answer instead of waiting for its response timeout.
  proc sendNodes(d: Protocol, toId: NodeId, toAddr: Address,
      packet: NodesPacket, reqId: RequestId) {.nimcall.} =
    # Encrypt and send a single NODES packet.
    let (data, _) = d.codec.encodeEncrypted(toId, toAddr,
      encodePacket(packet, reqId), challenge = nil).tryGet()
    d.send(toAddr, data)

  var packet: NodesPacket

  if nodes.len == 0:
    packet.total = 1
    d.sendNodes(toId, toAddr, packet, reqId)
    return

  packet.total = ceil(nodes.len / maxNodesPerPacket).uint32

  for i in 0 ..< nodes.len:
    packet.enrs.add(nodes[i].record)
    # Flush as soon as a packet is full; must use the same constant that
    # `total` was computed with.
    if packet.enrs.len == maxNodesPerPacket:
      d.sendNodes(toId, toAddr, packet, reqId)
      packet.enrs.setLen(0)

  # Send the remaining, partially filled packet.
  if packet.enrs.len != 0:
    d.sendNodes(toId, toAddr, packet, reqId)
|
2019-12-16 21:38:45 +02:00
|
|
|
|
2020-03-18 16:24:23 +01:00
|
|
|
proc handlePing(d: Protocol, fromId: NodeId, fromAddr: Address,
    ping: PingPacket, reqId: RequestId) =
  ## Answer a PING with a PONG that carries the sequence number of the local
  ## ENR and the IP address and UDP port the request was received from.
  let a = fromAddr
  var pong: PongPacket
  # Report our own record sequence number, not the one the requester sent:
  # `revalidateNode` compares the PONG value against the record it holds for
  # the responder.
  pong.enrSeq = d.localNode.record.seqNum
  pong.ip = case a.ip.family
    of IpAddressFamily.IPv4: @(a.ip.address_v4)
    of IpAddressFamily.IPv6: @(a.ip.address_v6)
  pong.port = a.udpPort.uint16

  let (data, _) = d.codec.encodeEncrypted(fromId, fromAddr,
    encodePacket(pong, reqId), challenge = nil).tryGet()
  d.send(fromAddr, data)
|
2019-12-16 21:38:45 +02:00
|
|
|
|
2020-03-18 16:24:23 +01:00
|
|
|
proc handleFindNode(d: Protocol, fromId: NodeId, fromAddr: Address,
    fn: FindNodePacket, reqId: RequestId) =
  ## Answer a FINDNODE request: distance 0 returns the local node, any other
  ## distance (clamped to 256) returns the routing table's nodes at that
  ## distance.
  if fn.distance == 0:
    d.sendNodes(fromId, fromAddr, reqId, [d.localNode])
    return

  let distance = min(fn.distance, 256)
  let found = d.routingTable.neighboursAtDistance(distance)
  d.sendNodes(fromId, fromAddr, reqId, found)
|
2019-12-16 21:38:45 +02:00
|
|
|
|
2020-04-20 20:14:39 +02:00
|
|
|
proc receive*(d: Protocol, a: Address, msg: openArray[byte]) {.gcsafe,
  raises: [
    Defect,
    # TODO This is now coming from Chronos's callSoon
    Exception,
    # TODO All of these should probably be handled here
    RlpError,
    IOError,
    TransportAddressError,
  ].} =
  ## Handle one datagram `msg` received from address `a`.
  ##
  ## A whoareyou packet triggers a re-send of the pending request it refers
  ## to, now with the handshake challenge. Any other packet is decrypted;
  ## PING and FINDNODE are answered, other packets complete the future that
  ## waits for them. A decryption failure is answered with a whoareyou.
  if msg.len < tagSize: # or magicSize, can be either
    return # Invalid msg

  # debug "Packet received: ", length = msg.len

  if d.isWhoAreYou(msg):
    trace "Received whoareyou", localNode = $d.localNode, address = a
    let whoareyou = d.decodeWhoAreYou(msg)
    var pr: PendingRequest
    # Only react when a request with this auth tag is still pending.
    if d.pendingRequests.take(whoareyou.authTag, pr):
      let toNode = pr.node
      whoareyou.pubKey = toNode.node.pubkey # TODO: Yeah, rather ugly this.
      try:
        let (data, _) = d.codec.encodeEncrypted(toNode.id, toNode.address,
          pr.packet, challenge = whoareyou).tryGet()
        d.send(toNode, data)
      except RandomSourceDepleted:
        debug "Failed to respond to a who-you-are msg " &
          "due to randomness source depletion."
    return

  # Ordinary packet: recover the sender id from the tag.
  var tag: array[tagSize, byte]
  tag[0 .. ^1] = msg.toOpenArray(0, tagSize - 1)
  let senderData = tag xor d.idHash
  let sender = readUintBE[256](senderData)

  var authTag: AuthTag
  var node: Node
  let decoded = d.codec.decodeEncrypted(sender, a, msg, authTag, node)
  if decoded.isOk:
    let packet = decoded[]
    # Not filling table with nodes without correct IP in the ENR
    if not node.isNil and a.ip == node.address.ip:
      debug "Adding new node to routing table", node = $node,
        localNode = $d.localNode
      discard d.routingTable.addNode(node)

    case packet.kind
    of ping:
      d.handlePing(sender, a, packet.ping, packet.reqId)
    of findNode:
      d.handleFindNode(sender, a, packet.findNode, packet.reqId)
    else:
      # A response: hand it to whoever is waiting for it.
      var waiter: Future[Option[Packet]]
      if d.awaitedPackets.take((sender, packet.reqId), waiter):
        waiter.complete(packet.some)
      else:
        debug "TODO: handle packet: ", packet = packet.kind, origin = a
  elif decoded.error == DecodeError.DecryptError:
    debug "Could not decrypt packet, respond with whoareyou",
      localNode = $d.localNode, address = a
    # only sendingWhoareyou in case it is a decryption failure
    d.sendWhoareyou(a, sender, authTag)
  elif decoded.error == DecodeError.UnsupportedPacketType:
    # Still adding the node in case failure is because of unsupported packet.
    if not node.isNil and a.ip == node.address.ip:
      debug "Adding new node to routing table", node = $node,
        localNode = $d.localNode
      discard d.routingTable.addNode(node)
  # elif decoded.error == DecodeError.PacketError:
    # Not adding this node as from our perspective it is sending rubbish.
|
2019-12-16 21:38:45 +02:00
|
|
|
|
2020-03-23 10:50:00 +01:00
|
|
|
proc processClient(transp: DatagramTransport,
                   raddr: TransportAddress): Future[void] {.async, gcsafe.} =
  ## Datagram callback of the transport: fetch the received message and pass
  ## it to `receive` of the `Protocol` stored as user data. Catchable errors
  ## are logged and not propagated.
  let proto = getUserData[Protocol](transp)
  try:
    # TODO: Maybe here better to use `peekMessage()` to avoid allocation,
    # but `Bytes` object is just a simple seq[byte], and `ByteRange` object
    # do not support custom length.
    let payload = transp.getMessage()
    # The remote port is used for both the UDP and the TCP port field.
    let remote = Address(ip: raddr.address, udpPort: raddr.port,
                         tcpPort: raddr.port)
    proto.receive(remote, payload)
  except RlpError as e:
    debug "Receive failed", exception = e.name, msg = e.msg
  # TODO: what else can be raised? Figure this out and be more restrictive?
  except CatchableError as e:
    debug "Receive failed", exception = e.name, msg = e.msg,
      stacktrace = e.getStackTrace()
|
|
|
|
|
2020-03-27 14:37:31 +01:00
|
|
|
proc validIp(sender, address: IpAddress): bool =
  ## Decide whether `address`, as reported by a node reachable at `sender`,
  ## is acceptable: never an any-local or multicast address, and a loopback
  ## or site-local address only when `sender` is of the same kind.
  let
    s = initTAddress(sender, Port(0))
    a = initTAddress(address, Port(0))

  # TODO: Also check for special reserved ip addresses:
  # https://www.iana.org/assignments/iana-ipv4-special-registry/iana-ipv4-special-registry.xhtml
  # https://www.iana.org/assignments/iana-ipv6-special-registry/iana-ipv6-special-registry.xhtml
  result = not (
    a.isAnyLocal() or
    a.isMulticast() or
    (a.isLoopback() and not s.isLoopback()) or
    (a.isSiteLocal() and not s.isSiteLocal()))
|
|
|
|
|
2020-03-23 10:50:00 +01:00
|
|
|
# TODO: This could be improved to do the clean-up immediately in case a non
# whoareyou response does arrive, but we would need to store the AuthTag
# somewhere
proc registerRequest(d: Protocol, n: Node, packet: seq[byte], nonce: AuthTag) =
  ## Remember the request `packet` sent to `n` under `nonce`, unless an entry
  ## for that nonce already exists. A newly added entry is removed again
  ## after `responseTimeout`.
  let pending = PendingRequest(node: n, packet: packet)
  if d.pendingRequests.hasKeyOrPut(nonce, pending):
    return
  sleepAsync(responseTimeout).addCallback() do(data: pointer):
    d.pendingRequests.del(nonce)
|
|
|
|
|
2019-12-16 21:38:45 +02:00
|
|
|
proc waitPacket(d: Protocol, fromNode: Node, reqId: RequestId): Future[Option[Packet]] =
  ## Return a future for the response to request `reqId` from `fromNode`.
  ## The future is registered in `awaitedPackets`; after `responseTimeout`
  ## the entry is removed and a still unfinished future completes with
  ## `none`.
  let
    fut = newFuture[Option[Packet]]("waitPacket")
    key = (fromNode.id, reqId)
  d.awaitedPackets[key] = fut
  sleepAsync(responseTimeout).addCallback() do(data: pointer):
    d.awaitedPackets.del(key)
    if not fut.finished:
      fut.complete(none(Packet))
  result = fut
|
|
|
|
|
|
|
|
proc addNodesFromENRs(result: var seq[Node], enrs: openarray[Record]) =
  ## Append a `Node` built from each record in `enrs` to `result`.
  for record in enrs:
    result.add(newNode(record))
|
|
|
|
|
|
|
|
proc waitNodes(d: Protocol, fromNode: Node, reqId: RequestId): Future[seq[Node]] {.async.} =
  ## Collect the nodes of all NODES packets answering request `reqId`.
  ## The first packet announces how many packets the reply has; collecting
  ## stops early when a packet is missing or is not a NODES packet.
  var reply = await d.waitPacket(fromNode, reqId)
  if reply.isNone or reply.get.kind != nodes:
    return

  result.addNodesFromENRs(reply.get.nodes.enrs)
  let total = reply.get.nodes.total

  # One packet was consumed above; wait for the remaining `total - 1`.
  var received = 1'u32
  while received < total:
    reply = await d.waitPacket(fromNode, reqId)
    if reply.isNone or reply.get.kind != nodes:
      break
    result.addNodesFromENRs(reply.get.nodes.enrs)
    inc received
|
|
|
|
|
2020-03-23 10:50:00 +01:00
|
|
|
proc sendPing(d: Protocol, toNode: Node): RequestId =
  ## Send a PING carrying the sequence number of the local ENR to `toNode`
  ## and return the id of the request.
  result = newRequestId().tryGet()
  let packet = encodePacket(
    PingPacket(enrSeq: d.localNode.record.seqNum), result)
  let (data, nonce) = d.codec.encodeEncrypted(toNode.id, toNode.address,
    packet, challenge = nil).tryGet()
  # Keep the plain packet around in case the peer answers with whoareyou.
  d.registerRequest(toNode, packet, nonce)
  d.send(toNode, data)
|
|
|
|
|
2020-03-24 10:51:34 +01:00
|
|
|
proc ping*(d: Protocol, toNode: Node): Future[Option[PongPacket]] {.async.} =
  ## Ping `toNode`. Completes with the PONG when one arrives within the
  ## response timeout, with `none` otherwise.
  let
    reqId = d.sendPing(toNode)
    reply = await d.waitPacket(toNode, reqId)

  if reply.isSome() and reply.get().kind == pong:
    result = some(reply.get().pong)
|
2020-03-22 22:14:10 +01:00
|
|
|
|
|
|
|
proc sendFindNode(d: Protocol, toNode: Node, distance: uint32): RequestId =
  ## Send a FINDNODE request for `distance` to `toNode` and return the id of
  ## the request.
  result = newRequestId().tryGet()
  let
    packet = encodePacket(FindNodePacket(distance: distance), result)
    (data, nonce) = d.codec.encodeEncrypted(toNode.id, toNode.address, packet,
      challenge = nil).tryGet()
  # Keep the plain packet around in case the peer answers with whoareyou.
  d.registerRequest(toNode, packet, nonce)

  d.send(toNode, data)
|
|
|
|
|
|
|
|
proc findNode*(d: Protocol, toNode: Node, distance: uint32): Future[seq[Node]] {.async.} =
  ## Ask `toNode` for its nodes at `distance` and return those whose IP
  ## address passes `validIp` relative to the address of `toNode`.
  let reqId = sendFindNode(d, toNode, distance)
  let received = await d.waitNodes(toNode, reqId)

  for candidate in received:
    if not validIp(toNode.address.ip, candidate.address.ip):
      continue
    result.add(candidate)
|
2019-12-16 21:38:45 +02:00
|
|
|
|
|
|
|
proc lookupDistances(target, dest: NodeId): seq[uint32] =
  ## Return `lookupRequestLimit` distances to request from `dest` in a
  ## lookup for `target`: the log distance between both ids first, followed
  ## by the distances alternately above and below it.
  let td = logDist(target, dest)
  result.add(td)
  var i = 1'u32
  while result.len < lookupRequestLimit:
    # NOTE(review): 256 itself is excluded here although `handleFindNode`
    # accepts it - confirm whether `<=` is intended.
    if td + i < 256:
      result.add(td + i)
    # `td` and `i` are unsigned, so `td - i` wraps around instead of going
    # negative; compare before subtracting. The length check keeps the result
    # from growing past the limit when both neighbours are valid.
    if td > i and result.len < lookupRequestLimit:
      result.add(td - i)
    inc i
|
|
|
|
|
2020-03-23 10:50:00 +01:00
|
|
|
proc lookupWorker(d: Protocol, destNode: Node, target: NodeId): Future[seq[Node]] {.async.} =
  ## Query `destNode` for nodes around `target`, one distance at a time,
  ## until `lookupRequestLimit` requests were made or at least
  ## `findNodeResultLimit` nodes were collected. All collected nodes are
  ## added to the routing table.
  let dists = lookupDistances(target, destNode.id)
  for i in 0 ..< lookupRequestLimit:
    if result.len >= findNodeResultLimit:
      break
    # TODO: Handle failures
    let found = await d.findNode(destNode, dists[i])
    # TODO: I guess it makes sense to limit here also to `findNodeResultLimit`?
    result.add(found)

  for n in result:
    discard d.routingTable.addNode(n)
|
2019-12-16 21:38:45 +02:00
|
|
|
|
2020-03-23 10:50:00 +01:00
|
|
|
proc lookup*(d: Protocol, target: NodeId): Future[seq[Node]] {.async.} =
  ## Perform a lookup for the given target, return the closest n nodes to the
  ## target. Maximum value for n is `BUCKET_SIZE`.
  ##
  ## Starts from the neighbours in the local routing table and keeps up to
  ## `alpha` `lookupWorker` queries in flight, each to a node that was not
  ## asked before, until no query is pending anymore.
  # TODO: Sort the returned nodes on distance
  result = d.routingTable.neighbours(target, BUCKET_SIZE)

  # `asked`: nodes a query was started for; `seen`: nodes already in result.
  # The local node is in both so it is neither queried nor returned.
  var asked = initHashSet[NodeId]()
  asked.incl(d.localNode.id)
  var seen = asked
  for node in result:
    seen.incl(node.id)

  var pendingQueries = newSeqOfCap[Future[seq[Node]]](alpha)

  while true:
    # Top up the pending queries with nodes that were not asked yet.
    for n in result:
      if pendingQueries.len >= alpha:
        break
      if not asked.containsOrIncl(n.id):
        pendingQueries.add(d.lookupWorker(n, target))

    trace "discv5 pending queries", total = pendingQueries.len

    if pendingQueries.len == 0:
      break

    let idx = await oneIndex(pendingQueries)
    trace "Got discv5 lookup response", idx

    let nodes = pendingQueries[idx].read
    pendingQueries.del(idx)
    for n in nodes:
      # `containsOrIncl` must run for every node, hence it comes first.
      if not seen.containsOrIncl(n.id) and result.len < BUCKET_SIZE:
        result.add(n)
|
|
|
|
|
2020-03-23 10:50:00 +01:00
|
|
|
proc lookupRandom*(d: Protocol): Future[seq[Node]]
    {.raises:[RandomSourceDepleted, Defect, Exception].} =
  ## Start a lookup for a randomly generated node id.
  ##
  ## Raises `RandomSourceDepleted` when no random id could be generated.
  var target: NodeId
  let wanted = sizeof(target)
  if randomBytes(addr target, wanted) != wanted:
    raise newException(RandomSourceDepleted, "Could not randomize bytes")
  result = d.lookup(target)
|
2020-03-06 17:01:45 +01:00
|
|
|
|
2020-04-20 13:50:22 +02:00
|
|
|
proc resolve*(d: Protocol, id: NodeId): Future[Option[Node]] {.async.} =
  ## Resolve a `Node` based on provided `NodeId`.
  ##
  ## This will first look in the own DHT. If the node is known, it will try to
  ## contact it for newer information. If the node is not known or it does not
  ## reply, a lookup is done to see if it can find a (newer) record of the node
  ## on the network.
  let known = d.getNode(id)
  if known.isSome():
    # Distance 0 asks the node for its own record.
    let own = await d.findNode(known.get(), 0)
    if own.len > 0:
      return some(own[0])

  let discovered = await d.lookup(id)
  for n in discovered:
    if n.id != id:
      continue
    # TODO: Not getting any new seqNum here as in a lookup nodes in table with
    # new seqNum don't get replaced.
    if known.isSome() and known.get().record.seqNum >= n.record.seqNum:
      return known
    return some(n)
|
|
|
|
|
2020-03-24 10:51:34 +01:00
|
|
|
proc revalidateNode*(d: Protocol, n: Node)
    {.async, raises:[Defect, Exception].} = # TODO: Exception
  ## Ping `n`. On a reply the node is marked as just seen in the routing
  ## table. Without a reply the node and its stored keys are removed, unless
  ## its record is one of the bootstrap records.
  trace "Ping to revalidate node", node = $n
  let pong = await d.ping(n)

  if pong.isSome():
    if pong.get().enrSeq > n.record.seqNum:
      # TODO: Request new ENR
      discard

    d.routingTable.setJustSeen(n)
    trace "Revalidated node", node = $n
  elif n.record in d.bootstrapRecords:
    # For now we never remove bootstrap nodes. It might make sense to actually
    # do so and to retry them only in case we drop to a really low amount of
    # peers in the DHT
    debug "Revalidation of bootstrap node failed", enr = toURI(n.record)
  else:
    trace "Revalidation of node failed, removing node", record = n.record
    d.routingTable.removeNode(n)
    # Remove shared secrets when removing the node from routing table.
    # This might be too direct, so we could keep these longer. But better
    # would be to simply not remove the nodes immediately but only after x
    # amount of failures.
    discard d.codec.db.deleteKeys(n.id, n.address)
|
2019-12-23 19:21:11 +02:00
|
|
|
|
2020-03-22 22:14:10 +01:00
|
|
|
proc revalidateLoop(d: Protocol) {.async.} =
  ## Endless loop: sleep a random time of up to 10 seconds, then revalidate
  ## the node the routing table proposes, if any. Ends on cancellation.
  try:
    # TODO: We need to handle actual errors still, which might just allow to
    # continue the loop. However, currently `revalidateNode` raises a general
    # `Exception` making this rather hard.
    while true:
      let pause = rand(10 * 1000).milliseconds
      await sleepAsync(pause)
      let candidate = d.routingTable.nodeToRevalidate()
      if candidate.isNil:
        continue
      # TODO: Should we do these in parallel and/or async to be certain of how
      # often nodes are revalidated?
      await d.revalidateNode(candidate)
  except CancelledError:
    trace "revalidateLoop canceled"
|
|
|
|
|
|
|
|
proc lookupLoop(d: Protocol) {.async.} =
  ## Endless loop: look up the own node id, then a random id, then sleep for
  ## `lookupInterval`. Ends on cancellation.
  ## TODO: Same story as for `revalidateLoop`
  try:
    while true:
      # lookup self (neighbour nodes)
      let selfFound = await d.lookup(d.localNode.id)
      trace "Discovered nodes in self lookup", nodes = $selfFound

      let randomFound = await d.lookupRandom()
      trace "Discovered nodes in random lookup", nodes = $randomFound

      await sleepAsync(lookupInterval)
  except CancelledError:
    trace "lookupLoop canceled"
|
2019-12-23 19:21:11 +02:00
|
|
|
|
2020-03-23 10:50:00 +01:00
|
|
|
proc newProtocol*(privKey: PrivateKey, db: Database,
                  externalIp: Option[IpAddress], tcpPort, udpPort: Port,
                  localEnrFields: openarray[FieldPair] = [],
                  bootstrapRecords: openarray[Record] = []): Protocol =
  ## Create a discovery protocol instance for the node identified by
  ## `privKey`. The local ENR is created with sequence number 1 from the
  ## given address data and `localEnrFields`. No transport is opened and no
  ## loop is started yet; see `open` and `start`.
  # Without an external IP the local address falls back to the IPv4 any
  # address.
  let localAddress = Address(ip: externalIp.get(IPv4_any()),
                             tcpPort: tcpPort, udpPort: udpPort)
  let localEnode = ENode(pubkey: privKey.toPublicKey().tryGet(),
                         address: localAddress)
  let localRecord = enr.Record.init(1, privKey, externalIp, tcpPort, udpPort,
                                    localEnrFields)
  let self = newNode(localEnode, localRecord)

  result = Protocol(
    privateKey: privKey,
    db: db,
    localNode: self,
    whoareyouMagic: whoareyouMagic(self.id),
    idHash: sha256.digest(self.id.toByteArrayBE).data,
    codec: Codec(localNode: self, privKey: privKey, db: db),
    bootstrapRecords: @bootstrapRecords)

  result.routingTable.init(self)
|
|
|
|
|
2019-12-16 21:38:45 +02:00
|
|
|
proc open*(d: Protocol) =
  ## Create the datagram transport on the UDP port of the local node and add
  ## the bootstrap records to the routing table.
  info "Starting discovery node", node = $d.localNode,
    uri = toURI(d.localNode.record)
  # TODO allow binding to specific IP / IPv6 / etc
  let bindAddress = initTAddress(IPv4_any(), d.localNode.node.address.udpPort)
  d.transp = newDatagramTransport(processClient, udata = d,
                                  local = bindAddress)

  for bootRecord in d.bootstrapRecords:
    debug "Adding bootstrap node", uri = toURI(bootRecord)
    d.addNode(bootRecord)
|
2020-03-13 17:48:03 +01:00
|
|
|
|
2020-03-18 23:05:04 +01:00
|
|
|
proc start*(d: Protocol) =
  ## Start the lookup loop and the revalidation loop and keep their futures
  ## in `d`, from where `close` and `closeWait` cancel them.
  # Might want to move these to a separate proc if this turns out to be needed.
  let lookups = lookupLoop(d)
  d.lookupLoop = lookups
  let revalidations = revalidateLoop(d)
  d.revalidateLoop = revalidations
|
|
|
|
|
|
|
|
proc close*(d: Protocol) =
  ## Cancel the background loops, if started, and close the transport
  ## without waiting for either to finish. Asserts that the transport is not
  ## closed already.
  doAssert(not d.transp.closed)

  debug "Closing discovery node", node = $d.localNode
  if d.revalidateLoop != nil:
    d.revalidateLoop.cancel()
  if d.lookupLoop != nil:
    d.lookupLoop.cancel()
  # TODO: unsure if close can't create issues in the not awaited cancellations
  # above
  d.transp.close()
|
|
|
|
|
|
|
|
proc closeWait*(d: Protocol) {.async.} =
  ## Cancel the background loops, if started, and close the transport,
  ## awaiting each step. Asserts that the transport is not closed already.
  doAssert(not d.transp.closed)

  debug "Closing discovery node", node = $d.localNode
  if d.revalidateLoop != nil:
    await d.revalidateLoop.cancelAndWait()
  if d.lookupLoop != nil:
    await d.lookupLoop.cancelAndWait()

  await d.transp.closeWait()
|