From d3c9ccea671dd20a541b57e34e1fda3d5ce37966 Mon Sep 17 00:00:00 2001 From: kdeme Date: Wed, 15 Apr 2020 23:34:36 +0200 Subject: [PATCH 1/3] Fix FindNode to return nodes with specific distance + tests --- eth/p2p/discoveryv5/routing_table.nim | 22 ++- tests/p2p/test_discoveryv5.nim | 204 +++++++++++++++----------- 2 files changed, 130 insertions(+), 96 deletions(-) diff --git a/eth/p2p/discoveryv5/routing_table.nim b/eth/p2p/discoveryv5/routing_table.nim index fb4188b..d98b499 100644 --- a/eth/p2p/discoveryv5/routing_table.nim +++ b/eth/p2p/discoveryv5/routing_table.nim @@ -191,12 +191,15 @@ proc notFullBuckets(r: RoutingTable): seq[KBucket] = proc neighbours*(r: RoutingTable, id: NodeId, k: int = BUCKET_SIZE): seq[Node] = ## Return up to k neighbours of the given node. result = newSeqOfCap[Node](k * 2) - for bucket in r.bucketsByDistanceTo(id): - for n in bucket.nodesByDistanceTo(id): - result.add(n) - if result.len == k * 2: - break + block addNodes: + for bucket in r.bucketsByDistanceTo(id): + for n in bucket.nodesByDistanceTo(id): + result.add(n) + if result.len == k * 2: + break addNodes + # TODO: is this sort still needed? Can we get nodes closer from the "next" + # bucket? result = sortedByIt(result, it.distanceTo(id)) if result.len > k: result.setLen(k) @@ -209,9 +212,12 @@ proc idAtDistance*(id: NodeId, dist: uint32): NodeId = # zeroes and xor those` with the id. id xor (1.stuint(256) shl (dist.int - 1)) -proc neighboursAtDistance*(r: RoutingTable, distance: uint32, k: int = BUCKET_SIZE): seq[Node] = - # TODO: Filter out nodes with not exact distance here? - r.neighbours(idAtDistance(r.thisNode.id, distance), k) +proc neighboursAtDistance*(r: RoutingTable, distance: uint32, + k: int = BUCKET_SIZE): seq[Node] = + result = r.neighbours(idAtDistance(r.thisNode.id, distance), k) + # This is a bit silly, first getting closest nodes then to only keep the ones + # that are exactly the requested distance. 
+ keepIf(result, proc(n: Node): bool = logDist(n.id, r.thisNode.id) == distance) proc len*(r: RoutingTable): int = for b in r.buckets: result += b.len diff --git a/tests/p2p/test_discoveryv5.nim b/tests/p2p/test_discoveryv5.nim index ecaddd9..263ac92 100644 --- a/tests/p2p/test_discoveryv5.nim +++ b/tests/p2p/test_discoveryv5.nim @@ -32,108 +32,33 @@ proc randomPacket(tag: PacketTag): seq[byte] = result.add(rlp.encode(authTag)) result.add(msg) -proc generateNode(privKey = PrivateKey.random()[], port: int): Node = +proc generateNode(privKey = PrivateKey.random()[], port: int = 20302): Node = let port = Port(port) let enr = enr.Record.init(1, privKey, some(parseIpAddress("127.0.0.1")), port, port) result = newNode(enr) +proc nodeAtDistance(n: Node, d: uint32): Node = + while true: + let node = generateNode() + if logDist(n.id, node.id) == d: + return node + +proc nodesAtDistance(n: Node, d: uint32, amount: int): seq[Node] = + for i in 0.. Date: Mon, 20 Apr 2020 13:50:22 +0200 Subject: [PATCH 2/3] Add resolve proc + test --- eth/p2p/discoveryv5/protocol.nim | 33 ++++++++++-- eth/p2p/discoveryv5/routing_table.nim | 7 +-- tests/p2p/test_discoveryv5.nim | 73 +++++++++++++++++++++++++-- 3 files changed, 102 insertions(+), 11 deletions(-) diff --git a/eth/p2p/discoveryv5/protocol.nim b/eth/p2p/discoveryv5/protocol.nim index 7e53237..ffa3d7f 100644 --- a/eth/p2p/discoveryv5/protocol.nim +++ b/eth/p2p/discoveryv5/protocol.nim @@ -61,7 +61,7 @@ proc addNode*(d: Protocol, enr: EnrUri) = doAssert(res) d.addNode newNode(r) -proc getNode*(d: Protocol, id: NodeId): Node = +proc getNode*(d: Protocol, id: NodeId): Option[Node] = d.routingTable.getNode(id) proc randomNodes*(d: Protocol, count: int): seq[Node] = @@ -213,9 +213,7 @@ proc receive*(d: Protocol, a: Address, msg: openArray[byte]) {.gcsafe, var packet: Packet let decoded = d.codec.decodeEncrypted(sender, a, msg, authTag, node, packet) if decoded == DecodeStatus.Success: - if node.isNil: - node = 
d.routingTable.getNode(sender) - else: + if not node.isNil: # Not filling table with nodes without correct IP in the ENR if a.ip == node.address.ip: debug "Adding new node to routing table", node = $node, @@ -232,7 +230,7 @@ proc receive*(d: Protocol, a: Address, msg: openArray[byte]) {.gcsafe, if d.awaitedPackets.take((sender, packet.reqId), waiter): waiter.complete(packet.some) else: - debug "TODO: handle packet: ", packet = packet.kind, origin = $node + debug "TODO: handle packet: ", packet = packet.kind, origin = a elif decoded == DecodeStatus.DecryptError: debug "Could not decrypt packet, respond with whoareyou", localNode = $d.localNode, address = a @@ -418,6 +416,31 @@ proc lookupRandom*(d: Protocol): Future[seq[Node]] raise newException(RandomSourceDepleted, "Could not randomize bytes") d.lookup(id) +proc resolve*(d: Protocol, id: NodeId): Future[Option[Node]] {.async.} = + ## Resolve a `Node` based on provided `NodeId`. + ## + ## This will first look in the own DHT. If the node is known, it will try to + ## contact if for newer information. If node is not known or it does not + ## reply, a lookup is done to see if it can find a (newer) record of the node + ## on the network. + + let node = d.getNode(id) + if node.isSome(): + let request = await d.findNode(node.get(), 0) + + if request.len > 0: + return some(request[0]) + + let discovered = await d.lookup(id) + for n in discovered: + if n.id == id: + # TODO: Not getting any new seqNum here as in a lookup nodes in table with + # new seqNum don't get replaced. 
+ if node.isSome() and node.get().record.seqNum >= n.record.seqNum: + return node + else: + return some(n) + proc revalidateNode*(d: Protocol, n: Node) {.async, raises:[Defect, Exception].} = # TODO: Exception trace "Ping to revalidate node", node = $n diff --git a/eth/p2p/discoveryv5/routing_table.nim b/eth/p2p/discoveryv5/routing_table.nim index d98b499..b1b633b 100644 --- a/eth/p2p/discoveryv5/routing_table.nim +++ b/eth/p2p/discoveryv5/routing_table.nim @@ -1,5 +1,6 @@ import - std/[algorithm, times, sequtils, bitops, random, sets], stint, chronicles, + std/[algorithm, times, sequtils, bitops, random, sets, options], + stint, chronicles, types, node type @@ -174,11 +175,11 @@ proc addNode*(r: var RoutingTable, n: Node): Node = # Nothing added, ping evictionCandidate return evictionCandidate -proc getNode*(r: RoutingTable, id: NodeId): Node = +proc getNode*(r: RoutingTable, id: NodeId): Option[Node] = let b = r.bucketForNode(id) for n in b.nodes: if n.id == id: - return n + return some(n) proc contains*(r: RoutingTable, n: Node): bool = n in r.bucketForNode(n.id) diff --git a/tests/p2p/test_discoveryv5.nim b/tests/p2p/test_discoveryv5.nim index 263ac92..186069b 100644 --- a/tests/p2p/test_discoveryv5.nim +++ b/tests/p2p/test_discoveryv5.nim @@ -60,7 +60,9 @@ suite "Discovery v5 Tests": for i in 0..<1000: node.addNode(generateNode()) - check node.getNode(targetNode.id) == targetNode + let n = node.getNode(targetNode.id) + require n.isSome() + check n.get() == targetNode await node.closeWait() @@ -82,8 +84,10 @@ suite "Discovery v5 Tests": await node1.revalidateNode(bootnode.localNode) await node1.revalidateNode(node2.localNode) - check node1.getNode(bootnode.localNode.id) == bootnode.localNode - check node1.getNode(node2.localNode.id) == nil + let n = node1.getNode(bootnode.localNode.id) + require n.isSome() + check n.get() == bootnode.localNode + check node1.getNode(node2.localNode.id).isNone() await node1.closeWait() @@ -310,3 +314,66 @@ suite "Discovery v5 
Tests": for node in nodes: await node.closeWait() + + asyncTest "Resolve target": + let + mainNode = initDiscoveryNode(PrivateKey.random()[], localAddress(20301)) + lookupNode = initDiscoveryNode(PrivateKey.random()[], localAddress(20302)) + targetKey = PrivateKey.random()[] + targetAddress = localAddress(20303) + targetNode = initDiscoveryNode(targetKey, targetAddress) + targetId = targetNode.localNode.id + + var targetSeqNum = targetNode.localNode.record.seqNum + + # Populate DHT with target through a ping. Next, close target and see + # if resolve works (only local lookup) + block: + let pong = await targetNode.ping(mainNode.localNode) + require pong.isSome() + await targetNode.closeWait() + let n = await mainNode.resolve(targetId) + require n.isSome() + check: + n.get().id == targetId + n.get().record.seqNum == targetSeqNum + + # Bring target back online, update seqNum in ENR, check if we get the + # updated ENR. + block: + # TODO: need to add some logic to update ENRs properly + targetSeqNum.inc() + let r = enr.Record.init(targetSeqNum, targetKey, + some(targetAddress.ip), targetAddress.tcpPort, targetAddress.udpPort) + targetNode.localNode.record = r + targetNode.open() + let n = await mainNode.resolve(targetId) + require n.isSome() + check: + n.get().id == targetId + n.get().record.seqNum == targetSeqNum + + # Update seqNum in ENR again, ping lookupNode to be added in DHT, + # close targetNode, resolve should lookup, check if we get updated ENR. + block: + targetSeqNum.inc() + let r = enr.Record.init(3, targetKey, some(targetAddress.ip), + targetAddress.tcpPort, targetAddress.udpPort) + targetNode.localNode.record = r + let pong = await targetNode.ping(lookupNode.localNode) + require pong.isSome() + + await targetNode.closeWait() + # TODO: This step should eventually not be needed and ENRs with new seqNum + # should just get updated in the lookup. 
+ await mainNode.revalidateNode(targetNode.localNode) + + mainNode.addNode(lookupNode.localNode.record) + let n = await mainNode.resolve(targetId) + require n.isSome() + check: + n.get().id == targetId + n.get().record.seqNum == targetSeqNum + + await mainNode.closeWait() + await lookupNode.closeWait() From 0c6c4b969ce8e0015be5034984f79b475a0280c7 Mon Sep 17 00:00:00 2001 From: kdeme Date: Tue, 21 Apr 2020 21:23:29 +0200 Subject: [PATCH 3/3] Add comment about used routing table and FindNode call [skip ci] --- eth/p2p/discoveryv5/protocol.nim | 74 ++++++++++++++++++++++++++++++++ 1 file changed, 74 insertions(+) diff --git a/eth/p2p/discoveryv5/protocol.nim b/eth/p2p/discoveryv5/protocol.nim index ffa3d7f..8c5d4db 100644 --- a/eth/p2p/discoveryv5/protocol.nim +++ b/eth/p2p/discoveryv5/protocol.nim @@ -1,3 +1,77 @@ +# nim-eth - Node Discovery Protocol v5 +# Copyright (c) 2020 Status Research & Development GmbH +# Licensed under either of +# * Apache License, version 2.0, (LICENSE-APACHEv2) +# * MIT license (LICENSE-MIT) +# at your option. This file may not be copied, modified, or distributed except +# according to those terms. + +## Node Discovery Protocol v5 +## +## Node discovery protocol implementation as per specification: +## https://github.com/ethereum/devp2p/blob/master/discv5/discv5.md +## +## This node discovery protocol implementation uses the same underlying +## implementation of routing table as is also used for the discovery v4 +## implementation, which is the same or similar as the one described in the +## original Kademlia paper: +## https://pdos.csail.mit.edu/~petar/papers/maymounkov-kademlia-lncs.pdf +## +## This might not be the most optimal implementation for the node discovery +## protocol v5. Why? +## +## The Kademlia paper describes an implementation that starts off from one +## k-bucket, and keeps splitting the bucket as more nodes are discovered and +## added. 
The bucket splits only on the part of the binary tree where our own +## node's id belongs to (same prefix). Resulting eventually in a k-bucket per +## logarithmic distance (log base2 distance). Well, not really, as nodes with +## ids in the closer distance ranges will never be found. And because of this an +## optimisation is done where buckets will also split sometimes even if the +## node's own id does not have the same prefix (this is to avoid creating highly +## unbalanced branches which would require longer lookups). +## +## Now, some implementations take a more simplified approach. They just create +## directly a bucket for each possible logarithmic distance (e.g. here 1->256). +## Some implementations also don't create buckets with logarithmic distance +## lower than a certain value (e.g. only 1/15th of the highest buckets), +## because the closer to the node (the lower the distance), the less chance +## there is to still find nodes. +## +## The discovery protocol v4 `FindNode` call will request the k closest +## nodes. As does original Kademlia. This effectively puts the work at the node +## that gets the request. This node will have to check its buckets and gather +## the closest. Some implementations go over all the nodes in all the buckets +## for this (e.g. go-ethereum discovery v4). However, in our bucket splitting +## approach, this search is improved. +## +## In the discovery protocol v5 the `FindNode` call is changed and now the +## logarithmic distance is passed as parameter instead of the NodeId. And only +## nodes that match that logarithmic distance are allowed to be returned. +## This change was made not to put trust in the requested node for selecting +## the closest nodes. To counter a possible (mistaken) difference in +## implementation, but more importantly for security reasons. 
See also: +## https://github.com/ethereum/devp2p/blob/master/discv5/discv5-rationale.md#115-guard-against-kademlia-implementation-flaws +## +## The result is that in an implementation which just stores buckets per +## logarithmic distance, it simply needs to return the right bucket. In our +## split-bucket implementation, this cannot be done as such and thus the closest +## neighbours search is still done. And to do this, a reverse calculation of an +## id at a given logarithmic distance is needed (which is why there is the +## `idAtDistance` proc). Next, nodes with invalid distances need to be filtered +## out to be compliant with the specification. This can most likely get further +## optimised, but it seems better to switch away from the split-bucket +## approach. I believe that the main benefit it has is improved lookups +## (due to no unbalanced branches), and it looks like this will be negated by +## limiting the returned nodes to only the ones of the requested logarithmic +## distance for the `FindNode` call. + +## This `FindNode` change in discovery v5 will also have an effect on the +## efficiency of the network. Work will be moved from the receiver of +## `FindNodes` to the requester. But this also means more network traffic, +## as fewer nodes will potentially be passed around per `FindNode` call, and thus +## more requests will be needed for a lookup (adding bandwidth and latency). +## This might be a concern for mobile devices. + import std/[tables, sets, options, math, random], json_serialization/std/net,