Merge pull request #223 from status-im/findnode-further-fixes

- Fix FindNode to return nodes with specific distance + tests - Add Resolve + test
2020-04-21 21:32:02 +02:00 · 2020-04-21 21:32:02 +02:00 · 8d207cec9b
parent 5bb6ee6451 0c6c4b969c
commit 8d207cec9b
3 changed files with 306 additions and 107 deletions
--- a/eth/p2p/discoveryv5/protocol.nim
+++ b/eth/p2p/discoveryv5/protocol.nim
@ -1,3 +1,77 @@
 # nim-eth - Node Discovery Protocol v5
 # Copyright (c) 2020 Status Research & Development GmbH
 # Licensed under either of
 #   * Apache License, version 2.0, (LICENSE-APACHEv2)
 #   * MIT license (LICENSE-MIT)
 # at your option. This file may not be copied, modified, or distributed except
 # according to those terms.
 ## Node Discovery Protocol v5
 ##
 ## Node discovery protocol implementation as per specification:
 ## https://github.com/ethereum/devp2p/blob/master/discv5/discv5.md
 ##
 ## This node discovery protocol implementation uses the same underlying
 ## implementation of routing table as is also used for the discovery v4
 ## implementation, which is the same or similar as the one described in the
 ## original Kademlia paper:
 ## https://pdos.csail.mit.edu/~petar/papers/maymounkov-kademlia-lncs.pdf
 ##
 ## This might not be the most optimal implementation for the node discovery
 ## protocol v5. Why?
 ##
 ## The Kademlia paper describes an implementation that starts off from one
 ## k-bucket, and keeps splitting the bucket as more nodes are discovered and
 ## added. The bucket splits only on the part of the binary tree where our own
 ## node's id belongs to (same prefix), eventually resulting in a k-bucket per
 ## logarithmic distance (log base2 distance). Well, not really, as nodes with
 ## ids in the closer distance ranges will never be found. And because of this an
 ## optimisation is done where buckets will also split sometimes even if the
 ## node's own id does not have the same prefix (this is to avoid creating highly
 ## unbalanced branches which would require longer lookups).
 ##
 ## Now, some implementations take a more simplified approach. They just create
 ## directly a bucket for each possible logarithmic distance (e.g. here 1->256).
 ## Some implementations also don't create buckets with logarithmic distance
 ## lower than a certain value (e.g. only 1/15th of the highest buckets),
 ## because the closer to the node (the lower the distance), the less chance
 ## there is to still find nodes.
 ##
 ## The `FindNode` call of the discovery protocol v4 will request the k closest
 ## nodes. As does original Kademlia. This effectively puts the work at the node
 ## that gets the request. This node will have to check its buckets and gather
 ## the closest. Some implementations go over all the nodes in all the buckets
 ## for this (e.g. go-ethereum discovery v4). However, in our bucket splitting
 ## approach, this search is improved.
 ##
 ## In the discovery protocol v5 the `FindNode` call is changed and now the
 ## logarithmic distance is passed as parameter instead of the NodeId. And only
 ## nodes that match that logarithmic distance are allowed to be returned.
 ## This change was made to not put the trust at the requested node for selecting
 ## the closest nodes. To counter a possible (mistaken) difference in
 ## implementation, but more importantly for security reasons. See also:
 ## https://github.com/ethereum/devp2p/blob/master/discv5/discv5-rationale.md#115-guard-against-kademlia-implementation-flaws
 ##
 ## The result is that in an implementation which just stores buckets per
 ## logarithmic distance, it simply needs to return the right bucket. In our
 ## split-bucket implementation, this cannot be done as such and thus the closest
 ## neighbours search is still done. And to do this, a reverse calculation of an
 ## id at given logarithmic distance is needed (which is why there is the
 ## `idAtDistance` proc). Next, nodes with invalid distances need to be filtered
 ## out to be compliant with the specification. This can most likely get further
 ## optimised, but it sounds likely better to switch away from the split-bucket
 ## approach. I believe that the main benefit it has is improved lookups
 ## (due to no unbalanced branches), and it looks like this will be negated by
 ## limiting the returned nodes to only the ones of the requested logarithmic
 ## distance for the `FindNode` call.
 ## This `FindNode` change in discovery v5 will also have an effect on the
 ## efficiency of the network. Work will be moved from the receiver of
 ## `FindNode` to the requester. But this also means more network traffic,
 ## as fewer nodes will potentially be passed around per `FindNode` call, and thus
 ## more requests will be needed for a lookup (adding bandwidth and latency).
 ## This might be a concern for mobile devices.
 import
  std/[tables, sets, options, math, random],
  json_serialization/std/net,
@ -61,7 +135,7 @@ proc addNode*(d: Protocol, enr: EnrUri) =
  doAssert(res)
  d.addNode newNode(r)
-proc getNode*(d: Protocol, id: NodeId): Node =
+proc getNode*(d: Protocol, id: NodeId): Option[Node] =
  d.routingTable.getNode(id)
 proc randomNodes*(d: Protocol, count: int): seq[Node] =
@ -213,9 +287,7 @@ proc receive*(d: Protocol, a: Address, msg: openArray[byte]) {.gcsafe,
    var packet: Packet
    let decoded = d.codec.decodeEncrypted(sender, a, msg, authTag, node, packet)
    if decoded == DecodeStatus.Success:
-      if node.isNil:
+      if not node.isNil:
        node = d.routingTable.getNode(sender)
      else:
        # Not filling table with nodes without correct IP in the ENR
        if a.ip == node.address.ip:
          debug "Adding new node to routing table", node = $node,
@ -232,7 +304,7 @@ proc receive*(d: Protocol, a: Address, msg: openArray[byte]) {.gcsafe,
        if d.awaitedPackets.take((sender, packet.reqId), waiter):
          waiter.complete(packet.some)
        else:
-          debug "TODO: handle packet: ", packet = packet.kind, origin = $node
+          debug "TODO: handle packet: ", packet = packet.kind, origin = a
    elif decoded == DecodeStatus.DecryptError:
      debug "Could not decrypt packet, respond with whoareyou",
        localNode = $d.localNode, address = a
@ -418,6 +490,31 @@ proc lookupRandom*(d: Protocol): Future[seq[Node]]
    raise newException(RandomSourceDepleted, "Could not randomize bytes")
  d.lookup(id)
 proc resolve*(d: Protocol, id: NodeId): Future[Option[Node]] {.async.} =
  ## Resolve a `Node` based on provided `NodeId`.
  ##
  ## This will first look in the own DHT. If the node is known, it will try to
  ## contact it for newer information. If node is not known or it does not
  ## reply, a lookup is done to see if it can find a (newer) record of the node
  ## on the network.
  ##
  ## Returns `none` only when the node is neither known locally nor found by
  ## the lookup. A node that is known locally but could not be refreshed is
  ## still returned, so callers never lose a record the table already has.
  let node = d.getNode(id)
  if node.isSome():
    # A `FindNode` with distance 0 asks the remote node for its own record.
    let request = await d.findNode(node.get(), 0)
    if request.len > 0:
      return some(request[0])
  let discovered = await d.lookup(id)
  for n in discovered:
    if n.id == id:
      # TODO: Not getting any new seqNum here as in a lookup nodes in table with
      # new seqNum don't get replaced.
      # Prefer the locally known record unless the discovered one is newer.
      if node.isSome() and node.get().record.seqNum >= n.record.seqNum:
        return node
      else:
        return some(n)
  # The node did not reply and the lookup did not return it: fall back to the
  # locally known record, which is `none` when the node was never known.
  return node
 proc revalidateNode*(d: Protocol, n: Node)
    {.async, raises:[Defect, Exception].} = # TODO: Exception
  trace "Ping to revalidate node", node = $n
--- a/eth/p2p/discoveryv5/routing_table.nim
+++ b/eth/p2p/discoveryv5/routing_table.nim
@ -1,5 +1,6 @@
 import
-  std/[algorithm, times, sequtils, bitops, random, sets], stint, chronicles,
+  std/[algorithm, times, sequtils, bitops, random, sets, options],
  stint, chronicles,
  types, node
 type
@ -174,11 +175,11 @@ proc addNode*(r: var RoutingTable, n: Node): Node =
    # Nothing added, ping evictionCandidate
    return evictionCandidate
-proc getNode*(r: RoutingTable, id: NodeId): Node =
+proc getNode*(r: RoutingTable, id: NodeId): Option[Node] =
  let b = r.bucketForNode(id)
  for n in b.nodes:
    if n.id == id:
-      return n
+      return some(n)
 proc contains*(r: RoutingTable, n: Node): bool =
  ## Tells whether `n` is present in the bucket that `bucketForNode` selects
  ## for the id of `n`.
  let bucket = r.bucketForNode(n.id)
  n in bucket
@ -191,12 +192,15 @@ proc notFullBuckets(r: RoutingTable): seq[KBucket] =
 proc neighbours*(r: RoutingTable, id: NodeId, k: int = BUCKET_SIZE): seq[Node] =
  ## Return up to k neighbours of the given node.
  result = newSeqOfCap[Node](k * 2)
  block addNodes:
    for bucket in r.bucketsByDistanceTo(id):
      for n in bucket.nodesByDistanceTo(id):
        result.add(n)
        if result.len == k * 2:
-        break
+          break addNodes
  # TODO: is this sort still needed? Can we get nodes closer from the "next"
  # bucket?
  result = sortedByIt(result, it.distanceTo(id))
  if result.len > k:
    result.setLen(k)
@ -209,9 +213,12 @@ proc idAtDistance*(id: NodeId, dist: uint32): NodeId =
  # zeroes and xor those with the id.
  id xor (1.stuint(256) shl (dist.int - 1))
-proc neighboursAtDistance*(r: RoutingTable, distance: uint32, k: int = BUCKET_SIZE): seq[Node] =
+proc neighboursAtDistance*(r: RoutingTable, distance: uint32,
-  # TODO: Filter out nodes with not exact distance here?
+    k: int = BUCKET_SIZE): seq[Node] =
-  r.neighbours(idAtDistance(r.thisNode.id, distance), k)
+  result = r.neighbours(idAtDistance(r.thisNode.id, distance), k)
  # This is a bit silly, first getting closest nodes then to only keep the ones
  # that are exactly the requested distance.
  keepIf(result, proc(n: Node): bool = logDist(n.id, r.thisNode.id) == distance)
 proc len*(r: RoutingTable): int =
  for b in r.buckets: result += b.len
--- a/tests/p2p/test_discoveryv5.nim
+++ b/tests/p2p/test_discoveryv5.nim
@ -32,110 +32,37 @@ proc randomPacket(tag: PacketTag): seq[byte] =
  result.add(rlp.encode(authTag))
  result.add(msg)
-proc generateNode(privKey = PrivateKey.random()[], port: int): Node =
+proc generateNode(privKey = PrivateKey.random()[], port: int = 20302): Node =
  let port = Port(port)
  let enr = enr.Record.init(1, privKey, some(parseIpAddress("127.0.0.1")),
    port, port)
  result = newNode(enr)
 proc nodeAtDistance(n: Node, d: uint32): Node =
  ## Keeps generating nodes with `generateNode` until one lies at logarithmic
  ## distance `d` from `n`, and returns that node.
  result = generateNode()
  while logDist(n.id, result.id) != d:
    result = generateNode()
 proc nodesAtDistance(n: Node, d: uint32, amount: int): seq[Node] =
  ## Collects `amount` nodes, each at logarithmic distance `d` from `n`.
  var remaining = amount
  while remaining > 0:
    result.add(nodeAtDistance(n, d))
    dec remaining
 suite "Discovery v5 Tests":
  asyncTest "Random nodes":
    let
      bootNodeKey = PrivateKey.fromHex(
        "a2b50376a79b1a8c8a3296485572bdfbf54708bb46d3c25d73d2723aaaf6a617")[]
      bootNode = initDiscoveryNode(bootNodeKey, localAddress(20301))
    let nodeKeys = [
        PrivateKey.fromHex(
          "a2b50376a79b1a8c8a3296485572bdfbf54708bb46d3c25d73d2723aaaf6a618")[],
        PrivateKey.fromHex(
          "a2b50376a79b1a8c8a3296485572bdfbf54708bb46d3c25d73d2723aaaf6a619")[],
        PrivateKey.fromHex(
          "a2b50376a79b1a8c8a3296485572bdfbf54708bb46d3c25d73d2723aaaf6a620")[]
      ]
    var nodeAddrs = newSeqOfCap[Address](nodeKeys.len)
    for i in 0 ..< nodeKeys.len: nodeAddrs.add(localAddress(20302 + i))
    var nodes = zip(nodeKeys, nodeAddrs).mapIt(
      initDiscoveryNode(it[0], it[1], @[bootNode.localNode.record]))
    nodes.add(bootNode)
    for node in nodes:
      let discovered = await node.lookupRandom()
      check discovered.len < nodes.len
      debug "Lookup from random id", node = node.localNode, discovered
    # Check for each node if the other nodes shows up in the routing table
    for i in nodes:
      for j in nodes:
        if j != i:
          check(nodeIdInNodes(i.localNode.id, j.randomNodes(nodes.len - 1)))
    for node in nodes:
      await node.closeWait()
  asyncTest "Lookup targets":
    const
      nodeCount = 17
    let bootNode = initDiscoveryNode(PrivateKey.random()[], localAddress(20301))
    bootNode.start()
    var nodes = newSeqOfCap[discv5_protocol.Protocol](nodeCount)
    nodes.add(bootNode)
    for i in 1 ..< nodeCount:
      nodes.add(initDiscoveryNode(PrivateKey.random()[], localAddress(20301 + i),
        @[bootNode.localNode.record]))
      nodes[i].start()
    for i in 0..<nodeCount-1:
      let target = nodes[i]
      let discovered = await nodes[nodeCount-1].lookup(target.localNode.id)
      debug "Lookup result", target = target.localNode, discovered
      # if lookUp would return ordered on distance we could check discovered[0]
      check discovered.contains(target.localNode)
    for node in nodes:
      await node.closeWait()
  asyncTest "FindNode with test table":
    let mainNode = initDiscoveryNode(PrivateKey.random()[], localAddress(20301))
    # Generate 1000 random nodes and add to our main node's routing table
    for i in 0..<1000:
      mainNode.addNode(generateNode(port = 20302 + i))
    let
      neighbours = mainNode.neighbours(mainNode.localNode.id)
      closest = neighbours[0]
      closestDistance = logDist(closest.id, mainNode.localNode.id)
    debug "Closest neighbour", closestDistance, id=closest.id.toHex()
    let
      testNode = initDiscoveryNode(PrivateKey.random()[], localAddress(20302),
        @[mainNode.localNode.record])
      discovered = await discv5_protocol.findNode(testNode, mainNode.localNode,
        closestDistance)
    check closest in discovered
    await mainNode.closeWait()
    await testNode.closeWait()
  asyncTest "GetNode":
    # TODO: This could be tested in just a routing table only context
    let
      node = initDiscoveryNode(PrivateKey.random()[], localAddress(20302))
-      targetNode = generateNode(port = 20303)
+      targetNode = generateNode()
    node.addNode(targetNode)
    for i in 0..<1000:
-      node.addNode(generateNode(port = 20303 + i))
+      node.addNode(generateNode())
-    check node.getNode(targetNode.id) == targetNode
+    let n = node.getNode(targetNode.id)
    require n.isSome()
    check n.get() == targetNode
    await node.closeWait()
@ -157,8 +84,10 @@ suite "Discovery v5 Tests":
    await node1.revalidateNode(bootnode.localNode)
    await node1.revalidateNode(node2.localNode)
-    check node1.getNode(bootnode.localNode.id) == bootnode.localNode
+    let n = node1.getNode(bootnode.localNode.id)
-    check node1.getNode(node2.localNode.id) == nil
+    require n.isSome()
    check n.get() == bootnode.localNode
    check node1.getNode(node2.localNode.id).isNone()
    await node1.closeWait()
@ -282,3 +211,169 @@ suite "Discovery v5 Tests":
    for (id, d) in testValues:
      check idAtDistance(targetId, d) == parse(id, UInt256, 16)
  asyncTest "FindNode Test":
    # Exercises `findNode` from `testNode` against `mainNode` for several
    # requested logarithmic distances. The steps build on each other: every
    # check depends on what was added to `mainNode`'s table before it.
    const dist = 253
    let
      # Fixed keys so that the distance between the two nodes is known.
      mainNodeKey = PrivateKey.fromHex(
        "a2b50376a79b1a8c8a3296485572bdfbf54708bb46d3c25d73d2723aaaf6a617")[]
      testNodeKey = PrivateKey.fromHex(
        "a2b50376a79b1a8c8a3296485572bdfbf54708bb46d3c25d73d2723aaaf6a618")[]
      mainNode = initDiscoveryNode(mainNodeKey, localAddress(20301))
      testNode = initDiscoveryNode(testNodeKey, localAddress(20302))
      # logarithmic distance between mainNode and testNode is 256
    # Seed mainNode with 10 generated nodes at distance `dist` from itself.
    let nodes = nodesAtDistance(mainNode.localNode, dist, 10)
    for n in nodes:
      mainNode.addNode(n)
    # Get ENR of the node itself
    var discovered =
      await discv5_protocol.findNode(testNode, mainNode.localNode, 0)
    check:
      discovered.len == 1
      discovered[0] == mainNode.localNode
    # Get ENRs of nodes added at provided logarithmic distance
    discovered =
      await discv5_protocol.findNode(testNode, mainNode.localNode, dist)
    check discovered.len == 10
    for n in nodes:
      check discovered.contains(n)
    # Too high logarithmic distance, caps at 256
    # The single result is testNode itself, which is at distance 256 from
    # mainNode; presumably mainNode stored it during the earlier exchanges.
    discovered =
      await discv5_protocol.findNode(testNode, mainNode.localNode, 4294967295'u32)
    check:
      discovered.len == 1
      discovered[0] == testNode.localNode
    # Empty bucket
    # Nothing was added at distance 254, so no nodes may be returned.
    discovered =
      await discv5_protocol.findNode(testNode, mainNode.localNode, 254)
    check discovered.len == 0
    # Add 10 more nodes at `dist`, i.e. 20 candidates for that distance.
    let moreNodes = nodesAtDistance(mainNode.localNode, dist, 10)
    for n in moreNodes:
      mainNode.addNode(n)
    # Full bucket
    # Only 16 of the 20 come back; presumably the bucket size limit
    # (BUCKET_SIZE) - TODO confirm.
    discovered =
      await discv5_protocol.findNode(testNode, mainNode.localNode, dist)
    check discovered.len == 16
    await mainNode.closeWait()
    await testNode.closeWait()
  asyncTest "FindNode with test table":
    # Fills a routing table with random nodes and checks that a remote
    # `findNode` for the distance of the closest neighbour returns it.
    let mainNode = initDiscoveryNode(PrivateKey.random()[], localAddress(20301))
    # Generate 1000 random nodes and add to our main node's routing table
    for i in 0..<1000:
      mainNode.addNode(generateNode())
    let
      # The first neighbour is taken as the closest one; this assumes
      # `neighbours` returns nodes ordered by distance - TODO confirm.
      neighbours = mainNode.neighbours(mainNode.localNode.id)
      closest = neighbours[0]
      closestDistance = logDist(closest.id, mainNode.localNode.id)
    debug "Closest neighbour", closestDistance, id=closest.id.toHex()
    let
      # testNode is created with mainNode's record and queries it for nodes
      # at exactly `closestDistance`.
      testNode = initDiscoveryNode(PrivateKey.random()[], localAddress(20302),
        @[mainNode.localNode.record])
      discovered = await discv5_protocol.findNode(testNode, mainNode.localNode,
        closestDistance)
    check closest in discovered
    await mainNode.closeWait()
    await testNode.closeWait()
  asyncTest "Lookup targets":
    # Starts `nodeCount` nodes that all know the boot node and checks that the
    # last node can find every other node through `lookup`.
    const
      nodeCount = 17
    let bootNode = initDiscoveryNode(PrivateKey.random()[], localAddress(20301))
    bootNode.start()
    var nodes = newSeqOfCap[discv5_protocol.Protocol](nodeCount)
    # The boot node is nodes[0]; the others are created with its record.
    nodes.add(bootNode)
    for i in 1 ..< nodeCount:
      nodes.add(initDiscoveryNode(PrivateKey.random()[], localAddress(20301 + i),
        @[bootNode.localNode.record]))
      nodes[i].start()
    # Every node except the last is a target; the last node does the lookups.
    for i in 0..<nodeCount-1:
      let target = nodes[i]
      let discovered = await nodes[nodeCount-1].lookup(target.localNode.id)
      debug "Lookup result", target = target.localNode, discovered
      # if lookUp would return ordered on distance we could check discovered[0]
      check discovered.contains(target.localNode)
    for node in nodes:
      await node.closeWait()
  asyncTest "Resolve target":
    # Checks `resolve` in three situations: target only known locally and
    # offline, target online with a newer ENR, and target offline with a newer
    # ENR that is only known to another node.
    let
      mainNode = initDiscoveryNode(PrivateKey.random()[], localAddress(20301))
      lookupNode = initDiscoveryNode(PrivateKey.random()[], localAddress(20302))
      targetKey = PrivateKey.random()[]
      targetAddress = localAddress(20303)
      targetNode = initDiscoveryNode(targetKey, targetAddress)
      targetId = targetNode.localNode.id
    # Tracks the sequence number the target's current ENR is expected to have.
    var targetSeqNum = targetNode.localNode.record.seqNum
    # Populate DHT with target through a ping. Next, close target and see
    # if resolve works (only local lookup)
    block:
      let pong = await targetNode.ping(mainNode.localNode)
      require pong.isSome()
      await targetNode.closeWait()
      let n = await mainNode.resolve(targetId)
      require n.isSome()
      check:
        n.get().id == targetId
        n.get().record.seqNum == targetSeqNum
    # Bring target back online, update seqNum in ENR, check if we get the
    # updated ENR.
    block:
      # TODO: need to add some logic to update ENRs properly
      targetSeqNum.inc()
      let r = enr.Record.init(targetSeqNum, targetKey,
        some(targetAddress.ip), targetAddress.tcpPort, targetAddress.udpPort)
      targetNode.localNode.record = r
      targetNode.open()
      let n = await mainNode.resolve(targetId)
      require n.isSome()
      check:
        n.get().id == targetId
        n.get().record.seqNum == targetSeqNum
    # Update seqNum in ENR again, ping lookupNode to be added in DHT,
    # close targetNode, resolve should lookup, check if we get updated ENR.
    block:
      targetSeqNum.inc()
      # Build the record from the tracked sequence number, not a literal, so
      # it always matches the value asserted at the end of this block.
      let r = enr.Record.init(targetSeqNum, targetKey,
        some(targetAddress.ip), targetAddress.tcpPort, targetAddress.udpPort)
      targetNode.localNode.record = r
      let pong = await targetNode.ping(lookupNode.localNode)
      require pong.isSome()
      await targetNode.closeWait()
      # TODO: This step should eventually not be needed and ENRs with new seqNum
      # should just get updated in the lookup.
      await mainNode.revalidateNode(targetNode.localNode)
      # Make lookupNode known to mainNode so the lookup can reach it.
      mainNode.addNode(lookupNode.localNode.record)
      let n = await mainNode.resolve(targetId)
      require n.isSome()
      check:
        n.get().id == targetId
        n.get().record.seqNum == targetSeqNum
    await mainNode.closeWait()
    await lookupNode.closeWait()