slot fixes

* fix slot time navigation, add tests
* skip block proposal if head is more recent already - shouldn't happen
* use correct head when attesting to previous blocks
* log slot start/end processing
2025-02-26 05:05:23 +00:00 · 2019-12-23 16:34:09 +01:00 · 2019-12-23 16:34:09 +01:00 · 148527c716
commit 148527c716
parent c731c67a50
3 changed files with 160 additions and 74 deletions
--- a/beacon_chain/beacon_node.nim
+++ b/beacon_chain/beacon_node.nim
@ -47,7 +47,6 @@ type
    forkVersion: array[4, byte]
    networkIdentity: Eth2NodeIdentity
    requestManager: RequestManager
-    isBootstrapNode: bool
    bootstrapNodes: seq[BootstrapAddr]
    db: BeaconChainDB
    config: BeaconNodeConf
@ -135,12 +134,10 @@ proc commitGenesisState(node: BeaconNode, tailState: BeaconState) =
    quit 1

 proc addBootstrapNode(node: BeaconNode, bootstrapNode: BootstrapAddr) =
-  if bootstrapNode.isSameNode(node.networkIdentity):
-    node.isBootstrapNode = true
-  else:
+  if not bootstrapNode.isSameNode(node.networkIdentity):
    node.bootstrapNodes.add bootstrapNode

-proc useBootstrapFile(node: BeaconNode, bootstrapFile: string) =
+proc loadBootstrapFile(node: BeaconNode, bootstrapFile: string) =
  for ln in lines(bootstrapFile):
    node.addBootstrapNode BootstrapAddr.initAddress(string ln)

@ -156,11 +153,11 @@ proc init*(T: type BeaconNode, conf: BeaconNodeConf): Future[BeaconNode] {.async

  let bootstrapFile = string conf.bootstrapNodesFile
  if bootstrapFile.len > 0:
-    result.useBootstrapFile(bootstrapFile)
+    result.loadBootstrapFile(bootstrapFile)

  let siteLocalBootstrapFile = conf.dataDir / "bootstrap_nodes.txt"
  if fileExists(siteLocalBootstrapFile):
-    result.useBootstrapFile(siteLocalBootstrapFile)
+    result.loadBootstrapFile(siteLocalBootstrapFile)

  result.attachedValidators = ValidatorPool.init

@ -361,40 +358,20 @@ proc proposeBlock(node: BeaconNode,
                  slot: Slot): Future[BlockRef] {.async.} =
  logScope: pcs = "block_proposal"

-  if head.slot > slot:
-    notice "Skipping proposal, we've already selected a newer head",
+  if head.slot >= slot:
+    # We should normally not have a head newer than the slot we're proposing for
+    # but this can happen if block proposal is delayed
+    warn "Skipping proposal, have newer head already",
      headSlot = shortLog(head.slot),
      headBlockRoot = shortLog(head.root),
      slot = shortLog(slot),
      cat = "fastforward"
    return head

-  if head.slot == 0 and slot == 0:
-    # TODO there's been a startup assertion, which sometimes (but not always
-    # evidently) crashes exactly one node on simulation startup, the one the
-    # beacon chain proposer index points to first for slot 0. it tries using
-    # slot 0 as required, notices head block's slot is also 0 (which, that's
-    # how it's created; it's never less), and promptly fails, with assertion
-    # occuring downstream via async code. This is most easily reproduced via
-    # make clean_eth2_network_simulation_files && make eth2_network_simulation
-    return head
-
-  if head.slot == slot:
-    # Weird, we should never see as head the same slot as we're proposing a
-    # block for - did someone else steal our slot? why didn't we discard it?
-    warn "Found head at same slot as we're supposed to propose for!",
-      headSlot = shortLog(head.slot),
-      headBlockRoot = shortLog(head.root),
-      cat = "consensus_conflict"
-    # TODO investigate how and when this happens.. maybe it shouldn't be an
-    #      assert?
-    doAssert false, "head slot matches proposal slot (!)"
-    # return
-
  # Advance state to the slot immediately preceding the one we're creating a
  # block for - potentially we will be processing empty slots along the way.
  let (nroot, nblck) = node.blockPool.withState(
-      node.blockPool.tmpState, BlockSlot(blck: head, slot: slot - 1)):
+      node.blockPool.tmpState, head.atSlot(slot)):
    let (eth1data, deposits) =
      if node.mainchainMonitor.isNil:
        (get_eth1data_stub(
@ -594,12 +571,11 @@ proc handleProposal(node: BeaconNode, head: BlockRef, slot: Slot):
  ## Perform the proposal for the given slot, iff we have a validator attached
  ## that is supposed to do so, given the shuffling in head

-  # TODO here we advanced the state to the new slot, but later we'll be
+  # TODO here we advance the state to the new slot, but later we'll be
  #      proposing for it - basically, we're selecting proposer based on an
-  #      empty slot.. wait for the committee selection to settle, then
-  #      revisit this - we should be able to advance behind
+  #      empty slot
  var cache = get_empty_per_epoch_cache()
-  node.blockPool.withState(node.blockPool.tmpState, BlockSlot(blck: head, slot: slot)):
+  node.blockPool.withState(node.blockPool.tmpState, head.atSlot(slot)):
    let proposerIdx = get_beacon_proposer_index(state, cache)
    if proposerIdx.isNone:
      notice "Missing proposer index",
@ -640,14 +616,26 @@ proc onSlotStart(node: BeaconNode, lastSlot, scheduledSlot: Slot) {.gcsafe, asyn
    beaconTime = node.beaconClock.now()
    wallSlot = beaconTime.toSlot()

-  debug "Slot start",
+  info "Slot start",
    lastSlot = shortLog(lastSlot),
    scheduledSlot = shortLog(scheduledSlot),
    beaconTime = shortLog(beaconTime),
    peers = node.network.peersCount,
+    headSlot = shortLog(node.blockPool.head.blck.slot),
+    headEpoch = shortLog(node.blockPool.head.blck.slot.compute_epoch_at_slot()),
+    headRoot = shortLog(node.blockPool.head.blck.root),
+    finalizedSlot = shortLog(node.blockPool.finalizedHead.blck.slot),
+    finalizedRoot = shortLog(node.blockPool.finalizedHead.blck.root),
+    finalizedEpoch = shortLog(node.blockPool.finalizedHead.blck.slot.compute_epoch_at_slot()),
    cat = "scheduling"

  if not wallSlot.afterGenesis or (wallSlot.slot < lastSlot):
+    let
+      slot =
+        if wallSlot.afterGenesis: wallSlot.slot
+        else: GENESIS_SLOT
+      nextSlot = slot + 1 # At least GENESIS_SLOT + 1!
+
    # This can happen if the system clock changes time for example, and it's
    # pretty bad
    # TODO shut down? time either was or is bad, and PoS relies on accuracy..
@ -655,15 +643,9 @@ proc onSlotStart(node: BeaconNode, lastSlot, scheduledSlot: Slot) {.gcsafe, asyn
      beaconTime = shortLog(beaconTime),
      lastSlot = shortLog(lastSlot),
      scheduledSlot = shortLog(scheduledSlot),
+      nextSlot = shortLog(nextSlot),
      cat = "clock_drift" # tag "scheduling|clock_drift"?

-    let
-      slot = Slot(
-        if wallSlot.afterGenesis:
-          max(1'u64, wallSlot.slot.uint64)
-        else: GENESIS_SLOT.uint64 + 1)
-      nextSlot = slot + 1
-
    addTimer(saturate(node.beaconClock.fromNow(nextSlot))) do (p: pointer):
      asyncCheck node.onSlotStart(slot, nextSlot)

@ -681,9 +663,10 @@ proc onSlotStart(node: BeaconNode, lastSlot, scheduledSlot: Slot) {.gcsafe, asyn
    # TODO how long should the period be? Using an epoch because that's roughly
    #      how long attestations remain interesting
    # TODO should we shut down instead? clearly we're unable to keep up
-    warn "Unable to keep up, skipping ahead without doing work",
+    warn "Unable to keep up, skipping ahead",
      lastSlot = shortLog(lastSlot),
      slot = shortLog(slot),
+      nextSlot = shortLog(nextSlot),
      scheduledSlot = shortLog(scheduledSlot),
      cat = "overload"

@ -795,6 +778,17 @@ proc onSlotStart(node: BeaconNode, lastSlot, scheduledSlot: Slot) {.gcsafe, asyn
  let
    nextSlotStart = saturate(node.beaconClock.fromNow(nextSlot))

+  info "Slot end",
+    slot = shortLog(slot),
+    nextSlot = shortLog(nextSlot),
+    headSlot = shortLog(node.blockPool.head.blck.slot),
+    headEpoch = shortLog(node.blockPool.head.blck.slot.compute_epoch_at_slot()),
+    headRoot = shortLog(node.blockPool.head.blck.root),
+    finalizedSlot = shortLog(node.blockPool.finalizedHead.blck.slot),
+    finalizedEpoch = shortLog(node.blockPool.finalizedHead.blck.slot.compute_epoch_at_slot()),
+    finalizedRoot = shortLog(node.blockPool.finalizedHead.blck.root),
+    cat = "scheduling"
+
  addTimer(nextSlotStart) do (p: pointer):
    asyncCheck node.onSlotStart(slot, nextSlot)

@ -837,19 +831,20 @@ proc run*(node: BeaconNode) =
    node.onAttestation(attestation)

  let
-    t = node.beaconClock.now()
-    startSlot = if t > BeaconTime(0): t.toSlot.slot + 1
-                else: GENESIS_SLOT + 1
-    fromNow = saturate(node.beaconClock.fromNow(startSlot))
+    t = node.beaconClock.now().toSlot()
+    curSlot = if t.afterGenesis: t.slot
+              else: GENESIS_SLOT
+    nextSlot = curSlot + 1 # No earlier than GENESIS_SLOT + 1
+    fromNow = saturate(node.beaconClock.fromNow(nextSlot))

  info "Scheduling first slot action",
    beaconTime = shortLog(node.beaconClock.now()),
-    nextSlot = shortLog(startSlot),
+    nextSlot = shortLog(nextSlot),
    fromNow = shortLog(fromNow),
    cat = "scheduling"

  addTimer(fromNow) do (p: pointer):
-    asyncCheck node.onSlotStart(startSlot - 1, startSlot)
+    asyncCheck node.onSlotStart(curSlot, nextSlot)

  let second = Moment.now() + chronos.seconds(1)
  addTimer(second) do (p: pointer):
--- a/beacon_chain/block_pool.nim
+++ b/beacon_chain/block_pool.nim
@ -31,10 +31,15 @@ template withState*(
  body

 func parent*(bs: BlockSlot): BlockSlot =
-  BlockSlot(
-    blck: if bs.slot > bs.blck.slot: bs.blck else: bs.blck.parent,
-    slot: bs.slot - 1
-  )
+  ## Return the BlockSlot of the previous slot, using the parent block if `bs`
+  ## had a block at its slot; slot 0 has no predecessor and yields a nil block
+  if bs.slot == Slot(0):
+    BlockSlot(blck: nil, slot: Slot(0))
+  else:
+    BlockSlot(
+      blck: if bs.slot > bs.blck.slot: bs.blck else: bs.blck.parent,
+      slot: bs.slot - 1
+    )

 func link(parent, child: BlockRef) =
  doAssert (not (parent.root == Eth2Digest() or child.root == Eth2Digest())),
@ -62,7 +67,30 @@ func isAncestorOf*(a, b: BlockRef): bool =
    doAssert b.slot > b.parent.slot
    b = b.parent

-func getAncestor*(blck: BlockRef, slot: Slot): BlockRef =
+func getAncestorAt*(blck: BlockRef, slot: Slot): BlockRef =
+  ## Return the most recent block as of the time at `slot` that is not more
+  ## recent than `blck` itself, or nil if no such ancestor is reachable
+
+  var blck = blck
+
+  var depth = 0
+  const maxDepth = (100'i64 * 365 * 24 * 60 * 60 div SECONDS_PER_SLOT.int)
+
+  while true:
+    if blck.slot <= slot:
+      return blck
+
+    if blck.parent.isNil:
+      return nil
+
+    doAssert depth < maxDepth
+    depth += 1
+
+    blck = blck.parent
+
+func get_ancestor*(blck: BlockRef, slot: Slot): BlockRef =
+  ## https://github.com/ethereum/eth2.0-specs/blob/v0.9.4/specs/core/0_fork-choice.md#get_ancestor
+  ## Return ancestor at slot, or nil if queried block is older
  var blck = blck

  var depth = 0
@ -75,7 +103,7 @@ func getAncestor*(blck: BlockRef, slot: Slot): BlockRef =
    if blck.slot < slot:
      return nil

-    if blck.parent == nil:
+    if blck.parent.isNil:
      return nil

    doAssert depth < maxDepth
@ -83,6 +111,16 @@ func getAncestor*(blck: BlockRef, slot: Slot): BlockRef =

    blck = blck.parent

+func atSlot*(blck: BlockRef, slot: Slot): BlockSlot =
+  ## Return a BlockSlot at a given slot, with the block set to the closest block
+  ## available. If slot comes from before the block, a suitable block ancestor
+  ## will be used, else blck is returned as if all slots after it were empty.
+  ## This helper is useful when imagining what the chain looked like at a
+  ## particular moment in time, or when imagining what it will look like in the
+  ## near future if nothing happens (such as when looking ahead for the next
+  ## block proposal)
+  BlockSlot(blck: blck.getAncestorAt(slot), slot: slot)
+
 func init*(T: type BlockRef, root: Eth2Digest, slot: Slot): BlockRef =
  BlockRef(
    root: root,
--- a/tests/test_block_pool.nim
+++ b/tests/test_block_pool.nim
@ -13,6 +13,75 @@ import
  ../beacon_chain/spec/[datatypes, digest],
  ../beacon_chain/[beacon_node_types, block_pool, beacon_chain_db, ssz]

+suite "BlockRef and helpers" & preset():
+  timedTest "isAncestorOf sanity" & preset():
+    let
+      s0 = BlockRef(slot: Slot(0))
+      s1 = BlockRef(slot: Slot(1), parent: s0)
+      s2 = BlockRef(slot: Slot(2), parent: s1)
+
+    check:
+      s0.isAncestorOf(s0)
+      s0.isAncestorOf(s1)
+      s0.isAncestorOf(s2)
+      s1.isAncestorOf(s1)
+      s1.isAncestorOf(s2)
+
+      not s2.isAncestorOf(s0)
+      not s2.isAncestorOf(s1)
+      not s1.isAncestorOf(s0)
+
+  timedTest "getAncestorAt sanity" & preset():
+    let
+      s0 = BlockRef(slot: Slot(0))
+      s1 = BlockRef(slot: Slot(1), parent: s0)
+      s2 = BlockRef(slot: Slot(2), parent: s1)
+      s4 = BlockRef(slot: Slot(4), parent: s2)
+
+    check:
+      s0.getAncestorAt(Slot(0)) == s0
+      s0.getAncestorAt(Slot(1)) == s0
+
+      s1.getAncestorAt(Slot(0)) == s0
+      s1.getAncestorAt(Slot(1)) == s1
+
+      s4.getAncestorAt(Slot(0)) == s0
+      s4.getAncestorAt(Slot(1)) == s1
+      s4.getAncestorAt(Slot(2)) == s2
+      s4.getAncestorAt(Slot(3)) == s2
+      s4.getAncestorAt(Slot(4)) == s4
+
+suite "BlockSlot and helpers" & preset():
+  timedTest "atSlot sanity" & preset():
+    let
+      s0 = BlockRef(slot: Slot(0))
+      s1 = BlockRef(slot: Slot(1), parent: s0)
+      s2 = BlockRef(slot: Slot(2), parent: s1)
+      s4 = BlockRef(slot: Slot(4), parent: s2)
+
+    check:
+      s0.atSlot(Slot(0)).blck == s0
+      s0.atSlot(Slot(0)) == s1.atSlot(Slot(0))
+      s1.atSlot(Slot(1)).blck == s1
+
+      s4.atSlot(Slot(0)).blck == s0
+
+  timedTest "parent sanity" & preset():
+    let
+      s0 = BlockRef(slot: Slot(0))
+      s00 = BlockSlot(blck: s0, slot: Slot(0))
+      s01 = BlockSlot(blck: s0, slot: Slot(1))
+      s2 = BlockRef(slot: Slot(2), parent: s0)
+      s22 = BlockSlot(blck: s2, slot: Slot(2))
+      s24 = BlockSlot(blck: s2, slot: Slot(4))
+
+    check:
+      s00.parent == BlockSlot(blck: nil, slot: Slot(0))
+      s01.parent == s00
+      s22.parent == s01
+      s24.parent == BlockSlot(blck: s2, slot: Slot(3))
+      s24.parent.parent == s22
+
 when const_preset == "minimal": # Too much stack space used on mainnet
  suite "Block pool processing" & preset():
    setup:
@ -30,7 +99,7 @@ when const_preset == "minimal": # Too much stack space used on mainnet
        pool.getRef(default Eth2Digest) == nil

    timedTest "loadTailState gets genesis block on first load" & preset():
-      var
+      let
        b0 = pool.get(pool.tail.root)

      check:
@ -93,22 +162,6 @@ when const_preset == "minimal": # Too much stack space used on mainnet
        pool2.get(b1Root).isSome()
        pool2.get(b2Root).isSome()

-    timedTest "isAncestorOf sanity" & preset():
-      let
-        a = BlockRef(slot: Slot(1))
-        b = BlockRef(slot: Slot(2), parent: a)
-        c = BlockRef(slot: Slot(3), parent: b)
-
-      check:
-        a.isAncestorOf(a)
-        a.isAncestorOf(b)
-        a.isAncestorOf(c)
-        b.isAncestorOf(c)
-
-        not c.isAncestorOf(a)
-        not c.isAncestorOf(b)
-        not b.isAncestorOf(a)
-
    timedTest "Can add same block twice" & preset():
      let
        b10 = pool.add(b1Root, b1)