From 7e0bf7b1b53060c972ee017b499af97979465222 Mon Sep 17 00:00:00 2001
From: Jacek Sieka <jacek@status.im>
Date: Wed, 22 Jul 2020 08:25:13 +0200
Subject: [PATCH] Add separate state for clearance (#1352)

When clearing blocks, a callback is called - this callback, if it uses
`tmpState`, will be corrupted because it's not fully up to date when the
callback is called - we thus introduce a specific state cache for this
purpose - ideally, it can be removed later when epoch caching is
improved.

Incidentally, this helps block sync speed a lot - without this state,
the block sync would ping-pong between attestation state and block state
which is costly.
---
 .../block_pools/block_pools_types.nim         |  4 +++
 beacon_chain/block_pools/candidate_chains.nim |  2 ++
 beacon_chain/block_pools/clearance.nim        | 32 +++++++------------
 3 files changed, 17 insertions(+), 21 deletions(-)

diff --git a/beacon_chain/block_pools/block_pools_types.nim b/beacon_chain/block_pools/block_pools_types.nim
index b340b1c58..1803f0d56 100644
--- a/beacon_chain/block_pools/block_pools_types.nim
+++ b/beacon_chain/block_pools/block_pools_types.nim
@@ -126,6 +126,10 @@ type
 
     tmpState*: StateData ## Scratchpad - may be any state
 
+    clearanceState*: StateData ##\
+      ## Cached state used during block clearance - should only be used in the
+      ## clearance module to avoid the risk of modifying it in a callback
+
     updateFlags*: UpdateFlags
 
     runtimePreset*: RuntimePreset
diff --git a/beacon_chain/block_pools/candidate_chains.nim b/beacon_chain/block_pools/candidate_chains.nim
index af1b7e3f0..5197991a6 100644
--- a/beacon_chain/block_pools/candidate_chains.nim
+++ b/beacon_chain/block_pools/candidate_chains.nim
@@ -300,6 +300,7 @@ proc init*(T: type CandidateChains,
     headState: tmpState[],
     justifiedState: tmpState[], # This is wrong but we'll update it below
     tmpState: tmpState[],
+    clearanceState: tmpState[],
 
     # The only allowed flag right now is verifyFinalization, as the others all
     # allow skipping some validation.
@@ -311,6 +312,7 @@ proc init*(T: type CandidateChains,
 
   res.updateStateData(res.justifiedState, justifiedHead)
   res.updateStateData(res.headState, headRef.atSlot(headRef.slot))
+  res.clearanceState = res.headState
 
   info "Block dag initialized",
     head = head.blck, justifiedHead, finalizedHead, tail = tailRef,
diff --git a/beacon_chain/block_pools/clearance.nim b/beacon_chain/block_pools/clearance.nim
index f3436fa6a..abab4aa06 100644
--- a/beacon_chain/block_pools/clearance.nim
+++ b/beacon_chain/block_pools/clearance.nim
@@ -98,8 +98,6 @@ proc addResolvedBlock(
 
   # Now that we have the new block, we should see if any of the previously
   # unresolved blocks magically become resolved
-  # TODO there are more efficient ways of doing this that don't risk
-  #      running out of stack etc
   # TODO This code is convoluted because when there are more than ~1.5k
   #      blocks being synced, there's a stack overflow as `add` gets called
   #      for the whole chain of blocks. Instead we use this ugly field in `dag`
@@ -132,12 +130,6 @@ proc addRawBlock*(
   # - the callback
   # - callback may be problematic as it's called in async validator duties
 
-  # TODO: to facilitate adding the block to the attestation pool
-  #       this should also return justified and finalized epoch corresponding
-  #       to each block.
-  #       This would be easy apart from the "Block already exists"
-  #       early return.
-
   logScope:
     blck = shortLog(signedBlock.message)
     blockRoot = shortLog(signedBlock.root)
@@ -194,7 +186,6 @@ proc addRawBlock*(
 
       return err Unviable
 
-
     # The block might have been in either of `orphans` or `missing` - we don't
     # want any more work done on its behalf
     quarantine.orphans.del(blockRoot)
@@ -205,7 +196,7 @@ proc addRawBlock*(
     # TODO if the block is from the future, we should not be resolving it (yet),
     #      but maybe we should use it as a hint that our clock is wrong?
     updateStateData(
-      dag, dag.tmpState, BlockSlot(blck: parent, slot: blck.slot - 1))
+      dag, dag.clearanceState, BlockSlot(blck: parent, slot: blck.slot - 1))
 
     let
       poolPtr = unsafeAddr dag # safe because restore is short-lived
@@ -213,27 +204,26 @@ proc addRawBlock*(
       # TODO address this ugly workaround - there should probably be a
       #      `state_transition` that takes a `StateData` instead and updates
       #      the block as well
-      doAssert v.addr == addr poolPtr.tmpState.data
-      assign(poolPtr.tmpState, poolPtr.headState)
+      doAssert v.addr == addr poolPtr.clearanceState.data
+      assign(poolPtr.clearanceState, poolPtr.headState)
 
-    var stateCache = getEpochCache(parent, dag.tmpState.data.data)
-    if not state_transition(dag.runtimePreset, dag.tmpState.data, signedBlock,
+    var stateCache = getEpochCache(parent, dag.clearanceState.data.data)
+    if not state_transition(dag.runtimePreset, dag.clearanceState.data, signedBlock,
                             stateCache, dag.updateFlags, restore):
-      # TODO find a better way to log all this block data
       notice "Invalid block"
 
       return err Invalid
-    # Careful, tmpState.data has been updated but not blck - we need to create
+    # Careful, clearanceState.data has been updated but not blck - we need to create
     # the BlockRef first!
-    dag.tmpState.blck = addResolvedBlock(
+    dag.clearanceState.blck = addResolvedBlock(
       dag, quarantine,
-      dag.tmpState.data.data, signedBlock, parent,
+      dag.clearanceState.data.data, signedBlock, parent,
       callback
     )
-    dag.putState(dag.tmpState.data, dag.tmpState.blck)
+    dag.putState(dag.clearanceState.data, dag.clearanceState.blck)
 
-    callback(dag.tmpState.blck)
-    return ok dag.tmpState.blck
+    callback(dag.clearanceState.blck)
+    return ok dag.clearanceState.blck
 
   # TODO already checked hash though? main reason to keep this is because
   # the pending dag calls this function back later in a loop, so as long