improve slot processing speeds (#1670)

about 40% better slot processing times (with LTO enabled) - these don't do BLS but are used heavily during replay (state transition = slot + block transition)

tests using a recent medalla state and advancing it 1000 slots:

```
./ncli slots --preState2:state-302271-3c1dbf19-c1f944bf.ssz --slot:1000 --postState2:xx.ssz
```

pre:

```
All time are ms
Average, StdDev, Min, Max, Samples, Test
Validation is turned off meaning that no BLS operations are performed
39.236, 0.000, 39.236, 39.236, 1, Load state from file
0.049, 0.002, 0.046, 0.063, 968, Apply slot
256.504, 81.008, 213.471, 591.902, 32, Apply epoch slot
28.597, 0.000, 28.597, 28.597, 1, Save state to file
```

cast:

```
All time are ms
Average, StdDev, Min, Max, Samples, Test
Validation is turned off meaning that no BLS operations are performed
37.079, 0.000, 37.079, 37.079, 1, Load state from file
0.042, 0.002, 0.040, 0.090, 968, Apply slot
215.552, 68.763, 180.155, 500.103, 32, Apply epoch slot
25.106, 0.000, 25.106, 25.106, 1, Save state to file
```

cast+rewards:

```
All time are ms
Average, StdDev, Min, Max, Samples, Test
Validation is turned off meaning that no BLS operations are performed
40.049, 0.000, 40.049, 40.049, 1, Load state from file
0.048, 0.001, 0.045, 0.060, 968, Apply slot
164.981, 76.273, 142.099, 477.868, 32, Apply epoch slot
28.498, 0.000, 28.498, 28.498, 1, Save state to file
```

cast+rewards+shr

```
All time are ms
Average, StdDev, Min, Max, Samples, Test
Validation is turned off meaning that no BLS operations are performed
12.898, 0.000, 12.898, 12.898, 1, Load state from file
0.039, 0.002, 0.038, 0.054, 968, Apply slot
139.971, 68.797, 120.088, 428.844, 32, Apply epoch slot
24.761, 0.000, 24.761, 24.761, 1, Save state to file
```
2020-09-16 22:59:33 +02:00 · 2020-09-16 22:59:33 +02:00 · dcf8a6b05d
parent ce85f58d29
commit dcf8a6b05d
5 changed files with 130 additions and 86 deletions
--- a/AllTests-mainnet.md
+++ b/AllTests-mainnet.md
@ -43,7 +43,7 @@ OK: 1/1 Fail: 0/1 Skip: 0/1
 + Reverse order block add & get [Preset: mainnet]                                            OK
 + Simple block add&get [Preset: mainnet]                                                     OK
 + getRef returns nil for missing blocks                                                      OK
-+ loadTailState gets genesis block on first load [Preset: mainnet]                           OK
+ loading tail block works [Preset: mainnet]                                                 OK
 + updateHead updates head and headState [Preset: mainnet]                                    OK
 + updateStateData sanity [Preset: mainnet]                                                   OK
 ```
@ -173,6 +173,7 @@ OK: 52/59 Fail: 0/59 Skip: 7/59
 + Access peers by key test                                                                   OK
 + Acquire from empty pool                                                                    OK
 + Acquire/Sorting and consistency test                                                       OK
+ Delete peer on release text                                                                OK
 + Iterators test                                                                             OK
 + Peer lifetime test                                                                         OK
 + Safe/Clear test                                                                            OK
@ -181,7 +182,7 @@ OK: 52/59 Fail: 0/59 Skip: 7/59
 + addPeerNoWait() test                                                                       OK
 + deletePeer() test                                                                          OK
 ```
-OK: 10/10 Fail: 0/10 Skip: 0/10
+OK: 11/11 Fail: 0/11 Skip: 0/11
 ## SSZ dynamic navigator
 ```diff
 + navigating fields                                                                          OK
@ -253,4 +254,4 @@ OK: 1/1 Fail: 0/1 Skip: 0/1
 OK: 1/1 Fail: 0/1 Skip: 0/1

 ---TOTAL---
-OK: 136/143 Fail: 0/143 Skip: 7/143
+OK: 137/144 Fail: 0/144 Skip: 7/144
--- a/beacon_chain/spec/state_transition_epoch.nim
+++ b/beacon_chain/spec/state_transition_epoch.nim
@ -241,18 +241,19 @@ proc process_justification_and_finalization*(state: var BeaconState,
      checkpoint = shortLog(state.finalized_checkpoint)

 # https://github.com/ethereum/eth2.0-specs/blob/v0.12.2/specs/phase0/beacon-chain.md#helpers
-func get_base_reward(state: BeaconState, index: ValidatorIndex,
-    total_balance: auto): Gwei =
+func get_base_reward_sqrt(state: BeaconState, index: ValidatorIndex,
+    total_balance_sqrt: auto): Gwei =
  # Spec function recalculates total_balance every time, which creates an
  # O(n^2) situation.
  let effective_balance = state.validators[index].effective_balance
  effective_balance * BASE_REWARD_FACTOR div
-    integer_squareroot(total_balance) div BASE_REWARDS_PER_EPOCH
+    total_balance_sqrt div BASE_REWARDS_PER_EPOCH

-func get_proposer_reward(state: BeaconState, attesting_index: ValidatorIndex,
-    total_balance: Gwei): Gwei =
+func get_proposer_reward_sqrt(state: BeaconState, attesting_index: ValidatorIndex,
+    total_balance_sqrt: Gwei): Gwei =
  # Spec version recalculates get_total_active_balance(state) quadratically
-  get_base_reward(state, attesting_index, total_balance) div PROPOSER_REWARD_QUOTIENT
+  get_base_reward_sqrt(state, attesting_index, total_balance_sqrt) div
+    PROPOSER_REWARD_QUOTIENT

 func get_finality_delay(state: BeaconState): uint64 =
  get_previous_epoch(state) - state.finalized_checkpoint.epoch
@ -271,17 +272,16 @@ iterator get_eligible_validator_indices(state: BeaconState): ValidatorIndex =
 func get_attestation_component_deltas(state: BeaconState,
                                      attestations: seq[PendingAttestation],
                                      total_balance: Gwei,
+                                      rewards, penalties: var seq[Gwei],
                                      cache: var StateCache,
-                                      ): tuple[a: seq[Gwei], b: seq[Gwei]] =
+                                      ) =
  # Helper with shared logic for use by get source, target, and head deltas
  # functions
-  var
-    rewards = repeat(0'u64, len(state.validators))
-    penalties = repeat(0'u64, len(state.validators))
  let
    unslashed_attesting_indices =
      get_unslashed_attesting_indices(state, attestations, cache)
    attesting_balance = get_total_balance(state, unslashed_attesting_indices)
+    total_balance_sqrt = integer_squareroot(total_balance)

  for index in get_eligible_validator_indices(state):
    if index in unslashed_attesting_indices:
@ -291,50 +291,50 @@ func get_attestation_component_deltas(state: BeaconState,
      if is_in_inactivity_leak(state):
        # Since full base reward will be canceled out by inactivity penalty deltas,
        # optimal participation receives full base reward compensation here.
-        rewards[index] += get_base_reward(state, index, total_balance)
+        rewards[index] += get_base_reward_sqrt(state, index, total_balance_sqrt)
      else:
-        let reward_numerator = get_base_reward(state, index, total_balance) * (attesting_balance div increment)
+        let reward_numerator = get_base_reward_sqrt(state, index, total_balance_sqrt) *
+          (attesting_balance div increment)
        rewards[index] += reward_numerator div (total_balance div increment)
    else:
-       penalties[index] += get_base_reward(state, index, total_balance)
-  (rewards, penalties)
+       penalties[index] += get_base_reward_sqrt(state, index, total_balance_sqrt)

 # https://github.com/ethereum/eth2.0-specs/blob/v0.12.2/specs/phase0/beacon-chain.md#components-of-attestation-deltas
 # These are slightly refactored to calculate total_balance once.
 func get_source_deltas*(
-    state: BeaconState, total_balance: Gwei, cache: var StateCache):
-    tuple[a: seq[Gwei], b: seq[Gwei]] =
+    state: BeaconState, total_balance: Gwei, rewards, penalties: var seq[Gwei],
+    cache: var StateCache) =
  ## Return attester micro-rewards/penalties for source-vote for each validator.

  get_attestation_component_deltas(
    state,
    get_matching_source_attestations(state, get_previous_epoch(state)),
-    total_balance, cache)
+    total_balance, rewards, penalties, cache)

 func get_target_deltas*(
-    state: BeaconState, total_balance: Gwei, cache: var StateCache):
-    tuple[a: seq[Gwei], b: seq[Gwei]] =
+    state: BeaconState, total_balance: Gwei, rewards, penalties: var seq[Gwei],
+    cache: var StateCache) =
  ## Return attester micro-rewards/penalties for target-vote for each validator.
  let matching_target_attestations =
    get_matching_target_attestations(state, get_previous_epoch(state))
  get_attestation_component_deltas(
-    state, matching_target_attestations, total_balance, cache)
+    state, matching_target_attestations, total_balance, rewards, penalties,
+    cache)

 func get_head_deltas*(
-    state: BeaconState, total_balance: Gwei, cache: var StateCache):
-    tuple[a: seq[Gwei], b: seq[Gwei]] =
+    state: BeaconState, total_balance: Gwei, rewards, penalties: var seq[Gwei],
+    cache: var StateCache) =
  ## Return attester micro-rewards/penalties for head-vote for each validator.
  let matching_head_attestations =
    get_matching_head_attestations(state, get_previous_epoch(state))
  get_attestation_component_deltas(
-    state, matching_head_attestations, total_balance, cache)
+    state, matching_head_attestations, total_balance, rewards, penalties, cache)

 func get_inclusion_delay_deltas*(
-    state: BeaconState, total_balance: Gwei, cache: var StateCache):
-    seq[Gwei] =
+    state: BeaconState, total_balance: Gwei, rewards: var seq[Gwei],
+    cache: var StateCache) =
  ## Return proposer and inclusion delay micro-rewards/penalties for each validator.
  var
-    rewards = repeat(0'u64, len(state.validators))
    matching_source_attestations =
      get_matching_source_attestations(state, get_previous_epoch(state))

@ -351,9 +351,11 @@ func get_inclusion_delay_deltas*(
    cmp(x.inclusion_delay, y.inclusion_delay)

  # Order/indices in source_attestation_attesting_indices matches sorted order
-  let source_attestation_attesting_indices = mapIt(
+  let
+    source_attestation_attesting_indices = mapIt(
      matching_source_attestations,
      get_attesting_indices(state, it.data, it.aggregation_bits, cache))
+    total_balance_sqrt = integer_squareroot(total_balance)

  for index in get_unslashed_attesting_indices(
      state, matching_source_attestations, cache):
@ -361,35 +363,31 @@ func get_inclusion_delay_deltas*(
      if index in
          source_attestation_attesting_indices[source_attestation_index]:
        rewards[attestation.proposer_index] +=
-          get_proposer_reward(state, index, total_balance)
+          get_proposer_reward_sqrt(state, index, total_balance_sqrt)
        let max_attester_reward =
-          get_base_reward(state, index, total_balance) -
-            get_proposer_reward(state, index, total_balance)
+          get_base_reward_sqrt(state, index, total_balance_sqrt) -
+            get_proposer_reward_sqrt(state, index, total_balance_sqrt)
        rewards[index] +=
          Gwei(max_attester_reward div attestation.inclusion_delay)
        break

-  # No penalties associated with inclusion delay
-  # Spec constructs both and returns both; this doesn't
-  rewards
-
 func get_inactivity_penalty_deltas*(
-    state: BeaconState, total_balance: Gwei, cache: var StateCache):
-    seq[Gwei] =
+    state: BeaconState, total_balance: Gwei, penalties: var seq[Gwei],
+    cache: var StateCache) =
  ## Return inactivity reward/penalty deltas for each validator.
-  var penalties = repeat(0'u64, len(state.validators))
  if is_in_inactivity_leak(state):
    let
      matching_target_attestations =
        get_matching_target_attestations(state, get_previous_epoch(state))
      matching_target_attesting_indices =
        get_unslashed_attesting_indices(state, matching_target_attestations, cache)
+      total_balance_sqrt = integer_squareroot(total_balance)
    for index in get_eligible_validator_indices(state):
      # If validator is performing optimally this cancels all rewards for a neutral balance
-      let base_reward = get_base_reward(state, index, total_balance)
+      let base_reward = get_base_reward_sqrt(state, index, total_balance_sqrt)
      penalties[index] +=
        Gwei(BASE_REWARDS_PER_EPOCH * base_reward -
-          get_proposer_reward(state, index, total_balance))
+          get_proposer_reward_sqrt(state, index, total_balance_sqrt))
      # matching_target_attesting_indices is a HashSet
      if index notin matching_target_attesting_indices:
        let effective_balance = state.validators[index].effective_balance
@ -397,36 +395,20 @@ func get_inactivity_penalty_deltas*(
          Gwei(effective_balance * get_finality_delay(state) div
            INACTIVITY_PENALTY_QUOTIENT)

-  # No rewards associated with inactivity penalties
-  # Spec constructs rewards anyway; this doesn't
-  penalties
-
 # https://github.com/ethereum/eth2.0-specs/blob/v0.12.2/specs/phase0/beacon-chain.md#get_attestation_deltas
-func get_attestation_deltas(state: BeaconState, cache: var StateCache):
-    tuple[a: seq[Gwei], b: seq[Gwei]] =
+func get_attestation_deltas(
+    state: BeaconState, rewards, penalties: var seq[Gwei],
+    cache: var StateCache) =
  ## Return attestation reward/penalty deltas for each validator.
  let
    total_balance = get_total_active_balance(state, cache)
-    (source_rewards, source_penalties) =
-      get_source_deltas(state, total_balance, cache)
-    (target_rewards, target_penalties) =
-      get_target_deltas(state, total_balance, cache)
-    (head_rewards, head_penalties) =
-      get_head_deltas(state, total_balance, cache)
-    inclusion_delay_rewards =
-      get_inclusion_delay_deltas(state, total_balance, cache)
-    inactivity_penalties =
-      get_inactivity_penalty_deltas(state, total_balance, cache)

-  let rewards = mapIt(0 ..< len(state.validators),
-    source_rewards[it] + target_rewards[it] + head_rewards[it] +
-      inclusion_delay_rewards[it])

-  let penalties = mapIt(0 ..< len(state.validators),
-    source_penalties[it] + target_penalties[it] + head_penalties[it] +
-      inactivity_penalties[it])
-
-  (rewards, penalties)
+  get_source_deltas(state, total_balance, rewards, penalties, cache)
+  get_target_deltas(state, total_balance, rewards, penalties, cache)
+  get_head_deltas(state, total_balance, rewards, penalties, cache)
+  get_inclusion_delay_deltas(state, total_balance, rewards, cache)
+  get_inactivity_penalty_deltas(state, total_balance, penalties, cache)

 # https://github.com/ethereum/eth2.0-specs/blob/v0.12.2/specs/phase0/beacon-chain.md#process_rewards_and_penalties
 func process_rewards_and_penalties(
@ -434,7 +416,10 @@ func process_rewards_and_penalties(
  if get_current_epoch(state) == GENESIS_EPOCH:
    return

-  let (rewards, penalties) = get_attestation_deltas(state, cache)
+  var
+    rewards = newSeq[uint64](len(state.validators))
+    penalties = newSeq[uint64](len(state.validators))
+  get_attestation_deltas(state, rewards, penalties, cache)

  for i in 0 ..< len(state.validators):
    increase_balance(state, i.ValidatorIndex, rewards[i])
--- a/beacon_chain/ssz/types.nim
+++ b/beacon_chain/ssz/types.nim
@ -147,9 +147,13 @@ template isCached*(v: Eth2Digest): bool =
  ## An entry is "in the cache" if the first 8 bytes are zero - conveniently,
  ## Nim initializes values this way, and while there may be false positives,
  ## that's fine.
-  v.data.toOpenArray(0, 7) != [byte 0, 0, 0, 0, 0, 0, 0, 0]
+
+  # Checking and resetting the cache status are hotspots - profile before
+  # touching!
+  cast[ptr uint64](unsafeAddr v.data[0])[] != 0 # endian safe
+
 template clearCache*(v: var Eth2Digest) =
-  v.data[0..<8] = [byte 0, 0, 0, 0, 0, 0, 0, 0]
+  cast[ptr uint64](addr v.data[0])[] = 0 # endian safe

 template maxChunks*(a: HashList|HashArray): int64 =
  ## Layer where data is
@ -164,10 +168,10 @@ template chunkIdx(a: HashList|HashArray, dataIdx: int64): int64 =

 proc clearCaches*(a: var HashArray, dataIdx: auto) =
  ## Clear all cache entries after data at dataIdx has been modified
-  var idx = 1 shl (a.maxDepth - 1) + (chunkIdx(a, dataIdx) div 2)
+  var idx = 1 shl (a.maxDepth - 1) + (chunkIdx(a, dataIdx) shr 1)
  while idx != 0:
    clearCache(a.hashes[idx])
-    idx = idx div 2
+    idx = idx shr 1

 func nodesAtLayer*(layer, depth, leaves: int): int =
  ## Given a number of leaves, how many nodes do you need at a given layer
@ -188,7 +192,7 @@ proc clearCaches*(a: var HashList, dataIdx: int64) =
    return

  var
-    idx = 1'i64 shl (a.maxDepth - 1) + (chunkIdx(a, dataIdx) div 2)
+    idx = 1'i64 shl (a.maxDepth - 1) + (chunkIdx(a, dataIdx) shr 1)
    layer = a.maxDepth - 1
  while idx > 0:
    let
@ -197,7 +201,7 @@ proc clearCaches*(a: var HashList, dataIdx: int64) =
    if layerIdx < a.indices[layer + 1]:
      clearCache(a.hashes[layerIdx])

-    idx = idx div 2
+    idx = idx shr 1
    layer = layer - 1

  clearCache(a.hashes[0])
--- a/ncli/ncli.nim
+++ b/ncli/ncli.nim
@ -1,7 +1,9 @@
 import
-  confutils, chronicles, os, strutils, json_serialization,
+  std/[os, strutils, stats],
+  confutils, chronicles, json_serialization,
  stew/byteutils,
-  ../beacon_chain/spec/[crypto, datatypes, digest, state_transition],
+  ../research/simutils,
+  ../beacon_chain/spec/[crypto, datatypes, digest, helpers, state_transition],
  ../beacon_chain/extras,
  ../beacon_chain/network_metadata,
  ../beacon_chain/ssz/[merkleization, ssz_serialization]
@ -11,6 +13,7 @@ type
    hashTreeRoot = "Compute hash tree root of SSZ object"
    pretty = "Pretty-print SSZ object"
    transition = "Run state transition function"
+    slots = "Apply empty slots"

  NcliConf* = object

@ -57,6 +60,19 @@ type
        desc: "Verify state root (default true)"
        defaultValue: true}: bool

+    of slots:
+      # Options of the `slots` command ("Apply empty slots")
+      preState2* {.
+        argument
+        desc: "State to which to apply empty slots"}: string
+
+      slot* {.
+        argument
+        desc: "Number of empty slots to apply to preState2"}: uint64
+
+      postState2* {.
+        argument
+        desc: "Filename of state resulting from applying the empty slots to preState2"}: string
+
 proc doTransition(conf: NcliConf) =
  let
    stateY = (ref HashedBeaconState)(
@ -74,12 +90,39 @@ proc doTransition(conf: NcliConf) =
  else:
    SSZ.saveFile(conf.postState, stateY.data)

+proc doSlots(conf: NcliConf) =
+  ## Load the state stored in `conf.preState2`, advance it by `conf.slot`
+  ## empty slots and save the result to `conf.postState2`, then print the
+  ## timing statistics collected for each phase.
+  type
+    Timers = enum
+      tLoadState = "Load state from file"
+      tApplySlot = "Apply slot"
+      tApplyEpochSlot = "Apply epoch slot"
+      tSaveState = "Save state to file"
+
+  var timers: array[Timers, RunningStat]
+  let
+    stateY = withTimerRet(timers[tLoadState]): (ref HashedBeaconState)(
+      data: SSZ.loadFile(conf.preState2, BeaconState),
+    )
+
+  # The root is not part of the loaded file contents, so compute it here
+  stateY.root = hash_tree_root(stateY.data)
+
+  var cache: StateCache
+  for i in 0'u64..<conf.slot:
+    # Time the slot under a separate label when `isEpoch` flags the slot
+    # being advanced to
+    let isEpoch = (stateY[].data.slot + 1).isEpoch
+    withTimer(timers[if isEpoch: tApplyEpochSlot else: tApplySlot]):
+      advance_slot(stateY[], {}, cache)
+
+  withTimer(timers[tSaveState]):
+    # `postState2` is the output field declared for the `slots` command
+    SSZ.saveFile(conf.postState2, stateY.data)
+
+  printTimers(false, timers)
+
 proc doSSZ(conf: NcliConf) =
  let (kind, file) =
    case conf.cmd:
    of hashTreeRoot: (conf.htrKind, conf.htrFile)
    of pretty: (conf.prettyKind, conf.prettyFile)
-    of transition:
+    else:
      raiseAssert "doSSZ() only implements hashTreeRoot and pretty commands"

  template printit(t: untyped) {.dirty.} =
@ -101,7 +144,7 @@ proc doSSZ(conf: NcliConf) =
        echo hash_tree_root(v[]).data.toHex()
    of pretty:
      echo JSON.encode(v[], pretty = true)
-    of transition:
+    else:
      raiseAssert "doSSZ() only implements hashTreeRoot and pretty commands"

  let ext = splitFile(file).ext
@ -127,3 +170,4 @@ when isMainModule:
  of hashTreeRoot: doSSZ(conf)
  of pretty: doSSZ(conf)
  of transition: doTransition(conf)
+  of slots: doSlots(conf)
--- a/tests/official/test_fixture_rewards.nim
+++ b/tests/official/test_fixture_rewards.nim
@ -54,14 +54,24 @@ proc runTest(identifier: string) =
        inactivityPenaltyDeltas =
          parseTest(testDir/"inactivity_penalty_deltas.ssz", SSZ, Deltas)

+      # Adapter for the delta functions that accumulate into caller-provided
+      # `var` sequences: declares zeroed `rewards` and `penalties` (one entry
+      # per validator in `state`), runs `body` - which can refer to both names
+      # because they are injected - and evaluates to the resulting tuple.
+      template get_deltas(body: untyped): untyped =
+        var
+          rewards {.inject.} = newSeq[Gwei](state[].validators.len)
+          penalties {.inject.} = newSeq[Gwei](state[].validators.len)
+        body
+        (rewards, penalties)
+
      check:
-        compareDeltas(sourceDeltas, get_source_deltas(state[], total_balance, cache))
-        compareDeltas(targetDeltas, get_target_deltas(state[], total_balance, cache))
-        compareDeltas(headDeltas, get_head_deltas(state[], total_balance, cache))
-        inclusionDelayDeltas.rewards.asSeq ==
-          get_inclusion_delay_deltas(state[], total_balance, cache)
-        inactivityPenaltyDeltas.penalties.asSeq ==
-          get_inactivity_penalty_deltas(state[], total_balance, cache)
+        compareDeltas(sourceDeltas, get_deltas(
+          get_source_deltas(state[], total_balance, rewards, penalties, cache)))
+        compareDeltas(targetDeltas, get_deltas(
+          get_target_deltas(state[], total_balance, rewards, penalties, cache)))
+        compareDeltas(headDeltas, get_deltas(
+          get_head_deltas(state[], total_balance, rewards, penalties, cache)))
+        compareDeltas(inclusionDelayDeltas, get_deltas(
+          get_inclusion_delay_deltas(state[], total_balance, rewards, cache)))
+        compareDeltas(inactivityPenaltyDeltas, get_deltas(
+          get_inactivity_penalty_deltas(state[], total_balance, penalties, cache)))

  `testImpl _ rewards _ identifier`()