speed up shuffling

Replace shuffling function with zrnt version - `get_shuffled_seq` in particular puts more strain on the GC by allocating superfluous seq's which turns out to have a significant impact on block processing (when replaying blocks for example) - 4x improvement on non-epoch, 1.5x on epoch blocks (replay is done without signature checking) Medalla, first 10k slots - pre: ``` Loaded 68973 blocks, head slot 117077 All time are ms Average, StdDev, Min, Max, Samples, Test Validation is turned off meaning that no BLS operations are performed 76855.848, 0.000, 76855.848, 76855.848, 1, Initialize DB 1.073, 0.914, 0.071, 12.454, 7831, Load block from database 31.382, 0.000, 31.382, 31.382, 1, Load state from database 85.644, 30.350, 3.056, 466.136, 7519, Apply block 506.569, 91.129, 130.654, 874.786, 312, Apply epoch block ``` post: ``` Loaded 68973 blocks, head slot 117077 All time are ms Average, StdDev, Min, Max, Samples, Test Validation is turned off meaning that no BLS operations are performed 72457.303, 0.000, 72457.303, 72457.303, 1, Initialize DB 1.015, 0.858, 0.070, 11.231, 7831, Load block from database 28.983, 0.000, 28.983, 28.983, 1, Load state from database 21.725, 17.461, 2.659, 393.217, 7519, Apply block 324.012, 33.954, 45.452, 440.532, 312, Apply epoch block ```
2020-08-21 12:06:26 +02:00 · 2020-08-21 12:06:26 +02:00 · 61538fa581
parent 5fc07fef75
commit 61538fa581
5 changed files with 109 additions and 105 deletions
--- a/beacon_chain/spec/validator.nim
+++ b/beacon_chain/spec/validator.nim
@ -9,88 +9,129 @@
 {.push raises: [Defect].}

 import
-  options, sequtils, math, tables,
+  options, math, tables,
  ./datatypes, ./digest, ./helpers

+const
+  SEED_SIZE = sizeof(Eth2Digest)
+  ROUND_SIZE = 1
+  POSITION_WINDOW_SIZE = 4
+  PIVOT_VIEW_SIZE = SEED_SIZE + ROUND_SIZE
+  TOTAL_SIZE = PIVOT_VIEW_SIZE + POSITION_WINDOW_SIZE
+
 # https://github.com/ethereum/eth2.0-specs/blob/v0.12.2/specs/phase0/beacon-chain.md#compute_shuffled_index
 # https://github.com/ethereum/eth2.0-specs/blob/v0.12.2/specs/phase0/beacon-chain.md#compute_committee
-func get_shuffled_seq*(seed: Eth2Digest,
-                      list_size: uint64,
-                      ): seq[ValidatorIndex] =
-  ## Via https://github.com/protolambda/eth2-shuffle/blob/master/shuffle.go
-  ## Shuffles ``validators`` into beacon committees, seeded by ``seed`` and
-  ## ``slot``.
-  ## Returns a list of ``SLOTS_PER_EPOCH * committees_per_slot`` committees
-  ## where each committee is itself a list of validator indices.
-  ##
-  ## Invert the inner/outer loops from the spec, essentially. Most useful
-  ## hash result re-use occurs within a round.
+# Port of https://github.com/protolambda/zrnt/blob/master/eth2/beacon/shuffle.go
+# Permutes `input` in place. The original takes a `dir` flag (true for
+# shuffling, false for unshuffling); this port has no such parameter.
+func shuffle_list*(input: var seq[ValidatorIndex], seed: Eth2Digest) =
+  let list_size = input.lenu64

-  # Empty size -> empty list.
-  if list_size == 0:
-    return
+  if list_size <= 1: return
+
+  var buf {.noinit.}: array[TOTAL_SIZE, byte]
+
+  # Seed is always the first 32 bytes of the hash input, we never have to change
+  # this part of the buffer.
+  buf[0..<32] = seed.data
+
+  # The original code includes a direction flag, but only the reverse direction
+  # is used in eth2, so we simplify it here
+  for r in 0'u8..<SHUFFLE_ROUND_COUNT.uint8:
+    # spec: pivot = bytes_to_int(hash(seed + int_to_bytes1(round))[0:8]) % list_size
+    # This is the "int_to_bytes1(round)", appended to the seed.
+    buf[SEED_SIZE] = (SHUFFLE_ROUND_COUNT.uint8 - r - 1)
+
+    # Seed is already in place, now just hash the correct part of the buffer,
+    # and take a uint64 from it, and modulo it to get a pivot within range.
+    let
+      pivotDigest = eth2digest(buf.toOpenArray(0, PIVOT_VIEW_SIZE - 1))
+      pivot = bytes_to_uint64(pivotDigest.data.toOpenArray(0, 7)) mod listSize
+
+    # Split up the for-loop in two:
+    #  1. Handle the part from 0 (incl) to pivot (incl). This is mirrored around
+    #     (pivot / 2)
+    #  2. Handle the part from pivot (excl) to N (excl). This is mirrored around
+    #     ((pivot / 2) + (size/2))
+    # The pivot defines a split in the array, with each of the splits mirroring
+    # their data within the split.
+    # Print out some example even/odd sized index lists, with some even/odd pivots,
+    # and you can deduce how the mirroring works exactly.
+    # Note that the mirror is strict enough to not consider swapping the index
+    # @mirror with itself.
+    # Since we are iterating through the "positions" in order, we can just
+    # repeat the hash every 256th position.
+    # No need to pre-compute every possible hash for efficiency like in the
+    # example code.
+    # We only need the hashes consecutively (we walk the positions in reverse
+    # order, but the same reasoning applies).
+
+    # spec: source = hash(seed + int_to_bytes1(round) + int_to_bytes4(position // 256))
+    # - seed is still in 0:32 (excl., 32 bytes)
+    # - round number is still in 32
+    # - mix in the position for randomness, except the last byte of it,
+    #     which will be used later to select a bit from the resulting hash.
+    # We start from the pivot position, and work back to the mirror position
+    # (of the part left to the pivot).
+    # This makes us process each pair exactly once (instead of unnecessarily
+    # twice, like in the spec)
+    buf[33..<37] = uint_to_bytes4(pivot shr 8)

    var
-    # Share these buffers.
-    # TODO: Redo to follow spec.
-    #       We can have an "Impl" private version that takes buffer as parameters
-    #       so that we avoid alloc on repeated calls from compute_committee
-    pivot_buffer: array[(32+1), byte]
-    source_buffer: array[(32+1+4), byte]
-    shuffled_active_validator_indices = mapIt(
-      0 ..< list_size.int, it.ValidatorIndex)
-    sources = repeat(Eth2Digest(), (list_size div 256) + 1)
+      mirror = (pivot + 1) shr 1
+      source = eth2digest(buf)
+      byteV = source.data[(pivot and 0xff) shr 3]
+      i = 0'u64
+      j = pivot

-  ## The pivot's a function of seed and round only.
-  ## This doesn't change across rounds.
-  pivot_buffer[0..31] = seed.data
-  source_buffer[0..31] = seed.data
+    template shuffle =
+      while i < mirror:
+        # The pair is i,j. With j being the bigger of the two, hence the "position" identifier of the pair.
+        # Every 256th bit (aligned to j).
+        if (j and 0xff) == 0xff:
+          # just overwrite the last part of the buffer, reuse the start (seed, round)
+          buf[33..<37] = uint_to_bytes4(j shr 8)
+          source = eth2digest(buf)

-  static: doAssert SHUFFLE_ROUND_COUNT < uint8.high
-  for round in 0'u8 ..< SHUFFLE_ROUND_COUNT.uint8:
-    pivot_buffer[32] = round
-    source_buffer[32] = round
-
-    # Only one pivot per round.
-    let pivot =
-      bytes_to_uint64(eth2digest(pivot_buffer).data.toOpenArray(0, 7)) mod
-        list_size
-
-    ## Only need to run, per round, position div 256 hashes, so precalculate
-    ## them. This consumes memory, but for low-memory devices, it's possible
-    ## to mitigate by some light LRU caching and similar.
-    for reduced_position in 0 ..< sources.len:
-      source_buffer[33..36] = uint_to_bytes4(reduced_position.uint64)
-      sources[reduced_position] = eth2digest(source_buffer)
-
-    ## Iterate over all the indices. This was in get_permuted_index, but large
-    ## efficiency gains exist in caching and re-using data.
-    for index in 0 ..< list_size.int:
-      let
-        cur_idx_permuted = shuffled_active_validator_indices[index]
-        flip = ((list_size + pivot) - cur_idx_permuted.uint64) mod list_size
-        position = max(cur_idx_permuted.int, flip.int)
+        # Same trick with byte retrieval. Only every 8th.
+        if (j and 0x07) == 0x7:
+          byteV = source.data[(j and 0xff'u64) shr 3]

        let
-        source = sources[position div 256].data
-        byte_value = source[(position mod 256) div 8]
-        bit = (byte_value shr (position mod 8)) mod 2
+          bitV = (byteV shr (j and 0x7)) and 0x1

-      if bit != 0:
-        shuffled_active_validator_indices[index] = flip.ValidatorIndex
+        if bitV == 1:
+          swap(input[i], input[j])

-  shuffled_active_validator_indices
+        i.inc
+        j.dec
+
+    shuffle
+
+    # Now repeat, but for the part after the pivot.
+    mirror = (pivot + list_size + 1) shr 1
+    let lend = list_size - 1
+    # Again, seed and round input is in place, just update the position.
+    # We start at the end, and work back to the mirror point.
+    # This makes us process each pair exactly once (instead of unnecessarily twice, like in the spec)
+    buf[33..<37] = uint_to_bytes4(lend shr 8)
+
+    source = eth2digest(buf)
+    byteV = source.data[(lend and 0xff) shr 3]
+    i = pivot + 1'u64
+    j = lend
+
+    shuffle

 func get_shuffled_active_validator_indices*(state: BeaconState, epoch: Epoch):
    seq[ValidatorIndex] =
  # Non-spec function, to cache a data structure from which one can cheaply
  # compute both get_active_validator_indexes() and get_beacon_committee().
-  let active_validator_indices = get_active_validator_indices(state, epoch)
-  mapIt(
-    get_shuffled_seq(
-      get_seed(state, epoch, DOMAIN_BEACON_ATTESTER),
-      active_validator_indices.lenu64),
-    active_validator_indices[it])
+  var active_validator_indices = get_active_validator_indices(state, epoch)
+
+  shuffle_list(
+    active_validator_indices, get_seed(state, epoch, DOMAIN_BEACON_ATTESTER))
+
+  active_validator_indices

 func get_shuffled_active_validator_indices*(
    cache: var StateCache, state: BeaconState, epoch: Epoch):
--- a/nfuzz/libnfuzz.nim
+++ b/nfuzz/libnfuzz.nim
@ -153,12 +153,9 @@ proc nfuzz_shuffle(input_seed: ptr byte, xoutput: var openArray[uint64]): bool
  copyMem(addr(seed.data), input_seed, sizeof(seed.data))

  var shuffled_seq: seq[ValidatorIndex]
-  shuffled_seq = get_shuffled_seq(seed, list_size.uint64)
-
-  doAssert(
-    list_size == shuffled_seq.len,
-    "Shuffled list should be of requested size."
-  )
+  for i in 0..<list_size:
+    shuffled_seq.add i.ValidatorIndex
+  shuffle_list(shuffled_seq, seed)

  for i in 0..<list_size:
    # ValidatorIndex is currently wrongly uint32 so we copy this 1 by 1,
--- a/tests/all_tests.nim
+++ b/tests/all_tests.nim
@ -42,7 +42,6 @@ import # Refactor state transition unit tests

 # import # Official fixtures that don't require SSZ parsing of invalid BLS signatures
 #        # https://github.com/status-im/nim-beacon-chain/issues/374
-#   ./official/test_fixture_shuffling,
 #   ./official/test_fixture_bls

 summarizeLongTests("AllTests")
--- a/tests/official/fixtures_utils.nim
+++ b/tests/official/fixtures_utils.nim
@ -10,7 +10,7 @@ import
  os, strutils, typetraits,
  # Internals
  ../../beacon_chain/ssz,
-  ../../beacon_chain/spec/datatypes,
+  ../../beacon_chain/spec/[datatypes, crypto],
  # Status libs
  stew/byteutils,
  serialization, json_serialization
@ -20,7 +20,7 @@ export  # Workaround:
  #   - https://github.com/status-im/nim-serialization/issues/5
  #   - https://github.com/nim-lang/Nim/issues/11225
  serialization.readValue,
-  Json, ssz
+  Json, ssz, crypto

 # Process current EF test format
 # ---------------------------------------------
--- a/tests/official/test_fixture_shuffling.nim
+++ b/tests/official/test_fixture_shuffling.nim
@ -1,33 +0,0 @@
-# beacon_chain
-# Copyright (c) 2018-Present Status Research & Development GmbH
-# Licensed and distributed under either of
-#   * MIT license (license terms in the root directory or at https://opensource.org/licenses/MIT).
-#   * Apache v2 license (license terms in the root directory or at https://www.apache.org/licenses/LICENSE-2.0).
-# at your option. This file may not be copied, modified, or distributed except according to those terms.
-
-{.used.}
-
-import
-  # Standard library
-  os, unittest, sequtils,
-  # Beacon chain internals
-  ../../beacon_chain/spec/[datatypes, validator, digest],
-  # Test utilities
-  ../testutil,
-  ./fixtures_utils
-
-type
-  Shuffling* = object
-    seed*: Eth2Digest
-    count*: uint64
-    mapping*: seq[uint64]
-
-# TODO: json tests were removed
-const ShufflingDir = JsonTestsDir/const_preset/"phase0"/"shuffling"/"core"/"shuffle"
-
-suite "Official - Shuffling tests [Preset: " & preset():
-  timedTest "Shuffling a sequence of N validators" & preset():
-    for file in walkDirRec(ShufflingDir):
-      let t = parseTest(file, Json, Shuffling)
-      let implResult = get_shuffled_seq(t.seed, t.count)
-      check: implResult == mapIt(t.mapping, it.ValidatorIndex)