From 6700f283d5c7da9b052000019e572f90b4fc8220 Mon Sep 17 00:00:00 2001
From: Paul Hauner <paul@paulhauner.com>
Date: Thu, 11 Oct 2018 16:56:46 +1100
Subject: [PATCH 1/2] Fix shuffle() function errors

 - `rand_max` now represents the greatest value returned by the RNG,
instead of the length of the range.
 - Loop condition fixed to prevent an infinite loop.
 - Comments updated.
---
 specs/beacon-chain.md | 31 +++++++++++++++++--------------
 1 file changed, 17 insertions(+), 14 deletions(-)

diff --git a/specs/beacon-chain.md b/specs/beacon-chain.md
index ab397fb06..1718fe482 100644
--- a/specs/beacon-chain.md
+++ b/specs/beacon-chain.md
@@ -324,36 +324,39 @@ def shuffle(values: List[Any],
     """
     values_count = len(values)
 
-    # entropy is consumed in 3 byte chunks
-    # sample_max is defined to remove the modulo bias from this entropy source
-    sample_max = 2 ** 24
-    assert values_count <= sample_max
+    # Entropy is consumed from the seed in 3-byte (24 bit) chunks.
+    rand_max = 2 ** 24 - 1
+    assert values_count < rand_max
 
     output = [x for x in values]
     source = seed
     index = 0
-    while index < values_count:
+    while index < values_count - 1:
         # Re-hash the source
         source = hash(source)
-        for position in range(0, 30, 3):  # gets indices 3 bytes at a time
-            # Select a 3-byte sampled int
-            sample_from_source = int.from_bytes(source[position:position + 3], 'big')
-            # `remaining` is the size of remaining indices of this round
+        for position in range(0, 30, 3):  # Reads indices 3 bytes at a time
+            # Determine the number of indices remaining and exit once the last
+            # index is reached.
             remaining = values_count - index
             if remaining == 1:
                 break
 
-            # Set a random maximum bound of sample_from_source
-            sample_max = sample_max - sample_max % remaining
+            # Read 3-bytes of the seed as a 24-bit big-endian integer.
+            sample_from_source = int.from_bytes(source[position:position + 3], 'big')
 
-            # Select `replacement_position` with the given `sample_from_source` and `remaining`
+            # Sample values greater than or equal to `sample_max` will cause
+            # modulo bias when mapped into the `remaining` range.
+            sample_max = rand_max - rand_max % remaining
+
+            # Perform a swap if the consumed entropy will not cause modulo bias.
             if sample_from_source < sample_max:
-                # Use random number to get `replacement_position`, where it's not `index`
+                # Select a replacement index for the present index.
                 replacement_position = (sample_from_source % remaining) + index
-                # Swap the index-th and replacement_position-th elements
+                # Swap the present index with the replacement index.
                 output[index], output[replacement_position] = output[replacement_position], output[index]
                 index += 1
             else:
+                # The sample causes modulo bias. A new sample should be read.
                 pass
 
     return output

From 3791cb5a5d80519f182ab30e0a95458c1d164319 Mon Sep 17 00:00:00 2001
From: Paul Hauner <paul@paulhauner.com>
Date: Tue, 16 Oct 2018 11:17:34 +1100
Subject: [PATCH 2/2] Update shuffle() as per review

- Add `rand_bytes`.
- Change the `for` loop range for readability and generality.
- Ensure consistency of comment spacing.
- Update comments.
---
 specs/beacon-chain.md | 26 +++++++++++++++++---------
 1 file changed, 17 insertions(+), 9 deletions(-)

diff --git a/specs/beacon-chain.md b/specs/beacon-chain.md
index 1718fe482..11bceb553 100644
--- a/specs/beacon-chain.md
+++ b/specs/beacon-chain.md
@@ -325,24 +325,32 @@ def shuffle(values: List[Any],
     values_count = len(values)
 
     # Entropy is consumed from the seed in 3-byte (24 bit) chunks.
-    rand_max = 2 ** 24 - 1
+    rand_bytes = 3
+    # The highest possible result of the RNG.
+    rand_max = 2 ** (rand_bytes * 8) - 1
+
+    # The range of the RNG places an upper bound on the size of the list that
+    # may be shuffled. It is a logic error to supply an oversized list.
     assert values_count < rand_max
 
     output = [x for x in values]
     source = seed
     index = 0
     while index < values_count - 1:
-        # Re-hash the source
+        # Re-hash the `source` to obtain a new pattern of bytes.
         source = hash(source)
-        for position in range(0, 30, 3):  # Reads indices 3 bytes at a time
-            # Determine the number of indices remaining and exit once the last
-            # index is reached.
+        # Iterate through the `source` bytes in `rand_bytes`-sized chunks.
+        for position in range(0, 32 - (32 % rand_bytes), rand_bytes):
+            # Determine the number of indices remaining in `values` and exit
+            # once the last index is reached.
             remaining = values_count - index
             if remaining == 1:
                 break
 
-            # Read 3-bytes of the seed as a 24-bit big-endian integer.
-            sample_from_source = int.from_bytes(source[position:position + 3], 'big')
+            # Read 3 bytes of `source` as a 24-bit big-endian integer.
+            sample_from_source = int.from_bytes(
+                source[position:position + rand_bytes], 'big'
+            )
 
             # Sample values greater than or equal to `sample_max` will cause
             # modulo bias when mapped into the `remaining` range.
@@ -350,9 +358,9 @@ def shuffle(values: List[Any],
 
             # Perform a swap if the consumed entropy will not cause modulo bias.
             if sample_from_source < sample_max:
-                # Select a replacement index for the present index.
+                # Select a replacement index for the current index.
                 replacement_position = (sample_from_source % remaining) + index
-                # Swap the present index with the replacement index.
+                # Swap the current index with the replacement index.
                 output[index], output[replacement_position] = output[replacement_position], output[index]
                 index += 1
             else: