From 6700f283d5c7da9b052000019e572f90b4fc8220 Mon Sep 17 00:00:00 2001 From: Paul Hauner Date: Thu, 11 Oct 2018 16:56:46 +1100 Subject: [PATCH 1/2] Fix shuffle() function errors - `rand_max` now represents the greatest value returned by the RNG, instead of the length of the range. - Loop condition fixed to stop infinite loop. - Comments updated --- specs/beacon-chain.md | 31 +++++++++++++++++-------------- 1 file changed, 17 insertions(+), 14 deletions(-) diff --git a/specs/beacon-chain.md b/specs/beacon-chain.md index ab397fb06..1718fe482 100644 --- a/specs/beacon-chain.md +++ b/specs/beacon-chain.md @@ -324,36 +324,39 @@ def shuffle(values: List[Any], """ values_count = len(values) - # entropy is consumed in 3 byte chunks - # sample_max is defined to remove the modulo bias from this entropy source - sample_max = 2 ** 24 - assert values_count <= sample_max + # Entropy is consumed from the seed in 3-byte (24 bit) chunks. + rand_max = 2 ** 24 - 1 + assert values_count < rand_max output = [x for x in values] source = seed index = 0 - while index < values_count: + while index < values_count - 1: # Re-hash the source source = hash(source) - for position in range(0, 30, 3): # gets indices 3 bytes at a time - # Select a 3-byte sampled int - sample_from_source = int.from_bytes(source[position:position + 3], 'big') - # `remaining` is the size of remaining indices of this round + for position in range(0, 30, 3): # Reads indices 3 bytes at a time + # Determine the number of indices remaining and exit once the last + # index is reached. remaining = values_count - index if remaining == 1: break - # Set a random maximum bound of sample_from_source - sample_max = sample_max - sample_max % remaining + # Read 3-bytes of the seed as a 24-bit big-endian integer. + sample_from_source = int.from_bytes(source[position:position + 3], 'big') - # Select `replacement_position` with the given `sample_from_source` and `remaining` + # Sample values greater than or equal to `sample_max` will cause + # modulo bias when mapped into the `remaining` range. + sample_max = rand_max - rand_max % remaining + + # Perform a swap if the consumed entropy will not cause modulo bias. if sample_from_source < sample_max: - # Use random number to get `replacement_position`, where it's not `index` + # Select a replacement index for the present index. replacement_position = (sample_from_source % remaining) + index - # Swap the index-th and replacement_position-th elements + # Swap the present index with the replacement index. output[index], output[replacement_position] = output[replacement_position], output[index] index += 1 else: + # The sample causes modulo bias. A new sample should be read. pass return output From 3791cb5a5d80519f182ab30e0a95458c1d164319 Mon Sep 17 00:00:00 2001 From: Paul Hauner Date: Tue, 16 Oct 2018 11:17:34 +1100 Subject: [PATCH 2/2] Update shuffle() as per review - Add `rand_bytes` - Change `for` loop condition for readability and generality. - Ensure consistency of comment spacing - Update comments --- specs/beacon-chain.md | 26 +++++++++++++++++--------- 1 file changed, 17 insertions(+), 9 deletions(-) diff --git a/specs/beacon-chain.md b/specs/beacon-chain.md index 1718fe482..11bceb553 100644 --- a/specs/beacon-chain.md +++ b/specs/beacon-chain.md @@ -325,24 +325,32 @@ def shuffle(values: List[Any], values_count = len(values) # Entropy is consumed from the seed in 3-byte (24 bit) chunks. - rand_max = 2 ** 24 - 1 + rand_bytes = 3 + # The highest possible result of the RNG. + rand_max = 2 ** (rand_bytes * 8) - 1 + + # The range of the RNG places an upper-bound on the size of the list that + # may be shuffled. It is a logic error to supply an oversized list. assert values_count < rand_max output = [x for x in values] source = seed index = 0 while index < values_count - 1: - # Re-hash the source + # Re-hash the `source` to obtain a new pattern of bytes. source = hash(source) - for position in range(0, 30, 3): # Reads indices 3 bytes at a time - # Determine the number of indices remaining and exit once the last - # index is reached. + # Iterate through the `source` bytes in 3-byte chunks. + for position in range(0, 32 - (32 % rand_bytes), rand_bytes): + # Determine the number of indices remaining in `values` and exit + # once the last index is reached. remaining = values_count - index if remaining == 1: break - # Read 3-bytes of the seed as a 24-bit big-endian integer. - sample_from_source = int.from_bytes(source[position:position + 3], 'big') + # Read 3-bytes of `source` as a 24-bit big-endian integer. + sample_from_source = int.from_bytes( + source[position:position + rand_bytes], 'big' + ) # Sample values greater than or equal to `sample_max` will cause # modulo bias when mapped into the `remaining` range. @@ -350,9 +358,9 @@ def shuffle(values: List[Any], # Perform a swap if the consumed entropy will not cause modulo bias. if sample_from_source < sample_max: - # Select a replacement index for the present index. + # Select a replacement index for the current index. replacement_position = (sample_from_source % remaining) + index - # Swap the present index with the replacement index. + # Swap the current index with the replacement index. output[index], output[replacement_position] = output[replacement_position], output[index] index += 1 else: