Merge pull request #65 from sigp/shuffling_update
Fix shuffle() function errors
This commit is contained in:
commit
11012448fa
|
@ -325,36 +325,47 @@ def shuffle(values: List[Any],
|
||||||
"""
|
"""
|
||||||
values_count = len(values)
|
values_count = len(values)
|
||||||
|
|
||||||
# entropy is consumed in 3 byte chunks
|
# Entropy is consumed from the seed in 3-byte (24 bit) chunks.
|
||||||
# sample_max is defined to remove the modulo bias from this entropy source
|
rand_bytes = 3
|
||||||
sample_max = 2 ** 24
|
# The highest possible result of the RNG.
|
||||||
assert values_count <= sample_max
|
rand_max = 2 ** (rand_bytes * 8) - 1
|
||||||
|
|
||||||
|
# The range of the RNG places an upper bound on the size of the list that
|
||||||
|
# may be shuffled. It is a logic error to supply an oversized list.
|
||||||
|
assert values_count < rand_max
|
||||||
|
|
||||||
output = [x for x in values]
|
output = [x for x in values]
|
||||||
source = seed
|
source = seed
|
||||||
index = 0
|
index = 0
|
||||||
while index < values_count:
|
while index < values_count - 1:
|
||||||
# Re-hash the source
|
# Re-hash the `source` to obtain a new pattern of bytes.
|
||||||
source = hash(source)
|
source = hash(source)
|
||||||
for position in range(0, 30, 3): # gets indices 3 bytes at a time
|
# Iterate through the `source` bytes in 3-byte chunks.
|
||||||
# Select a 3-byte sampled int
|
for position in range(0, 32 - (32 % rand_bytes), rand_bytes):
|
||||||
sample_from_source = int.from_bytes(source[position:position + 3], 'big')
|
# Determine the number of indices remaining in `values` and exit
|
||||||
# `remaining` is the size of remaining indices of this round
|
# once the last index is reached.
|
||||||
remaining = values_count - index
|
remaining = values_count - index
|
||||||
if remaining == 1:
|
if remaining == 1:
|
||||||
break
|
break
|
||||||
|
|
||||||
# Set a random maximum bound of sample_from_source
|
# Read 3 bytes of `source` as a 24-bit big-endian integer.
|
||||||
sample_max = sample_max - sample_max % remaining
|
sample_from_source = int.from_bytes(
|
||||||
|
source[position:position + rand_bytes], 'big'
|
||||||
|
)
|
||||||
|
|
||||||
# Select `replacement_position` with the given `sample_from_source` and `remaining`
|
# Sample values greater than or equal to `sample_max` will cause
|
||||||
|
# modulo bias when mapped into the `remaining` range.
|
||||||
|
sample_max = rand_max - rand_max % remaining
|
||||||
|
|
||||||
|
# Perform a swap if the consumed entropy will not cause modulo bias.
|
||||||
if sample_from_source < sample_max:
|
if sample_from_source < sample_max:
|
||||||
# Use random number to get `replacement_position`, where it's not `index`
|
# Select a replacement index for the current index.
|
||||||
replacement_position = (sample_from_source % remaining) + index
|
replacement_position = (sample_from_source % remaining) + index
|
||||||
# Swap the index-th and replacement_position-th elements
|
# Swap the current index with the replacement index.
|
||||||
output[index], output[replacement_position] = output[replacement_position], output[index]
|
output[index], output[replacement_position] = output[replacement_position], output[index]
|
||||||
index += 1
|
index += 1
|
||||||
else:
|
else:
|
||||||
|
# The sample causes modulo bias. A new sample should be read.
|
||||||
pass
|
pass
|
||||||
|
|
||||||
return output
|
return output
|
||||||
|
|
Loading…
Reference in New Issue