Introduce swap-or-not shuffle

See #563 for discussion.
This commit is contained in:
vbuterin 2019-02-06 06:48:46 -06:00 committed by GitHub
parent ec1a08278b
commit c58410e6ce
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
1 changed file with 15 additions and 46 deletions

View File

@ -697,53 +697,22 @@ def get_active_validator_indices(validators: List[Validator], epoch: EpochNumber
```python
def shuffle(values: List[Any], seed: Bytes32) -> List[Any]:
    """
    Return the shuffled ``values`` with ``seed`` as entropy.

    Implements a "swap-or-not" shuffle over 90 rounds. In each round a
    pivot is derived from ``seed`` and the round number, every position
    ``pos`` is paired with ``flip = (pivot - pos) % n``, and one hash-derived
    bit (selected by the larger of the two positions, so both members of a
    pair read the same bit) decides whether the pair is swapped.

    ``values`` is not modified; a new list is returned. ``hash`` is the
    hash function defined elsewhere in this document (not Python's builtin);
    it is assumed to map bytes to a 32-byte digest.
    """
    n = len(values)
    # Zero or one element has exactly one ordering. Returning early also
    # avoids the modulo-by-zero in the pivot computation for an empty list.
    if n <= 1:
        return list(values)

    indices = list(range(n))
    for round_index in range(90):
        round_bytes = round_index.to_bytes(1, 'big')
        # One decision bit per position; assuming 32-byte digests, each hash
        # call supplies 256 bits, hence ceil(n / 256) calls.
        hashvalues = b''.join([
            hash(seed + round_bytes + i.to_bytes(4, 'big'))
            for i in range((n + 255) // 256)
        ])
        pivot = int.from_bytes(hash(seed + round_bytes), 'big') % n

        def permute(pos: int) -> int:
            # `flip` is the partner of `pos` under this round's pivot.
            flip = (pivot - pos) % n
            # Index the bit by the larger position so that `pos` and `flip`
            # make the same swap decision.
            maxpos = max(pos, flip)
            bit = (hashvalues[maxpos // 8] >> (maxpos % 8)) % 2
            return flip if bit else pos

        indices = [permute(index) for index in indices]
    return [values[index] for index in indices]
```
### `split`