From 3aa84a76dd7cf4a003876d2766ab4aa1726f2622 Mon Sep 17 00:00:00 2001
From: Vitalik Buterin <v@buterin.com>
Date: Tue, 18 Dec 2018 17:33:28 -0500
Subject: [PATCH] Added shuffling algos

---
 shuffling/feistel_shuffle.py      | 44 ++++++++++++++++++++++++++
 shuffling/fisher_yates_shuffle.py | 52 +++++++++++++++++++++++++++++++
 shuffling/prime_shuffle.py        | 42 +++++++++++++++++++++++++
 shuffling/test_shuffle.py         | 35 +++++++++++++++++++++
 4 files changed, 173 insertions(+)
 create mode 100644 shuffling/feistel_shuffle.py
 create mode 100644 shuffling/fisher_yates_shuffle.py
 create mode 100644 shuffling/prime_shuffle.py
 create mode 100644 shuffling/test_shuffle.py

diff --git a/shuffling/feistel_shuffle.py b/shuffling/feistel_shuffle.py
new file mode 100644
index 0000000..3a7be57
--- /dev/null
+++ b/shuffling/feistel_shuffle.py
@@ -0,0 +1,44 @@
+from hashlib import blake2s
+
+def hash(x): return blake2s(x, digest_size=32).digest()  # 32-byte BLAKE2s digest; shadows builtin hash() in this module
+
+def numhash(x, i, seed, modulus):
+    assert 0 <= i < 4  # i picks which base-`modulus` digit of the digest integer is returned
+    return int.from_bytes(hash(x.to_bytes(32, 'big') + seed), 'big') // pow(modulus, i) % modulus
+
+def numhash_all(x, seed, modulus):
+    digest_int = int.from_bytes(hash(x.to_bytes(32, 'big') + seed), 'big')  # hash of (32-byte x + seed) as a big-endian int
+    return [digest_int // modulus ** k % modulus for k in (0, 1, 2, 3)]  # its four lowest base-`modulus` digits
+
+def next_perfect_square(n):
+    """Return the smallest perfect square >= n (n a non-negative int); exact for arbitrarily large n."""
+    from math import isqrt  # integer sqrt (Python 3.8+): float n ** 0.5 mis-rounds near squares once n exceeds ~2**52
+    return (isqrt(n - 1) + 1) ** 2 if n else 0
+
+def multi_feistel(modulus, xs, seed, precompute=False):
+    """Map each x in xs to an index below modulus via a 4-round Feistel network over a side*side grid."""
+    side = int(next_perfect_square(modulus) ** 0.5)
+    # With precompute, hash every possible right half once up front instead of once per round.
+    table = [numhash_all(r, seed, modulus) for r in range(side)] if precompute else None
+    def round_value(right, rnd):
+        return table[right][rnd] if precompute else numhash(right, rnd, seed, modulus)
+
+    def scramble(v):
+        left, right = divmod(v, side)
+        for rnd in range(4):
+            left, right = right, (left + round_value(right, rnd)) % side
+        return left * side + right
+
+    out = []
+    for x in xs:
+        x = scramble(x)
+        while x >= modulus:  # landed in [modulus, side*side): scramble again until it is a valid index
+            x = scramble(x)
+        out.append(x)
+    return out
+
+def feistel_shuffle(values, seed):  # full shuffle: entry k of the result is values[image of position k]
+    return [values[j] for j in multi_feistel(len(values), list(range(len(values))), seed, precompute=True)]
+
+def feistel_shuffle_partial(values, seed, count):  # only positions 0..count-1, hashing on demand (no lookup table)
+    return [values[j] for j in multi_feistel(len(values), list(range(count)), seed, precompute=False)]
diff --git a/shuffling/fisher_yates_shuffle.py b/shuffling/fisher_yates_shuffle.py
new file mode 100644
index 0000000..ebe85ec
--- /dev/null
+++ b/shuffling/fisher_yates_shuffle.py
@@ -0,0 +1,52 @@
+from hashlib import blake2s
+
+def hash(x): return blake2s(x, digest_size=32).digest()  # 32-byte BLAKE2s digest; shadows builtin hash() in this module
+
+def fisher_yates_shuffle(values, seed):
+    """
+    Return a shuffled copy of ``values``; every swap is decided by ``seed``.
+
+    ``seed`` is hashed repeatedly. Each digest is consumed as consecutive
+    3-byte big-endian samples, and each accepted sample chooses which of the
+    not-yet-placed elements is swapped into the next output position.
+    Inputs with fewer than two elements are copied without any hashing.
+    """
+    # Bytes of entropy read per sample (24 bits).
+    chunk = 3
+    # Largest value a single sample can take.
+    ceiling = 2 ** (chunk * 8) - 1
+
+    total = len(values)
+    # A sample has to be able to address every element, so an oversized
+    # list is treated as a programming error.
+    assert total < ceiling
+
+    # Work on a copy so the caller's sequence is left untouched.
+    shuffled = list(values)
+    digest = seed
+    cursor = 0  # next output position to fill
+    last = total - 1  # the final position needs no swap
+    while cursor < last:
+        # Fresh bytes for this pass: the hash of the previous pass's bytes.
+        digest = hash(digest)
+        # Walk the whole 3-byte chunks of the 32-byte digest.
+        for offset in range(0, 32 - 32 % chunk, chunk):
+            if cursor == last:
+                # Only one element is left, so there is nothing to swap.
+                break
+
+            # Candidates for this position: itself and everything after it.
+            span = total - cursor
+            sample = int.from_bytes(digest[offset:offset + chunk], 'big')
+
+            # Samples at or above the largest multiple of `span` that fits
+            # under `ceiling` would bias the modulo below; skip them.
+            if sample >= ceiling - ceiling % span:
+                continue
+
+            # Swap the chosen candidate into the current position.
+            pick = cursor + sample % span
+            shuffled[cursor], shuffled[pick] = shuffled[pick], shuffled[cursor]
+            cursor += 1
+
+    return shuffled
diff --git a/shuffling/prime_shuffle.py b/shuffling/prime_shuffle.py
new file mode 100644
index 0000000..219edb0
--- /dev/null
+++ b/shuffling/prime_shuffle.py
@@ -0,0 +1,42 @@
+from hashlib import blake2s
+
+def hash(x): return blake2s(x, digest_size=32).digest()  # 32-byte BLAKE2s digest; shadows builtin hash() in this module
+
+def is_prime(x):
+    return x >= 2 and all(x % d for d in range(2, int(x**0.5) + 1))  # trial division; 0, 1 and negatives are not prime
+
+def values_at_position(n, positions, seed, precompute=False):
+    """Return where each position lands after 40 seeded rounds of x -> (x**power + a) mod p; ValueError if any is outside [0, n)."""
+    if any(not 0 <= v < n for v in positions):
+        raise ValueError("positions must lie in [0, n)")  # outside that range the re-apply loop below may never exit
+    if n < 2:
+        return list(positions)  # nothing to permute; also keeps p >= 2, since p - 1 == 0 would make the `power` search endless
+    # We work mod p, the lowest prime >= n; a value landing in the "forbidden" [n...p-1] slice is mapped again
+    p = n
+    while not is_prime(p):
+        p += 1
+    # power is the smallest odd prime not dividing p-1, so x -> x**power is a permutation mod p
+    power = 3
+    while (p-1) % power == 0 or not is_prime(power):
+        power += 2
+    power_of = [pow(i, power, p) for i in range(p)] if precompute else None
+    def step(v, a):
+        return ((power_of[v] if precompute else pow(v, power, p)) + a) % p
+    values = list(positions)
+    for rnd in range(40):
+        a = int.from_bytes(seed[(rnd % 8)*4: (rnd % 8)*4 + 4], 'big')
+        for i, v in enumerate(values):
+            v = step(v, a)
+            while v >= n:  # forbidden slice: apply this round's map again
+                v = step(v, a)
+            values[i] = v
+        # NOTE(review): the seed is re-hashed after rounds 0, 8, 16, ...; kept as-is so outputs do not change
+        if rnd % 8 == 0:
+            seed = hash(seed)
+    return values
+
+def prime_shuffle(values, seed):  # full shuffle: entry k of the result is values[image of position k]
+    return [values[j] for j in values_at_position(len(values), list(range(len(values))), seed, precompute=True)]
+
+def prime_shuffle_partial(values, seed, count):  # only positions 0..count-1, without the power lookup table
+    return [values[j] for j in values_at_position(len(values), list(range(count)), seed, precompute=False)]
diff --git a/shuffling/test_shuffle.py b/shuffling/test_shuffle.py
new file mode 100644
index 0000000..245d379
--- /dev/null
+++ b/shuffling/test_shuffle.py
@@ -0,0 +1,35 @@
+from prime_shuffle import prime_shuffle, prime_shuffle_partial
+from feistel_shuffle import feistel_shuffle, feistel_shuffle_partial
+from fisher_yates_shuffle import fisher_yates_shuffle
+import time
+
+# Benchmark sizes: `count` is the length of the full list, `subcount` the
+# length of the partial ("committee") prefix.
+count = 100000
+subcount = 500
+
+# Each entry: banner text, full shuffle, and the matching partial shuffle,
+# or None when no partial variant is benchmarked.
+benchmarks = (
+    ("Testing prime shuffle", prime_shuffle, prime_shuffle_partial),
+    ("Testing feistel shuffle", feistel_shuffle, feistel_shuffle_partial),
+    ("Testing Fisher-Yates shuffle", fisher_yates_shuffle, None),
+)
+
+for banner, full, partial in benchmarks:
+    print(banner)
+    a = time.time()
+    o = full(range(count), b'doge'*8)
+    print(o[:10])
+    if partial is None:
+        # Nothing further to time: report the full shuffle and move on.
+        print("Total runtime: ", time.time() - a)
+        continue
+    t2 = time.time()
+    # Recompute just the first `subcount` entries from scratch.
+    o2 = partial(range(count), b'doge' * 8, subcount)
+    print(o2[:10])
+    # `t2 - a` spans the full shuffle (and its print); the rest is the partial run.
+    print("Total runtime: ", t2 - a)
+    print("Runtime to compute committee: ", time.time() - t2)
+    print("\n")