initial commit

2018-12-13 11:25:06 +01:00 · 2018-12-13 11:25:06 +01:00 · 0cb6b93974
commit 0cb6b93974
3 changed files with 6312 additions and 0 deletions
--- a/README.md
+++ b/README.md
@ -0,0 +1,14 @@
+# Eth 2.0 test vector generators
+
+Test vector generators based on the specifications at https://github.com/ethereum/eth2.0-specs.
+
+## Implementation
+
+As much as possible, the generators copy-paste the specifications. If an example implementation
+is not available in the specifications, the generator will be implemented using one of the
+Ethereum Foundation libraries instead.
+
+## License
+
+Similar to Eth 2.0 specifications, all code and generated test vectors
+are public domain under [CC0](https://creativecommons.org/publicdomain/zero/1.0/)
--- a/eth2_testgen/tgen_shuffling.py
+++ b/eth2_testgen/tgen_shuffling.py
@ -0,0 +1,363 @@
+# Eth2.0 test vector generator
+# 2018 Status Research & Development GmbH
+# Copyright and related rights waived via [CC0](https://creativecommons.org/publicdomain/zero/1.0/).
+#
+# This work uses public domain work under CC0 from the Ethereum Foundation
+# https://github.com/ethereum/eth2.0-specs
+
+# This file implements a test vectors generator for the shuffling algorithm described in the Ethereum
+# specs as of https://github.com/ethereum/eth2.0-specs/blob/2983e68f0305551083fac7fcf9330c1fc9da3411/specs/core/0_beacon-chain.md#get_new_shuffling
+
+# Reference picture: http://vitalik.ca/files/ShuffleAndAssign.png
+# and description from Py-EVM: https://github.com/ethereum/py-evm/blob/f2d0d5d187400ba46a6b8f5b1f1c9997dc7fbb5a/eth/beacon/helpers.py#L272-L344
+#
+# validators:
+#     [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]
+# After shuffling:
+#     [6, 0, 2, 12, 14, 8, 10, 4, 9, 1, 5, 13, 15, 7, 3, 11]
+# Split by slot:
+#     [
+#         [6, 0, 2, 12, 14], [8, 10, 4, 9, 1], [5, 13, 15, 7, 3, 11]
+#     ]
+# Split by shard:
+#     [
+#         [6, 0], [2, 12, 14], [8, 10], [4, 9, 1], [5, 13, 15] ,[7, 3, 11]
+#     ]
+# Fill to output:
+#     [
+#         # slot 0
+#         [
+#             ShardAndCommittee(shard_id=0, committee=[6, 0]),
+#             ShardAndCommittee(shard_id=1, committee=[2, 12, 14]),
+#         ],
+#         # slot 1
+#         [
+#             ShardAndCommittee(shard_id=2, committee=[8, 10]),
+#             ShardAndCommittee(shard_id=3, committee=[4, 9, 1]),
+#         ],
+#         # slot 2
+#         [
+#             ShardAndCommittee(shard_id=4, committee=[5, 13, 15]),
+#             ShardAndCommittee(shard_id=5, committee=[7, 3, 11]),
+#         ],
+#     ]
+
+# Note that as of 2018-12-03, several implementations are outdated
+# as they are still using dynasty or min_committee_size that are not in the specs
+
+# ################################################################
+#
+#                       YAML config
+#
+# ################################################################
+
+import yaml    # Requires pyyaml
+
+# Prevent !!str or !!binary tags
+# This is a module-wide monkey-patch: it replaces ``process_tag`` on the
+# Emitter class itself, so every yaml dump done after this line is affected.
+def noop(self, *args, **kw):
+    """Accept any arguments and do nothing (stand-in for ``Emitter.process_tag``)."""
+    pass
+yaml.emitter.Emitter.process_tag = noop
+
+# ################################################################
+#
+#                  Imports and simplified types
+#
+# ################################################################
+
+from typing import(
+    List, Any, Dict, NewType
+)
+
+from enum import IntEnum
+import random
+
+Hash32 = NewType('Hash32', bytes)
+
+## See https://github.com/ethereum/eth2.0-specs/pull/227
+## and https://github.com/ethereum/eth2.0-specs/issues/151
+## Hashing has been changed from Blake2b-512[:32] to Keccak256
+
+# from hashlib import blake2b
+# def hash(x):
+#     return blake2b(x).digest()[:32]
+
+from eth_utils import keccak
+# NOTE: this shadows the builtin ``hash`` for the whole module; ``shuffle``
+# below relies on this name (presumably to match the spec text - confirm).
+def hash(x):
+    """Return the Keccak256 digest of ``x`` as computed by ``eth_utils.keccak``."""
+    return keccak(x)
+
+class ValidatorStatus(IntEnum):
+    """
+    Status codes a ``ValidatorRecord`` can carry.
+
+    Only ``ACTIVE`` and ``PENDING_EXIT`` are treated as active by
+    ``get_active_validator_indices``.
+    """
+    PENDING_ACTIVATION = 0
+    ACTIVE = 1
+    EXITED_WITHOUT_PENALTY = 2
+    EXITED_WITH_PENALTY = 3
+    # Not in specs anymore - https://github.com/ethereum/eth2.0-specs/issues/216
+    PENDING_EXIT = 4
+
+class ValidatorRecord(yaml.YAMLObject):
+    fields = {
+        # Status code
+        'status': 'ValidatorStatus',
+        # Extra index field to ease testing/debugging
+        'original_index': 'uint64'
+    }
+
+    def __init__(self, **kwargs):
+        for k in self.fields.keys():
+            setattr(self, k, kwargs.get(k))
+
+    def __setattr__(self, name: str, value: Any) -> None:
+        super().__setattr__(name, value)
+
+    def __getattribute__(self, name: str) -> Any:
+        return super().__getattribute__(name)
+
+class ShardAndCommittee(yaml.YAMLObject):
+    fields = {
+        # Shard number
+        'shard': 'uint64',
+        # Validator indices
+        'committee': ['uint24'],
+        # Total validator count (for proofs of custody)
+        'total_validator_count': 'uint64',
+    }
+
+    def __init__(self, **kwargs):
+        for k in self.fields.keys():
+            setattr(self, k, kwargs.get(k))
+
+    def __setattr__(self, name: str, value: Any) -> None:
+        super().__setattr__(name, value)
+
+    def __getattribute__(self, name: str) -> Any:
+        return super().__getattribute__(name)
+
+# ################################################################
+#
+#                         Constants
+#
+# ################################################################
+
+SHARD_COUNT           = 2**10 # 1024
+EPOCH_LENGTH          = 2**6  # 64 slots, 6.4 minutes
+TARGET_COMMITTEE_SIZE = 2**8  # 256 validators
+
+# ################################################################
+#
+#              Procedures (copy-pasted from specs)
+#
+# ################################################################
+
+def get_active_validator_indices(validators: List[ValidatorRecord]) -> List[int]:
+    """
+    Gets indices of active validators from ``validators``.
+
+    A validator counts as active when its ``status`` is ``ACTIVE`` or
+    ``PENDING_EXIT``. The returned values are positions in ``validators``,
+    in ascending order.
+    """
+    return [i for i, v in enumerate(validators) if v.status in [ValidatorStatus.ACTIVE, ValidatorStatus.PENDING_EXIT]]
+
+def shuffle(values: List[Any], seed: Hash32) -> List[Any]:
+    """
+    Returns the shuffled ``values`` with ``seed`` as entropy.
+
+    ``values`` itself is left untouched: the swaps are applied to a copy,
+    which is returned. The only source of randomness is the repeated hashing
+    of ``seed``.
+
+    Raises ``AssertionError`` (when assertions are enabled) if ``len(values)``
+    is not strictly below ``2**24 - 1``.
+    """
+    values_count = len(values)
+
+    # Entropy is consumed from the seed in 3-byte (24 bit) chunks.
+    rand_bytes = 3
+    # The highest possible result of the RNG.
+    rand_max = 2 ** (rand_bytes * 8) - 1
+
+    # The range of the RNG places an upper-bound on the size of the list that
+    # may be shuffled. It is a logic error to supply an oversized list.
+    assert values_count < rand_max
+
+    # Work on a copy so that the caller's list is not modified.
+    output = [x for x in values]
+    source = seed
+    index = 0
+    # Position ``index`` is fixed by swapping it with a position chosen from
+    # ``index`` onwards; the last position needs no swap.
+    while index < values_count - 1:
+        # Re-hash the `source` to obtain a new pattern of bytes.
+        source = hash(source)
+        # Iterate through the `source` bytes in 3-byte chunks.
+        # 32 - (32 % 3) == 30, so positions 0, 3, ..., 27 are visited: ten
+        # samples per hash, the last two bytes of the slice range are unused.
+        for position in range(0, 32 - (32 % rand_bytes), rand_bytes):
+            # Determine the number of indices remaining in `values` and exit
+            # once the last index is reached.
+            remaining = values_count - index
+            if remaining == 1:
+                break
+
+            # Read 3-bytes of `source` as a 24-bit big-endian integer.
+            sample_from_source = int.from_bytes(source[position:position + rand_bytes], 'big')
+
+            # Sample values greater than or equal to `sample_max` will cause
+            # modulo bias when mapped into the `remaining` range.
+            sample_max = rand_max - rand_max % remaining
+
+            # Perform a swap if the consumed entropy will not cause modulo bias.
+            if sample_from_source < sample_max:
+                # Select a replacement index for the current index.
+                replacement_position = (sample_from_source % remaining) + index
+                # Swap the current index with the replacement index.
+                output[index], output[replacement_position] = output[replacement_position], output[index]
+                index += 1
+            else:
+                # The sample causes modulo bias. A new sample should be read.
+                # ``index`` is not advanced, so the same position is retried
+                # with the next 3-byte chunk.
+                pass
+
+    return output
+
+def split(values: List[Any], split_count: int) -> List[Any]:
+    """
+    Splits ``values`` into ``split_count`` pieces.
+
+    Returns a list of ``split_count`` consecutive slices of ``values``. Piece
+    ``i`` spans ``len(values) * i // split_count`` (inclusive) to
+    ``len(values) * (i + 1) // split_count`` (exclusive), so the pieces cover
+    ``values`` in order and some pieces are empty when ``split_count``
+    exceeds ``len(values)``.
+    """
+    list_length = len(values)
+    return [
+        values[(list_length * i // split_count): (list_length * (i + 1) // split_count)]
+        for i in range(split_count)
+    ]
+
+def clamp(minval: int, maxval: int, x: int) -> int:
+    """
+    Clamps ``x`` between ``minval`` and ``maxval``.
+
+    Note the argument order: the bounds come first, the value last. The lower
+    bound is checked first, so ``minval`` is returned whenever
+    ``x <= minval``, even if ``minval`` is greater than ``maxval``.
+    """
+    if x <= minval:
+        return minval
+    elif x >= maxval:
+        return maxval
+    else:
+        return x
+
+def get_new_shuffling(seed: Hash32,
+                      validators: List[ValidatorRecord],
+                      crosslinking_start_shard: int) -> List[List[ShardAndCommittee]]:
+    """
+    Shuffles ``validators`` into shard committees using ``seed`` as entropy.
+
+    Returns one list per slot (``EPOCH_LENGTH`` lists in total), each holding
+    ``committees_per_slot`` ``ShardAndCommittee`` entries. Committees contain
+    indices into ``validators``, restricted to the active validators.
+    """
+    active_validator_indices = get_active_validator_indices(validators)
+
+    # Between 1 and SHARD_COUNT // EPOCH_LENGTH committees per slot, depending
+    # on how many active validators there are.
+    committees_per_slot = clamp(
+        1,
+        SHARD_COUNT // EPOCH_LENGTH,
+        len(active_validator_indices) // EPOCH_LENGTH // TARGET_COMMITTEE_SIZE,
+    )
+
+    # Shuffle with seed
+    shuffled_active_validator_indices = shuffle(active_validator_indices, seed)
+
+    # Split the shuffled list into epoch_length pieces
+    validators_per_slot = split(shuffled_active_validator_indices, EPOCH_LENGTH)
+
+    output = []
+    for slot, slot_indices in enumerate(validators_per_slot):
+        # Split the shuffled list into committees_per_slot pieces
+        shard_indices = split(slot_indices, committees_per_slot)
+
+        # Shards are handed out consecutively across slots, starting at
+        # ``crosslinking_start_shard``; the modulo below wraps past SHARD_COUNT.
+        shard_id_start = crosslinking_start_shard + slot * committees_per_slot
+
+        shards_and_committees_for_slot = [
+            ShardAndCommittee(
+                shard=(shard_id_start + shard_position) % SHARD_COUNT,
+                committee=indices,
+                # Number of *active* validators, not len(validators).
+                total_validator_count=len(active_validator_indices),
+            )
+            for shard_position, indices in enumerate(shard_indices)
+        ]
+        output.append(shards_and_committees_for_slot)
+
+    return output
+
+# ################################################################
+#
+#                       Print helpers
+#
+# ################################################################
+
+def toStrValidator(validators: [ValidatorRecord]) -> str:
+    return ', '.join(
+        f'Val(idx: {val.original_index}, status: {val.status})' for val in validators
+    )
+
+def toStrShardComs(shard_comms: List[List[ShardAndCommittee]]) -> str:
+
+    def strSlot(slot_id: int, sacs: List[ShardAndCommittee]) -> str:
+        result = ', '.join(
+            f'SaC(shard: {sac.shard}, comm: {sac.committee})'
+            for sac in sacs if sac.committee
+        )
+        if result != '': # Only return non-empty slots
+            return f'[Slot {slot_id}: ' + result
+        else:
+            return ''
+
+    return '\n\t'.join(
+        strSlot(slot_id, sacs) for slot_id, sacs in enumerate(shard_comms) if strSlot(slot_id, sacs)
+    )
+
+# ################################################################
+#
+#                       Testing
+#
+# ################################################################
+# if __name__ == '__main__':
+#
+#     # Config
+#     random.seed(int("0xEF00BEAC", 16))
+#     num_val = 256 # Number of validators
+#
+#
+#     seedhash = bytes(random.randint(0, 255) for byte in range(32))
+#     list_val_state = list(ValidatorStatus)
+#     validators = [ValidatorRecord(status=random.choice(list_val_state), original_index=num_val) for num_val in range(num_val)]
+#     crosslinking_start_shard = random.randint(0, SHARD_COUNT)
+#
+#     print(f"Hash: 0x{seedhash.hex()}")
+#     print(f"validators: {toStrValidator(validators)}")
+#     print(f"crosslinking_start_shard: {crosslinking_start_shard}")
+#
+#     shuffle = get_new_shuffling(seedhash, validators, crosslinking_start_shard)
+#     print(f"shuffling: {toStrShardComs(shuffle)}")
+
+# ################################################################
+#
+#                       YAML Generator
+#
+# ################################################################
+
+## Try to deal with enums - otherwise for "ValidatorStatus.Active" you get [1], instead of 1
+def yaml_ValidatorStatus(dumper, data):
+    """Represent a ``ValidatorStatus`` member by its plain ``value``."""
+    return dumper.represent_data(data.value)
+# Registered globally: applies to every yaml dump of a ValidatorStatus.
+yaml.add_representer(ValidatorStatus, yaml_ValidatorStatus)
+
+if __name__ == '__main__':
+    import sys, random
+
+    # Order not preserved - https://github.com/yaml/pyyaml/issues/110
+    metadata = {
+        'title': 'Shuffling Algorithm Tests',
+        'summary': 'Test vectors for shuffling a list based upon a seed using `shuffle`',
+        'test_suite': 'shuffle',
+        'fork': 'tchaikovsky',
+        'version': 1.0
+    }
+
+    # Config
+    random.seed(int("0xEF00BEAC", 16))
+    num_cases = 10
+    list_val_state = list(ValidatorStatus)
+    test_cases = []
+
+    for case in range(num_cases):
+        seedhash = bytes(random.randint(0, 255) for byte in range(32))
+        num_val = random.randint(128, 512)
+        input = {
+            'validators': [ValidatorRecord(status=random.choice(list_val_state), original_index=num_val) for num_val in range(num_val)],
+            'crosslinking_start_shard': random.randint(0, SHARD_COUNT)
+        }
+        output = get_new_shuffling(seedhash, input['validators'], input['crosslinking_start_shard'])
+
+        test_cases.append({
+            'seed': '0x' + seedhash.hex(), 'input': input, 'output': output
+        })
+
+    ## Debug
+    # yaml.dump(metadata, sys.stdout)
+    # yaml.dump(test_cases, sys.stdout)
+    with open('test_vector_shuffling.yml', 'w') as outfile:
+        yaml.dump(metadata, outfile, default_flow_style=False) # Dump at top level
+        yaml.dump({'test_cases': test_cases}, outfile)         # default_flow_style will unravel "ValidatorRecord" and "committee" line, exploding file size
--- a/test_vectors/test_vector_shuffling.yml
+++ b/test_vectors/test_vector_shuffling.yml