Refactor python script (#4)

* refactor python script

* update objects and helpers to follow the latest spec

* minimize validators' representation
This commit is contained in:
Chih Cheng Liang 2018-12-27 18:03:05 +08:00 committed by Mamy Ratsimbazafy
parent a3eaa8ae2a
commit d77b8b289a
12 changed files with 485 additions and 4332 deletions

2
Makefile Normal file
View File

@ -0,0 +1,2 @@
# Default target: regenerate the shuffling test vectors.
# `all` names a task rather than a file, so it is declared phony; otherwise a
# stray file called `all` would make the rule appear up to date.
.PHONY: all
all:
	python eth2_testgen/shuffling/tgen_shuffling.py test_vectors/test_vector_shuffling.yml

View File

@ -2,6 +2,12 @@
Test vector generators based on the specifications at https://github.com/ethereum/eth2.0-specs.
## Getting Started
```
make
```
## Implementation
As much as possible the generators copy-paste the specifications. If an example implementation

View File

@ -0,0 +1,54 @@
# Shuffling Test Generator
```
2018 Status Research & Development GmbH
Copyright and related rights waived via [CC0](https://creativecommons.org/publicdomain/zero/1.0/).
This work uses public domain work under CC0 from the Ethereum Foundation
https://github.com/ethereum/eth2.0-specs
```
This file implements a test vector generator for the shuffling algorithm described in the Ethereum
[specs](https://github.com/ethereum/eth2.0-specs/blob/2983e68f0305551083fac7fcf9330c1fc9da3411/specs/core/0_beacon-chain.md#get_new_shuffling)
Reference picture:
![](https://vitalik.ca/files/ShuffleAndAssign.png)
and [description](https://github.com/ethereum/py-evm/blob/f2d0d5d187400ba46a6b8f5b1f1c9997dc7fbb5a/eth/beacon/helpers.py#L272-L344) from Py-EVM
```
validators:
[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]
After shuffling:
[6, 0, 2, 12, 14, 8, 10, 4, 9, 1, 5, 13, 15, 7, 3, 11]
Split by slot:
[
[6, 0, 2, 12, 14], [8, 10, 4, 9, 1], [5, 13, 15, 7, 3, 11]
]
Split by shard:
[
[6, 0], [2, 12, 14], [8, 10], [4, 9, 1], [5, 13, 15], [7, 3, 11]
]
Fill to output:
[
# slot 0
[
ShardAndCommittee(shard_id=0, committee=[6, 0]),
ShardAndCommittee(shard_id=1, committee=[2, 12, 14]),
],
# slot 1
[
ShardAndCommittee(shard_id=2, committee=[8, 10]),
ShardAndCommittee(shard_id=3, committee=[4, 9, 1]),
],
# slot 2
[
ShardAndCommittee(shard_id=4, committee=[5, 13, 15]),
ShardAndCommittee(shard_id=5, committee=[7, 3, 11]),
],
]
```

View File

@ -0,0 +1,4 @@
# Constants consumed by the shuffling helpers (see ``get_new_shuffling``).
# Number of shards; computed shard ids are reduced modulo this value.
SHARD_COUNT = 2**10 # 1024
# Number of slots the shuffled validator list is split into.
EPOCH_LENGTH = 2**6 # 64 slots, 6.4 minutes
# Divisor used when deriving how many committees each slot gets.
TARGET_COMMITTEE_SIZE = 2**8 # 256 validators

View File

@ -0,0 +1,134 @@
"""
copy-pasted from specs
"""
from typing import Any, Dict, List
import yaml
from eth_typing import Hash32
from constants import EPOCH_LENGTH, SHARD_COUNT, TARGET_COMMITTEE_SIZE
from enums import ValidatorStatusCode
from utils import hash
from yaml_objects import ShardCommittee, ValidatorRecord
def is_active_validator(validator: ValidatorRecord) -> bool:
    """
    Report whether ``validator`` counts as active.

    A validator is active when its ``status`` is either ``ACTIVE`` or
    ``ACTIVE_PENDING_EXIT``.
    """
    active_statuses = (
        ValidatorStatusCode.ACTIVE,
        ValidatorStatusCode.ACTIVE_PENDING_EXIT,
    )
    return validator.status in active_statuses
def get_active_validator_indices(validators: List[ValidatorRecord]) -> List[int]:
    """
    Gets indices of active validators from ``validators``.

    The returned indices are positions in ``validators``, in ascending order.
    Activity is decided by ``is_active_validator``.
    """
    return [
        index
        for index, validator in enumerate(validators)
        if is_active_validator(validator)
    ]
def shuffle(values: List[Any], seed: Hash32) -> List[Any]:
    """
    Returns the shuffled ``values`` with ``seed`` as entropy.

    ``values`` itself is not modified; the swaps are applied to a copy, which
    is returned. Entropy is produced by hashing ``seed`` repeatedly (each
    digest is the hash of the previous one) and reading every digest in
    3-byte chunks. Each accepted sample picks the element that is swapped
    into the current position, in the manner of a Fisher-Yates shuffle.

    The ``assert`` below rejects lists that are too long for the 24-bit
    sample range.
    """
    values_count = len(values)

    # Entropy is consumed from the seed in 3-byte (24 bit) chunks.
    rand_bytes = 3
    # The highest possible result of the RNG.
    rand_max = 2 ** (rand_bytes * 8) - 1

    # The range of the RNG places an upper-bound on the size of the list that
    # may be shuffled. It is a logic error to supply an oversized list.
    assert values_count < rand_max

    # Work on a copy so the caller's list keeps its order.
    output = [x for x in values]
    source = seed
    index = 0
    while index < values_count - 1:
        # Re-hash the `source` to obtain a new pattern of bytes.
        source = hash(source)
        # Iterate through the `source` bytes in 3-byte chunks.
        # The bounds assume a 32-byte digest: ten whole chunks are read and
        # the trailing two bytes are left unused.
        for position in range(0, 32 - (32 % rand_bytes), rand_bytes):
            # Determine the number of indices remaining in `values` and exit
            # once the last index is reached.
            remaining = values_count - index
            if remaining == 1:
                break

            # Read 3-bytes of `source` as a 24-bit big-endian integer.
            sample_from_source = int.from_bytes(
                source[position:position + rand_bytes], 'big')

            # Sample values greater than or equal to `sample_max` will cause
            # modulo bias when mapped into the `remaining` range.
            sample_max = rand_max - rand_max % remaining

            # Perform a swap if the consumed entropy will not cause modulo bias.
            if sample_from_source < sample_max:
                # Select a replacement index for the current index.
                replacement_position = (sample_from_source % remaining) + index
                # Swap the current index with the replacement index.
                output[index], output[replacement_position] = output[replacement_position], output[index]
                # Only an accepted sample advances `index`.
                index += 1
            else:
                # The sample causes modulo bias. A new sample should be read.
                pass

    return output
def split(values: List[Any], split_count: int) -> List[Any]:
    """
    Cut ``values`` into ``split_count`` consecutive slices.

    Slice ``i`` covers ``values[len * i // split_count : len * (i + 1) // split_count]``,
    so the slices keep the original order, cover every element once, and may
    differ in length (or be empty) when the length is not a multiple of
    ``split_count``.
    """
    total = len(values)
    pieces = []
    for piece_index in range(split_count):
        start = total * piece_index // split_count
        stop = total * (piece_index + 1) // split_count
        pieces.append(values[start:stop])
    return pieces
def get_new_shuffling(seed: Hash32,
                      validators: List[ValidatorRecord],
                      crosslinking_start_shard: int) -> List[List[ShardCommittee]]:
    """
    Build the per-slot shard committees for ``validators``.

    The indices of the active validators are shuffled with ``seed``, split
    into ``EPOCH_LENGTH`` slot groups, and each slot group is split again
    into ``committees_per_slot`` committees. Shard numbers start at
    ``crosslinking_start_shard`` and wrap around at ``SHARD_COUNT``.

    Returns one list of ``ShardCommittee`` objects per slot.
    """
    active_validator_indices = get_active_validator_indices(validators)
    active_count = len(active_validator_indices)

    # At least one committee per slot, at most SHARD_COUNT // EPOCH_LENGTH.
    upper_bound = SHARD_COUNT // EPOCH_LENGTH
    wanted = active_count // EPOCH_LENGTH // TARGET_COMMITTEE_SIZE
    committees_per_slot = max(1, min(upper_bound, wanted))

    # Shuffle first, then carve the result into one group per slot.
    shuffled = shuffle(active_validator_indices, seed)
    slot_groups = split(shuffled, EPOCH_LENGTH)

    output = []
    for slot, slot_indices in enumerate(slot_groups):
        # First shard served by this slot (before wrap-around).
        first_shard = crosslinking_start_shard + slot * committees_per_slot

        shard_committees = []
        committees = split(slot_indices, committees_per_slot)
        for shard_position, indices in enumerate(committees):
            shard_committees.append(
                ShardCommittee(
                    shard=(first_shard + shard_position) % SHARD_COUNT,
                    committee=indices,
                    total_validator_count=active_count,
                )
            )
        output.append(shard_committees)

    return output

View File

@ -0,0 +1,9 @@
from enum import IntEnum
class ValidatorStatusCode(IntEnum):
    """
    Integer status codes that a validator record can carry.
    """
    PENDING_ACTIVATION = 0
    # ACTIVE and ACTIVE_PENDING_EXIT are the two statuses that
    # ``is_active_validator`` accepts.
    ACTIVE = 1
    ACTIVE_PENDING_EXIT = 2
    EXITED_WITHOUT_PENALTY = 3
    EXITED_WITH_PENALTY = 4

View File

@ -0,0 +1,69 @@
import random
import sys
from typing import Any, Dict, List
import yaml
from constants import SHARD_COUNT
from core_helpers import get_new_shuffling
from enums import ValidatorStatusCode
from yaml_objects import ShardCommittee, ValidatorRecord
def noop(self, *args, **kw):
    """
    Accept any arguments and do nothing.

    Used in place of the YAML emitter's tag-writing method to prevent
    ``!!str`` or ``!!binary`` tags in the output.
    """
    return None
# Replace the emitter's tag handling with the no-op for every later dump.
yaml.emitter.Emitter.process_tag = noop
def yaml_ValidatorStatusCode(dumper, data):
    """
    Represent an enum member through its underlying ``value``.

    Try to deal with enums - otherwise for "ValidatorStatus.Active" you get
    [1], instead of 1.
    """
    plain_value = data.value
    return dumper.represent_data(plain_value)
# Register the representer so ValidatorStatusCode members are dumped via it.
yaml.add_representer(ValidatorStatusCode, yaml_ValidatorStatusCode)
if __name__ == '__main__':
    # The output path is the only command-line argument. Exit with a usage
    # message rather than an IndexError traceback when it is missing.
    if len(sys.argv) < 2:
        sys.exit('usage: ' + sys.argv[0] + ' OUTPUT_YAML_PATH')
    output_path = sys.argv[1]

    # Order not preserved - https://github.com/yaml/pyyaml/issues/110
    metadata = {
        'title': 'Shuffling Algorithm Tests',
        'summary': 'Test vectors for shuffling a list based upon a seed using `shuffle`',
        'test_suite': 'shuffle',
        'fork': 'tchaikovsky',
        'version': 1.0
    }

    # Config
    # A fixed seed makes every run produce the same test cases.
    random.seed(int("0xEF00BEAC", 16))
    num_cases = 10

    list_val_state = list(ValidatorStatusCode)
    test_cases = []

    # The order of the random draws below determines the generated vectors;
    # keep it stable: seed bytes, validator count, statuses, start shard.
    for _ in range(num_cases):
        seedhash = bytes(random.randint(0, 255) for _ in range(32))
        num_val = random.randint(128, 512)
        validators = [
            ValidatorRecord(
                status=random.choice(list_val_state),
                original_index=index)
            for index in range(num_val)
        ]
        # NOTE(review): randint includes both bounds, so SHARD_COUNT itself
        # can be drawn. get_new_shuffling reduces shard numbers modulo
        # SHARD_COUNT; the bound is left as-is to keep the seeded sequence.
        input_ = {
            'validators_status': [v.status.value for v in validators],
            'crosslinking_start_shard': random.randint(0, SHARD_COUNT)
        }
        output = get_new_shuffling(
            seedhash, validators, input_['crosslinking_start_shard'])

        test_cases.append({
            'seed': '0x' + seedhash.hex(), 'input': input_, 'output': output
        })

    with open(output_path, 'w') as outfile:
        # Dump at top level
        yaml.dump(metadata, outfile, default_flow_style=False)
        # default_flow_style will unravel "ValidatorRecord" and "committee" line, exploding file size
        yaml.dump({'test_cases': test_cases}, outfile)

View File

@ -0,0 +1,6 @@
from eth_typing import Hash32
from eth_utils import keccak
def hash(x: bytes) -> Hash32:
    """
    Returns the digest of ``x`` computed by ``eth_utils.keccak``.

    NOTE: the name matches the builtin ``hash``; a module that imports this
    function uses it in place of the builtin.
    """
    return keccak(x)

View File

@ -0,0 +1,43 @@
from typing import Any
import yaml
class ValidatorRecord(yaml.YAMLObject):
    """
    Minimal validator record used by the shuffling test generator.

    ``fields`` maps every attribute name to a label describing its type.
    The constructor sets each listed field as an instance attribute: a field
    that is not passed is set to ``None``, and keyword arguments that are not
    listed in ``fields`` are ignored.
    """

    fields = {
        # Status code
        'status': 'ValidatorStatusCode',
        # Extra index field to ease testing/debugging
        'original_index': 'uint64'
    }

    def __init__(self, **kwargs):
        # Plain attribute access is sufficient here; the previous
        # ``__setattr__``/``__getattribute__`` overrides only delegated to
        # ``super()`` and have been dropped.
        for name in self.fields:
            setattr(self, name, kwargs.get(name))
class ShardCommittee(yaml.YAMLObject):
    """
    A committee of validator indices assigned to one shard.

    ``fields`` maps every attribute name to a label describing its type.
    The constructor sets each listed field as an instance attribute: a field
    that is not passed is set to ``None``, and keyword arguments that are not
    listed in ``fields`` are ignored.
    """

    fields = {
        # Shard number
        'shard': 'uint64',
        # Validator indices
        'committee': ['uint24'],
        # Total validator count (for proofs of custody)
        'total_validator_count': 'uint64',
    }

    def __init__(self, **kwargs):
        # Plain attribute access is sufficient here; the previous
        # ``__setattr__``/``__getattribute__`` overrides only delegated to
        # ``super()`` and have been dropped.
        for name in self.fields:
            setattr(self, name, kwargs.get(name))

View File

@ -1,363 +0,0 @@
# Eth2.0 test vector generator
# 2018 Status Research & Development GmbH
# Copyright and related rights waived via [CC0](https://creativecommons.org/publicdomain/zero/1.0/).
#
# This work uses public domain work under CC0 from the Ethereum Foundation
# https://github.com/ethereum/eth2.0-specs
# This file implements a test vectors generator for the shuffling algorithm described in the Ethereum
# specs as of https://github.com/ethereum/eth2.0-specs/blob/2983e68f0305551083fac7fcf9330c1fc9da3411/specs/core/0_beacon-chain.md#get_new_shuffling
# Reference picture: http://vitalik.ca/files/ShuffleAndAssign.png
# and description from Py-EVM: https://github.com/ethereum/py-evm/blob/f2d0d5d187400ba46a6b8f5b1f1c9997dc7fbb5a/eth/beacon/helpers.py#L272-L344
#
# validators:
# [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]
# After shuffling:
# [6, 0, 2, 12, 14, 8, 10, 4, 9, 1, 5, 13, 15, 7, 3, 11]
# Split by slot:
# [
# [6, 0, 2, 12, 14], [8, 10, 4, 9, 1], [5, 13, 15, 7, 3, 11]
# ]
# Split by shard:
# [
# [6, 0], [2, 12, 14], [8, 10], [4, 9, 1], [5, 13, 15] ,[7, 3, 11]
# ]
# Fill to output:
# [
# # slot 0
# [
# ShardAndCommittee(shard_id=0, committee=[6, 0]),
# ShardAndCommittee(shard_id=1, committee=[2, 12, 14]),
# ],
# # slot 1
# [
# ShardAndCommittee(shard_id=2, committee=[8, 10]),
# ShardAndCommittee(shard_id=3, committee=[4, 9, 1]),
# ],
# # slot 2
# [
# ShardAndCommittee(shard_id=4, committee=[5, 13, 15]),
# ShardAndCommittee(shard_id=5, committee=[7, 3, 11]),
# ],
# ]
# Note that as of 2018-12-03, several implementations are outdated
# as they are still using dynasty or min_committee_size that are not in the specs
# ################################################################
#
# YAML config
#
# ################################################################
import yaml # Requires pyyaml
# Prevent !!str or !!binary tags
def noop(self, *args, **kw):
    """Accept any arguments and do nothing."""
    return None
# Replace the emitter's tag handling with the no-op for every later dump.
yaml.emitter.Emitter.process_tag = noop
# ################################################################
#
# Imports and simplified types
#
# ################################################################
from typing import(
List, Any, Dict, NewType
)
from enum import IntEnum
import random
# Simplified stand-in type: a distinct name for ``bytes`` used in annotations.
Hash32 = NewType('Hash32', bytes)
## See https://github.com/ethereum/eth2.0-specs/pull/227
## and https://github.com/ethereum/eth2.0-specs/issues/151
## Hashing as been changed from Blake2b-512[:32] to Keccak256
# from hashlib import blake2b
# def hash(x):
# return blake2b(x).digest()[:32]
from eth_utils import keccak
def hash(x):
    """
    Returns the digest of ``x`` computed by ``eth_utils.keccak``.

    NOTE: this definition replaces the builtin ``hash`` within this module.
    """
    return keccak(x)
class ValidatorStatusCode(IntEnum):
    """
    Integer status codes that a validator record can carry.
    """
    PENDING_ACTIVATION = 0
    # ACTIVE and ACTIVE_PENDING_EXIT are the two statuses selected by
    # ``get_active_validator_indices``.
    ACTIVE = 1
    ACTIVE_PENDING_EXIT = 2
    EXITED_WITHOUT_PENALTY = 3
    EXITED_WITH_PENALTY = 4
class ValidatorRecord(yaml.YAMLObject):
    """
    Minimal validator record used by the shuffling test generator.

    ``fields`` maps every attribute name to a label describing its type.
    The constructor sets each listed field as an instance attribute: a field
    that is not passed is set to ``None``, and keyword arguments that are not
    listed in ``fields`` are ignored.
    """

    fields = {
        # Status code
        'status': 'ValidatorStatusCode',
        # Extra index field to ease testing/debugging
        'original_index': 'uint64'
    }

    def __init__(self, **kwargs):
        # Plain attribute access is sufficient here; the previous
        # ``__setattr__``/``__getattribute__`` overrides only delegated to
        # ``super()`` and have been dropped.
        for name in self.fields:
            setattr(self, name, kwargs.get(name))
class ShardAndCommittee(yaml.YAMLObject):
    """
    A committee of validator indices assigned to one shard.

    ``fields`` maps every attribute name to a label describing its type.
    The constructor sets each listed field as an instance attribute: a field
    that is not passed is set to ``None``, and keyword arguments that are not
    listed in ``fields`` are ignored.
    """

    fields = {
        # Shard number
        'shard': 'uint64',
        # Validator indices
        'committee': ['uint24'],
        # Total validator count (for proofs of custody)
        'total_validator_count': 'uint64',
    }

    def __init__(self, **kwargs):
        # Plain attribute access is sufficient here; the previous
        # ``__setattr__``/``__getattribute__`` overrides only delegated to
        # ``super()`` and have been dropped.
        for name in self.fields:
            setattr(self, name, kwargs.get(name))
# ################################################################
#
# Environment variables
#
# ################################################################
# Number of shards; computed shard ids are reduced modulo this value.
SHARD_COUNT = 2**10 # 1024
# Number of slots the shuffled validator list is split into.
EPOCH_LENGTH = 2**6 # 64 slots, 6.4 minutes
# Divisor used when deriving how many committees each slot gets.
TARGET_COMMITTEE_SIZE = 2**8 # 256 validators
# ################################################################
#
# Procedures (copy-pasted from specs)
#
# ################################################################
def get_active_validator_indices(validators: List[ValidatorRecord]) -> List[int]:
    """
    Gets indices of active validators from ``validators``.

    A validator is active when its ``status`` is ``ACTIVE`` or
    ``ACTIVE_PENDING_EXIT``. The returned indices are positions in
    ``validators``, in ascending order.
    """
    # Built once instead of once per validator.
    active_statuses = (
        ValidatorStatusCode.ACTIVE,
        ValidatorStatusCode.ACTIVE_PENDING_EXIT,
    )
    return [
        index
        for index, validator in enumerate(validators)
        if validator.status in active_statuses
    ]
def shuffle(values: List[Any], seed: Hash32) -> List[Any]:
    """
    Returns the shuffled ``values`` with ``seed`` as entropy.

    ``values`` itself is not modified; the swaps are applied to a copy, which
    is returned. Entropy is produced by hashing ``seed`` repeatedly (each
    digest is the hash of the previous one) and reading every digest in
    3-byte chunks. Each accepted sample picks the element that is swapped
    into the current position, in the manner of a Fisher-Yates shuffle.

    The ``assert`` below rejects lists that are too long for the 24-bit
    sample range.
    """
    values_count = len(values)

    # Entropy is consumed from the seed in 3-byte (24 bit) chunks.
    rand_bytes = 3
    # The highest possible result of the RNG.
    rand_max = 2 ** (rand_bytes * 8) - 1

    # The range of the RNG places an upper-bound on the size of the list that
    # may be shuffled. It is a logic error to supply an oversized list.
    assert values_count < rand_max

    # Work on a copy so the caller's list keeps its order.
    output = [x for x in values]
    source = seed
    index = 0
    while index < values_count - 1:
        # Re-hash the `source` to obtain a new pattern of bytes.
        source = hash(source)
        # Iterate through the `source` bytes in 3-byte chunks.
        # The bounds assume a 32-byte digest: ten whole chunks are read and
        # the trailing two bytes are left unused.
        for position in range(0, 32 - (32 % rand_bytes), rand_bytes):
            # Determine the number of indices remaining in `values` and exit
            # once the last index is reached.
            remaining = values_count - index
            if remaining == 1:
                break

            # Read 3-bytes of `source` as a 24-bit big-endian integer.
            sample_from_source = int.from_bytes(source[position:position + rand_bytes], 'big')

            # Sample values greater than or equal to `sample_max` will cause
            # modulo bias when mapped into the `remaining` range.
            sample_max = rand_max - rand_max % remaining

            # Perform a swap if the consumed entropy will not cause modulo bias.
            if sample_from_source < sample_max:
                # Select a replacement index for the current index.
                replacement_position = (sample_from_source % remaining) + index
                # Swap the current index with the replacement index.
                output[index], output[replacement_position] = output[replacement_position], output[index]
                # Only an accepted sample advances `index`.
                index += 1
            else:
                # The sample causes modulo bias. A new sample should be read.
                pass

    return output
def split(values: List[Any], split_count: int) -> List[Any]:
    """
    Cut ``values`` into ``split_count`` consecutive slices.

    Slice boundaries are ``len(values) * i // split_count``, so the slices
    keep the original order and together cover every element exactly once.
    """
    total = len(values)
    # One boundary per cut point, including both ends of the list.
    boundaries = [total * i // split_count for i in range(split_count + 1)] if split_count > 0 else []
    return [
        values[start:stop]
        for start, stop in zip(boundaries, boundaries[1:])
    ]
def clamp(minval: int, maxval: int, x: int) -> int:
    """
    Limit ``x`` to the range from ``minval`` to ``maxval``.

    The lower bound is checked first: any ``x`` at or below ``minval`` yields
    ``minval``; otherwise an ``x`` at or above ``maxval`` yields ``maxval``.
    """
    if x <= minval:
        return minval
    return maxval if x >= maxval else x
def get_new_shuffling(seed: Hash32,
                      validators: List[ValidatorRecord],
                      crosslinking_start_shard: int) -> List[List[ShardAndCommittee]]:
    """
    Build the per-slot shard committees for ``validators``.

    The indices of the active validators are shuffled with ``seed``, split
    into ``EPOCH_LENGTH`` slot groups, and each slot group is split again
    into ``committees_per_slot`` committees. Shard numbers start at
    ``crosslinking_start_shard`` and wrap around at ``SHARD_COUNT``.

    Returns one list of ``ShardAndCommittee`` objects per slot.
    """
    active_validator_indices = get_active_validator_indices(validators)
    active_count = len(active_validator_indices)

    # Between 1 and SHARD_COUNT // EPOCH_LENGTH committees per slot.
    upper_bound = SHARD_COUNT // EPOCH_LENGTH
    wanted = active_count // EPOCH_LENGTH // TARGET_COMMITTEE_SIZE
    committees_per_slot = clamp(1, upper_bound, wanted)

    # Shuffle first, then carve the result into one group per slot.
    shuffled = shuffle(active_validator_indices, seed)
    slot_groups = split(shuffled, EPOCH_LENGTH)

    output = []
    for slot, slot_indices in enumerate(slot_groups):
        # First shard served by this slot (before wrap-around).
        first_shard = crosslinking_start_shard + slot * committees_per_slot

        shards_and_committees_for_slot = []
        committees = split(slot_indices, committees_per_slot)
        for shard_position, indices in enumerate(committees):
            shards_and_committees_for_slot.append(
                ShardAndCommittee(
                    shard=(first_shard + shard_position) % SHARD_COUNT,
                    committee=indices,
                    total_validator_count=active_count,
                )
            )
        output.append(shards_and_committees_for_slot)

    return output
# ################################################################
#
# Print helpers
#
# ################################################################
def toStrValidator(validators: [ValidatorRecord]) -> str:
    """
    Format ``validators`` as a comma-separated, human-readable string.

    Each entry shows the validator's ``original_index`` and ``status``.
    """
    rendered = [
        f'Val(idx: {val.original_index}, status: {val.status})'
        for val in validators
    ]
    return ', '.join(rendered)
def toStrShardComs(shard_comms: List[List[ShardAndCommittee]]) -> str:
    """
    Format the per-slot committees as a human-readable string.

    Committees without members are left out, and so are slots in which no
    committee has members. The remaining slots are rendered as
    ``[Slot <n>: SaC(...), ...]`` and joined with a newline followed by a tab.
    """
    def strSlot(slot_id: int, sacs: List[ShardAndCommittee]) -> str:
        # Returns '' for a slot that has nothing to show.
        result = ', '.join(
            f'SaC(shard: {sac.shard}, comm: {sac.committee})'
            for sac in sacs if sac.committee
        )
        # The closing bracket balances the opening one.
        return f'[Slot {slot_id}: {result}]' if result else ''

    # Render each slot exactly once, then keep only the non-empty ones.
    rendered = (strSlot(slot_id, sacs) for slot_id, sacs in enumerate(shard_comms))
    return '\n\t'.join(text for text in rendered if text)
# ################################################################
#
# Testing
#
# ################################################################
# if __name__ == '__main__':
#
# # Config
# random.seed(int("0xEF00BEAC", 16))
# num_val = 256 # Number of validators
#
#
# seedhash = bytes(random.randint(0, 255) for byte in range(32))
# list_val_state = list(ValidatorStatus)
# validators = [ValidatorRecord(status=random.choice(list_val_state), original_index=num_val) for num_val in range(num_val)]
# crosslinking_start_shard = random.randint(0, SHARD_COUNT)
#
# print(f"Hash: 0x{seedhash.hex()}")
# print(f"validators: {toStrValidator(validators)}")
# print(f"crosslinking_start_shard: {crosslinking_start_shard}")
#
# shuffle = get_new_shuffling(seedhash, validators, crosslinking_start_shard)
# print(f"shuffling: {toStrShardComs(shuffle)}")
# ################################################################
#
# YAML Generator
#
# ################################################################
## Try to deal with enums - otherwise for "ValidatorStatus.Active" you get [1], instead of 1
def yaml_ValidatorStatusCode(dumper, data):
    """Represent an enum member through its underlying ``value``."""
    plain_value = data.value
    return dumper.represent_data(plain_value)
# Register the representer so ValidatorStatusCode members are dumped via it.
yaml.add_representer(ValidatorStatusCode, yaml_ValidatorStatusCode)
if __name__ == '__main__':
    import random
    import sys

    # Order not preserved - https://github.com/yaml/pyyaml/issues/110
    metadata = {
        'title': 'Shuffling Algorithm Tests',
        'summary': 'Test vectors for shuffling a list based upon a seed using `shuffle`',
        'test_suite': 'shuffle',
        'fork': 'tchaikovsky',
        'version': 1.0
    }

    # Config
    # A fixed seed makes every run produce the same test cases.
    random.seed(int("0xEF00BEAC", 16))
    num_cases = 10

    list_val_state = list(ValidatorStatusCode)
    test_cases = []

    # The order of the random draws below determines the generated vectors;
    # keep it stable: seed bytes, validator count, statuses, start shard.
    for _ in range(num_cases):
        seedhash = bytes(random.randint(0, 255) for _ in range(32))
        num_val = random.randint(128, 512)
        # Named ``case_input`` so the builtin ``input`` is not shadowed.
        case_input = {
            'validators': [
                ValidatorRecord(status=random.choice(list_val_state), original_index=index)
                for index in range(num_val)
            ],
            # NOTE(review): randint includes both bounds, so SHARD_COUNT
            # itself can be drawn; get_new_shuffling reduces shard numbers
            # modulo SHARD_COUNT. Left as-is to keep the seeded sequence.
            'crosslinking_start_shard': random.randint(0, SHARD_COUNT)
        }
        output = get_new_shuffling(
            seedhash, case_input['validators'], case_input['crosslinking_start_shard'])

        test_cases.append({
            'seed': '0x' + seedhash.hex(), 'input': case_input, 'output': output
        })

    ## Debug
    # yaml.dump(metadata, sys.stdout)
    # yaml.dump(test_cases, sys.stdout)

    with open('test_vector_shuffling.yml', 'w') as outfile:
        # Dump at top level
        yaml.dump(metadata, outfile, default_flow_style=False)
        # default_flow_style will unravel "ValidatorRecord" and "committee" line, exploding file size
        yaml.dump({'test_cases': test_cases}, outfile)

1
requirements.txt Normal file
View File

@ -0,0 +1 @@
eth-typing==2.0.0

File diff suppressed because it is too large Load Diff