From ea2f577a8dbd83baa9f32d1e342d84c43c13073f Mon Sep 17 00:00:00 2001 From: Dean Eigenmann Date: Tue, 2 Jul 2019 05:31:49 +0200 Subject: [PATCH 01/22] Update 0_beacon-chain.md --- specs/core/0_beacon-chain.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/specs/core/0_beacon-chain.md b/specs/core/0_beacon-chain.md index 975874d51..76f199e39 100644 --- a/specs/core/0_beacon-chain.md +++ b/specs/core/0_beacon-chain.md @@ -389,7 +389,7 @@ class DepositData(Container): ```python class CompactCommittee(Container): - pubkeys: List[Bytes48, MAX_VALIDATORS_PER_COMMITTEE] + pubkeys: List[BLSPubKey, MAX_VALIDATORS_PER_COMMITTEE] compact_validators: List[uint64, MAX_VALIDATORS_PER_COMMITTEE] ``` From f8fa6f00461cd874d854952087459d2ed6b9fc69 Mon Sep 17 00:00:00 2001 From: Dean Eigenmann Date: Tue, 2 Jul 2019 05:43:17 +0200 Subject: [PATCH 02/22] Update 0_beacon-chain.md --- specs/core/0_beacon-chain.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/specs/core/0_beacon-chain.md b/specs/core/0_beacon-chain.md index 76f199e39..c9c2531ea 100644 --- a/specs/core/0_beacon-chain.md +++ b/specs/core/0_beacon-chain.md @@ -389,7 +389,7 @@ class DepositData(Container): ```python class CompactCommittee(Container): - pubkeys: List[BLSPubKey, MAX_VALIDATORS_PER_COMMITTEE] + pubkeys: List[BLSPubkey, MAX_VALIDATORS_PER_COMMITTEE] compact_validators: List[uint64, MAX_VALIDATORS_PER_COMMITTEE] ``` From 88892028dc8993795a282734838221358f1d43f6 Mon Sep 17 00:00:00 2001 From: Danny Ryan Date: Tue, 2 Jul 2019 12:35:26 -0600 Subject: [PATCH 03/22] update test formats list in readme --- specs/test_formats/README.md | 3 +++ 1 file changed, 3 insertions(+) diff --git a/specs/test_formats/README.md b/specs/test_formats/README.md index 277e986d5..e4f013d8b 100644 --- a/specs/test_formats/README.md +++ b/specs/test_formats/README.md @@ -33,7 +33,10 @@ The particular formats of specific types of tests (test suites) are defined in s Test formats: - [`bls`](./bls/README.md) +- [`epoch_processing`](./epoch_processing/README.md) +- [`genesis`](./genesis/README.md) - [`operations`](./operations/README.md) +- [`sanity`](./sanity/README.md) - [`shuffling`](./shuffling/README.md) - [`ssz_generic`](./ssz_generic/README.md) - [`ssz_static`](./ssz_static/README.md) From 6422acdcdd80afc3ffbf88fb6cdafecbb7c1eea7 Mon Sep 17 00:00:00 2001 From: Dankrad Feist Date: Wed, 3 Jul 2019 13:31:03 +0100 Subject: [PATCH 04/22] Cosmetic change: Define Bitlist/Bitvector serialization using bytes, not bigints --- .../pyspec/eth2spec/utils/ssz/ssz_impl.py | 24 +++++++++++-------- 1 file changed, 14 insertions(+), 10 deletions(-) diff --git a/test_libs/pyspec/eth2spec/utils/ssz/ssz_impl.py b/test_libs/pyspec/eth2spec/utils/ssz/ssz_impl.py index d5855a755..a4abef966 100644 --- a/test_libs/pyspec/eth2spec/utils/ssz/ssz_impl.py +++ b/test_libs/pyspec/eth2spec/utils/ssz/ssz_impl.py @@ -41,11 +41,16 @@ def serialize(obj: SSZValue): if isinstance(obj, BasicValue): return serialize_basic(obj) elif isinstance(obj, Bitvector): - as_integer = sum([obj[i] << i for i in range(len(obj))]) - return as_integer.to_bytes((len(obj) + 7) // 8, "little") + as_bytearray = [0] * ((len(obj) + 7) // 8) + for i in range(len(obj)): + as_bytearray[i // 8] |= obj[i] << (i % 8) + return bytes(as_bytearray) elif isinstance(obj, Bitlist): - as_integer = (1 << len(obj)) + sum([obj[i] << i for i in range(len(obj))]) - return as_integer.to_bytes((as_integer.bit_length() + 7) // 8, "little") + as_bytearray = [0] * (len(obj) // 8 + 1) + for i in 
range(len(obj)): + as_bytearray[i // 8] |= obj[i] << (i % 8) + as_bytearray[len(obj) // 8] |= 1 << (len(obj) % 8) + return bytes(as_bytearray) elif isinstance(obj, Series): return encode_series(obj) else: @@ -92,12 +97,11 @@ def encode_series(values: Series): def pack(values: Series): if isinstance(values, bytes): # Bytes and BytesN are already packed return values - elif isinstance(values, Bitvector): - as_integer = sum([values[i] << i for i in range(len(values))]) - return as_integer.to_bytes((values.length + 7) // 8, "little") - elif isinstance(values, Bitlist): - as_integer = sum([values[i] << i for i in range(len(values))]) - return as_integer.to_bytes((values.length + 7) // 8, "little") + elif isinstance(values, Bitvector) or isinstance(values, Bitlist): + as_bytearray = [0] * ((len(values) + 7) // 8) + for i in range(len(values)): + as_bytearray[i // 8] |= values[i] << (i % 8) + return bytes(as_bytearray) return b''.join([serialize_basic(value) for value in values]) From 619b2a3573b3a0c83cc50e13adb7d97a3816539a Mon Sep 17 00:00:00 2001 From: dankrad Date: Wed, 3 Jul 2019 15:10:37 +0100 Subject: [PATCH 05/22] Update test_libs/pyspec/eth2spec/utils/ssz/ssz_impl.py Co-Authored-By: Diederik Loerakker --- test_libs/pyspec/eth2spec/utils/ssz/ssz_impl.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/test_libs/pyspec/eth2spec/utils/ssz/ssz_impl.py b/test_libs/pyspec/eth2spec/utils/ssz/ssz_impl.py index a4abef966..1e0c806d9 100644 --- a/test_libs/pyspec/eth2spec/utils/ssz/ssz_impl.py +++ b/test_libs/pyspec/eth2spec/utils/ssz/ssz_impl.py @@ -97,7 +97,7 @@ def encode_series(values: Series): def pack(values: Series): if isinstance(values, bytes): # Bytes and BytesN are already packed return values - elif isinstance(values, Bitvector) or isinstance(values, Bitlist): + elif isinstance(values, (Bitvector, Bitlist)): as_bytearray = [0] * ((len(values) + 7) // 8) for i in range(len(values)): as_bytearray[i // 8] |= values[i] << (i % 8) From e3309b0896a7058f5c325bef75266a0b47fa7519 Mon Sep 17 00:00:00 2001 From: Danny Ryan Date: Wed, 3 Jul 2019 15:09:06 -0600 Subject: [PATCH 06/22] fix test gen make command in readme --- test_generators/README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/test_generators/README.md b/test_generators/README.md index 95d7e70a8..9fdb45f4f 100644 --- a/test_generators/README.md +++ b/test_generators/README.md @@ -38,7 +38,7 @@ The `-j N` flag makes the generators run in parallel, with `N` being the amount The makefile auto-detects generators in the `test_generators` directory and provides a tests-gen target for each generator. See example: ```bash -make ./yaml_tests/shuffling/ +make ./eth2.0-spec-tests/tests/shuffling/ ``` ## Developing a generator From d483a3b573dda25347eced555cad8a6c68e601a7 Mon Sep 17 00:00:00 2001 From: Hsiao-Wei Wang Date: Thu, 4 Jul 2019 21:17:21 +0800 Subject: [PATCH 07/22] Set default `pad_for=1` --- specs/simple-serialize.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/specs/simple-serialize.md b/specs/simple-serialize.md index 8d9c33103..17ec7ad8a 100644 --- a/specs/simple-serialize.md +++ b/specs/simple-serialize.md @@ -178,7 +178,7 @@ We first define helper functions: * `pack`: Given ordered objects of the same basic type, serialize them, pack them into `BYTES_PER_CHUNK`-byte chunks, right-pad the last chunk with zero bytes, and return the chunks. * `next_pow_of_two(i)`: get the next power of 2 of `i`, if not already a power of 2, with 0 mapping to 1. 
Examples: `0->1, 1->1, 2->2, 3->4, 4->4, 6->8, 9->16` -* `merkleize(data, pad_for)`: Given ordered `BYTES_PER_CHUNK`-byte chunks, if necessary append zero chunks so that the number of chunks is a power of two, Merkleize the chunks, and return the root. +* `merkleize(data, pad_for=1)`: Given ordered `BYTES_PER_CHUNK`-byte chunks, if necessary append zero chunks so that the number of chunks is a power of two, Merkleize the chunks, and return the root. The merkleization depends on the effective input, which can be padded: if `pad_for=L`, then pad the `data` with zeroed chunks to `next_pow_of_two(L)` (virtually for memory efficiency). Then, merkleize the chunks (empty input is padded to 1 zero chunk): - If `1` chunk: A single chunk is simply that chunk, i.e. the identity when the number of chunks is one. From f37aa1acde7503f13e1bf6d3a381e334aecfcfa4 Mon Sep 17 00:00:00 2001 From: Hsiao-Wei Wang Date: Thu, 4 Jul 2019 21:26:07 +0800 Subject: [PATCH 08/22] Add `pack()` for `Bitvector` and `Bitlist` Merkleization --- specs/simple-serialize.md | 30 ++++++++++++++++-------------- 1 file changed, 16 insertions(+), 14 deletions(-) diff --git a/specs/simple-serialize.md b/specs/simple-serialize.md index 17ec7ad8a..67a99a23c 100644 --- a/specs/simple-serialize.md +++ b/specs/simple-serialize.md @@ -25,7 +25,7 @@ - [Vectors, containers, lists, unions](#vectors-containers-lists-unions) - [Deserialization](#deserialization) - [Merkleization](#merkleization) - - [Merkleization of `Bitvector[N]`](#merkleization-of-bitvectorn) + - [`Bitvector[N]`](#bitvectorn-1) - [`Bitlist[N]`](#bitlistn-1) - [Self-signed containers](#self-signed-containers) - [Implementations](#implementations) @@ -50,11 +50,12 @@ * **container**: ordered heterogeneous collection of values * python dataclass notation with key-type pairs, e.g. -```python -class ContainerExample(Container): - foo: uint64 - bar: boolean -``` + ```python + class ContainerExample(Container): + foo: uint64 + bar: boolean + ``` + * **vector**: ordered fixed-length homogeneous collection, with `N` values * notation `Vector[type, N]`, e.g. `Vector[uint64, N]` * **list**: ordered variable-length homogeneous collection, limited to `N` values @@ -168,7 +169,8 @@ return serialized_type_index + serialized_bytes Because serialization is an injective function (i.e. two distinct objects of the same type will serialize to different values) any bytestring has at most one object it could deserialize to. Efficient algorithms for computing this object can be found in [the implementations](#implementations). Note that deserialization requires hardening against invalid inputs. A non-exhaustive list: -- Offsets: out of order, out of range, mismatching minimum element size + +- Offsets: out of order, out of range, mismatching minimum element size. - Scope: Extra unused bytes, not aligned with element size. - More elements than a list limit allows. Part of enforcing consensus. @@ -179,10 +181,10 @@ We first define helper functions: * `pack`: Given ordered objects of the same basic type, serialize them, pack them into `BYTES_PER_CHUNK`-byte chunks, right-pad the last chunk with zero bytes, and return the chunks. * `next_pow_of_two(i)`: get the next power of 2 of `i`, if not already a power of 2, with 0 mapping to 1. Examples: `0->1, 1->1, 2->2, 3->4, 4->4, 6->8, 9->16` * `merkleize(data, pad_for=1)`: Given ordered `BYTES_PER_CHUNK`-byte chunks, if necessary append zero chunks so that the number of chunks is a power of two, Merkleize the chunks, and return the root. 
- The merkleization depends on the effective input, which can be padded: if `pad_for=L`, then pad the `data` with zeroed chunks to `next_pow_of_two(L)` (virtually for memory efficiency). - Then, merkleize the chunks (empty input is padded to 1 zero chunk): - - If `1` chunk: A single chunk is simply that chunk, i.e. the identity when the number of chunks is one. - - If `> 1` chunks: pad to `next_pow_of_two(len(chunks))`, merkleize as binary tree. + * The merkleization depends on the effective input, which can be padded: if `pad_for=L`, then pad the `data` with zeroed chunks to `next_pow_of_two(L)` (virtually for memory efficiency). + * Then, merkleize the chunks (empty input is padded to 1 zero chunk): + - If `1` chunk: A single chunk is simply that chunk, i.e. the identity when the number of chunks is one. + - If `> 1` chunks: pad to `next_pow_of_two(len(chunks))`, merkleize as binary tree. * `mix_in_length`: Given a Merkle root `root` and a length `length` (`"uint256"` little-endian serialization) return `hash(root + length)`. * `mix_in_type`: Given a Merkle root `root` and a type_index `type_index` (`"uint256"` little-endian serialization) return `hash(root + type_index)`. @@ -194,18 +196,18 @@ We now define Merkleization `hash_tree_root(value)` of an object `value` recursi * `mix_in_length(merkleize([hash_tree_root(element) for element in value], pad_for=N), len(value))` if `value` is a list of composite objects. * `mix_in_type(merkleize(value.value), value.type_index)` if `value` is of union type -### Merkleization of `Bitvector[N]` +### `Bitvector[N]` ```python as_integer = sum([value[i] << i for i in range(len(value))]) -return merkleize(as_integer.to_bytes((N + 7) // 8, "little")) +return merkleize(pack(as_integer.to_bytes((N + 7) // 8, "little"))) ``` ### `Bitlist[N]` ```python as_integer = sum([value[i] << i for i in range(len(value))]) -return mix_in_length(merkleize(as_integer.to_bytes((N + 7) // 8, "little")), len(value)) +return mix_in_length(merkleize(pack(as_integer.to_bytes((N + 7) // 8, "little"))), len(value)) ``` ## Self-signed containers From 21535fe901f69fe077d9456d094b134d40929302 Mon Sep 17 00:00:00 2001 From: Hsiao-Wei Wang Date: Thu, 4 Jul 2019 21:38:08 +0800 Subject: [PATCH 09/22] Fix indent --- specs/simple-serialize.md | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/specs/simple-serialize.md b/specs/simple-serialize.md index 67a99a23c..8efd08c0a 100644 --- a/specs/simple-serialize.md +++ b/specs/simple-serialize.md @@ -55,7 +55,6 @@ foo: uint64 bar: boolean ``` - * **vector**: ordered fixed-length homogeneous collection, with `N` values * notation `Vector[type, N]`, e.g. `Vector[uint64, N]` * **list**: ordered variable-length homogeneous collection, limited to `N` values @@ -102,7 +101,7 @@ We recursively define the `serialize` function which consumes an object `value` ```python assert N in [8, 16, 32, 64, 128, 256] -return value.to_bytes(N // 8, "little") +return value.to_bytes(N // BITS_PER_BYTE, "little") ``` ### `boolean` @@ -190,11 +189,11 @@ We first define helper functions: We now define Merkleization `hash_tree_root(value)` of an object `value` recursively: -* `merkleize(pack(value))` if `value` is a basic object or a vector of basic objects +* `merkleize(pack(value))` if `value` is a basic object or a vector of basic objects. * `mix_in_length(merkleize(pack(value), pad_for=(N * elem_size / BYTES_PER_CHUNK)), len(value))` if `value` is a list of basic objects. 
-* `merkleize([hash_tree_root(element) for element in value])` if `value` is a vector of composite objects or a container +* `merkleize([hash_tree_root(element) for element in value])` if `value` is a vector of composite objects or a container. * `mix_in_length(merkleize([hash_tree_root(element) for element in value], pad_for=N), len(value))` if `value` is a list of composite objects. -* `mix_in_type(merkleize(value.value), value.type_index)` if `value` is of union type +* `mix_in_type(merkleize(value.value), value.type_index)` if `value` is of union type. ### `Bitvector[N]` From c8c810c0e107a20a6ed086aa3187d6e1ddeeb330 Mon Sep 17 00:00:00 2001 From: Justin Drake Date: Fri, 5 Jul 2019 15:03:37 +0100 Subject: [PATCH 10/22] Minor fixes --- specs/core/0_beacon-chain.md | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/specs/core/0_beacon-chain.md b/specs/core/0_beacon-chain.md index 975874d51..75fa127f7 100644 --- a/specs/core/0_beacon-chain.md +++ b/specs/core/0_beacon-chain.md @@ -540,8 +540,6 @@ class BeaconState(Container): ### Math -#### `int_to_bytes` - #### `integer_squareroot` ```python @@ -560,13 +558,15 @@ def integer_squareroot(n: uint64) -> uint64: #### `xor` ```python -def xor(bytes1: Bytes32, bytes2: Bytes32) -> Bytes32: +def xor(bytes_1: Bytes32, bytes_2: Bytes32) -> Bytes32: """ Return the exclusive-or of two 32-byte strings. """ - return Bytes32(a ^ b for a, b in zip(bytes1, bytes2)) + return Bytes32(a ^ b for a, b in zip(bytes_1, bytes_2)) ``` +#### `int_to_bytes` + ```python def int_to_bytes(n: uint64, length: uint64) -> bytes: """ @@ -653,7 +653,7 @@ def is_slashable_attestation_data(data_1: AttestationData, data_2: AttestationDa ```python def is_valid_indexed_attestation(state: BeaconState, indexed_attestation: IndexedAttestation) -> bool: """ - Verify validity of ``indexed_attestation``. + Check indices and signature of ``indexed_attestation``. """ bit_0_indices = indexed_attestation.custody_bit_0_indices bit_1_indices = indexed_attestation.custody_bit_1_indices @@ -989,7 +989,7 @@ def get_total_balance(state: BeaconState, indices: Set[ValidatorIndex]) -> Gwei: """ Return the combined effective balance of the ``indices``. (1 Gwei minimum to avoid divisions by zero.) """ - return Gwei(max(sum([state.validators[index].effective_balance for index in indices]), 1)) + return Gwei(max(1, sum([state.validators[index].effective_balance for index in indices]))) ``` #### `get_total_active_balance` From 591f9658d349c8a970865d53e716ea7aed9e24d8 Mon Sep 17 00:00:00 2001 From: Justin Drake Date: Fri, 5 Jul 2019 15:04:57 +0100 Subject: [PATCH 11/22] Copy edit --- specs/core/0_beacon-chain.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/specs/core/0_beacon-chain.md b/specs/core/0_beacon-chain.md index 75fa127f7..441d6f72b 100644 --- a/specs/core/0_beacon-chain.md +++ b/specs/core/0_beacon-chain.md @@ -653,7 +653,7 @@ def is_slashable_attestation_data(data_1: AttestationData, data_2: AttestationDa ```python def is_valid_indexed_attestation(state: BeaconState, indexed_attestation: IndexedAttestation) -> bool: """ - Check indices and signature of ``indexed_attestation``. + Check if ``indexed_attestation`` has valid indices and signature. 
""" bit_0_indices = indexed_attestation.custody_bit_0_indices bit_1_indices = indexed_attestation.custody_bit_1_indices From 0eadf61631410281514ee0dce0bedeee9ccf49bd Mon Sep 17 00:00:00 2001 From: Dean Eigenmann Date: Wed, 10 Jul 2019 13:11:34 -0400 Subject: [PATCH 12/22] Update 0_beacon-chain.md --- specs/core/0_beacon-chain.md | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/specs/core/0_beacon-chain.md b/specs/core/0_beacon-chain.md index 975874d51..357ad44fc 100644 --- a/specs/core/0_beacon-chain.md +++ b/specs/core/0_beacon-chain.md @@ -49,9 +49,9 @@ - [`BeaconState`](#beaconstate) - [Helper functions](#helper-functions) - [Math](#math) - - [`int_to_bytes`](#int_to_bytes) - [`integer_squareroot`](#integer_squareroot) - [`xor`](#xor) + - [`int_to_bytes`](#int_to_bytes) - [`bytes_to_int`](#bytes_to_int) - [Crypto](#crypto) - [`hash`](#hash) @@ -540,8 +540,6 @@ class BeaconState(Container): ### Math -#### `int_to_bytes` - #### `integer_squareroot` ```python @@ -567,6 +565,8 @@ def xor(bytes1: Bytes32, bytes2: Bytes32) -> Bytes32: return Bytes32(a ^ b for a, b in zip(bytes1, bytes2)) ``` +#### `int_to_bytes` + ```python def int_to_bytes(n: uint64, length: uint64) -> bytes: """ From 6dc306700b0b343bb788c1bfca083253da3cffdb Mon Sep 17 00:00:00 2001 From: Danny Ryan Date: Thu, 11 Jul 2019 02:53:51 -0600 Subject: [PATCH 13/22] avoid overflow in slashing penalty calculation (#1286) Change presentation to avoid uint64 overflow in slashing penalty calculation. (Factor out `EFFECTIVE_BALANCE_INCREMENT` from `validator.effective_balance`.) --- specs/core/0_beacon-chain.md | 4 +++- .../epoch_processing/test_process_slashings.py | 18 ++++++++++++++---- 2 files changed, 17 insertions(+), 5 deletions(-) diff --git a/specs/core/0_beacon-chain.md b/specs/core/0_beacon-chain.md index b0b4fdce5..7f9d3964a 100644 --- a/specs/core/0_beacon-chain.md +++ b/specs/core/0_beacon-chain.md @@ -1482,7 +1482,9 @@ def process_slashings(state: BeaconState) -> None: total_balance = get_total_active_balance(state) for index, validator in enumerate(state.validators): if validator.slashed and epoch + EPOCHS_PER_SLASHINGS_VECTOR // 2 == validator.withdrawable_epoch: - penalty = validator.effective_balance * min(sum(state.slashings) * 3, total_balance) // total_balance + increment = EFFECTIVE_BALANCE_INCREMENT # Factored out from penalty numerator to avoid uint64 overflow + penalty_numerator = validator.effective_balance // increment * min(sum(state.slashings) * 3, total_balance) + penalty = penalty_numerator // total_balance * increment decrease_balance(state, ValidatorIndex(index), penalty) ``` diff --git a/test_libs/pyspec/eth2spec/test/phase_0/epoch_processing/test_process_slashings.py b/test_libs/pyspec/eth2spec/test/phase_0/epoch_processing/test_process_slashings.py index 7be23a04d..c58da5a4a 100644 --- a/test_libs/pyspec/eth2spec/test/phase_0/epoch_processing/test_process_slashings.py +++ b/test_libs/pyspec/eth2spec/test/phase_0/epoch_processing/test_process_slashings.py @@ -66,8 +66,13 @@ def test_small_penalty(spec, state): spec.process_slashings(state) yield 'post', state - assert state.balances[0] == pre_slash_balances[0] - (state.validators[0].effective_balance - * 3 * total_penalties // total_balance) + expected_penalty = ( + state.validators[0].effective_balance // spec.EFFECTIVE_BALANCE_INCREMENT + * (3 * total_penalties) + // total_balance + * spec.EFFECTIVE_BALANCE_INCREMENT + ) + assert state.balances[0] == pre_slash_balances[0] - expected_penalty @with_all_phases @@ -121,5 +126,10 
@@ def test_scaled_penalties(spec, state): for i in slashed_indices: v = state.validators[i] - penalty = v.effective_balance * total_penalties * 3 // total_balance - assert state.balances[i] == pre_slash_balances[i] - penalty + expected_penalty = ( + v.effective_balance // spec.EFFECTIVE_BALANCE_INCREMENT + * (3 * total_penalties) + // (total_balance) + * spec.EFFECTIVE_BALANCE_INCREMENT + ) + assert state.balances[i] == pre_slash_balances[i] - expected_penalty From b2c85706062d6ce3134ca6a3f02b7d5d9ebb00b9 Mon Sep 17 00:00:00 2001 From: protolambda Date: Fri, 12 Jul 2019 19:09:33 +0200 Subject: [PATCH 14/22] fix merkleization with 0-limit case, and enforce padding limit --- .../pyspec/eth2spec/utils/merkle_minimal.py | 10 ++- .../eth2spec/utils/test_merkle_minimal.py | 81 +++++++++++-------- 2 files changed, 53 insertions(+), 38 deletions(-) diff --git a/test_libs/pyspec/eth2spec/utils/merkle_minimal.py b/test_libs/pyspec/eth2spec/utils/merkle_minimal.py index e9416ea05..972b32d40 100644 --- a/test_libs/pyspec/eth2spec/utils/merkle_minimal.py +++ b/test_libs/pyspec/eth2spec/utils/merkle_minimal.py @@ -1,4 +1,4 @@ -from .hash_function import hash +from eth2spec.utils.hash_function import hash from math import log2 @@ -21,6 +21,8 @@ def calc_merkle_tree_from_leaves(values, layer_count=32): def get_merkle_root(values, pad_to=1): + if pad_to == 0: + return zerohashes[0] layer_count = int(log2(pad_to)) if len(values) == 0: return zerohashes[layer_count] @@ -36,9 +38,11 @@ def get_merkle_proof(tree, item_index): def merkleize_chunks(chunks, pad_to: int=1): - count = len(chunks) + if pad_to == 0: + return zerohashes[0] + count = min(len(chunks), pad_to) depth = max(count - 1, 0).bit_length() - max_depth = max(depth, (pad_to - 1).bit_length()) + max_depth = (pad_to - 1).bit_length() tmp = [None for _ in range(max_depth + 1)] def merge(h, i): diff --git a/test_libs/pyspec/eth2spec/utils/test_merkle_minimal.py b/test_libs/pyspec/eth2spec/utils/test_merkle_minimal.py index f1ed768e6..52e50d57a 100644 --- a/test_libs/pyspec/eth2spec/utils/test_merkle_minimal.py +++ b/test_libs/pyspec/eth2spec/utils/test_merkle_minimal.py @@ -8,7 +8,8 @@ def h(a: bytes, b: bytes) -> bytes: def e(v: int) -> bytes: - return v.to_bytes(length=32, byteorder='little') + # prefix with 0xfff... 
to make it non-zero + return b'\xff' * 28 + v.to_bytes(length=4, byteorder='little') def z(i: int) -> bytes: @@ -16,44 +17,54 @@ def z(i: int) -> bytes: cases = [ - (0, 0, 1, z(0)), - (0, 1, 1, e(0)), - (1, 0, 2, h(z(0), z(0))), - (1, 1, 2, h(e(0), z(0))), - (1, 2, 2, h(e(0), e(1))), - (2, 0, 4, h(h(z(0), z(0)), z(1))), - (2, 1, 4, h(h(e(0), z(0)), z(1))), - (2, 2, 4, h(h(e(0), e(1)), z(1))), - (2, 3, 4, h(h(e(0), e(1)), h(e(2), z(0)))), - (2, 4, 4, h(h(e(0), e(1)), h(e(2), e(3)))), - (3, 0, 8, h(h(h(z(0), z(0)), z(1)), z(2))), - (3, 1, 8, h(h(h(e(0), z(0)), z(1)), z(2))), - (3, 2, 8, h(h(h(e(0), e(1)), z(1)), z(2))), - (3, 3, 8, h(h(h(e(0), e(1)), h(e(2), z(0))), z(2))), - (3, 4, 8, h(h(h(e(0), e(1)), h(e(2), e(3))), z(2))), - (3, 5, 8, h(h(h(e(0), e(1)), h(e(2), e(3))), h(h(e(4), z(0)), z(1)))), - (3, 6, 8, h(h(h(e(0), e(1)), h(e(2), e(3))), h(h(e(4), e(5)), h(z(0), z(0))))), - (3, 7, 8, h(h(h(e(0), e(1)), h(e(2), e(3))), h(h(e(4), e(5)), h(e(6), z(0))))), - (3, 8, 8, h(h(h(e(0), e(1)), h(e(2), e(3))), h(h(e(4), e(5)), h(e(6), e(7))))), - (4, 0, 16, h(h(h(h(z(0), z(0)), z(1)), z(2)), z(3))), - (4, 1, 16, h(h(h(h(e(0), z(0)), z(1)), z(2)), z(3))), - (4, 2, 16, h(h(h(h(e(0), e(1)), z(1)), z(2)), z(3))), - (4, 3, 16, h(h(h(h(e(0), e(1)), h(e(2), z(0))), z(2)), z(3))), - (4, 4, 16, h(h(h(h(e(0), e(1)), h(e(2), e(3))), z(2)), z(3))), - (4, 5, 16, h(h(h(h(e(0), e(1)), h(e(2), e(3))), h(h(e(4), z(0)), z(1))), z(3))), - (4, 6, 16, h(h(h(h(e(0), e(1)), h(e(2), e(3))), h(h(e(4), e(5)), h(z(0), z(0)))), z(3))), - (4, 7, 16, h(h(h(h(e(0), e(1)), h(e(2), e(3))), h(h(e(4), e(5)), h(e(6), z(0)))), z(3))), - (4, 8, 16, h(h(h(h(e(0), e(1)), h(e(2), e(3))), h(h(e(4), e(5)), h(e(6), e(7)))), z(3))), - (4, 9, 16, - h(h(h(h(e(0), e(1)), h(e(2), e(3))), h(h(e(4), e(5)), h(e(6), e(7)))), h(h(h(e(8), z(0)), z(1)), z(2)))), + # limit 0: always zero hash + (0, 0, z(0)), + (1, 0, z(0)), # cut-off due to limit + (2, 0, z(0)), # cut-off due to limit + # limit 1: padded to 1 element if not already. 
Returned (like identity func) + (0, 1, z(0)), + (1, 1, e(0)), + (2, 1, e(0)), # cut-off due to limit + (1, 1, e(0)), + (0, 2, h(z(0), z(0))), + (1, 2, h(e(0), z(0))), + (2, 2, h(e(0), e(1))), + (3, 2, h(e(0), e(1))), # cut-off due to limit + (16, 2, h(e(0), e(1))), # bigger cut-off due to limit + (0, 4, h(h(z(0), z(0)), z(1))), + (1, 4, h(h(e(0), z(0)), z(1))), + (2, 4, h(h(e(0), e(1)), z(1))), + (3, 4, h(h(e(0), e(1)), h(e(2), z(0)))), + (4, 4, h(h(e(0), e(1)), h(e(2), e(3)))), + (5, 4, h(h(e(0), e(1)), h(e(2), e(3)))), # cut-off due to limit + (0, 8, h(h(h(z(0), z(0)), z(1)), z(2))), + (1, 8, h(h(h(e(0), z(0)), z(1)), z(2))), + (2, 8, h(h(h(e(0), e(1)), z(1)), z(2))), + (3, 8, h(h(h(e(0), e(1)), h(e(2), z(0))), z(2))), + (4, 8, h(h(h(e(0), e(1)), h(e(2), e(3))), z(2))), + (5, 8, h(h(h(e(0), e(1)), h(e(2), e(3))), h(h(e(4), z(0)), z(1)))), + (6, 8, h(h(h(e(0), e(1)), h(e(2), e(3))), h(h(e(4), e(5)), h(z(0), z(0))))), + (7, 8, h(h(h(e(0), e(1)), h(e(2), e(3))), h(h(e(4), e(5)), h(e(6), z(0))))), + (8, 8, h(h(h(e(0), e(1)), h(e(2), e(3))), h(h(e(4), e(5)), h(e(6), e(7))))), + (9, 8, h(h(h(e(0), e(1)), h(e(2), e(3))), h(h(e(4), e(5)), h(e(6), e(7))))), # cut-off due to limit + (0, 16, h(h(h(h(z(0), z(0)), z(1)), z(2)), z(3))), + (1, 16, h(h(h(h(e(0), z(0)), z(1)), z(2)), z(3))), + (2, 16, h(h(h(h(e(0), e(1)), z(1)), z(2)), z(3))), + (3, 16, h(h(h(h(e(0), e(1)), h(e(2), z(0))), z(2)), z(3))), + (4, 16, h(h(h(h(e(0), e(1)), h(e(2), e(3))), z(2)), z(3))), + (5, 16, h(h(h(h(e(0), e(1)), h(e(2), e(3))), h(h(e(4), z(0)), z(1))), z(3))), + (6, 16, h(h(h(h(e(0), e(1)), h(e(2), e(3))), h(h(e(4), e(5)), h(z(0), z(0)))), z(3))), + (7, 16, h(h(h(h(e(0), e(1)), h(e(2), e(3))), h(h(e(4), e(5)), h(e(6), z(0)))), z(3))), + (8, 16, h(h(h(h(e(0), e(1)), h(e(2), e(3))), h(h(e(4), e(5)), h(e(6), e(7)))), z(3))), + (9, 16, h(h(h(h(e(0), e(1)), h(e(2), e(3))), h(h(e(4), e(5)), h(e(6), e(7)))), h(h(h(e(8), z(0)), z(1)), z(2)))), ] @pytest.mark.parametrize( - 'depth,count,pow2,value', + 'count,pad_to,value', cases, ) -def test_merkleize_chunks_and_get_merkle_root(depth, count, pow2, value): +def test_merkleize_chunks_and_get_merkle_root(count, pad_to, value): chunks = [e(i) for i in range(count)] - assert merkleize_chunks(chunks, pad_to=pow2) == value - assert get_merkle_root(chunks, pad_to=pow2) == value + assert merkleize_chunks(chunks, pad_to=pad_to) == value + assert get_merkle_root(chunks, pad_to=pad_to) == value From 65b031158297e7ed938af1f73f3919100c3189d6 Mon Sep 17 00:00:00 2001 From: protolambda Date: Fri, 12 Jul 2019 20:39:55 +0200 Subject: [PATCH 15/22] more explicit about merkleization limit/pad --- test_libs/pyspec/eth2spec/utils/merkle_minimal.py | 13 +++++++++---- test_libs/pyspec/eth2spec/utils/ssz/ssz_impl.py | 3 ++- .../pyspec/eth2spec/utils/test_merkle_minimal.py | 8 ++++---- 3 files changed, 15 insertions(+), 9 deletions(-) diff --git a/test_libs/pyspec/eth2spec/utils/merkle_minimal.py b/test_libs/pyspec/eth2spec/utils/merkle_minimal.py index 972b32d40..503926517 100644 --- a/test_libs/pyspec/eth2spec/utils/merkle_minimal.py +++ b/test_libs/pyspec/eth2spec/utils/merkle_minimal.py @@ -37,12 +37,17 @@ def get_merkle_proof(tree, item_index): return proof -def merkleize_chunks(chunks, pad_to: int=1): - if pad_to == 0: +def merkleize_chunks(chunks, limit=None): + # If no limit is defined, we are just merkleizing chunks (e.g. SSZ container). + if limit is None: + limit = len(chunks) + if limit == 0: return zerohashes[0] - count = min(len(chunks), pad_to) + # Limit strictly. 
Makes no sense to merkleize objects above the intended padding. + # And illegal to exceed list limits, just as with serialization. + count = min(len(chunks), limit) depth = max(count - 1, 0).bit_length() - max_depth = (pad_to - 1).bit_length() + max_depth = (limit - 1).bit_length() tmp = [None for _ in range(max_depth + 1)] def merge(h, i): diff --git a/test_libs/pyspec/eth2spec/utils/ssz/ssz_impl.py b/test_libs/pyspec/eth2spec/utils/ssz/ssz_impl.py index d5855a755..5b37a2bb7 100644 --- a/test_libs/pyspec/eth2spec/utils/ssz/ssz_impl.py +++ b/test_libs/pyspec/eth2spec/utils/ssz/ssz_impl.py @@ -126,6 +126,7 @@ def item_length(typ: SSZType) -> int: def chunk_count(typ: SSZType) -> int: + # note that for lists, .length *on the type* describes the list limit. if isinstance(typ, BasicType): return 1 elif issubclass(typ, Bits): @@ -150,7 +151,7 @@ def hash_tree_root(obj: SSZValue): raise Exception(f"Type not supported: {type(obj)}") if isinstance(obj, (List, Bytes, Bitlist)): - return mix_in_length(merkleize_chunks(leaves, pad_to=chunk_count(obj.type())), len(obj)) + return mix_in_length(merkleize_chunks(leaves, limit=chunk_count(obj.type())), len(obj)) else: return merkleize_chunks(leaves) diff --git a/test_libs/pyspec/eth2spec/utils/test_merkle_minimal.py b/test_libs/pyspec/eth2spec/utils/test_merkle_minimal.py index 52e50d57a..a40ec05cf 100644 --- a/test_libs/pyspec/eth2spec/utils/test_merkle_minimal.py +++ b/test_libs/pyspec/eth2spec/utils/test_merkle_minimal.py @@ -61,10 +61,10 @@ cases = [ @pytest.mark.parametrize( - 'count,pad_to,value', + 'count,limit,value', cases, ) -def test_merkleize_chunks_and_get_merkle_root(count, pad_to, value): +def test_merkleize_chunks_and_get_merkle_root(count, limit, value): chunks = [e(i) for i in range(count)] - assert merkleize_chunks(chunks, pad_to=pad_to) == value - assert get_merkle_root(chunks, pad_to=pad_to) == value + assert merkleize_chunks(chunks, limit=limit) == value + assert get_merkle_root(chunks, pad_to=limit) == value From a8dc9157b8c288a8f2491370762cde89a05dedac Mon Sep 17 00:00:00 2001 From: protolambda Date: Fri, 12 Jul 2019 21:15:28 +0200 Subject: [PATCH 16/22] clean up merkleization text in SSZ spec --- specs/simple-serialize.md | 40 ++++++++++++++++++--------------------- 1 file changed, 18 insertions(+), 22 deletions(-) diff --git a/specs/simple-serialize.md b/specs/simple-serialize.md index 8efd08c0a..6c6377843 100644 --- a/specs/simple-serialize.md +++ b/specs/simple-serialize.md @@ -25,8 +25,6 @@ - [Vectors, containers, lists, unions](#vectors-containers-lists-unions) - [Deserialization](#deserialization) - [Merkleization](#merkleization) - - [`Bitvector[N]`](#bitvectorn-1) - - [`Bitlist[N]`](#bitlistn-1) - [Self-signed containers](#self-signed-containers) - [Implementations](#implementations) @@ -177,38 +175,36 @@ Note that deserialization requires hardening against invalid inputs. A non-exhau We first define helper functions: +* `chunk_count(type)`: calculate the amount of leafs for merkleization of the type. + * all basic types: `1` + * bitlists and bitvectors: `(N + 255) // 256` (dividing by chunk size, rounding up) + * lists and vectors of basic types: `N * item_length(elem_type) + 31) // 32` (dividing by chunk size, rounding up) + * lists and vectors of composite types: `N` + * containers: `len(fields)` +* `bitfield_bytes(bits)`: return the bits of the bitlist or bitvector, packed in bytes, aligned to the start. Exclusive length-delimiting bit for bitlists. 
* `pack`: Given ordered objects of the same basic type, serialize them, pack them into `BYTES_PER_CHUNK`-byte chunks, right-pad the last chunk with zero bytes, and return the chunks. * `next_pow_of_two(i)`: get the next power of 2 of `i`, if not already a power of 2, with 0 mapping to 1. Examples: `0->1, 1->1, 2->2, 3->4, 4->4, 6->8, 9->16` -* `merkleize(data, pad_for=1)`: Given ordered `BYTES_PER_CHUNK`-byte chunks, if necessary append zero chunks so that the number of chunks is a power of two, Merkleize the chunks, and return the root. - * The merkleization depends on the effective input, which can be padded: if `pad_for=L`, then pad the `data` with zeroed chunks to `next_pow_of_two(L)` (virtually for memory efficiency). +* `merkleize(chunks, limit=None)`: Given ordered `BYTES_PER_CHUNK`-byte chunks, merkleize the chunks, and return the root: + * The merkleization depends on the effective input, which can be padded/limited: + - if no limit: pad the `chunks` with zeroed chunks to `next_pow_of_two(len(chunks))` (virtually for memory efficiency). + - if `limit > len(chunks)`, pad the `chunks` with zeroed chunks to `next_pow_of_two(limit)` (virtually for memory efficiency). + - if `limit < len(chunks)`: do not merkleize, input exceeds limit. Raise an error instead. * Then, merkleize the chunks (empty input is padded to 1 zero chunk): - - If `1` chunk: A single chunk is simply that chunk, i.e. the identity when the number of chunks is one. - - If `> 1` chunks: pad to `next_pow_of_two(len(chunks))`, merkleize as binary tree. + - If `1` chunk: the root is the chunk itself. + - If `> 1` chunks: merkleize as binary tree. * `mix_in_length`: Given a Merkle root `root` and a length `length` (`"uint256"` little-endian serialization) return `hash(root + length)`. * `mix_in_type`: Given a Merkle root `root` and a type_index `type_index` (`"uint256"` little-endian serialization) return `hash(root + type_index)`. We now define Merkleization `hash_tree_root(value)` of an object `value` recursively: * `merkleize(pack(value))` if `value` is a basic object or a vector of basic objects. -* `mix_in_length(merkleize(pack(value), pad_for=(N * elem_size / BYTES_PER_CHUNK)), len(value))` if `value` is a list of basic objects. +* `merkleize(bitfield_bytes(value), limit=chunk_count(type))` if `value` is a bitvector. +* `mix_in_length(merkleize(pack(value), limit=chunk_count(type)), len(value))` if `value` is a list of basic objects. +* `mix_in_length(merkleize(bitfield_bytes(value), limit=chunk_count(type)), len(value))` if `value` is a bitlist. * `merkleize([hash_tree_root(element) for element in value])` if `value` is a vector of composite objects or a container. -* `mix_in_length(merkleize([hash_tree_root(element) for element in value], pad_for=N), len(value))` if `value` is a list of composite objects. +* `mix_in_length(merkleize([hash_tree_root(element) for element in value], limit=chunk_count(type)), len(value))` if `value` is a list of composite objects. * `mix_in_type(merkleize(value.value), value.type_index)` if `value` is of union type. -### `Bitvector[N]` - -```python -as_integer = sum([value[i] << i for i in range(len(value))]) -return merkleize(pack(as_integer.to_bytes((N + 7) // 8, "little"))) -``` - -### `Bitlist[N]` - -```python -as_integer = sum([value[i] << i for i in range(len(value))]) -return mix_in_length(merkleize(pack(as_integer.to_bytes((N + 7) // 8, "little"))), len(value)) -``` - ## Self-signed containers Let `value` be a self-signed container object. 
The convention is that the signature (e.g. a `"bytes96"` BLS12-381 signature) be the last field of `value`. Further, the signed message for `value` is `signing_root(value) = hash_tree_root(truncate_last(value))` where `truncate_last` truncates the last element of `value`. From 5a13684c7f2a9b274d1772ce6aa2db03cb0112fb Mon Sep 17 00:00:00 2001 From: protolambda Date: Fri, 12 Jul 2019 21:23:45 +0200 Subject: [PATCH 17/22] make exceeding limit raise an error --- .../pyspec/eth2spec/utils/merkle_minimal.py | 10 +++++-- .../eth2spec/utils/test_merkle_minimal.py | 28 +++++++++++++------ 2 files changed, 26 insertions(+), 12 deletions(-) diff --git a/test_libs/pyspec/eth2spec/utils/merkle_minimal.py b/test_libs/pyspec/eth2spec/utils/merkle_minimal.py index 503926517..9d7138d7d 100644 --- a/test_libs/pyspec/eth2spec/utils/merkle_minimal.py +++ b/test_libs/pyspec/eth2spec/utils/merkle_minimal.py @@ -41,11 +41,15 @@ def merkleize_chunks(chunks, limit=None): # If no limit is defined, we are just merkleizing chunks (e.g. SSZ container). if limit is None: limit = len(chunks) + + count = len(chunks) + # See if the input is within expected size. + # If not, a list-limit is set incorrectly, or a value is unexpectedly large. + assert count <= limit + if limit == 0: return zerohashes[0] - # Limit strictly. Makes no sense to merkleize objects above the intended padding. - # And illegal to exceed list limits, just as with serialization. - count = min(len(chunks), limit) + depth = max(count - 1, 0).bit_length() max_depth = (limit - 1).bit_length() tmp = [None for _ in range(max_depth + 1)] diff --git a/test_libs/pyspec/eth2spec/utils/test_merkle_minimal.py b/test_libs/pyspec/eth2spec/utils/test_merkle_minimal.py index a40ec05cf..3746ea6ca 100644 --- a/test_libs/pyspec/eth2spec/utils/test_merkle_minimal.py +++ b/test_libs/pyspec/eth2spec/utils/test_merkle_minimal.py @@ -19,24 +19,24 @@ def z(i: int) -> bytes: cases = [ # limit 0: always zero hash (0, 0, z(0)), - (1, 0, z(0)), # cut-off due to limit - (2, 0, z(0)), # cut-off due to limit + (1, 0, None), # cut-off due to limit + (2, 0, None), # cut-off due to limit # limit 1: padded to 1 element if not already. 
Returned (like identity func) (0, 1, z(0)), (1, 1, e(0)), - (2, 1, e(0)), # cut-off due to limit + (2, 1, None), # cut-off due to limit (1, 1, e(0)), (0, 2, h(z(0), z(0))), (1, 2, h(e(0), z(0))), (2, 2, h(e(0), e(1))), - (3, 2, h(e(0), e(1))), # cut-off due to limit - (16, 2, h(e(0), e(1))), # bigger cut-off due to limit + (3, 2, None), # cut-off due to limit + (16, 2, None), # bigger cut-off due to limit (0, 4, h(h(z(0), z(0)), z(1))), (1, 4, h(h(e(0), z(0)), z(1))), (2, 4, h(h(e(0), e(1)), z(1))), (3, 4, h(h(e(0), e(1)), h(e(2), z(0)))), (4, 4, h(h(e(0), e(1)), h(e(2), e(3)))), - (5, 4, h(h(e(0), e(1)), h(e(2), e(3)))), # cut-off due to limit + (5, 4, None), # cut-off due to limit (0, 8, h(h(h(z(0), z(0)), z(1)), z(2))), (1, 8, h(h(h(e(0), z(0)), z(1)), z(2))), (2, 8, h(h(h(e(0), e(1)), z(1)), z(2))), @@ -46,7 +46,7 @@ cases = [ (6, 8, h(h(h(e(0), e(1)), h(e(2), e(3))), h(h(e(4), e(5)), h(z(0), z(0))))), (7, 8, h(h(h(e(0), e(1)), h(e(2), e(3))), h(h(e(4), e(5)), h(e(6), z(0))))), (8, 8, h(h(h(e(0), e(1)), h(e(2), e(3))), h(h(e(4), e(5)), h(e(6), e(7))))), - (9, 8, h(h(h(e(0), e(1)), h(e(2), e(3))), h(h(e(4), e(5)), h(e(6), e(7))))), # cut-off due to limit + (9, 8, None), # cut-off due to limit (0, 16, h(h(h(h(z(0), z(0)), z(1)), z(2)), z(3))), (1, 16, h(h(h(h(e(0), z(0)), z(1)), z(2)), z(3))), (2, 16, h(h(h(h(e(0), e(1)), z(1)), z(2)), z(3))), @@ -66,5 +66,15 @@ cases = [ ) def test_merkleize_chunks_and_get_merkle_root(count, limit, value): chunks = [e(i) for i in range(count)] - assert merkleize_chunks(chunks, limit=limit) == value - assert get_merkle_root(chunks, pad_to=limit) == value + if value is None: + bad = False + try: + merkleize_chunks(chunks, limit=limit) + bad = True + except AssertionError: + pass + if bad: + assert False, "expected merkleization to be invalid" + else: + assert merkleize_chunks(chunks, limit=limit) == value + assert get_merkle_root(chunks, pad_to=limit) == value From b98679957b5e056e852c4a5f57b1eab4d29da118 Mon Sep 17 00:00:00 2001 From: protolambda Date: Fri, 12 Jul 2019 22:11:33 +0200 Subject: [PATCH 18/22] use as_bytes function to reduce code duplication, and for later usage --- .../pyspec/eth2spec/utils/ssz/ssz_impl.py | 23 ++++++++----------- .../pyspec/eth2spec/utils/ssz/ssz_typing.py | 7 +++++- 2 files changed, 16 insertions(+), 14 deletions(-) diff --git a/test_libs/pyspec/eth2spec/utils/ssz/ssz_impl.py b/test_libs/pyspec/eth2spec/utils/ssz/ssz_impl.py index 1e0c806d9..2a7d92314 100644 --- a/test_libs/pyspec/eth2spec/utils/ssz/ssz_impl.py +++ b/test_libs/pyspec/eth2spec/utils/ssz/ssz_impl.py @@ -41,15 +41,13 @@ def serialize(obj: SSZValue): if isinstance(obj, BasicValue): return serialize_basic(obj) elif isinstance(obj, Bitvector): - as_bytearray = [0] * ((len(obj) + 7) // 8) - for i in range(len(obj)): - as_bytearray[i // 8] |= obj[i] << (i % 8) - return bytes(as_bytearray) + return obj.as_bytes() elif isinstance(obj, Bitlist): - as_bytearray = [0] * (len(obj) // 8 + 1) - for i in range(len(obj)): - as_bytearray[i // 8] |= obj[i] << (i % 8) - as_bytearray[len(obj) // 8] |= 1 << (len(obj) % 8) + as_bytearray = list(obj.as_bytes()) + if len(obj) % 8 == 0: + as_bytearray.append(1) + else: + as_bytearray[len(obj) // 8] |= 1 << (len(obj) % 8) return bytes(as_bytearray) elif isinstance(obj, Series): return encode_series(obj) @@ -97,11 +95,10 @@ def encode_series(values: Series): def pack(values: Series): if isinstance(values, bytes): # Bytes and BytesN are already packed return values - elif isinstance(values, (Bitvector, Bitlist)): - as_bytearray = [0] 
* ((len(values) + 7) // 8) - for i in range(len(values)): - as_bytearray[i // 8] |= values[i] << (i % 8) - return bytes(as_bytearray) + elif isinstance(values, Bits): + # packs the bits in bytes, left-aligned. + # Exclusive length delimiting bits for bitlists. + return values.as_bytes() return b''.join([serialize_basic(value) for value in values]) diff --git a/test_libs/pyspec/eth2spec/utils/ssz/ssz_typing.py b/test_libs/pyspec/eth2spec/utils/ssz/ssz_typing.py index 2ec4b5ce2..1f199e6e1 100644 --- a/test_libs/pyspec/eth2spec/utils/ssz/ssz_typing.py +++ b/test_libs/pyspec/eth2spec/utils/ssz/ssz_typing.py @@ -354,7 +354,12 @@ class BitElementsType(ElementsType): class Bits(BaseList, metaclass=BitElementsType): - pass + + def as_bytes(self): + as_bytearray = [0] * ((len(self) + 7) // 8) + for i in range(len(self)): + as_bytearray[i // 8] |= int(self[i]) << (i % 8) + return bytes(as_bytearray) class Bitlist(Bits): From ac6d019870b8a79ad57a9043f30ec5cc4eafe82e Mon Sep 17 00:00:00 2001 From: protolambda Date: Fri, 12 Jul 2019 22:20:07 +0200 Subject: [PATCH 19/22] bits serialization clear now, directly to bytes --- specs/simple-serialize.md | 13 +++++++++---- 1 file changed, 9 insertions(+), 4 deletions(-) diff --git a/specs/simple-serialize.md b/specs/simple-serialize.md index 8d9c33103..915cb772a 100644 --- a/specs/simple-serialize.md +++ b/specs/simple-serialize.md @@ -120,8 +120,10 @@ return b"" ### `Bitvector[N]` ```python -as_integer = sum([value[i] << i for i in range(len(value))]) -return as_integer.to_bytes((N + 7) // 8, "little") +array = [0] * ((N + 7) // 8) +for i in range(N): + array[i // 8] |= value[i] << (i % 8) +return bytes(array) ``` ### `Bitlist[N]` @@ -129,8 +131,11 @@ return as_integer.to_bytes((N + 7) // 8, "little") Note that from the offset coding, the length (in bytes) of the bitlist is known. An additional leading `1` bit is added so that the length in bits will also be known. ```python -as_integer = (1 << len(value)) + sum([value[i] << i for i in range(len(value))]) -return as_integer.to_bytes((as_integer.bit_length() + 7) // 8, "little") +array = [0] * ((len(value) // 8) + 1) +for i in range(len(value)): + array[i // 8] |= value[i] << (i % 8) +array[len(value) // 8] |= 1 << (len(value) % 8) +return bytes(array) ``` ### Vectors, containers, lists, unions From 8970b71ca405d3971ca973d5a5f3b6afba7d6964 Mon Sep 17 00:00:00 2001 From: Danny Ryan Date: Sun, 14 Jul 2019 16:05:51 -0600 Subject: [PATCH 20/22] ensure min_seed_lookahead functions properly --- specs/core/0_beacon-chain.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/specs/core/0_beacon-chain.md b/specs/core/0_beacon-chain.md index ebfe41a6a..49c64e3ed 100644 --- a/specs/core/0_beacon-chain.md +++ b/specs/core/0_beacon-chain.md @@ -865,7 +865,7 @@ def get_seed(state: BeaconState, epoch: Epoch) -> Hash: """ Return the seed at ``epoch``. 
""" - mix = get_randao_mix(state, Epoch(epoch + EPOCHS_PER_HISTORICAL_VECTOR - MIN_SEED_LOOKAHEAD)) # Avoid underflow + mix = get_randao_mix(state, Epoch(epoch + EPOCHS_PER_HISTORICAL_VECTOR - MIN_SEED_LOOKAHEAD - 1)) # Avoid underflow active_index_root = state.active_index_roots[epoch % EPOCHS_PER_HISTORICAL_VECTOR] return hash(mix + active_index_root + int_to_bytes(epoch, length=32)) ``` From d9fd1d3a2a4c73148757adf92787f5600807a21c Mon Sep 17 00:00:00 2001 From: protolambda Date: Mon, 15 Jul 2019 00:12:12 +0200 Subject: [PATCH 21/22] improve type wording based on PR 1292 feedback --- specs/simple-serialize.md | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/specs/simple-serialize.md b/specs/simple-serialize.md index 6c6377843..01440c1cd 100644 --- a/specs/simple-serialize.md +++ b/specs/simple-serialize.md @@ -175,11 +175,12 @@ Note that deserialization requires hardening against invalid inputs. A non-exhau We first define helper functions: +* `size_of(B)`, where `B` is a basic type: the length, in bytes, of the serialized form of the basic type. * `chunk_count(type)`: calculate the amount of leafs for merkleization of the type. * all basic types: `1` - * bitlists and bitvectors: `(N + 255) // 256` (dividing by chunk size, rounding up) - * lists and vectors of basic types: `N * item_length(elem_type) + 31) // 32` (dividing by chunk size, rounding up) - * lists and vectors of composite types: `N` + * `Bitlist[N]` and `Bitvector[N]`: `(N + 255) // 256` (dividing by chunk size, rounding up) + * `List[B, N]` and `Vector[B, N]`, where `B` is a basic type: `(N * size_of(B) + 31) // 32` (dividing by chunk size, rounding up) + * `List[C, N]` and `Vector[C, N]`, where `C` is a composite type: `N` * containers: `len(fields)` * `bitfield_bytes(bits)`: return the bits of the bitlist or bitvector, packed in bytes, aligned to the start. Exclusive length-delimiting bit for bitlists. * `pack`: Given ordered objects of the same basic type, serialize them, pack them into `BYTES_PER_CHUNK`-byte chunks, right-pad the last chunk with zero bytes, and return the chunks. 
From ef659144b48ca45afd3b67cd5162ba25144a4e21 Mon Sep 17 00:00:00 2001 From: protolambda Date: Mon, 15 Jul 2019 02:05:04 +0200 Subject: [PATCH 22/22] make zero hash representation clear, fixes #1282 --- specs/core/0_beacon-chain.md | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/specs/core/0_beacon-chain.md b/specs/core/0_beacon-chain.md index 49c64e3ed..2682807b2 100644 --- a/specs/core/0_beacon-chain.md +++ b/specs/core/0_beacon-chain.md @@ -1216,7 +1216,7 @@ def process_slot(state: BeaconState) -> None: previous_state_root = hash_tree_root(state) state.state_roots[state.slot % SLOTS_PER_HISTORICAL_ROOT] = previous_state_root # Cache latest block header state root - if state.latest_block_header.state_root == Hash(): + if state.latest_block_header.state_root == Bytes32(): state.latest_block_header.state_root = previous_state_root # Cache block root previous_block_root = signing_root(state.latest_block_header) @@ -1548,8 +1548,9 @@ def process_block_header(state: BeaconState, block: BeaconBlock) -> None: state.latest_block_header = BeaconBlockHeader( slot=block.slot, parent_root=block.parent_root, - state_root=Hash(), # Overwritten in the next `process_slot` call + # state_root: zeroed, overwritten in the next `process_slot` call body_root=hash_tree_root(block.body), + # signature is always zeroed ) # Verify proposer is not slashed proposer = state.validators[get_beacon_proposer_index(state)] @@ -1672,7 +1673,7 @@ def process_attestation(state: BeaconState, attestation: Attestation) -> None: assert data.crosslink.parent_root == hash_tree_root(parent_crosslink) assert data.crosslink.start_epoch == parent_crosslink.end_epoch assert data.crosslink.end_epoch == min(data.target.epoch, parent_crosslink.end_epoch + MAX_EPOCHS_PER_CROSSLINK) - assert data.crosslink.data_root == Hash() # [to be removed in phase 1] + assert data.crosslink.data_root == Bytes32() # [to be removed in phase 1] # Check signature assert is_valid_indexed_attestation(state, get_indexed_attestation(state, attestation))