Add Bitvector and Bitlist

Bool, Bit -> boolean, bit
Fix simple-serialize.md
This commit is contained in:
Dankrad Feist 2019-06-27 09:51:06 +01:00
parent ab012b8adf
commit 02f6ba36f0
No known key found for this signature in database
GPG Key ID: 6815E6A20BEBBABA
11 changed files with 127 additions and 59 deletions

View File

@ -25,7 +25,7 @@ from eth2spec.utils.ssz.ssz_impl import (
signing_root,
)
from eth2spec.utils.ssz.ssz_typing import (
Bit, Bool, Container, List, Vector, Bytes, uint64,
bit, boolean, Container, List, Vector, Bytes, uint64,
Bytes4, Bytes32, Bytes48, Bytes96,
)
from eth2spec.utils.bls import (
@ -52,7 +52,7 @@ from eth2spec.utils.ssz.ssz_impl import (
is_empty,
)
from eth2spec.utils.ssz.ssz_typing import (
Bit, Bool, Container, List, Vector, Bytes, uint64,
bit, boolean, Container, List, Vector, Bytes, uint64,
Bytes4, Bytes32, Bytes48, Bytes96,
)
from eth2spec.utils.bls import (
@ -174,7 +174,7 @@ def combine_constants(old_constants: Dict[str, str], new_constants: Dict[str, st
ignored_dependencies = [
'Bit', 'Bool', 'Vector', 'List', 'Container', 'Hash', 'BLSPubkey', 'BLSSignature', 'Bytes', 'BytesN'
'bit', 'boolean', 'Vector', 'List', 'Container', 'Hash', 'BLSPubkey', 'BLSSignature', 'Bytes', 'BytesN'
'Bytes4', 'Bytes32', 'Bytes48', 'Bytes96',
'uint8', 'uint16', 'uint32', 'uint64', 'uint128', 'uint256',
'bytes' # to be removed after updating spec doc

View File

@ -297,7 +297,7 @@ class Validator(Container):
pubkey: BLSPubkey
withdrawal_credentials: Hash # Commitment to pubkey for withdrawals and transfers
effective_balance: Gwei # Balance at stake
slashed: Bool
slashed: boolean
# Status epochs
activation_eligibility_epoch: Epoch # When criteria for activation were met
activation_epoch: Epoch
@ -337,7 +337,7 @@ class AttestationData(Container):
```python
class AttestationDataAndCustodyBit(Container):
data: AttestationData
custody_bit: Bit # Challengeable bit (SSZ-bool, 1 byte) for the custody of crosslink data
custody_bit: bit # Challengeable bit (SSZ-bool, 1 byte) for the custody of crosslink data
```
#### `IndexedAttestation`

View File

@ -15,9 +15,9 @@
- [Default values](#default-values)
- [Illegal types](#illegal-types)
- [Serialization](#serialization)
- [`"uintN"`](#uintn)
- [`"bool"`](#bool)
- [`"null`](#null)
- [`uintN`](#uintn)
- [`boolean`](#boolean)
- [`null`](#null)
- [Vectors, containers, lists, unions](#vectors-containers-lists-unions)
- [Deserialization](#deserialization)
- [Merkleization](#merkleization)
@ -37,36 +37,45 @@
## Typing
### Basic types
* `"uintN"`: `N`-bit unsigned integer (where `N in [8, 16, 32, 64, 128, 256]`)
* `"bool"`: `True` or `False`
* `uintN`: `N`-bit unsigned integer (where `N in [8, 16, 32, 64, 128, 256]`)
* `boolean`: `True` or `False`
### Composite types
* **container**: ordered heterogeneous collection of values
* key-pair curly bracket notation `{}`, e.g. `{"foo": "uint64", "bar": "bool"}`
* python dataclass notation with key-type pairs, e.g.
```python
class ContainerExample(Container):
foo: uint64
bar: boolean
```
* **vector**: ordered fixed-length homogeneous collection of values
* angle bracket notation `[type, N]`, e.g. `["uint64", N]`
* **list**: ordered variable-length homogeneous collection of values
* angle bracket notation `[type]`, e.g. `["uint64"]`
* notation `Vector[type, N]`, e.g. `Vector[uint64, N]`
* **list**: ordered variable-length homogeneous collection of values, with maximum length `N`
* notation `List[type, N]`, e.g. `List[uint64, N]`
* **union**: union type containing one of the given subtypes
* round bracket notation `(type_1, type_2, ...)`, e.g. `("null", "uint64")`
* notation `Union[type_1, type_2, ...]`, e.g. `union[null, uint64]`
* **Bitvector**: a fixed-length list of `boolean` values
* notation `Bitvector[N]`
* **Bitlist**: a variable-length list of `boolean` values with maximum length `N`
* notation `Bitlist[N]`
### Variable-size and fixed-size
We recursively define "variable-size" types to be lists and unions and all types that contain a variable-size type. All other types are said to be "fixed-size".
We recursively define "variable-size" types to be lists, unions, `Bitlist` and all types that contain a variable-size type. All other types are said to be "fixed-size".
### Aliases
For convenience we alias:
* `"byte"` to `"uint8"` (this is a basic type)
* `"bytes"` to `["byte"]` (this is *not* a basic type)
* `"bytesN"` to `["byte", N]` (this is *not* a basic type)
* `"null"`: `{}`, i.e. the empty container
* `bit` to `boolean`
* `byte` to `uint8` (this is a basic type)
* `BytesN` to `Vector[byte, N]` (this is *not* a basic type)
* `null`: `{}`, i.e. the empty container
### Default values
The default value of a type upon initialization is recursively defined using `0` for `"uintN"`, `False` for `"bool"`, and `[]` for lists. Unions default to the first type in the union (with type index zero), which is `"null"` if present in the union.
The default value of a type upon initialization is recursively defined using `0` for `uintN`, `False` for `boolean` and the elements of `Bitvector`, and `[]` for lists and `Bitlist`. Unions default to the first type in the union (with type index zero), which is `null` if present in the union.
#### `is_empty`
@ -74,34 +83,50 @@ An SSZ object is called empty (and thus, `is_empty(object)` returns true) if it
### Illegal types
Empty vector types (i.e. `[subtype, 0]` for some `subtype`) are not legal. The `"null"` type is only legal as the first type in a union subtype (i.e. with type index zero).
Empty vector types (i.e. `[subtype, 0]` for some `subtype`) are not legal. The `null` type is only legal as the first type in a union subtype (i.e. with type index zero).
## Serialization
We recursively define the `serialize` function which consumes an object `value` (of the type specified) and returns a bytestring of type `"bytes"`.
We recursively define the `serialize` function which consumes an object `value` (of the type specified) and returns a bytestring of type `bytes`.
*Note*: In the function definitions below (`serialize`, `hash_tree_root`, `signing_root`, `is_variable_size`, etc.) objects implicitly carry their type.
### `"uintN"`
### `uintN`
```python
assert N in [8, 16, 32, 64, 128, 256]
return value.to_bytes(N // 8, "little")
```
### `"bool"`
### `boolean`
```python
assert value in (True, False)
return b"\x01" if value is True else b"\x00"
```
### `"null"`
### `null`
```python
return b""
```
### `Bitvector[N]`
```python
as_integer = sum([value[i] << i for i in range(len(value))])
return as_integer.to_bytes((N + 7) // 8, "little")
```
### `Bitlist[N]`
Note that from the offset coding, the length (in bytes) of the bitlist is known. An additional leading `1` bit is added so that the length in bits will also be known.
```python
as_integer = (1 << len(value)) + sum([value[i] << i for i in range(len(value))])
return as_integer.to_bytes((as_integer.bit_length() + 7) // 8, "little")
```
### Vectors, containers, lists, unions
```python
@ -142,17 +167,33 @@ We first define helper functions:
* `pack`: Given ordered objects of the same basic type, serialize them, pack them into `BYTES_PER_CHUNK`-byte chunks, right-pad the last chunk with zero bytes, and return the chunks.
* `merkleize`: Given ordered `BYTES_PER_CHUNK`-byte chunks, if necessary append zero chunks so that the number of chunks is a power of two, Merkleize the chunks, and return the root. Note that `merkleize` on a single chunk is simply that chunk, i.e. the identity when the number of chunks is one.
* `pad`: given a list `l` and a length `N`, adds `N-len(l)` empty objects to the end of the list (the type of the empty object is implicit in the list type)
* `mix_in_length`: Given a Merkle root `root` and a length `length` (`"uint256"` little-endian serialization) return `hash(root + length)`.
* `mix_in_type`: Given a Merkle root `root` and a type_index `type_index` (`"uint256"` little-endian serialization) return `hash(root + type_index)`.
We now define Merkleization `hash_tree_root(value)` of an object `value` recursively:
* `merkleize(pack(value))` if `value` is a basic object or a vector of basic objects
* `mix_in_length(merkleize(pack(value)), len(value))` if `value` is a list of basic objects
* `mix_in_length(merkleize(pack(pad(value, N))), len(value))` if `value` is a list of basic objects
* `merkleize([hash_tree_root(element) for element in value])` if `value` is a vector of composite objects or a container
* `mix_in_length(merkleize([hash_tree_root(element) for element in value]), len(value))` if `value` is a list of composite objects
* `mix_in_length(merkleize([hash_tree_root(element) for element in pad(value, N)]), len(value))` if `value` is a list of composite objects
* `mix_in_type(merkleize(value.value), value.type_index)` if `value` is of union type
### Merkleization of `Bitvector[N]`
```python
as_integer = sum([value[i] << i for i in range(len(value))])
return merkleize(as_integer.to_bytes((N + 7) // 8, "little"))
```
### `Bitlist[N]`
```python
as_integer = sum([value[i] << i for i in range(len(value))])
return mix_in_length(merkleize(as_integer.to_bytes((N + 7) // 8, "little")), len(value))
```
## Self-signed containers
Let `value` be a self-signed container object. The convention is that the signature (e.g. a `"bytes96"` BLS12-381 signature) be the last field of `value`. Further, the signed message for `value` is `signing_root(value) = hash_tree_root(truncate_last(value))` where `truncate_last` truncates the last element of `value`.

View File

@ -1,13 +1,13 @@
from typing import Any
from eth2spec.utils.ssz.ssz_impl import hash_tree_root
from eth2spec.utils.ssz.ssz_typing import (
SSZType, SSZValue, uint, Container, Bytes, List, Bool,
SSZType, SSZValue, uint, Container, Bytes, List, boolean,
Vector, BytesN
)
def decode(data: Any, typ: SSZType) -> SSZValue:
if issubclass(typ, (uint, Bool)):
if issubclass(typ, (uint, boolean)):
return typ(data)
elif issubclass(typ, (List, Vector)):
return typ(decode(element, typ.elem_type) for element in data)

View File

@ -1,6 +1,6 @@
from eth2spec.utils.ssz.ssz_impl import hash_tree_root
from eth2spec.utils.ssz.ssz_typing import (
SSZValue, uint, Container, Bool
SSZValue, uint, Container, boolean
)
@ -10,7 +10,7 @@ def encode(value: SSZValue, include_hash_tree_roots=False):
if value.type().byte_len > 8:
return str(int(value))
return int(value)
elif isinstance(value, Bool):
elif isinstance(value, boolean):
return value == 1
elif isinstance(value, list): # normal python lists, ssz-List, Vector
return [encode(element, include_hash_tree_roots) for element in value]

View File

@ -2,7 +2,7 @@ from random import Random
from enum import Enum
from eth2spec.utils.ssz.ssz_typing import (
SSZType, SSZValue, BasicValue, BasicType, uint, Container, Bytes, List, Bool,
SSZType, SSZValue, BasicValue, BasicType, uint, Container, Bytes, List, boolean,
Vector, BytesN
)
@ -118,7 +118,7 @@ def get_random_bytes_list(rng: Random, length: int) -> bytes:
def get_random_basic_value(rng: Random, typ: BasicType) -> BasicValue:
if issubclass(typ, Bool):
if issubclass(typ, boolean):
return typ(rng.choice((True, False)))
elif issubclass(typ, uint):
assert typ.byte_len in UINT_BYTE_SIZES
@ -128,7 +128,7 @@ def get_random_basic_value(rng: Random, typ: BasicType) -> BasicValue:
def get_min_basic_value(typ: BasicType) -> BasicValue:
if issubclass(typ, Bool):
if issubclass(typ, boolean):
return typ(False)
elif issubclass(typ, uint):
assert typ.byte_len in UINT_BYTE_SIZES
@ -138,7 +138,7 @@ def get_min_basic_value(typ: BasicType) -> BasicValue:
def get_max_basic_value(typ: BasicType) -> BasicValue:
if issubclass(typ, Bool):
if issubclass(typ, boolean):
return typ(True)
elif issubclass(typ, uint):
assert typ.byte_len in UINT_BYTE_SIZES

View File

@ -19,7 +19,7 @@ def translate_typ(typ) -> ssz.BaseSedes:
return ssz.Vector(translate_typ(typ.elem_type), typ.length)
elif issubclass(typ, spec_ssz.List):
return ssz.List(translate_typ(typ.elem_type))
elif issubclass(typ, spec_ssz.Bool):
elif issubclass(typ, spec_ssz.boolean):
return ssz.boolean
elif issubclass(typ, spec_ssz.uint):
if typ.byte_len == 1:
@ -64,7 +64,7 @@ def translate_value(value, typ):
raise TypeError("invalid uint size")
elif issubclass(typ, spec_ssz.List):
return [translate_value(elem, typ.elem_type) for elem in value]
elif issubclass(typ, spec_ssz.Bool):
elif issubclass(typ, spec_ssz.boolean):
return value
elif issubclass(typ, spec_ssz.Vector):
return typ(*(translate_value(elem, typ.elem_type) for elem in value))

View File

@ -1,7 +1,8 @@
from ..merkle_minimal import merkleize_chunks
from ..hash_function import hash
from .ssz_typing import (
SSZValue, SSZType, BasicValue, BasicType, Series, Elements, Bool, Container, List, Bytes, uint,
SSZValue, SSZType, BasicValue, BasicType, Series, Elements, boolean, Container, List, Bytes,
Bitlist, Bitvector, uint,
)
# SSZ Serialization
@ -13,7 +14,7 @@ BYTES_PER_LENGTH_OFFSET = 4
def serialize_basic(value: SSZValue):
if isinstance(value, uint):
return value.to_bytes(value.type().byte_len, 'little')
elif isinstance(value, Bool):
elif isinstance(value, boolean):
if value:
return b'\x01'
else:
@ -39,6 +40,12 @@ def is_empty(obj: SSZValue):
def serialize(obj: SSZValue):
if isinstance(obj, BasicValue):
return serialize_basic(obj)
elif isinstance(obj, Bitvector):
as_integer = sum([obj[i] << i for i in range(len(obj))])
return as_integer.to_bytes((len(obj) + 7) // 8, "little")
elif isinstance(obj, Bitlist):
as_integer = (1 << len(obj)) + sum([obj[i] << i for i in range(len(obj))])
return as_integer.to_bytes((as_integer.bit_length() + 7) // 8, "little")
elif isinstance(obj, Series):
return encode_series(obj)
else:
@ -85,6 +92,12 @@ def encode_series(values: Series):
def pack(values: Series):
if isinstance(values, bytes): # Bytes and BytesN are already packed
return values
elif isinstance(values, Bitvector):
as_integer = sum([values[i] << i for i in range(len(values))])
return as_integer.to_bytes((values.length + 7) // 8, "little")
elif isinstance(values, Bitlist):
as_integer = (1 << len(values)) + sum([values[i] << i for i in range(len(values))])
return as_integer.to_bytes((values.length + 7) // 8, "little")
return b''.join([serialize_basic(value) for value in values])
@ -134,7 +147,7 @@ def hash_tree_root(obj: SSZValue):
else:
raise Exception(f"Type not supported: {type(obj)}")
if isinstance(obj, (List, Bytes)):
if isinstance(obj, (List, Bytes, Bitlist)):
return mix_in_length(merkleize_chunks(leaves, pad_to=chunk_count(obj.type())), len(obj))
else:
return merkleize_chunks(leaves)

View File

@ -31,7 +31,7 @@ class BasicValue(int, SSZValue, metaclass=BasicType):
pass
class Bool(BasicValue): # can't subclass bool.
class boolean(BasicValue): # can't subclass bool.
byte_len = 1
def __new__(cls, value: int): # int value, but can be any subclass of int (bool, Bit, Bool, etc...)
@ -48,7 +48,7 @@ class Bool(BasicValue): # can't subclass bool.
# Alias for Bool
class Bit(Bool):
class bit(boolean):
pass
@ -233,7 +233,7 @@ class ParamsMeta(SSZType):
return f"{self.__name__}~{self.__class__.__name__}"
def __repr__(self):
return self, self.__class__
return f"{self.__name__}~{self.__class__.__name__}"
def attr_from_params(self, p):
# single key params are valid too. Wrap them in a tuple.
@ -280,11 +280,12 @@ class ElementsType(ParamsMeta):
elem_type: SSZType
length: int
class BitElementsType(ElementsType):
elem_type = boolean
class Elements(ParamsBase, metaclass=ElementsType):
pass
class BaseList(list, Elements):
def __init__(self, *args):
@ -310,6 +311,10 @@ class BaseList(list, Elements):
cls = self.__class__
return f"{cls.__name__}[{cls.elem_type.__name__}, {cls.length}]({', '.join(str(v) for v in self)})"
def __repr__(self):
cls = self.__class__
return f"{cls.__name__}[{cls.elem_type.__name__}, {cls.length}]({', '.join(str(v) for v in self)})"
def __getitem__(self, k) -> SSZValue:
if isinstance(k, int): # check if we are just doing a lookup, and not slicing
if k < 0:
@ -337,6 +342,15 @@ class BaseList(list, Elements):
# be explict about getting the last item, for the non-python readers, and negative-index safety
return self[len(self) - 1]
class BaseBitfield(BaseList, metaclass=BitElementsType):
elem_type = bool
class Bitlist(BaseBitfield):
pass
class Bitvector(BaseBitfield):
pass
class List(BaseList):

View File

@ -1,7 +1,7 @@
from typing import Iterable
from .ssz_impl import serialize, hash_tree_root
from .ssz_typing import (
Bit, Bool, Container, List, Vector, Bytes, BytesN,
bit, boolean, Container, List, Vector, Bytes, BytesN,
uint8, uint16, uint32, uint64, uint256, byte
)
from ..hash_function import hash as bytes_hash
@ -74,10 +74,10 @@ def merge(a: str, branch: Iterable[str]) -> str:
test_data = [
("bit F", Bit(False), "00", chunk("00")),
("bit T", Bit(True), "01", chunk("01")),
("bool F", Bool(False), "00", chunk("00")),
("bool T", Bool(True), "01", chunk("01")),
("bit F", bit(False), "00", chunk("00")),
("bit T", bit(True), "01", chunk("01")),
("boolean F", boolean(False), "00", chunk("00")),
("boolean T", boolean(True), "01", chunk("01")),
("uint8 00", uint8(0x00), "00", chunk("00")),
("uint8 01", uint8(0x01), "01", chunk("01")),
("uint8 ab", uint8(0xab), "ab", chunk("ab")),

View File

@ -1,6 +1,6 @@
from .ssz_typing import (
SSZValue, SSZType, BasicValue, BasicType, Series, ElementsType,
Elements, Bit, Bool, Container, List, Vector, Bytes, BytesN,
Elements, bit, boolean, Container, List, Vector, Bytes, BytesN,
byte, uint, uint8, uint16, uint32, uint64, uint128, uint256,
Bytes32, Bytes48
)
@ -22,8 +22,8 @@ def test_subclasses():
assert issubclass(u, SSZValue)
assert isinstance(u, SSZType)
assert isinstance(u, BasicType)
assert issubclass(Bool, BasicValue)
assert isinstance(Bool, BasicType)
assert issubclass(boolean, BasicValue)
assert isinstance(boolean, BasicType)
for c in [Container, List, Vector, Bytes, BytesN]:
assert issubclass(c, Series)
@ -45,16 +45,16 @@ def test_basic_instances():
assert isinstance(v, BasicValue)
assert isinstance(v, SSZValue)
assert isinstance(Bool(True), BasicValue)
assert isinstance(Bool(False), BasicValue)
assert isinstance(Bit(True), Bool)
assert isinstance(Bit(False), Bool)
assert isinstance(boolean(True), BasicValue)
assert isinstance(boolean(False), BasicValue)
assert isinstance(bit(True), boolean)
assert isinstance(bit(False), boolean)
def test_basic_value_bounds():
max = {
Bool: 2 ** 1,
Bit: 2 ** 1,
boolean: 2 ** 1,
bit: 2 ** 1,
uint8: 2 ** (8 * 1),
byte: 2 ** (8 * 1),
uint16: 2 ** (8 * 2),