SSZ impl. rework started, see issue 1064

This commit is contained in:
protolambda 2019-05-12 23:16:17 +02:00
parent 9bd771f787
commit 761c9e55fe
No known key found for this signature in database
GPG Key ID: EC89FDBB2B4C7623
5 changed files with 368 additions and 332 deletions

View File

@ -1,7 +1,9 @@
from .hash_function import hash


# A single all-zero 32-byte chunk.
ZERO_BYTES32 = b'\x00' * 32

# zerohashes[0] is the zero chunk; zerohashes[i] is the hash of two copies of zerohashes[i - 1].
zerohashes = [ZERO_BYTES32]
for layer in range(1, 32):
    zerohashes.append(hash(zerohashes[layer - 1] + zerohashes[layer - 1]))
@ -28,3 +30,31 @@ def get_merkle_proof(tree, item_index):
subindex = (item_index // 2**i) ^ 1 subindex = (item_index // 2**i) ^ 1
proof.append(tree[i][subindex] if subindex < len(tree[i]) else zerohashes[i]) proof.append(tree[i][subindex] if subindex < len(tree[i]) else zerohashes[i])
return proof return proof
def next_power_of_two(v: int) -> int:
    """
    Get the smallest power of 2 that is greater than or equal to v.

    Works for ints of any size (not limited to the 64 bit range).
    Zero and negative inputs yield 0.

    Examples:
    0 -> 0, 1 -> 1, 2 -> 2, 3 -> 4, 32 -> 32, 33 -> 64
    """
    if v <= 0:
        return 0
    # (v - 1).bit_length() is the number of bits needed to represent v - 1;
    # shifting 1 by that amount gives the first power of 2 that is not below v.
    return 1 << (v - 1).bit_length()
def merkleize_chunks(chunks):
    """
    Compute the merkle root of a sequence of 32-byte chunks.

    The chunks are padded on the right with ZERO_BYTES32 up to the next power of two,
    and then hashed pairwise, bottom-up, in a flat array where node i has children 2i and 2i + 1.

    An empty sequence is treated as a single zero chunk, so its root is ZERO_BYTES32.

    :param chunks: the leaf chunks. Not modified.
    :return: the root node.
    """
    count = len(chunks)
    # Always keep at least one leaf: next_power_of_two(0) is 0, which would leave no root to return.
    leaf_count = max(next_power_of_two(count), 1)
    # First half: placeholders for the internal nodes. Second half: the (padded) leaves.
    tree = [ZERO_BYTES32] * leaf_count + list(chunks) + [ZERO_BYTES32] * (leaf_count - count)
    for i in range(leaf_count - 1, 0, -1):
        tree[i] = hash(tree[i * 2] + tree[i * 2 + 1])
    return tree[1]

View File

@ -1,331 +0,0 @@
from typing import Any
from .hash_function import hash
BYTES_PER_CHUNK = 32
BYTES_PER_LENGTH_OFFSET = 4
ZERO_CHUNK = b'\x00' * BYTES_PER_CHUNK
def SSZType(fields):
    """
    Create a container class for the given fields.

    :param fields: mapping of field name to SSZ type.
    :return: a new class, with the mapping available as its ``fields`` attribute.
    """
    class SSZObject():
        def __init__(self, **kwargs):
            # Fields that are not passed in get the zero value of their type.
            for f, t in fields.items():
                if f not in kwargs:
                    setattr(self, f, get_zero_value(t))
                else:
                    setattr(self, f, kwargs[f])

        def __eq__(self, other):
            # Objects that do not look like SSZ containers are simply not equal,
            # instead of failing on the missing attribute.
            if not (hasattr(other, 'fields') and hasattr(other, 'serialize')):
                return NotImplemented
            return self.fields == other.fields and self.serialize() == other.serialize()

        def __hash__(self):
            return int.from_bytes(self.hash_tree_root(), byteorder="little")

        def __str__(self):
            # One "name: value" line per field.
            output = []
            for field in self.fields:
                output.append(f'{field}: {getattr(self, field)}')
            return "\n".join(output)

        def serialize(self):
            return serialize_value(self, self.__class__)

        def hash_tree_root(self):
            return hash_tree_root(self, self.__class__)

    SSZObject.fields = fields
    return SSZObject
class Vector():
    """
    Thin wrapper around a sequence of items.

    The length is recorded once, at construction time.
    """

    def __init__(self, items):
        self.items, self.length = items, len(items)

    def __getitem__(self, key):
        return self.items[key]

    def __setitem__(self, key, value):
        self.items[key] = value

    def __iter__(self):
        return iter(self.items)

    def __len__(self):
        # The length captured in __init__, not a fresh len(self.items).
        return self.length
def is_basic(typ):
    """
    Check if the given type is a basic SSZ type.

    Basic types are described by a string: "uintN" (N in 8, 16, 32, 64, 128, 256),
    "bool", or "byte" (alias of "uint8").
    """
    # Anything that is not a string is a complex type, and cannot be basic.
    if not isinstance(typ, str):
        return False
    if typ in ('bool', 'byte'):
        return True
    return typ[:4] == 'uint' and typ[4:] in ['8', '16', '32', '64', '128', '256']
def is_constant_sized(typ):
    """
    Check if values of the given type always serialize to the same number of bytes.

    Raises an Exception if the type is not recognized.
    """
    # Basic objects are fixed size by definition.
    if is_basic(typ):
        return True
    if isinstance(typ, list):
        # "list": [elem_type]. Dynamic size by definition.
        if len(typ) == 1:
            return False
        # "vector": [elem_type, length]. Constant size, but only if the elements are.
        if len(typ) == 2:
            return is_constant_sized(typ[0])
    # Bytes array: "bytes" is dynamic, "bytesN" (with a length suffix) is fixed.
    if isinstance(typ, str) and typ.startswith('bytes'):
        return typ != 'bytes'
    # Containers are only constant-size if all of the fields are constant size.
    if hasattr(typ, 'fields'):
        return all(is_constant_sized(subtype) for subtype in typ.fields.values())
    raise Exception("Type not recognized")
def coerce_to_bytes(x):
    """
    Return x as bytes.

    bytes are returned as-is. A str is UTF-8 encoded, and must encode to exactly
    one byte per character. Anything else raises an Exception.
    """
    if isinstance(x, bytes):
        return x
    if isinstance(x, str):
        encoded = x.encode('utf-8')
        # Multi-byte characters would change the length; those are rejected.
        assert len(encoded) == len(x)
        return encoded
    raise Exception("Expecting bytes")
def encode_series(values, types):
    """
    Serialize a series of values, each with its own type, into a single byte string.

    Constant-size parts are placed in the fixed section. For every variable-size part,
    a little-endian offset of BYTES_PER_LENGTH_OFFSET bytes is placed in the fixed section instead,
    and the serialized part itself is appended after the fixed section.
    """
    # Recursively serialize, remembering for each part if it is constant-size.
    encoded = []
    for index, item in enumerate(values):
        item_typ = types[index]
        encoded.append((is_constant_sized(item_typ), serialize_value(item, item_typ)))

    # Compute the size of the fixed section and of the variable section.
    fixed_total = 0
    variable_total = 0
    for is_fixed, blob in encoded:
        if is_fixed:
            fixed_total += len(blob)
        else:
            fixed_total += BYTES_PER_LENGTH_OFFSET
            variable_total += len(blob)

    # Every offset has to fit in BYTES_PER_LENGTH_OFFSET bytes.
    assert fixed_total + variable_total < 2 ** (BYTES_PER_LENGTH_OFFSET * 8)

    # The first variable-size part starts right after the fixed section.
    # A running offset avoids quadratic complexity.
    cursor = fixed_total
    head = []
    tail = []
    for is_fixed, blob in encoded:
        if is_fixed:
            head.append(blob)
            continue
        head.append(cursor.to_bytes(BYTES_PER_LENGTH_OFFSET, 'little'))
        tail.append(blob)
        cursor += len(blob)

    # Fixed section (with interleaved offsets), followed by the variable-size parts.
    return b"".join(head + tail)
def serialize_value(value, typ=None):
    """
    Serialize a value as the given SSZ type.

    :param value: the value to serialize.
    :param typ: the SSZ type. Inferred from the value if not provided.
    :return: the serialized bytes.
    :raises Exception: if the type is not recognized.
    """
    if typ is None:
        typ = infer_type(value)
    if isinstance(typ, str):
        # alias: "byte" -> "uint8"
        if typ == 'byte':
            return value.to_bytes(1, 'little')
        # "uintN"
        if typ[:4] == 'uint':
            length = int(typ[4:])
            assert length in (8, 16, 32, 64, 128, 256)
            return value.to_bytes(length // 8, 'little')
        # "bool"
        if typ == 'bool':
            assert value in (True, False)
            # Truthiness, not identity: 1 passes the assert above, and has to serialize like True.
            return b'\x01' if value else b'\x00'
        # "bytes" (variable size)
        if typ == 'bytes':
            return coerce_to_bytes(value)
        # "bytesN" (fixed size)
        if len(typ) > 5 and typ[:5] == 'bytes':
            assert len(value) == int(typ[5:]), (value, int(typ[5:]))
            return coerce_to_bytes(value)
    elif isinstance(typ, list):
        # Vector
        if len(typ) == 2:
            # (regardless of element type, sanity-check if the length reported in the vector type
            # matches the value length)
            assert len(value) == typ[1]
            return encode_series(value, [typ[0]] * len(value))
        # List
        if len(typ) == 1:
            return encode_series(value, [typ[0]] * len(value))
    # containers
    elif hasattr(typ, 'fields'):
        values = [getattr(value, field) for field in typ.fields.keys()]
        types = list(typ.fields.values())
        return encode_series(values, types)
    raise Exception(f"Type not recognized: value={value!r}, typ={typ!r}")
def get_zero_value(typ: Any) -> Any:
    """
    Get the zero value for the given SSZ type.

    Raises ValueError for an unrecognized type string, and Exception for any other unrecognized type.
    """
    if isinstance(typ, str):
        # Bytes array
        if typ == 'bytes':
            return b''
        # bytesN
        if typ.startswith('bytes') and len(typ) > 5:
            return b'\x00' * int(typ[5:])
        # Basic types
        if typ == 'bool':
            return False
        if typ[:4] == 'uint':
            return 0
        if typ == 'byte':
            return 0x00
        raise ValueError("Type not recognized")
    if isinstance(typ, list):
        # Vector: every element is the zero value of the element type.
        if len(typ) == 2:
            return [get_zero_value(typ[0]) for _ in range(typ[1])]
        # List: empty.
        if len(typ) == 1:
            return []
    # Container: every field is the zero value of the field type.
    if hasattr(typ, 'fields'):
        zero_fields = {field: get_zero_value(subtype) for field, subtype in typ.fields.items()}
        return typ(**zero_fields)
    print(typ)
    raise Exception("Type not recognized")
def chunkify(bytez):
    """
    Right-pad the bytes with zeroes to a multiple of BYTES_PER_CHUNK, and split them into chunks.
    """
    padding = -len(bytez) % BYTES_PER_CHUNK
    padded = bytez + b'\x00' * padding
    # NOTE(review): the split uses a literal 32, while the padding uses BYTES_PER_CHUNK.
    return [padded[start:start + 32] for start in range(0, len(padded), 32)]
def pack(values, subtype):
    """
    Serialize each value as the given subtype, concatenate the results, and split them into chunks.
    """
    serialized = b''.join(serialize_value(item, subtype) for item in values)
    return chunkify(serialized)
def is_power_of_two(x):
    """
    Check if x is a positive power of two.
    """
    if not x > 0:
        return False
    # A power of two has a single bit set, clearing the lowest set bit leaves nothing.
    return (x & (x - 1)) == 0
def merkleize(chunks):
    """
    Compute the merkle root of the given chunks.

    The chunks are padded with ZERO_CHUNK until their count is a power of two,
    and then hashed pairwise, bottom-up. The input is not modified.
    """
    nodes = chunks[:]
    while not is_power_of_two(len(nodes)):
        nodes.append(ZERO_CHUNK)
    width = len(nodes)
    # Flat tree layout: node i has children 2i and 2i + 1, the leaves fill the second half.
    nodes = [ZERO_CHUNK] * width + nodes
    for parent in reversed(range(1, width)):
        left = parent * 2
        nodes[parent] = hash(nodes[left] + nodes[left + 1])
    return nodes[1]
def mix_in_length(root, length):
    """
    Hash the root together with the length, encoded as 32 little-endian bytes.
    """
    length_chunk = length.to_bytes(32, 'little')
    return hash(root + length_chunk)
def infer_type(value):
    """
    Note: defaults to uint64 for integer type inference due to lack of information.
    Other integer sizes are still supported, see spec.
    :param value: The value to infer a SSZ type for.
    :return: The SSZ type.
    :raises Exception: if no type can be inferred.
    """
    if hasattr(value.__class__, 'fields'):
        return value.__class__
    elif isinstance(value, Vector):
        if len(value) > 0:
            return [infer_type(value[0]), len(value)]
        else:
            # Element type does not matter too much,
            # assumed to be a basic type for size-encoding purposes, vector is empty.
            # Still a vector type ([elem_type, length]): a single-element type would describe a list,
            # and the list branch of hash_tree_root mixes in the length.
            return ['uint64', 0]
    elif isinstance(value, list):
        if len(value) > 0:
            return [infer_type(value[0])]
        else:
            # Element type does not matter, list-content size will be encoded regardless, list is empty.
            return ['uint64']
    elif isinstance(value, (bytes, str)):
        return 'bytes'
    elif isinstance(value, int):
        return 'uint64'
    else:
        raise Exception("Failed to infer type")
def hash_tree_root(value, typ=None):
    """
    Compute the hash tree root of a value of the given SSZ type.

    The type is inferred from the value if it is not provided.
    Raises an Exception if the type is not recognized.
    """
    if typ is None:
        typ = infer_type(value)

    # Basic object: merkleize the packed version.
    if is_basic(typ):
        return merkleize(pack([value], typ))

    # Vector: no length mix-in.
    if isinstance(typ, list) and len(typ) == 2:
        elem_typ = typ[0]
        if is_basic(elem_typ):
            assert len(value) == typ[1]
            return merkleize(pack(value, elem_typ))
        return merkleize([hash_tree_root(element, elem_typ) for element in value])

    # List: same as a vector, but with the length mixed in.
    if isinstance(typ, list) and len(typ) == 1:
        elem_typ = typ[0]
        if is_basic(elem_typ):
            contents_root = merkleize(pack(value, elem_typ))
        else:
            contents_root = merkleize([hash_tree_root(element, elem_typ) for element in value])
        return mix_in_length(contents_root, len(value))

    # Non-fixed-size bytes array: chunkify the bytes directly, and mix in the length.
    if typ == 'bytes':
        return mix_in_length(merkleize(chunkify(coerce_to_bytes(value))), len(value))

    # Fixed-size bytes array: chunkify the bytes directly.
    if isinstance(typ, str) and typ[:5] == 'bytes' and len(typ) > 5:
        assert len(value) == int(typ[5:])
        return merkleize(chunkify(coerce_to_bytes(value)))

    # Container: merkleize the roots of the fields.
    if hasattr(typ, 'fields'):
        return merkleize([hash_tree_root(getattr(value, field), subtype) for field, subtype in typ.fields.items()])

    raise Exception("Type not recognized")
def truncate(container):
    """
    Create a copy of the container without its last field.

    The result is an instance of a newly created SSZType with all but the last field.
    """
    kept_fields = list(container.fields.keys())[:-1]
    truncated_class = SSZType({name: container.fields[name] for name in kept_fields})
    return truncated_class(**{name: getattr(container, name) for name in kept_fields})
def signing_root(container):
    """
    Compute the hash tree root of the container with its last field left out.
    """
    without_last_field = truncate(container)
    return hash_tree_root(without_last_field)
def serialize(ssz_object):
    """
    Serialize the object by calling its own serialize method.
    """
    return ssz_object.serialize()

View File

@ -0,0 +1,117 @@
from eth2spec.utils.merkle_minimal import merkleize_chunks
from .ssz_switch import *
# SSZ Helpers
# -----------------------------
def pack(values, subtype):
    """
    Serialize each value as the given subtype, and concatenate the results into one byte string.
    """
    return b''.join(serialize(item, subtype) for item in values)
def chunkify(byte_string):
    """
    Right-pad the bytes with zeroes to a multiple of 32 bytes, and split them into 32-byte chunks.
    """
    padding = -len(byte_string) % 32
    padded = byte_string + b'\x00' * padding
    return [padded[start:start + 32] for start in range(0, len(padded), 32)]
BYTES_PER_LENGTH_OFFSET = 4
# SSZ Implementation
# -----------------------------
# Zero value for a given SSZ type. Called with the type only.
# The parameter names of each lambda are significant:
# the switch runner passes in the matcher data (and typ) by those names.
get_zero_value = ssz_type_switch({
    ssz_bool: lambda: False,
    ssz_uint: lambda: 0,
    # byte form ("bytes"): empty bytes. Otherwise an empty list.
    ssz_list: lambda byte_form: b'' if byte_form else [],
    # byte form ("bytesN"): zeroed bytes. Otherwise a list with the zero value of the element type, length times.
    ssz_vector: lambda length, elem_typ, byte_form:
        (b'\x00' * length if length > 0 else b'') if byte_form else
        [get_zero_value(elem_typ) for _ in range(length)],
    # Instantiate the container type, with every field set to the zero value of its type.
    ssz_container: lambda typ, field_names, field_types:
        typ(**{f_name: get_zero_value(f_typ) for f_name, f_typ in zip(field_names, field_types)}),
})
# Serialize a value as a given SSZ type. Called as serialize(value, typ).
# The parameter names of each lambda are significant:
# the switch runner passes in value, typ and the matcher data by those names.
serialize = ssz_switch({
    ssz_bool: lambda value: b'\x01' if value else b'\x00',
    ssz_uint: lambda value, byte_len: value.to_bytes(byte_len, 'little'),
    # Lists and vectors are a series of elements that all have the same type.
    ssz_list: lambda value, elem_typ: encode_series(value, [elem_typ] * len(value)),
    ssz_vector: lambda value, elem_typ, length: encode_series(value, [elem_typ] * length),
    # Containers are a series of field values, each with its own type.
    ssz_container: lambda value, get_field_values, field_types: encode_series(get_field_values(value), field_types),
})
# Matcher group for the basic types. A tuple key in a switch matches if any of its matchers does.
ssz_basic_type = (ssz_bool, ssz_uint)
# True for bool and uint types, False for anything else. Called with the type only.
is_basic_type = ssz_type_switch({
    ssz_basic_type: lambda: True,
    ssz_default: lambda: False,
})
# Whether the given type is fixed-size. Called with the type only.
# There is no default case: the switch raises for a type that none of the matchers recognize.
is_fixed_size = ssz_type_switch({
    ssz_basic_type: lambda: True,
    # A vector is fixed-size if its element type is.
    ssz_vector: lambda elem_typ: is_fixed_size(elem_typ),
    # A container is fixed-size if all of its field types are.
    ssz_container: lambda field_types: all(is_fixed_size(f_typ) for f_typ in field_types),
    ssz_list: lambda: False,
})
def hash_tree_root_list(value, elem_typ):
    """
    Merkleize a series of elements that all have the same type. No length is mixed in.

    Basic elements are packed into chunks, for other elements the hash tree root of each element is a chunk.
    """
    if not is_basic_type(elem_typ):
        leaves = [hash_tree_root(element, elem_typ) for element in value]
    else:
        leaves = chunkify(pack(value, elem_typ))
    return merkleize_chunks(leaves)
def mix_in_length(root, length):
    """
    Hash the root together with the length, encoded as 32 little-endian bytes.

    :param root: the root (bytes) to mix the length into.
    :param length: the length (int).
    :return: the result of the spec hash function.
    """
    # Imported locally: this module does not import a hash function,
    # so a bare hash() would resolve to the Python builtin, which returns an int.
    from eth2spec.utils.hash_function import hash
    return hash(root + length.to_bytes(32, 'little'))
def hash_tree_root_container(fields):
    """
    Merkleize the hash tree roots of the given (field value, field type) pairs.
    """
    field_roots = [hash_tree_root(field_value, field_typ) for field_value, field_typ in fields]
    return merkleize_chunks(field_roots)
# Hash tree root of a value of a given SSZ type. Called as hash_tree_root(value, typ).
# The parameter names of each lambda are significant:
# the switch runner passes in value, typ and the matcher data by those names.
hash_tree_root = ssz_switch({
    # Basic value: pack it on its own, and merkleize the resulting chunk(s).
    ssz_basic_type: lambda value, typ: merkleize_chunks(chunkify(pack([value], typ))),
    # List: root of the elements, with the number of elements mixed in.
    ssz_list: lambda value, elem_typ: mix_in_length(hash_tree_root_list(value, elem_typ), len(value)),
    # Vector: root of the elements, without length mix-in.
    ssz_vector: lambda value, elem_typ: hash_tree_root_list(value, elem_typ),
    # Container: root of the (field value, field type) pairs.
    ssz_container: lambda value, get_field_values, field_types: hash_tree_root_container(zip(get_field_values(value), field_types)),
})
# Signing root of a value. Called as signing_root(value, typ).
# For a container, this is the hash tree root over all fields except the last one.
# For any other type, it is the regular hash tree root.
signing_root = ssz_switch({
    # zip() returns an iterator, which cannot be sliced: materialize the pairs before dropping the last one.
    ssz_container: lambda value, get_field_values, field_types:
        hash_tree_root_container(list(zip(get_field_values(value), field_types))[:-1]),
    ssz_default: lambda value, typ: hash_tree_root(value, typ),
})
def encode_series(values, types):
    """
    Serialize a series of values, each with its own type, into a single byte string.

    bytes are returned as-is. Otherwise, fixed-size parts are placed in the fixed section.
    For every variable-size part, a little-endian offset of BYTES_PER_LENGTH_OFFSET bytes is placed
    in the fixed section instead, and the serialized part itself is appended after the fixed section.
    """
    # bytes and bytesN are already in the right format.
    if isinstance(values, bytes):
        return values

    # Recursively serialize, remembering for each part if it is fixed-size.
    encoded = []
    for index, item in enumerate(values):
        item_typ = types[index]
        encoded.append((is_fixed_size(item_typ), serialize(item, item_typ)))

    # Compute the size of the fixed section and of the variable section.
    fixed_total = 0
    variable_total = 0
    for is_fixed, blob in encoded:
        if is_fixed:
            fixed_total += len(blob)
        else:
            fixed_total += BYTES_PER_LENGTH_OFFSET
            variable_total += len(blob)

    # Every offset has to fit in BYTES_PER_LENGTH_OFFSET bytes.
    assert fixed_total + variable_total < 2 ** (BYTES_PER_LENGTH_OFFSET * 8)

    # The first variable-size part starts right after the fixed section.
    # A running offset avoids quadratic complexity.
    cursor = fixed_total
    head = []
    tail = []
    for is_fixed, blob in encoded:
        if is_fixed:
            head.append(blob)
            continue
        head.append(cursor.to_bytes(BYTES_PER_LENGTH_OFFSET, 'little'))
        tail.append(blob)
        cursor += len(blob)

    # Fixed section (with interleaved offsets), followed by the variable-size parts.
    return b''.join(head + tail)

View File

@ -0,0 +1,105 @@
from typing import Dict, Any
from .ssz_typing import *
# SSZ Switch statement runner factory
# -----------------------------
def ssz_switch(sw: Dict[Any, Any], arg_names=None):
    """
    Creates an SSZ switch statement: a function that, when executed, checks every switch case
    in order, and runs the worker of the first case that has a matcher accepting the type.

    :param sw: maps a matcher (or a tuple of matchers) to a worker.
        A matcher is called with the type, and returns a dict with extra data when it matches,
        or None when it does not. A worker is called with the subset of
        "value", "typ" and the matcher data that it declares as parameters (matched by name).
    :param arg_names: names of the positional arguments of the created function.
        Defaults to ["value", "typ"].
    :return: the function that runs the switch.
        It raises an Exception if none of the matchers accepts the type.
    """
    if arg_names is None:
        arg_names = ["value", "typ"]

    # Runner, the function that executes the switch when called.
    # Accepts arguments based on the arg_names declared in the ssz_switch.
    def run_switch(*args):
        # value may be None
        value = None
        try:
            value = args[arg_names.index("value")]
        except ValueError:
            pass  # no value argument

        # typ may be None when value is not None
        typ = None
        try:
            typ = args[arg_names.index("typ")]
        except ValueError:
            # no typ argument expected
            pass
        except IndexError:
            # typ argument expected, but not passed. Try to get it from the class info
            typ = value.__class__

        # If the type carries a __forward_arg__, continue with that instead.
        if hasattr(typ, '__forward_arg__'):
            typ = typ.__forward_arg__

        # Now, go over all switch cases
        for matchers, worker in sw.items():
            if not isinstance(matchers, tuple):
                matchers = (matchers,)
            # for each matcher of the case key
            for m in matchers:
                data = m(typ)
                # if we have data, the matcher matched, and we can return the result
                if data is not None:
                    # Supply value and type by default, and any data presented by the matcher.
                    kwargs = {"value": value, "typ": typ, **data}
                    # Filter out unwanted arguments. co_varnames also lists the local variables
                    # of the worker, only the leading entries are its parameters.
                    code = worker.__code__
                    wanted = code.co_varnames[:code.co_argcount + code.co_kwonlyargcount]
                    filtered_kwargs = {k: kwargs[k] for k in wanted}
                    # run the switch case and return result
                    return worker(**filtered_kwargs)
        raise Exception("cannot find matcher for type: %s (value: %s)" % (typ, value))
    return run_switch
def ssz_type_switch(sw: Dict[Any, Any]):
    """
    Creates an SSZ switch that is called with just the type, no value.
    """
    type_only_args = ["typ"]
    return ssz_switch(sw, type_only_args)
# SSZ Switch matchers
# -----------------------------
def ssz_bool(typ):
    """
    Matcher for the bool type. Returns an empty dict (no extra data) on a match, None otherwise.
    """
    return {} if typ == bool else None
def ssz_uint(typ):
    """
    Matcher for the unsigned integer types (and byte).
    Returns the byte length of the type as "byte_len" on a match, None otherwise.
    """
    # Note: only the type reference exists,
    # but it really resolves to 'int' during run-time for zero computational/memory overhead.
    # Hence, we check equality to the type references (which are really just 'NewType' instances),
    # and don't use any sub-classing like we normally would.
    uint_types = (uint8, uint16, uint32, uint64, uint128, uint256, byte)
    if any(typ == uint_typ for uint_typ in uint_types):
        return {"byte_len": typ.byte_len}
    return None
def ssz_list(typ):
    """
    Matcher for list types: types with List among their direct bases, and bytes.
    Returns the element type and whether the value is in byte form on a match, None otherwise.
    """
    if List in getattr(typ, '__bases__', ()):
        return {"elem_typ": read_list_elem_typ(typ), "byte_form": False}
    # bytes is handled as a list of uint8, in byte form.
    if typ == bytes:
        return {"elem_typ": uint8, "byte_form": True}
    return None
def ssz_vector(typ):
    """
    Matcher for vector types: types with Vector or BytesN among their direct bases.
    Returns the element type, length, and whether the value is in byte form on a match, None otherwise.
    """
    bases = getattr(typ, '__bases__', None)
    if bases is None:
        return None
    if Vector in bases:
        return {"elem_typ": read_vec_elem_typ(typ), "length": read_vec_len(typ), "byte_form": False}
    # BytesN is handled as a vector of uint8, in byte form.
    if BytesN in bases:
        return {"elem_typ": uint8, "length": read_bytesN_len(typ), "byte_form": True}
    return None
def ssz_container(typ):
    """
    Matcher for container types: types with SSZContainer among their direct bases.
    On a match, returns the field names and field types (read from the annotations of the type),
    and a function to read the field values from a container instance. Returns None otherwise.
    """
    if not (hasattr(typ, '__bases__') and SSZContainer in typ.__bases__):
        return None

    def get_field_values(value):
        # Same order as the field names and types.
        return [getattr(value, name) for name in typ.__annotations__]

    return {
        "get_field_values": get_field_values,
        "field_names": list(typ.__annotations__),
        "field_types": list(typ.__annotations__.values()),
    }
def ssz_default(typ):
    """
    Matcher that accepts every type, without any extra data. For use as the last switch case.
    """
    return dict()

View File

@ -0,0 +1,115 @@
from typing import Generic, List, TypeVar, Type, Iterable, NewType
# SSZ base length, to limit length generic type param of vector/bytesN
SSZLenAny = type('SSZLenAny', (), {})


def SSZLen(length: int):
    """
    SSZ length factory. Creates a type corresponding to a given length. To be used as parameter in type generics.

    The created type subclasses SSZLenAny, and exposes the length as its ``length`` attribute.
    """
    assert length >= 0
    return type('SSZLen_%d' % length, (SSZLenAny,), {'length': length})
# SSZ element type
T = TypeVar('T')
# SSZ vector/bytesN length
L = TypeVar('L', bound=SSZLenAny)


# SSZ vector
# -----------------------------
class Vector(Generic[T, L]):
    """
    Vector of elements, with the element type and length as generic type parameters.

    The elements are passed as separate positional arguments, and kept in the ``items`` list.
    """

    # NOTE(review): each positional argument is stored as one element,
    # the Iterable[T] annotation looks like it should be T. Confirm.
    def __init__(self, *args: Iterable[T]):
        self.items = [*args]

    def __getitem__(self, key):
        return self.items[key]

    def __setitem__(self, key, value):
        self.items[key] = value

    def __iter__(self):
        return iter(self.items)

    def __len__(self):
        return len(self.items)
def read_vec_elem_typ(vec_typ: Type[Vector[T,L]]) -> T:
    """
    Read the element type: the first generic argument of the vector type.
    """
    type_args = vec_typ.__args__
    assert type_args is not None
    return type_args[0]
def read_vec_len(vec_typ: Type[Vector[T,L]]) -> int:
    """
    Read the vector length: the ``length`` attribute of the second generic argument of the vector type.
    """
    type_args = vec_typ.__args__
    assert type_args is not None
    length_typ = type_args[1]
    return length_typ.length
# SSZ list
# -----------------------------
def read_list_elem_typ(list_typ: Type[List[T]]) -> T:
    """
    Read the element type: the first generic argument of the list type.
    """
    type_args = list_typ.__args__
    assert type_args is not None
    return type_args[0]
# SSZ bytesN
# -----------------------------
class BytesN(Generic[L]):
    """
    Type marker for bytes of a fixed length, with the length as generic type parameter. No behavior of its own.
    """
def read_bytesN_len(bytesN_typ: Type[BytesN[L]]) -> int:
    """
    Read the length: the ``length`` attribute of the first generic argument of the bytesN type.
    """
    type_args = bytesN_typ.__args__
    assert type_args is not None
    length_typ = type_args[0]
    return length_typ.length
# SSZ integer types, with 0 computational overhead (NewType)
# -----------------------------
# Each integer type carries its serialized size in bytes as a 'byte_len' attribute.
uint8 = NewType('uint8', int)
uint8.byte_len = 1
uint16 = NewType('uint16', int)
uint16.byte_len = 2
uint32 = NewType('uint32', int)
uint32.byte_len = 4
uint64 = NewType('uint64', int)
uint64.byte_len = 8
uint128 = NewType('uint128', int)
uint128.byte_len = 16
uint256 = NewType('uint256', int)
uint256.byte_len = 32
byte = NewType('byte', uint8)
# Attributes set on uint8 are not available on a NewType derived from it: set the size explicitly.
byte.byte_len = 1
# SSZ Container base class
# -----------------------------
# Note: importing ssz functionality locally, to avoid import loop
class SSZContainer(object):
    """
    Base class for SSZ containers. The fields of a container are the annotations of its class.
    """

    def __init__(self, **kwargs):
        from .ssz_impl import get_zero_value

        # Fields that are not passed in get the zero value of their type.
        for name, typ in self.__annotations__.items():
            setattr(self, name, kwargs[name] if name in kwargs else get_zero_value(typ))

    def serialize(self):
        from .ssz_impl import serialize as ssz_serialize

        return ssz_serialize(self, self.__class__)

    def hash_tree_root(self):
        from .ssz_impl import hash_tree_root as ssz_hash_tree_root

        return ssz_hash_tree_root(self, self.__class__)

    def signing_root(self):
        from .ssz_impl import signing_root as ssz_signing_root

        return ssz_signing_root(self, self.__class__)