From ea7ad28298e6060943aab69a29d6a99d2b752ae8 Mon Sep 17 00:00:00 2001 From: Jacek Sieka Date: Fri, 16 Nov 2018 17:33:49 -0600 Subject: [PATCH 1/2] simplify merkle tree chunking * pack small items tightly to fit more items in single chunk, decreasing the number of hash operations needed * remove chunk padding - hash algorithm will pad to its own block size anyway * express data length in number of items instead of binary bytes at leaf level (equivalent) --- specs/simple-serialize.md | 42 ++++++++++++++++----------------------- 1 file changed, 17 insertions(+), 25 deletions(-) diff --git a/specs/simple-serialize.md b/specs/simple-serialize.md index defe9bba7..9c7744e68 100644 --- a/specs/simple-serialize.md +++ b/specs/simple-serialize.md @@ -402,40 +402,32 @@ Return the hash of the serialization of the value. First, we define some helpers and then the Merkle tree function. The constant `CHUNK_SIZE` is set to 128. ```python -# Returns the smallest power of 2 equal to or higher than x -def next_power_of_2(x): - return x if x == 1 else next_power_of_2((x+1) // 2) * 2 - -# Extends data length to a power of 2 by minimally right-zero-padding -def extend_to_power_of_2(data): - return data + b'\x00' * (next_power_of_2(len(data)) - len(data)) - -# Concatenate a list of homogeneous objects into data and pad it -def list_to_glob(lst): - if len(lst) == 0: - return b'' - if len(lst[0]) != next_power_of_2(len(lst[0])): - lst = [extend_to_power_of_2(x) for x in lst] - data = b''.join(lst) - # Pad to chunksize - data += b'\x00' * (CHUNKSIZE - (len(data) % CHUNKSIZE or CHUNKSIZE)) - return data - -# Merkle tree hash of a list of items +# Merkle tree hash of a list of homogeneous, non-empty items def merkle_hash(lst): - # Turn list into padded data - data = list_to_glob(lst) # Store length of list (to compensate for non-bijectiveness of padding) datalen = len(lst).to_bytes(32, 'big') - # Convert to chunks - chunkz = [data[i:i+CHUNKSIZE] for i in range(0, len(data), CHUNKSIZE)] + + if
len(lst) == 0: + # Handle empty list case + chunkz = [b'\x00' * CHUNKSIZE] + elif len(lst[0]) < CHUNKSIZE: + # See how many items fit in a chunk + items_per_chunk = CHUNKSIZE // len(lst[0]) + + # Build a list of chunks based on the number of items in the chunk + chunkz = [b''.join(lst[i:i+items_per_chunk]) for i in range(0, len(lst), items_per_chunk)] + else: + # Leave large items alone + chunkz = lst + # Tree-hash while len(chunkz) > 1: if len(chunkz) % 2 == 1: chunkz.append(b'\x00' * CHUNKSIZE) chunkz = [hash(chunkz[i] + chunkz[i+1]) for i in range(0, len(chunkz), 2)] + # Return hash of root and length data - return hash((chunkz[0] if len(chunks) > 0 else b'\x00' * 32) + datalen) + return hash((chunkz[0] + datalen) ``` To `tree_hash` a list, we simply do: From a217e9b32c833d2f18f52fde5005fe4a7715c821 Mon Sep 17 00:00:00 2001 From: Hsiao-Wei Wang Date: Tue, 20 Nov 2018 08:12:56 -0600 Subject: [PATCH 2/2] tree_ssz: fix extra parens Co-Authored-By: arnetheduck --- specs/simple-serialize.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/specs/simple-serialize.md b/specs/simple-serialize.md index 9c7744e68..e7df673a8 100644 --- a/specs/simple-serialize.md +++ b/specs/simple-serialize.md @@ -427,7 +427,7 @@ def merkle_hash(lst): chunkz = [hash(chunkz[i] + chunkz[i+1]) for i in range(0, len(chunkz), 2)] # Return hash of root and length data - return hash((chunkz[0] + datalen) + return hash(chunkz[0] + datalen) ``` To `tree_hash` a list, we simply do: