From ea7ad28298e6060943aab69a29d6a99d2b752ae8 Mon Sep 17 00:00:00 2001
From: Jacek Sieka <jacek@status.im>
Date: Fri, 16 Nov 2018 17:33:49 -0600
Subject: [PATCH 1/2] simplify merkle tree chunking

* pack small items tightly to fit more items in single chunk, decreasing
the number of hash operations needed
* remove chunk padding - hash algorithm will pad to its own block size
anyway
* express data length in number of items instead of binary bytes at leaf
level (equivalent)
---
 specs/simple-serialize.md | 42 ++++++++++++++++-----------------------
 1 file changed, 17 insertions(+), 25 deletions(-)

diff --git a/specs/simple-serialize.md b/specs/simple-serialize.md
index defe9bba7..9c7744e68 100644
--- a/specs/simple-serialize.md
+++ b/specs/simple-serialize.md
@@ -402,40 +402,32 @@ Return the hash of the serialization of the value.
 First, we define some helpers and then the Merkle tree function. The constant `CHUNK_SIZE` is set to 128.
 
 ```python
-# Returns the smallest power of 2 equal to or higher than x
-def next_power_of_2(x):
-    return x if x == 1 else next_power_of_2((x+1) // 2) * 2
-
-# Extends data length to a power of 2 by minimally right-zero-padding
-def extend_to_power_of_2(data):
-    return data + b'\x00' * (next_power_of_2(len(data)) - len(data))
-
-# Concatenate a list of homogeneous objects into data and pad it
-def list_to_glob(lst):
-    if len(lst) == 0:
-        return b''
-    if len(lst[0]) != next_power_of_2(len(lst[0])):
-        lst = [extend_to_power_of_2(x) for x in lst]
-    data = b''.join(lst)
-    # Pad to chunksize
-    data += b'\x00' * (CHUNKSIZE - (len(data) % CHUNKSIZE or CHUNKSIZE))
-    return data
-
-# Merkle tree hash of a list of items
+# Merkle tree hash of a list of homogenous, non-empty items
 def merkle_hash(lst):
-    # Turn list into padded data
-    data = list_to_glob(lst)
     # Store length of list (to compensate for non-bijectiveness of padding)
     datalen = len(lst).to_bytes(32, 'big')
-    # Convert to chunks
-    chunkz = [data[i:i+CHUNKSIZE] for i in range(0, len(data), CHUNKSIZE)]
+
+    if len(lst) == 0:
+        # Handle empty list case
+        chunkz = [b'\x00' * CHUNKSIZE]
+    elif len(lst[0]) < CHUNKSIZE:
+        # See how many items fit in a chunk
+        items_per_chunk = CHUNKSIZE // len(lst[0])
+
+        # Build a list of chunks based on the number of items in the chunk
+        chunkz = [b''.join(lst[i:i+items_per_chunk]) for i in range(0, len(lst), items_per_chunk)]
+    else:
+        # Leave large items alone
+        chunkz = lst
+
     # Tree-hash
     while len(chunkz) > 1:
         if len(chunkz) % 2 == 1:
             chunkz.append(b'\x00' * CHUNKSIZE)
         chunkz = [hash(chunkz[i] + chunkz[i+1]) for i in range(0, len(chunkz), 2)]
+
     # Return hash of root and length data
-    return hash((chunkz[0] if len(chunks) > 0 else b'\x00' * 32) + datalen)
+    return hash((chunkz[0] + datalen)
 ```
 
 To `tree_hash` a list, we simply do:

From a217e9b32c833d2f18f52fde5005fe4a7715c821 Mon Sep 17 00:00:00 2001
From: Hsiao-Wei Wang <hwwang156@gmail.com>
Date: Tue, 20 Nov 2018 08:12:56 -0600
Subject: [PATCH 2/2] tree_ssz: fix extra parens

Co-Authored-By: arnetheduck <arnetheduck@gmail.com>
---
 specs/simple-serialize.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/specs/simple-serialize.md b/specs/simple-serialize.md
index 9c7744e68..e7df673a8 100644
--- a/specs/simple-serialize.md
+++ b/specs/simple-serialize.md
@@ -427,7 +427,7 @@ def merkle_hash(lst):
         chunkz = [hash(chunkz[i] + chunkz[i+1]) for i in range(0, len(chunkz), 2)]
 
     # Return hash of root and length data
-    return hash((chunkz[0] + datalen)
+    return hash(chunkz[0] + datalen)
 ```
 
 To `tree_hash` a list, we simply do: