Merge pull request #140 from status-im/simplify-merkle-tree

simplify merkle tree chunking
This commit is contained in:
Hsiao-Wei Wang 2018-11-20 23:19:04 +09:00 committed by GitHub
commit 98312f40b5
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
1 changed files with 17 additions and 25 deletions

View File

@ -402,40 +402,32 @@ Return the hash of the serialization of the value.
First, we define some helpers and then the Merkle tree function. The constant `CHUNKSIZE` is set to 128. First, we define some helpers and then the Merkle tree function. The constant `CHUNKSIZE` is set to 128.
```python ```python
# Returns the smallest power of 2 equal to or higher than x # Merkle tree hash of a list of homogeneous, non-empty items
def next_power_of_2(x):
    """Return the smallest power of 2 that is equal to or higher than x.

    Args:
        x: The value to round up to a power of two.

    Returns:
        The smallest power of two ``p`` with ``p >= x``. For any ``x``
        below 1 the result is 1 (``2 ** 0``).
    """
    # A halve-recurse-double formulation never reaches an ``x == 1`` base
    # case when x <= 0, because (0 + 1) // 2 == 0. Doubling upwards from 1
    # terminates for every input and gives the same result for x >= 1.
    power = 1
    while power < x:
        power *= 2
    return power
# Extends data length to a power of 2 by minimally right-zero-padding
def extend_to_power_of_2(data):
    """Right-pad ``data`` with zero bytes until its length is a power of 2.

    The target length comes from ``next_power_of_2(len(data))``, so the
    padding is the minimum needed; when the length already equals that
    target, nothing is appended.
    """
    current_len = len(data)
    shortfall = next_power_of_2(current_len) - current_len
    return data + b'\x00' * shortfall
# Concatenate a list of homogeneous objects into data and pad it
def list_to_glob(lst):
    """Concatenate a list of homogeneous byte strings into one padded blob.

    An empty list yields ``b''``. Otherwise, if the first item's length is
    not a power of 2, every item is zero-extended with
    ``extend_to_power_of_2``; only the first item is inspected, since the
    items are expected to be homogeneous. The joined data is then
    right-padded with zero bytes to a multiple of ``CHUNKSIZE``.
    """
    if len(lst) == 0:
        return b''

    first_len = len(lst[0])
    if next_power_of_2(first_len) != first_len:
        lst = [extend_to_power_of_2(item) for item in lst]

    glob = b''.join(lst)

    # Pad to chunksize; data already on a chunk boundary gets no padding.
    remainder = len(glob) % CHUNKSIZE
    if remainder:
        glob += b'\x00' * (CHUNKSIZE - remainder)
    return glob
# Merkle tree hash of a list of items
def merkle_hash(lst):
    """Compute the Merkle tree hash of a list of homogeneous, non-empty items.

    Items shorter than ``CHUNKSIZE`` are packed several to a chunk; items of
    ``CHUNKSIZE`` or more are each used as one chunk. The chunks are hashed
    pairwise until a single root remains, and the root is then hashed
    together with the 32-byte big-endian length of the list.

    ``CHUNKSIZE`` and ``hash`` are not defined here; they are expected to be
    provided by the surrounding specification (``hash`` presumably returns
    bytes, since its results are concatenated -- confirm against its
    definition).

    Args:
        lst: List of byte strings, all of the same length.

    Returns:
        The result of ``hash`` applied to the tree root followed by the
        encoded list length.
    """
    # Store length of list (to compensate for non-bijectiveness of padding)
    datalen = len(lst).to_bytes(32, 'big')

    if len(lst) == 0:
        # Handle empty list case
        chunkz = [b'\x00' * CHUNKSIZE]
    elif len(lst[0]) < CHUNKSIZE:
        # See how many items fit in a chunk
        items_per_chunk = CHUNKSIZE // len(lst[0])
        # Build a list of chunks based on the number of items in the chunk
        chunkz = [
            b''.join(lst[i:i + items_per_chunk])
            for i in range(0, len(lst), items_per_chunk)
        ]
    else:
        # Leave large items alone. Copy the list so that the zero-chunk
        # appended below for odd lengths never mutates the caller's list.
        chunkz = list(lst)

    # Tree-hash
    while len(chunkz) > 1:
        if len(chunkz) % 2 == 1:
            # Odd number of nodes: add a zero chunk so every node has a pair.
            chunkz.append(b'\x00' * CHUNKSIZE)
        chunkz = [
            hash(chunkz[i] + chunkz[i + 1])
            for i in range(0, len(chunkz), 2)
        ]

    # Return hash of root and length data
    return hash(chunkz[0] + datalen)
``` ```
To `tree_hash` a list, we simply do: To `tree_hash` a list, we simply do: