From b65601afdae18b35c9aad8dfa25c1c677f757ec0 Mon Sep 17 00:00:00 2001 From: Vitalik Buterin Date: Thu, 14 Mar 2019 08:29:03 -0500 Subject: [PATCH] Updated Merkle proof file --- specs/light_client/merkle_proofs.md | 63 +++++++++++++++++++++++++---- 1 file changed, 56 insertions(+), 7 deletions(-) diff --git a/specs/light_client/merkle_proofs.md b/specs/light_client/merkle_proofs.md index cf4dad2e3..f52941118 100644 --- a/specs/light_client/merkle_proofs.md +++ b/specs/light_client/merkle_proofs.md @@ -1,3 +1,9 @@ +### Constants + +| Name | Value | +| - | - | +| `LENGTH_FLAG` | `2**64 - 1` | + ### Generalized Merkle tree index In a binary Merkle tree, we define a "generalized index" of a node as `2**depth + index`. Visually, this looks as follows: @@ -36,17 +42,34 @@ y_data_root len(y) ....... ``` -We can now define a concept of a "path", a way of describing a function that takes as input an SSZ object and outputs some specific (possibly deeply nested) member. For example, `foo -> foo.x` is a path, as are `foo -> len(foo.y)` and `foo -> foo[5]`. We'll describe paths as lists: in these three cases they are `["x"]`, `["y", "len"]` and `["y", 5]` respectively. We can now define a function `get_generalized_indices(object: Any, path: List[str OR int], root=1: int) -> int` that converts an object and a path to a set of generalized indices (note that for constant-sized objects, there is only one generalized index and it only depends on the path, but for dynamically sized objects the indices may depend on the object itself too). For dynamically-sized objects, the set of indices will have more than one member because of the need to access an array's length to determine the correct generalized index for some array access. +We can now define a concept of a "path", a way of describing a function that takes as input an SSZ object and outputs some specific (possibly deeply nested) member. For example, `foo -> foo.x` is a path, as are `foo -> len(foo.y)` and `foo -> foo.y[5].w`. 
We'll describe paths as lists, which can have two representations. In "human-readable form", they are `["x"]`, `["y", "__len__"]` and `["y", 5, "w"]` respectively. In "encoded form", they are lists of `uint64` values; in these cases (assuming the fields of `foo` in order are `x` then `y`, and `w` is the first field of `y[i]`) they are `[0]`, `[1, 2**64-1]` and `[1, 5, 0]` respectively. ```python -def get_generalized_indices(obj: Any, path: List[str or int], root=1) -> List[int]: +def path_to_encoded_form(obj: Any, path: List[str or int]) -> List[int]: + if len(path) == 0: + return [] + if isinstance(path[0], "__len__"): + assert len(path) == 1 + return [LENGTH_FLAG] + elif isinstance(path[0], str) and hasattr(obj, "fields"): + return [list(obj.fields.keys()).index(path[0])] + path_to_encoded_form(getattr(obj, path[0]), path[1:]) + elif isinstance(obj, (StaticList, DynamicList)): + return [path[0]] + path_to_encoded_form(obj[path[0]], path[1:]) + else: + raise Exception("Unknown type / path") +``` + +We can now define a function `get_generalized_indices(obj: Any, path: List[int], root: int=1) -> List[int]` that converts an object and a path to a set of generalized indices (note that for constant-sized objects, there is only one generalized index and it only depends on the path, but for dynamically-sized objects the indices may depend on the object itself too). For dynamically-sized objects, the set of indices will have more than one member because of the need to access an array's length to determine the correct generalized index for some array access. 
+ +```python +def get_generalized_indices(obj: Any, path: List[int], root=1) -> List[int]: if len(path) == 0: return [root] elif isinstance(obj, StaticList): items_per_chunk = (32 // len(serialize(x))) if isinstance(x, int) else 1 new_root = root * next_power_of_2(len(obj) // items_per_chunk) + path[0] // items_per_chunk return get_generalized_indices(obj[path[0]], path[1:], new_root) - elif isinstance(obj, DynamicList) and path[0] == "len": + elif isinstance(obj, DynamicList) and path[0] == LENGTH_FLAG: return [root * 2 + 1] elif isinstance(obj, DynamicList) and isinstance(path[0], int): assert path[0] < len(obj) @@ -54,9 +77,9 @@ def get_generalized_indices(obj: Any, path: List[str or int], root=1) -> List[in new_root = root * 2 * next_power_of_2(len(obj) // items_per_chunk) + path[0] // items_per_chunk return [root *2 + 1] + get_generalized_indices(obj[path[0]], path[1:], new_root) elif hasattr(obj, "fields"): - index = list(fields.keys()).index(path[0]) - new_root = root * next_power_of_2(len(fields)) + index - return get_generalized_indices(getattr(obj, path[0]), path[1:], new_root) + field = list(fields.keys())[path[0]] + new_root = root * next_power_of_2(len(fields)) + path[0] + return get_generalized_indices(getattr(obj, field), path[1:], new_root) else: raise Exception("Unknown type / path") ``` @@ -109,6 +132,8 @@ def get_proof_indices(tree_indices: List[int]) -> List[int]: Generating a proof is simply a matter of taking the node of the SSZ hash tree with the union of the given generalized indices for each index given by `get_proof_indices`, and outputting the list of nodes in the same order. 
+Here is the verification function: + ```python def verify_multi_proof(root, indices, leaves, proof): tree = {} @@ -127,8 +152,32 @@ def verify_multi_proof(root, indices, leaves, proof): return (indices == []) or (1 in tree and tree[1] == root) ``` +### MerklePartial + +We define: + +#### `MerklePartialLeaf` + +```python +{ + "path": ["uint64"], + "value": "bytes32" +} +``` + +#### `MerklePartial` + + +```python +{ + "root": "bytes32", + "values": [MerklePartialLeaf], + "proof": ["bytes32"] +} +``` + #### Proofs for execution -We define `MerklePartial(f, arg1, arg2...)` as being a list of Merkle multiproofs of the sets of nodes in the hash trees of the SSZ objects that are needed to authenticate the values needed to compute some function `f(arg1, arg2...)`. An individual Merkle multiproof is given as a dynamic sized list of `bytes32` values, a `MerklePartial` is a fixed-size list of objects `{proof: ["bytes32"], value: "bytes32"}`, one for each `arg` to `f` (if some `arg` is a base type, then the multiproof is empty). +We define `MerklePartial(f, arg1, arg2..., focus=0)` as being a `MerklePartial` object wrapping a Merkle multiproof of the set of nodes in the hash tree of the SSZ object `arg[focus]` that is needed to authenticate the parts of the object needed to compute `f(arg1, arg2...)`. Ideally, any function which accepts an SSZ object should also be able to accept a `MerklePartial` object as a substitute.