From 693c2c2ff4c5f94bfad10cbf12fe46ba77a8aca3 Mon Sep 17 00:00:00 2001 From: tersec Date: Mon, 12 Feb 2024 17:06:02 +0000 Subject: [PATCH] blob sidecar era/erb proposal --- docs/e2store.md | 57 ++++++++++++++++++++++++++++++++----------------- 1 file changed, 38 insertions(+), 19 deletions(-) diff --git a/docs/e2store.md b/docs/e2store.md index 382cbb427..396511428 100644 --- a/docs/e2store.md +++ b/docs/e2store.md @@ -116,6 +116,14 @@ data: snappyFramed(ssz(BeaconState)) The fork and thus the exact format of the `BeaconState` should be derived from the `slot`. +## BlobCompressedSidecars +``` +type: [0x02, 0x00] +data: ssz(List[snappyFramed(BlobSidecar), MAX_BLOBS_PER_BLOCK]) +``` + +`BlobCompressedSidecars` contain a list of `BlobSidecar` objects encoded using `SSZ` then compressed using the snappy [framing format](https://github.com/google/snappy/blob/master/framing_format.txt). + ## Empty ``` @@ -169,7 +177,9 @@ def read_slot_index(f): return (start_slot, record_start, slot_offsets) ``` -# Era files +# Erb files + +Stand-in: like .era files, but blobs instead of blocks. `.era` files are special instances of `.e2s` files that follow a more strict content format optimised for reading and long-term storage and distribution. @@ -183,11 +193,10 @@ Each era is identified by when it ends. Thus, the genesis era is era `0`, follow `.era` file names follow a simple convention: `---.era`: -* `config-name` is the `CONFIG_NAME` field of the runtime configation (`mainnet`, `prater`, `sepolia`, `holesky`, etc) +* `config-name` is the `CONFIG_NAME` field of the runtime configation (`mainnet`, `sepolia`, `holesky`, etc) * `era-number` is the number of the _first_ era stored in the file - for example, the genesis era file has number 0 - as a 5-digit 0-filled decimal integer * `short-era-root` is the first 4 bytes of the last historical root in the _last_ state in the era file, lower-case hex-encoded (8 characters), except the genesis era which instead uses the `genesis_validators_root` field from the genesis state. - * The root is available as `state.historical_roots[era - 1]` except for genesis, which is `state.genesis_validators_root` - * Post-Capella, the root must be computed from `state.historical_summaries[era - state.historical_roots.len - 1]` + * The root is available as `state.historical_summaries[era - state.historical_roots.len - 1]` Era files with multiple eras use the era number of the lowest era stored in the file, and the root of the highest era. @@ -199,9 +208,8 @@ An `.era` file is structured in the following way: ``` era := group+ -group := Version | block* | era-state | other-entries* | slot-index(block)? | slot-index(state) -block := CompressedSignedBeaconBlock -era-state := CompressedBeaconState +group := Version | blobs* | other-entries* | slot-index(block)? +blobs := BlobCompressedSidecars ``` The `block` entries of a group include all blocks leading up to the era transition in slot order. For example, the group representing era `1` contains blocks from slot `0` up to and including block `8191`. Empty slots are skipped. @@ -228,7 +236,7 @@ def read_era_file(name): # Print contents of an era file, backwards with open(name, "rb") as f: - # Seek to end of file to figure out the indices of the state and blocks + # Seek to end of file to figure out the indices of the blobs f.seek(0, 2) groups = 0 @@ -252,8 +260,8 @@ def read_era_file(name): (block_slot, block_index_start, block_slot_offsets) = read_slot_index(f) print( - "Block start slot:", block_slot, - "block index start:", block_index_start, + "Blob start slot:", block_slot, + "blob index start:", block_index_start, "offsets", len(block_slot_offsets)) if any((x for x in block_slot_offsets if x != 0)): @@ -261,7 +269,7 @@ def read_era_file(name): prev_group = block_index_start + [x for x in block_slot_offsets if x != 0][0] - 8 print("Previous group starts at:", prev_group) - # The beginning of the first block (or the state, if there are no blocks) + # The beginning of the first blob list # TODO or the state, if there are no blobs # is the end of the previous group f.seek(prev_group) # Skip header @@ -273,24 +281,19 @@ def read_era_file(name): To verify the internal consistency of an era file, the following checks should be made to verify that an era file is valid for a given network: -* each group follows the given structure of era files with regards to blocks, states and their indices +* each group follows the given structure of era files with regards to blobs and their indices * offsets within indices must point to entries of the correct kind that can be decompressed and deserialized * era file readers must be prepared to handle malicious inputs, including out-of-range offsets, invalid length prefixes and other trivial errors * unknown record types should be ignored, but it is recommended that verifiers report their size and tag + * all blobs are consistent with regard to blocks to which the point * the state is loadable and consistent with the given runtime configuration -* the root of each block in the era file matches that of `state.block_roots` - if a slot is empty according to the block index, this should be confirmed by verifying that - `state.get_block_root_at_slot(empty_slot - 1) == state.get_block_root_at_slot(empty_slot)` except for the first slot of the era which, if possible, should be verified against `era - 1` - * the genesis era file does not have any blocks -* the signature of each block can be verified by the keys in the given state (or any newer state). +* TODO need the block era file here; in general, blobs can only be verified to a limited existent standalone Extended verification consists of verifying a list of era files against a particular history anchored in a checkpoint or a head block. Verification starts from a well-known finalized checkpoint for a slot within the era, using `anchor_state_root = checkpoint_state.state_roots[0]` as anchor and walking the era files as a linked list. For each era file: -* verify that `hash_tree_root(state) == anchor_state_root` - * this anchors the era in a particular history, starting from the given state root - the state root is available from any state within the anchor era. * verify the internal consistency of the era, as above -* set `anchor_state_root == state.state_roots[0]` # FAQ @@ -351,3 +354,19 @@ Each era file contains a full `BeaconState` object whose `block_roots` field cor Offsets in `SSZ` are `uint32` thus from a practical point of view, any one SSZ object may generally not exceed that size. A future entry type can introduce chunking should larger entries be needed, or spill the remaining size bytes into `reserved`, effectively turning the encoding of the length into a fictive `uint48` type. + +## Why are are entire BlobSidecar sets per blob stored together? + +SlotIndex only allows one index per slot, and this also allows exact one-to-one correspondence with block-based .era files, while avoiding adding special cases for blocks without blobs. + +## Why use a single SSZ structure for this BlobSidecar set per blob? + +It similarly creates a mirrored e2s structure between Era and Erb, while reusing existing SSZ parsing and loading code. + +## Why use lists of compressed blob sidecars rather than either compressed lists of blob sidecars or uncompressed lists of uncompressed blob sidecars? + +This enables req/resp copying directly, which operates on a per-blob-sidecar-basis rather than fetching all blob sidecars at once, without additional Snappy decompression. + +## Why separate BlobSidecar from block storage? + +Blob sidecars aren't, properly, part of the consensus record. It is reasonable for an archival node to archive only blocks, not blob sidecars. This isn't unique to Era/Erb files, but occurs, e.g., while syncing, where blob verification only must occur within the blob retention window.